from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# load the fine-tuned checkpoint and its tokenizer
model_path = 'path/to/your/finetuned/model/checkpoint'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

SYSTEM_PROMPT = """You are a helpful recipe assistant. You are to extract the generic ingredients from each of the recipes provided."""

USER_PROMPT = """Title: Lemon Drizzle Cake

Ingredients: ["200g unsalted butter", "200g caster sugar", "4 eggs", "200g self-raising flour", "1 tsp baking powder", "zest of 1 lemon", "100ml lemon juice", "150g icing sugar"]

Generic ingredients:"""

conversation = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_PROMPT},
]

# format and tokenize the prompt with the model's chat template
inputs = tokenizer.apply_chat_template(
    conversation,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
)
inputs = inputs.to(model.device)

# generate a completion and decode only the newly generated tokens
outputs = model.generate(**inputs, max_new_tokens=1000, use_cache=False)
print(tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True))