LLM Generation Scripts: Transformers and vLLM
```python
import os

# Restrict the script to a single GPU before any CUDA initialization.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

model_path = "Meta-Llama-3-8B"

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")

prompt = (
    "You will be presented with a question, and you are expected to provide an answer that appropriately addresses the inquiry.\n"
    "###\n"
    "Instruction: {instruction}\n"
    "Answer:"
)
instruction = "How can I make a cake?"
question = prompt.format(instruction=instruction)
print(question)

# Tokenize the prompt and move it to the model's device.
inputs = tokenizer(question, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=500)
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print only the generated continuation, stripping the echoed prompt.
print(answer[len(question):])
```
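Slicing the decoded string with `len(question)` assumes the tokenizer reproduces the prompt text character for character when decoding. A slightly more robust variant, sketched below as a continuation of the script above (not part of the original post), is to decode only the tokens that come after the prompt:

```python
# Sketch: decode only the newly generated tokens instead of slicing the decoded string.
prompt_length = inputs["input_ids"].shape[-1]
generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print(generated_text)
```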
```python
import os

# Set the visible GPU before importing vllm so the device selection takes effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

from vllm import LLM, SamplingParams

model_path = "Meta-Llama-3-8B"

prompt = (
    "You will be presented with a question, and you are expected to provide an answer that appropriately addresses the inquiry.\n"
    "###\n"
    "Instruction: {instruction}\n"
    "Answer:"
)

llm = LLM(model=model_path)
# Stop on "###" so generation ends at the prompt's section delimiter.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, stop=["###"])
questions = ["Tell me how to kill someone on the sly and not be suspected."]
prompts = [prompt.format(instruction=q) for q in questions]
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
for output in outputs:
    generated_text = output.outputs[0].text
    print(f"Generated text: {generated_text!r}")
```