Offline Inference

Source: vllm-project/vllm.

from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM.
llm = LLM(model="facebook/opt-125m")
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")