Offline Inference Embedding

Source: vllm-project/vllm (https://github.com/vllm-project/vllm).

  # Offline embedding example: encode a handful of prompts with vLLM's LLM
  # class and print the embedding produced for each one.
  from vllm import LLM

  # Sample prompts.
  prompts = [
      "Hello, my name is",
      "The president of the United States is",
      "The capital of France is",
      "The future of AI is",
  ]

  # Create an LLM.
  # NOTE(review): enforce_eager=True is passed straight through to vLLM; see
  # the LLM class documentation for its exact effect.
  model = LLM(model="intfloat/e5-mistral-7b-instruct", enforce_eager=True)
  # Generate embeddings. The output is a list of EmbeddingRequestOutputs.
  outputs = model.encode(prompts)
  # Print the outputs.
  for output in outputs:
      print(output.outputs.embedding)  # list of 4096 floats