Phi3V Example

Source: vllm-project/vllm.

  1. 1from vllm import LLM, SamplingParams
  2. 2from vllm.assets.image import ImageAsset
  3. 3
  4. 4
  5. 5def run_phi3v():
  6. 6 model_path = "microsoft/Phi-3-vision-128k-instruct"
  7. 7
  8. 8 # Note: The default setting of max_num_seqs (256) and
  9. 9 # max_model_len (128k) for this model may cause OOM.
  10. 10 # You may lower either to run this example on lower-end GPUs.
  11. 11
  12. 12 # In this example, we override max_num_seqs to 5 while
  13. 13 # keeping the original context length of 128k.
  14. 14 llm = LLM(
  15. 15 model=model_path,
  16. 16 trust_remote_code=True,
  17. 17 max_num_seqs=5,
  18. 18 )
  19. 19
  20. 20 image = ImageAsset("cherry_blossom").pil_image
  21. 21
  22. 22 # single-image prompt
  23. 23 prompt = "<|user|>\n<|image_1|>\nWhat is the season?<|end|>\n<|assistant|>\n" # noqa: E501
  24. 24 sampling_params = SamplingParams(temperature=0, max_tokens=64)
  25. 25
  26. 26 outputs = llm.generate(
  27. 27 {
  28. 28 "prompt": prompt,
  29. 29 "multi_modal_data": {
  30. 30 "image": image
  31. 31 },
  32. 32 },
  33. 33 sampling_params=sampling_params)
  34. 34 for o in outputs:
  35. 35 generated_text = o.outputs[0].text
  36. 36 print(generated_text)
  37. 37
  38. 38
  39. 39if __name__ == "__main__":
  40. 40 run_phi3v()