OpenAI Vision API Client

Source: vllm-project/vllm.

  1. 1"""An example showing how to use vLLM to serve VLMs.
  2. 2
  3. 3Launch the vLLM server with the following command:
  4. 4vllm serve llava-hf/llava-1.5-7b-hf --chat-template template_llava.jinja
  5. 5"""
  6. 6import base64
  7. 7
  8. 8import requests
  9. 9from openai import OpenAI
  10. 10
  11. 11# Modify OpenAI's API key and API base to use vLLM's API server.
  12. 12openai_api_key = "EMPTY"
  13. 13openai_api_base = "http://localhost:8000/v1"
  14. 14
  15. 15client = OpenAI(
  16. 16 # defaults to os.environ.get("OPENAI_API_KEY")
  17. 17 api_key=openai_api_key,
  18. 18 base_url=openai_api_base,
  19. 19)
  20. 20
  21. 21models = client.models.list()
  22. 22model = models.data[0].id
  23. 23
  24. 24image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
  25. 25
  26. 26# Use image url in the payload
  27. 27chat_completion_from_url = client.chat.completions.create(
  28. 28 messages=[{
  29. 29 "role":
  30. 30 "user",
  31. 31 "content": [
  32. 32 {
  33. 33 "type": "text",
  34. 34 "text": "What’s in this image?"
  35. 35 },
  36. 36 {
  37. 37 "type": "image_url",
  38. 38 "image_url": {
  39. 39 "url": image_url
  40. 40 },
  41. 41 },
  42. 42 ],
  43. 43 }],
  44. 44 model=model,
  45. 45)
  46. 46
  47. 47result = chat_completion_from_url.choices[0].message.content
  48. 48print(f"Chat completion output:{result}")
  49. 49
  50. 50
  51. 51# Use base64 encoded image in the payload
  52. 52def encode_image_base64_from_url(image_url: str) -> str:
  53. 53 """Encode an image retrieved from a remote url to base64 format."""
  54. 54
  55. 55 with requests.get(image_url) as response:
  56. 56 response.raise_for_status()
  57. 57 result = base64.b64encode(response.content).decode('utf-8')
  58. 58
  59. 59 return result
  60. 60
  61. 61
  62. 62image_base64 = encode_image_base64_from_url(image_url=image_url)
  63. 63chat_completion_from_base64 = client.chat.completions.create(
  64. 64 messages=[{
  65. 65 "role":
  66. 66 "user",
  67. 67 "content": [
  68. 68 {
  69. 69 "type": "text",
  70. 70 "text": "What’s in this image?"
  71. 71 },
  72. 72 {
  73. 73 "type": "image_url",
  74. 74 "image_url": {
  75. 75 "url": f"data:image/jpeg;base64,{image_base64}"
  76. 76 },
  77. 77 },
  78. 78 ],
  79. 79 }],
  80. 80 model=model,
  81. 81)
  82. 82
  83. 83result = chat_completion_from_base64.choices[0].message.content
  84. 84print(f"Chat completion output:{result}")