2025-05-27 15:46:31 +08:00

146 lines
4.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import base64
import cv2
import numpy as np
from PIL import Image
from io import BytesIO
from openai import OpenAI
class ThermalVision:
    """Analyze thermal images with a vision-language model (VLM).

    The class wraps an OpenAI-compatible chat-completions client.
    ``analyze`` sends one or more images plus a text prompt to the vision
    model; ``summarize`` sends a text-only prompt to the text model.
    """

    # Endpoint used when the caller does not pass ``base_url``.
    _DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    # Environment variable consulted before falling back to the embedded key.
    _API_KEY_ENV = "DASHSCOPE_API_KEY"

    # SECURITY: a credential embedded in source is visible to anyone who can
    # read the repository. It is kept only so existing callers that rely on
    # the implicit default keep working; rotate it and supply the key through
    # ``api_key`` or the environment variable above instead.
    _LEGACY_API_KEY = "sk-36930e681f094274964ffe6c51d62078"

    # Model identifiers used with the default endpoint.
    _DEFAULT_VISION_MODEL = "qwen2.5-vl-72b-instruct"
    _DEFAULT_TEXT_MODEL = "qwen2.5-14b-instruct"

    # The constructor's signature defaults are spelled differently from the
    # identifiers the code has always sent; map them so callers relying on
    # the defaults still get the same model names as before.
    _MODEL_ALIASES = {
        "Qwen/Qwen2.5-VL-72B-Instruct": _DEFAULT_VISION_MODEL,
        "Qwen/Qwen2.5-14B-Instruct": _DEFAULT_TEXT_MODEL,
    }

    # (magic-byte prefix, MIME type) pairs used to label data URLs.
    _MAGIC_PREFIXES = (
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
        (b"BM", "image/bmp"),
    )

    def __init__(self, api_key=None, base_url=None,
                 model="Qwen/Qwen2.5-VL-72B-Instruct",
                 text_model="Qwen/Qwen2.5-14B-Instruct"):
        """Create the analyzer and its API client.

        Args:
            api_key: API key. When falsy, the ``DASHSCOPE_API_KEY``
                environment variable is used, then the embedded legacy key.
            base_url: Base URL of the OpenAI-compatible endpoint. When falsy,
                the built-in default endpoint is used.
            model: Vision model identifier used by ``analyze``. The legacy
                default spelling is translated to the identifier that was
                previously hard-coded; any other value is used verbatim.
            text_model: Text model identifier used by ``summarize``; resolved
                the same way as ``model``.
        """
        self.api_key = (
            api_key
            or os.environ.get(self._API_KEY_ENV)
            or self._LEGACY_API_KEY
        )
        self.base_url = base_url or self._DEFAULT_BASE_URL
        self.model = self._resolve_model(model, self._DEFAULT_VISION_MODEL)
        self.text_model = self._resolve_model(
            text_model, self._DEFAULT_TEXT_MODEL
        )
        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    @classmethod
    def _resolve_model(cls, requested, fallback):
        """Return the model id to send: alias-translated, or ``fallback`` if unset."""
        if not requested:
            return fallback
        return cls._MODEL_ALIASES.get(requested, requested)

    @classmethod
    def _sniff_mime(cls, header):
        """Guess an image MIME type from leading bytes; default to PNG."""
        for prefix, mime in cls._MAGIC_PREFIXES:
            if header.startswith(prefix):
                return mime
        # WEBP is a RIFF container: "RIFF" + 4 size bytes + "WEBP".
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "image/webp"
        # Unknown data keeps the label this class has always used.
        return "image/png"

    @staticmethod
    def _image_bytes(image):
        """Return the raw encoded bytes for any supported image input."""
        # Raw bytes and file paths are passed through untouched, so they are
        # handled before the branches that need an imaging library.
        if isinstance(image, (bytes, bytearray)):
            return bytes(image)
        if isinstance(image, (str, os.PathLike)):
            if not os.path.isfile(image):
                raise ValueError(f"图像文件不存在: {image}")
            with open(image, "rb") as handle:
                return handle.read()

        if isinstance(image, np.ndarray):
            # Three-channel arrays are converted from BGR to RGB before being
            # handed to PIL; other shapes are passed through unchanged.
            if image.ndim == 3 and image.shape[2] == 3:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)

        if isinstance(image, Image.Image):
            buffer = BytesIO()
            image.save(buffer, format="PNG")
            return buffer.getvalue()

        raise ValueError("不支持的图像格式")

    def encode_image(self, image):
        """Return ``image`` as a base64-encoded UTF-8 string.

        Args:
            image: One of: a NumPy array (three-channel arrays are converted
                BGR -> RGB, then saved as PNG), a PIL image (saved as PNG), a
                path to an existing file (read verbatim), or ``bytes`` /
                ``bytearray`` (used verbatim).

        Returns:
            The base64 text of the image bytes.

        Raises:
            ValueError: If ``image`` is a path that is not an existing file,
                or is of an unsupported type.
        """
        return base64.b64encode(self._image_bytes(image)).decode("utf-8")

    def _image_part(self, image):
        """Build one ``image_url`` content part with a correctly labelled data URL."""
        encoded = self.encode_image(image)
        # 16 base64 characters decode to 12 bytes, which is enough for every
        # signature checked by _sniff_mime.
        mime = self._sniff_mime(base64.b64decode(encoded[:16]))
        return {
            "type": "image_url",
            "image_url": {"url": f"data:{mime};base64,{encoded}"},
        }

    def _chat(self, model, user_content, system_prompt, stream):
        """Send one user turn (plus optional system prompt) and return the reply."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_content})

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            stream=stream,
        )
        if stream:
            return response
        return response.choices[0].message.content

    def analyze(self, image, prompt, extra_images=None, stream=True,
                system_prompt=None):
        """Analyze one or more images with the vision model.

        Args:
            image: Primary image; any input accepted by ``encode_image``.
            prompt: User text sent after the images.
            extra_images: Optional iterable of additional images, sent after
                the primary image and before the text.
            stream: When true, the response object returned by the client's
                streaming call is returned as-is; otherwise the text of the
                first choice is returned.
            system_prompt: Optional system message setting the assistant's
                role and analysis framework.

        Returns:
            The streaming response object, or the reply text.
        """
        content = [
            self._image_part(item)
            for item in (image, *(extra_images or ()))
        ]
        content.append({"type": "text", "text": prompt})
        return self._chat(self.model, content, system_prompt, stream)

    def summarize(self, prompt, system_prompt=None, stream=True):
        """Run a text-only completion with the text model.

        The original note describes this as the pre-massage analysis summary.

        Args:
            prompt: User text.
            system_prompt: Optional system message setting the assistant's
                role and conversation framework.
            stream: When true, the response object returned by the client's
                streaming call is returned as-is; otherwise the text of the
                first choice is returned.

        Returns:
            The streaming response object, or the reply text.
        """
        return self._chat(self.text_model, prompt, system_prompt, stream)