146 lines
4.5 KiB
Python
146 lines
4.5 KiB
Python
import os
|
||
import base64
|
||
import cv2
|
||
import numpy as np
|
||
from PIL import Image
|
||
from io import BytesIO
|
||
from openai import OpenAI
|
||
|
||
class ThermalVision:
    """Analyze thermal images with a vision-language model (VLM).

    Wraps an OpenAI-compatible chat-completions client. ``analyze`` sends one
    or more images plus a text prompt to the vision model; ``summarize`` sends
    a text-only prompt to the text model.
    """

    # Environment variable consulted for the API key when none is passed in.
    _API_KEY_ENV = "DASHSCOPE_API_KEY"
    _DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
    # MIME type used when the image format cannot be recognised; this is the
    # label that was historically sent for every image.
    _FALLBACK_MIME = "image/png"

    def __init__(
        self,
        api_key=None,
        base_url=None,
        model="Qwen/Qwen2.5-VL-72B-Instruct",
        text_model="Qwen/Qwen2.5-14B-Instruct",
    ):
        """Create the analyzer and its API client.

        Args:
            api_key: API key. When omitted, the ``DASHSCOPE_API_KEY``
                environment variable is used, then a built-in fallback key.
            base_url: Base URL of the OpenAI-compatible endpoint. Defaults to
                the DashScope compatible-mode endpoint.
            model: Accepted for backward compatibility; currently NOT used
                (see the note in the body).
            text_model: Accepted for backward compatibility; currently NOT
                used (see the note in the body).
        """
        # SECURITY(review): the literal below is a credential committed to
        # source control. It is kept only so existing callers keep working;
        # it should be rotated and removed in favour of the environment
        # variable or an explicit ``api_key`` argument.
        self.api_key = (
            api_key
            or os.environ.get(self._API_KEY_ENV)
            or "sk-36930e681f094274964ffe6c51d62078"
        )
        self.base_url = base_url or self._DEFAULT_BASE_URL

        # NOTE(review): ``model`` / ``text_model`` are deliberately ignored.
        # Their signature defaults use a different naming scheme from the
        # identifiers below, so honouring them could break the default
        # endpoint. Confirm with callers before wiring them through.
        self.model = "qwen2.5-vl-72b-instruct"
        self.text_model = "qwen2.5-14b-instruct"

        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    @staticmethod
    def _sniff_mime(data):
        """Return the image MIME type implied by the leading magic bytes.

        Falls back to ``image/png`` for unrecognised data.
        """
        head = bytes(data[:12])
        if head.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if head.startswith(b"\xff\xd8\xff"):
            return "image/jpeg"
        if head.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if head[:4] == b"RIFF" and head[8:12] == b"WEBP":
            return "image/webp"
        if head.startswith(b"BM"):
            return "image/bmp"
        return ThermalVision._FALLBACK_MIME

    def _image_bytes(self, image):
        """Return the encoded image file bytes for any supported input.

        Raw bytes and files on disk are passed through untouched; NumPy
        arrays and PIL images are serialised as PNG.
        """
        if isinstance(image, (bytes, bytearray)):
            return bytes(image)

        if isinstance(image, (str, os.PathLike)):
            if os.path.isfile(image):
                with open(image, "rb") as f:
                    return f.read()
            # A string that is not an existing file is rejected with the same
            # error as any other unsupported input.
            raise ValueError("不支持的图像格式")

        if isinstance(image, np.ndarray):
            # Colour arrays are treated as OpenCV channel order (BGR / BGRA)
            # and converted to the RGB(A) order PIL expects. Other shapes
            # (e.g. single-channel) are handed to PIL unchanged.
            if image.ndim == 3 and image.shape[2] == 3:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            elif image.ndim == 3 and image.shape[2] == 4:
                image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
            pil_image = Image.fromarray(image)
        elif isinstance(image, Image.Image):
            pil_image = image
        else:
            raise ValueError("不支持的图像格式")

        buffered = BytesIO()
        pil_image.save(buffered, format="PNG")
        return buffered.getvalue()

    def _image_part(self, image):
        """Build one ``image_url`` content part carrying *image* as a data URL."""
        raw = self._image_bytes(image)
        encoded = base64.b64encode(raw).decode("utf-8")
        # Label the payload with its real format: bytes and files are
        # forwarded as-is and are not necessarily PNG.
        mime = self._sniff_mime(raw)
        return {
            "type": "image_url",
            "image_url": {"url": f"data:{mime};base64,{encoded}"},
        }

    @staticmethod
    def _build_messages(user_content, system_prompt):
        """Return the chat message list, with an optional leading system message."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_content})
        return messages

    def _complete(self, model, messages, stream):
        """Call the chat-completions endpoint.

        Returns the response object as-is when ``stream`` is truthy, otherwise
        the message content of the first choice.
        """
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            stream=stream,
        )
        return response if stream else response.choices[0].message.content

    def encode_image(self, image):
        """Return *image* as a base64-encoded string.

        Args:
            image: A NumPy array (3/4-channel arrays are treated as BGR/BGRA),
                a PIL image, a path to an existing file, or raw encoded
                image bytes.

        Returns:
            The base64 text of the encoded image bytes. Arrays and PIL images
            are encoded as PNG; files and bytes are passed through unchanged.

        Raises:
            ValueError: If *image* is none of the supported kinds (including
                a string that is not an existing file).
        """
        return base64.b64encode(self._image_bytes(image)).decode("utf-8")

    def analyze(self, image, prompt, extra_images=None, stream=True, system_prompt=None):
        """Send one or more images and a prompt to the vision model.

        Args:
            image: Primary image; any input accepted by ``encode_image``.
            prompt: User prompt text, sent after all images.
            extra_images: Optional iterable of additional images, sent after
                the primary image in the given order.
            stream: Whether to request a streamed response.
            system_prompt: Optional system message setting the model's role
                and analysis framework.

        Returns:
            The response object returned by the client when ``stream`` is
            truthy, otherwise the text content of the first choice.
        """
        content = [self._image_part(img) for img in (image, *(extra_images or ()))]
        content.append({"type": "text", "text": prompt})
        messages = self._build_messages(content, system_prompt)
        return self._complete(self.model, messages, stream)

    def summarize(self, prompt, system_prompt=None, stream=True):
        """Send a text-only prompt to the text model (pre-massage summary).

        Args:
            prompt: User prompt text.
            system_prompt: Optional system message setting the model's role
                and conversation framework.
            stream: Whether to request a streamed response.

        Returns:
            The response object returned by the client when ``stream`` is
            truthy, otherwise the text content of the first choice.
        """
        messages = self._build_messages(prompt, system_prompt)
        return self._complete(self.text_model, messages, stream)