# Viewer header from the original listing: 194 lines · 6.4 KiB · Python
import asyncio
import io
import json
import os
import re
from pathlib import Path
from typing import Dict, Optional
from urllib.parse import urlparse

import requests
from PIL import Image

from settings import settings

# Backend selector read by ImageDescribeService.describe_image_sync:
# "DeepSeek" and "Doubao" pick those providers; any other value falls back to ChatGPT.
image_to_text_model = "Doubao"

# Model identifiers placed in the "model" field of each provider's request payload.
deepseek_api_model = "deepseek-chat"
doubao_api_model = "doubao-1.5-vision-pro-250328"
chatgpt_api_model = "gpt-4o"

# Default system prompt. It asks the model for a structured Chinese and English
# description suitable as AI-painting prompts, returned as a JSON object with the
# keys "中文描述" and "英文描述" (the keys describe_image_sync reads back).
default_system_message = """
请用中英文分别描述该图片,使用结构化描述,描述的内容用于ai绘画,因此请优化内容,不要用这是开头,使之适合用作ai绘画prompts。
输出格式为:
{
"中文描述": "中文内容",
"英文描述": "英文内容"
}
"""

# Default user-turn text sent alongside the image (or image link).
default_user_content = "请用中英文分别生成该图片的内容描述。"


class ImageDescribeService:
    """Produce Chinese and English descriptions of an image via a chat-completion API.

    The backend is chosen by the module-level ``image_to_text_model`` value.
    Each ``send_to_*`` method posts one request and returns the decoded JSON
    body, while ``describe_image_sync`` / ``describe_image`` wrap that call and
    normalise the outcome into a ``{"status": ..., ...}`` dictionary.
    """

    # (connect, read) timeout in seconds. ``requests`` applies no timeout by
    # default, so without this a stalled provider would block the calling
    # thread forever.
    request_timeout = (10, 120)

    def __init__(self, system_message: Optional[str] = None, user_content: Optional[str] = None):
        """Initialise the image description service.

        Args:
            system_message: Custom system prompt; falls back to
                ``default_system_message`` when empty or ``None``.
            user_content: Custom user prompt; falls back to
                ``default_user_content`` when empty or ``None``.
        """
        self.system_message = system_message or default_system_message
        self.user_content = user_content or default_user_content

    def _build_payload(self, model: str, user_content) -> Dict:
        """Assemble the request body shared by every provider.

        Args:
            model: Model identifier for the ``model`` field.
            user_content: Content of the user message, either a plain string
                or a list of typed content parts.

        Returns:
            The chat-completion payload with the system and user messages.
        """
        return {
            "model": model,
            "messages": [
                {"role": "system", "content": self.system_message},
                {"role": "user", "content": user_content},
            ],
            "temperature": 0.9,
            "top_p": 0.9,
        }

    def _link_prompt(self, image_url: str) -> str:
        """Return the user prompt with the image link appended as plain text."""
        return f"{self.user_content}\n\n图片链接: {image_url}"

    def _post(self, api_url: str, api_key: str, payload: Dict) -> Optional[Dict]:
        """POST ``payload`` with bearer authentication and decode the JSON reply.

        Args:
            api_url: Endpoint to post to.
            api_key: Token placed in the ``Authorization`` header.
            payload: JSON-serialisable request body.

        Returns:
            The decoded response body, or ``None`` when the HTTP status is not 200.

        Raises:
            requests.RequestException: On connection failures or timeouts.
            ValueError: When a 200 response body is not valid JSON.
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        response = requests.post(
            api_url,
            headers=headers,
            json=payload,
            timeout=self.request_timeout,
        )
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            return None
        return response.json()

    @staticmethod
    def _parse_descriptions(message: str) -> Dict[str, str]:
        """Extract both descriptions from the model's reply text.

        The reply is expected to be a JSON object with the keys ``中文描述`` and
        ``英文描述``. If the whole text is not valid JSON (for example because
        it is wrapped in a Markdown code fence or surrounded by prose), the
        outermost ``{...}`` span is parsed instead.

        Args:
            message: Raw ``content`` string of the first choice.

        Returns:
            ``{"cn_description": ..., "en_description": ...}``; a missing key
            yields an empty string.

        Raises:
            json.JSONDecodeError: When no parsable JSON object is found.
            ValueError: When the parsed JSON is not an object.
        """
        text = (message or "").strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Greedy match spans from the first "{" to the last "}".
            candidate = re.search(r"\{.*\}", text, re.DOTALL)
            if candidate is None:
                raise
            parsed = json.loads(candidate.group(0))
        if not isinstance(parsed, dict):
            raise ValueError("AI响应内容不是JSON对象")
        return {
            "cn_description": parsed.get("中文描述", ""),
            "en_description": parsed.get("英文描述", ""),
        }

    def send_to_chatgpt(self, image_url: str) -> Optional[Dict]:
        """Send an image description request to ChatGPT.

        Args:
            image_url: Link to the image to describe.

        Returns:
            The decoded response body, or ``None`` on a non-200 status.
        """
        # NOTE(review): the link is passed as plain text, not as an image
        # content part, so the model presumably sees only the URL string.
        # Confirm whether the configured endpoint resolves links.
        payload = self._build_payload(chatgpt_api_model, self._link_prompt(image_url))
        return self._post(settings.chatgpt_api_url, settings.chatgpt_api_key, payload)

    def send_to_deepseek(self, image_url: str) -> Optional[Dict]:
        """Send an image description request to DeepSeek.

        Args:
            image_url: Link to the image to describe.

        Returns:
            The decoded response body, or ``None`` on a non-200 status.
        """
        # NOTE(review): as with ChatGPT, the image is referenced only by a
        # textual link. Confirm this is intended.
        payload = self._build_payload(deepseek_api_model, self._link_prompt(image_url))
        return self._post(settings.deepseek_api_url, settings.deepseek_api_key, payload)

    def send_to_doubao(self, image_url: str) -> Optional[Dict]:
        """Send an image description request to Doubao.

        Unlike the other two senders, the image is attached as an
        ``image_url`` content part next to the text prompt.

        Args:
            image_url: Link to the image to describe.

        Returns:
            The decoded response body, or ``None`` on a non-200 status.
        """
        content_parts = [
            {"type": "text", "text": self.user_content},
            {"type": "image_url", "image_url": {"url": image_url}},
        ]
        payload = self._build_payload(doubao_api_model, content_parts)
        return self._post(settings.doubao_api_url, settings.doubao_api_key, payload)

    def describe_image_sync(self, image_url: str) -> Dict:
        """Describe an image synchronously.

        Args:
            image_url: Link to the image to describe.

        Returns:
            On success ``{"status": "success", "data": {"cn_description": ...,
            "en_description": ...}}``; otherwise ``{"status": "error",
            "message": ...}``. Exceptions are converted into the error form
            rather than raised.
        """
        try:
            # Pick the backend configured at module level.
            if image_to_text_model == "DeepSeek":
                ai_response = self.send_to_deepseek(image_url)
            elif image_to_text_model == "Doubao":
                ai_response = self.send_to_doubao(image_url)
            else:
                ai_response = self.send_to_chatgpt(image_url)

            if ai_response is None:
                return {
                    "status": "error",
                    "message": "AI服务请求失败",
                }

            choices = ai_response.get("choices", [])
            if not choices:
                return {
                    "status": "error",
                    "message": "AI响应无效",
                }

            message = choices[0].get("message", {}).get("content", "")
            return {
                "status": "success",
                "data": self._parse_descriptions(message),
            }
        except Exception as e:
            # Boundary handler: callers receive an error dictionary, never an exception.
            print(f"描述任务处理失败: {str(e)}")
            return {
                "status": "error",
                "message": f"处理描述任务时发生错误: {str(e)}",
            }

    async def describe_image(self, image_url: str) -> Dict:
        """Describe an image without blocking the event loop.

        Runs ``describe_image_sync`` in the loop's default executor.

        Args:
            image_url: Link to the image to describe.

        Returns:
            The same dictionary shape as ``describe_image_sync``.
        """
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(None, self.describe_image_sync, image_url)
        except Exception as e:
            return {
                "status": "error",
                "message": f"图像描述失败: {str(e)}",
            }