"""Image description service backed by chat-completion style LLM APIs."""

import asyncio
import io
import json
import os
from pathlib import Path
from typing import Any, Dict, Optional
from urllib.parse import urlparse

import requests
from PIL import Image

from settings import settings

# Which backend describe_image_sync() dispatches to: "DeepSeek", "Doubao",
# or anything else for ChatGPT.
image_to_text_model = "Doubao"

deepseek_api_model = "deepseek-chat"
doubao_api_model = "doubao-1.5-vision-pro-250328"
chatgpt_api_model = "gpt-4o"

# Upper bound (seconds) for each HTTP request. Without a timeout, requests
# waits forever on a stalled connection and would pin an executor thread.
request_timeout_seconds = 60

default_system_message = """
请用中英文分别描述该图片,使用结构化描述,描述的内容用于ai绘画,因此请优化内容,不要用这是开头,使之适合用作ai绘画prompts。
输出格式为:
{
    "中文描述": "中文内容",
    "英文描述": "英文内容"
}
"""

default_user_content = "请用中英文分别生成该图片的内容描述。"


def _parse_description_json(raw_content: str) -> Any:
    """Decode the JSON document contained in a model reply.

    The reply is first parsed strictly. If that fails, the outermost
    ``{...}`` span is parsed instead, which tolerates replies wrapped in a
    Markdown code fence or surrounded by extra prose.

    Args:
        raw_content: The ``content`` string of the first choice.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If neither attempt yields valid JSON; the error
            from the strict attempt is the one raised.
        TypeError: If ``raw_content`` is not a str/bytes value.
    """
    try:
        return json.loads(raw_content)
    except json.JSONDecodeError as strict_error:
        start = raw_content.find("{")
        end = raw_content.rfind("}")
        if start == -1 or end <= start:
            raise
        try:
            return json.loads(raw_content[start:end + 1])
        except json.JSONDecodeError:
            # Report the failure for the full reply, not for the slice.
            raise strict_error from None


class ImageDescribeService:
    """Ask a configured LLM backend for bilingual descriptions of an image."""

    def __init__(
        self,
        system_message: Optional[str] = None,
        user_content: Optional[str] = None,
    ):
        """Initialise the image description service.

        Args:
            system_message: Custom system prompt; falls back to
                ``default_system_message`` when empty or None.
            user_content: Custom user prompt; falls back to
                ``default_user_content`` when empty or None.
        """
        self.system_message = system_message or default_system_message
        self.user_content = user_content or default_user_content

    def _post_chat(
        self,
        api_url: str,
        api_key: str,
        model: str,
        user_content: Any,
    ) -> Optional[Dict]:
        """POST one system+user chat request and return the decoded reply.

        Args:
            api_url: Endpoint to post to.
            api_key: Bearer token for the ``Authorization`` header.
            model: Model identifier placed in the payload.
            user_content: Value of the user message's ``content`` field
                (a string, or a list of content parts).

        Returns:
            The decoded JSON body on HTTP 200, otherwise None (the status and
            body are printed).

        Raises:
            requests.exceptions.RequestException: On connection failure or
                when the request exceeds ``request_timeout_seconds``.
            ValueError: If a 200 response body is not valid JSON.
        """
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": self.system_message},
                {"role": "user", "content": user_content},
            ],
            "temperature": 0.9,
            "top_p": 0.9,
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        response = requests.post(
            api_url,
            headers=headers,
            json=payload,
            timeout=request_timeout_seconds,
        )
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            return None
        return response.json()

    def send_to_chatgpt(self, image_url: str) -> Optional[Dict]:
        """Send an image description request to ChatGPT.

        Args:
            image_url: URL of the image to describe.

        Returns:
            The decoded response body, or None on a non-200 status.
        """
        # NOTE(review): the image is passed as a plain-text link rather than
        # as an image_url content part; confirm the endpoint actually
        # retrieves the image from that link.
        return self._post_chat(
            settings.chatgpt_api_url,
            settings.chatgpt_api_key,
            chatgpt_api_model,
            f"{self.user_content}\n\n图片链接: {image_url}",
        )

    def send_to_deepseek(self, image_url: str) -> Optional[Dict]:
        """Send an image description request to DeepSeek.

        Args:
            image_url: URL of the image to describe.

        Returns:
            The decoded response body, or None on a non-200 status.
        """
        # NOTE(review): same plain-text link format as send_to_chatgpt;
        # confirm the model can access the image this way.
        return self._post_chat(
            settings.deepseek_api_url,
            settings.deepseek_api_key,
            deepseek_api_model,
            f"{self.user_content}\n\n图片链接: {image_url}",
        )

    def send_to_doubao(self, image_url: str) -> Optional[Dict]:
        """Send an image description request to Doubao.

        The user message is a list of content parts: the text prompt followed
        by an ``image_url`` part carrying the image location.

        Args:
            image_url: URL of the image to describe.

        Returns:
            The decoded response body, or None on a non-200 status.
        """
        content_parts = [
            {"type": "text", "text": self.user_content},
            {"type": "image_url", "image_url": {"url": image_url}},
        ]
        return self._post_chat(
            settings.doubao_api_url,
            settings.doubao_api_key,
            doubao_api_model,
            content_parts,
        )

    def describe_image_sync(self, image_url: str) -> Dict:
        """Describe an image synchronously.

        Dispatches to the backend selected by ``image_to_text_model`` and
        extracts the "中文描述" / "英文描述" fields from the JSON reply.

        Args:
            image_url: URL of the image to describe.

        Returns:
            ``{"status": "success", "data": {"cn_description": ...,
            "en_description": ...}}`` on success, otherwise
            ``{"status": "error", "message": ...}``. This method does not
            raise; every failure is reported through the returned dict.
        """
        try:
            # Pick the backend; anything other than DeepSeek/Doubao means ChatGPT.
            if image_to_text_model == "DeepSeek":
                ai_response = self.send_to_deepseek(image_url)
            elif image_to_text_model == "Doubao":
                ai_response = self.send_to_doubao(image_url)
            else:
                ai_response = self.send_to_chatgpt(image_url)

            if ai_response is None:
                return {
                    "status": "error",
                    "message": "AI服务请求失败",
                }

            choices = ai_response.get("choices", [])
            if not choices:
                return {
                    "status": "error",
                    "message": "AI响应无效",
                }

            message = choices[0].get("message", {}).get("content", "")
            # Models often wrap the JSON in a Markdown fence; parse leniently.
            response_data = _parse_description_json(message)

            return {
                "status": "success",
                "data": {
                    "cn_description": response_data.get("中文描述", ""),
                    "en_description": response_data.get("英文描述", ""),
                },
            }
        except Exception as e:
            # Service boundary: convert any failure (network error, timeout,
            # malformed reply) into an error result instead of raising.
            print(f"描述任务处理失败: {str(e)}")
            return {
                "status": "error",
                "message": f"处理描述任务时发生错误: {str(e)}",
            }

    async def describe_image(self, image_url: str) -> Dict:
        """Describe an image without blocking the event loop.

        Runs ``describe_image_sync`` in the loop's default executor.

        Args:
            image_url: URL of the image to describe.

        Returns:
            The result dict of ``describe_image_sync``, or an error dict if
            scheduling or awaiting the executor job fails.
        """
        try:
            # get_running_loop() is the recommended lookup inside a coroutine.
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                None, self.describe_image_sync, image_url
            )
        except Exception as e:
            return {
                "status": "error",
                "message": f"图像描述失败: {str(e)}",
            }