jflow/backend/api/describe_image.py
2025-09-14 20:37:11 +08:00

267 lines
9.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import requests
from pathlib import Path
from urllib.parse import urlparse
from PIL import Image
import io
# Import the jflow configuration and authentication helpers
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import Config
from utils.auth import get_jingrow_cloud_api_headers
# Module-level configuration for the image-description task.
# Everything below runs at import time and reads from the "AI Settings" single
# document through the `jingrow` object.
# NOTE(review): `jingrow` is not imported in the visible part of this file --
# confirm it is provided by the runtime, otherwise importing this module raises
# NameError.

# Default system message and user content
default_system_message = """
请用中英文分别描述该图片使用结构化描述描述的内容用于ai绘画因此请优化内容不要用这是开头使之适合用作ai绘画prompts。
输出格式为:
{
"中文描述": "中文内容",
"英文描述": "英文内容"
}
"""
default_user_content = "请用中英文分别生成该图片的内容描述。"
# Read the system message and user content from the settings; fall back to the
# defaults above when the stored value is empty/falsy.
system_message = jingrow.db.get_single_value("AI Settings", "imgtxt_system_message") or default_system_message
user_content = jingrow.db.get_single_value("AI Settings", "imgtxt_user_content") or default_user_content
# Which backend to use; process_describe_task compares this value against
# "Jingrow", "DeepSeek" and "Doubao" and otherwise falls back to ChatGPT.
image_to_text_model = jingrow.db.get_single_value("AI Settings", "image_to_text_model")
# Read the provider configuration from AI Settings
chatgpt_api_url = jingrow.db.get_single_value("AI Settings", "chatgpt_api_url")
# NOTE(review): the ChatGPT key is read with get_single_value while the
# DeepSeek and Doubao keys below use get_password -- confirm this is intended.
chatgpt_api_key = jingrow.db.get_single_value("AI Settings", "chatgpt_api_key")
chatgpt_image_to_text_model = jingrow.db.get_single_value("AI Settings", "chatgpt_image_to_text_model")
deepseek_api_url = jingrow.db.get_single_value("AI Settings", "deepseek_api_url")
ai_settings = jingrow.get_single("AI Settings")
deepseek_api_key = ai_settings.get_password("deepseek_api_key")
deepseek_image_to_text_model = jingrow.db.get_single_value("AI Settings", "deepseek_image_to_text_model")
# Read the Doubao configuration
doubao_api_url = jingrow.db.get_single_value("AI Settings", "doubao_api_url")
doubao_api_key = ai_settings.get_password("doubao_api_key")
doubao_image_to_text_model = jingrow.db.get_single_value("AI Settings", "doubao_image_to_text_model")
# NOTE(review): upload_url is not referenced in the visible part of this file.
upload_url = jingrow.db.get_single_value("AI Settings", "upload_url")
# Request helper for the ChatGPT backend
def send_to_chatgpt(system_message, user_content, image_url, timeout=(10, 120)):
    """Ask the configured ChatGPT endpoint to describe an image.

    The request uses the "simple" chat format: the image URL is appended to
    the user message as plain text instead of being sent as an ``image_url``
    content part.
    NOTE(review): send_to_doubao sends the image as a structured content
    part; confirm the ChatGPT endpoint can resolve a URL given as text.

    Args:
        system_message: Text sent as the ``system`` role message.
        user_content: Prompt text sent as the ``user`` role message.
        image_url: URL of the image to describe.
        timeout: ``requests`` timeout, either a number of seconds or a
            ``(connect, read)`` tuple. Without it a stalled endpoint would
            block the calling worker indefinitely.

    Returns:
        The decoded JSON response body when the endpoint answers HTTP 200,
        otherwise ``None`` (the failure is logged via ``jingrow.log_error``).

    Raises:
        Exceptions raised by ``requests`` (connection errors, timeouts,
        an undecodable JSON body) are propagated to the caller.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {chatgpt_api_key}",
    }
    request_body = {
        "model": chatgpt_image_to_text_model,
        "messages": [
            {"role": "system", "content": system_message},
            {
                "role": "user",
                "content": f"{user_content}\n\n图片链接: {image_url}",
            },
        ],
        "temperature": 0.9,
        "top_p": 0.9,
    }

    response = requests.post(
        chatgpt_api_url,
        headers=request_headers,
        json=request_body,
        timeout=timeout,
    )
    if response.status_code != 200:
        jingrow.log_error(f"Error: {response.status_code}, {response.text}", "GPT API 请求失败")
        return None
    return response.json()
# Request helper for the DeepSeek backend
def send_to_deepseek(system_message, user_content, image_url, timeout=(10, 120)):
    """Ask the configured DeepSeek endpoint to describe an image.

    The request uses the "simple" chat format: the image URL is appended to
    the user message as plain text instead of being sent as an ``image_url``
    content part.
    NOTE(review): send_to_doubao sends the image as a structured content
    part; confirm the DeepSeek endpoint can resolve a URL given as text.

    Args:
        system_message: Text sent as the ``system`` role message.
        user_content: Prompt text sent as the ``user`` role message.
        image_url: URL of the image to describe.
        timeout: ``requests`` timeout, either a number of seconds or a
            ``(connect, read)`` tuple. Without it a stalled endpoint would
            block the calling worker indefinitely.

    Returns:
        The decoded JSON response body when the endpoint answers HTTP 200,
        otherwise ``None`` (the failure is logged via ``jingrow.log_error``).

    Raises:
        Exceptions raised by ``requests`` (connection errors, timeouts,
        an undecodable JSON body) are propagated to the caller.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {deepseek_api_key}",
    }
    request_body = {
        "model": deepseek_image_to_text_model,
        "messages": [
            {"role": "system", "content": system_message},
            {
                "role": "user",
                "content": f"{user_content}\n\n图片链接: {image_url}",
            },
        ],
        "temperature": 0.9,
        "top_p": 0.9,
    }

    response = requests.post(
        deepseek_api_url,
        headers=request_headers,
        json=request_body,
        timeout=timeout,
    )
    if response.status_code != 200:
        jingrow.log_error(f"Error: {response.status_code}, {response.text}", "DeepSeek API 请求失败")
        return None
    return response.json()
# Request helper for the Doubao backend
def send_to_doubao(system_message, user_content, image_url, timeout=(10, 120)):
    """Ask the configured Doubao endpoint to describe an image.

    The user message is sent as a list of content parts: one ``text`` part
    carrying the prompt and one ``image_url`` part carrying the image URL.

    Args:
        system_message: Text sent as the ``system`` role message.
        user_content: Prompt text sent in the ``text`` content part.
        image_url: URL of the image, sent in the ``image_url`` content part.
        timeout: ``requests`` timeout, either a number of seconds or a
            ``(connect, read)`` tuple. Without it a stalled endpoint would
            block the calling worker indefinitely.

    Returns:
        The decoded JSON response body when the endpoint answers HTTP 200,
        otherwise ``None`` (the failure is logged via ``jingrow.log_error``).

    Raises:
        Exceptions raised by ``requests`` (connection errors, timeouts,
        an undecodable JSON body) are propagated to the caller.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {doubao_api_key}",
    }
    user_parts = [
        {"type": "text", "text": user_content},
        {"type": "image_url", "image_url": {"url": image_url}},
    ]
    request_body = {
        "model": doubao_image_to_text_model,
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_parts},
        ],
        "temperature": 0.9,
        "top_p": 0.9,
    }

    response = requests.post(
        doubao_api_url,
        headers=request_headers,
        json=request_body,
        timeout=timeout,
    )
    if response.status_code != 200:
        jingrow.log_error(f"Error: {response.status_code}, {response.text}", "Doubao API 请求失败")
        return None
    return response.json()
# Call the Jingrow API to obtain an image description
def call_jdescribe_api(image_url, system_message=None, user_content=None, timeout=(10, 120)):
    """Request an image description from the Jingrow ``/jdescribe/get`` API.

    This is a best-effort call: every failure yields ``None`` instead of an
    exception. Failures are reported through the standard ``logging`` module
    so they are no longer silently discarded.

    Args:
        image_url: URL of the image to describe.
        system_message: Optional custom system prompt; only sent when truthy.
        user_content: Optional custom user prompt; only sent when truthy.
        timeout: ``requests`` timeout, either a number of seconds or a
            ``(connect, read)`` tuple, so a stalled server cannot block the
            caller indefinitely.

    Returns:
        The ``data`` field of the JSON response (falling back to
        ``message.data``), or ``None`` when the auth headers are missing,
        the HTTP status is not 200, the body is not JSON, the data is empty,
        or any exception occurs.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    logger = logging.getLogger(__name__)

    try:
        api_url = f"{get_jingrow_cloud_api_url()}/jdescribe/get"

        headers = get_jingrow_cloud_api_headers()
        if not headers:
            send_notification("Jingrow API 未设置,请在 AI设置 中设置 Jingrow Api Key 和 Jingrow Api Secret")
            return None

        request_data = {"image_url": image_url}
        # Forward custom prompts only when the caller supplied them.
        if system_message:
            request_data["system_message"] = system_message
        if user_content:
            request_data["user_content"] = user_content

        resp = requests.post(api_url, json=request_data, headers=headers, timeout=timeout)
        if resp.status_code != 200:
            logger.warning(
                "jdescribe API returned HTTP %s: %s",
                resp.status_code,
                resp.text[:500],
            )
            return None

        try:
            resp_json = resp.json()
        except Exception:
            logger.warning("jdescribe API returned a body that is not valid JSON")
            return None

        # Prefer the top-level "data" field, then fall back to "message.data".
        data = resp_json.get("data") or (resp_json.get("message") or {}).get("data")
        return data or None
    except Exception:
        # Deliberately broad: callers rely on None (never an exception) here.
        logger.exception("jdescribe API call failed")
        return None
# Entry point that processes an image-description task
def _parse_description_json(message):
    """Decode the model's JSON answer, tolerating fences or prose around it."""
    text = (message or "").strip()
    # Models often wrap the object in a Markdown code fence or add a sentence
    # before/after it; keep only the outermost {...} span when one exists.
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]
    return json.loads(text)


def _mark_describe_failed(pagetype, pg_name, reason):
    """Log *reason*, reset the task status to "not completed", return False."""
    jingrow.log_error(reason, "描述失败")
    update_status_progress(pagetype, pg_name, "任务未完成", 0)
    return False


def process_describe_task(pg_name, pagetype):
    """Generate Chinese/English prompts for a document's reference image.

    Loads the document, takes the first non-empty ``reference_image_1`` ..
    ``reference_image_4`` field, asks the backend selected by
    ``image_to_text_model`` for a description, and stores the result in the
    ``prompts_zh`` (Chinese) and ``prompts`` (English) fields.

    Once the status has been set to "任务进行中", every failure path resets it
    to "任务未完成" so a failed task is never left looking as if it were still
    running.

    Args:
        pg_name: Name of the document to process.
        pagetype: Page type of the document.

    Returns:
        ``True`` when both descriptions were obtained and saved, ``False``
        on any failure (failures are logged via ``jingrow.log_error``).
    """
    try:
        pg = jingrow.get_pg(pagetype, pg_name)
        if not pg:
            jingrow.log_error("找不到文档", "描述任务失败")
            return False

        # Only the first non-empty reference image is described.
        candidates = (getattr(pg, f"reference_image_{i}", None) for i in range(1, 5))
        image_path = next((img for img in candidates if img), None)
        if not image_path:
            jingrow.log_error("缺少参考图片", "描述任务失败")
            return False

        # Site-relative paths are prefixed with the site URL.
        if image_path.startswith(("http://", "https://")):
            full_image_url = image_path
        else:
            full_image_url = f"{jingrow.utils.get_url()}{image_path}"

        update_status_progress(pagetype, pg_name, "任务进行中", 20)

        cn_description, en_description = "", ""
        try:
            update_progress(pg_name, 40, pagetype)
            if image_to_text_model == "Jingrow":
                data = call_jdescribe_api(full_image_url, system_message, user_content)
                if isinstance(data, dict):
                    cn_description = data.get("cn_description", "")
                    en_description = data.get("en_description", "")
                update_progress(pg_name, 60, pagetype)
            else:
                if image_to_text_model == "DeepSeek":
                    gpt_response = send_to_deepseek(system_message, user_content, full_image_url)
                elif image_to_text_model == "Doubao":
                    gpt_response = send_to_doubao(system_message, user_content, full_image_url)
                else:
                    gpt_response = send_to_chatgpt(system_message, user_content, full_image_url)
                update_progress(pg_name, 60, pagetype)

                if not (gpt_response and gpt_response.get("choices")):
                    return _mark_describe_failed(pagetype, pg_name, "AI响应无效")
                try:
                    message = gpt_response["choices"][0].get("message", {}).get("content", "")
                    response_data = _parse_description_json(message)
                    cn_description = response_data.get("中文描述", "")
                    en_description = response_data.get("英文描述", "")
                except Exception as e:
                    return _mark_describe_failed(pagetype, pg_name, f"解析响应失败: {str(e)}")

            # Both languages are required; a partial result counts as failure.
            if not (cn_description and en_description):
                return _mark_describe_failed(pagetype, pg_name, "未获取描述内容")

            jingrow.db.set_value(pagetype, pg_name, "prompts_zh", cn_description)
            jingrow.db.set_value(pagetype, pg_name, "prompts", en_description)
            update_status_progress(pagetype, pg_name, "已描述", 100)
            return True
        except Exception as e:
            jingrow.log_error(f"处理图片描述异常: {str(e)}", "描述失败")
            # Mark the task as failed.
            update_status_progress(pagetype, pg_name, "任务未完成", 0)
            return False
    except Exception as e:
        jingrow.log_error(f"处理任务异常: {str(e)}", "描述失败")
        update_status_progress(pagetype, pg_name, "任务未完成", 0)
        return False