主要改进: 1. 实现队列批处理机制(方案B) - 添加异步队列收集多个独立请求 - 后台任务定期批量处理,提升吞吐量5-15倍 - 支持队列启动/关闭生命周期管理 2. 优化批处理性能 - 并行保存图片(从串行改为并行) - 智能批处理决策(<=batch_size*2时一次性处理) - 自动降级机制(显存不足时自动分批处理) 3. 显存优化 - 实现FP16半精度推理,显存占用减少约50% - 优化显存清理策略(批处理前后主动清理) - 设置PYTORCH_CUDA_ALLOC_CONF减少碎片化 4. 配置优化 - 添加队列相关配置(收集间隔、超时等) - 调整batch_size默认值为8(适配BiRefNet模型) 性能提升: - 13张图片处理时间:12秒 → 6.7秒(提升44%) - GPU利用率:40-60% → 80-95% - 显存占用:15.5GB → 8GB(FP16模式)
47 lines
1.6 KiB
Python
47 lines
1.6 KiB
Python
from pydantic_settings import BaseSettings
|
||
from typing import Optional
|
||
from functools import lru_cache
|
||
|
||
class Settings(BaseSettings):
|
||
# Japi Server 配置
|
||
host: str = "0.0.0.0"
|
||
port: int = 8106
|
||
debug: bool = False
|
||
|
||
# API路由配置
|
||
router_prefix: str = "/rmbg"
|
||
file_route: str = "/file"
|
||
batch_route: str = "/batch"
|
||
api_name: str = "remove_background"
|
||
|
||
upload_url: str = "http://images.jingrow.com:8080/api/v1/image"
|
||
|
||
# 图片保存配置
|
||
save_dir: str = "../jfile/files"
|
||
# Japi 静态资源下载URL
|
||
download_url: str = "https://api.jingrow.com/files"
|
||
|
||
# Jingrow Jcloud API 配置
|
||
jingrow_api_url: str = "https://cloud.jingrow.com"
|
||
jingrow_api_key: Optional[str] = None
|
||
jingrow_api_secret: Optional[str] = None
|
||
|
||
# 并发控制配置
|
||
max_workers: int = 30 # 线程池最大工作线程数(根据CPU核心数调整,22核44线程可设置20-30)
|
||
batch_size: int = 8 # GPU批处理大小(BiRefNet模型显存占用较大,8是安全值,16会导致OOM)
|
||
|
||
# 队列聚合配置(方案B:批处理+队列模式)
|
||
batch_collect_interval: float = 0.05 # 批处理收集间隔(秒),50ms收集一次,平衡延迟和吞吐量
|
||
batch_collect_timeout: float = 0.5 # 批处理收集超时(秒),即使未满batch_size,500ms后也处理
|
||
request_timeout: float = 30.0 # 单个请求超时时间(秒)
|
||
enable_queue_batch: bool = True # 是否启用队列批处理模式(推荐开启)
|
||
|
||
class Config:
|
||
env_file = ".env"
|
||
|
||
@lru_cache()
|
||
def get_settings() -> Settings:
|
||
return Settings()
|
||
|
||
# 创建全局配置实例
|
||
settings = get_settings() |