feat: add automatic node dependency management with uv workspace

2025-11-12 02:58:49 +08:00 · 2025-11-12 02:58:49 +08:00 · 94c2e051c1
commit 94c2e051c1
parent b6b52b34b9
5 changed files with 2838 additions and 147 deletions
--- a/apps/jingrow/jingrow/api/node_management.py
+++ b/apps/jingrow/jingrow/api/node_management.py
@ -14,6 +14,7 @@ from jingrow.utils.jingrow_api import get_record_id, create_record, update_recor
 from jingrow.utils.auth import get_jingrow_cloud_url, get_jingrow_cloud_api_headers
 from jingrow.config import Config
 from jingrow.utils.path import get_root_path, get_jingrow_root
+from jingrow.utils.node_dependencies import install_node_dependencies


 logger = logging.getLogger(__name__)
@ -580,6 +581,12 @@ def _install_single_node_directory(node_dir: str) -> Dict[str, Any]:
 		# 复制整个节点目录
 		shutil.copytree(node_dir_path, target_node_dir)
 		
+		# 安装节点依赖（如果节点有独立的 pyproject.toml）
+		dep_result = install_node_dependencies(node_type)
+		if not dep_result.get("success"):
+			# 依赖安装失败，记录警告但继续安装节点（可能依赖已存在或稍后安装）
+			logger.warning(f"节点 {node_type} 依赖安装失败: {dep_result.get('error', dep_result.get('message'))}")
+		
 		# 导入到数据库
 		# 检查是否已存在
 		exists_res = get_record_id(
--- a/apps/jingrow/jingrow/utils/node_dependencies.py
+++ b/apps/jingrow/jingrow/utils/node_dependencies.py
@ -0,0 +1,321 @@
+"""
+节点依赖管理工具
+支持节点独立管理自己的依赖，使用 uv pip install 直接安装
+"""
+import subprocess
+import logging
+import re
+from pathlib import Path
+from typing import Dict, Any, Optional, List
+from jingrow.utils.path import get_jingrow_root
+
+logger = logging.getLogger(__name__)
+
+
+def get_node_pyproject_path(node_type: str) -> Optional[Path]:
+    """Return the path of the node's own ``pyproject.toml``, if it has one.
+
+    The file is looked up at ``<jingrow root>/ai/nodes/<node_type>/pyproject.toml``.
+
+    Args:
+        node_type: Name of the node directory under ``ai/nodes``.
+
+    Returns:
+        The path when the file exists, otherwise ``None``.
+    """
+    candidate = get_jingrow_root() / "ai" / "nodes" / node_type / "pyproject.toml"
+    if candidate.exists():
+        return candidate
+    return None
+
+
+def has_node_dependencies(node_type: str) -> bool:
+    """Tell whether the node ships its own ``pyproject.toml``.
+
+    Args:
+        node_type: Name of the node directory under ``ai/nodes``.
+
+    Returns:
+        ``True`` when ``get_node_pyproject_path`` finds the file.
+    """
+    pyproject_path = get_node_pyproject_path(node_type)
+    return pyproject_path is not None
+
+
+def discover_nodes_with_dependencies() -> List[str]:
+    """List every node directory that contains a ``pyproject.toml``.
+
+    Only direct sub-directories of ``<jingrow root>/ai/nodes`` are inspected;
+    directories whose name starts with ``_`` are ignored.
+
+    Returns:
+        The matching directory names in sorted order, or an empty list when
+        the nodes directory does not exist.
+    """
+    nodes_root = get_jingrow_root() / "ai" / "nodes"
+    if not nodes_root.exists():
+        return []
+
+    found = [
+        entry.name
+        for entry in nodes_root.iterdir()
+        if entry.is_dir()
+        and not entry.name.startswith('_')
+        and (entry / "pyproject.toml").exists()
+    ]
+    found.sort()
+    return found
+
+
+def _apply_workspace_members(content: str, members_lines: List[str]) -> str:
+    """Return *content* with the ``[tool.uv.workspace]`` members replaced.
+
+    Args:
+        content: Full text of the root ``pyproject.toml``.
+        members_lines: Pre-formatted array item lines (already indented,
+            quoted and comma-terminated).
+
+    Returns:
+        The updated text. An existing ``members`` array inside the workspace
+        table is rewritten in place; a table without ``members`` gets the key
+        inserted right below its header; a file without the table gets the
+        table appended.
+    """
+    # The empty array is rendered the same way in every branch so that a
+    # second call always sees the text produced by the first one.
+    array_body = ('\n' + '\n'.join(members_lines) + '\n') if members_lines else '\n'
+
+    header = re.search(
+        r'^[ \t]*\[tool\.uv\.workspace\][ \t]*(?:#.*)?$', content, re.MULTILINE
+    )
+    if header is None:
+        return content.rstrip() + '\n\n[tool.uv.workspace]\nmembers = [' + array_body + ']\n'
+
+    # Restrict the edit to the workspace table: it ends at the next line that
+    # opens a table (starts with "[") or at the end of the file.
+    next_table = re.compile(r'^[ \t]*\[', re.MULTILINE).search(content, header.end())
+    table_end = next_table.start() if next_table else len(content)
+    table_text = content[header.end():table_end]
+
+    members_re = re.compile(
+        r'^([ \t]*members[ \t]*=[ \t]*\[)(.*?)(\])', re.MULTILINE | re.DOTALL
+    )
+    new_table, replaced = members_re.subn(
+        lambda m: m.group(1) + array_body + m.group(3), table_text, count=1
+    )
+    if not replaced:
+        # The table exists without a members key (e.g. only "exclude").
+        # Appending a second [tool.uv.workspace] table would be invalid TOML,
+        # so add the key to the existing table instead.
+        new_table = '\nmembers = [' + array_body + ']' + table_text
+
+    return content[:header.end()] + new_table + content[table_end:]
+
+
+def update_workspace_members() -> Dict[str, Any]:
+    """Rewrite the uv workspace members in the root ``pyproject.toml``.
+
+    The members list is set to exactly the nodes returned by
+    ``discover_nodes_with_dependencies`` (explicit paths rather than a
+    wildcard, so directories without a ``pyproject.toml`` are never listed).
+    The file is only written when its content actually changes.
+
+    Returns:
+        ``{"success": True, "message": ...}`` on success, or
+        ``{"success": False, "error": ...}`` when the root ``pyproject.toml``
+        is missing or any exception is raised.
+    """
+    try:
+        jingrow_root = get_jingrow_root()
+        pyproject_path = jingrow_root.parent / "pyproject.toml"
+
+        if not pyproject_path.exists():
+            return {"success": False, "error": "pyproject.toml 不存在"}
+
+        # Every node that ships its own pyproject.toml becomes a member.
+        nodes_with_deps = discover_nodes_with_dependencies()
+
+        with open(pyproject_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        members_lines = [
+            f'    "jingrow/ai/nodes/{node_type}",' for node_type in nodes_with_deps
+        ]
+        new_content = _apply_workspace_members(content, members_lines)
+
+        # Skip the write when nothing changed.
+        if new_content == content:
+            return {
+                "success": True,
+                "message": "workspace members 无需更新"
+            }
+
+        with open(pyproject_path, 'w', encoding='utf-8') as f:
+            f.write(new_content)
+        return {
+            "success": True,
+            "message": f"已更新 workspace members: {', '.join(nodes_with_deps) if nodes_with_deps else '无'}"
+        }
+
+    except Exception as e:
+        logger.error(f"更新 workspace members 失败: {str(e)}")
+        return {"success": False, "error": str(e)}
+
+
+def _scan_dependency_array(content: str) -> List[str]:
+    """Extract the string items of the first ``dependencies = [...]`` array.
+
+    Fallback parser for interpreters without ``tomllib``. It scans character
+    by character so that single-line arrays, trailing ``#`` comments, extras
+    such as ``pkg[extra]`` and quotes nested inside environment markers are
+    all handled.
+
+    Args:
+        content: Text of a ``pyproject.toml`` file.
+
+    Returns:
+        The non-empty quoted items in declaration order; an empty list when
+        no ``dependencies`` array is found.
+    """
+    start = re.search(r'^[ \t]*dependencies[ \t]*=[ \t]*\[', content, re.MULTILINE)
+    if start is None:
+        return []
+
+    items: List[str] = []
+    chars: List[str] = []
+    quote = ''
+    pos = start.end()
+    end = len(content)
+    while pos < end:
+        ch = content[pos]
+        if quote:
+            if ch == '\\' and quote == '"' and pos + 1 < end:
+                # Escaped character inside a basic string: keep it verbatim.
+                chars.append(content[pos + 1])
+                pos += 2
+                continue
+            if ch == quote:
+                item = ''.join(chars).strip()
+                if item:
+                    items.append(item)
+                chars = []
+                quote = ''
+            else:
+                chars.append(ch)
+        elif ch in '"\'':
+            quote = ch
+        elif ch == '#':
+            # Comment outside a string: skip to the end of the line.
+            newline = content.find('\n', pos)
+            if newline == -1:
+                break
+            pos = newline
+        elif ch == ']':
+            # A bracket outside a string closes the array.
+            break
+        pos += 1
+    return items
+
+
+def _read_node_dependencies(pyproject_path: Path) -> List[str]:
+    """Read the declared dependencies from a node's ``pyproject.toml``.
+
+    Uses ``tomllib`` when the interpreter provides it (Python 3.11+) and the
+    text scanner otherwise, so Python 3.10 keeps working.
+
+    Args:
+        pyproject_path: Path of the node's ``pyproject.toml``.
+
+    Returns:
+        The requirement strings; an empty list when none are declared.
+
+    Raises:
+        OSError: When the file cannot be read.
+        Exception: ``tomllib`` parse errors propagate to the caller.
+    """
+    try:
+        import tomllib
+    except ImportError:
+        tomllib = None
+
+    if tomllib is not None:
+        with open(pyproject_path, 'rb') as f:
+            data = tomllib.load(f)
+        declared = data.get("project", {}).get("dependencies", [])
+        if not isinstance(declared, list):
+            return []
+        return [dep.strip() for dep in declared if isinstance(dep, str) and dep.strip()]
+
+    with open(pyproject_path, 'r', encoding='utf-8') as f:
+        return _scan_dependency_array(f.read())
+
+
+def _requirement_name(requirement: str) -> str:
+    """Return the distribution name at the start of a requirement string.
+
+    Everything from the first character that cannot be part of a name
+    (version operators, ``[extras]``, ``;`` markers, ``@`` URLs, spaces) is
+    dropped. Returns an empty string when no name can be recognised.
+    """
+    match = re.match(r'[A-Za-z0-9][A-Za-z0-9._-]*', requirement.strip())
+    return match.group(0) if match else ''
+
+
+def install_node_dependencies(node_type: str, sync: bool = True) -> Dict[str, Any]:
+    """Install the dependencies declared by a node.
+
+    The node's ``pyproject.toml`` is parsed and its dependencies are passed
+    to ``uv pip install``. When no dependency can be extracted, the node
+    directory itself is installed in editable mode instead. The command runs
+    in the project root with a 300 second timeout.
+
+    Args:
+        node_type: Name of the node directory under ``ai/nodes``.
+        sync: Currently unused; kept for future extension.
+
+    Returns:
+        ``{"success": True, "message": ...}`` when nothing needs installing
+        or the install succeeds (then also ``"output"``);
+        ``{"success": False, "error": ...}`` when the node directory is
+        missing, the command fails (then also ``"stderr"``/``"stdout"``),
+        the command times out, or any other exception is raised.
+    """
+    try:
+        jingrow_root = get_jingrow_root()
+        node_dir = jingrow_root / "ai" / "nodes" / node_type
+
+        if not node_dir.exists():
+            return {"success": False, "error": f"节点目录不存在: {node_type}"}
+
+        pyproject_path = node_dir / "pyproject.toml"
+        if not pyproject_path.exists():
+            # No own pyproject.toml: the node relies on the root dependencies.
+            return {"success": True, "message": "节点使用根依赖，无需安装"}
+
+        # Keep the workspace members in step; a failure here is not fatal.
+        update_result = update_workspace_members()
+        if not update_result.get("success"):
+            logger.warning(f"更新 workspace members 失败: {update_result.get('error')}")
+
+        # Project root (apps/jingrow) is the working directory for uv.
+        project_root = jingrow_root.parent
+
+        try:
+            dependencies = _read_node_dependencies(pyproject_path)
+        except Exception as e:
+            logger.warning(f"解析节点 {node_type} 的 pyproject.toml 失败: {str(e)}")
+            dependencies = []
+
+        # A requirement never starts with "-"; such an entry would be read by
+        # uv as a command-line option, so it is dropped rather than passed on.
+        safe_dependencies = [dep for dep in dependencies if not dep.startswith('-')]
+        if len(safe_dependencies) != len(dependencies):
+            logger.warning(f"节点 {node_type} 的依赖声明包含非法项，已忽略")
+
+        if safe_dependencies:
+            cmd = ["uv", "pip", "install"] + safe_dependencies
+        else:
+            # Nothing parsed: install the node package itself (editable).
+            cmd = ["uv", "pip", "install", "-e", str(node_dir)]
+
+        result = subprocess.run(
+            cmd,
+            cwd=project_root,
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        if result.returncode == 0:
+            return {
+                "success": True,
+                "message": f"节点 {node_type} 依赖安装成功",
+                "output": result.stdout
+            }
+
+        error_msg = result.stderr or result.stdout
+        logger.error(f"安装节点 {node_type} 依赖失败: {error_msg}")
+        return {
+            "success": False,
+            "error": f"依赖安装失败: {error_msg}",
+            "stderr": result.stderr,
+            "stdout": result.stdout
+        }
+
+    except subprocess.TimeoutExpired:
+        return {"success": False, "error": "依赖安装超时"}
+    except Exception as e:
+        logger.error(f"安装节点 {node_type} 依赖时发生异常: {str(e)}")
+        return {"success": False, "error": str(e)}
+
+
+def check_node_dependencies(node_type: str) -> Dict[str, Any]:
+    """Check whether the node's declared dependencies are installed.
+
+    Each dependency is first looked up as an installed distribution via
+    ``importlib.metadata``; only when that fails is an import of the guessed
+    module name attempted. Environment markers are not evaluated.
+
+    Args:
+        node_type: Name of the node directory under ``ai/nodes``.
+
+    Returns:
+        ``{"success": True, ...}`` when the node has no ``pyproject.toml``,
+        declares no dependencies, or all of them are found;
+        ``{"success": False, "missing": [...], ...}`` when some are absent;
+        ``{"success": None, ...}`` when the check itself raised.
+    """
+    pyproject_path = get_node_pyproject_path(node_type)
+    if not pyproject_path:
+        return {"success": True, "message": "节点无独立依赖"}
+
+    try:
+        from importlib import metadata
+
+        dependencies = _read_node_dependencies(pyproject_path)
+        if not dependencies:
+            return {"success": True, "message": "节点无依赖需要检查"}
+
+        # Distributions whose import name differs from the package name;
+        # only consulted by the import fallback below.
+        package_to_module = {
+            "beautifulsoup4": "bs4",
+            "Pillow": "PIL",
+        }
+
+        missing = []
+        for dep in dependencies:
+            package_name = _requirement_name(dep)
+            if not package_name:
+                # Unrecognisable entry: report it so an install is attempted.
+                missing.append(dep)
+                continue
+
+            try:
+                metadata.distribution(package_name)
+                continue
+            except metadata.PackageNotFoundError:
+                pass
+
+            # No distribution metadata: fall back to importing the module
+            # (mapped name first, otherwise "-" replaced by "_").
+            module_name = package_to_module.get(package_name, package_name.replace("-", "_"))
+            try:
+                __import__(module_name)
+            except ImportError:
+                missing.append(package_name)
+
+        if missing:
+            return {
+                "success": False,
+                "missing": missing,
+                "message": f"缺少依赖: {', '.join(missing)}"
+            }
+        return {"success": True, "message": "所有依赖已安装"}
+
+    except Exception as e:
+        logger.warning(f"检查节点 {node_type} 依赖时发生异常: {str(e)}")
+        # success=None marks the result as undetermined; the caller decides.
+        return {"success": None, "message": f"检查依赖时出错: {str(e)}"}
+
+
+def ensure_node_dependencies(node_type: str) -> Dict[str, Any]:
+    """Make sure the node's dependencies are installed (check, then install).
+
+    Args:
+        node_type: Name of the node directory under ``ai/nodes``.
+
+    Returns:
+        A success dictionary when the node has no own ``pyproject.toml`` or
+        the check reports everything installed; otherwise the result of
+        ``install_node_dependencies``.
+    """
+    if has_node_dependencies(node_type):
+        check_result = check_node_dependencies(node_type)
+        # Anything other than an explicit True (failed or undetermined check)
+        # leads to an install attempt.
+        if check_result.get("success") is not True:
+            return install_node_dependencies(node_type)
+        return {"success": True, "message": "依赖已安装"}
+
+    return {"success": True, "message": "节点无独立依赖"}
+
--- a/apps/jingrow/pyproject.toml
+++ b/apps/jingrow/pyproject.toml
@ -19,3 +19,8 @@ dependencies = [
    "Pillow>=10.0.0",
    "Jinja2>=3.1.0",
 ]
+
+[tool.uv.workspace]
+members = [
+    "jingrow/ai/nodes/web_scrapers",
+]
--- a/apps/jingrow/uv.lock
+++ b/apps/jingrow/uv.lock
--- a/dev.sh
+++ b/dev.sh
@ -136,7 +136,13 @@ check_deps() {
    # 检查后端依赖（优先使用 uv）
    info "同步后端依赖 (uv)..."
    if [ -f "apps/jingrow/pyproject.toml" ]; then
-        (cd apps/jingrow && uv sync) || {
+        # 在 uv sync 之前，自动更新 workspace members（包含所有有依赖的节点）
+        info "自动更新 workspace members..."
+        (cd apps/jingrow && uv run python -c "from jingrow.utils.node_dependencies import update_workspace_members; result = update_workspace_members(); print(result.get('message', '更新完成'))" 2>&1) || {
+            warn "更新 workspace members 失败，继续执行 uv sync"
+        }
+        # 使用 --all-packages 确保安装所有 workspace members 的依赖
+        (cd apps/jingrow && uv sync --all-packages) || {
            error "uv 同步失败"
            exit 1
        }