diff --git a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.json b/apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/made_in_china_scraper.json similarity index 90% rename from apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.json rename to apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/made_in_china_scraper.json index 0ca886e..a7f5d05 100644 --- a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.json +++ b/apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/made_in_china_scraper.json @@ -1,10 +1,10 @@ { "metadata": { - "type": "web_scrapers_create", - "label": "网站采集并创建", + "type": "made_in_china_scraper", + "label": "中国制造网产品采集", "icon": "fa-spider", "color": "rgba(71, 180, 133, 1)", - "description": "采集网站产品信息并立即创建记录(一体化节点),支持字段映射和图片上传", + "description": "采集中国制造网产品信息并立即创建记录(一体化节点),支持字段映射和图片上传", "group": "数据", "component_type": "GenericNode" }, @@ -68,6 +68,11 @@ "title": "分类名称", "description": "要关联的分类名称(分类记录的title字段值)。如果配置了此字段,所有创建的记录都会关联到该分类;如果不配置,会尝试从页面提取分类名称" }, + "parent_category": { + "type": "string", + "title": "父分类", + "description": "创建分类时的父分类名称(分类记录的title字段值)。如果不配置,使用默认值 Products" + }, "max_pages": { "type": "integer", "title": "最大页数", @@ -105,6 +110,7 @@ "category_pagetype", "category_field", "category_name", + "parent_category", "max_pages", "default_site" ] diff --git a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.py b/apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/made_in_china_scraper.py similarity index 97% rename from apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.py rename to apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/made_in_china_scraper.py index 6bbbfeb..43345a7 100644 --- a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.py +++ b/apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/made_in_china_scraper.py @@ -749,11 +749,11 @@ async def upload_images(images: List[str], record_name: str, record_type: str, f return uploaded_urls -def get_or_create_category(category_name: str, category_pagetype: str, site: str = None) -> Optional[str]: +def get_or_create_category(category_name: str, category_pagetype: str, site: str = None, parent_category: str = "Products") -> Optional[str]: """查找或创建分类,返回分类记录的name,失败返回None 统一使用title字段查找和创建分类 如果提供了site,会在查找和创建时使用site字段过滤 - 创建分类时会自动设置父分类为"Products" + 创建分类时会自动设置父分类(默认值为"Products") """ if not category_name or not category_pagetype: return None @@ -772,16 +772,17 @@ def get_or_create_category(category_name: str, category_pagetype: str, site: str if site: category_data["site"] = site - # 查找父分类"Products"并设置 - parent_filters = [["title", "=", "Products"]] - if site: - parent_filters.append(["site", "=", site]) - parent_categories = jingrow.get_list(category_pagetype, filters=parent_filters, limit=1) - if parent_categories: - parent_name = parent_categories[0].get("name") - # 父分类字段名格式:parent_分类pagetype名称(小写下划线格式) - parent_field = "parent_" + category_pagetype.lower().replace(" ", "_") - category_data[parent_field] = parent_name + # 查找父分类并设置 + if parent_category: + parent_filters = [["title", "=", parent_category]] + if site: + parent_filters.append(["site", "=", site]) + parent_categories = jingrow.get_list(category_pagetype, filters=parent_filters, limit=1) + if parent_categories: + parent_name = parent_categories[0].get("name") + # 父分类字段名格式:parent_分类pagetype名称(小写下划线格式) + parent_field = "parent_" + category_pagetype.lower().replace(" ", "_") + category_data[parent_field] = parent_name created = jingrow.create_pg(category_pagetype, category_data) if created: @@ -792,7 +793,7 @@ def get_or_create_category(category_name: str, category_pagetype: str, site: str return None -def map_product_data_to_record(product_data: Dict[str, Any], field_map: List[Dict], label2field: Dict, record_type: str, default_site: str = "", category_name: str = None, category_field: str = None, category_pagetype: str = None) -> Dict[str, Any]: +def map_product_data_to_record(product_data: Dict[str, Any], field_map: List[Dict], label2field: Dict, record_type: str, default_site: str = "", category_name: str = None, category_field: str = None, category_pagetype: str = None, parent_category: str = "Products") -> Dict[str, Any]: """将产品数据映射为记录字段""" record_data = {} mapped_fields = set() @@ -871,7 +872,7 @@ def map_product_data_to_record(product_data: Dict[str, Any], field_map: List[Dic if site_value.startswith(('http://', 'https://')): site_value = None - category_record_name = get_or_create_category(category_name, category_pagetype, site_value) + category_record_name = get_or_create_category(category_name, category_pagetype, site_value, parent_category) if category_record_name: record_data[category_field] = category_record_name @@ -932,8 +933,11 @@ async def create_record_async(product_data: Dict[str, Any], config: Dict[str, An pass break + # 获取父分类配置(默认值为"Products") + parent_category = config.get("parent_category", "Products") + # 映射字段 - record_data = map_product_data_to_record(product_data, field_map, label2field, record_type, default_site, category_name, category_field, category_pagetype) + record_data = map_product_data_to_record(product_data, field_map, label2field, record_type, default_site, category_name, category_field, category_pagetype, parent_category) # 处理图片上传 image_field = None diff --git a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/pyproject.toml b/apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/pyproject.toml similarity index 89% rename from apps/jingrow/jingrow/ai/nodes/web_scrapers_create/pyproject.toml rename to apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/pyproject.toml index 8016de5..115bfba 100644 --- a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/pyproject.toml +++ b/apps/jingrow/jingrow/ai/nodes/made_in_china_scraper/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "web_scrapers_create" +name = "made_in_china_scraper" version = "1.0.0" requires-python = ">=3.10" dependencies = [