From a535c8ce73ea7e26215f18811cf2675bd1f5bf2b Mon Sep 17 00:00:00 2001 From: jingrow Date: Fri, 14 Nov 2025 15:30:10 +0800 Subject: [PATCH] convert product description to table format --- .../core/pagetype/form/controls/Jeditor.vue | 105 ++++++++++++++++++ .../web_scrapers_create.py | 96 +++++++++++++++- 2 files changed, 196 insertions(+), 5 deletions(-) diff --git a/apps/jingrow/frontend/src/core/pagetype/form/controls/Jeditor.vue b/apps/jingrow/frontend/src/core/pagetype/form/controls/Jeditor.vue index 468e33d..a215c25 100644 --- a/apps/jingrow/frontend/src/core/pagetype/form/controls/Jeditor.vue +++ b/apps/jingrow/frontend/src/core/pagetype/form/controls/Jeditor.vue @@ -974,5 +974,110 @@ watch(() => props.disabled, (disabled) => { max-width: 100% !important; width: auto !important; } + +/* 表格标准样式 */ +.jeditor table { + border-collapse: collapse; + border-spacing: 0; + width: 100%; + margin: 12px 0; + overflow: hidden; + border: 1px solid #e5e7eb; + border-radius: 6px; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05); +} + +.jeditor table td, +.jeditor table th { + min-width: 1em; + padding: 8px 12px; + border: 1px solid #e5e7eb; + vertical-align: top; + box-sizing: border-box; + position: relative; + background-color: #fff; +} + +.jeditor table th { + font-weight: 600; + text-align: left; + background-color: #f8f9fa; + color: #374151; + border-bottom: 2px solid #e5e7eb; +} + +.jeditor table td { + background-color: #fff; + color: #1f2937; +} + +/* 表格选中状态 */ +.jeditor table .selectedCell:after { + z-index: 2; + position: absolute; + content: ""; + left: 0; right: 0; top: 0; bottom: 0; + background: rgba(31, 199, 111, 0.1); + pointer-events: none; +} + +.jeditor table .column-resize-handle { + position: absolute; + right: -2px; + top: 0; + bottom: -2px; + width: 4px; + background-color: #1fc76f; + pointer-events: none; + opacity: 0; + transition: opacity 0.2s ease; +} + +.jeditor table:hover .column-resize-handle, +.jeditor table .column-resize-handle.selected { + opacity: 1; +} + +/* 表格行悬浮效果 */ +.jeditor table tr:hover td { + background-color: #f9fafb; +} + +.jeditor table tr:hover th { + background-color: #f1f3f5; +} + +/* 表格响应式处理 */ +.jeditor .ProseMirror { + overflow-x: auto; +} + +/* 当表格过宽时,允许横向滚动 */ +.jeditor table { + max-width: 100%; + display: table; +} + +.jeditor table td, +.jeditor table th { + white-space: normal; + word-wrap: break-word; +} + +/* 表格编辑时的焦点样式 */ +.jeditor table td:focus, +.jeditor table th:focus { + outline: 2px solid #1fc76f; + outline-offset: -2px; +} + +/* 表格空单元格样式 */ +.jeditor table td p.is-editor-empty:first-child::before { + content: attr(data-placeholder); + float: left; + color: #adb5bd; + pointer-events: none; + height: 0; +} diff --git a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.py b/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.py index 54a2aed..0e864c4 100644 --- a/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.py +++ b/apps/jingrow/jingrow/ai/nodes/web_scrapers_create/web_scrapers_create.py @@ -3,6 +3,7 @@ import re import asyncio import requests import uuid +import html from typing import Dict, Any, Optional, List from urllib.parse import urljoin, urlparse, parse_qs, urlencode @@ -516,17 +517,102 @@ def extract_product_title(soup): return '' +def convert_bsc_info_to_table(bsc_info): + """将 bsc-info 的 div 结构转换为表格格式""" + if not bsc_info: + return '' + + # 查找所有子区块 + subblocks = bsc_info.find_all('div', class_='sr-layout-subblock') + if not subblocks: + return '' + + html_parts = [] + + for subblock in subblocks: + # 提取标题 + title_elem = subblock.find('div', class_='sr-txt-title') + title = '' + if title_elem: + h2 = title_elem.find('h2') + if h2: + title = h2.get_text(strip=True) + + # 查找所有 bsc-item + content_div = subblock.find('div', class_='sr-layout-content') + if not content_div: + continue + + basic_info_list = content_div.find('div', class_='basic-info-list') + if not basic_info_list: + continue + + items = basic_info_list.find_all('div', class_='bsc-item') + if not items: + continue + + # 构建表格HTML + table_html = [] + if title: + title_escaped = html.escape(title) + table_html.append(f'

{title_escaped}

') + + table_html.append('') + table_html.append('') + + for item in items: + # 检查是否是全宽项目 + is_full = 'full' in item.get('class', []) + + label_elem = item.find('div', class_='bac-item-label') + value_elem = item.find('div', class_='bac-item-value') + + if not label_elem or not value_elem: + continue + + label_text = label_elem.get_text(strip=True) + value_text = value_elem.get_text(strip=True) + + if not label_text and not value_text: + continue + + # HTML转义,确保特殊字符正确显示 + label_text_escaped = html.escape(label_text) + value_text_escaped = html.escape(value_text) + + # 如果是全宽,使用单列布局,否则使用两列布局 + if is_full: + table_html.append('') + table_html.append(f'') + table_html.append('') + else: + # 两列布局:标签列和值列 + table_html.append('') + table_html.append(f'') + table_html.append(f'') + table_html.append('') + + table_html.append('') + table_html.append('
') + table_html.append(f'{label_text_escaped}: {value_text_escaped}') + table_html.append('
{label_text_escaped}{value_text_escaped}
') + + html_parts.append(''.join(table_html)) + + return ''.join(html_parts) + + def extract_product_description(soup, base_url=''): - """从产品详情页提取完整的产品详细描述(只提取 bsc-info 部分)""" + """从产品详情页提取完整的产品详细描述(只提取 bsc-info 部分,并转换为表格格式)""" detail_tab_item = soup.find('div', class_='detail-tab') if detail_tab_item: # 查找 bsc-info 容器 bsc_info = detail_tab_item.find('div', class_='bsc-info') if bsc_info: - # 提取完整HTML内容(保留所有格式、表格等) - html_content = str(bsc_info) - if html_content: - return html_content + # 转换为表格格式 + table_html = convert_bsc_info_to_table(bsc_info) + if table_html: + return table_html return ''