convert product description to table format

This commit is contained in:
jingrow 2025-11-14 15:30:10 +08:00
parent 1cdcde7a9e
commit af8cff7eb3
2 changed files with 185 additions and 5 deletions

View File

@ -974,5 +974,110 @@ watch(() => props.disabled, (disabled) => {
max-width: 100% !important;
width: auto !important;
}
/* Standard table styles for content rendered inside the editor (.jeditor). */
/* Table frame: full width, collapsed borders, 12px vertical margin,
   a 1px light-gray border and a subtle drop shadow. */
.jeditor table {
border-collapse: collapse;
border-spacing: 0;
width: 100%;
margin: 12px 0;
overflow: hidden;
border: 1px solid #e5e7eb;
/* NOTE(review): rounded corners may not render while border-collapse is
   "collapse" - confirm in the target browsers. */
border-radius: 6px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
}
/* Styles shared by body cells and header cells. */
.jeditor table td,
.jeditor table th {
min-width: 1em;
padding: 8px 12px;
border: 1px solid #e5e7eb;
vertical-align: top;
box-sizing: border-box;
/* Presumably makes the cell the positioning context for absolutely
   positioned overlays such as .selectedCell:after - verify against the
   editor's generated markup. */
position: relative;
background-color: #fff;
}
/* Header cells: semi-bold, left-aligned, light-gray background and a thicker
   (2px) bottom border. */
.jeditor table th {
font-weight: 600;
text-align: left;
background-color: #f8f9fa;
color: #374151;
border-bottom: 2px solid #e5e7eb;
}
/* Body cells: white background (same value as the shared rule above) with
   dark text. */
.jeditor table td {
background-color: #fff;
color: #1f2937;
}
/* Table selection state. */
/* Translucent green overlay stretched over the whole selected element;
   pointer-events: none lets clicks pass through the overlay. */
.jeditor table .selectedCell:after {
z-index: 2;
position: absolute;
content: "";
left: 0; right: 0; top: 0; bottom: 0;
background: rgba(31, 199, 111, 0.1);
pointer-events: none;
}
/* Column resize handle: a 4px green bar at the right edge, invisible by
   default (opacity 0) and faded in over 0.2s. */
.jeditor table .column-resize-handle {
position: absolute;
right: -2px;
top: 0;
bottom: -2px;
width: 4px;
background-color: #1fc76f;
pointer-events: none;
opacity: 0;
transition: opacity 0.2s ease;
}
/* Reveal the handle while the table is hovered or when the handle carries
   the .selected class. */
.jeditor table:hover .column-resize-handle,
.jeditor table .column-resize-handle.selected {
opacity: 1;
}
/* Table row hover effect: slightly darken body cells and header cells of the
   row under the pointer. */
.jeditor table tr:hover td {
background-color: #f9fafb;
}
.jeditor table tr:hover th {
background-color: #f1f3f5;
}
/* Responsive table handling. */
/* Let the editor content area scroll horizontally when its content overflows. */
.jeditor .ProseMirror {
overflow-x: auto;
}
/* Cap the table at the container width while keeping table layout.
   This repeats the `.jeditor table` selector used by the base table rule. */
.jeditor table {
max-width: 100%;
display: table;
}
/* Allow cell text to wrap, breaking long words when necessary. */
.jeditor table td,
.jeditor table th {
white-space: normal;
word-wrap: break-word;
}
/* Focus style while editing a table: a 2px green outline drawn inside the
   cell edge (negative outline-offset). */
.jeditor table td:focus,
.jeditor table th:focus {
outline: 2px solid #1fc76f;
outline-offset: -2px;
}
/* Empty table cell style: render the paragraph's data-placeholder attribute
   as gray text. float: left with height: 0 keeps the placeholder from adding
   height; pointer-events: none lets clicks pass through it. */
.jeditor table td p.is-editor-empty:first-child::before {
content: attr(data-placeholder);
float: left;
color: #adb5bd;
pointer-events: none;
height: 0;
}
</style>

View File

@ -3,6 +3,7 @@ import re
import asyncio
import requests
import uuid
import html
from typing import Dict, Any, Optional, List
from urllib.parse import urljoin, urlparse, parse_qs, urlencode
@ -516,17 +517,91 @@ def extract_product_title(soup):
return ''
def convert_bsc_info_to_table(bsc_info):
    """Convert the div-based ``bsc-info`` structure into HTML tables.

    Every ``div.sr-layout-subblock`` found under *bsc_info* becomes one
    section: an optional ``<h2>`` heading (the text of the ``<h2>`` inside
    ``div.sr-txt-title``) followed by a two-column table. Each
    ``div.bsc-item`` inside ``div.sr-layout-content > div.basic-info-list``
    contributes one row, with the text of ``div.bac-item-label`` in the left
    column and the text of ``div.bac-item-value`` in the right column. All
    text is HTML-escaped before being embedded.

    Sections that yield no usable row are dropped entirely (heading
    included), so the function never emits an empty table.

    Args:
        bsc_info: Parsed element exposing ``find``/``find_all`` (presumably a
            BeautifulSoup tag - verify against the caller); may be ``None``.

    Returns:
        The concatenated HTML of all sections, or ``''`` when the input is
        empty or no section produced at least one row.
    """
    if not bsc_info:
        return ''

    label_style = 'padding: 10px 12px; border: 1px solid #e5e7eb; width: 30%; background-color: #f9fafb; font-weight: 500; vertical-align: top;'
    value_style = 'padding: 10px 12px; border: 1px solid #e5e7eb; width: 70%; background-color: #ffffff; vertical-align: top;'

    html_parts = []
    for subblock in bsc_info.find_all('div', class_='sr-layout-subblock'):
        # Section heading (optional).
        title = ''
        title_elem = subblock.find('div', class_='sr-txt-title')
        heading = title_elem.find('h2') if title_elem else None
        if heading:
            title = heading.get_text(strip=True)

        # Locate the list of label/value items for this section.
        content_div = subblock.find('div', class_='sr-layout-content')
        if not content_div:
            continue
        basic_info_list = content_div.find('div', class_='basic-info-list')
        if not basic_info_list:
            continue

        # Build the rows first so that a section whose items are all
        # unusable does not leave behind a heading and an empty table.
        rows = []
        for item in basic_info_list.find_all('div', class_='bsc-item'):
            label_elem = item.find('div', class_='bac-item-label')
            value_elem = item.find('div', class_='bac-item-value')
            if not label_elem or not value_elem:
                continue

            label_text = label_elem.get_text(strip=True)
            value_text = value_elem.get_text(strip=True)
            if not label_text and not value_text:
                continue

            # Escape so special characters in scraped text display literally.
            rows.append(
                '<tr>'
                f'<td style="{label_style}">{html.escape(label_text)}</td>'
                f'<td style="{value_style}">{html.escape(value_text)}</td>'
                '</tr>'
            )

        if not rows:
            continue

        section = []
        if title:
            section.append(f'<h2 style="margin: 20px 0 10px 0; font-size: 18px; font-weight: 600;">{html.escape(title)}</h2>')
        section.append('<table style="width: 100%; border-collapse: collapse; margin-bottom: 20px; border: 1px solid #e5e7eb;">')
        section.append('<tbody>')
        section.extend(rows)
        section.append('</tbody>')
        section.append('</table>')
        html_parts.append(''.join(section))

    return ''.join(html_parts)
def extract_product_description(soup, base_url=''):
    """Extract the product description from a detail page as table HTML.

    Only the ``div.bsc-info`` container inside ``div.detail-tab`` is used; its
    div structure is converted to tables by ``convert_bsc_info_to_table``
    instead of being returned as raw markup.

    Args:
        soup: Parsed product detail page exposing ``find``.
        base_url: Not used by this function; kept so existing callers that
            pass it continue to work.

    Returns:
        The table HTML, or ``''`` when the detail tab or the ``bsc-info``
        container is missing or the conversion yields nothing.
    """
    detail_tab_item = soup.find('div', class_='detail-tab')
    if not detail_tab_item:
        return ''

    # Locate the bsc-info container.
    bsc_info = detail_tab_item.find('div', class_='bsc-info')
    if not bsc_info:
        return ''

    # Convert to table format; an empty conversion result maps to ''.
    table_html = convert_bsc_info_to_table(bsc_info)
    return table_html if table_html else ''