优化:Block Editor审阅模式diff功能的实现
This commit is contained in:
parent
7436b6e9df
commit
5b0e92de8d
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* plugins/ai-review/decorations.ts — AI 审阅装饰器工厂
|
||||
*
|
||||
* 将 computeDiff 输出的 DiffSegment[] 映射为 ProseMirror DecorationSet。
|
||||
* 将 structuredDiff 输出的 DiffSegment[] 映射为 ProseMirror DecorationSet。
|
||||
* 双 Decoration 体系:
|
||||
* - delete/replace 段 → Decoration.widget(原始文本,灰色删除线)
|
||||
* - insert/replace 段 → Decoration.inline(修改文本,绿色高亮)
|
||||
@ -49,7 +49,7 @@ export function createDeleteWidgetDom(
|
||||
* @param doc 编辑器当前 doc
|
||||
* @param insertFrom AI 输出在文档中的起始位置
|
||||
* @param modifiedText 完整的 AI 输出文本
|
||||
* @param segments computeDiff 输出的差异段数组
|
||||
* @param segments structuredDiff 输出的差异段数组
|
||||
* @returns { decoSet, segmentRanges } DecorationSet + 各段文档位置
|
||||
*/
|
||||
export function buildDecorationSet(
|
||||
|
||||
@ -1,13 +1,8 @@
|
||||
/**
|
||||
* plugins/ai-review/diff.ts — 文本差异算法
|
||||
* plugins/ai-review/diff.ts — Diff 类型定义
|
||||
*
|
||||
* 实现字符级 LCS 最长公共子序列 diff 算法,专为中文优化。
|
||||
* 输出 DiffSegment[],供 decoration 工厂使用。
|
||||
*
|
||||
* 算法特点:
|
||||
* - 以字符为单位(适用于中文、英文、混排)
|
||||
* - 合并相邻的相同类型段,保持输出简洁
|
||||
* - Replace 段同时包含 original 和 modified 文本
|
||||
* 定义 diff 系统共用的类型接口。
|
||||
* 实际的 diff 计算由 dmpDiff.ts(diff-match-patch)和 structuredDiff.ts(块级对齐)完成。
|
||||
*/
|
||||
|
||||
export type DiffType = 'equal' | 'replace' | 'insert' | 'delete'
|
||||
@ -19,144 +14,3 @@ export interface DiffSegment {
|
||||
/** 修改后的文本(仅 replace / insert / equal 时有值) */
|
||||
modified: string
|
||||
}
|
||||
|
||||
/**
 * Computes a character-level diff between two strings using a longest common
 * subsequence (LCS).
 *
 * Characters are Unicode code points (via `Array.from`), so a surrogate pair
 * is never split across two segments. Adjacent deletions and insertions that
 * sit between two unchanged runs are merged into a single `replace` segment.
 *
 * Concatenating `original` over all segments reproduces `original`, and
 * concatenating `modified` reproduces `modified`.
 *
 * Cost: the common prefix and suffix are stripped in linear time; the
 * remaining middle parts use an O(n * m) table in both time and memory.
 *
 * @param original the original text
 * @param modified the modified text
 * @returns the diff segments in document order
 */
export function computeDiff(original: string, modified: string): DiffSegment[] {
  if (original === modified) {
    return [{ type: 'equal', original, modified }]
  }
  if (!original) {
    return [{ type: 'insert', original: '', modified }]
  }
  if (!modified) {
    return [{ type: 'delete', original, modified: '' }]
  }

  const origChars = Array.from(original)
  const modChars = Array.from(modified)

  // 1. Strip the common prefix and suffix; only the middle needs the LCS table.
  const shortest = Math.min(origChars.length, modChars.length)
  let prefixLen = 0
  while (prefixLen < shortest && origChars[prefixLen] === modChars[prefixLen]) {
    prefixLen++
  }
  let suffixLen = 0
  while (
    suffixLen < shortest - prefixLen &&
    origChars[origChars.length - 1 - suffixLen] === modChars[modChars.length - 1 - suffixLen]
  ) {
    suffixLen++
  }

  const origMid = origChars.slice(prefixLen, origChars.length - suffixLen)
  const modMid = modChars.slice(prefixLen, modChars.length - suffixLen)
  const rows = origMid.length
  const cols = modMid.length
  const width = cols + 1

  // 2. Full LCS table, stored flat. Backtracking needs every row: deciding
  //    from the last row alone ignores how much of `original` is still left
  //    and produces non-minimal diffs.
  const table = new Uint32Array((rows + 1) * width)
  for (let i = 1; i <= rows; i++) {
    for (let j = 1; j <= cols; j++) {
      table[i * width + j] =
        origMid[i - 1] === modMid[j - 1]
          ? table[(i - 1) * width + (j - 1)] + 1
          : Math.max(table[(i - 1) * width + j], table[i * width + (j - 1)])
    }
  }

  // 3. Backtrack from the bottom-right corner. Operations are collected in
  //    reverse order with O(1) pushes and replayed backwards afterwards.
  const reversedOps: Array<'equal' | 'insert' | 'delete'> = []
  let i = rows
  let j = cols
  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && origMid[i - 1] === modMid[j - 1]) {
      reversedOps.push('equal')
      i--
      j--
    } else if (j > 0 && (i === 0 || table[i * width + (j - 1)] >= table[(i - 1) * width + j])) {
      reversedOps.push('insert')
      j--
    } else {
      reversedOps.push('delete')
      i--
    }
  }

  // 4. Emit segments in document order.
  const segments: DiffSegment[] = []

  const pushEqual = (text: string): void => {
    if (!text) return
    const last = segments[segments.length - 1]
    if (last && last.type === 'equal') {
      last.original += text
      last.modified += text
    } else {
      segments.push({ type: 'equal', original: text, modified: text })
    }
  }

  const pushChange = (removed: string, added: string): void => {
    if (removed && added) {
      segments.push({ type: 'replace', original: removed, modified: added })
    } else if (removed) {
      segments.push({ type: 'delete', original: removed, modified: '' })
    } else if (added) {
      segments.push({ type: 'insert', original: '', modified: added })
    }
  }

  pushEqual(origChars.slice(0, prefixLen).join(''))

  let origPos = 0
  let modPos = 0
  let k = reversedOps.length - 1
  while (k >= 0) {
    const origStart = origPos
    const modStart = modPos
    if (reversedOps[k] === 'equal') {
      while (k >= 0 && reversedOps[k] === 'equal') {
        origPos++
        modPos++
        k--
      }
      pushEqual(origMid.slice(origStart, origPos).join(''))
    } else {
      // A maximal run of inserts/deletes between two equal runs becomes one
      // replace / delete / insert segment.
      while (k >= 0 && reversedOps[k] !== 'equal') {
        if (reversedOps[k] === 'insert') {
          modPos++
        } else {
          origPos++
        }
        k--
      }
      pushChange(
        origMid.slice(origStart, origPos).join(''),
        modMid.slice(modStart, modPos).join(''),
      )
    }
  }

  pushEqual(origChars.slice(origChars.length - suffixLen).join(''))

  return segments
}
|
||||
|
||||
@ -20,7 +20,7 @@ const dmp = new diff_match_patch()
|
||||
*
|
||||
* @param original 原始文本
|
||||
* @param modified 修改后文本
|
||||
* @returns DiffSegment[] 与 computeDiff 输出格式完全一致
|
||||
* @returns DiffSegment[] 供 structuredDiff 和 decoration 系统使用
|
||||
*/
|
||||
export function computeDmpDiff(original: string, modified: string): DiffSegment[] {
|
||||
if (original === modified) {
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
/**
|
||||
* plugins/ai-review/structuredDiff.ts — 块级结构化差异算法
|
||||
*
|
||||
* 将 ProseMirror JSON 节点数组按块对齐,跳过未变更的整块段落,
|
||||
* 仅在变更块内部执行字符级 LCS diff。
|
||||
* 三层 diff 架构:
|
||||
* Layer 1 — 块级 LCS 对齐:按文本内容匹配原始与修改的块(替代索引 1:1 对齐)
|
||||
* Layer 2 — 相似度路由:大幅改写 → 整块 replace;小幅改写 → 进入 Layer 3
|
||||
* Layer 3 — diff-match-patch + 智能降噪:Myers O(ND) diff + Unicode 感知降噪
|
||||
*
|
||||
* 相比纯字符级 diff 的优势:
|
||||
* - 未变更的整块段落不产生任何 decoration(用户不会看到"无关文本被灰色覆盖")
|
||||
* 相比索引对齐的优势:
|
||||
* - 段落增删不会导致后续块错位匹配(消除"无关文本被灰色覆盖")
|
||||
* - 未变更的整块段落不产生任何 decoration
|
||||
* - diff 不会跨块边界,避免块间误匹配
|
||||
* - 与 ProseMirror 文档模型天然对齐
|
||||
*
|
||||
* 用法:当 originalContent(原始 PM 节点数组)和 modifiedContent(修改后 PM 节点数组)
|
||||
* 同时存在时,使用此函数计算差异(原字符级 computeDiff 已移除)。
|
||||
*/
|
||||
|
||||
import type { DiffSegment } from './diff'
|
||||
@ -34,53 +34,192 @@ function extractNodeText(node: PMNode): string {
|
||||
return node.content.map(extractNodeText).join('')
|
||||
}
|
||||
|
||||
// ── 相似度阈值 ────────────────────────────────────────────────────────────────
|
||||
// ── 阈值常量 ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** 变更显著性阈值:相似度低于此值时,整块标记为 replace 而非逐字 diff */
|
||||
const SIMILARITY_THRESHOLD = 0.6
|
||||
/** 长度比阈值:短文本 / 长文本低于此值时视为大幅改写,跳过逐字 diff */
|
||||
const LENGTH_RATIO_THRESHOLD = 0.3
|
||||
|
||||
/** 微小段合并阈值:非 equal 段双向均短于此值时降级为 equal(消除噪声) */
|
||||
const TINY_SEGMENT_THRESHOLD = 2
|
||||
/** 微小段降噪:仅标点/空白变更时降级为 equal */
|
||||
const PUNCTUATION_ONLY_REGEX = /^[\p{P}\p{S}\s]+$/u
|
||||
|
||||
// ── 块内智能 diff ────────────────────────────────────────────────────────────
|
||||
/** 微小段降噪:双向均短于此值且为实质性内容变更时仍保留 */
|
||||
const TINY_CONTENT_THRESHOLD = 1
|
||||
|
||||
// ── Layer 1:块级 LCS 对齐 ───────────────────────────────────────────────────
|
||||
|
||||
/** 块级对齐操作 */
|
||||
interface BlockOp {
|
||||
/** equal: 文本完全相同, diff: 需要块内 diff, insert: 新增块, delete: 删除块 */
|
||||
op: 'equal' | 'diff' | 'insert' | 'delete'
|
||||
origIdx: number
|
||||
modIdx: number
|
||||
}
|
||||
|
||||
/**
|
||||
* 纯 LCS 长度计算(O(m*n),仅 DP 表不回溯,速度比完整 diff 快 2~3 倍)。
|
||||
* 块级 LCS 对齐:找到原始块与修改块的最优匹配。
|
||||
*
|
||||
* 使用 O(n*m) DP 算法(块数通常 < 20,开销极小)。
|
||||
* 文本完全相同的块标记为 equal,不同的块标记为 diff。
|
||||
* 未匹配的块标记为 insert 或 delete。
|
||||
*/
|
||||
function lcsLength(a: string, b: string): number {
|
||||
const m = a.length
|
||||
const n = b.length
|
||||
let prev = new Array(n + 1).fill(0)
|
||||
let curr = new Array(n + 1).fill(0)
|
||||
for (let i = 1; i <= m; i++) {
|
||||
for (let j = 1; j <= n; j++) {
|
||||
if (a[i - 1] === b[j - 1]) {
|
||||
curr[j] = prev[j - 1] + 1
|
||||
} else {
|
||||
curr[j] = Math.max(prev[j], curr[j - 1])
|
||||
}
|
||||
function alignBlocks(origTexts: string[], modTexts: string[]): BlockOp[] {
|
||||
const n = origTexts.length
|
||||
const m = modTexts.length
|
||||
|
||||
if (n === 0 && m === 0) return []
|
||||
if (n === 0) return modTexts.map((_, j) => ({ op: 'insert' as const, origIdx: -1, modIdx: j }))
|
||||
if (m === 0) return origTexts.map((_, i) => ({ op: 'delete' as const, origIdx: i, modIdx: -1 }))
|
||||
|
||||
// 标准 LCS DP
|
||||
const dp: number[][] = Array.from({ length: n + 1 }, () => new Array(m + 1).fill(0))
|
||||
for (let i = 1; i <= n; i++) {
|
||||
for (let j = 1; j <= m; j++) {
|
||||
dp[i][j] = origTexts[i - 1] === modTexts[j - 1]
|
||||
? dp[i - 1][j - 1] + 1
|
||||
: Math.max(dp[i - 1][j], dp[i][j - 1])
|
||||
}
|
||||
;[prev, curr] = [curr, prev]
|
||||
}
|
||||
return prev[n]
|
||||
|
||||
// 回溯构建操作序列
|
||||
const ops: BlockOp[] = []
|
||||
let i = n
|
||||
let j = m
|
||||
while (i > 0 || j > 0) {
|
||||
if (i > 0 && j > 0 && origTexts[i - 1] === modTexts[j - 1]) {
|
||||
ops.push({ op: 'equal', origIdx: i - 1, modIdx: j - 1 })
|
||||
i--
|
||||
j--
|
||||
} else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
|
||||
ops.push({ op: 'insert', origIdx: -1, modIdx: j - 1 })
|
||||
j--
|
||||
} else {
|
||||
ops.push({ op: 'delete', origIdx: i - 1, modIdx: -1 })
|
||||
i--
|
||||
}
|
||||
}
|
||||
|
||||
// 回溯是逆向的,翻转为正向
|
||||
ops.reverse()
|
||||
|
||||
// 对 LCS 未匹配的块做二次匹配:文本不同但位置相近的块标记为 diff(而非 delete+insert)
|
||||
return refineAlignment(ops)
|
||||
}
|
||||
|
||||
/**
 * Second alignment pass: re-pairs neighbouring delete/insert operations as
 * `diff` operations.
 *
 * Each maximal run of consecutive `delete` / `insert` ops (in any order) is
 * handled as a group: the k-th delete is paired with the k-th insert and
 * emitted as `{ op: 'diff', origIdx, modIdx }`. Unpaired deletes follow, then
 * unpaired inserts. All other ops are passed through unchanged and in order.
 *
 * @param ops block operations in document order
 * @returns a new array; the input array is not modified
 */
function refineAlignment(ops: BlockOp[]): BlockOp[] {
  const result: BlockOp[] = []
  const isChange = (entry: BlockOp): boolean => entry.op === 'delete' || entry.op === 'insert'

  let cursor = 0
  while (cursor < ops.length) {
    const head = ops[cursor]

    if (!isChange(head)) {
      result.push(head)
      cursor += 1
      continue
    }

    // Find the end of the current run of delete/insert ops.
    let runEnd = cursor
    while (runEnd < ops.length && isChange(ops[runEnd])) {
      runEnd += 1
    }

    const run = ops.slice(cursor, runEnd)
    const removed = run.filter(entry => entry.op === 'delete')
    const added = run.filter(entry => entry.op === 'insert')
    const paired = Math.min(removed.length, added.length)

    // Pair by position within the run.
    for (let p = 0; p < paired; p++) {
      result.push({ op: 'diff', origIdx: removed[p].origIdx, modIdx: added[p].modIdx })
    }

    // Leftovers keep their original op objects: deletes first, then inserts.
    result.push(...removed.slice(paired), ...added.slice(paired))

    cursor = runEnd
  }

  return result
}
|
||||
|
||||
// ── Layer 2:块内相似度路由 ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Cheap similarity estimate between two strings, in the range [0, 1].
 *
 * Runs in O(min(a.length, b.length)); it is a heuristic, not a full LCS.
 *
 * - Both strings empty: returns 1.
 * - Length ratio (shorter / longer) below `LENGTH_RATIO_THRESHOLD`: returns
 *   that ratio directly, treating the pair as a large rewrite.
 * - Otherwise: returns (common prefix length + common suffix length) / longer
 *   length. The suffix scan never overlaps the prefix.
 *
 * Only the shared prefix and suffix are counted, so text whose edits are
 * spread through the middle scores lower than its true similarity.
 * Lengths are measured in UTF-16 code units.
 *
 * @param a first string
 * @param b second string
 * @returns similarity score; always a real ratio, never a sentinel value
 */
function quickSimilarity(a: string, b: string): number {
  const maxLen = Math.max(a.length, b.length)
  if (maxLen === 0) return 1
  const minLen = Math.min(a.length, b.length)

  // Share of the longer text that the shorter one could cover at best.
  const lenRatio = minLen / maxLen

  // A large length gap means a large rewrite; skip the scan.
  if (lenRatio < LENGTH_RATIO_THRESHOLD) return lenRatio

  // Common prefix length.
  let prefixLen = 0
  while (prefixLen < minLen && a[prefixLen] === b[prefixLen]) {
    prefixLen++
  }

  // Common suffix length, limited to what the prefix has not already covered.
  let suffixLen = 0
  const maxSuffix = minLen - prefixLen
  while (
    suffixLen < maxSuffix &&
    a[a.length - 1 - suffixLen] === b[b.length - 1 - suffixLen]
  ) {
    suffixLen++
  }

  // Fraction of the longer text covered by the shared prefix and suffix.
  return (prefixLen + suffixLen) / maxLen
}
|
||||
|
||||
/**
|
||||
* 块内智能 diff:
|
||||
* 1. 相似度低于阈值 → 整块 replace(跳过逐字 diff,避免碎片)
|
||||
* 2. 相似度高于阈值 → diff-match-patch diff + 微小段降噪
|
||||
* 1. 快速相似度 < 阈值 → 整块 replace(跳过逐字 diff,避免碎片)
|
||||
* 2. 快速相似度 ≥ 阈值 → diff-match-patch diff + 智能降噪
|
||||
*/
|
||||
function computeSmartDiff(orig: string, mod: string): DiffSegment[] {
|
||||
if (orig === mod) {
|
||||
return [{ type: 'equal', original: orig, modified: mod }]
|
||||
}
|
||||
|
||||
// 计算相似度:LCS长度 / max(原文长度, 改文长度)
|
||||
const lcsLen = lcsLength(orig, mod)
|
||||
const similarity = lcsLen / Math.max(orig.length, mod.length, 1)
|
||||
const similarity = quickSimilarity(orig, mod)
|
||||
|
||||
// 相似度低于阈值 → 大幅改写,整块标记为 replace
|
||||
if (similarity < SIMILARITY_THRESHOLD) {
|
||||
if (similarity < LENGTH_RATIO_THRESHOLD) {
|
||||
return [{ type: 'replace', original: orig, modified: mod }]
|
||||
}
|
||||
|
||||
@ -89,37 +228,54 @@ function computeSmartDiff(orig: string, mod: string): DiffSegment[] {
|
||||
return denoiseSegments(segs)
|
||||
}
|
||||
|
||||
// ── Layer 3:智能降噪 ────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 后处理降噪:将双向均短于阈值的 replace 段降级为 equal(不产生 decoration)。
|
||||
* 典型场景:AI 更改标点符号(。→!)、单字替换,这些细微差异不值得独立突出。
|
||||
* 智能降噪:识别无意义的微小变更并降级为 equal。
|
||||
*
|
||||
* 降噪规则(按优先级):
|
||||
* 1. 纯标点/符号/空白变更(如 。→!、,→;)→ 降噪
|
||||
* 2. 双向均 ≤ 1 字符的实质性替换(如 不→很)→ 保留(有语义意义)
|
||||
* 3. 双向均 ≤ 2 字符且均为标点/符号 → 降噪
|
||||
* 4. 其他 → 保留
|
||||
*/
|
||||
function denoiseSegments(segs: DiffSegment[]): DiffSegment[] {
|
||||
return segs.map(seg => {
|
||||
if (
|
||||
seg.type === 'replace' &&
|
||||
seg.original.length <= TINY_SEGMENT_THRESHOLD &&
|
||||
seg.modified.length <= TINY_SEGMENT_THRESHOLD
|
||||
) {
|
||||
return { type: 'equal', original: seg.original, modified: seg.modified }
|
||||
if (seg.type !== 'replace') return seg
|
||||
|
||||
const { original, modified } = seg
|
||||
|
||||
// 规则 1:纯标点/符号/空白变更
|
||||
if (PUNCTUATION_ONLY_REGEX.test(original) && PUNCTUATION_ONLY_REGEX.test(modified)) {
|
||||
return { type: 'equal' as const, original, modified }
|
||||
}
|
||||
|
||||
// 规则 2:极短实质性替换(1 字符 → 1 字符)→ 保留
|
||||
if (original.length <= TINY_CONTENT_THRESHOLD && modified.length <= TINY_CONTENT_THRESHOLD) {
|
||||
return seg
|
||||
}
|
||||
|
||||
// 规则 3:短文本(≤2 字符)且含标点 → 降噪
|
||||
if (original.length <= 2 && modified.length <= 2) {
|
||||
const origHasPunct = /[^\p{L}\p{N}]/u.test(original)
|
||||
const modHasPunct = /[^\p{L}\p{N}]/u.test(modified)
|
||||
if (origHasPunct || modHasPunct) {
|
||||
return { type: 'equal' as const, original, modified }
|
||||
}
|
||||
}
|
||||
|
||||
return seg
|
||||
})
|
||||
}
|
||||
|
||||
// ── 主入口 ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 块级结构化 diff。
|
||||
*
|
||||
* @param originalContent 原始内容的 PM JSON 节点数组(doc.slice().toJSON().content)
|
||||
* @param modifiedContent 修改后内容的 PM JSON 节点数组
|
||||
* @returns DiffSegment[] 可直接输入 buildDecorationSet
|
||||
*
|
||||
* 算法:
|
||||
* 1. 通过块级 LCS(alignBlocks)对齐原始与修改的块
|
||||
* 2. 文本完全相同的块 → 单个 equal 段(跳过 decoration)
|
||||
* 3. 文本不同的块 → 在块范围内执行智能 diff(computeSmartDiff:相似度路由 + diff-match-patch + 降噪)
|
||||
* 4. 仅出现在原始中的块 → delete 段
|
||||
* 5. 仅出现在修改中的块 → insert 段
|
||||
* 6. 块间 \n 作为独立 equal 段插入,维持 charOffset 与 textBetween 一致
|
||||
*/
|
||||
export function computeStructuredDiff(
|
||||
originalContent: PMNode[],
|
||||
@ -137,35 +293,49 @@ export function computeStructuredDiff(
|
||||
if (modTexts.length === 0) {
|
||||
return [{ type: 'delete', original: origTexts.join('\n'), modified: '' }]
|
||||
}
|
||||
const maxLen = Math.max(origTexts.length, modTexts.length)
|
||||
|
||||
// Layer 1:块级 LCS 对齐
|
||||
const ops = alignBlocks(origTexts, modTexts)
|
||||
|
||||
const segments: DiffSegment[] = []
|
||||
let isFirstBlock = true
|
||||
|
||||
for (let i = 0; i < maxLen; i++) {
|
||||
// 块间分隔符:\n 作为独立 equal 段
|
||||
if (i > 0) {
|
||||
for (const op of ops) {
|
||||
// 块间分隔符:\n 作为独立 equal 段(维持 charOffset 与 textBetween 一致)
|
||||
if (!isFirstBlock) {
|
||||
segments.push({ type: 'equal', original: '\n', modified: '\n' })
|
||||
}
|
||||
isFirstBlock = false
|
||||
|
||||
const orig = i < origTexts.length ? origTexts[i] : ''
|
||||
const mod = i < modTexts.length ? modTexts[i] : ''
|
||||
|
||||
if (orig === mod) {
|
||||
// ── 未变更块:单段 equal,不产生 decoration ──
|
||||
if (orig !== '') {
|
||||
segments.push({ type: 'equal', original: orig, modified: mod })
|
||||
switch (op.op) {
|
||||
case 'equal': {
|
||||
const text = origTexts[op.origIdx]
|
||||
if (text !== '') {
|
||||
segments.push({ type: 'equal', original: text, modified: text })
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
case 'diff': {
|
||||
const orig = origTexts[op.origIdx]
|
||||
const mod = modTexts[op.modIdx]
|
||||
// Layer 2 + 3:块内智能 diff
|
||||
const innerSegs = computeSmartDiff(orig, mod)
|
||||
segments.push(...innerSegs)
|
||||
break
|
||||
}
|
||||
|
||||
case 'insert': {
|
||||
const mod = modTexts[op.modIdx]
|
||||
segments.push({ type: 'insert', original: '', modified: mod })
|
||||
break
|
||||
}
|
||||
|
||||
case 'delete': {
|
||||
const orig = origTexts[op.origIdx]
|
||||
segments.push({ type: 'delete', original: orig, modified: '' })
|
||||
break
|
||||
}
|
||||
// 空块跳过(不输出,保留位置计数但装饰系统会忽略)
|
||||
} else if (orig === '') {
|
||||
// ── 新增块 ──
|
||||
segments.push({ type: 'insert', original: '', modified: mod })
|
||||
} else if (mod === '') {
|
||||
// ── 删除块 ──
|
||||
segments.push({ type: 'delete', original: orig, modified: '' })
|
||||
} else {
|
||||
// ── 变更块:块内智能 diff(相似度阈值 + 降噪) ──
|
||||
const innerSegs = computeSmartDiff(orig, mod)
|
||||
segments.push(...innerSegs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -6282,16 +6282,7 @@ string_decoder@~1.1.1:
|
||||
is-fullwidth-code-point "^3.0.0"
|
||||
strip-ansi "^6.0.1"
|
||||
|
||||
string-width@^4.1.0, string-width@^4.2.0:
|
||||
version "4.2.3"
|
||||
resolved "https://registry.npmmirror.com/string-width/-/string-width-4.2.3.tgz"
|
||||
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
|
||||
dependencies:
|
||||
emoji-regex "^8.0.0"
|
||||
is-fullwidth-code-point "^3.0.0"
|
||||
strip-ansi "^6.0.1"
|
||||
|
||||
string-width@^4.2.3:
|
||||
string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
|
||||
version "4.2.3"
|
||||
resolved "https://registry.npmmirror.com/string-width/-/string-width-4.2.3.tgz"
|
||||
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user