优化:Block Editor审阅模式diff功能的实现

This commit is contained in:
jingrow 2026-06-14 04:26:17 +08:00
parent 7436b6e9df
commit 5b0e92de8d
5 changed files with 249 additions and 234 deletions

View File

@ -1,7 +1,7 @@
/**
* plugins/ai-review/decorations.ts AI
*
* computeDiff DiffSegment[] ProseMirror DecorationSet
* structuredDiff DiffSegment[] ProseMirror DecorationSet
* Decoration
* - delete/replace Decoration.widget线
* - insert/replace Decoration.inline绿
@ -49,7 +49,7 @@ export function createDeleteWidgetDom(
* @param doc doc
* @param insertFrom AI
* @param modifiedText AI
* @param segments computeDiff
* @param segments structuredDiff
* @returns { decoSet, segmentRanges } DecorationSet +
*/
export function buildDecorationSet(

View File

@ -1,13 +1,8 @@
/**
* plugins/ai-review/diff.ts
* plugins/ai-review/diff.ts Diff
*
* LCS diff
* DiffSegment[] decoration 使
*
*
* -
* -
* - Replace original modified
* diff
* The diff algorithms themselves live in dmpDiff.ts (diff-match-patch based) and structuredDiff.ts.
*/
export type DiffType = 'equal' | 'replace' | 'insert' | 'delete'
@ -19,144 +14,3 @@ export interface DiffSegment {
/** 修改后的文本(仅 replace / insert / equal 时有值) */
modified: string
}
/**
 * Character-level diff between two strings based on the longest common
 * subsequence (LCS).
 *
 * Adjacent deleted/inserted characters between two equal runs are merged into
 * a single segment: `replace` when both sides are non-empty, otherwise
 * `delete` or `insert`. Concatenating every segment's `original` yields
 * `original`, and concatenating every `modified` yields `modified`.
 *
 * The full LCS table is kept for the backtrack. A rolling two-row table is
 * not enough: the backtrack needs the row belonging to the current position,
 * and reading only the final row produces non-minimal diffs. To keep the table
 * small, the common prefix and suffix are stripped before the table is built.
 *
 * Comparison is per UTF-16 code unit (string indexing).
 *
 * @param original text before the change
 * @param modified text after the change
 * @returns ordered segments covering both inputs
 */
export function computeDiff(original: string, modified: string): DiffSegment[] {
  if (original === modified) {
    return [{ type: 'equal', original, modified }]
  }
  if (!original) {
    return [{ type: 'insert', original: '', modified }]
  }
  if (!modified) {
    return [{ type: 'delete', original, modified: '' }]
  }

  // 1. Strip the common prefix / suffix; they are always part of an optimal alignment.
  const shorter = Math.min(original.length, modified.length)
  let prefixLen = 0
  while (prefixLen < shorter && original[prefixLen] === modified[prefixLen]) {
    prefixLen++
  }
  let suffixLen = 0
  const suffixCap = shorter - prefixLen // never let the suffix overlap the prefix
  while (
    suffixLen < suffixCap &&
    original[original.length - 1 - suffixLen] === modified[modified.length - 1 - suffixLen]
  ) {
    suffixLen++
  }
  const oMid = original.slice(prefixLen, original.length - suffixLen)
  const mMid = modified.slice(prefixLen, modified.length - suffixLen)
  const oLen = oMid.length
  const mLen = mMid.length

  // 2. Full LCS table over the differing middle part (flat, row-major).
  const width = mLen + 1
  const table = new Uint32Array((oLen + 1) * width)
  for (let i = 1; i <= oLen; i++) {
    for (let j = 1; j <= mLen; j++) {
      table[i * width + j] =
        oMid[i - 1] === mMid[j - 1]
          ? table[(i - 1) * width + (j - 1)] + 1
          : Math.max(table[(i - 1) * width + j], table[i * width + (j - 1)])
    }
  }

  // 3. Backtrack from the bottom-right corner; ops are collected in reverse order.
  const ops: { kind: 'equal' | 'insert' | 'delete'; ch: string }[] = []
  let i = oLen
  let j = mLen
  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && oMid[i - 1] === mMid[j - 1]) {
      ops.push({ kind: 'equal', ch: oMid.charAt(i - 1) })
      i--
      j--
    } else if (j > 0 && (i === 0 || table[i * width + (j - 1)] >= table[(i - 1) * width + j])) {
      ops.push({ kind: 'insert', ch: mMid.charAt(j - 1) })
      j--
    } else {
      ops.push({ kind: 'delete', ch: oMid.charAt(i - 1) })
      i--
    }
  }
  ops.reverse()

  // 4. Coalesce per-character ops into segments, in forward order.
  const segments: DiffSegment[] = []
  let equalRun = original.slice(0, prefixLen)
  let removed = ''
  let added = ''

  const flushEqual = (): void => {
    if (equalRun) {
      segments.push({ type: 'equal', original: equalRun, modified: equalRun })
      equalRun = ''
    }
  }
  const flushChange = (): void => {
    if (removed && added) {
      segments.push({ type: 'replace', original: removed, modified: added })
    } else if (removed) {
      segments.push({ type: 'delete', original: removed, modified: '' })
    } else if (added) {
      segments.push({ type: 'insert', original: '', modified: added })
    }
    removed = ''
    added = ''
  }

  for (const op of ops) {
    if (op.kind === 'equal') {
      flushChange()
      equalRun += op.ch
    } else {
      flushEqual()
      if (op.kind === 'delete') {
        removed += op.ch
      } else {
        added += op.ch
      }
    }
  }
  if (suffixLen > 0) {
    flushChange()
    equalRun += original.slice(original.length - suffixLen)
  }
  flushChange()
  flushEqual()

  return segments
}

View File

@ -20,7 +20,7 @@ const dmp = new diff_match_patch()
*
* @param original
* @param modified
* @returns DiffSegment[] computeDiff
* @returns DiffSegment[] structuredDiff decoration 使
*/
export function computeDmpDiff(original: string, modified: string): DiffSegment[] {
if (original === modified) {

View File

@ -1,16 +1,16 @@
/**
* plugins/ai-review/structuredDiff.ts
*
* ProseMirror JSON
* LCS diff
* diff
* Layer 1 LCS 1:1
* Layer 2 replace Layer 3
* Layer 3 diff-match-patch + Myers O(ND) diff + Unicode
*
* diff
* - decoration"无关文本被灰色覆盖"
*
* - "无关文本被灰色覆盖"
* - decoration
* - diff
* - ProseMirror
*
* originalContent PM modifiedContent PM
* computeDiff
*/
import type { DiffSegment } from './diff'
@ -34,53 +34,192 @@ function extractNodeText(node: PMNode): string {
return node.content.map(extractNodeText).join('')
}
// ── Similarity threshold ──────────────────────────────────────────────────────
// ── Threshold constants ───────────────────────────────────────────────────────
/**
 * Change-significance threshold: when similarity falls below this value the whole
 * block is marked as `replace` instead of being diffed character by character.
 * NOTE(review): the visible computeSmartDiff shows two variants of the comparison
 * (one against this constant, one against LENGTH_RATIO_THRESHOLD) — confirm which is live.
 */
const SIMILARITY_THRESHOLD = 0.6
/**
 * Length-ratio threshold: when (shorter length / longer length) is below this value
 * the change is treated as a major rewrite and the character-level diff is skipped.
 */
const LENGTH_RATIO_THRESHOLD = 0.3
/**
 * Tiny-segment threshold: a `replace` segment whose original and modified sides are
 * both at most this long is downgraded to `equal` to suppress noise.
 */
const TINY_SEGMENT_THRESHOLD = 2
/** Denoising pattern: matches text made up solely of punctuation, symbols and whitespace. */
const PUNCTUATION_ONLY_REGEX = /^[\p{P}\p{S}\s]+$/u
// ── Intra-block smart diff ────────────────────────────────────────────────────
/**
 * Denoising: a replacement whose two sides are both at most this long (and is not
 * punctuation-only) is kept as a real content change.
 */
const TINY_CONTENT_THRESHOLD = 1
// ── Layer 1: block-level LCS alignment ────────────────────────────────────────
/** One block-level alignment operation. */
interface BlockOp {
/** equal: texts are identical, diff: needs an intra-block diff, insert: added block, delete: removed block */
op: 'equal' | 'diff' | 'insert' | 'delete'
// Index into the original block list; -1 when the op has no original block (insert).
origIdx: number
// Index into the modified block list; -1 when the op has no modified block (delete).
modIdx: number
}
/**
 * Block-level alignment of two lists of block texts using a longest-common-subsequence
 * (LCS) table, plus lines of a character-level `lcsLength` helper.
 *
 * NOTE(review): this span interleaves the lines of two separate definitions
 * (`lcsLength` and `alignBlocks`), apparently because the diff's +/- markers were
 * lost; presumably `lcsLength` is the removed side — confirm against the repository
 * before editing any code here.
 *
 * alignBlocks:
 * - blocks whose texts are strictly equal become `equal` ops;
 * - every other block becomes `insert` or `delete`, and the resulting sequence is
 *   passed through `refineAlignment` before being returned.
 * The table is O(n*m) in the number of blocks.
 */
// lcsLength: LCS length of two strings using two rolling rows (prev / curr).
function lcsLength(a: string, b: string): number {
const m = a.length
const n = b.length
let prev = new Array(n + 1).fill(0)
let curr = new Array(n + 1).fill(0)
for (let i = 1; i <= m; i++) {
for (let j = 1; j <= n; j++) {
if (a[i - 1] === b[j - 1]) {
curr[j] = prev[j - 1] + 1
} else {
curr[j] = Math.max(prev[j], curr[j - 1])
}
// alignBlocks: n = number of original blocks, m = number of modified blocks.
function alignBlocks(origTexts: string[], modTexts: string[]): BlockOp[] {
const n = origTexts.length
const m = modTexts.length
// Degenerate inputs: nothing to align, everything inserted, or everything deleted.
if (n === 0 && m === 0) return []
if (n === 0) return modTexts.map((_, j) => ({ op: 'insert' as const, origIdx: -1, modIdx: j }))
if (m === 0) return origTexts.map((_, i) => ({ op: 'delete' as const, origIdx: i, modIdx: -1 }))
// Standard LCS dynamic-programming table over whole-block text equality
const dp: number[][] = Array.from({ length: n + 1 }, () => new Array(m + 1).fill(0))
for (let i = 1; i <= n; i++) {
for (let j = 1; j <= m; j++) {
dp[i][j] = origTexts[i - 1] === modTexts[j - 1]
? dp[i - 1][j - 1] + 1
: Math.max(dp[i - 1][j], dp[i][j - 1])
}
// (lcsLength) swap the rolling rows; after the loop `prev` holds the last computed row.
;[prev, curr] = [curr, prev]
}
return prev[n]
// Backtrack through the table to build the operation sequence
const ops: BlockOp[] = []
let i = n
let j = m
while (i > 0 || j > 0) {
if (i > 0 && j > 0 && origTexts[i - 1] === modTexts[j - 1]) {
ops.push({ op: 'equal', origIdx: i - 1, modIdx: j - 1 })
i--
j--
// On ties the insert branch is taken first while walking backwards.
} else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
ops.push({ op: 'insert', origIdx: -1, modIdx: j - 1 })
j--
} else {
ops.push({ op: 'delete', origIdx: i - 1, modIdx: -1 })
i--
}
}
// The backtrack runs end-to-start, so reverse into document order
ops.reverse()
// Second pass over blocks the LCS left unmatched: neighbouring delete/insert ops
// are re-labelled as `diff` instead of staying a delete + insert pair
return refineAlignment(ops)
}
/**
 * Turns neighbouring delete/insert operations into `diff` operations.
 *
 * Each maximal run of consecutive `delete` / `insert` ops (in any order) is
 * split into its deletes and its inserts. The k-th delete is paired with the
 * k-th insert and emitted as a single `diff` op carrying the delete's
 * `origIdx` and the insert's `modIdx`. Leftover deletes, then leftover
 * inserts, follow unchanged. Ops of any other kind pass through as-is.
 *
 * @param ops alignment operations in document order
 * @returns a new array; the input array is not modified
 */
function refineAlignment(ops: BlockOp[]): BlockOp[] {
  const isChange = (entry: BlockOp): boolean => entry.op === 'delete' || entry.op === 'insert'
  const result: BlockOp[] = []

  let cursor = 0
  while (cursor < ops.length) {
    const head = ops[cursor]
    if (!isChange(head)) {
      result.push(head)
      cursor += 1
      continue
    }

    // Find the end of the current run of delete/insert ops.
    let runEnd = cursor
    while (runEnd < ops.length && isChange(ops[runEnd])) {
      runEnd += 1
    }
    const run = ops.slice(cursor, runEnd)
    const removed = run.filter(entry => entry.op === 'delete')
    const added = run.filter(entry => entry.op === 'insert')

    // Pair deletes with inserts positionally.
    const paired = Math.min(removed.length, added.length)
    for (let p = 0; p < paired; p += 1) {
      result.push({ op: 'diff', origIdx: removed[p].origIdx, modIdx: added[p].modIdx })
    }
    // Whatever could not be paired keeps its original op.
    result.push(...removed.slice(paired), ...added.slice(paired))

    cursor = runEnd
  }

  return result
}
// ── Layer 2块内相似度路由 ──────────────────────────────────────────────────
/**
 * Cheap similarity estimate between two strings, in the range [0, 1].
 *
 * - Two empty strings are fully similar (1).
 * - If the shorter/longer length ratio is below LENGTH_RATIO_THRESHOLD, that
 *   ratio itself is returned.
 * - Otherwise the result is (common prefix length + common suffix length)
 *   divided by the longer length. The suffix scan is capped so it never
 *   overlaps the prefix.
 *
 * Only the two ends of the strings are scanned, so the work is linear in the
 * shorter length.
 *
 * @param a first text
 * @param b second text
 * @returns similarity score; higher means more alike
 */
function quickSimilarity(a: string, b: string): number {
  const longer = Math.max(a.length, b.length)
  if (longer === 0) return 1

  const shorter = Math.min(a.length, b.length)
  const ratio = shorter / longer
  // Very different lengths: report the ratio without scanning any characters.
  if (ratio < LENGTH_RATIO_THRESHOLD) return ratio

  // Length of the shared prefix.
  let head = 0
  for (; head < shorter; head++) {
    if (a[head] !== b[head]) break
  }

  // Length of the shared suffix, limited to what the prefix has not covered.
  let tail = 0
  for (const room = shorter - head; tail < room; tail++) {
    if (a[a.length - 1 - tail] !== b[b.length - 1 - tail]) break
  }

  return (head + tail) / longer
}
/**
* diff
* 1. replace diff
* 2. diff-match-patch diff +
* 1. < replace diff
* 2. diff-match-patch diff +
*/
function computeSmartDiff(orig: string, mod: string): DiffSegment[] {
if (orig === mod) {
return [{ type: 'equal', original: orig, modified: mod }]
}
// 计算相似度LCS长度 / max(原文长度, 改文长度)
const lcsLen = lcsLength(orig, mod)
const similarity = lcsLen / Math.max(orig.length, mod.length, 1)
const similarity = quickSimilarity(orig, mod)
// 相似度低于阈值 → 大幅改写,整块标记为 replace
if (similarity < SIMILARITY_THRESHOLD) {
if (similarity < LENGTH_RATIO_THRESHOLD) {
return [{ type: 'replace', original: orig, modified: mod }]
}
@ -89,37 +228,54 @@ function computeSmartDiff(orig: string, mod: string): DiffSegment[] {
return denoiseSegments(segs)
}
// ── Layer 3智能降噪 ────────────────────────────────────────────────────────
/**
 * Denoises diff segments by downgrading trivial `replace` segments to `equal`.
 * Segments of any other type are returned unchanged.
 *
 * Rules visible in the body, applied in order to each `replace` segment:
 * 1. both sides consist only of punctuation / symbols / whitespace -> `equal`;
 * 2. both sides are at most TINY_CONTENT_THRESHOLD long -> kept as `replace`;
 * 3. both sides are at most 2 long and either side contains a character that is
 *    neither a letter nor a digit -> `equal`;
 * 4. anything else -> kept as `replace`.
 *
 * A downgraded segment keeps its differing `original` / `modified` texts.
 *
 * NOTE(review): the first statements of the callback (the multi-line `if` using
 * TINY_SEGMENT_THRESHOLD and its `return`) look like lines from a previous version
 * interleaved by the diff rendering — confirm which lines are live.
 */
function denoiseSegments(segs: DiffSegment[]): DiffSegment[] {
return segs.map(seg => {
if (
seg.type === 'replace' &&
seg.original.length <= TINY_SEGMENT_THRESHOLD &&
seg.modified.length <= TINY_SEGMENT_THRESHOLD
) {
return { type: 'equal', original: seg.original, modified: seg.modified }
if (seg.type !== 'replace') return seg
const { original, modified } = seg
// Rule 1: change made up solely of punctuation / symbols / whitespace
if (PUNCTUATION_ONLY_REGEX.test(original) && PUNCTUATION_ONLY_REGEX.test(modified)) {
return { type: 'equal' as const, original, modified }
}
// Rule 2: very short substantive replacement (1 char -> 1 char) -> keep
if (original.length <= TINY_CONTENT_THRESHOLD && modified.length <= TINY_CONTENT_THRESHOLD) {
return seg
}
// Rule 3: short text (<= 2 chars) containing a non-letter, non-digit character -> denoise
if (original.length <= 2 && modified.length <= 2) {
// The pattern matches any character that is neither a letter nor a number,
// so whitespace and symbols count here as well as punctuation.
const origHasPunct = /[^\p{L}\p{N}]/u.test(original)
const modHasPunct = /[^\p{L}\p{N}]/u.test(modified)
if (origHasPunct || modHasPunct) {
return { type: 'equal' as const, original, modified }
}
}
return seg
})
}
// ── 主入口 ───────────────────────────────────────────────────────────────────
/**
* diff
*
* @param originalContent PM JSON doc.slice().toJSON().content
* @param modifiedContent PM JSON
* @returns DiffSegment[] buildDecorationSet
*
*
* 1.
* 2. equal decoration
* 3. LCS diff
* 4. delete
* 5. insert
* 6. \n equal charOffset textBetween
*/
export function computeStructuredDiff(
originalContent: PMNode[],
@ -137,35 +293,49 @@ export function computeStructuredDiff(
if (modTexts.length === 0) {
return [{ type: 'delete', original: origTexts.join('\n'), modified: '' }]
}
const maxLen = Math.max(origTexts.length, modTexts.length)
// Layer 1块级 LCS 对齐
const ops = alignBlocks(origTexts, modTexts)
const segments: DiffSegment[] = []
let isFirstBlock = true
for (let i = 0; i < maxLen; i++) {
// 块间分隔符:\n 作为独立 equal 段
if (i > 0) {
for (const op of ops) {
// 块间分隔符:\n 作为独立 equal 段(维持 charOffset 与 textBetween 一致)
if (!isFirstBlock) {
segments.push({ type: 'equal', original: '\n', modified: '\n' })
}
isFirstBlock = false
const orig = i < origTexts.length ? origTexts[i] : ''
const mod = i < modTexts.length ? modTexts[i] : ''
if (orig === mod) {
// ── 未变更块:单段 equal不产生 decoration ──
if (orig !== '') {
segments.push({ type: 'equal', original: orig, modified: mod })
switch (op.op) {
case 'equal': {
const text = origTexts[op.origIdx]
if (text !== '') {
segments.push({ type: 'equal', original: text, modified: text })
}
break
}
case 'diff': {
const orig = origTexts[op.origIdx]
const mod = modTexts[op.modIdx]
// Layer 2 + 3块内智能 diff
const innerSegs = computeSmartDiff(orig, mod)
segments.push(...innerSegs)
break
}
case 'insert': {
const mod = modTexts[op.modIdx]
segments.push({ type: 'insert', original: '', modified: mod })
break
}
case 'delete': {
const orig = origTexts[op.origIdx]
segments.push({ type: 'delete', original: orig, modified: '' })
break
}
// 空块跳过(不输出,保留位置计数但装饰系统会忽略)
} else if (orig === '') {
// ── 新增块 ──
segments.push({ type: 'insert', original: '', modified: mod })
} else if (mod === '') {
// ── 删除块 ──
segments.push({ type: 'delete', original: orig, modified: '' })
} else {
// ── 变更块:块内智能 diff相似度阈值 + 降噪) ──
const innerSegs = computeSmartDiff(orig, mod)
segments.push(...innerSegs)
}
}

View File

@ -6282,16 +6282,7 @@ string_decoder@~1.1.1:
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"
string-width@^4.1.0, string-width@^4.2.0:
version "4.2.3"
resolved "https://registry.npmmirror.com/string-width/-/string-width-4.2.3.tgz"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
dependencies:
emoji-regex "^8.0.0"
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"
string-width@^4.2.3:
string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
version "4.2.3"
resolved "https://registry.npmmirror.com/string-width/-/string-width-4.2.3.tgz"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==