Following a redline through the engine
The path a draft takes from baseline-plus-current text to email-safe redline HTML — the doc section first, then the modules that do the work.
src/redline/diff.ts — 175 lines
Outline 14 symbols
- CHAR_DIFF_THRESHOLD const
- LOCALIZED_CHANGE_RATIO const
- WORD_PAIR_TYPO_RATIO const
- SHORT_TOKEN_CHAR_DIFF_MAX const
- mapChange function
- mapChanges function
- changedCharRatio function export
- isLocalizedChange function
- shouldCharDiffWordPair function
- refineWordPair function
- refineChangeGroup function
- refineWordDiff function
- computeDiff function export
- hasChanges function export
1import { diffChars, diffWordsWithSpace } from 'diff';
2import type { DiffPart } from './types';
3
/**
 * Length cutoff, in characters, for "short" text: when the old and the new
 * text are both strictly shorter than this, the edit is always treated as
 * localized.
 */
const CHAR_DIFF_THRESHOLD = 40;

/**
 * Largest changed-character ratio at which a whole-text edit still counts as
 * localized. At or under this value the diff uses word alignment plus
 * per-token character refinement instead of a plain word diff.
 */
const LOCALIZED_CHANGE_RATIO = 0.18;

/**
 * Largest changed-character ratio at which a replaced word pair is treated as
 * a typo-style edit and diffed character by character.
 * Deliberately looser than LOCALIZED_CHANGE_RATIO: a single transposed letter
 * in a 7-character word already scores 2/7 ≈ 28 % and should still qualify.
 */
const WORD_PAIR_TYPO_RATIO = 0.35;

/**
 * Token pairs whose longer side is at most this many characters (short words,
 * numbers, codes) always get a character-level diff.
 */
const SHORT_TOKEN_CHAR_DIFF_MAX = 5;
19
/**
 * Converts one change record from the diff library into a DiffPart.
 *
 * `added` is checked before `removed`, so a record carrying both flags maps
 * to 'insert'; a record with neither flag set maps to 'equal'.
 *
 * @param part - Change record with optional `added` / `removed` flags and its text.
 * @returns The equivalent DiffPart carrying the same `value`.
 */
function mapChange(part: { added?: boolean; removed?: boolean; value: string }): DiffPart {
  const { added, removed, value } = part;
  const op = added ? 'insert' : removed ? 'delete' : 'equal';
  return { op, value };
}
29
/**
 * Converts a list of diff-library change records into DiffParts, preserving order.
 *
 * @param changes - Change records as returned by the diff library.
 * @returns One DiffPart per input record, in the same order.
 */
function mapChanges(
  changes: Array<{ added?: boolean; removed?: boolean; value: string }>,
): DiffPart[] {
  const mapped: DiffPart[] = [];
  for (const change of changes) {
    mapped.push(mapChange(change));
  }
  return mapped;
}
35
/**
 * Measures how much of the text changed at character level.
 *
 * Runs a character diff, sums the lengths of every added and every removed
 * part, and divides by the length of the longer input (at least 1, so two
 * empty strings yield 0 rather than NaN). Insertions and deletions are both
 * counted, so the result is not capped at 1. Small values mean a localized edit.
 *
 * @param oldText - Text before the edit.
 * @param newText - Text after the edit.
 * @returns Changed characters divided by the longer text's length.
 */
export function changedCharRatio(oldText: string, newText: string): number {
  let changedChars = 0;
  for (const part of diffChars(oldText, newText)) {
    if (part.added || part.removed) {
      changedChars += part.value.length;
    }
  }
  const longestLength = Math.max(oldText.length, newText.length, 1);
  return changedChars / longestLength;
}
44
/**
 * Decides whether an edit is small enough to deserve fine-grained refinement.
 *
 * An edit is localized when any of the following holds:
 * - both texts are shorter than CHAR_DIFF_THRESHOLD,
 * - one text is a prefix of the other (pure append or truncation),
 * - the changed-character ratio is at most LOCALIZED_CHANGE_RATIO.
 *
 * @param oldText - Text before the edit.
 * @param newText - Text after the edit.
 * @returns `true` when the edit counts as localized.
 */
function isLocalizedChange(oldText: string, newText: string): boolean {
  const bothShort =
    oldText.length < CHAR_DIFF_THRESHOLD && newText.length < CHAR_DIFF_THRESHOLD;
  const oneExtendsOther = newText.startsWith(oldText) || oldText.startsWith(newText);

  // The character diff is the costly check, so it stays last in the chain and
  // only runs when the cheap checks have not already settled the answer.
  return (
    bothShort ||
    oneExtendsOther ||
    changedCharRatio(oldText, newText) <= LOCALIZED_CHANGE_RATIO
  );
}
54
/**
 * Decides whether a replaced token pair should be diffed character by
 * character (the edit looks like a typo) rather than shown as a whole-token swap.
 *
 * Identical tokens never qualify. Otherwise a pair qualifies when one token is
 * a prefix of the other, when both tokens are whitespace-free and no longer
 * than SHORT_TOKEN_CHAR_DIFF_MAX, or when the changed-character ratio is at
 * most WORD_PAIR_TYPO_RATIO.
 *
 * @param oldToken - Token removed from the old text.
 * @param newToken - Token added in the new text.
 * @returns `true` when a character-level diff should be used for the pair.
 */
function shouldCharDiffWordPair(oldToken: string, newToken: string): boolean {
  if (oldToken === newToken) {
    return false;
  }

  const isPrefixEdit = oldToken.startsWith(newToken) || newToken.startsWith(oldToken);
  if (isPrefixEdit) {
    return true;
  }

  const longestLength =
    oldToken.length >= newToken.length ? oldToken.length : newToken.length;
  const hasWhitespace = /\s/.test(oldToken + newToken);
  const isShortToken = longestLength <= SHORT_TOKEN_CHAR_DIFF_MAX && !hasWhitespace;

  // Short-circuits so the character diff only runs for longer tokens.
  return isShortToken || changedCharRatio(oldToken, newToken) <= WORD_PAIR_TYPO_RATIO;
}
71
/**
 * Produces the diff parts for one old/new token pair.
 *
 * Identical tokens give a single 'equal' part. Typo-like edits (as judged by
 * shouldCharDiffWordPair) give a character-level diff. Anything else is shown
 * as the whole old token deleted followed by the whole new token inserted.
 *
 * @param oldToken - Token removed from the old text.
 * @param newToken - Token added in the new text.
 * @returns Diff parts describing how `oldToken` became `newToken`.
 */
function refineWordPair(oldToken: string, newToken: string): DiffPart[] {
  if (oldToken === newToken) {
    return [{ op: 'equal', value: oldToken }];
  }

  return shouldCharDiffWordPair(oldToken, newToken)
    ? mapChanges(diffChars(oldToken, newToken))
    : [
        { op: 'delete', value: oldToken },
        { op: 'insert', value: newToken },
      ];
}
84
/**
 * Refines one run of consecutive changes by pairing removed and added tokens
 * position by position.
 *
 * Slot `i` pairs `removed[i]` with `added[i]`. When both sides are non-empty
 * the pair goes through refineWordPair; when only one side is non-empty it is
 * emitted as a plain delete or insert; when both are empty the slot emits nothing.
 *
 * @param removed - Removed token values, in order of appearance.
 * @param added - Added token values, in order of appearance.
 * @returns Refined diff parts for the whole group.
 */
function refineChangeGroup(removed: string[], added: string[]): DiffPart[] {
  const refined: DiffPart[] = [];
  const slotCount = Math.max(removed.length, added.length);

  for (let slot = 0; slot < slotCount; slot += 1) {
    // A missing entry on the shorter side is treated like an empty token.
    const before = removed[slot] ?? '';
    const after = added[slot] ?? '';

    if (before === '' && after === '') {
      continue;
    }
    if (after === '') {
      refined.push({ op: 'delete', value: before });
      continue;
    }
    if (before === '') {
      refined.push({ op: 'insert', value: after });
      continue;
    }
    refined.push(...refineWordPair(before, after));
  }

  return refined;
}
103
/**
 * Word-aligned diff in which every run of changes is refined token by token.
 *
 * Unchanged parts of the word diff pass through as 'equal'. Each maximal run
 * of consecutive added/removed parts is collected and handed to
 * refineChangeGroup, which decides per pair between a character-level diff
 * and a whole-token swap. This avoids LCS "stutter" (e.g. Stuart→James
 * showing a shared "a" as unchanged).
 *
 * @param oldText - Text before the edit.
 * @param newText - Text after the edit.
 * @returns Diff parts in document order.
 */
function refineWordDiff(oldText: string, newText: string): DiffPart[] {
  const refined: DiffPart[] = [];
  let pendingRemoved: string[] = [];
  let pendingAdded: string[] = [];

  // Emits the change run collected so far, if any, and starts a new one.
  const flushPending = (): void => {
    if (pendingRemoved.length === 0 && pendingAdded.length === 0) {
      return;
    }
    refined.push(...refineChangeGroup(pendingRemoved, pendingAdded));
    pendingRemoved = [];
    pendingAdded = [];
  };

  for (const part of diffWordsWithSpace(oldText, newText)) {
    if (part.removed || part.added) {
      if (part.removed) {
        pendingRemoved.push(part.value);
      }
      if (part.added) {
        pendingAdded.push(part.value);
      }
      continue;
    }

    // An unchanged part closes the current change run.
    flushPending();
    refined.push({ op: 'equal', value: part.value });
  }

  // The text may end inside a change run.
  flushPending();
  return refined;
}
138
/**
 * Diffs plain text with granularity suited to legal prose.
 *
 * Decision order:
 * 1. Identical texts give a single 'equal' part.
 * 2. If one text is a prefix of the other (pure append or truncation), a
 *    plain word diff is returned.
 * 3. If both texts are short, or the edit is localized, a word-aligned diff
 *    with character refinement is returned (typo-safe, no shared-letter stutter).
 * 4. Otherwise a word diff is computed. If its deletions cover at least 45 %
 *    of the old text, the refined diff is returned instead; if not, the word
 *    diff is returned as is (avoids whole-sentence false positives from
 *    abbreviations).
 *
 * @param oldText - Baseline text.
 * @param newText - Current text.
 * @returns Diff parts in document order.
 */
export function computeDiff(oldText: string, newText: string): DiffPart[] {
  if (oldText === newText) {
    return [{ op: 'equal', value: oldText }];
  }

  const oneExtendsOther = newText.startsWith(oldText) || oldText.startsWith(newText);
  if (oneExtendsOther) {
    return mapChanges(diffWordsWithSpace(oldText, newText));
  }

  const bothShort =
    oldText.length < CHAR_DIFF_THRESHOLD && newText.length < CHAR_DIFF_THRESHOLD;
  if (bothShort || isLocalizedChange(oldText, newText)) {
    return refineWordDiff(oldText, newText);
  }

  const wordParts = mapChanges(diffWordsWithSpace(oldText, newText));

  let deletedChars = 0;
  for (const part of wordParts) {
    if (part.op === 'delete') {
      deletedChars += part.value.length;
    }
  }

  // Heavy deletion (at least 45 % of the old text) switches back to the refined diff.
  const mostlyDeleted = deletedChars > 0 && deletedChars >= oldText.length * 0.45;
  return mostlyDeleted ? refineWordDiff(oldText, newText) : wordParts;
}
171
/**
 * Reports whether a diff contains any real change.
 *
 * @param parts - Diff parts to inspect.
 * @returns `true` if at least one part has an op other than 'equal';
 *          `false` for an empty list or an all-'equal' diff.
 */
export function hasChanges(parts: DiffPart[]): boolean {
  for (const part of parts) {
    if (part.op !== 'equal') {
      return true;
    }
  }
  return false;
}
175