Skip to main content

vtcode_commons/
diff.rs

1//! Diff utilities for generating structured diffs.
2
3use hashbrown::HashMap;
4use serde::Serialize;
5use std::cmp::min;
6
7/// Represents a chunk of text in a diff (Equal, Delete, or Insert).
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10    Equal(&'a str),
11    Delete(&'a str),
12    Insert(&'a str),
13}
14
15/// Compute an optimal diff between two strings using Myers algorithm.
16pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
17    if old.is_empty() && new.is_empty() {
18        return Vec::with_capacity(0);
19    }
20    if old.is_empty() {
21        return vec![Chunk::Insert(new)];
22    }
23    if new.is_empty() {
24        return vec![Chunk::Delete(old)];
25    }
26
27    // Strip common prefix and suffix first (optimization)
28    let mut prefix_len = 0;
29    for (o, n) in old.chars().zip(new.chars()) {
30        if o == n {
31            prefix_len += o.len_utf8();
32        } else {
33            break;
34        }
35    }
36
37    let mut suffix_len = 0;
38    let old_rest = &old[prefix_len..];
39    let new_rest = &new[prefix_len..];
40
41    let old_chars: Vec<char> = old_rest.chars().collect();
42    let new_chars: Vec<char> = new_rest.chars().collect();
43
44    for (o, n) in old_chars.iter().rev().zip(new_chars.iter().rev()) {
45        if o == n {
46            suffix_len += o.len_utf8();
47        } else {
48            break;
49        }
50    }
51
52    let old_middle_end = old_rest.len() - suffix_len;
53    let new_middle_end = new_rest.len() - suffix_len;
54
55    let old_middle = &old_rest[..old_middle_end];
56    let new_middle = &new_rest[..new_middle_end];
57
58    let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
59
60    // Add common prefix
61    if prefix_len > 0 {
62        result.push(Chunk::Equal(&old[..prefix_len]));
63    }
64
65    // Compute optimal diff for the middle section
66    if !old_middle.is_empty() || !new_middle.is_empty() {
67        let old_chars: Vec<char> = old_middle.chars().collect();
68        let new_chars: Vec<char> = new_middle.chars().collect();
69        let old_byte_starts: Vec<usize> = old_middle.char_indices().map(|(idx, _)| idx).collect();
70        let new_byte_starts: Vec<usize> = new_middle.char_indices().map(|(idx, _)| idx).collect();
71        let edits = myers_diff(&old_chars, &new_chars);
72
73        let mut old_pos = 0;
74        let mut new_pos = 0;
75
76        for edit in edits {
77            match edit {
78                Edit::Equal => {
79                    old_pos += 1;
80                    new_pos += 1;
81                }
82                Edit::Delete => {
83                    let Some(ch) = old_chars.get(old_pos).copied() else {
84                        break;
85                    };
86                    let Some(byte_start) = old_byte_starts.get(old_pos).copied() else {
87                        break;
88                    };
89                    let byte_end = byte_start + ch.len_utf8();
90                    result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
91                    old_pos += 1;
92                }
93                Edit::Insert => {
94                    let Some(ch) = new_chars.get(new_pos).copied() else {
95                        break;
96                    };
97                    let Some(byte_start) = new_byte_starts.get(new_pos).copied() else {
98                        break;
99                    };
100                    let byte_end = byte_start + ch.len_utf8();
101                    result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
102                    new_pos += 1;
103                }
104            }
105        }
106    }
107
108    // Add common suffix
109    if suffix_len > 0 {
110        result.push(Chunk::Equal(&old[old.len() - suffix_len..]));
111    }
112
113    result
114}
115
116#[derive(Debug, Clone, Copy, PartialEq, Eq)]
117enum Edit {
118    Equal,
119    Delete,
120    Insert,
121}
122
123fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
124    let n = old.len();
125    let m = new.len();
126
127    if n == 0 {
128        return vec![Edit::Insert; m];
129    }
130    if m == 0 {
131        return vec![Edit::Delete; n];
132    }
133
134    let max_d = n + m;
135    let max_d_i32 = max_d as i32;
136    let mut v = vec![0; 2 * max_d + 1];
137    let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
138    let row_len = 2 * max_d + 1;
139
140    v[max_d] = 0;
141
142    for d in 0..=max_d {
143        let d_i32 = d as i32;
144        let row_start = d * row_len;
145        for k in (-d_i32..=d_i32).step_by(2) {
146            let k_idx = (k + max_d_i32) as usize;
147
148            let x = if k == -d_i32 || (k != d_i32 && v[k_idx - 1] < v[k_idx + 1]) {
149                v[k_idx + 1]
150            } else {
151                v[k_idx - 1] + 1
152            };
153
154            let mut x = x;
155            let mut y = (x as i32 - k) as usize;
156
157            while x < n && y < m && old[x] == new[y] {
158                x += 1;
159                y += 1;
160            }
161
162            v[k_idx] = x;
163            v_index[row_start + k_idx] = x;
164
165            if x >= n && y >= m {
166                return backtrack_myers(old, new, &v_index, d, k, max_d);
167            }
168        }
169    }
170
171    vec![]
172}
173
174fn backtrack_myers(
175    old: &[char],
176    new: &[char],
177    v_index: &[usize],
178    d: usize,
179    mut k: i32,
180    max_d: usize,
181) -> Vec<Edit> {
182    let mut edits = Vec::with_capacity(old.len() + new.len());
183    let mut x = old.len();
184    let mut y = new.len();
185    let max_d_i32 = max_d as i32;
186    let row_len = 2 * max_d + 1;
187
188    for cur_d in (0..=d).rev() {
189        if cur_d == 0 {
190            while x > 0 && y > 0 {
191                edits.push(Edit::Equal);
192                x -= 1;
193                y -= 1;
194            }
195            break;
196        }
197
198        let k_idx = (k + max_d_i32) as usize;
199        let prev_row_start = (cur_d - 1) * row_len;
200
201        let prev_k = if k == -(cur_d as i32)
202            || (k != cur_d as i32
203                && v_index[prev_row_start + k_idx - 1] < v_index[prev_row_start + k_idx + 1])
204        {
205            k + 1
206        } else {
207            k - 1
208        };
209
210        let prev_k_idx = (prev_k + max_d_i32) as usize;
211        let prev_x_val = v_index[prev_row_start + prev_k_idx];
212        let prev_y = (prev_x_val as i32 - prev_k) as usize;
213
214        let (move_x, move_y) = if prev_k == k + 1 {
215            (prev_x_val, prev_y + 1)
216        } else {
217            (prev_x_val + 1, prev_y)
218        };
219
220        while x > move_x && y > move_y {
221            edits.push(Edit::Equal);
222            x -= 1;
223            y -= 1;
224        }
225
226        if prev_k == k + 1 {
227            edits.push(Edit::Insert);
228            y -= 1;
229        } else {
230            edits.push(Edit::Delete);
231            x -= 1;
232        }
233
234        k = prev_k;
235    }
236
237    edits.reverse();
238    edits
239}
240
241/// Options for diff generation.
242#[derive(Debug, Clone)]
243pub struct DiffOptions<'a> {
244    pub context_lines: usize,
245    pub old_label: Option<&'a str>,
246    pub new_label: Option<&'a str>,
247    pub missing_newline_hint: bool,
248}
249
250impl Default for DiffOptions<'_> {
251    fn default() -> Self {
252        Self {
253            context_lines: 3,
254            old_label: None,
255            new_label: None,
256            missing_newline_hint: true,
257        }
258    }
259}
260
261/// A diff rendered with both structured hunks and formatted text.
262#[derive(Debug, Clone, Serialize)]
263pub struct DiffBundle {
264    pub hunks: Vec<DiffHunk>,
265    pub formatted: String,
266    pub is_empty: bool,
267}
268
269/// A diff hunk with metadata for old/new ranges.
270#[derive(Debug, Clone, Serialize)]
271pub struct DiffHunk {
272    pub old_start: usize,
273    pub old_lines: usize,
274    pub new_start: usize,
275    pub new_lines: usize,
276    pub lines: Vec<DiffLine>,
277}
278
279/// A single diff line annotated with metadata and type.
280#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
281#[serde(rename_all = "snake_case")]
282pub enum DiffLineKind {
283    Context,
284    Addition,
285    Deletion,
286}
287
288/// Metadata for a single line inside a diff hunk.
289#[derive(Debug, Clone, Serialize)]
290pub struct DiffLine {
291    pub kind: DiffLineKind,
292    pub old_line: Option<usize>,
293    pub new_line: Option<usize>,
294    pub text: String,
295}
296
297/// Compute a structured diff bundle.
298pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
299where
300    F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
301{
302    let old_lines_owned = split_lines_with_terminator(old);
303    let new_lines_owned = split_lines_with_terminator(new);
304
305    let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
306    let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
307
308    let records = collect_line_records(&old_refs, &new_refs);
309    let has_changes = records
310        .iter()
311        .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
312
313    let hunks = if has_changes {
314        build_hunks(&records, options.context_lines)
315    } else {
316        Vec::new()
317    };
318
319    let formatted = if hunks.is_empty() {
320        String::new()
321    } else {
322        formatter(&hunks, &options)
323    };
324
325    DiffBundle {
326        hunks,
327        formatted,
328        is_empty: !has_changes,
329    }
330}
331
332fn split_lines_with_terminator(text: &str) -> Vec<String> {
333    if text.is_empty() {
334        return Vec::with_capacity(0);
335    }
336
337    let mut lines: Vec<String> = text
338        .split_inclusive('\n')
339        .map(|line| line.to_string())
340        .collect();
341
342    if lines.is_empty() {
343        lines.push(text.to_string());
344    }
345
346    lines
347}
348
349fn collect_line_records<'a>(
350    old_lines: &'a [&'a str],
351    new_lines: &'a [&'a str],
352) -> Vec<LineRecord<'a>> {
353    let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
354    let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
355    let mut old_index = 0usize;
356    let mut new_index = 0usize;
357
358    for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
359        match chunk {
360            Chunk::Equal(text) => {
361                for _ in text.chars() {
362                    let old_line = old_index + 1;
363                    let new_line = new_index + 1;
364                    let line = old_lines[old_index];
365                    records.push(LineRecord {
366                        kind: DiffLineKind::Context,
367                        old_line: Some(old_line),
368                        new_line: Some(new_line),
369                        text: line,
370                        anchor_old: old_line,
371                        anchor_new: new_line,
372                    });
373                    old_index += 1;
374                    new_index += 1;
375                }
376            }
377            Chunk::Delete(text) => {
378                for _ in text.chars() {
379                    let old_line = old_index + 1;
380                    let anchor_new = new_index + 1;
381                    let line = old_lines[old_index];
382                    records.push(LineRecord {
383                        kind: DiffLineKind::Deletion,
384                        old_line: Some(old_line),
385                        new_line: None,
386                        text: line,
387                        anchor_old: old_line,
388                        anchor_new,
389                    });
390                    old_index += 1;
391                }
392            }
393            Chunk::Insert(text) => {
394                for _ in text.chars() {
395                    let new_line = new_index + 1;
396                    let anchor_old = old_index + 1;
397                    let line = new_lines[new_index];
398                    records.push(LineRecord {
399                        kind: DiffLineKind::Addition,
400                        old_line: None,
401                        new_line: Some(new_line),
402                        text: line,
403                        anchor_old,
404                        anchor_new: new_line,
405                    });
406                    new_index += 1;
407                }
408            }
409        }
410    }
411
412    records
413}
414
415fn encode_line_sequences<'a>(
416    old_lines: &'a [&'a str],
417    new_lines: &'a [&'a str],
418) -> (String, String) {
419    let mut token_map: HashMap<&'a str, char> = HashMap::new();
420    let mut next_codepoint: u32 = 0;
421
422    let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
423    let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
424
425    (old_encoded, new_encoded)
426}
427
428fn encode_line_list<'a>(
429    lines: &'a [&'a str],
430    map: &mut HashMap<&'a str, char>,
431    next_codepoint: &mut u32,
432) -> String {
433    let mut encoded = String::with_capacity(lines.len());
434    for &line in lines {
435        let token = if let Some(&value) = map.get(line) {
436            value
437        } else {
438            let Some(ch) = next_token_char(next_codepoint) else {
439                break;
440            };
441            map.insert(line, ch);
442            ch
443        };
444        encoded.push(token);
445    }
446    encoded
447}
448
449fn next_token_char(counter: &mut u32) -> Option<char> {
450    while *counter <= 0x10FFFF {
451        let candidate = *counter;
452        *counter += 1;
453        if (0xD800..=0xDFFF).contains(&candidate) {
454            continue;
455        }
456        if let Some(ch) = char::from_u32(candidate) {
457            return Some(ch);
458        }
459    }
460    None
461}
462
463#[derive(Debug)]
464struct LineRecord<'a> {
465    kind: DiffLineKind,
466    old_line: Option<usize>,
467    new_line: Option<usize>,
468    text: &'a str,
469    anchor_old: usize,
470    anchor_new: usize,
471}
472
473fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
474    if records.is_empty() {
475        return Vec::new();
476    }
477
478    let ranges = compute_hunk_ranges(records, context);
479    let mut hunks = Vec::with_capacity(ranges.len());
480
481    for (start, end) in ranges {
482        let slice = &records[start..=end];
483
484        let old_start = slice
485            .iter()
486            .filter_map(|r| r.old_line)
487            .min()
488            .or_else(|| slice.iter().map(|r| r.anchor_old).min())
489            .unwrap_or(1);
490
491        let new_start = slice
492            .iter()
493            .filter_map(|r| r.new_line)
494            .min()
495            .or_else(|| slice.iter().map(|r| r.anchor_new).min())
496            .unwrap_or(1);
497
498        let old_lines = slice
499            .iter()
500            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
501            .count();
502        let new_lines = slice
503            .iter()
504            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
505            .count();
506
507        let lines = slice
508            .iter()
509            .map(|record| DiffLine {
510                kind: record.kind.clone(),
511                old_line: record.old_line,
512                new_line: record.new_line,
513                text: record.text.to_string(),
514            })
515            .collect();
516
517        hunks.push(DiffHunk {
518            old_start,
519            old_lines,
520            new_start,
521            new_lines,
522            lines,
523        });
524    }
525
526    hunks
527}
528
529fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
530    let mut ranges = Vec::with_capacity(4);
531    let mut current_start: Option<usize> = None;
532    let mut current_end: usize = 0;
533
534    for (idx, record) in records.iter().enumerate() {
535        if record.kind != DiffLineKind::Context {
536            let start = idx.saturating_sub(context);
537            let end = min(idx + context, records.len().saturating_sub(1));
538
539            if let Some(existing_start) = current_start {
540                if start < existing_start {
541                    current_start = Some(start);
542                }
543                if end > current_end {
544                    current_end = end;
545                }
546            } else {
547                current_start = Some(start);
548                current_end = end;
549            }
550        } else if let Some(start) = current_start
551            && idx > current_end
552        {
553            ranges.push((start, current_end));
554            current_start = None;
555        }
556    }
557
558    if let Some(start) = current_start {
559        ranges.push((start, current_end));
560    }
561
562    ranges
563}