Skip to main content

vtcode_commons/
diff.rs

1//! Diff utilities for generating structured diffs.
2
3use serde::Serialize;
4use std::cmp::min;
5use std::collections::HashMap;
6
7/// Represents a chunk of text in a diff (Equal, Delete, or Insert).
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10    Equal(&'a str),
11    Delete(&'a str),
12    Insert(&'a str),
13}
14
15/// Compute an optimal diff between two strings using Myers algorithm.
16pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
17    if old.is_empty() && new.is_empty() {
18        return Vec::with_capacity(0);
19    }
20    if old.is_empty() {
21        return vec![Chunk::Insert(new)];
22    }
23    if new.is_empty() {
24        return vec![Chunk::Delete(old)];
25    }
26
27    // Strip common prefix and suffix first (optimization)
28    let mut prefix_len = 0;
29    for (o, n) in old.chars().zip(new.chars()) {
30        if o == n {
31            prefix_len += o.len_utf8();
32        } else {
33            break;
34        }
35    }
36
37    let mut suffix_len = 0;
38    let old_rest = &old[prefix_len..];
39    let new_rest = &new[prefix_len..];
40
41    let old_chars: Vec<char> = old_rest.chars().collect();
42    let new_chars: Vec<char> = new_rest.chars().collect();
43
44    for (o, n) in old_chars.iter().rev().zip(new_chars.iter().rev()) {
45        if o == n {
46            suffix_len += o.len_utf8();
47        } else {
48            break;
49        }
50    }
51
52    let old_middle_end = old_rest.len() - suffix_len;
53    let new_middle_end = new_rest.len() - suffix_len;
54
55    let old_middle = &old_rest[..old_middle_end];
56    let new_middle = &new_rest[..new_middle_end];
57
58    let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
59
60    // Add common prefix
61    if prefix_len > 0 {
62        result.push(Chunk::Equal(&old[..prefix_len]));
63    }
64
65    // Compute optimal diff for the middle section
66    if !old_middle.is_empty() || !new_middle.is_empty() {
67        let old_chars: Vec<char> = old_middle.chars().collect();
68        let new_chars: Vec<char> = new_middle.chars().collect();
69        let edits = myers_diff(&old_chars, &new_chars);
70
71        let mut old_pos = 0;
72        let mut new_pos = 0;
73
74        for edit in edits {
75            match edit {
76                Edit::Equal => {
77                    old_pos += 1;
78                    new_pos += 1;
79                }
80                Edit::Delete => {
81                    let ch = old_chars[old_pos];
82                    let byte_start = old_middle.char_indices().nth(old_pos).unwrap().0;
83                    let byte_end = byte_start + ch.len_utf8();
84                    result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
85                    old_pos += 1;
86                }
87                Edit::Insert => {
88                    let ch = new_chars[new_pos];
89                    let byte_start = new_middle.char_indices().nth(new_pos).unwrap().0;
90                    let byte_end = byte_start + ch.len_utf8();
91                    result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
92                    new_pos += 1;
93                }
94            }
95        }
96    }
97
98    // Add common suffix
99    if suffix_len > 0 {
100        result.push(Chunk::Equal(&old[old.len() - suffix_len..]));
101    }
102
103    result
104}
105
106#[derive(Debug, Clone, Copy, PartialEq, Eq)]
107enum Edit {
108    Equal,
109    Delete,
110    Insert,
111}
112
113fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
114    let n = old.len();
115    let m = new.len();
116
117    if n == 0 {
118        return vec![Edit::Insert; m];
119    }
120    if m == 0 {
121        return vec![Edit::Delete; n];
122    }
123
124    let max_d = n + m;
125    let mut v = vec![0; 2 * max_d + 1];
126    let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
127    let row_len = 2 * max_d + 1;
128
129    v[max_d] = 0;
130
131    for d in 0..=max_d {
132        for k in (-(d as i32)..=(d as i32)).step_by(2) {
133            let k_idx = (k + max_d as i32) as usize;
134
135            let x = if k == -(d as i32) || (k != d as i32 && v[k_idx - 1] < v[k_idx + 1]) {
136                v[k_idx + 1]
137            } else {
138                v[k_idx - 1] + 1
139            };
140
141            let mut x = x;
142            let mut y = (x as i32 - k) as usize;
143
144            while x < n && y < m && old[x] == new[y] {
145                x += 1;
146                y += 1;
147            }
148
149            v[k_idx] = x;
150            v_index[d * row_len + k_idx] = x;
151
152            if x >= n && y >= m {
153                return backtrack_myers(old, new, &v_index, d, k, max_d);
154            }
155        }
156    }
157
158    vec![]
159}
160
161fn backtrack_myers(
162    old: &[char],
163    new: &[char],
164    v_index: &[usize],
165    d: usize,
166    mut k: i32,
167    max_d: usize,
168) -> Vec<Edit> {
169    let mut edits = Vec::with_capacity(old.len() + new.len());
170    let mut x = old.len();
171    let mut y = new.len();
172    let row_len = 2 * max_d + 1;
173
174    for cur_d in (0..=d).rev() {
175        if cur_d == 0 {
176            while x > 0 && y > 0 {
177                edits.push(Edit::Equal);
178                x -= 1;
179                y -= 1;
180            }
181            break;
182        }
183
184        let k_idx = (k + max_d as i32) as usize;
185
186        let prev_k = if k == -(cur_d as i32)
187            || (k != cur_d as i32
188                && v_index[(cur_d - 1) * row_len + k_idx - 1]
189                    < v_index[(cur_d - 1) * row_len + k_idx + 1])
190        {
191            k + 1
192        } else {
193            k - 1
194        };
195
196        let prev_k_idx = (prev_k + max_d as i32) as usize;
197        let prev_x_val = v_index[(cur_d - 1) * row_len + prev_k_idx];
198        let prev_y = (prev_x_val as i32 - prev_k) as usize;
199
200        let (move_x, move_y) = if prev_k == k + 1 {
201            (prev_x_val, prev_y + 1)
202        } else {
203            (prev_x_val + 1, prev_y)
204        };
205
206        while x > move_x && y > move_y {
207            edits.push(Edit::Equal);
208            x -= 1;
209            y -= 1;
210        }
211
212        if prev_k == k + 1 {
213            edits.push(Edit::Insert);
214            y -= 1;
215        } else {
216            edits.push(Edit::Delete);
217            x -= 1;
218        }
219
220        k = prev_k;
221    }
222
223    edits.reverse();
224    edits
225}
226
227/// Options for diff generation.
228#[derive(Debug, Clone)]
229pub struct DiffOptions<'a> {
230    pub context_lines: usize,
231    pub old_label: Option<&'a str>,
232    pub new_label: Option<&'a str>,
233    pub missing_newline_hint: bool,
234}
235
236impl Default for DiffOptions<'_> {
237    fn default() -> Self {
238        Self {
239            context_lines: 3,
240            old_label: None,
241            new_label: None,
242            missing_newline_hint: true,
243        }
244    }
245}
246
247/// A diff rendered with both structured hunks and formatted text.
248#[derive(Debug, Clone, Serialize)]
249pub struct DiffBundle {
250    pub hunks: Vec<DiffHunk>,
251    pub formatted: String,
252    pub is_empty: bool,
253}
254
255/// A diff hunk with metadata for old/new ranges.
256#[derive(Debug, Clone, Serialize)]
257pub struct DiffHunk {
258    pub old_start: usize,
259    pub old_lines: usize,
260    pub new_start: usize,
261    pub new_lines: usize,
262    pub lines: Vec<DiffLine>,
263}
264
265/// A single diff line annotated with metadata and type.
266#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
267#[serde(rename_all = "snake_case")]
268pub enum DiffLineKind {
269    Context,
270    Addition,
271    Deletion,
272}
273
274/// Metadata for a single line inside a diff hunk.
275#[derive(Debug, Clone, Serialize)]
276pub struct DiffLine {
277    pub kind: DiffLineKind,
278    pub old_line: Option<usize>,
279    pub new_line: Option<usize>,
280    pub text: String,
281}
282
283/// Compute a structured diff bundle.
284pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
285where
286    F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
287{
288    let old_lines_owned = split_lines_with_terminator(old);
289    let new_lines_owned = split_lines_with_terminator(new);
290
291    let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
292    let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
293
294    let records = collect_line_records(&old_refs, &new_refs);
295    let has_changes = records
296        .iter()
297        .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
298
299    let hunks = if has_changes {
300        build_hunks(&records, options.context_lines)
301    } else {
302        Vec::new()
303    };
304
305    let formatted = if hunks.is_empty() {
306        String::new()
307    } else {
308        formatter(&hunks, &options)
309    };
310
311    DiffBundle {
312        hunks,
313        formatted,
314        is_empty: !has_changes,
315    }
316}
317
318fn split_lines_with_terminator(text: &str) -> Vec<String> {
319    if text.is_empty() {
320        return Vec::with_capacity(0);
321    }
322
323    let mut lines: Vec<String> = text
324        .split_inclusive('\n')
325        .map(|line| line.to_string())
326        .collect();
327
328    if lines.is_empty() {
329        lines.push(text.to_string());
330    }
331
332    lines
333}
334
335fn collect_line_records<'a>(
336    old_lines: &'a [&'a str],
337    new_lines: &'a [&'a str],
338) -> Vec<LineRecord<'a>> {
339    let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
340    let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
341    let mut old_index = 0usize;
342    let mut new_index = 0usize;
343
344    for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
345        match chunk {
346            Chunk::Equal(text) => {
347                for _ in text.chars() {
348                    let old_line = old_index + 1;
349                    let new_line = new_index + 1;
350                    let line = old_lines[old_index];
351                    records.push(LineRecord {
352                        kind: DiffLineKind::Context,
353                        old_line: Some(old_line),
354                        new_line: Some(new_line),
355                        text: line,
356                        anchor_old: old_line,
357                        anchor_new: new_line,
358                    });
359                    old_index += 1;
360                    new_index += 1;
361                }
362            }
363            Chunk::Delete(text) => {
364                for _ in text.chars() {
365                    let old_line = old_index + 1;
366                    let anchor_new = new_index + 1;
367                    let line = old_lines[old_index];
368                    records.push(LineRecord {
369                        kind: DiffLineKind::Deletion,
370                        old_line: Some(old_line),
371                        new_line: None,
372                        text: line,
373                        anchor_old: old_line,
374                        anchor_new,
375                    });
376                    old_index += 1;
377                }
378            }
379            Chunk::Insert(text) => {
380                for _ in text.chars() {
381                    let new_line = new_index + 1;
382                    let anchor_old = old_index + 1;
383                    let line = new_lines[new_index];
384                    records.push(LineRecord {
385                        kind: DiffLineKind::Addition,
386                        old_line: None,
387                        new_line: Some(new_line),
388                        text: line,
389                        anchor_old,
390                        anchor_new: new_line,
391                    });
392                    new_index += 1;
393                }
394            }
395        }
396    }
397
398    records
399}
400
401fn encode_line_sequences<'a>(
402    old_lines: &'a [&'a str],
403    new_lines: &'a [&'a str],
404) -> (String, String) {
405    let mut token_map: HashMap<&'a str, char> = HashMap::new();
406    let mut next_codepoint: u32 = 0;
407
408    let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
409    let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
410
411    (old_encoded, new_encoded)
412}
413
414fn encode_line_list<'a>(
415    lines: &'a [&'a str],
416    map: &mut HashMap<&'a str, char>,
417    next_codepoint: &mut u32,
418) -> String {
419    let mut encoded = String::with_capacity(lines.len());
420    for &line in lines {
421        let token = if let Some(&value) = map.get(line) {
422            value
423        } else {
424            let ch = next_token_char(next_codepoint).expect("exceeded diff token capacity");
425            map.insert(line, ch);
426            ch
427        };
428        encoded.push(token);
429    }
430    encoded
431}
432
433fn next_token_char(counter: &mut u32) -> Option<char> {
434    while *counter <= 0x10FFFF {
435        let candidate = *counter;
436        *counter += 1;
437        if (0xD800..=0xDFFF).contains(&candidate) {
438            continue;
439        }
440        if let Some(ch) = char::from_u32(candidate) {
441            return Some(ch);
442        }
443    }
444    None
445}
446
447#[derive(Debug)]
448struct LineRecord<'a> {
449    kind: DiffLineKind,
450    old_line: Option<usize>,
451    new_line: Option<usize>,
452    text: &'a str,
453    anchor_old: usize,
454    anchor_new: usize,
455}
456
457fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
458    if records.is_empty() {
459        return Vec::new();
460    }
461
462    let ranges = compute_hunk_ranges(records, context);
463    let mut hunks = Vec::with_capacity(ranges.len());
464
465    for (start, end) in ranges {
466        let slice = &records[start..=end];
467
468        let old_start = slice
469            .iter()
470            .filter_map(|r| r.old_line)
471            .min()
472            .or_else(|| slice.iter().map(|r| r.anchor_old).min())
473            .unwrap_or(1);
474
475        let new_start = slice
476            .iter()
477            .filter_map(|r| r.new_line)
478            .min()
479            .or_else(|| slice.iter().map(|r| r.anchor_new).min())
480            .unwrap_or(1);
481
482        let old_lines = slice
483            .iter()
484            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
485            .count();
486        let new_lines = slice
487            .iter()
488            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
489            .count();
490
491        let lines = slice
492            .iter()
493            .map(|record| DiffLine {
494                kind: record.kind.clone(),
495                old_line: record.old_line,
496                new_line: record.new_line,
497                text: record.text.to_string(),
498            })
499            .collect();
500
501        hunks.push(DiffHunk {
502            old_start,
503            old_lines,
504            new_start,
505            new_lines,
506            lines,
507        });
508    }
509
510    hunks
511}
512
513fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
514    let mut ranges = Vec::with_capacity(4);
515    let mut current_start: Option<usize> = None;
516    let mut current_end: usize = 0;
517
518    for (idx, record) in records.iter().enumerate() {
519        if record.kind != DiffLineKind::Context {
520            let start = idx.saturating_sub(context);
521            let end = min(idx + context, records.len().saturating_sub(1));
522
523            if let Some(existing_start) = current_start {
524                if start < existing_start {
525                    current_start = Some(start);
526                }
527                if end > current_end {
528                    current_end = end;
529                }
530            } else {
531                current_start = Some(start);
532                current_end = end;
533            }
534        } else if let Some(start) = current_start
535            && idx > current_end
536        {
537            ranges.push((start, current_end));
538            current_start = None;
539        }
540    }
541
542    if let Some(start) = current_start {
543        ranges.push((start, current_end));
544    }
545
546    ranges
547}