Skip to main content

vtcode_commons/
diff.rs

1//! Diff utilities for generating structured diffs.
2
3use hashbrown::HashMap;
4use serde::Serialize;
5use std::cmp::min;
6
7/// Represents a chunk of text in a diff (Equal, Delete, or Insert).
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10    Equal(&'a str),
11    Delete(&'a str),
12    Insert(&'a str),
13}
14
15/// Compute an optimal diff between two strings using Myers algorithm.
16pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
17    if old.is_empty() && new.is_empty() {
18        return Vec::with_capacity(0);
19    }
20    if old.is_empty() {
21        return vec![Chunk::Insert(new)];
22    }
23    if new.is_empty() {
24        return vec![Chunk::Delete(old)];
25    }
26
27    // Strip common prefix and suffix first (optimization)
28    let mut prefix_len = 0;
29    for (o, n) in old.chars().zip(new.chars()) {
30        if o == n {
31            prefix_len += o.len_utf8();
32        } else {
33            break;
34        }
35    }
36
37    let mut suffix_len = 0;
38    let old_rest = &old[prefix_len..];
39    let new_rest = &new[prefix_len..];
40
41    let old_chars: Vec<char> = old_rest.chars().collect();
42    let new_chars: Vec<char> = new_rest.chars().collect();
43
44    for (o, n) in old_chars.iter().rev().zip(new_chars.iter().rev()) {
45        if o == n {
46            suffix_len += o.len_utf8();
47        } else {
48            break;
49        }
50    }
51
52    let old_middle_end = old_rest.len() - suffix_len;
53    let new_middle_end = new_rest.len() - suffix_len;
54
55    let old_middle = &old_rest[..old_middle_end];
56    let new_middle = &new_rest[..new_middle_end];
57
58    let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
59
60    // Add common prefix
61    if prefix_len > 0 {
62        result.push(Chunk::Equal(&old[..prefix_len]));
63    }
64
65    // Compute optimal diff for the middle section
66    if !old_middle.is_empty() || !new_middle.is_empty() {
67        let old_chars: Vec<char> = old_middle.chars().collect();
68        let new_chars: Vec<char> = new_middle.chars().collect();
69        let old_byte_starts: Vec<usize> = old_middle.char_indices().map(|(idx, _)| idx).collect();
70        let new_byte_starts: Vec<usize> = new_middle.char_indices().map(|(idx, _)| idx).collect();
71        let edits = myers_diff(&old_chars, &new_chars);
72
73        let mut old_pos = 0;
74        let mut new_pos = 0;
75
76        for edit in edits {
77            match edit {
78                Edit::Equal => {
79                    old_pos += 1;
80                    new_pos += 1;
81                }
82                Edit::Delete => {
83                    let Some(ch) = old_chars.get(old_pos).copied() else {
84                        break;
85                    };
86                    let Some(byte_start) = old_byte_starts.get(old_pos).copied() else {
87                        break;
88                    };
89                    let byte_end = byte_start + ch.len_utf8();
90                    result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
91                    old_pos += 1;
92                }
93                Edit::Insert => {
94                    let Some(ch) = new_chars.get(new_pos).copied() else {
95                        break;
96                    };
97                    let Some(byte_start) = new_byte_starts.get(new_pos).copied() else {
98                        break;
99                    };
100                    let byte_end = byte_start + ch.len_utf8();
101                    result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
102                    new_pos += 1;
103                }
104            }
105        }
106    }
107
108    // Add common suffix
109    if suffix_len > 0 {
110        result.push(Chunk::Equal(&old[old.len() - suffix_len..]));
111    }
112
113    result
114}
115
116#[derive(Debug, Clone, Copy, PartialEq, Eq)]
117enum Edit {
118    Equal,
119    Delete,
120    Insert,
121}
122
123#[allow(clippy::cast_sign_loss)]
124fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
125    let n = old.len();
126    let m = new.len();
127
128    if n == 0 {
129        return vec![Edit::Insert; m];
130    }
131    if m == 0 {
132        return vec![Edit::Delete; n];
133    }
134
135    let max_d = n + m;
136    let max_d_i32 = max_d as i32;
137    let mut v = vec![0; 2 * max_d + 1];
138    let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
139    let row_len = 2 * max_d + 1;
140
141    v[max_d] = 0;
142
143    for d in 0..=max_d {
144        let d_i32 = d as i32;
145        let row_start = d * row_len;
146        for k in (-d_i32..=d_i32).step_by(2) {
147            let k_idx = (k + max_d_i32) as usize;
148
149            let x = if k == -d_i32 || (k != d_i32 && v[k_idx - 1] < v[k_idx + 1]) {
150                v[k_idx + 1]
151            } else {
152                v[k_idx - 1] + 1
153            };
154
155            let mut x = x;
156            let mut y = (x as i32 - k) as usize;
157
158            while x < n && y < m && old[x] == new[y] {
159                x += 1;
160                y += 1;
161            }
162
163            v[k_idx] = x;
164            v_index[row_start + k_idx] = x;
165
166            if x >= n && y >= m {
167                return backtrack_myers(old, new, &v_index, d, k, max_d);
168            }
169        }
170    }
171
172    vec![]
173}
174
175#[allow(clippy::cast_sign_loss)]
176fn backtrack_myers(
177    old: &[char],
178    new: &[char],
179    v_index: &[usize],
180    d: usize,
181    mut k: i32,
182    max_d: usize,
183) -> Vec<Edit> {
184    let mut edits = Vec::with_capacity(old.len() + new.len());
185    let mut x = old.len();
186    let mut y = new.len();
187    let max_d_i32 = max_d as i32;
188    let row_len = 2 * max_d + 1;
189
190    for cur_d in (0..=d).rev() {
191        if cur_d == 0 {
192            while x > 0 && y > 0 {
193                edits.push(Edit::Equal);
194                x -= 1;
195                y -= 1;
196            }
197            break;
198        }
199
200        let k_idx = (k + max_d_i32) as usize;
201        let prev_row_start = (cur_d - 1) * row_len;
202
203        let prev_k = if k == -(cur_d as i32)
204            || (k != cur_d as i32
205                && v_index[prev_row_start + k_idx - 1] < v_index[prev_row_start + k_idx + 1])
206        {
207            k + 1
208        } else {
209            k - 1
210        };
211
212        let prev_k_idx = (prev_k + max_d_i32) as usize;
213        let prev_x_val = v_index[prev_row_start + prev_k_idx];
214        let prev_y = (prev_x_val as i32 - prev_k) as usize;
215
216        let (move_x, move_y) = if prev_k == k + 1 {
217            (prev_x_val, prev_y + 1)
218        } else {
219            (prev_x_val + 1, prev_y)
220        };
221
222        while x > move_x && y > move_y {
223            edits.push(Edit::Equal);
224            x -= 1;
225            y -= 1;
226        }
227
228        if prev_k == k + 1 {
229            edits.push(Edit::Insert);
230            y -= 1;
231        } else {
232            edits.push(Edit::Delete);
233            x -= 1;
234        }
235
236        k = prev_k;
237    }
238
239    edits.reverse();
240    edits
241}
242
243/// Options for diff generation.
244#[derive(Debug, Clone)]
245pub struct DiffOptions<'a> {
246    pub context_lines: usize,
247    pub old_label: Option<&'a str>,
248    pub new_label: Option<&'a str>,
249    pub missing_newline_hint: bool,
250}
251
252impl Default for DiffOptions<'_> {
253    fn default() -> Self {
254        Self {
255            context_lines: 3,
256            old_label: None,
257            new_label: None,
258            missing_newline_hint: true,
259        }
260    }
261}
262
263/// A diff rendered with both structured hunks and formatted text.
264#[derive(Debug, Clone, Serialize)]
265pub struct DiffBundle {
266    pub hunks: Vec<DiffHunk>,
267    pub formatted: String,
268    pub is_empty: bool,
269}
270
271/// A diff hunk with metadata for old/new ranges.
272#[derive(Debug, Clone, Serialize)]
273pub struct DiffHunk {
274    pub old_start: usize,
275    pub old_lines: usize,
276    pub new_start: usize,
277    pub new_lines: usize,
278    pub lines: Vec<DiffLine>,
279}
280
281/// A single diff line annotated with metadata and type.
282#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
283#[serde(rename_all = "snake_case")]
284pub enum DiffLineKind {
285    Context,
286    Addition,
287    Deletion,
288}
289
290/// Metadata for a single line inside a diff hunk.
291#[derive(Debug, Clone, Serialize)]
292pub struct DiffLine {
293    pub kind: DiffLineKind,
294    pub old_line: Option<usize>,
295    pub new_line: Option<usize>,
296    pub text: String,
297}
298
299/// Compute a structured diff bundle.
300pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
301where
302    F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
303{
304    let old_lines_owned = split_lines_with_terminator(old);
305    let new_lines_owned = split_lines_with_terminator(new);
306
307    let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
308    let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
309
310    let records = collect_line_records(&old_refs, &new_refs);
311    let has_changes = records
312        .iter()
313        .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
314
315    let hunks = if has_changes {
316        build_hunks(&records, options.context_lines)
317    } else {
318        Vec::new()
319    };
320
321    let formatted = if hunks.is_empty() {
322        String::new()
323    } else {
324        formatter(&hunks, &options)
325    };
326
327    DiffBundle {
328        hunks,
329        formatted,
330        is_empty: !has_changes,
331    }
332}
333
334fn split_lines_with_terminator(text: &str) -> Vec<String> {
335    if text.is_empty() {
336        return Vec::with_capacity(0);
337    }
338
339    let mut lines: Vec<String> = text
340        .split_inclusive('\n')
341        .map(|line| line.to_string())
342        .collect();
343
344    if lines.is_empty() {
345        lines.push(text.to_string());
346    }
347
348    lines
349}
350
351fn collect_line_records<'a>(
352    old_lines: &'a [&'a str],
353    new_lines: &'a [&'a str],
354) -> Vec<LineRecord<'a>> {
355    let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
356    let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
357    let mut old_index = 0usize;
358    let mut new_index = 0usize;
359
360    for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
361        match chunk {
362            Chunk::Equal(text) => {
363                for _ in text.chars() {
364                    let old_line = old_index + 1;
365                    let new_line = new_index + 1;
366                    let line = old_lines[old_index];
367                    records.push(LineRecord {
368                        kind: DiffLineKind::Context,
369                        old_line: Some(old_line),
370                        new_line: Some(new_line),
371                        text: line,
372                        anchor_old: old_line,
373                        anchor_new: new_line,
374                    });
375                    old_index += 1;
376                    new_index += 1;
377                }
378            }
379            Chunk::Delete(text) => {
380                for _ in text.chars() {
381                    let old_line = old_index + 1;
382                    let anchor_new = new_index + 1;
383                    let line = old_lines[old_index];
384                    records.push(LineRecord {
385                        kind: DiffLineKind::Deletion,
386                        old_line: Some(old_line),
387                        new_line: None,
388                        text: line,
389                        anchor_old: old_line,
390                        anchor_new,
391                    });
392                    old_index += 1;
393                }
394            }
395            Chunk::Insert(text) => {
396                for _ in text.chars() {
397                    let new_line = new_index + 1;
398                    let anchor_old = old_index + 1;
399                    let line = new_lines[new_index];
400                    records.push(LineRecord {
401                        kind: DiffLineKind::Addition,
402                        old_line: None,
403                        new_line: Some(new_line),
404                        text: line,
405                        anchor_old,
406                        anchor_new: new_line,
407                    });
408                    new_index += 1;
409                }
410            }
411        }
412    }
413
414    records
415}
416
417fn encode_line_sequences<'a>(
418    old_lines: &'a [&'a str],
419    new_lines: &'a [&'a str],
420) -> (String, String) {
421    let mut token_map: HashMap<&'a str, char> = HashMap::new();
422    let mut next_codepoint: u32 = 0;
423
424    let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
425    let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
426
427    (old_encoded, new_encoded)
428}
429
430fn encode_line_list<'a>(
431    lines: &'a [&'a str],
432    map: &mut HashMap<&'a str, char>,
433    next_codepoint: &mut u32,
434) -> String {
435    let mut encoded = String::with_capacity(lines.len());
436    for &line in lines {
437        let token = if let Some(&value) = map.get(line) {
438            value
439        } else {
440            let Some(ch) = next_token_char(next_codepoint) else {
441                break;
442            };
443            map.insert(line, ch);
444            ch
445        };
446        encoded.push(token);
447    }
448    encoded
449}
450
451fn next_token_char(counter: &mut u32) -> Option<char> {
452    while *counter <= 0x10FFFF {
453        let candidate = *counter;
454        *counter += 1;
455        if (0xD800..=0xDFFF).contains(&candidate) {
456            continue;
457        }
458        if let Some(ch) = char::from_u32(candidate) {
459            return Some(ch);
460        }
461    }
462    None
463}
464
465#[derive(Debug)]
466struct LineRecord<'a> {
467    kind: DiffLineKind,
468    old_line: Option<usize>,
469    new_line: Option<usize>,
470    text: &'a str,
471    anchor_old: usize,
472    anchor_new: usize,
473}
474
475fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
476    if records.is_empty() {
477        return Vec::new();
478    }
479
480    let ranges = compute_hunk_ranges(records, context);
481    let mut hunks = Vec::with_capacity(ranges.len());
482
483    for (start, end) in ranges {
484        let slice = &records[start..=end];
485
486        let old_start = slice
487            .iter()
488            .filter_map(|r| r.old_line)
489            .min()
490            .or_else(|| slice.iter().map(|r| r.anchor_old).min())
491            .unwrap_or(1);
492
493        let new_start = slice
494            .iter()
495            .filter_map(|r| r.new_line)
496            .min()
497            .or_else(|| slice.iter().map(|r| r.anchor_new).min())
498            .unwrap_or(1);
499
500        let old_lines = slice
501            .iter()
502            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
503            .count();
504        let new_lines = slice
505            .iter()
506            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
507            .count();
508
509        let lines = slice
510            .iter()
511            .map(|record| DiffLine {
512                kind: record.kind.clone(),
513                old_line: record.old_line,
514                new_line: record.new_line,
515                text: record.text.to_string(),
516            })
517            .collect();
518
519        hunks.push(DiffHunk {
520            old_start,
521            old_lines,
522            new_start,
523            new_lines,
524            lines,
525        });
526    }
527
528    hunks
529}
530
531fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
532    let mut ranges = Vec::with_capacity(4);
533    let mut current_start: Option<usize> = None;
534    let mut current_end: usize = 0;
535
536    for (idx, record) in records.iter().enumerate() {
537        if record.kind != DiffLineKind::Context {
538            let start = idx.saturating_sub(context);
539            let end = min(idx + context, records.len().saturating_sub(1));
540
541            if let Some(existing_start) = current_start {
542                if start < existing_start {
543                    current_start = Some(start);
544                }
545                if end > current_end {
546                    current_end = end;
547                }
548            } else {
549                current_start = Some(start);
550                current_end = end;
551            }
552        } else if let Some(start) = current_start
553            && idx > current_end
554        {
555            ranges.push((start, current_end));
556            current_start = None;
557        }
558    }
559
560    if let Some(start) = current_start {
561        ranges.push((start, current_end));
562    }
563
564    ranges
565}