Skip to main content

vtcode_commons/
diff.rs

1//! Diff utilities for generating structured diffs.
2
3use hashbrown::HashMap;
4use serde::Serialize;
5use std::cmp::min;
6
7/// Represents a chunk of text in a diff (Equal, Delete, or Insert).
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10    Equal(&'a str),
11    Delete(&'a str),
12    Insert(&'a str),
13}
14
15/// Compute an optimal diff between two strings using Myers algorithm.
16#[inline]
17pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
18    if old.is_empty() && new.is_empty() {
19        return Vec::with_capacity(0);
20    }
21    if old.is_empty() {
22        return vec![Chunk::Insert(new)];
23    }
24    if new.is_empty() {
25        return vec![Chunk::Delete(old)];
26    }
27
28    // Strip common prefix first (optimisation).
29    let prefix_byte_len: usize = old
30        .chars()
31        .zip(new.chars())
32        .take_while(|(o, n)| o == n)
33        .map(|(c, _)| c.len_utf8())
34        .sum();
35
36    // Strip common suffix on the remaining text.
37    let old_rest = &old[prefix_byte_len..];
38    let new_rest = &new[prefix_byte_len..];
39
40    let suffix_byte_len: usize = old_rest
41        .chars()
42        .rev()
43        .zip(new_rest.chars().rev())
44        .take_while(|(o, n)| o == n)
45        .map(|(c, _)| c.len_utf8())
46        .sum();
47
48    let old_middle_end = old_rest.len() - suffix_byte_len;
49    let new_middle_end = new_rest.len() - suffix_byte_len;
50
51    let old_middle = &old_rest[..old_middle_end];
52    let new_middle = &new_rest[..new_middle_end];
53
54    let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
55
56    // Add common prefix
57    if prefix_byte_len > 0 {
58        result.push(Chunk::Equal(&old[..prefix_byte_len]));
59    }
60
61    // Compute optimal diff for the middle section
62    if !old_middle.is_empty() || !new_middle.is_empty() {
63        let old_chars: Vec<char> = old_middle.chars().collect();
64        let new_chars: Vec<char> = new_middle.chars().collect();
65        let old_byte_starts: Vec<usize> = old_middle.char_indices().map(|(idx, _)| idx).collect();
66        let new_byte_starts: Vec<usize> = new_middle.char_indices().map(|(idx, _)| idx).collect();
67        let edits = myers_diff(&old_chars, &new_chars);
68
69        let mut old_pos = 0;
70        let mut new_pos = 0;
71        // Track the start of a consecutive Equal run so we can emit a single
72        // Chunk::Equal for the whole run (instead of one per character).
73        let mut equal_run_start: Option<usize> = None;
74
75        for edit in edits {
76            match edit {
77                Edit::Equal => {
78                    if equal_run_start.is_none() {
79                        equal_run_start = Some(old_pos);
80                    }
81                    old_pos += 1;
82                    new_pos += 1;
83                }
84                Edit::Delete => {
85                    // Flush any accumulated equal run before emitting a Delete
86                    if let Some(start) = equal_run_start.take() {
87                        let byte_start = old_byte_starts[start];
88                        let byte_end = old_byte_starts[old_pos];
89                        if byte_start < byte_end {
90                            result.push(Chunk::Equal(&old_middle[byte_start..byte_end]));
91                        }
92                    }
93                    let Some(ch) = old_chars.get(old_pos).copied() else {
94                        break;
95                    };
96                    let Some(byte_start) = old_byte_starts.get(old_pos).copied() else {
97                        break;
98                    };
99                    let byte_end = byte_start + ch.len_utf8();
100                    result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
101                    old_pos += 1;
102                }
103                Edit::Insert => {
104                    // Flush any accumulated equal run before emitting an Insert.
105                    // old_pos may equal old_byte_starts.len() when the equal run
106                    // reaches the end of old_middle, so use old_middle.len() as fallback.
107                    if let Some(start) = equal_run_start.take() {
108                        let byte_start = old_byte_starts[start];
109                        let byte_end = if old_pos < old_byte_starts.len() {
110                            old_byte_starts[old_pos]
111                        } else {
112                            old_middle.len()
113                        };
114                        if byte_start < byte_end {
115                            result.push(Chunk::Equal(&old_middle[byte_start..byte_end]));
116                        }
117                    }
118                    let Some(ch) = new_chars.get(new_pos).copied() else {
119                        break;
120                    };
121                    let Some(byte_start) = new_byte_starts.get(new_pos).copied() else {
122                        break;
123                    };
124                    let byte_end = byte_start + ch.len_utf8();
125                    result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
126                    new_pos += 1;
127                }
128            }
129        }
130        // Flush any trailing equal run
131        if let Some(start) = equal_run_start.take() {
132            let byte_start = old_byte_starts[start];
133            let byte_end = old_middle.len();
134            if byte_start < byte_end {
135                result.push(Chunk::Equal(&old_middle[byte_start..byte_end]));
136            }
137        }
138    }
139
140    // Add common suffix
141    if suffix_byte_len > 0 {
142        result.push(Chunk::Equal(&old[old.len() - suffix_byte_len..]));
143    }
144
145    result
146}
147
148#[derive(Debug, Clone, Copy, PartialEq, Eq)]
149enum Edit {
150    Equal,
151    Delete,
152    Insert,
153}
154
155/// Advance along matching characters. Extracted from `myers_diff` so the
156/// compiler sees a tight leaf loop with no surrounding state, enabling better
157/// register allocation and (in some cases) auto-vectorization heuristics.
158#[inline]
159fn advance_matching(old: &[char], new: &[char], mut x: usize, mut y: usize) -> (usize, usize) {
160    while x < old.len() && y < new.len() && old[x] == new[y] {
161        x += 1;
162        y += 1;
163    }
164    (x, y)
165}
166
167/// Erase the trailing equal run during backtracking. Same rationale as
168/// `advance_matching` — a focused leaf function that the compiler can
169/// optimise in isolation.
170/// Returns the final `(x, y)` position after removing equal edits.
171#[inline]
172fn backtrack_equal_run(
173    mut x: usize,
174    mut y: usize,
175    move_x: usize,
176    move_y: usize,
177    edits: &mut Vec<Edit>,
178) -> (usize, usize) {
179    while x > move_x && y > move_y {
180        edits.push(Edit::Equal);
181        x -= 1;
182        y -= 1;
183    }
184    (x, y)
185}
186
187#[allow(clippy::cast_sign_loss)]
188fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
189    let n = old.len();
190    let m = new.len();
191
192    if n == 0 {
193        return vec![Edit::Insert; m];
194    }
195    if m == 0 {
196        return vec![Edit::Delete; n];
197    }
198
199    let max_d = n.saturating_add(m).min(i32::MAX as usize);
200    let max_d_i32 = max_d as i32;
201    let mut v = vec![0; 2 * max_d + 1];
202    let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
203    let row_len = 2 * max_d + 1;
204
205    v[max_d] = 0;
206
207    for d in 0..=max_d {
208        let d_i32 = d as i32;
209        let row_start = d * row_len;
210        for k in (-d_i32..=d_i32).step_by(2) {
211            let k_idx = (k + max_d_i32) as usize;
212
213            let x = if k == -d_i32 || (k != d_i32 && v[k_idx - 1] < v[k_idx + 1]) {
214                v[k_idx + 1]
215            } else {
216                v[k_idx - 1] + 1
217            };
218
219            let mut x = x;
220            let mut y = (x as i32 - k) as usize;
221
222            (x, y) = advance_matching(old, new, x, y);
223
224            v[k_idx] = x;
225            v_index[row_start + k_idx] = x;
226
227            if x >= n && y >= m {
228                return backtrack_myers(old, new, &v_index, d, k, max_d);
229            }
230        }
231    }
232
233    vec![]
234}
235
236#[allow(clippy::cast_sign_loss)]
237fn backtrack_myers(
238    old: &[char],
239    new: &[char],
240    v_index: &[usize],
241    d: usize,
242    mut k: i32,
243    max_d: usize,
244) -> Vec<Edit> {
245    let mut edits = Vec::with_capacity(old.len() + new.len());
246    let mut x = old.len();
247    let mut y = new.len();
248    let max_d_i32 = max_d as i32;
249    let row_len = 2 * max_d + 1;
250
251    for cur_d in (0..=d).rev() {
252        if cur_d == 0 {
253            while x > 0 && y > 0 {
254                edits.push(Edit::Equal);
255                x -= 1;
256                y -= 1;
257            }
258            break;
259        }
260
261        let k_idx = (k + max_d_i32) as usize;
262        let prev_row_start = (cur_d - 1) * row_len;
263
264        let cur_d_i32 = cur_d as i32;
265        let prev_k = if k == cur_d_i32.wrapping_neg()
266            || (k != cur_d_i32
267                && v_index[prev_row_start + k_idx - 1] < v_index[prev_row_start + k_idx + 1])
268        {
269            k + 1
270        } else {
271            k - 1
272        };
273
274        let prev_k_idx = (prev_k + max_d_i32) as usize;
275        let prev_x_val = v_index[prev_row_start + prev_k_idx];
276        let prev_y = (prev_x_val as i32 - prev_k) as usize;
277
278        let (move_x, move_y) = if prev_k == k + 1 {
279            (prev_x_val, prev_y + 1)
280        } else {
281            (prev_x_val + 1, prev_y)
282        };
283
284        (x, y) = backtrack_equal_run(x, y, move_x, move_y, &mut edits);
285
286        if prev_k == k + 1 {
287            edits.push(Edit::Insert);
288            y -= 1;
289        } else {
290            edits.push(Edit::Delete);
291            x -= 1;
292        }
293
294        k = prev_k;
295    }
296
297    edits.reverse();
298    edits
299}
300
301/// Options for diff generation.
302#[derive(Debug, Clone)]
303pub struct DiffOptions<'a> {
304    pub context_lines: usize,
305    pub old_label: Option<&'a str>,
306    pub new_label: Option<&'a str>,
307    pub missing_newline_hint: bool,
308}
309
310impl Default for DiffOptions<'_> {
311    fn default() -> Self {
312        Self {
313            context_lines: 3,
314            old_label: None,
315            new_label: None,
316            missing_newline_hint: true,
317        }
318    }
319}
320
321/// A diff rendered with both structured hunks and formatted text.
322#[derive(Debug, Clone, Serialize)]
323pub struct DiffBundle {
324    pub hunks: Vec<DiffHunk>,
325    pub formatted: String,
326    pub is_empty: bool,
327}
328
329/// A diff hunk with metadata for old/new ranges.
330#[derive(Debug, Clone, Serialize)]
331pub struct DiffHunk {
332    pub old_start: usize,
333    pub old_lines: usize,
334    pub new_start: usize,
335    pub new_lines: usize,
336    pub lines: Vec<DiffLine>,
337}
338
339/// A single diff line annotated with metadata and type.
340#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
341#[serde(rename_all = "snake_case")]
342pub enum DiffLineKind {
343    Context,
344    Addition,
345    Deletion,
346}
347
348/// Metadata for a single line inside a diff hunk.
349#[derive(Debug, Clone, Serialize)]
350pub struct DiffLine {
351    pub kind: DiffLineKind,
352    pub old_line: Option<u32>,
353    pub new_line: Option<u32>,
354    pub text: String,
355}
356
357/// Compute a structured diff bundle.
358pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
359where
360    F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
361{
362    let old_lines_owned = split_lines_with_terminator(old);
363    let new_lines_owned = split_lines_with_terminator(new);
364
365    let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
366    let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
367
368    let records = collect_line_records(&old_refs, &new_refs);
369    let has_changes = records
370        .iter()
371        .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
372
373    let hunks = if has_changes {
374        build_hunks(&records, options.context_lines)
375    } else {
376        Vec::new()
377    };
378
379    let formatted = if hunks.is_empty() {
380        String::new()
381    } else {
382        formatter(&hunks, &options)
383    };
384
385    DiffBundle {
386        hunks,
387        formatted,
388        is_empty: !has_changes,
389    }
390}
391
392fn split_lines_with_terminator(text: &str) -> Vec<String> {
393    if text.is_empty() {
394        return Vec::with_capacity(0);
395    }
396
397    let mut lines: Vec<String> = text
398        .split_inclusive('\n')
399        .map(|line| line.to_string())
400        .collect();
401
402    if lines.is_empty() {
403        lines.push(text.to_string());
404    }
405
406    lines
407}
408
409#[inline]
410fn collect_line_records<'a>(
411    old_lines: &'a [&'a str],
412    new_lines: &'a [&'a str],
413) -> Vec<LineRecord<'a>> {
414    let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
415    let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
416    let mut old_index = 0u32;
417    let mut new_index = 0u32;
418
419    for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
420        match chunk {
421            Chunk::Equal(text) => {
422                for _ in text.chars() {
423                    let old_line = old_index + 1;
424                    let new_line = new_index + 1;
425                    let line = old_lines[old_index as usize];
426                    records.push(LineRecord {
427                        kind: DiffLineKind::Context,
428                        old_line: Some(old_line),
429                        new_line: Some(new_line),
430                        text: line,
431                        anchor_old: old_line,
432                        anchor_new: new_line,
433                    });
434                    old_index += 1;
435                    new_index += 1;
436                }
437            }
438            Chunk::Delete(text) => {
439                for _ in text.chars() {
440                    let old_line = old_index + 1;
441                    let anchor_new = new_index + 1;
442                    let line = old_lines[old_index as usize];
443                    records.push(LineRecord {
444                        kind: DiffLineKind::Deletion,
445                        old_line: Some(old_line),
446                        new_line: None,
447                        text: line,
448                        anchor_old: old_line,
449                        anchor_new,
450                    });
451                    old_index += 1;
452                }
453            }
454            Chunk::Insert(text) => {
455                for _ in text.chars() {
456                    let new_line = new_index + 1;
457                    let anchor_old = old_index + 1;
458                    let line = new_lines[new_index as usize];
459                    records.push(LineRecord {
460                        kind: DiffLineKind::Addition,
461                        old_line: None,
462                        new_line: Some(new_line),
463                        text: line,
464                        anchor_old,
465                        anchor_new: new_line,
466                    });
467                    new_index += 1;
468                }
469            }
470        }
471    }
472
473    records
474}
475
476fn encode_line_sequences<'a>(
477    old_lines: &'a [&'a str],
478    new_lines: &'a [&'a str],
479) -> (String, String) {
480    let mut token_map: HashMap<&'a str, char> = HashMap::new();
481    let mut next_codepoint: u32 = 0;
482
483    let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
484    let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
485
486    (old_encoded, new_encoded)
487}
488
489fn encode_line_list<'a>(
490    lines: &'a [&'a str],
491    map: &mut HashMap<&'a str, char>,
492    next_codepoint: &mut u32,
493) -> String {
494    let mut encoded = String::with_capacity(lines.len());
495    for &line in lines {
496        let token = if let Some(&value) = map.get(line) {
497            value
498        } else {
499            let Some(ch) = next_token_char(next_codepoint) else {
500                break;
501            };
502            map.insert(line, ch);
503            ch
504        };
505        encoded.push(token);
506    }
507    encoded
508}
509
510fn next_token_char(counter: &mut u32) -> Option<char> {
511    while *counter <= 0x10FFFF {
512        let candidate = *counter;
513        *counter += 1;
514        if (0xD800..=0xDFFF).contains(&candidate) {
515            continue;
516        }
517        if let Some(ch) = char::from_u32(candidate) {
518            return Some(ch);
519        }
520    }
521    None
522}
523
524#[derive(Debug)]
525struct LineRecord<'a> {
526    kind: DiffLineKind,
527    old_line: Option<u32>,
528    new_line: Option<u32>,
529    text: &'a str,
530    anchor_old: u32,
531    anchor_new: u32,
532}
533
534fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
535    if records.is_empty() {
536        return Vec::new();
537    }
538
539    let ranges = compute_hunk_ranges(records, context);
540    let mut hunks = Vec::with_capacity(ranges.len());
541
542    for (start, end) in ranges {
543        let slice = &records[start..=end];
544
545        let old_start = slice
546            .iter()
547            .filter_map(|r| r.old_line)
548            .min()
549            .or_else(|| slice.iter().map(|r| r.anchor_old).min())
550            .unwrap_or(1) as usize;
551
552        let new_start = slice
553            .iter()
554            .filter_map(|r| r.new_line)
555            .min()
556            .or_else(|| slice.iter().map(|r| r.anchor_new).min())
557            .unwrap_or(1) as usize;
558
559        let old_lines = slice
560            .iter()
561            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
562            .count();
563        let new_lines = slice
564            .iter()
565            .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
566            .count();
567
568        let lines = slice
569            .iter()
570            .map(|record| DiffLine {
571                kind: record.kind,
572                old_line: record.old_line,
573                new_line: record.new_line,
574                text: record.text.to_string(),
575            })
576            .collect();
577
578        hunks.push(DiffHunk {
579            old_start,
580            old_lines,
581            new_start,
582            new_lines,
583            lines,
584        });
585    }
586
587    hunks
588}
589
590fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
591    let mut ranges = Vec::with_capacity(4);
592    let mut current_start: Option<usize> = None;
593    let mut current_end: usize = 0;
594
595    for (idx, record) in records.iter().enumerate() {
596        if record.kind != DiffLineKind::Context {
597            let start = idx.saturating_sub(context);
598            let end = min(idx + context, records.len().saturating_sub(1));
599
600            if let Some(existing_start) = current_start {
601                // Close the previous range if this change is beyond its context window
602                if idx > current_end {
603                    ranges.push((existing_start, current_end));
604                    current_start = Some(start);
605                    current_end = end;
606                } else {
607                    if start < existing_start {
608                        current_start = Some(start);
609                    }
610                    if end > current_end {
611                        current_end = end;
612                    }
613                }
614            } else {
615                current_start = Some(start);
616                current_end = end;
617            }
618        } else if let Some(start) = current_start
619            && idx > current_end
620        {
621            ranges.push((start, current_end));
622            current_start = None;
623        }
624    }
625
626    if let Some(start) = current_start {
627        ranges.push((start, current_end));
628    }
629
630    ranges
631}
632
633#[cfg(test)]
634mod tests {
635    use super::*;
636
637    // ── compute_diff_chunks ──────────────────────────────────────────
638
639    #[test]
640    fn chunks_both_empty() {
641        let chunks = compute_diff_chunks("", "");
642        assert!(chunks.is_empty());
643    }
644
645    #[test]
646    fn chunks_old_empty() {
647        let chunks = compute_diff_chunks("", "hello");
648        assert_eq!(chunks, vec![Chunk::Insert("hello")]);
649    }
650
651    #[test]
652    fn chunks_new_empty() {
653        let chunks = compute_diff_chunks("hello", "");
654        assert_eq!(chunks, vec![Chunk::Delete("hello")]);
655    }
656
657    #[test]
658    fn chunks_identical() {
659        let chunks = compute_diff_chunks("abc", "abc");
660        assert_eq!(chunks.len(), 1);
661        assert!(matches!(chunks[0], Chunk::Equal("abc")));
662    }
663
664    #[test]
665    fn chunks_single_insertion() {
666        let chunks = compute_diff_chunks("ac", "abc");
667        // Common prefix "a", insert "b", common suffix "c"
668        assert_eq!(chunks.len(), 3);
669        assert!(matches!(chunks[0], Chunk::Equal("a")));
670        assert!(matches!(chunks[1], Chunk::Insert("b")));
671        assert!(matches!(chunks[2], Chunk::Equal("c")));
672    }
673
674    #[test]
675    fn chunks_single_deletion() {
676        let chunks = compute_diff_chunks("abc", "ac");
677        assert_eq!(chunks.len(), 3);
678        assert!(matches!(chunks[0], Chunk::Equal("a")));
679        assert!(matches!(chunks[1], Chunk::Delete("b")));
680        assert!(matches!(chunks[2], Chunk::Equal("c")));
681    }
682
683    #[test]
684    fn chunks_replacement() {
685        let chunks = compute_diff_chunks("abc", "axc");
686        // Equal("a"), Delete("b"), Insert("x"), Equal("c")
687        assert_eq!(chunks.len(), 4);
688        assert!(matches!(chunks[0], Chunk::Equal("a")));
689        assert!(matches!(chunks[1], Chunk::Delete("b")));
690        assert!(matches!(chunks[2], Chunk::Insert("x")));
691        assert!(matches!(chunks[3], Chunk::Equal("c")));
692    }
693
694    #[test]
695    fn chunks_completely_different() {
696        let chunks = compute_diff_chunks("aaa", "bbb");
697        // No common prefix or suffix
698        assert!(!chunks.is_empty());
699        // All old chars deleted, all new chars inserted
700        let deletes: usize = chunks
701            .iter()
702            .filter(|c| matches!(c, Chunk::Delete(_)))
703            .count();
704        let inserts: usize = chunks
705            .iter()
706            .filter(|c| matches!(c, Chunk::Insert(_)))
707            .count();
708        assert!(deletes > 0 || inserts > 0);
709    }
710
711    #[test]
712    fn chunks_multiline() {
713        let old = "line1\nline2\nline3\n";
714        let new = "line1\nline modified\nline3\n";
715        let chunks = compute_diff_chunks(old, new);
716
717        // Should have at least some Equal chunks for the unchanged lines
718        let has_equal = chunks.iter().any(|c| matches!(c, Chunk::Equal(_)));
719        assert!(has_equal);
720
721        // Should have a delete and insert for the changed line
722        let has_delete = chunks.iter().any(|c| matches!(c, Chunk::Delete(_)));
723        let has_insert = chunks.iter().any(|c| matches!(c, Chunk::Insert(_)));
724        assert!(has_delete || has_insert);
725    }
726
727    #[test]
728    fn chunks_unicode() {
729        let old = "hello \u{00e9}l\u{00e8}ve";
730        let new = "hello \u{00e9}l\u{00e8}ve you";
731        let chunks = compute_diff_chunks(old, new);
732
733        // Common prefix should include unicode chars
734        let prefix = match &chunks[0] {
735            Chunk::Equal(s) => s,
736            _ => panic!("expected Equal prefix"),
737        };
738        assert!(prefix.starts_with("hello "));
739    }
740
741    #[test]
742    fn chunks_append_only() {
743        let old = "a\nb\n";
744        let new = "a\nb\nc\nd\n";
745        let chunks = compute_diff_chunks(old, new);
746        let has_insert = chunks.iter().any(|c| matches!(c, Chunk::Insert(_)));
747        assert!(has_insert);
748    }
749
750    #[test]
751    fn chunks_remove_only() {
752        let old = "a\nb\nc\n";
753        let new = "a\n";
754        let chunks = compute_diff_chunks(old, new);
755        let has_delete = chunks.iter().any(|c| matches!(c, Chunk::Delete(_)));
756        assert!(has_delete);
757    }
758
759    // ── compute_diff ─────────────────────────────────────────────────
760
761    fn identity_formatter(hunks: &[DiffHunk], _opts: &DiffOptions<'_>) -> String {
762        hunks
763            .iter()
764            .flat_map(|h| h.lines.iter().map(|l| l.text.clone()))
765            .collect::<Vec<_>>()
766            .join("")
767    }
768
769    #[test]
770    fn diff_identical_content() {
771        let result = compute_diff(
772            "hello\n",
773            "hello\n",
774            DiffOptions::default(),
775            identity_formatter,
776        );
777        assert!(result.is_empty);
778        assert!(result.hunks.is_empty());
779        assert!(result.formatted.is_empty());
780    }
781
782    #[test]
783    fn diff_empty_both() {
784        let result = compute_diff("", "", DiffOptions::default(), identity_formatter);
785        assert!(result.is_empty);
786        assert!(result.hunks.is_empty());
787    }
788
789    #[test]
790    fn diff_old_empty() {
791        let result = compute_diff(
792            "",
793            "line1\nline2\n",
794            DiffOptions::default(),
795            identity_formatter,
796        );
797        assert!(!result.is_empty);
798        assert!(!result.hunks.is_empty());
799        // All lines should be additions
800        for hunk in &result.hunks {
801            for line in &hunk.lines {
802                assert_eq!(line.kind, DiffLineKind::Addition);
803            }
804        }
805    }
806
807    #[test]
808    fn diff_new_empty() {
809        let result = compute_diff(
810            "line1\nline2\n",
811            "",
812            DiffOptions::default(),
813            identity_formatter,
814        );
815        assert!(!result.is_empty);
816        assert!(!result.hunks.is_empty());
817        for hunk in &result.hunks {
818            for line in &hunk.lines {
819                assert_eq!(line.kind, DiffLineKind::Deletion);
820            }
821        }
822    }
823
824    #[test]
825    fn diff_single_line_change() {
826        let old = "aaa\nbbb\nccc\n";
827        let new = "aaa\nxxx\nccc\n";
828        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
829
830        assert!(!result.is_empty);
831        assert_eq!(result.hunks.len(), 1);
832
833        let hunk = &result.hunks[0];
834        // Should have context lines for aaa and ccc, plus the change
835        let kinds: Vec<DiffLineKind> = hunk.lines.iter().map(|l| l.kind).collect();
836        assert!(kinds.contains(&DiffLineKind::Context));
837        assert!(kinds.contains(&DiffLineKind::Deletion));
838        assert!(kinds.contains(&DiffLineKind::Addition));
839    }
840
841    #[test]
842    fn diff_line_numbers() {
843        let old = "line1\nline2\nline3\n";
844        let new = "line1\nline2 modified\nline3\n";
845        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
846
847        let hunk = &result.hunks[0];
848        // Context lines should have both old_line and new_line
849        for line in &hunk.lines {
850            if line.kind == DiffLineKind::Context {
851                assert!(line.old_line.is_some());
852                assert!(line.new_line.is_some());
853            }
854        }
855        // Deletion should have old_line but no new_line
856        for line in &hunk.lines {
857            if line.kind == DiffLineKind::Deletion {
858                assert!(line.old_line.is_some());
859                assert!(line.new_line.is_none());
860            }
861        }
862        // Addition should have new_line but no old_line
863        for line in &hunk.lines {
864            if line.kind == DiffLineKind::Addition {
865                assert!(line.old_line.is_none());
866                assert!(line.new_line.is_some());
867            }
868        }
869    }
870
871    #[test]
872    fn diff_context_lines_zero() {
873        let old = "a\nb\nc\nd\ne\n";
874        let new = "a\nb\nX\nd\ne\n";
875        let opts = DiffOptions {
876            context_lines: 0,
877            ..DiffOptions::default()
878        };
879        let result = compute_diff(old, new, opts, identity_formatter);
880
881        assert!(!result.is_empty);
882        // With 0 context, only the changed line and its neighbors should appear
883        let hunk = &result.hunks[0];
884        // Should be minimal: just the deletion and addition
885        let context_count = hunk
886            .lines
887            .iter()
888            .filter(|l| l.kind == DiffLineKind::Context)
889            .count();
890        assert!(context_count <= 2); // At most one context line on each side
891    }
892
893    #[test]
894    fn diff_context_lines_large() {
895        let old = "a\nb\nc\nd\ne\n";
896        let new = "a\nb\nX\nd\ne\n";
897        let opts = DiffOptions {
898            context_lines: 10,
899            ..DiffOptions::default()
900        };
901        let result = compute_diff(old, new, opts, identity_formatter);
902
903        assert!(!result.is_empty);
904        // With 10 context lines and only 6 total lines (trailing newline creates 6th), all lines appear
905        let hunk = &result.hunks[0];
906        assert_eq!(hunk.lines.len(), 6);
907    }
908
909    #[test]
910    fn diff_hunk_metadata() {
911        let old = "aaa\nbbb\nccc\n";
912        let new = "aaa\nxxx\nccc\n";
913        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
914
915        let hunk = &result.hunks[0];
916        assert!(hunk.old_start >= 1);
917        assert!(hunk.new_start >= 1);
918        assert!(hunk.old_lines > 0);
919        assert!(hunk.new_lines > 0);
920        assert!(!hunk.lines.is_empty());
921    }
922
923    #[test]
924    fn diff_multiple_hunks() {
925        // Insert in first half and insert in second half with small context => two hunks
926        let old = "a\nb\nc\nd\ne\nf\ng\nh\n";
927        let new = "a\nINSERTED1\nb\nc\nd\ne\nf\ng\nINSERTED2\nh\n";
928        let opts = DiffOptions {
929            context_lines: 1,
930            ..DiffOptions::default()
931        };
932        let result = compute_diff(old, new, opts, identity_formatter);
933
934        assert!(!result.is_empty);
935        assert!(
936            result.hunks.len() >= 2,
937            "expected at least 2 hunks, got {}",
938            result.hunks.len()
939        );
940    }
941
942    #[test]
943    fn diff_formatter_called() {
944        let old = "aaa\n";
945        let new = "bbb\n";
946        let mut called = false;
947        let formatter = |hunks: &[DiffHunk], _opts: &DiffOptions<'_>| -> String {
948            called = true;
949            hunks
950                .iter()
951                .flat_map(|h| h.lines.iter().map(|l| l.text.clone()))
952                .collect::<Vec<_>>()
953                .join("")
954        };
955
956        let result = compute_diff(old, new, DiffOptions::default(), formatter);
957        assert!(called);
958        assert!(!result.formatted.is_empty());
959    }
960
961    #[test]
962    fn diff_formatter_not_called_when_empty() {
963        let mut called = false;
964        let formatter = |_hunks: &[DiffHunk], _opts: &DiffOptions<'_>| -> String {
965            called = true;
966            String::new()
967        };
968
969        let result = compute_diff("same\n", "same\n", DiffOptions::default(), formatter);
970        assert!(!called);
971        assert!(result.formatted.is_empty());
972    }
973
974    #[test]
975    fn diff_options_labels() {
976        let old = "aaa\n";
977        let new = "bbb\n";
978        let opts = DiffOptions {
979            old_label: Some("old.txt"),
980            new_label: Some("new.txt"),
981            ..DiffOptions::default()
982        };
983        let result = compute_diff(old, new, opts, identity_formatter);
984        assert!(!result.is_empty);
985        // Labels are passed to formatter but don't affect hunks
986        assert_eq!(result.hunks.len(), 1);
987    }
988
989    #[test]
990    fn diff_insertion_only() {
991        let old = "line1\nline3\n";
992        let new = "line1\nline2\nline3\n";
993        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
994
995        assert!(!result.is_empty);
996        let additions: Vec<&DiffLine> = result
997            .hunks
998            .iter()
999            .flat_map(|h| h.lines.iter())
1000            .filter(|l| l.kind == DiffLineKind::Addition)
1001            .collect();
1002        assert_eq!(additions.len(), 1);
1003        assert_eq!(additions[0].text, "line2\n");
1004    }
1005
1006    #[test]
1007    fn diff_deletion_only() {
1008        let old = "line1\nline2\nline3\n";
1009        let new = "line1\nline3\n";
1010        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
1011
1012        assert!(!result.is_empty);
1013        let deletions: Vec<&DiffLine> = result
1014            .hunks
1015            .iter()
1016            .flat_map(|h| h.lines.iter())
1017            .filter(|l| l.kind == DiffLineKind::Deletion)
1018            .collect();
1019        assert_eq!(deletions.len(), 1);
1020        assert_eq!(deletions[0].text, "line2\n");
1021    }
1022
1023    // ── DiffBundle serialization ─────────────────────────────────────
1024
1025    #[test]
1026    fn diff_bundle_serializes() {
1027        let old = "aaa\nbbb\n";
1028        let new = "aaa\nxxx\n";
1029        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
1030
1031        let json = serde_json::to_string(&result).unwrap();
1032        assert!(json.contains("hunks"));
1033        assert!(json.contains("formatted"));
1034        assert!(json.contains("is_empty"));
1035    }
1036
1037    #[test]
1038    fn diff_hunk_serializes() {
1039        let hunk = DiffHunk {
1040            old_start: 1,
1041            old_lines: 2,
1042            new_start: 1,
1043            new_lines: 2,
1044            lines: vec![DiffLine {
1045                kind: DiffLineKind::Context,
1046                old_line: Some(1),
1047                new_line: Some(1),
1048                text: "hello\n".to_string(),
1049            }],
1050        };
1051        let json = serde_json::to_string(&hunk).unwrap();
1052        assert!(json.contains("old_start"));
1053        assert!(json.contains("context"));
1054    }
1055
1056    #[test]
1057    fn diff_line_kind_serializes() {
1058        assert_eq!(
1059            serde_json::to_string(&DiffLineKind::Context).unwrap(),
1060            "\"context\""
1061        );
1062        assert_eq!(
1063            serde_json::to_string(&DiffLineKind::Addition).unwrap(),
1064            "\"addition\""
1065        );
1066        assert_eq!(
1067            serde_json::to_string(&DiffLineKind::Deletion).unwrap(),
1068            "\"deletion\""
1069        );
1070    }
1071
1072    // ── Edge cases ───────────────────────────────────────────────────
1073
1074    #[test]
1075    fn chunks_very_long_identical() {
1076        let text = "x".repeat(10_000);
1077        let chunks = compute_diff_chunks(&text, &text);
1078        assert_eq!(chunks.len(), 1);
1079        assert!(matches!(chunks[0], Chunk::Equal(_)));
1080    }
1081
1082    #[test]
1083    fn chunks_single_char_diff() {
1084        let chunks = compute_diff_chunks("a", "b");
1085        assert!(!chunks.is_empty());
1086        let has_delete = chunks.iter().any(|c| matches!(c, Chunk::Delete(_)));
1087        let has_insert = chunks.iter().any(|c| matches!(c, Chunk::Insert(_)));
1088        assert!(has_delete && has_insert);
1089    }
1090
1091    #[test]
1092    fn diff_no_trailing_newline() {
1093        let old = "line1\nline2";
1094        let new = "line1\nline2\n";
1095        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
1096        assert!(!result.is_empty);
1097    }
1098
1099    #[test]
1100    fn diff_only_newlines_differ() {
1101        let old = "a\nb\n";
1102        let new = "a\nb";
1103        let result = compute_diff(old, new, DiffOptions::default(), identity_formatter);
1104        assert!(!result.is_empty);
1105    }
1106
1107    #[test]
1108    fn chunks_prefix_suffix_optimization() {
1109        // Verify that common prefix and suffix are preserved as Equal chunks.
1110        // Myers works character-by-character, so the middle diff is char-level.
1111        let old = "AAAA BBBB CCCC";
1112        let new = "AAAA DDDD CCCC";
1113        let chunks = compute_diff_chunks(old, new);
1114
1115        // First chunk should be Equal prefix "AAAA "
1116        assert!(matches!(&chunks[0], Chunk::Equal(s) if *s == "AAAA "));
1117        // Last chunk should be Equal suffix " CCCC"
1118        assert!(matches!(chunks.last().unwrap(), Chunk::Equal(s) if *s == " CCCC"));
1119        // Middle should contain deletes and inserts (character-level)
1120        let has_delete = chunks.iter().any(|c| matches!(c, Chunk::Delete(_)));
1121        let has_insert = chunks.iter().any(|c| matches!(c, Chunk::Insert(_)));
1122        assert!(has_delete, "expected Delete chunks in middle");
1123        assert!(has_insert, "expected Insert chunks in middle");
1124    }
1125}