// git_internal/diff.rs
1//! Unified diff generation utilities that compare blobs/trees, map deltas back to line numbers,
2//! and emit Myers-based unified diffs for Git objects while guarding against pathological inputs.
3
4use std::{
5    collections::{HashMap, HashSet, VecDeque},
6    fmt::Write,
7    path::{Path, PathBuf},
8};
9
10use path_absolutize::Absolutize;
11use similar::{Algorithm, ChangeTag, TextDiff};
12
13use crate::hash::ObjectHash;
14
/// Result item for a single file diff:
/// - `path`: logical file path
/// - `data`: unified diff text or a large-file marker
#[derive(Debug, Clone)]
pub struct DiffItem {
    /// The file path being diffed (lossy UTF-8 rendering of the logical path).
    pub path: String,
    /// The complete unified diff output string for that file, or a large-file marker if the file is too large to diff.
    pub data: String,
}
25
/// Unified diff generator and helpers.
///
/// Stateless namespace type: all functionality lives in associated functions.
pub struct Diff;
28
/// Diff line operation types primarily used by blame computation to map parent/child lines.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffOperation {
    /// A line present only in the new text; `line` is its 1-based new-side number,
    /// `content` carries the inserted text for blame attribution.
    Insert { line: usize, content: String },
    /// A line removed from the old text; `line` is its 1-based old-side number.
    Delete { line: usize },
    /// A line common to both sides, with its 1-based position in each.
    Equal { old_line: usize, new_line: usize },
}
36
/// Internal representation of diff lines used while assembling unified hunks.
/// Line numbers are 1-based; the text is borrowed from the input to avoid copies.
#[derive(Debug, Clone, Copy)]
enum EditLine<'a> {
    // Unchanged line: old_line, new_line, text
    Context(Option<usize>, Option<usize>, &'a str),
    // Removed line: old_line, text
    Delete(usize, &'a str),
    // Added line: new_line, text
    Insert(usize, &'a str),
}
47
48impl Diff {
49    /// Compute Myers line-level operations (equal/insert/delete) for blame/line mapping.
50    fn compute_line_operations(old_lines: &[String], new_lines: &[String]) -> Vec<DiffOperation> {
51        if old_lines.is_empty() && new_lines.is_empty() {
52            return Vec::new();
53        }
54
55        let old_refs: Vec<&str> = old_lines.iter().map(|s| s.as_str()).collect();
56        let new_refs: Vec<&str> = new_lines.iter().map(|s| s.as_str()).collect();
57
58        let diff = TextDiff::configure()
59            .algorithm(Algorithm::Myers)
60            .diff_slices(&old_refs, &new_refs);
61
62        let mut operations = Vec::with_capacity(old_lines.len() + new_lines.len());
63        let mut old_line_no = 1usize;
64        let mut new_line_no = 1usize;
65
66        for change in diff.iter_all_changes() {
67            match change.tag() {
68                ChangeTag::Equal => {
69                    operations.push(DiffOperation::Equal {
70                        old_line: old_line_no,
71                        new_line: new_line_no,
72                    });
73                    old_line_no += 1;
74                    new_line_no += 1;
75                }
76                ChangeTag::Delete => {
77                    operations.push(DiffOperation::Delete { line: old_line_no });
78                    old_line_no += 1;
79                }
80                ChangeTag::Insert => {
81                    operations.push(DiffOperation::Insert {
82                        line: new_line_no,
83                        content: change.value().to_string(),
84                    });
85                    new_line_no += 1;
86                }
87            }
88        }
89
90        operations
91    }
92
    /// Hard cap on the combined (old + new) line count; larger inputs are not
    /// diffed and are reported with a large-file marker instead.
    const MAX_DIFF_LINES: usize = 10_000; // safety cap for pathological inputs
    /// Opening tag of the marker emitted for files exceeding `MAX_DIFF_LINES`.
    const LARGE_FILE_MARKER: &'static str = "<LargeFile>";
    /// Closing tag of the large-file marker.
    const LARGE_FILE_END: &'static str = "</LargeFile>";
    /// Abbreviated hash length used in `index` diff headers (7 chars, as in git short hashes).
    const SHORT_HASH_LEN: usize = 7;
97
98    /// Compute diffs for a set of files, honoring an optional filter and emitting unified diffs.
99    pub fn diff<F>(
100        old_blobs: Vec<(PathBuf, ObjectHash)>,
101        new_blobs: Vec<(PathBuf, ObjectHash)>,
102        filter: Vec<PathBuf>,
103        read_content: F,
104    ) -> Vec<DiffItem>
105    where
106        F: Fn(&PathBuf, &ObjectHash) -> Vec<u8>,
107    {
108        let (processed_files, old_blobs_map, new_blobs_map) =
109            Self::prepare_diff_data(old_blobs, new_blobs, &filter);
110
111        let mut diff_results: Vec<DiffItem> = Vec::with_capacity(processed_files.len());
112        for file in processed_files {
113            // Read bytes once per file to avoid duplicate IO and conversions.
114            let old_hash = old_blobs_map.get(&file);
115            let new_hash = new_blobs_map.get(&file);
116            let old_bytes = old_hash.map_or_else(Vec::new, |h| read_content(&file, h));
117            let new_bytes = new_hash.map_or_else(Vec::new, |h| read_content(&file, h));
118
119            if let Some(large_file_marker) =
120                Self::is_large_file_bytes(&file, &old_bytes, &new_bytes)
121            {
122                diff_results.push(DiffItem {
123                    path: file.to_string_lossy().to_string(),
124                    data: large_file_marker,
125                });
126            } else {
127                let diff = Self::diff_for_file_preloaded(
128                    &file, old_hash, new_hash, &old_bytes, &new_bytes,
129                );
130                diff_results.push(DiffItem {
131                    path: file.to_string_lossy().to_string(),
132                    data: diff,
133                });
134            }
135        }
136
137        diff_results
138    }
139
140    /// Large-file detection without re-reading: counts lines from already-loaded bytes.
141    fn is_large_file_bytes(file: &Path, old_bytes: &[u8], new_bytes: &[u8]) -> Option<String> {
142        let old_lines = String::from_utf8_lossy(old_bytes).lines().count();
143        let new_lines = String::from_utf8_lossy(new_bytes).lines().count();
144        let total_lines = old_lines + new_lines;
145        if total_lines > Self::MAX_DIFF_LINES {
146            Some(format!(
147                "{}{}:{}:{}{}\n",
148                Self::LARGE_FILE_MARKER,
149                file.display(),
150                total_lines,
151                Self::MAX_DIFF_LINES,
152                Self::LARGE_FILE_END
153            ))
154        } else {
155            None
156        }
157    }
158
159    /// Build maps, union file set, and apply filter/path checks.
160    fn prepare_diff_data(
161        old_blobs: Vec<(PathBuf, ObjectHash)>,
162        new_blobs: Vec<(PathBuf, ObjectHash)>,
163        filter: &[PathBuf],
164    ) -> (
165        Vec<PathBuf>,
166        HashMap<PathBuf, ObjectHash>,
167        HashMap<PathBuf, ObjectHash>,
168    ) {
169        let old_blobs_map: HashMap<PathBuf, ObjectHash> = old_blobs.into_iter().collect();
170        let new_blobs_map: HashMap<PathBuf, ObjectHash> = new_blobs.into_iter().collect();
171        // union set
172        let union_files: HashSet<PathBuf> = old_blobs_map
173            .keys()
174            .chain(new_blobs_map.keys())
175            .cloned()
176            .collect();
177
178        // filter files that should be processed
179        let processed_files: Vec<PathBuf> = union_files
180            .into_iter()
181            .filter(|file| Self::should_process(file, filter, &old_blobs_map, &new_blobs_map))
182            .collect();
183
184        (processed_files, old_blobs_map, new_blobs_map)
185    }
186
187    /// Filter by path and hash equality; only process differing or unmatched files.
188    fn should_process(
189        file: &PathBuf,
190        filter: &[PathBuf],
191        old_blobs: &HashMap<PathBuf, ObjectHash>,
192        new_blobs: &HashMap<PathBuf, ObjectHash>,
193    ) -> bool {
194        if !filter.is_empty()
195            && !filter
196                .iter()
197                .any(|path| Self::sub_of(file, path).unwrap_or(false))
198        {
199            return false;
200        }
201
202        old_blobs.get(file) != new_blobs.get(file)
203    }
204
205    /// Check whether `path` is under `parent` (absolutized).
206    fn sub_of(path: &PathBuf, parent: &PathBuf) -> Result<bool, std::io::Error> {
207        let path_abs: PathBuf = path.absolutize()?.to_path_buf();
208        let parent_abs: PathBuf = parent.absolutize()?.to_path_buf();
209        Ok(path_abs.starts_with(parent_abs))
210    }
211
212    /// Shorten hash to 7 chars for diff headers; return zeros if missing.
213    fn short_hash(hash: Option<&ObjectHash>) -> String {
214        hash.map(|h| {
215            let hex = h.to_string();
216            let take = Self::SHORT_HASH_LEN.min(hex.len());
217            hex[..take].to_string()
218        })
219        .unwrap_or_else(|| "0".repeat(Self::SHORT_HASH_LEN))
220    }
221
222    /// Format a single file's unified diff string.
223    pub fn diff_for_file_string(
224        file: &PathBuf,
225        old_blobs: &HashMap<PathBuf, ObjectHash>,
226        new_blobs: &HashMap<PathBuf, ObjectHash>,
227        read_content: &dyn Fn(&PathBuf, &ObjectHash) -> Vec<u8>,
228    ) -> String {
229        let new_hash = new_blobs.get(file);
230        let old_hash = old_blobs.get(file);
231        let old_bytes = old_hash.map_or_else(Vec::new, |h| read_content(file, h));
232        let new_bytes = new_hash.map_or_else(Vec::new, |h| read_content(file, h));
233
234        Self::diff_for_file_preloaded(file, old_hash, new_hash, &old_bytes, &new_bytes)
235    }
236
237    /// Format a single file's unified diff using preloaded bytes to avoid re-reading.
238    fn diff_for_file_preloaded(
239        file: &Path,
240        old_hash: Option<&ObjectHash>,
241        new_hash: Option<&ObjectHash>,
242        old_bytes: &[u8],
243        new_bytes: &[u8],
244    ) -> String {
245        let mut out = String::new();
246
247        // It's safe to ignore the Result when writing into a String; allocation errors panic elsewhere.
248        let _ = writeln!(out, "diff --git a/{} b/{}", file.display(), file.display());
249
250        if old_hash.is_none() {
251            let _ = writeln!(out, "new file mode 100644");
252        } else if new_hash.is_none() {
253            let _ = writeln!(out, "deleted file mode 100644");
254        }
255
256        let old_index = Self::short_hash(old_hash);
257        let new_index = Self::short_hash(new_hash);
258        let _ = writeln!(out, "index {old_index}..{new_index} 100644");
259
260        match (
261            std::str::from_utf8(old_bytes),
262            std::str::from_utf8(new_bytes),
263        ) {
264            (Ok(old_text), Ok(new_text)) => {
265                let (old_pref, new_pref) = if old_text.is_empty() {
266                    ("/dev/null".to_string(), format!("b/{}", file.display()))
267                } else if new_text.is_empty() {
268                    (format!("a/{}", file.display()), "/dev/null".to_string())
269                } else {
270                    (
271                        format!("a/{}", file.display()),
272                        format!("b/{}", file.display()),
273                    )
274                };
275
276                let _ = writeln!(out, "--- {old_pref}");
277                let _ = writeln!(out, "+++ {new_pref}");
278
279                let unified = Self::compute_unified_diff(old_text, new_text, 3);
280                out.push_str(&unified);
281            }
282            _ => {
283                let _ = writeln!(out, "Binary files differ");
284            }
285        }
286
287        out
288    }
289
    /// Streaming unified diff that minimizes allocations by borrowing lines.
    ///
    /// Hunk assembly strategy:
    /// - Equal lines seen *outside* a hunk roll through `prefix_ctx` (at most
    ///   `context` of them) and become the leading context when an edit opens
    ///   the next hunk.
    /// - Equal lines seen *inside* a hunk collect in `eq_run`; once that run
    ///   exceeds `2 * context` the gap is wide enough to split, so the current
    ///   hunk is flushed (keeping up to `context` trailing lines) and the tail
    ///   of the run re-seeds `prefix_ctx`.
    /// - Any hunk still open after the last change is flushed at the end.
    fn compute_unified_diff(old_text: &str, new_text: &str, context: usize) -> String {
        // Myers line diff
        let diff = TextDiff::configure()
            .algorithm(Algorithm::Myers)
            .diff_lines(old_text, new_text);

        // Reserve capacity heuristic to reduce allocations
        let mut out = String::with_capacity(((old_text.len() + new_text.len()) / 16).max(4096));

        // Rolling prefix context (last `context` equal lines when outside a hunk)
        let mut prefix_ctx: VecDeque<EditLine> = VecDeque::with_capacity(context);
        let mut cur_hunk: Vec<EditLine> = Vec::new();
        let mut eq_run: Vec<EditLine> = Vec::new(); // accumulating equal lines while in hunk
        let mut in_hunk = false;

        // Highest line numbers emitted so far on each side; `flush_hunk_to_out`
        // uses them to synthesize a hunk start when one side has no lines.
        let mut last_old_seen = 0usize;
        let mut last_new_seen = 0usize;
        let mut old_line_no = 1usize;
        let mut new_line_no = 1usize;

        for change in diff.iter_all_changes() {
            // Strip the trailing line terminator; emission re-adds one per line.
            let line = change.value().trim_end_matches(['\r', '\n']);
            match change.tag() {
                ChangeTag::Equal => {
                    let entry = EditLine::Context(Some(old_line_no), Some(new_line_no), line);
                    old_line_no += 1;
                    new_line_no += 1;
                    if in_hunk {
                        eq_run.push(entry);
                        // Flush once trailing equal lines exceed 2*context
                        if eq_run.len() > context * 2 {
                            Self::flush_hunk_to_out(
                                &mut out,
                                &mut cur_hunk,
                                &mut eq_run,
                                &mut prefix_ctx,
                                context,
                                &mut last_old_seen,
                                &mut last_new_seen,
                            );
                            in_hunk = false;
                        }
                    } else {
                        // Outside a hunk: keep only the most recent `context` lines.
                        if prefix_ctx.len() == context {
                            prefix_ctx.pop_front();
                        }
                        prefix_ctx.push_back(entry);
                    }
                }
                ChangeTag::Delete => {
                    let entry = EditLine::Delete(old_line_no, line);
                    old_line_no += 1;
                    if !in_hunk {
                        // An edit opens a hunk: pull in the buffered leading context.
                        cur_hunk.extend(prefix_ctx.iter().copied());
                        prefix_ctx.clear();
                        in_hunk = true;
                    }
                    if !eq_run.is_empty() {
                        // Pending equal lines become interior context of the hunk.
                        cur_hunk.append(&mut eq_run);
                    }
                    cur_hunk.push(entry);
                }
                ChangeTag::Insert => {
                    let entry = EditLine::Insert(new_line_no, line);
                    new_line_no += 1;
                    if !in_hunk {
                        // An edit opens a hunk: pull in the buffered leading context.
                        cur_hunk.extend(prefix_ctx.iter().copied());
                        prefix_ctx.clear();
                        in_hunk = true;
                    }
                    if !eq_run.is_empty() {
                        // Pending equal lines become interior context of the hunk.
                        cur_hunk.append(&mut eq_run);
                    }
                    cur_hunk.push(entry);
                }
            }
        }

        // Flush the final hunk, if one is still open.
        if in_hunk {
            Self::flush_hunk_to_out(
                &mut out,
                &mut cur_hunk,
                &mut eq_run,
                &mut prefix_ctx,
                context,
                &mut last_old_seen,
                &mut last_new_seen,
            );
        }

        out
    }
383
    /// Flush the current hunk into the output; trailing context lives in `eq_run`.
    ///
    /// On return `cur_hunk` and `eq_run` are cleared, `prefix_ctx` holds the
    /// last `context` equal lines of `eq_run` (leading context for the next
    /// hunk), and `last_old_seen`/`last_new_seen` are advanced past every line
    /// that was emitted.
    fn flush_hunk_to_out<'a>(
        out: &mut String,
        cur_hunk: &mut Vec<EditLine<'a>>,
        eq_run: &mut Vec<EditLine<'a>>,
        prefix_ctx: &mut VecDeque<EditLine<'a>>,
        context: usize,
        last_old_seen: &mut usize,
        last_new_seen: &mut usize,
    ) {
        // 1. Append up to `context` trailing equal lines to the current hunk.
        let trail_to_take = eq_run.len().min(context);
        for entry in eq_run.iter().take(trail_to_take) {
            cur_hunk.push(*entry);
        }

        // 2. Compute header numbers (line ranges/counts) by scanning the hunk.
        //    `*_first` is the first line number present on that side; `*_count`
        //    is how many hunk lines touch that side.
        let mut old_first: Option<usize> = None;
        let mut old_count: usize = 0;
        let mut new_first: Option<usize> = None;
        let mut new_count: usize = 0;

        for e in cur_hunk.iter() {
            match *e {
                EditLine::Context(o, n, _) => {
                    if let Some(o) = o {
                        if old_first.is_none() {
                            old_first = Some(o);
                        }
                        old_count += 1;
                    }
                    if let Some(n) = n {
                        if new_first.is_none() {
                            new_first = Some(n);
                        }
                        new_count += 1;
                    }
                }
                EditLine::Delete(o, _) => {
                    if old_first.is_none() {
                        old_first = Some(o);
                    }
                    old_count += 1;
                }
                EditLine::Insert(n, _) => {
                    if new_first.is_none() {
                        new_first = Some(n);
                    }
                    new_count += 1;
                }
            }
        }

        // Nothing to emit (e.g. flushed with an empty hunk) — just reset state.
        if old_count == 0 && new_count == 0 {
            cur_hunk.clear();
            eq_run.clear();
            return;
        }

        // A side with no lines in the hunk gets a synthetic start just past the
        // last line already written for that side.
        let old_start = old_first.unwrap_or(*last_old_seen + 1);
        let new_start = new_first.unwrap_or(*last_new_seen + 1);

        let _ = writeln!(
            out,
            "@@ -{old_start},{old_count} +{new_start},{new_count} @@"
        );

        // 3. Output the hunk according to Myers change order
        for &e in cur_hunk.iter() {
            match e {
                EditLine::Context(o, n, txt) => {
                    let _ = writeln!(out, " {txt}");
                    if let Some(o) = o {
                        *last_old_seen = (*last_old_seen).max(o);
                    }
                    if let Some(n) = n {
                        *last_new_seen = (*last_new_seen).max(n);
                    }
                }
                EditLine::Delete(o, txt) => {
                    let _ = writeln!(out, "-{txt}");
                    *last_old_seen = (*last_old_seen).max(o);
                }
                EditLine::Insert(n, txt) => {
                    let _ = writeln!(out, "+{txt}");
                    *last_new_seen = (*last_new_seen).max(n);
                }
            }
        }

        // 4. Preserve last `context` equal lines from eq_run for prefix of next hunk.
        prefix_ctx.clear();
        if context > 0 {
            let keep_start = eq_run.len().saturating_sub(context);
            for entry in eq_run.iter().skip(keep_start) {
                prefix_ctx.push_back(*entry);
            }
        }

        cur_hunk.clear();
        eq_run.clear();
    }
486}
487
/// Compute Myers diff operations for blame/line-mapping scenarios.
///
/// Thin public wrapper around `Diff::compute_line_operations`: returns one
/// [`DiffOperation`] per diffed line, with 1-based line numbers on each side
/// and inserted content attached for blame attribution.
pub fn compute_diff(old_lines: &[String], new_lines: &[String]) -> Vec<DiffOperation> {
    Diff::compute_line_operations(old_lines, new_lines)
}
492
493#[cfg(test)]
494mod tests {
495    use std::{collections::HashMap, fs, path::PathBuf, process::Command};
496
497    use tempfile::tempdir;
498
499    use super::{Diff, DiffOperation, compute_diff};
500    use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test};
501
502    /// Helper: run our diff on in-memory blobs and return diff text plus their hashes.
503    fn run_diff(
504        logical_path: &str,
505        old_bytes: &[u8],
506        new_bytes: &[u8],
507    ) -> (String, ObjectHash, ObjectHash) {
508        let file = PathBuf::from(logical_path);
509        let old_hash = ObjectHash::new(old_bytes);
510        let new_hash = ObjectHash::new(new_bytes);
511
512        let mut blob_store: HashMap<ObjectHash, Vec<u8>> = HashMap::new();
513        blob_store.insert(old_hash, old_bytes.to_vec());
514        blob_store.insert(new_hash, new_bytes.to_vec());
515
516        let mut old_map = HashMap::new();
517        let mut new_map = HashMap::new();
518        old_map.insert(file.clone(), old_hash);
519        new_map.insert(file.clone(), new_hash);
520
521        let reader = |_: &PathBuf, h: &ObjectHash| -> Vec<u8> {
522            blob_store.get(h).cloned().unwrap_or_default()
523        };
524
525        let diff = Diff::diff_for_file_string(&file, &old_map, &new_map, &reader);
526        (diff, old_hash, new_hash)
527    }
528
529    /// Helper: shorten hash to 7 chars for diff header normalization.
530    fn short_hash(hash: &ObjectHash) -> String {
531        hash.to_string().chars().take(7).collect()
532    }
533
    /// Helper: run `git diff --no-index` on temp files and normalize headers for comparison.
    ///
    /// Returns `None` when git is unavailable, the temp setup fails, or git
    /// prints nothing. Header lines are rewritten to use the logical path and
    /// our short hashes so output can be compared against `Diff`'s formatting.
    fn normalized_git_diff(
        logical_path: &str,
        old_bytes: &[u8],
        new_bytes: &[u8],
        old_hash: &ObjectHash,
        new_hash: &ObjectHash,
    ) -> Option<String> {
        let temp_dir = tempdir().ok()?;
        let old_file = temp_dir.path().join("old.txt");
        let new_file = temp_dir.path().join("new.txt");

        fs::write(&old_file, old_bytes).ok()?;
        fs::write(&new_file, new_bytes).ok()?;

        // `--no-index` diffs plain files; exit status 1 (differences found) is
        // fine, we only care about stdout.
        let output = Command::new("git")
            .current_dir(temp_dir.path())
            .args(["diff", "--no-index", "--unified=3", "old.txt", "new.txt"])
            .output()
            .ok()?;

        let stdout = String::from_utf8_lossy(&output.stdout);
        if stdout.is_empty() {
            return None;
        }

        let short_old = short_hash(old_hash);
        let short_new = short_hash(new_hash);

        // Rewrite headers to logical path/our hashes; strip the optional
        // function-context suffix git appends after the second `@@`.
        let mut normalized = Vec::new();
        for line in stdout.lines() {
            let rewritten = if line.starts_with("diff --git ") {
                format!("diff --git a/{logical_path} b/{logical_path}")
            } else if line.starts_with("index ") {
                format!("index {short_old}..{short_new} 100644")
            } else if line.starts_with("--- ") {
                format!("--- a/{logical_path}")
            } else if line.starts_with("+++ ") {
                format!("+++ b/{logical_path}")
            } else if line.starts_with("@@") {
                match line.rfind("@@") {
                    Some(pos) if pos + 2 <= line.len() => line[..pos + 2].to_string(),
                    _ => line.to_string(),
                }
            } else {
                line.to_string()
            };
            normalized.push(rewritten);
        }

        Some(normalized.join("\n") + "\n")
    }
586
    /// Basic text diff should include headers and expected +/- markers.
    #[test]
    fn unified_diff_basic_changes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        // Old/new differ by one modified line (`b` -> `B`) and one appended line (`d`).
        let old = b"a\nb\nc\n" as &[u8];
        let new = b"a\nB\nc\nd\n" as &[u8];
        let (diff, _, _) = run_diff("foo.txt", old, new);

        // Unified-format header lines.
        assert!(diff.contains("diff --git a/foo.txt b/foo.txt"));
        assert!(diff.contains("index "));
        assert!(diff.contains("--- a/foo.txt"));
        assert!(diff.contains("+++ b/foo.txt"));
        assert!(diff.contains("@@"));
        // Hunk body: the modification and the addition.
        assert!(diff.contains("-b"));
        assert!(diff.contains("+B"));
        assert!(diff.contains("+d"));
    }
604
605    /// Non-text inputs should yield a binary files notice.
606    #[test]
607    fn binary_files_detection() {
608        let _guard = set_hash_kind_for_test(HashKind::Sha256);
609        let old_bytes = vec![0u8, 159, 146, 150];
610        let new_bytes = vec![0xFF, 0x00, 0x01];
611        let (diff, _, _) = run_diff("bin.dat", &old_bytes, &new_bytes);
612        assert!(diff.contains("Binary files differ"));
613    }
614
    /// Fixture diff should match git's inserted/deleted lines.
    #[test]
    fn diff_matches_git_for_fixture() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256); //use it to test SHA1/SHA-256 diffs as well
        // Fixtures live in tests/diff relative to the crate root.
        let base: PathBuf = [env!("CARGO_MANIFEST_DIR"), "tests", "diff"]
            .iter()
            .collect();
        let old_bytes = fs::read(base.join("old.txt")).expect("read old.txt");
        let new_bytes = fs::read(base.join("new.txt")).expect("read new.txt");

        let (diff_output, old_hash, new_hash) = run_diff("fixture.txt", &old_bytes, &new_bytes);
        let git_output =
            normalized_git_diff("fixture.txt", &old_bytes, &new_bytes, &old_hash, &new_hash)
                .expect("git diff output");

        // Collect hunk lines starting with the given marker ('-' or '+').
        // Note: also catches `---`/`+++` headers, which are normalized to be
        // identical on both sides, so the comparison stays fair.
        fn collect(s: &str, prefix: char) -> Vec<String> {
            s.lines()
                .filter(|l| l.starts_with(prefix))
                .map(|l| l.to_string())
                .collect()
        }
        let ours_del = collect(&diff_output, '-');
        let ours_ins = collect(&diff_output, '+');
        let git_del = collect(&git_output, '-');
        let git_ins = collect(&git_output, '+');

        // Compare as sets: hunk ordering/grouping may differ between
        // implementations, but the changed-line content must agree.
        use std::collections::HashSet;
        let ours_del_set: HashSet<_> = ours_del.iter().collect();
        let git_del_set: HashSet<_> = git_del.iter().collect();
        let ours_ins_set: HashSet<_> = ours_ins.iter().collect();
        let git_ins_set: HashSet<_> = git_ins.iter().collect();

        assert_eq!(
            ours_del_set, git_del_set,
            "deleted lines differ from git output"
        );
        assert_eq!(
            ours_ins_set, git_ins_set,
            "inserted lines differ from git output"
        );
    }
656
    /// Large input should still match git's inserted/deleted sets.
    #[test]
    fn diff_matches_git_for_large_change() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        // 5000-line file with scattered edits, one mid-file insertion, and an
        // appended tail — exercises multiple well-separated hunks.
        let old_lines: Vec<String> = (0..5_000).map(|i| format!("line {i}")).collect();
        let mut new_lines = old_lines.clone();
        for idx in [10, 499, 1_234, 3_210, 4_999] {
            new_lines[idx] = format!("updated line {idx}");
        }
        new_lines.insert(2_500, "inserted middle line".into());
        new_lines.push("new tail line".into());

        let old_text = old_lines.join("\n") + "\n";
        let new_text = new_lines.join("\n") + "\n";

        let (diff_output, old_hash, new_hash) = run_diff(
            "large_fixture.txt",
            old_text.as_bytes(),
            new_text.as_bytes(),
        );
        let git_output = normalized_git_diff(
            "large_fixture.txt",
            old_text.as_bytes(),
            new_text.as_bytes(),
            &old_hash,
            &new_hash,
        )
        .expect("git diff output");

        // Collect hunk lines starting with the given marker ('-' or '+').
        fn collect(s: &str, prefix: char) -> Vec<String> {
            s.lines()
                .filter(|l| l.starts_with(prefix))
                .map(|l| l.to_string())
                .collect()
        }
        // Set comparison: hunk grouping may differ, changed lines must not.
        use std::collections::HashSet;
        let ours_del: HashSet<_> = collect(&diff_output, '-').into_iter().collect();
        let ours_ins: HashSet<_> = collect(&diff_output, '+').into_iter().collect();
        let git_del: HashSet<_> = collect(&git_output, '-').into_iter().collect();
        let git_ins: HashSet<_> = collect(&git_output, '+').into_iter().collect();
        assert_eq!(ours_del, git_del, "deleted lines differ from git output");
        assert_eq!(ours_ins, git_ins, "inserted lines differ from git output");
    }
700
    /// Line mapping operations should match expected Equal/Delete/Insert sequence.
    #[test]
    fn compute_diff_operations_basic_mapping() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        // One modified line (`b` -> `B`, modelled as Delete+Insert) and one
        // appended line (`d`).
        let old_lines = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let new_lines = vec![
            "a".to_string(),
            "B".to_string(),
            "c".to_string(),
            "d".to_string(),
        ];

        let ops = compute_diff(&old_lines, &new_lines);

        // Line numbers are 1-based per side; only inserts carry content.
        let expected = vec![
            DiffOperation::Equal {
                old_line: 1,
                new_line: 1,
            },
            DiffOperation::Delete { line: 2 },
            DiffOperation::Insert {
                line: 2,
                content: "B".to_string(),
            },
            DiffOperation::Equal {
                old_line: 3,
                new_line: 3,
            },
            DiffOperation::Insert {
                line: 4,
                content: "d".to_string(),
            },
        ];

        assert_eq!(ops, expected);
    }
737}