agcodex_core/
turn_diff_tracker.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4use std::path::PathBuf;
5use std::process::Command;
6
7use crate::error::CodexErr;
8use crate::error::Result;
9use sha1::digest::Output;
10use uuid::Uuid;
11
12use crate::protocol::FileChange;
13
/// All-zero object id written in the `index` line for the side of a diff where no file exists.
/// A baseline whose oid equals this value is treated as "file was absent".
const ZERO_OID: &str = "0000000000000000000000000000000000000000";
/// Path written in the `---` / `+++` header for the side of a diff where no file exists.
const DEV_NULL: &str = "/dev/null";
16
/// Snapshot of a tracked file, taken the first time its path is seen by the tracker.
struct BaselineFileInfo {
    /// External path the file had when the snapshot was taken.
    path: PathBuf,
    /// Blob bytes at snapshot time; empty when the path did not exist.
    content: Vec<u8>,
    /// File mode recorded at snapshot time (`Regular` when the path did not exist).
    mode: FileMode,
    /// Hex blob object id of the snapshot, or `ZERO_OID` when the path did not exist.
    oid: String,
}
23
/// Tracks sets of changes to files and exposes the overall unified diff.
/// Internally, this works as follows:
/// 1. Maintain an in-memory baseline snapshot of each file when its path is first seen.
///    For paths that do not exist yet (new additions), a placeholder baseline carrying the
///    all-zero object id is recorded so that diffs are shown as proper additions (using /dev/null).
/// 2. Keep a stable internal filename (uuid) per external path for rename tracking.
/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
///    using the `similar` crate and emit unified diffs with rewritten external paths.
#[derive(Default)]
pub struct TurnDiffTracker {
    /// Map external path -> internal filename (uuid).
    external_to_temp_name: HashMap<PathBuf, String>,
    /// Internal filename -> baseline file info.
    baseline_file_info: HashMap<String, BaselineFileInfo>,
    /// Internal filename -> external path as of current accumulated state (after applying all changes).
    /// This is where renames are tracked.
    temp_name_to_current_path: HashMap<String, PathBuf>,
    /// Cache of known git worktree roots to avoid repeated filesystem walks.
    git_root_cache: Vec<PathBuf>,
}
43
44impl TurnDiffTracker {
45    pub fn new() -> Self {
46        Self::default()
47    }
48
49    /// Front-run apply patch calls to track the starting contents of any modified files.
50    /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
51    /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
52    /// - Also updates internal mappings for move/rename events.
53    pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) {
54        for (path, change) in changes.iter() {
55            // Ensure a stable internal filename exists for this external path.
56            if !self.external_to_temp_name.contains_key(path) {
57                let internal = Uuid::new_v4().to_string();
58                self.external_to_temp_name
59                    .insert(path.clone(), internal.clone());
60                self.temp_name_to_current_path
61                    .insert(internal.clone(), path.clone());
62
63                // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
64                let baseline_file_info = if path.exists() {
65                    let mode = file_mode_for_path(path);
66                    let mode_val = mode.unwrap_or(FileMode::Regular);
67                    let content = blob_bytes(path, &mode_val).unwrap_or_default();
68                    let oid = if mode == Some(FileMode::Symlink) {
69                        format!("{:x}", git_blob_sha1_hex_bytes(&content))
70                    } else {
71                        self.git_blob_oid_for_path(path)
72                            .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(&content)))
73                    };
74                    Some(BaselineFileInfo {
75                        path: path.clone(),
76                        content,
77                        mode: mode_val,
78                        oid,
79                    })
80                } else {
81                    Some(BaselineFileInfo {
82                        path: path.clone(),
83                        content: vec![],
84                        mode: FileMode::Regular,
85                        oid: ZERO_OID.to_string(),
86                    })
87                };
88
89                if let Some(baseline_file_info) = baseline_file_info {
90                    self.baseline_file_info
91                        .insert(internal.clone(), baseline_file_info);
92                }
93            }
94
95            // Track rename/move in current mapping if provided in an Update.
96            if let FileChange::Update {
97                move_path: Some(dest),
98                ..
99            } = change
100            {
101                let uuid_filename = match self.external_to_temp_name.get(path) {
102                    Some(i) => i.clone(),
103                    None => {
104                        // This should be rare, but if we haven't mapped the source, create it with no baseline.
105                        let i = Uuid::new_v4().to_string();
106                        self.baseline_file_info.insert(
107                            i.clone(),
108                            BaselineFileInfo {
109                                path: path.clone(),
110                                content: vec![],
111                                mode: FileMode::Regular,
112                                oid: ZERO_OID.to_string(),
113                            },
114                        );
115                        i
116                    }
117                };
118                // Update current external mapping for temp file name.
119                self.temp_name_to_current_path
120                    .insert(uuid_filename.clone(), dest.clone());
121                // Update forward file_mapping: external current -> internal name.
122                self.external_to_temp_name.remove(path);
123                self.external_to_temp_name
124                    .insert(dest.clone(), uuid_filename);
125            };
126        }
127    }
128
129    fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
130        self.temp_name_to_current_path
131            .get(internal)
132            .cloned()
133            .or_else(|| {
134                self.baseline_file_info
135                    .get(internal)
136                    .map(|info| info.path.clone())
137            })
138    }
139
140    /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
141    /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
142    fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
143        let dir = if start.is_dir() {
144            start
145        } else {
146            start.parent()?
147        };
148
149        // Fast path: if any cached root is an ancestor of this path, use it.
150        if let Some(root) = self
151            .git_root_cache
152            .iter()
153            .find(|r| dir.starts_with(r))
154            .cloned()
155        {
156            return Some(root);
157        }
158
159        // Walk up to find a `.git` marker.
160        let mut cur = dir.to_path_buf();
161        loop {
162            let git_marker = cur.join(".git");
163            if git_marker.is_dir() || git_marker.is_file() {
164                if !self.git_root_cache.iter().any(|r| r == &cur) {
165                    self.git_root_cache.push(cur.clone());
166                }
167                return Some(cur);
168            }
169
170            // On Windows, avoid walking above the drive or UNC share root.
171            #[cfg(windows)]
172            {
173                if is_windows_drive_or_unc_root(&cur) {
174                    return None;
175                }
176            }
177
178            if let Some(parent) = cur.parent() {
179                cur = parent.to_path_buf();
180            } else {
181                return None;
182            }
183        }
184    }
185
186    /// Return a display string for `path` relative to its git root if found, else absolute.
187    fn relative_to_git_root_str(&mut self, path: &Path) -> String {
188        let s = if let Some(root) = self.find_git_root_cached(path) {
189            if let Ok(rel) = path.strip_prefix(&root) {
190                rel.display().to_string()
191            } else {
192                path.display().to_string()
193            }
194        } else {
195            path.display().to_string()
196        };
197        s.replace('\\', "/")
198    }
199
200    /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
201    /// Returns None if no repository is found or git invocation fails.
202    fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
203        let root = self.find_git_root_cached(path)?;
204        // Compute a path relative to the repo root for better portability across platforms.
205        let rel = path.strip_prefix(&root).unwrap_or(path);
206        let output = Command::new("git")
207            .arg("-C")
208            .arg(&root)
209            .arg("hash-object")
210            .arg("--")
211            .arg(rel)
212            .output()
213            .ok()?;
214        if !output.status.success() {
215            return None;
216        }
217        let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
218        if s.len() == 40 { Some(s) } else { None }
219    }
220
221    /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
222    /// collected before the first time they were touched by apply_patch during this turn with
223    /// the current repo state.
224    pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
225        let mut aggregated = String::new();
226
227        // Compute diffs per tracked internal file in a stable order by external path.
228        let mut baseline_file_names: Vec<String> =
229            self.baseline_file_info.keys().cloned().collect();
230        // Sort lexicographically by full repo-relative path to match git behavior.
231        baseline_file_names.sort_by_key(|internal| {
232            self.get_path_for_internal(internal)
233                .map(|p| self.relative_to_git_root_str(&p))
234                .unwrap_or_default()
235        });
236
237        for internal in baseline_file_names {
238            aggregated.push_str(self.get_file_diff(&internal).as_str());
239            if !aggregated.ends_with('\n') {
240                aggregated.push('\n');
241            }
242        }
243
244        if aggregated.trim().is_empty() {
245            Ok(None)
246        } else {
247            Ok(Some(aggregated))
248        }
249    }
250
251    fn get_file_diff(&mut self, internal_file_name: &str) -> String {
252        let mut aggregated = String::new();
253
254        // Snapshot lightweight fields only.
255        let (baseline_external_path, baseline_mode, left_oid) = {
256            if let Some(info) = self.baseline_file_info.get(internal_file_name) {
257                (info.path.clone(), info.mode, info.oid.clone())
258            } else {
259                (PathBuf::new(), FileMode::Regular, ZERO_OID.to_string())
260            }
261        };
262        let current_external_path = match self.get_path_for_internal(internal_file_name) {
263            Some(p) => p,
264            None => return aggregated,
265        };
266
267        let current_mode = file_mode_for_path(&current_external_path).unwrap_or(FileMode::Regular);
268        let right_bytes = blob_bytes(&current_external_path, &current_mode);
269
270        // Compute displays with &mut self before borrowing any baseline content.
271        let left_display = self.relative_to_git_root_str(&baseline_external_path);
272        let right_display = self.relative_to_git_root_str(&current_external_path);
273
274        // Compute right oid before borrowing baseline content.
275        let right_oid = if let Some(b) = right_bytes.as_ref() {
276            if current_mode == FileMode::Symlink {
277                format!("{:x}", git_blob_sha1_hex_bytes(b))
278            } else {
279                self.git_blob_oid_for_path(&current_external_path)
280                    .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(b)))
281            }
282        } else {
283            ZERO_OID.to_string()
284        };
285
286        // Borrow baseline content only after all &mut self uses are done.
287        let left_present = left_oid.as_str() != ZERO_OID;
288        let left_bytes: Option<&[u8]> = if left_present {
289            self.baseline_file_info
290                .get(internal_file_name)
291                .map(|i| i.content.as_slice())
292        } else {
293            None
294        };
295
296        // Fast path: identical bytes or both missing.
297        if left_bytes == right_bytes.as_deref() {
298            return aggregated;
299        }
300
301        aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
302
303        let is_add = !left_present && right_bytes.is_some();
304        let is_delete = left_present && right_bytes.is_none();
305
306        if is_add {
307            aggregated.push_str(&format!("new file mode {current_mode}\n"));
308        } else if is_delete {
309            aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
310        } else if baseline_mode != current_mode {
311            aggregated.push_str(&format!("old mode {baseline_mode}\n"));
312            aggregated.push_str(&format!("new mode {current_mode}\n"));
313        }
314
315        let left_text = left_bytes.and_then(|b| std::str::from_utf8(b).ok());
316        let right_text = right_bytes
317            .as_deref()
318            .and_then(|b| std::str::from_utf8(b).ok());
319
320        let can_text_diff = matches!(
321            (left_text, right_text, is_add, is_delete),
322            (Some(_), Some(_), _, _) | (_, Some(_), true, _) | (Some(_), _, _, true)
323        );
324
325        if can_text_diff {
326            let l = left_text.unwrap_or("");
327            let r = right_text.unwrap_or("");
328
329            aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
330
331            let old_header = if left_present {
332                format!("a/{left_display}")
333            } else {
334                DEV_NULL.to_string()
335            };
336            let new_header = if right_bytes.is_some() {
337                format!("b/{right_display}")
338            } else {
339                DEV_NULL.to_string()
340            };
341
342            let diff = similar::TextDiff::from_lines(l, r);
343            let unified = diff
344                .unified_diff()
345                .context_radius(3)
346                .header(&old_header, &new_header)
347                .to_string();
348
349            aggregated.push_str(&unified);
350        } else {
351            aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
352            let old_header = if left_present {
353                format!("a/{left_display}")
354            } else {
355                DEV_NULL.to_string()
356            };
357            let new_header = if right_bytes.is_some() {
358                format!("b/{right_display}")
359            } else {
360                DEV_NULL.to_string()
361            };
362            aggregated.push_str(&format!("--- {old_header}\n"));
363            aggregated.push_str(&format!("+++ {new_header}\n"));
364            aggregated.push_str("Binary files differ\n");
365        }
366        aggregated
367    }
368}
369
370/// Compute the Git SHA-1 blob object ID for the given content (bytes).
371fn git_blob_sha1_hex_bytes(data: &[u8]) -> Output<sha1::Sha1> {
372    // Git blob hash is sha1 of: "blob <len>\0<data>"
373    let header = format!("blob {}\0", data.len());
374    use sha1::Digest;
375    let mut hasher = sha1::Sha1::new();
376    hasher.update(header.as_bytes());
377    hasher.update(data);
378    hasher.finalize()
379}
380
/// Git file modes this tracker distinguishes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum FileMode {
    /// Ordinary non-executable file (`100644`).
    Regular,
    /// File with at least one execute bit set (`100755`); only exists on Unix builds.
    #[cfg(unix)]
    Executable,
    /// Symbolic link (`120000`).
    Symlink,
}

impl FileMode {
    /// Octal mode string as git prints it in diff headers.
    const fn as_str(&self) -> &'static str {
        match *self {
            Self::Regular => "100644",
            #[cfg(unix)]
            Self::Executable => "100755",
            Self::Symlink => "120000",
        }
    }
}

impl std::fmt::Display for FileMode {
    /// Writes the same octal string that `as_str` returns.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let octal = self.as_str();
        f.write_str(octal)
    }
}
405
406#[cfg(unix)]
407fn file_mode_for_path(path: &Path) -> Option<FileMode> {
408    use std::os::unix::fs::PermissionsExt;
409    let meta = fs::symlink_metadata(path).ok()?;
410    let ft = meta.file_type();
411    if ft.is_symlink() {
412        return Some(FileMode::Symlink);
413    }
414    let mode = meta.permissions().mode();
415    let is_exec = (mode & 0o111) != 0;
416    Some(if is_exec {
417        FileMode::Executable
418    } else {
419        FileMode::Regular
420    })
421}
422
423#[cfg(not(unix))]
424fn file_mode_for_path(_path: &Path) -> Option<FileMode> {
425    // Default to non-executable on non-unix.
426    Some(FileMode::Regular)
427}
428
429fn blob_bytes(path: &Path, mode: &FileMode) -> Option<Vec<u8>> {
430    if path.exists() {
431        let contents = if *mode == FileMode::Symlink {
432            symlink_blob_bytes(path).ok_or_else(|| {
433                CodexErr::General(format!(
434                    "failed to read symlink target for {}",
435                    path.display()
436                ))
437            })
438        } else {
439            fs::read(path).map_err(CodexErr::Io)
440        };
441        contents.ok()
442    } else {
443        None
444    }
445}
446
/// Raw bytes of the target that the symlink at `path` points to, or `None` if the link
/// cannot be read.
#[cfg(unix)]
fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
    use std::os::unix::ffi::OsStringExt;

    std::fs::read_link(path)
        .ok()
        .map(|target| target.into_os_string().into_vec())
}

/// Non-Unix fallback: symlink targets are never read.
#[cfg(not(unix))]
fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
    None
}
458
/// True when `p` consists of exactly a prefix component followed by the root directory
/// (for example `C:\`), with nothing after it.
#[cfg(windows)]
fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
    use std::path::Component;

    let mut parts = p.components();
    let starts_with_prefix = matches!(parts.next(), Some(Component::Prefix(_)));
    let then_root = matches!(parts.next(), Some(Component::RootDir));
    let nothing_after = parts.next().is_none();
    starts_with_prefix && then_root && nothing_after
}
468
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use tempfile::tempdir;

    /// Compute the Git SHA-1 blob object ID for the given content (string).
    /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
    fn git_blob_sha1_hex(data: &str) -> String {
        format!("{:x}", git_blob_sha1_hex_bytes(data.as_bytes()))
    }

    /// Replace the temp-dir prefix with `<TMP>` and sort the per-file diff blocks so that the
    /// expected strings below do not depend on the temp location or on block order.
    fn normalize_diff_for_test(input: &str, root: &Path) -> String {
        let root_str = root.display().to_string().replace('\\', "/");
        let replaced = input.replace(&root_str, "<TMP>");
        // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
        let mut blocks: Vec<String> = Vec::new();
        let mut current = String::new();
        for line in replaced.lines() {
            if line.starts_with("diff --git ") && !current.is_empty() {
                blocks.push(current);
                current = String::new();
            }
            if !current.is_empty() {
                current.push('\n');
            }
            current.push_str(line);
        }
        if !current.is_empty() {
            blocks.push(current);
        }
        blocks.sort();
        let mut out = blocks.join("\n");
        if !out.ends_with('\n') {
            out.push('\n');
        }
        out
    }

    /// An `Add` change carrying `content`.
    fn add_change(content: &str) -> FileChange {
        FileChange::Add {
            content: content.to_string(),
        }
    }

    /// An `Update` change with an empty patch body and the given optional rename target.
    fn update_change(move_path: Option<PathBuf>) -> FileChange {
        FileChange::Update {
            unified_diff: String::new(),
            move_path,
        }
    }

    /// The tracker's current diff, which must exist, normalised relative to `root`.
    #[track_caller]
    fn normalized_diff(acc: &mut TurnDiffTracker, root: &Path) -> String {
        let diff = acc.get_unified_diff().unwrap().unwrap();
        normalize_diff_for_test(&diff, root)
    }

    #[test]
    fn accumulates_add_and_update() {
        let mut acc = TurnDiffTracker::new();

        let dir = tempdir().unwrap();
        let file = dir.path().join("a.txt");

        // First patch: add file (baseline should be /dev/null).
        acc.on_patch_begin(&HashMap::from([(file.clone(), add_change("foo\n"))]));

        // Simulate apply: create the file on disk.
        fs::write(&file, "foo\n").unwrap();
        let first = normalized_diff(&mut acc, dir.path());
        let expected_first = {
            let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
            let right_oid = git_blob_sha1_hex("foo\n");
            format!(
                r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
new file mode {mode}
index {ZERO_OID}..{right_oid}
--- {DEV_NULL}
+++ b/<TMP>/a.txt
@@ -0,0 +1 @@
+foo
"#,
            )
        };
        assert_eq!(first, expected_first);

        // Second patch: update the file on disk.
        acc.on_patch_begin(&HashMap::from([(file.clone(), update_change(None))]));

        // Simulate apply: append a new line.
        fs::write(&file, "foo\nbar\n").unwrap();
        let combined = normalized_diff(&mut acc, dir.path());
        let expected_combined = {
            let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
            let right_oid = git_blob_sha1_hex("foo\nbar\n");
            format!(
                r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
new file mode {mode}
index {ZERO_OID}..{right_oid}
--- {DEV_NULL}
+++ b/<TMP>/a.txt
@@ -0,0 +1,2 @@
+foo
+bar
"#,
            )
        };
        assert_eq!(combined, expected_combined);
    }

    #[test]
    fn accumulates_delete() {
        let dir = tempdir().unwrap();
        let file = dir.path().join("b.txt");
        fs::write(&file, "x\n").unwrap();

        let mut acc = TurnDiffTracker::new();
        acc.on_patch_begin(&HashMap::from([(file.clone(), FileChange::Delete)]));

        // Simulate apply: delete the file from disk.
        let baseline_mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
        fs::remove_file(&file).unwrap();
        let diff = normalized_diff(&mut acc, dir.path());
        let expected = {
            let left_oid = git_blob_sha1_hex("x\n");
            format!(
                r#"diff --git a/<TMP>/b.txt b/<TMP>/b.txt
deleted file mode {baseline_mode}
index {left_oid}..{ZERO_OID}
--- a/<TMP>/b.txt
+++ {DEV_NULL}
@@ -1 +0,0 @@
-x
"#,
            )
        };
        assert_eq!(diff, expected);
    }

    #[test]
    fn accumulates_move_and_update() {
        let dir = tempdir().unwrap();
        let src = dir.path().join("src.txt");
        let dest = dir.path().join("dst.txt");
        fs::write(&src, "line\n").unwrap();

        let mut acc = TurnDiffTracker::new();
        acc.on_patch_begin(&HashMap::from([(
            src.clone(),
            update_change(Some(dest.clone())),
        )]));

        // Simulate apply: move and update content.
        fs::rename(&src, &dest).unwrap();
        fs::write(&dest, "line2\n").unwrap();

        let out = normalized_diff(&mut acc, dir.path());
        let expected = {
            let left_oid = git_blob_sha1_hex("line\n");
            let right_oid = git_blob_sha1_hex("line2\n");
            format!(
                r#"diff --git a/<TMP>/src.txt b/<TMP>/dst.txt
index {left_oid}..{right_oid}
--- a/<TMP>/src.txt
+++ b/<TMP>/dst.txt
@@ -1 +1 @@
-line
+line2
"#
            )
        };
        assert_eq!(out, expected);
    }

    #[test]
    fn move_without_change_yields_no_diff() {
        let dir = tempdir().unwrap();
        let src = dir.path().join("moved.txt");
        let dest = dir.path().join("renamed.txt");
        fs::write(&src, "same\n").unwrap();

        let mut acc = TurnDiffTracker::new();
        acc.on_patch_begin(&HashMap::from([(
            src.clone(),
            update_change(Some(dest.clone())),
        )]));

        // Simulate apply: move only, no content change.
        fs::rename(&src, &dest).unwrap();

        let diff = acc.get_unified_diff().unwrap();
        assert_eq!(diff, None);
    }

    #[test]
    fn move_declared_but_file_only_appears_at_dest_is_add() {
        let dir = tempdir().unwrap();
        let src = dir.path().join("src.txt");
        let dest = dir.path().join("dest.txt");
        let mut acc = TurnDiffTracker::new();
        acc.on_patch_begin(&HashMap::from([(
            src.clone(),
            update_change(Some(dest.clone())),
        )]));
        // No file existed initially; create only dest
        fs::write(&dest, "hello\n").unwrap();
        let diff = normalized_diff(&mut acc, dir.path());
        let expected = {
            let mode = file_mode_for_path(&dest).unwrap_or(FileMode::Regular);
            let right_oid = git_blob_sha1_hex("hello\n");
            format!(
                r#"diff --git a/<TMP>/src.txt b/<TMP>/dest.txt
new file mode {mode}
index {ZERO_OID}..{right_oid}
--- {DEV_NULL}
+++ b/<TMP>/dest.txt
@@ -0,0 +1 @@
+hello
"#,
            )
        };
        assert_eq!(diff, expected);
    }

    #[test]
    fn update_persists_across_new_baseline_for_new_file() {
        let dir = tempdir().unwrap();
        let a = dir.path().join("a.txt");
        let b = dir.path().join("b.txt");
        fs::write(&a, "foo\n").unwrap();
        fs::write(&b, "z\n").unwrap();

        let mut acc = TurnDiffTracker::new();

        // First: update existing a.txt (baseline snapshot is created for a).
        acc.on_patch_begin(&HashMap::from([(a.clone(), update_change(None))]));
        // Simulate apply: modify a.txt on disk.
        fs::write(&a, "foo\nbar\n").unwrap();
        let first = normalized_diff(&mut acc, dir.path());
        let expected_first = {
            let left_oid = git_blob_sha1_hex("foo\n");
            let right_oid = git_blob_sha1_hex("foo\nbar\n");
            format!(
                r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
index {left_oid}..{right_oid}
--- a/<TMP>/a.txt
+++ b/<TMP>/a.txt
@@ -1 +1,2 @@
 foo
+bar
"#
            )
        };
        assert_eq!(first, expected_first);

        // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change.
        acc.on_patch_begin(&HashMap::from([(b.clone(), FileChange::Delete)]));
        // Simulate apply: delete b.txt.
        let baseline_mode = file_mode_for_path(&b).unwrap_or(FileMode::Regular);
        fs::remove_file(&b).unwrap();

        let combined = normalized_diff(&mut acc, dir.path());
        let expected = {
            let left_oid_a = git_blob_sha1_hex("foo\n");
            let right_oid_a = git_blob_sha1_hex("foo\nbar\n");
            let left_oid_b = git_blob_sha1_hex("z\n");
            format!(
                r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
index {left_oid_a}..{right_oid_a}
--- a/<TMP>/a.txt
+++ b/<TMP>/a.txt
@@ -1 +1,2 @@
 foo
+bar
diff --git a/<TMP>/b.txt b/<TMP>/b.txt
deleted file mode {baseline_mode}
index {left_oid_b}..{ZERO_OID}
--- a/<TMP>/b.txt
+++ {DEV_NULL}
@@ -1 +0,0 @@
-z
"#,
            )
        };
        assert_eq!(combined, expected);
    }

    #[test]
    fn binary_files_differ_update() {
        let dir = tempdir().unwrap();
        let file = dir.path().join("bin.dat");

        // Initial non-UTF8 bytes
        let left_bytes: Vec<u8> = vec![0xff, 0xfe, 0xfd, 0x00];
        // Updated non-UTF8 bytes
        let right_bytes: Vec<u8> = vec![0x01, 0x02, 0x03, 0x00];

        fs::write(&file, &left_bytes).unwrap();

        let mut acc = TurnDiffTracker::new();
        acc.on_patch_begin(&HashMap::from([(file.clone(), update_change(None))]));

        // Apply update on disk
        fs::write(&file, &right_bytes).unwrap();

        let diff = normalized_diff(&mut acc, dir.path());
        let expected = {
            let left_oid = format!("{:x}", git_blob_sha1_hex_bytes(&left_bytes));
            let right_oid = format!("{:x}", git_blob_sha1_hex_bytes(&right_bytes));
            format!(
                r#"diff --git a/<TMP>/bin.dat b/<TMP>/bin.dat
index {left_oid}..{right_oid}
--- a/<TMP>/bin.dat
+++ b/<TMP>/bin.dat
Binary files differ
"#
            )
        };
        assert_eq!(diff, expected);
    }

    #[test]
    fn filenames_with_spaces_add_and_update() {
        let mut acc = TurnDiffTracker::new();

        let dir = tempdir().unwrap();
        let file = dir.path().join("name with spaces.txt");

        // First patch: add file (baseline should be /dev/null).
        acc.on_patch_begin(&HashMap::from([(file.clone(), add_change("foo\n"))]));

        // Simulate apply: create the file on disk.
        fs::write(&file, "foo\n").unwrap();
        let first = normalized_diff(&mut acc, dir.path());
        let expected_first = {
            let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
            let right_oid = git_blob_sha1_hex("foo\n");
            format!(
                r#"diff --git a/<TMP>/name with spaces.txt b/<TMP>/name with spaces.txt
new file mode {mode}
index {ZERO_OID}..{right_oid}
--- {DEV_NULL}
+++ b/<TMP>/name with spaces.txt
@@ -0,0 +1 @@
+foo
"#,
            )
        };
        assert_eq!(first, expected_first);

        // Second patch: update the file on disk.
        acc.on_patch_begin(&HashMap::from([(file.clone(), update_change(None))]));

        // Simulate apply: append a new line with a space.
        fs::write(&file, "foo\nbar baz\n").unwrap();
        let combined = normalized_diff(&mut acc, dir.path());
        let expected_combined = {
            let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
            let right_oid = git_blob_sha1_hex("foo\nbar baz\n");
            format!(
                r#"diff --git a/<TMP>/name with spaces.txt b/<TMP>/name with spaces.txt
new file mode {mode}
index {ZERO_OID}..{right_oid}
--- {DEV_NULL}
+++ b/<TMP>/name with spaces.txt
@@ -0,0 +1,2 @@
+foo
+bar baz
"#,
            )
        };
        assert_eq!(combined, expected_combined);
    }
}