Skip to main content

wisp/
git_diff.rs

1use std::fmt;
2use std::path::{Path, PathBuf};
3
/// Result of loading a git diff: the repository root and one entry per changed file.
// NOTE(review): `dead_code` is presumably allowed because not every field is read by
// current callers — confirm, and narrow or drop the attribute if that is no longer true.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitDiffDocument {
    /// Root directory of the repository the diff was taken from.
    pub repo_root: PathBuf,
    /// Per-file diffs, in the order they appeared in the diff output.
    pub files: Vec<FileDiff>,
}

/// The changes made to a single file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDiff {
    /// Path before the change; `None` when the file was added.
    pub old_path: Option<String>,
    /// Path taken from the `b/` side of the `diff --git` header.
    pub path: String,
    /// Kind of change detected from the extended header lines.
    pub status: FileStatus,
    /// Parsed hunks; left empty for binary files.
    pub hunks: Vec<Hunk>,
    /// `true` when a `Binary files ...` line was seen for this file.
    pub binary: bool,
}

/// Kind of change applied to a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// Default when no other status line is present.
    Modified,
    /// A `new file mode` line was present.
    Added,
    /// A `deleted file mode` line was present.
    Deleted,
    /// A `rename from` / `rename to` line was present.
    Renamed,
}

/// One `@@ ... @@` section of a file diff.
// NOTE(review): `dead_code` is presumably allowed because the count fields are not read
// yet — confirm.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// The raw `@@` header line, including any trailing text after the closing `@@`.
    pub header: String,
    /// First line number on the old side, as written in the header.
    pub old_start: usize,
    /// Line count on the old side (1 when the header omits it).
    pub old_count: usize,
    /// First line number on the new side, as written in the header.
    pub new_start: usize,
    /// Line count on the new side (1 when the header omits it).
    pub new_count: usize,
    /// The hunk's lines; the first entry is always the `HunkHeader` line itself.
    pub lines: Vec<PatchLine>,
}

/// A single line inside a hunk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchLine {
    /// How the line should be interpreted.
    pub kind: PatchLineKind,
    /// Line content. The leading `+`, `-` or space marker is stripped; header and
    /// meta lines keep their full text.
    pub text: String,
    /// Line number in the old file, when the line exists there.
    pub old_line_no: Option<usize>,
    /// Line number in the new file, when the line exists there.
    pub new_line_no: Option<usize>,
}

/// Classification of a [`PatchLine`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatchLineKind {
    /// The `@@ ... @@` line that opens a hunk.
    HunkHeader,
    /// Unchanged line present on both sides.
    Context,
    /// Line present only on the new side (`+`).
    Added,
    /// Line present only on the old side (`-`).
    Removed,
    /// A line starting with `\`, such as `\ No newline at end of file`.
    Meta,
}
51
/// Failures that can occur while loading or parsing a git diff.
#[derive(Debug)]
pub enum GitDiffError {
    /// The working directory is not inside a git repository.
    NotARepository,
    /// Git could not be started or exited unsuccessfully; `stderr` carries the
    /// captured error text.
    CommandFailed { stderr: String },
    /// The diff text did not have the expected shape.
    ParseError(String),
}

impl std::error::Error for GitDiffError {}

impl fmt::Display for GitDiffError {
    /// Renders a short human-readable message for each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            GitDiffError::NotARepository => f.write_str("Not a git repository"),
            GitDiffError::CommandFailed { stderr } => {
                write!(f, "Git command failed: {stderr}")
            }
            GitDiffError::ParseError(msg) => {
                write!(f, "Failed to parse diff: {msg}")
            }
        }
    }
}
70
71impl FileStatus {
72    pub fn marker(self) -> char {
73        match self {
74            Self::Modified => 'M',
75            Self::Added => 'A',
76            Self::Deleted => 'D',
77            Self::Renamed => 'R',
78        }
79    }
80}
81
82impl FileDiff {
83    pub fn additions(&self) -> usize {
84        self.hunks
85            .iter()
86            .flat_map(|hunk| &hunk.lines)
87            .filter(|line| line.kind == PatchLineKind::Added)
88            .count()
89    }
90
91    pub fn deletions(&self) -> usize {
92        self.hunks
93            .iter()
94            .flat_map(|hunk| &hunk.lines)
95            .filter(|line| line.kind == PatchLineKind::Removed)
96            .count()
97    }
98}
99
100pub(crate) async fn load_git_diff(
101    working_dir: &Path,
102    cached_repo_root: Option<&Path>,
103) -> Result<GitDiffDocument, GitDiffError> {
104    let repo_root = match cached_repo_root {
105        Some(root) => root.to_path_buf(),
106        None => resolve_repo_root(working_dir).await?,
107    };
108    let diff_output = run_git_diff(&repo_root).await?;
109
110    if diff_output.trim().is_empty() {
111        return Ok(GitDiffDocument {
112            repo_root,
113            files: Vec::new(),
114        });
115    }
116
117    let files = parse_unified_diff(&diff_output)?;
118    Ok(GitDiffDocument { repo_root, files })
119}
120
121async fn resolve_repo_root(working_dir: &Path) -> Result<PathBuf, GitDiffError> {
122    let output = tokio::process::Command::new("git")
123        .arg("rev-parse")
124        .arg("--show-toplevel")
125        .current_dir(working_dir)
126        .output()
127        .await
128        .map_err(|e| GitDiffError::CommandFailed {
129            stderr: e.to_string(),
130        })?;
131
132    if !output.status.success() {
133        let stderr = String::from_utf8_lossy(&output.stderr);
134        if stderr.contains("not a git repository") {
135            return Err(GitDiffError::NotARepository);
136        }
137        return Err(GitDiffError::CommandFailed {
138            stderr: stderr.into_owned(),
139        });
140    }
141
142    let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
143    Ok(PathBuf::from(root))
144}
145
146async fn run_git_diff(repo_root: &Path) -> Result<String, GitDiffError> {
147    let output = tokio::process::Command::new("git")
148        .args([
149            "diff",
150            "--no-ext-diff",
151            "--find-renames",
152            "--unified=3",
153            "HEAD",
154        ])
155        .current_dir(repo_root)
156        .output()
157        .await
158        .map_err(|e| GitDiffError::CommandFailed {
159            stderr: e.to_string(),
160        })?;
161
162    if !output.status.success() {
163        let stderr = String::from_utf8_lossy(&output.stderr);
164        return Err(GitDiffError::CommandFailed {
165            stderr: stderr.into_owned(),
166        });
167    }
168
169    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
170}
171
172pub(crate) fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, GitDiffError> {
173    split_diff_files(input)
174        .into_iter()
175        .map(parse_file_diff)
176        .collect()
177}
178
/// Splits diff text into per-file slices.
///
/// Each returned slice starts at a line beginning with `diff --git ` and runs up to
/// (not including) the next such line, or to the end of the input. Text before the
/// first header line is discarded.
fn split_diff_files(input: &str) -> Vec<&str> {
    // Byte offsets of every line that opens a new file section.
    let mut header_offsets = Vec::new();
    let mut offset = 0;
    for line in input.split_inclusive('\n') {
        if line.starts_with("diff --git ") {
            header_offsets.push(offset);
        }
        offset += line.len();
    }

    header_offsets
        .iter()
        .enumerate()
        .map(|(idx, &begin)| {
            let end = header_offsets
                .get(idx + 1)
                .copied()
                .unwrap_or(input.len());
            &input[begin..end]
        })
        .collect()
}
207
208fn parse_file_diff(chunk: &str) -> Result<FileDiff, GitDiffError> {
209    let lines: Vec<&str> = chunk.lines().collect();
210    if lines.is_empty() {
211        return Err(GitDiffError::ParseError("Empty diff chunk".to_string()));
212    }
213
214    let (old_path, new_path) = parse_diff_header(lines[0])?;
215    let (status, binary, rename_from, hunk_start) = scan_file_metadata(&lines);
216    let hunks = if binary {
217        Vec::new()
218    } else {
219        parse_file_hunks(&lines[hunk_start..])?
220    };
221
222    Ok(FileDiff {
223        old_path: resolve_old_path(status, rename_from, old_path),
224        path: new_path,
225        status,
226        hunks,
227        binary,
228    })
229}
230
231fn scan_file_metadata(lines: &[&str]) -> (FileStatus, bool, Option<String>, usize) {
232    let mut status = FileStatus::Modified;
233    let mut binary = false;
234    let mut rename_from = None;
235    let mut i = 1;
236
237    while i < lines.len() {
238        let line = lines[i];
239        if line.starts_with("new file mode") {
240            status = FileStatus::Added;
241        } else if line.starts_with("deleted file mode") {
242            status = FileStatus::Deleted;
243        } else if let Some(from) = line.strip_prefix("rename from ") {
244            status = FileStatus::Renamed;
245            rename_from = Some(from.to_string());
246        } else if line.starts_with("rename to ") {
247            status = FileStatus::Renamed;
248        } else if line.starts_with("Binary files ") {
249            binary = true;
250        } else if line.starts_with("@@") {
251            break;
252        }
253        i += 1;
254    }
255
256    (status, binary, rename_from, i)
257}
258
259fn parse_file_hunks(lines: &[&str]) -> Result<Vec<Hunk>, GitDiffError> {
260    let mut hunks = Vec::new();
261    let mut i = 0;
262
263    while i < lines.len() {
264        if lines[i].starts_with("@@") {
265            let (hunk, consumed) = parse_hunk(&lines[i..])?;
266            hunks.push(hunk);
267            i += consumed;
268        } else {
269            i += 1;
270        }
271    }
272
273    Ok(hunks)
274}
275
276fn resolve_old_path(
277    status: FileStatus,
278    rename_from: Option<String>,
279    old_path: String,
280) -> Option<String> {
281    if status == FileStatus::Added {
282        None
283    } else if status == FileStatus::Renamed {
284        rename_from.or(Some(old_path))
285    } else {
286        Some(old_path)
287    }
288}
289
290fn parse_diff_header(line: &str) -> Result<(String, String), GitDiffError> {
291    let rest = line
292        .strip_prefix("diff --git ")
293        .ok_or_else(|| GitDiffError::ParseError(format!("Invalid diff header: {line}")))?;
294
295    if let Some((a, b)) = rest.split_once(" b/") {
296        let old = a.strip_prefix("a/").unwrap_or(a).to_string();
297        let new = b.to_string();
298        Ok((old, new))
299    } else {
300        Err(GitDiffError::ParseError(format!(
301            "Cannot parse paths from: {line}"
302        )))
303    }
304}
305
306fn parse_hunk(lines: &[&str]) -> Result<(Hunk, usize), GitDiffError> {
307    let header = lines[0];
308    let (old_start, old_count, new_start, new_count) = parse_hunk_header(header)?;
309
310    let mut patch_lines = Vec::new();
311    patch_lines.push(PatchLine {
312        kind: PatchLineKind::HunkHeader,
313        text: header.to_string(),
314        old_line_no: None,
315        new_line_no: None,
316    });
317
318    let mut old_line = old_start;
319    let mut new_line = new_start;
320    let mut i = 1;
321
322    while i < lines.len() {
323        let line = lines[i];
324        if line.starts_with("@@") {
325            break;
326        }
327
328        if let Some(text) = line.strip_prefix('+') {
329            patch_lines.push(PatchLine {
330                kind: PatchLineKind::Added,
331                text: text.to_string(),
332                old_line_no: None,
333                new_line_no: Some(new_line),
334            });
335            new_line += 1;
336        } else if let Some(text) = line.strip_prefix('-') {
337            patch_lines.push(PatchLine {
338                kind: PatchLineKind::Removed,
339                text: text.to_string(),
340                old_line_no: Some(old_line),
341                new_line_no: None,
342            });
343            old_line += 1;
344        } else if let Some(text) = line.strip_prefix(' ') {
345            patch_lines.push(PatchLine {
346                kind: PatchLineKind::Context,
347                text: text.to_string(),
348                old_line_no: Some(old_line),
349                new_line_no: Some(new_line),
350            });
351            old_line += 1;
352            new_line += 1;
353        } else if line.starts_with('\\') {
354            patch_lines.push(PatchLine {
355                kind: PatchLineKind::Meta,
356                text: line.to_string(),
357                old_line_no: None,
358                new_line_no: None,
359            });
360        } else {
361            // Treat as context (git sometimes omits the leading space for empty lines)
362            patch_lines.push(PatchLine {
363                kind: PatchLineKind::Context,
364                text: line.to_string(),
365                old_line_no: Some(old_line),
366                new_line_no: Some(new_line),
367            });
368            old_line += 1;
369            new_line += 1;
370        }
371        i += 1;
372    }
373
374    Ok((
375        Hunk {
376            header: header.to_string(),
377            old_start,
378            old_count,
379            new_start,
380            new_count,
381            lines: patch_lines,
382        },
383        i,
384    ))
385}
386
387fn parse_hunk_header(header: &str) -> Result<(usize, usize, usize, usize), GitDiffError> {
388    // Format: @@ -old_start,old_count +new_start,new_count @@
389    let err = || GitDiffError::ParseError(format!("Invalid hunk header: {header}"));
390
391    let rest = header.strip_prefix("@@ -").ok_or_else(err)?;
392    let at_end = rest.find(" @@").ok_or_else(err)?;
393    let range_part = &rest[..at_end];
394
395    let (old_range, new_range) = range_part.split_once(" +").ok_or_else(err)?;
396
397    let (old_start, old_count) = parse_range(old_range).ok_or_else(err)?;
398    let (new_start, new_count) = parse_range(new_range).ok_or_else(err)?;
399
400    Ok((old_start, old_count, new_start, new_count))
401}
402
/// Parses a `start,count` range from a hunk header.
///
/// A bare `start` without a comma has an implied count of 1. Returns `None` when
/// either number fails to parse as `usize`.
fn parse_range(s: &str) -> Option<(usize, usize)> {
    // A missing count defaults to "1", which keeps both cases on one parsing path.
    let (start, count) = s.split_once(',').unwrap_or((s, "1"));
    let start = start.parse::<usize>().ok()?;
    let count = count.parse::<usize>().ok()?;
    Some((start, count))
}
411
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain modification yields one `Modified` file with a single hunk.
    #[test]
    fn parse_modified_file() {
        let input = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+    println!(\"hello\");
     let x = 1;
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].status, FileStatus::Modified);
        assert_eq!(files[0].additions(), 1);
        assert_eq!(files[0].deletions(), 0);
        assert!(!files[0].binary);
        assert_eq!(files[0].hunks.len(), 1);
    }

    /// `new file mode` marks the file as added and clears the old path.
    #[test]
    fn parse_added_file() {
        let input = "\
diff --git a/new_file.txt b/new_file.txt
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ b/new_file.txt
@@ -0,0 +1,2 @@
+line one
+line two
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "new_file.txt");
        assert_eq!(files[0].status, FileStatus::Added);
        assert!(files[0].old_path.is_none());
        assert_eq!(files[0].additions(), 2);
        assert_eq!(files[0].deletions(), 0);
    }

    /// `deleted file mode` marks the file as deleted and keeps the old path.
    #[test]
    fn parse_deleted_file() {
        let input = "\
diff --git a/old_file.txt b/old_file.txt
deleted file mode 100644
index abc1234..0000000
--- a/old_file.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-line one
-line two
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "old_file.txt");
        assert_eq!(files[0].status, FileStatus::Deleted);
        assert_eq!(files[0].old_path.as_deref(), Some("old_file.txt"));
        assert_eq!(files[0].additions(), 0);
        assert_eq!(files[0].deletions(), 2);
    }

    /// `rename from` supplies the old path of a renamed file.
    #[test]
    fn parse_renamed_file() {
        let input = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 95%
rename from old_name.rs
rename to new_name.rs
index abc1234..def5678 100644
--- a/old_name.rs
+++ b/new_name.rs
@@ -1,3 +1,3 @@
 fn main() {
-    old();
+    new();
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "new_name.rs");
        assert_eq!(files[0].status, FileStatus::Renamed);
        assert_eq!(files[0].old_path.as_deref(), Some("old_name.rs"));
        assert_eq!(files[0].additions(), 1);
        assert_eq!(files[0].deletions(), 1);
    }

    /// Header ranges are parsed and per-line numbers advance correctly.
    #[test]
    fn parse_hunk_header_tracking() {
        let input = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -10,4 +10,5 @@ fn context_label() {
 context
-removed
+added1
+added2
 context
";
        let files = parse_unified_diff(input).unwrap();
        let hunk = &files[0].hunks[0];
        assert_eq!(hunk.old_start, 10);
        assert_eq!(hunk.old_count, 4);
        assert_eq!(hunk.new_start, 10);
        assert_eq!(hunk.new_count, 5);

        // Check line number tracking
        let lines = &hunk.lines;
        // HunkHeader
        assert_eq!(lines[0].kind, PatchLineKind::HunkHeader);
        // context at old=10, new=10
        assert_eq!(lines[1].kind, PatchLineKind::Context);
        assert_eq!(lines[1].old_line_no, Some(10));
        assert_eq!(lines[1].new_line_no, Some(10));
        // removed at old=11
        assert_eq!(lines[2].kind, PatchLineKind::Removed);
        assert_eq!(lines[2].old_line_no, Some(11));
        assert_eq!(lines[2].new_line_no, None);
        // added at new=11
        assert_eq!(lines[3].kind, PatchLineKind::Added);
        assert_eq!(lines[3].old_line_no, None);
        assert_eq!(lines[3].new_line_no, Some(11));
        // added at new=12
        assert_eq!(lines[4].kind, PatchLineKind::Added);
        assert_eq!(lines[4].old_line_no, None);
        assert_eq!(lines[4].new_line_no, Some(12));
        // context at old=12, new=13
        assert_eq!(lines[5].kind, PatchLineKind::Context);
        assert_eq!(lines[5].old_line_no, Some(12));
        assert_eq!(lines[5].new_line_no, Some(13));
    }

    /// A `\ No newline at end of file` marker is kept as a `Meta` line.
    #[test]
    fn parse_meta_line() {
        let input = "\
diff --git a/file.txt b/file.txt
index abc..def 100644
--- a/file.txt
+++ b/file.txt
@@ -1,1 +1,1 @@
-old
\\ No newline at end of file
+new
";
        let files = parse_unified_diff(input).unwrap();
        let hunk = &files[0].hunks[0];
        let meta = hunk.lines.iter().find(|l| l.kind == PatchLineKind::Meta);
        assert!(meta.is_some());
        assert!(meta.unwrap().text.contains("No newline"));
    }

    /// Binary files are flagged and carry no hunks.
    #[test]
    fn parse_binary_diff() {
        let input = "\
diff --git a/image.png b/image.png
new file mode 100644
index 0000000..abc1234
Binary files /dev/null and b/image.png differ
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].status, FileStatus::Added);
        assert!(files[0].binary);
        assert!(files[0].hunks.is_empty());
    }

    /// Empty input parses to an empty file list.
    #[test]
    fn parse_empty_diff() {
        let files = parse_unified_diff("").unwrap();
        assert!(files.is_empty());
    }

    /// Consecutive file sections are split and parsed independently.
    #[test]
    fn parse_multiple_files() {
        let input = "\
diff --git a/a.rs b/a.rs
index abc..def 100644
--- a/a.rs
+++ b/a.rs
@@ -1,1 +1,1 @@
-old_a
+new_a
diff --git a/b.rs b/b.rs
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ b/b.rs
@@ -0,0 +1,1 @@
+new_b
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].path, "a.rs");
        assert_eq!(files[0].status, FileStatus::Modified);
        assert_eq!(files[1].path, "b.rs");
        assert_eq!(files[1].status, FileStatus::Added);
    }

    /// `diff --git` text inside an added line must not start a new file section.
    #[test]
    fn parse_diff_marker_inside_hunk_line() {
        // The counts in the hunk header match the body: two old lines, three new lines.
        let input = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -1,2 +1,3 @@
 fn main() {
+cannot parse paths from: diff --git /m)
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "file.rs");
        assert_eq!(files[0].status, FileStatus::Modified);
        assert_eq!(files[0].additions(), 1);
    }

    /// Several hunks in one file are parsed as separate entries.
    #[test]
    fn parse_multiple_hunks() {
        let input = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn a() {
-    old_a();
+    new_a();
 }
@@ -10,3 +10,3 @@
 fn b() {
-    old_b();
+    new_b();
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files[0].hunks.len(), 2);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[1].old_start, 10);
    }

    /// A range without a comma has an implied count of 1.
    #[test]
    fn parse_hunk_header_without_comma() {
        let (start, count, new_start, new_count) =
            parse_hunk_header("@@ -1 +1 @@ fn main()").unwrap();
        assert_eq!(start, 1);
        assert_eq!(count, 1);
        assert_eq!(new_start, 1);
        assert_eq!(new_count, 1);
    }

    /// Each status maps to its single-letter marker.
    #[test]
    fn file_status_marker() {
        assert_eq!(FileStatus::Modified.marker(), 'M');
        assert_eq!(FileStatus::Added.marker(), 'A');
        assert_eq!(FileStatus::Deleted.marker(), 'D');
        assert_eq!(FileStatus::Renamed.marker(), 'R');
    }

    /// Text before the first `diff --git` line is ignored.
    #[test]
    fn parse_ignores_text_before_first_header() {
        let input = "\
warning: something unrelated
diff --git a/a.rs b/a.rs
index abc..def 100644
--- a/a.rs
+++ b/a.rs
@@ -1,1 +1,1 @@
-old_a
+new_a
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "a.rs");
    }

    /// A header without recognizable paths is reported as a parse error.
    #[test]
    fn parse_rejects_malformed_diff_header() {
        let result = parse_unified_diff("diff --git nonsense\n");
        assert!(matches!(result, Err(GitDiffError::ParseError(_))));
    }

    /// Hunk headers that do not follow the `@@ -a,b +c,d @@` layout are rejected.
    #[test]
    fn parse_rejects_malformed_hunk_header() {
        assert!(matches!(
            parse_hunk_header("@@ bogus @@"),
            Err(GitDiffError::ParseError(_))
        ));
        assert!(matches!(
            parse_hunk_header("@@ -1,2 +x,1 @@"),
            Err(GitDiffError::ParseError(_))
        ));
    }
}
673        assert_eq!(FileStatus::Added.marker(), 'A');
674        assert_eq!(FileStatus::Deleted.marker(), 'D');
675        assert_eq!(FileStatus::Renamed.marker(), 'R');
676    }
677}