1use std::fmt;
2use std::path::{Path, PathBuf};
3
4#[doc = include_str!("docs/git_diff_document.md")]
5#[allow(dead_code)]
6pub struct GitDiffDocument {
7 pub repo_root: PathBuf,
8 pub files: Vec<FileDiff>,
9}
10
11pub struct FileDiff {
12 pub old_path: Option<String>,
13 pub path: String,
14 pub status: FileStatus,
15 pub hunks: Vec<Hunk>,
16 pub binary: bool,
17}
18
/// How a file changed relative to `HEAD`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// Content changed in place; also the default when the diff carries no
    /// explicit status line.
    Modified,
    /// The diff contains a `new file mode` line.
    Added,
    /// The diff contains a `deleted file mode` line.
    Deleted,
    /// The diff contains `rename from` / `rename to` lines.
    Renamed,
    /// The file is not tracked by git and is not ignored.
    Untracked,
}
27
28#[allow(dead_code)]
29pub struct Hunk {
30 pub header: String,
31 pub old_start: usize,
32 pub old_count: usize,
33 pub new_start: usize,
34 pub new_count: usize,
35 pub lines: Vec<PatchLine>,
36}
37
38pub struct PatchLine {
39 pub kind: PatchLineKind,
40 pub text: String,
41 pub old_line_no: Option<usize>,
42 pub new_line_no: Option<usize>,
43}
44
/// Classification of a [`PatchLine`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatchLineKind {
    /// The `@@ -a,b +c,d @@` line that opens a hunk.
    HunkHeader,
    /// A line present on both sides of the diff.
    Context,
    /// A line that exists only on the new side (`+`).
    Added,
    /// A line that exists only on the old side (`-`).
    Removed,
    /// A `\`-prefixed annotation such as `\ No newline at end of file`.
    Meta,
}
53
54#[doc = include_str!("docs/git_diff_error.md")]
55#[derive(Debug)]
56pub enum GitDiffError {
57 NotARepository,
58 CommandFailed { stderr: String },
59 ParseError(String),
60}
61
62impl fmt::Display for GitDiffError {
63 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64 match self {
65 Self::NotARepository => write!(f, "Not a git repository"),
66 Self::CommandFailed { stderr } => write!(f, "Git command failed: {stderr}"),
67 Self::ParseError(msg) => write!(f, "Failed to parse diff: {msg}"),
68 }
69 }
70}
71
72impl std::error::Error for GitDiffError {}
73
74impl FileStatus {
75 pub fn marker(self) -> char {
76 match self {
77 Self::Modified => 'M',
78 Self::Added => 'A',
79 Self::Deleted => 'D',
80 Self::Renamed => 'R',
81 Self::Untracked => '?',
82 }
83 }
84}
85
86impl FileDiff {
87 pub fn additions(&self) -> usize {
88 self.hunks.iter().flat_map(|hunk| &hunk.lines).filter(|line| line.kind == PatchLineKind::Added).count()
89 }
90
91 pub fn deletions(&self) -> usize {
92 self.hunks.iter().flat_map(|hunk| &hunk.lines).filter(|line| line.kind == PatchLineKind::Removed).count()
93 }
94}
95
96pub(crate) async fn load_git_diff(
97 working_dir: &Path,
98 cached_repo_root: Option<&Path>,
99) -> Result<GitDiffDocument, GitDiffError> {
100 let repo_root = match cached_repo_root {
101 Some(root) => root.to_path_buf(),
102 None => resolve_repo_root(working_dir).await?,
103 };
104 let diff_output =
105 run_git_command(&repo_root, &["diff", "--no-ext-diff", "--find-renames", "--unified=3", "HEAD"]).await?;
106
107 let mut files = if diff_output.trim().is_empty() { Vec::new() } else { parse_unified_diff(&diff_output)? };
108
109 let untracked_stdout = run_git_command(&repo_root, &["ls-files", "--others", "--exclude-standard"]).await?;
110 for path in untracked_stdout.lines().filter(|l| !l.is_empty()).map(String::from) {
111 files.push(build_untracked_file_diff(&repo_root, path).await);
112 }
113
114 Ok(GitDiffDocument { repo_root, files })
115}
116
117async fn resolve_repo_root(working_dir: &Path) -> Result<PathBuf, GitDiffError> {
118 let output = tokio::process::Command::new("git")
119 .arg("rev-parse")
120 .arg("--show-toplevel")
121 .current_dir(working_dir)
122 .output()
123 .await
124 .map_err(|e| GitDiffError::CommandFailed { stderr: e.to_string() })?;
125
126 if !output.status.success() {
127 let stderr = String::from_utf8_lossy(&output.stderr);
128 if stderr.contains("not a git repository") {
129 return Err(GitDiffError::NotARepository);
130 }
131 return Err(GitDiffError::CommandFailed { stderr: stderr.into_owned() });
132 }
133
134 let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
135 Ok(PathBuf::from(root))
136}
137
138async fn run_git_command(repo_root: &Path, args: &[&str]) -> Result<String, GitDiffError> {
139 let output = tokio::process::Command::new("git")
140 .args(args)
141 .current_dir(repo_root)
142 .output()
143 .await
144 .map_err(|e| GitDiffError::CommandFailed { stderr: e.to_string() })?;
145
146 if !output.status.success() {
147 let stderr = String::from_utf8_lossy(&output.stderr);
148 return Err(GitDiffError::CommandFailed { stderr: stderr.into_owned() });
149 }
150
151 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
152}
153
154async fn build_untracked_file_diff(repo_root: &Path, relative_path: String) -> FileDiff {
155 let full_path = repo_root.join(&relative_path);
156 let Ok(bytes) = tokio::fs::read(&full_path).await else {
157 return binary_untracked(relative_path);
158 };
159
160 if bytes.iter().take(8192).any(|&b| b == 0) {
161 return binary_untracked(relative_path);
162 }
163
164 let Ok(content) = String::from_utf8(bytes) else {
165 return binary_untracked(relative_path);
166 };
167
168 let text_lines: Vec<&str> = content.lines().collect();
169 let line_count = text_lines.len();
170
171 let hunk_header = format!("@@ -0,0 +1,{line_count} @@");
172
173 let mut patch_lines = vec![PatchLine {
174 kind: PatchLineKind::HunkHeader,
175 text: hunk_header.clone(),
176 old_line_no: None,
177 new_line_no: None,
178 }];
179
180 for (i, line) in text_lines.iter().enumerate() {
181 patch_lines.push(PatchLine {
182 kind: PatchLineKind::Added,
183 text: line.to_string(),
184 old_line_no: None,
185 new_line_no: Some(i + 1),
186 });
187 }
188
189 let hunk = Hunk {
190 header: hunk_header,
191 old_start: 0,
192 old_count: 0,
193 new_start: 1,
194 new_count: line_count,
195 lines: patch_lines,
196 };
197
198 FileDiff { old_path: None, path: relative_path, status: FileStatus::Untracked, hunks: vec![hunk], binary: false }
199}
200
201fn binary_untracked(path: String) -> FileDiff {
202 FileDiff { old_path: None, path, status: FileStatus::Untracked, hunks: Vec::new(), binary: true }
203}
204
205pub(crate) fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, GitDiffError> {
206 split_diff_files(input).into_iter().map(parse_file_diff).collect()
207}
208
/// Cuts a multi-file diff into per-file slices.
///
/// Each slice starts at a line beginning with `diff --git ` and runs up to (not
/// including) the next such line, or to the end of the input. Anything before
/// the first header is dropped.
fn split_diff_files(input: &str) -> Vec<&str> {
    // Byte offsets of every line that opens a new file section.
    let mut starts = Vec::new();
    let mut offset = 0;
    for line in input.split_inclusive('\n') {
        if line.starts_with("diff --git ") {
            starts.push(offset);
        }
        offset += line.len();
    }

    // Each section ends where the next one begins; the last ends with the input.
    let ends = starts.iter().skip(1).copied().chain(std::iter::once(input.len()));
    starts.iter().copied().zip(ends).map(|(start, end)| &input[start..end]).collect()
}
234
235fn parse_file_diff(chunk: &str) -> Result<FileDiff, GitDiffError> {
236 let lines: Vec<&str> = chunk.lines().collect();
237 if lines.is_empty() {
238 return Err(GitDiffError::ParseError("Empty diff chunk".to_string()));
239 }
240
241 let (old_path, new_path) = parse_diff_header(lines[0])?;
242 let (status, binary, rename_from, hunk_start) = scan_file_metadata(&lines);
243 let hunks = if binary { Vec::new() } else { parse_file_hunks(&lines[hunk_start..])? };
244
245 Ok(FileDiff { old_path: resolve_old_path(status, rename_from, old_path), path: new_path, status, hunks, binary })
246}
247
248fn scan_file_metadata(lines: &[&str]) -> (FileStatus, bool, Option<String>, usize) {
249 let mut status = FileStatus::Modified;
250 let mut binary = false;
251 let mut rename_from = None;
252 let mut i = 1;
253
254 while i < lines.len() {
255 let line = lines[i];
256 if line.starts_with("new file mode") {
257 status = FileStatus::Added;
258 } else if line.starts_with("deleted file mode") {
259 status = FileStatus::Deleted;
260 } else if let Some(from) = line.strip_prefix("rename from ") {
261 status = FileStatus::Renamed;
262 rename_from = Some(from.to_string());
263 } else if line.starts_with("rename to ") {
264 status = FileStatus::Renamed;
265 } else if line.starts_with("Binary files ") {
266 binary = true;
267 } else if line.starts_with("@@") {
268 break;
269 }
270 i += 1;
271 }
272
273 (status, binary, rename_from, i)
274}
275
276fn parse_file_hunks(lines: &[&str]) -> Result<Vec<Hunk>, GitDiffError> {
277 let mut hunks = Vec::new();
278 let mut i = 0;
279
280 while i < lines.len() {
281 if lines[i].starts_with("@@") {
282 let (hunk, consumed) = parse_hunk(&lines[i..])?;
283 hunks.push(hunk);
284 i += consumed;
285 } else {
286 i += 1;
287 }
288 }
289
290 Ok(hunks)
291}
292
293fn resolve_old_path(status: FileStatus, rename_from: Option<String>, old_path: String) -> Option<String> {
294 if status == FileStatus::Added || status == FileStatus::Untracked {
295 None
296 } else if status == FileStatus::Renamed {
297 rename_from.or(Some(old_path))
298 } else {
299 Some(old_path)
300 }
301}
302
303fn parse_diff_header(line: &str) -> Result<(String, String), GitDiffError> {
304 let rest = line
305 .strip_prefix("diff --git ")
306 .ok_or_else(|| GitDiffError::ParseError(format!("Invalid diff header: {line}")))?;
307
308 if let Some((a, b)) = rest.split_once(" b/") {
309 let old = a.strip_prefix("a/").unwrap_or(a).to_string();
310 let new = b.to_string();
311 Ok((old, new))
312 } else {
313 Err(GitDiffError::ParseError(format!("Cannot parse paths from: {line}")))
314 }
315}
316
317fn parse_hunk(lines: &[&str]) -> Result<(Hunk, usize), GitDiffError> {
318 let header = lines[0];
319 let (old_start, old_count, new_start, new_count) = parse_hunk_header(header)?;
320
321 let mut patch_lines = Vec::new();
322 patch_lines.push(PatchLine {
323 kind: PatchLineKind::HunkHeader,
324 text: header.to_string(),
325 old_line_no: None,
326 new_line_no: None,
327 });
328
329 let mut old_line = old_start;
330 let mut new_line = new_start;
331 let mut i = 1;
332
333 while i < lines.len() {
334 let line = lines[i];
335 if line.starts_with("@@") {
336 break;
337 }
338
339 if let Some(text) = line.strip_prefix('+') {
340 patch_lines.push(PatchLine {
341 kind: PatchLineKind::Added,
342 text: text.to_string(),
343 old_line_no: None,
344 new_line_no: Some(new_line),
345 });
346 new_line += 1;
347 } else if let Some(text) = line.strip_prefix('-') {
348 patch_lines.push(PatchLine {
349 kind: PatchLineKind::Removed,
350 text: text.to_string(),
351 old_line_no: Some(old_line),
352 new_line_no: None,
353 });
354 old_line += 1;
355 } else if let Some(text) = line.strip_prefix(' ') {
356 patch_lines.push(PatchLine {
357 kind: PatchLineKind::Context,
358 text: text.to_string(),
359 old_line_no: Some(old_line),
360 new_line_no: Some(new_line),
361 });
362 old_line += 1;
363 new_line += 1;
364 } else if line.starts_with('\\') {
365 patch_lines.push(PatchLine {
366 kind: PatchLineKind::Meta,
367 text: line.to_string(),
368 old_line_no: None,
369 new_line_no: None,
370 });
371 } else {
372 patch_lines.push(PatchLine {
374 kind: PatchLineKind::Context,
375 text: line.to_string(),
376 old_line_no: Some(old_line),
377 new_line_no: Some(new_line),
378 });
379 old_line += 1;
380 new_line += 1;
381 }
382 i += 1;
383 }
384
385 Ok((Hunk { header: header.to_string(), old_start, old_count, new_start, new_count, lines: patch_lines }, i))
386}
387
388fn parse_hunk_header(header: &str) -> Result<(usize, usize, usize, usize), GitDiffError> {
389 let err = || GitDiffError::ParseError(format!("Invalid hunk header: {header}"));
391
392 let rest = header.strip_prefix("@@ -").ok_or_else(err)?;
393 let at_end = rest.find(" @@").ok_or_else(err)?;
394 let range_part = &rest[..at_end];
395
396 let (old_range, new_range) = range_part.split_once(" +").ok_or_else(err)?;
397
398 let (old_start, old_count) = parse_range(old_range).ok_or_else(err)?;
399 let (new_start, new_count) = parse_range(new_range).ok_or_else(err)?;
400
401 Ok((old_start, old_count, new_start, new_count))
402}
403
/// Parses a hunk range of the form `start,count` or just `start`.
///
/// A missing count means 1. Returns `None` when either number fails to parse.
fn parse_range(s: &str) -> Option<(usize, usize)> {
    match s.split_once(',') {
        Some((start, count)) => Some((start.parse().ok()?, count.parse().ok()?)),
        None => s.parse::<usize>().ok().map(|start| (start, 1)),
    }
}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a fixture that is expected to be well-formed.
    fn parse(patch: &str) -> Vec<FileDiff> {
        parse_unified_diff(patch).unwrap()
    }

    /// A plain modification yields one `Modified` file with a single hunk.
    #[test]
    fn parse_modified_file() {
        let patch = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+ println!(\"hello\");
 let x = 1;
 }
";
        let parsed = parse(patch);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert_eq!(file.path, "src/main.rs");
        assert_eq!(file.status, FileStatus::Modified);
        assert_eq!(file.additions(), 1);
        assert_eq!(file.deletions(), 0);
        assert!(!file.binary);
        assert_eq!(file.hunks.len(), 1);
    }

    /// `new file mode` marks the file as added and clears the old path.
    #[test]
    fn parse_added_file() {
        let patch = "\
diff --git a/new_file.txt b/new_file.txt
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ b/new_file.txt
@@ -0,0 +1,2 @@
+line one
+line two
";
        let parsed = parse(patch);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert_eq!(file.path, "new_file.txt");
        assert_eq!(file.status, FileStatus::Added);
        assert!(file.old_path.is_none());
        assert_eq!(file.additions(), 2);
        assert_eq!(file.deletions(), 0);
    }

    /// `deleted file mode` marks the file as deleted.
    #[test]
    fn parse_deleted_file() {
        let patch = "\
diff --git a/old_file.txt b/old_file.txt
deleted file mode 100644
index abc1234..0000000
--- a/old_file.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-line one
-line two
";
        let parsed = parse(patch);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert_eq!(file.path, "old_file.txt");
        assert_eq!(file.status, FileStatus::Deleted);
        assert_eq!(file.additions(), 0);
        assert_eq!(file.deletions(), 2);
    }

    /// Rename metadata sets the status and records the source path.
    #[test]
    fn parse_renamed_file() {
        let patch = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 95%
rename from old_name.rs
rename to new_name.rs
index abc1234..def5678 100644
--- a/old_name.rs
+++ b/new_name.rs
@@ -1,3 +1,3 @@
 fn main() {
- old();
+ new();
 }
";
        let parsed = parse(patch);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert_eq!(file.path, "new_name.rs");
        assert_eq!(file.status, FileStatus::Renamed);
        assert_eq!(file.old_path.as_deref(), Some("old_name.rs"));
        assert_eq!(file.additions(), 1);
        assert_eq!(file.deletions(), 1);
    }

    /// Header ranges are recorded and line numbers advance per side.
    #[test]
    fn parse_hunk_header_tracking() {
        let patch = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -10,4 +10,5 @@ fn context_label() {
 context
-removed
+added1
+added2
 context
";
        let parsed = parse(patch);
        let hunk = &parsed[0].hunks[0];
        assert_eq!(hunk.old_start, 10);
        assert_eq!(hunk.old_count, 4);
        assert_eq!(hunk.new_start, 10);
        assert_eq!(hunk.new_count, 5);

        assert_eq!(hunk.lines[0].kind, PatchLineKind::HunkHeader);

        // Expected (kind, old line number, new line number) for the lines after the header.
        let expected = [
            (PatchLineKind::Context, Some(10), Some(10)),
            (PatchLineKind::Removed, Some(11), None),
            (PatchLineKind::Added, None, Some(11)),
            (PatchLineKind::Added, None, Some(12)),
            (PatchLineKind::Context, Some(12), Some(13)),
        ];
        for (offset, &(kind, old_no, new_no)) in expected.iter().enumerate() {
            let line = &hunk.lines[offset + 1];
            assert_eq!(line.kind, kind);
            assert_eq!(line.old_line_no, old_no);
            assert_eq!(line.new_line_no, new_no);
        }
    }

    /// Backslash annotations are kept as meta lines.
    #[test]
    fn parse_meta_line() {
        let patch = "\
diff --git a/file.txt b/file.txt
index abc..def 100644
--- a/file.txt
+++ b/file.txt
@@ -1,1 +1,1 @@
-old
\\ No newline at end of file
+new
";
        let parsed = parse(patch);
        let hunk = &parsed[0].hunks[0];
        let first_meta = hunk.lines.iter().find(|line| line.kind == PatchLineKind::Meta);
        assert!(matches!(first_meta, Some(line) if line.text.contains("No newline")));
    }

    /// A `Binary files ...` line marks the file binary and yields no hunks.
    #[test]
    fn parse_binary_diff() {
        let patch = "\
diff --git a/image.png b/image.png
new file mode 100644
index 0000000..abc1234
Binary files /dev/null and b/image.png differ
";
        let parsed = parse(patch);
        assert_eq!(parsed.len(), 1);
        assert!(parsed[0].binary);
        assert!(parsed[0].hunks.is_empty());
    }

    /// Empty input parses to an empty list.
    #[test]
    fn parse_empty_diff() {
        assert!(parse("").is_empty());
    }

    /// Consecutive file sections are split and parsed independently.
    #[test]
    fn parse_multiple_files() {
        let patch = "\
diff --git a/a.rs b/a.rs
index abc..def 100644
--- a/a.rs
+++ b/a.rs
@@ -1,1 +1,1 @@
-old_a
+new_a
diff --git a/b.rs b/b.rs
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ b/b.rs
@@ -0,0 +1,1 @@
+new_b
";
        let parsed = parse(patch);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].path, "a.rs");
        assert_eq!(parsed[0].status, FileStatus::Modified);
        assert_eq!(parsed[1].path, "b.rs");
        assert_eq!(parsed[1].status, FileStatus::Added);
    }

    /// `diff --git` appearing inside hunk content does not start a new file.
    #[test]
    fn parse_diff_marker_inside_hunk_line() {
        let patch = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -1,1 +1,2 @@
 fn main() {
+cannot parse paths from: diff --git /m)
 }
";
        let parsed = parse(patch);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert_eq!(file.path, "file.rs");
        assert_eq!(file.status, FileStatus::Modified);
        assert_eq!(file.additions(), 1);
    }

    /// Each `@@` header opens its own hunk.
    #[test]
    fn parse_multiple_hunks() {
        let patch = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn a() {
- old_a();
+ new_a();
 }
@@ -10,3 +10,3 @@
 fn b() {
- old_b();
+ new_b();
 }
";
        let parsed = parse(patch);
        let hunks = &parsed[0].hunks;
        assert_eq!(hunks.len(), 2);
        assert_eq!(hunks[0].old_start, 1);
        assert_eq!(hunks[1].old_start, 10);
    }

    /// A range without a count defaults the count to 1.
    #[test]
    fn parse_hunk_header_without_comma() {
        let (old_start, old_count, new_start, new_count) = parse_hunk_header("@@ -1 +1 @@ fn main()").unwrap();
        assert_eq!(old_start, 1);
        assert_eq!(old_count, 1);
        assert_eq!(new_start, 1);
        assert_eq!(new_count, 1);
    }

    /// Every status maps to its one-character marker.
    #[test]
    fn file_status_marker() {
        let expected = [
            (FileStatus::Modified, 'M'),
            (FileStatus::Added, 'A'),
            (FileStatus::Deleted, 'D'),
            (FileStatus::Renamed, 'R'),
            (FileStatus::Untracked, '?'),
        ];
        for &(status, marker) in expected.iter() {
            assert_eq!(status.marker(), marker);
        }
    }

    /// A readable text file becomes a single all-added hunk.
    #[tokio::test]
    async fn build_untracked_text_file() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("hello.txt"), "line one\nline two\nline three\n").unwrap();

        let diff = build_untracked_file_diff(dir.path(), "hello.txt".to_string()).await;
        assert_eq!(diff.path, "hello.txt");
        assert!(diff.old_path.is_none());
        assert_eq!(diff.status, FileStatus::Untracked);
        assert!(!diff.binary);
        assert_eq!(diff.hunks.len(), 1);
        assert_eq!(diff.additions(), 3);
        assert_eq!(diff.deletions(), 0);

        let hunk = &diff.hunks[0];
        assert_eq!(hunk.old_start, 0);
        assert_eq!(hunk.old_count, 0);
        assert_eq!(hunk.new_start, 1);
        assert_eq!(hunk.new_count, 3);

        let first_added = &hunk.lines[1];
        assert_eq!(first_added.new_line_no, Some(1));
        assert_eq!(first_added.text, "line one");
    }

    /// A file containing NUL bytes is reported as binary with no hunks.
    #[tokio::test]
    async fn build_untracked_binary_file() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("image.bin"), b"PNG\x00\x00binary data").unwrap();

        let diff = build_untracked_file_diff(dir.path(), "image.bin".to_string()).await;
        assert_eq!(diff.status, FileStatus::Untracked);
        assert!(diff.binary);
        assert!(diff.hunks.is_empty());
    }

    /// A file that cannot be read is reported as binary with no hunks.
    #[tokio::test]
    async fn build_untracked_missing_file() {
        let dir = tempfile::tempdir().unwrap();
        let diff = build_untracked_file_diff(dir.path(), "does_not_exist.txt".to_string()).await;
        assert!(diff.binary);
        assert!(diff.hunks.is_empty());
    }
}