1use std::fmt;
2use std::path::{Path, PathBuf};
3
/// Parsed result of one `git diff` run: the repository it came from plus one
/// [`FileDiff`] per `diff --git` section of the output.
// NOTE(review): `dead_code` is allowed presumably because not every field is
// read by the rest of the crate yet — confirm before removing.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitDiffDocument {
    /// Repository root the diff was produced in.
    pub repo_root: PathBuf,
    /// Per-file changes, in the order they appear in the diff output.
    pub files: Vec<FileDiff>,
}

/// All changes to a single file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDiff {
    /// Path before the change; `None` for newly added files.
    pub old_path: Option<String>,
    /// Path after the change (the `b/` side of the `diff --git` header).
    pub path: String,
    /// What kind of change this file underwent.
    pub status: FileStatus,
    /// Parsed hunks; left empty for binary files.
    pub hunks: Vec<Hunk>,
    /// `true` when the section contained a `Binary files … differ` line.
    pub binary: bool,
}

/// Kind of change recorded for a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// Default when no add/delete/rename metadata line is present.
    Modified,
    /// Section contained a `new file mode` line.
    Added,
    /// Section contained a `deleted file mode` line.
    Deleted,
    /// Section contained `rename from` / `rename to` lines.
    Renamed,
}

/// One `@@ … @@` hunk of a file diff.
// NOTE(review): `dead_code` is allowed presumably because the numeric range
// fields are not consumed everywhere yet — confirm before removing.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// The raw hunk header line, including any trailing section label.
    pub header: String,
    /// First line of the hunk in the old file.
    pub old_start: usize,
    /// Number of old-file lines the hunk covers.
    pub old_count: usize,
    /// First line of the hunk in the new file.
    pub new_start: usize,
    /// Number of new-file lines the hunk covers.
    pub new_count: usize,
    /// The header line followed by every body line of the hunk.
    pub lines: Vec<PatchLine>,
}

/// A single line inside a hunk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchLine {
    /// How the line should be interpreted.
    pub kind: PatchLineKind,
    /// Line content; the leading `+`, `-` or space marker is stripped, while
    /// header and meta lines keep their full text.
    pub text: String,
    /// Line number in the old file, when the line exists there.
    pub old_line_no: Option<usize>,
    /// Line number in the new file, when the line exists there.
    pub new_line_no: Option<usize>,
}

/// Classification of a [`PatchLine`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatchLineKind {
    /// The `@@ … @@` line that opens a hunk.
    HunkHeader,
    /// Unchanged line present on both sides.
    Context,
    /// Line present only in the new file.
    Added,
    /// Line present only in the old file.
    Removed,
    /// Annotation line starting with a backslash, e.g.
    /// `\ No newline at end of file`.
    Meta,
}
51
/// Errors produced while locating a repository, running git, or parsing the
/// diff it printed.
///
/// The type is `Clone`/`PartialEq`/`Eq` so callers and tests can compare
/// results directly (e.g. `assert_eq!(err, GitDiffError::NotARepository)`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GitDiffError {
    /// The working directory is not inside a git repository.
    NotARepository,
    /// Git could not be started or exited unsuccessfully; `stderr` carries
    /// the spawn error text or git's standard error output.
    CommandFailed { stderr: String },
    /// The diff text did not have the expected shape; the payload describes
    /// the offending input.
    ParseError(String),
}

impl fmt::Display for GitDiffError {
    /// Writes the user-facing message for each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            GitDiffError::NotARepository => f.write_str("Not a git repository"),
            GitDiffError::CommandFailed { stderr } => {
                write!(f, "Git command failed: {stderr}")
            }
            GitDiffError::ParseError(msg) => write!(f, "Failed to parse diff: {msg}"),
        }
    }
}

impl std::error::Error for GitDiffError {}
70
71impl FileStatus {
72 pub fn marker(self) -> char {
73 match self {
74 Self::Modified => 'M',
75 Self::Added => 'A',
76 Self::Deleted => 'D',
77 Self::Renamed => 'R',
78 }
79 }
80}
81
82impl FileDiff {
83 pub fn additions(&self) -> usize {
84 self.hunks
85 .iter()
86 .flat_map(|hunk| &hunk.lines)
87 .filter(|line| line.kind == PatchLineKind::Added)
88 .count()
89 }
90
91 pub fn deletions(&self) -> usize {
92 self.hunks
93 .iter()
94 .flat_map(|hunk| &hunk.lines)
95 .filter(|line| line.kind == PatchLineKind::Removed)
96 .count()
97 }
98}
99
100pub(crate) async fn load_git_diff(
101 working_dir: &Path,
102 cached_repo_root: Option<&Path>,
103) -> Result<GitDiffDocument, GitDiffError> {
104 let repo_root = match cached_repo_root {
105 Some(root) => root.to_path_buf(),
106 None => resolve_repo_root(working_dir).await?,
107 };
108 let diff_output = run_git_diff(&repo_root).await?;
109
110 if diff_output.trim().is_empty() {
111 return Ok(GitDiffDocument {
112 repo_root,
113 files: Vec::new(),
114 });
115 }
116
117 let files = parse_unified_diff(&diff_output)?;
118 Ok(GitDiffDocument { repo_root, files })
119}
120
121async fn resolve_repo_root(working_dir: &Path) -> Result<PathBuf, GitDiffError> {
122 let output = tokio::process::Command::new("git")
123 .arg("rev-parse")
124 .arg("--show-toplevel")
125 .current_dir(working_dir)
126 .output()
127 .await
128 .map_err(|e| GitDiffError::CommandFailed {
129 stderr: e.to_string(),
130 })?;
131
132 if !output.status.success() {
133 let stderr = String::from_utf8_lossy(&output.stderr);
134 if stderr.contains("not a git repository") {
135 return Err(GitDiffError::NotARepository);
136 }
137 return Err(GitDiffError::CommandFailed {
138 stderr: stderr.into_owned(),
139 });
140 }
141
142 let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
143 Ok(PathBuf::from(root))
144}
145
146async fn run_git_diff(repo_root: &Path) -> Result<String, GitDiffError> {
147 let output = tokio::process::Command::new("git")
148 .args([
149 "diff",
150 "--no-ext-diff",
151 "--find-renames",
152 "--unified=3",
153 "HEAD",
154 ])
155 .current_dir(repo_root)
156 .output()
157 .await
158 .map_err(|e| GitDiffError::CommandFailed {
159 stderr: e.to_string(),
160 })?;
161
162 if !output.status.success() {
163 let stderr = String::from_utf8_lossy(&output.stderr);
164 return Err(GitDiffError::CommandFailed {
165 stderr: stderr.into_owned(),
166 });
167 }
168
169 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
170}
171
172pub(crate) fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, GitDiffError> {
173 split_diff_files(input)
174 .into_iter()
175 .map(parse_file_diff)
176 .collect()
177}
178
/// Cuts `input` into per-file sections, each starting at a line that begins
/// with `diff --git ` and running up to the next such line (or end of input).
///
/// Anything before the first `diff --git ` line is dropped. Sections are
/// borrowed slices of `input` and keep their line terminators.
fn split_diff_files(input: &str) -> Vec<&str> {
    let mut sections = Vec::new();
    let mut section_start: Option<usize> = None;
    // Byte offset of the line currently being inspected.
    let mut offset = 0;

    for line in input.split_inclusive('\n') {
        if line.starts_with("diff --git ") {
            // A new header closes the section that was open, if any.
            if let Some(begin) = section_start.replace(offset) {
                sections.push(&input[begin..offset]);
            }
        }
        offset += line.len();
    }

    if let Some(begin) = section_start {
        sections.push(&input[begin..]);
    }

    sections
}
207
208fn parse_file_diff(chunk: &str) -> Result<FileDiff, GitDiffError> {
209 let lines: Vec<&str> = chunk.lines().collect();
210 if lines.is_empty() {
211 return Err(GitDiffError::ParseError("Empty diff chunk".to_string()));
212 }
213
214 let (old_path, new_path) = parse_diff_header(lines[0])?;
215 let (status, binary, rename_from, hunk_start) = scan_file_metadata(&lines);
216 let hunks = if binary {
217 Vec::new()
218 } else {
219 parse_file_hunks(&lines[hunk_start..])?
220 };
221
222 Ok(FileDiff {
223 old_path: resolve_old_path(status, rename_from, old_path),
224 path: new_path,
225 status,
226 hunks,
227 binary,
228 })
229}
230
231fn scan_file_metadata(lines: &[&str]) -> (FileStatus, bool, Option<String>, usize) {
232 let mut status = FileStatus::Modified;
233 let mut binary = false;
234 let mut rename_from = None;
235 let mut i = 1;
236
237 while i < lines.len() {
238 let line = lines[i];
239 if line.starts_with("new file mode") {
240 status = FileStatus::Added;
241 } else if line.starts_with("deleted file mode") {
242 status = FileStatus::Deleted;
243 } else if let Some(from) = line.strip_prefix("rename from ") {
244 status = FileStatus::Renamed;
245 rename_from = Some(from.to_string());
246 } else if line.starts_with("rename to ") {
247 status = FileStatus::Renamed;
248 } else if line.starts_with("Binary files ") {
249 binary = true;
250 } else if line.starts_with("@@") {
251 break;
252 }
253 i += 1;
254 }
255
256 (status, binary, rename_from, i)
257}
258
259fn parse_file_hunks(lines: &[&str]) -> Result<Vec<Hunk>, GitDiffError> {
260 let mut hunks = Vec::new();
261 let mut i = 0;
262
263 while i < lines.len() {
264 if lines[i].starts_with("@@") {
265 let (hunk, consumed) = parse_hunk(&lines[i..])?;
266 hunks.push(hunk);
267 i += consumed;
268 } else {
269 i += 1;
270 }
271 }
272
273 Ok(hunks)
274}
275
276fn resolve_old_path(
277 status: FileStatus,
278 rename_from: Option<String>,
279 old_path: String,
280) -> Option<String> {
281 if status == FileStatus::Added {
282 None
283 } else if status == FileStatus::Renamed {
284 rename_from.or(Some(old_path))
285 } else {
286 Some(old_path)
287 }
288}
289
290fn parse_diff_header(line: &str) -> Result<(String, String), GitDiffError> {
291 let rest = line
292 .strip_prefix("diff --git ")
293 .ok_or_else(|| GitDiffError::ParseError(format!("Invalid diff header: {line}")))?;
294
295 if let Some((a, b)) = rest.split_once(" b/") {
296 let old = a.strip_prefix("a/").unwrap_or(a).to_string();
297 let new = b.to_string();
298 Ok((old, new))
299 } else {
300 Err(GitDiffError::ParseError(format!(
301 "Cannot parse paths from: {line}"
302 )))
303 }
304}
305
306fn parse_hunk(lines: &[&str]) -> Result<(Hunk, usize), GitDiffError> {
307 let header = lines[0];
308 let (old_start, old_count, new_start, new_count) = parse_hunk_header(header)?;
309
310 let mut patch_lines = Vec::new();
311 patch_lines.push(PatchLine {
312 kind: PatchLineKind::HunkHeader,
313 text: header.to_string(),
314 old_line_no: None,
315 new_line_no: None,
316 });
317
318 let mut old_line = old_start;
319 let mut new_line = new_start;
320 let mut i = 1;
321
322 while i < lines.len() {
323 let line = lines[i];
324 if line.starts_with("@@") {
325 break;
326 }
327
328 if let Some(text) = line.strip_prefix('+') {
329 patch_lines.push(PatchLine {
330 kind: PatchLineKind::Added,
331 text: text.to_string(),
332 old_line_no: None,
333 new_line_no: Some(new_line),
334 });
335 new_line += 1;
336 } else if let Some(text) = line.strip_prefix('-') {
337 patch_lines.push(PatchLine {
338 kind: PatchLineKind::Removed,
339 text: text.to_string(),
340 old_line_no: Some(old_line),
341 new_line_no: None,
342 });
343 old_line += 1;
344 } else if let Some(text) = line.strip_prefix(' ') {
345 patch_lines.push(PatchLine {
346 kind: PatchLineKind::Context,
347 text: text.to_string(),
348 old_line_no: Some(old_line),
349 new_line_no: Some(new_line),
350 });
351 old_line += 1;
352 new_line += 1;
353 } else if line.starts_with('\\') {
354 patch_lines.push(PatchLine {
355 kind: PatchLineKind::Meta,
356 text: line.to_string(),
357 old_line_no: None,
358 new_line_no: None,
359 });
360 } else {
361 patch_lines.push(PatchLine {
363 kind: PatchLineKind::Context,
364 text: line.to_string(),
365 old_line_no: Some(old_line),
366 new_line_no: Some(new_line),
367 });
368 old_line += 1;
369 new_line += 1;
370 }
371 i += 1;
372 }
373
374 Ok((
375 Hunk {
376 header: header.to_string(),
377 old_start,
378 old_count,
379 new_start,
380 new_count,
381 lines: patch_lines,
382 },
383 i,
384 ))
385}
386
387fn parse_hunk_header(header: &str) -> Result<(usize, usize, usize, usize), GitDiffError> {
388 let err = || GitDiffError::ParseError(format!("Invalid hunk header: {header}"));
390
391 let rest = header.strip_prefix("@@ -").ok_or_else(err)?;
392 let at_end = rest.find(" @@").ok_or_else(err)?;
393 let range_part = &rest[..at_end];
394
395 let (old_range, new_range) = range_part.split_once(" +").ok_or_else(err)?;
396
397 let (old_start, old_count) = parse_range(old_range).ok_or_else(err)?;
398 let (new_start, new_count) = parse_range(new_range).ok_or_else(err)?;
399
400 Ok((old_start, old_count, new_start, new_count))
401}
402
/// Parses one side of a hunk range: `start,count`, or a bare `start` whose
/// count defaults to 1. Returns `None` when a number is missing or invalid.
fn parse_range(s: &str) -> Option<(usize, usize)> {
    match s.split_once(',') {
        Some((start, count)) => Some((start.parse().ok()?, count.parse().ok()?)),
        None => s.parse().ok().map(|start| (start, 1)),
    }
}
411
// Unit tests for the diff parser. Every fixture is a literal unified diff;
// the lines inside the string literals start at column 0 on purpose, because
// the first character of each diff line is significant to the parser.
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain modification yields one `Modified` file with one hunk and the
    /// added line counted.
    #[test]
    fn parse_modified_file() {
        let input = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+ println!(\"hello\");
 let x = 1;
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].status, FileStatus::Modified);
        assert_eq!(files[0].additions(), 1);
        assert_eq!(files[0].deletions(), 0);
        assert!(!files[0].binary);
        assert_eq!(files[0].hunks.len(), 1);
    }

    /// A `new file mode` section is reported as `Added` with no old path.
    #[test]
    fn parse_added_file() {
        let input = "\
diff --git a/new_file.txt b/new_file.txt
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ b/new_file.txt
@@ -0,0 +1,2 @@
+line one
+line two
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "new_file.txt");
        assert_eq!(files[0].status, FileStatus::Added);
        assert!(files[0].old_path.is_none());
        assert_eq!(files[0].additions(), 2);
        assert_eq!(files[0].deletions(), 0);
    }

    /// A `deleted file mode` section is reported as `Deleted` and its lines
    /// count as deletions.
    #[test]
    fn parse_deleted_file() {
        let input = "\
diff --git a/old_file.txt b/old_file.txt
deleted file mode 100644
index abc1234..0000000
--- a/old_file.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-line one
-line two
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "old_file.txt");
        assert_eq!(files[0].status, FileStatus::Deleted);
        assert_eq!(files[0].additions(), 0);
        assert_eq!(files[0].deletions(), 2);
    }

    /// `rename from` / `rename to` lines produce a `Renamed` file whose old
    /// path is the rename source.
    #[test]
    fn parse_renamed_file() {
        let input = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 95%
rename from old_name.rs
rename to new_name.rs
index abc1234..def5678 100644
--- a/old_name.rs
+++ b/new_name.rs
@@ -1,3 +1,3 @@
 fn main() {
- old();
+ new();
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "new_name.rs");
        assert_eq!(files[0].status, FileStatus::Renamed);
        assert_eq!(files[0].old_path.as_deref(), Some("old_name.rs"));
        assert_eq!(files[0].additions(), 1);
        assert_eq!(files[0].deletions(), 1);
    }

    /// Header ranges are parsed (label after the closing `@@` is ignored) and
    /// old/new line numbers advance independently through the hunk body.
    #[test]
    fn parse_hunk_header_tracking() {
        let input = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -10,4 +10,5 @@ fn context_label() {
 context
-removed
+added1
+added2
 context
";
        let files = parse_unified_diff(input).unwrap();
        let hunk = &files[0].hunks[0];
        assert_eq!(hunk.old_start, 10);
        assert_eq!(hunk.old_count, 4);
        assert_eq!(hunk.new_start, 10);
        assert_eq!(hunk.new_count, 5);

        let lines = &hunk.lines;
        // Index 0 is the hunk header itself.
        assert_eq!(lines[0].kind, PatchLineKind::HunkHeader);
        // First context line: both sides at 10.
        assert_eq!(lines[1].kind, PatchLineKind::Context);
        assert_eq!(lines[1].old_line_no, Some(10));
        assert_eq!(lines[1].new_line_no, Some(10));
        // Removed line only advances the old side.
        assert_eq!(lines[2].kind, PatchLineKind::Removed);
        assert_eq!(lines[2].old_line_no, Some(11));
        assert_eq!(lines[2].new_line_no, None);
        // Added lines only advance the new side.
        assert_eq!(lines[3].kind, PatchLineKind::Added);
        assert_eq!(lines[3].old_line_no, None);
        assert_eq!(lines[3].new_line_no, Some(11));
        assert_eq!(lines[4].kind, PatchLineKind::Added);
        assert_eq!(lines[4].old_line_no, None);
        assert_eq!(lines[4].new_line_no, Some(12));
        // Trailing context line: the two sides have drifted apart by one.
        assert_eq!(lines[5].kind, PatchLineKind::Context);
        assert_eq!(lines[5].old_line_no, Some(12));
        assert_eq!(lines[5].new_line_no, Some(13));
    }

    /// A `\ No newline at end of file` line is kept as a `Meta` line.
    #[test]
    fn parse_meta_line() {
        let input = "\
diff --git a/file.txt b/file.txt
index abc..def 100644
--- a/file.txt
+++ b/file.txt
@@ -1,1 +1,1 @@
-old
\\ No newline at end of file
+new
";
        let files = parse_unified_diff(input).unwrap();
        let hunk = &files[0].hunks[0];
        let meta = hunk.lines.iter().find(|l| l.kind == PatchLineKind::Meta);
        assert!(meta.is_some());
        assert!(meta.unwrap().text.contains("No newline"));
    }

    /// A `Binary files … differ` section is flagged binary and has no hunks.
    #[test]
    fn parse_binary_diff() {
        let input = "\
diff --git a/image.png b/image.png
new file mode 100644
index 0000000..abc1234
Binary files /dev/null and b/image.png differ
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].binary);
        assert!(files[0].hunks.is_empty());
    }

    /// Empty input parses to an empty file list rather than an error.
    #[test]
    fn parse_empty_diff() {
        let files = parse_unified_diff("").unwrap();
        assert!(files.is_empty());
    }

    /// Consecutive `diff --git` sections become separate files, each with its
    /// own status.
    #[test]
    fn parse_multiple_files() {
        let input = "\
diff --git a/a.rs b/a.rs
index abc..def 100644
--- a/a.rs
+++ b/a.rs
@@ -1,1 +1,1 @@
-old_a
+new_a
diff --git a/b.rs b/b.rs
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ b/b.rs
@@ -0,0 +1,1 @@
+new_b
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].path, "a.rs");
        assert_eq!(files[0].status, FileStatus::Modified);
        assert_eq!(files[1].path, "b.rs");
        assert_eq!(files[1].status, FileStatus::Added);
    }

    /// The text `diff --git` appearing inside a hunk line (not at the start of
    /// the line) must not start a new file section.
    #[test]
    fn parse_diff_marker_inside_hunk_line() {
        let input = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -1,1 +1,2 @@
 fn main() {
+cannot parse paths from: diff --git /m)
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "file.rs");
        assert_eq!(files[0].status, FileStatus::Modified);
        assert_eq!(files[0].additions(), 1);
    }

    /// Two `@@` headers in one file section produce two hunks with their own
    /// start lines.
    #[test]
    fn parse_multiple_hunks() {
        let input = "\
diff --git a/file.rs b/file.rs
index abc..def 100644
--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn a() {
- old_a();
+ new_a();
 }
@@ -10,3 +10,3 @@
 fn b() {
- old_b();
+ new_b();
 }
";
        let files = parse_unified_diff(input).unwrap();
        assert_eq!(files[0].hunks.len(), 2);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[1].old_start, 10);
    }

    /// A range without a `,count` part defaults its count to 1.
    #[test]
    fn parse_hunk_header_without_comma() {
        let (start, count, new_start, new_count) =
            parse_hunk_header("@@ -1 +1 @@ fn main()").unwrap();
        assert_eq!(start, 1);
        assert_eq!(count, 1);
        assert_eq!(new_start, 1);
        assert_eq!(new_count, 1);
    }

    /// Each status maps to its one-letter marker.
    #[test]
    fn file_status_marker() {
        assert_eq!(FileStatus::Modified.marker(), 'M');
        assert_eq!(FileStatus::Added.marker(), 'A');
        assert_eq!(FileStatus::Deleted.marker(), 'D');
        assert_eq!(FileStatus::Renamed.marker(), 'R');
    }
}