1use std::fmt;
2use std::path::PathBuf;
3
4use argus_core::{ArgusError, ChangeType, DiffHunk};
5
6#[derive(Debug, Clone)]
25pub struct FileDiff {
26 pub old_path: PathBuf,
28 pub new_path: PathBuf,
30 pub hunks: Vec<DiffHunk>,
32 pub is_new_file: bool,
34 pub is_deleted_file: bool,
36 pub is_rename: bool,
38}
39
40impl fmt::Display for FileDiff {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 write!(
43 f,
44 "{} ({} hunks)",
45 self.new_path.display(),
46 self.hunks.len()
47 )
48 }
49}
50
51pub fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, ArgusError> {
69 let mut files: Vec<FileDiff> = Vec::new();
70 let mut current: Option<FileDiff> = None;
71 let mut current_hunk: Option<DiffHunk> = None;
72 let mut is_binary = false;
73
74 for line in input.lines() {
75 if line.starts_with("diff --git ") {
76 flush_hunk(&mut current, &mut current_hunk);
77 if let Some(file) = current.take() {
78 if !is_binary {
79 files.push(file);
80 }
81 }
82 is_binary = false;
83 current = Some(FileDiff {
84 old_path: PathBuf::new(),
85 new_path: PathBuf::new(),
86 hunks: Vec::new(),
87 is_new_file: false,
88 is_deleted_file: false,
89 is_rename: false,
90 });
91 continue;
92 }
93
94 if line.starts_with("--- ") && current.is_none() {
97 current = Some(FileDiff {
98 old_path: PathBuf::new(),
99 new_path: PathBuf::new(),
100 hunks: Vec::new(),
101 is_new_file: false,
102 is_deleted_file: false,
103 is_rename: false,
104 });
105 }
106
107 let Some(file) = current.as_mut() else {
108 continue;
109 };
110
111 if line.starts_with("Binary files ") && line.ends_with(" differ") {
112 is_binary = true;
113 continue;
114 }
115
116 if line.starts_with("new file mode") {
117 file.is_new_file = true;
118 continue;
119 }
120
121 if line.starts_with("deleted file mode") {
122 file.is_deleted_file = true;
123 continue;
124 }
125
126 if line.starts_with("rename from ") || line.starts_with("rename to ") {
127 file.is_rename = true;
128 continue;
129 }
130
131 if line.starts_with("index ") || line.starts_with("similarity index") {
132 continue;
133 }
134
135 if let Some(path) = line.strip_prefix("--- ") {
136 file.old_path = parse_path(path);
137 continue;
138 }
139
140 if let Some(path) = line.strip_prefix("+++ ") {
141 file.new_path = parse_path(path);
142 if file.new_path == std::path::Path::new("/dev/null") {
143 file.is_deleted_file = true;
144 }
145 continue;
146 }
147
148 if line.starts_with("@@ ") {
149 flush_hunk(&mut current, &mut current_hunk);
150 let file = current.as_ref().unwrap();
152 let file_path = if file.is_deleted_file {
153 file.old_path.clone()
154 } else {
155 file.new_path.clone()
156 };
157 let (old_start, old_lines, new_start, new_lines) = parse_hunk_header(line)?;
158 let change_type = if file.is_new_file || old_lines == 0 {
159 ChangeType::Add
160 } else if file.is_deleted_file || new_lines == 0 {
161 ChangeType::Delete
162 } else {
163 ChangeType::Modify
164 };
165 current_hunk = Some(DiffHunk {
166 file_path,
167 old_start,
168 old_lines,
169 new_start,
170 new_lines,
171 content: String::new(),
172 change_type,
173 });
174 continue;
175 }
176
177 if line == "\\ No newline at end of file" {
178 continue;
179 }
180
181 if let Some(hunk) = current_hunk.as_mut() {
182 if line.starts_with('+') || line.starts_with('-') || line.starts_with(' ') {
183 hunk.content.push_str(line);
184 hunk.content.push('\n');
185 }
186 }
187 }
188
189 flush_hunk(&mut current, &mut current_hunk);
190 if let Some(file) = current.take() {
191 if !is_binary {
192 files.push(file);
193 }
194 }
195
196 Ok(files)
197}
198
199fn flush_hunk(current: &mut Option<FileDiff>, hunk: &mut Option<DiffHunk>) {
200 if let Some(h) = hunk.take() {
201 if let Some(file) = current.as_mut() {
202 file.hunks.push(h);
203 }
204 }
205}
206
/// Extracts the file path from the text following `--- ` or `+++ ` in a diff header.
///
/// * Everything from the first raw tab onwards is discarded. Git appends a
///   tab after names containing spaces and `diff -u` appends a tab plus a
///   timestamp; a tab that is part of the name is always written as `\t`
///   inside a quoted name, so a raw tab never belongs to the path.
/// * Surrounding double quotes are removed, and C-style escapes inside a
///   quoted name (`\"`, `\\`, `\t`, octal `\303\251`, …) are decoded.
/// * `/dev/null` is returned unchanged.
/// * A leading `a/` or `b/` prefix is stripped.
fn parse_path(raw: &str) -> PathBuf {
    let name = raw.split('\t').next().unwrap_or(raw);

    let unquoted = match name
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
    {
        // Only a properly quoted name can contain escapes.
        Some(inner) if inner.contains('\\') => unescape_quoted_path(inner),
        _ => name.trim_matches('"').to_owned(),
    };

    if unquoted == "/dev/null" {
        return PathBuf::from("/dev/null");
    }

    let stripped = unquoted
        .strip_prefix("a/")
        .or_else(|| unquoted.strip_prefix("b/"))
        .unwrap_or(unquoted.as_str());

    PathBuf::from(stripped)
}

/// Decodes the C-style escapes used inside a double-quoted diff path.
///
/// Unknown escapes are kept verbatim. The decoded bytes are interpreted as
/// UTF-8, with invalid sequences replaced by U+FFFD.
fn unescape_quoted_path(inner: &str) -> String {
    let mut decoded: Vec<u8> = Vec::with_capacity(inner.len());
    let mut bytes = inner.bytes().peekable();

    while let Some(byte) = bytes.next() {
        if byte != b'\\' {
            decoded.push(byte);
            continue;
        }
        match bytes.next() {
            // Octal escape: one to three digits encoding a single byte.
            Some(first @ b'0'..=b'7') => {
                let mut value = u32::from(first - b'0');
                for _ in 0..2 {
                    match bytes.peek().copied() {
                        Some(digit @ b'0'..=b'7') => {
                            value = value * 8 + u32::from(digit - b'0');
                            bytes.next();
                        }
                        _ => break,
                    }
                }
                // Well-formed input never exceeds \377; keep the low byte otherwise.
                decoded.push((value & 0xFF) as u8);
            }
            Some(b'a') => decoded.push(0x07),
            Some(b'b') => decoded.push(0x08),
            Some(b't') => decoded.push(b'\t'),
            Some(b'n') => decoded.push(b'\n'),
            Some(b'v') => decoded.push(0x0B),
            Some(b'f') => decoded.push(0x0C),
            Some(b'r') => decoded.push(b'\r'),
            Some(b'"') => decoded.push(b'"'),
            Some(b'\\') => decoded.push(b'\\'),
            Some(other) => {
                decoded.push(b'\\');
                decoded.push(other);
            }
            None => decoded.push(b'\\'),
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
221
222fn parse_hunk_header(line: &str) -> Result<(u32, u32, u32, u32), ArgusError> {
223 let inner = line
224 .strip_prefix("@@ ")
225 .and_then(|s| {
226 let end = s.find(" @@")?;
227 Some(&s[..end])
228 })
229 .ok_or_else(|| ArgusError::Parse(format!("invalid hunk header: {line}")))?;
230
231 let parts: Vec<&str> = inner.split(' ').collect();
232 if parts.len() != 2 {
233 return Err(ArgusError::Parse(format!("invalid hunk header: {line}")));
234 }
235
236 let old = parts[0]
237 .strip_prefix('-')
238 .ok_or_else(|| ArgusError::Parse(format!("invalid old range in hunk: {line}")))?;
239 let new = parts[1]
240 .strip_prefix('+')
241 .ok_or_else(|| ArgusError::Parse(format!("invalid new range in hunk: {line}")))?;
242
243 let (old_start, old_lines) = parse_range(old, line)?;
244 let (new_start, new_lines) = parse_range(new, line)?;
245
246 Ok((old_start, old_lines, new_start, new_lines))
247}
248
249fn parse_range(range: &str, context: &str) -> Result<(u32, u32), ArgusError> {
250 if let Some((start, count)) = range.split_once(',') {
251 let s = start
252 .parse()
253 .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
254 let c = count
255 .parse()
256 .map_err(|_| ArgusError::Parse(format!("invalid range count in: {context}")))?;
257 Ok((s, c))
258 } else {
259 let s = range
260 .parse()
261 .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
262 Ok((s, 1))
263 }
264}
265
// Unit tests for the unified-diff parser. The fixtures are inline string
// literals, so every diff line inside them must start at column 0.
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input yields no files and no error.
    #[test]
    fn empty_diff_returns_empty_vec() {
        let files = parse_unified_diff("").unwrap();
        assert!(files.is_empty());
    }

    // One git-style file section with one hunk: checks the path, the four
    // header numbers, the change type and that added lines reach the content.
    #[test]
    fn single_file_single_hunk() {
        let diff = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+ println!(\"hello\");
 let x = 1;
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("src/main.rs"));
        assert_eq!(files[0].hunks.len(), 1);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[0].old_lines, 3);
        assert_eq!(files[0].hunks[0].new_start, 1);
        assert_eq!(files[0].hunks[0].new_lines, 4);
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Modify);
        assert!(files[0].hunks[0].content.contains("+ println!"));
    }

    // Two `@@` headers under the same file produce two hunks in order.
    #[test]
    fn single_file_multiple_hunks() {
        let diff = "\
diff --git a/lib.rs b/lib.rs
--- a/lib.rs
+++ b/lib.rs
@@ -1,3 +1,4 @@
 fn foo() {
+ bar();
 }
@@ -10,3 +11,4 @@
 fn baz() {
+ qux();
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].hunks.len(), 2);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[1].old_start, 10);
    }

    // Each `diff --git` line starts a separate file entry.
    #[test]
    fn multiple_files() {
        let diff = "\
diff --git a/a.rs b/a.rs
--- a/a.rs
+++ b/a.rs
@@ -1 +1,2 @@
 line1
+line2
diff --git a/b.rs b/b.rs
--- a/b.rs
+++ b/b.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].new_path, PathBuf::from("a.rs"));
        assert_eq!(files[1].new_path, PathBuf::from("b.rs"));
    }

    // `new file mode` sets the new-file flag; the old path stays `/dev/null`
    // and the hunk is classified as an addition.
    #[test]
    fn new_file() {
        let diff = "\
diff --git a/new.rs b/new.rs
new file mode 100644
--- /dev/null
+++ b/new.rs
@@ -0,0 +1,3 @@
+fn hello() {
+ println!(\"new\");
+}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_new_file);
        assert_eq!(files[0].old_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].new_path, PathBuf::from("new.rs"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
    }

    // `deleted file mode` sets the deleted flag; the new path stays
    // `/dev/null` and the hunk is classified as a deletion.
    #[test]
    fn deleted_file() {
        let diff = "\
diff --git a/old.rs b/old.rs
deleted file mode 100644
--- a/old.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn goodbye() {
- println!(\"old\");
-}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_deleted_file);
        assert_eq!(files[0].new_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
    }

    // A pure rename (no hunks) is still reported and flagged as a rename.
    #[test]
    fn renamed_file() {
        let diff = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 100%
rename from old_name.rs
rename to new_name.rs
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_rename);
    }

    // A hunk with zero old lines is an addition even in an existing file.
    #[test]
    fn hunk_only_additions() {
        let diff = "\
diff --git a/add.rs b/add.rs
--- a/add.rs
+++ b/add.rs
@@ -5,0 +6,3 @@
+line1
+line2
+line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
        assert_eq!(files[0].hunks[0].old_lines, 0);
        assert_eq!(files[0].hunks[0].new_lines, 3);
    }

    // A hunk with zero new lines is a deletion even when the file survives.
    #[test]
    fn hunk_only_deletions() {
        let diff = "\
diff --git a/del.rs b/del.rs
--- a/del.rs
+++ b/del.rs
@@ -1,3 +0,0 @@
-line1
-line2
-line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
        assert_eq!(files[0].hunks[0].new_lines, 0);
    }

    // Files announced with `Binary files … differ` are left out of the result.
    #[test]
    fn binary_files_skipped() {
        let diff = "\
diff --git a/image.png b/image.png
Binary files a/image.png and b/image.png differ
diff --git a/code.rs b/code.rs
--- a/code.rs
+++ b/code.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("code.rs"));
    }

    // The `\ No newline at end of file` marker is not copied into the hunk
    // content, while the surrounding -/+ lines are.
    #[test]
    fn no_newline_at_eof_handled() {
        let diff = "\
diff --git a/f.rs b/f.rs
--- a/f.rs
+++ b/f.rs
@@ -1 +1 @@
-old
\\ No newline at end of file
+new
\\ No newline at end of file
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        let content = &files[0].hunks[0].content;
        assert!(!content.contains("No newline"));
        assert!(content.contains("-old"));
        assert!(content.contains("+new"));
    }

    // Quoted header paths lose their quotes and their `a/` / `b/` prefix.
    #[test]
    fn parse_path_handles_quoted_paths() {
        assert_eq!(
            parse_path("\"a/src/my file.rs\""),
            PathBuf::from("src/my file.rs")
        );
        assert_eq!(
            parse_path("\"b/src/my file.rs\""),
            PathBuf::from("src/my file.rs")
        );
    }

    // A diff without a `diff --git` line starts its file at the `--- ` header;
    // quoted paths are unquoted for the file and for its hunk.
    #[test]
    fn quoted_paths_are_parsed_in_unified_diff() {
        let diff = r#"--- "a/src/my file.rs"
+++ "b/src/my file.rs"
@@ -1 +1,2 @@
 old
+new
"#;

        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].old_path, PathBuf::from("src/my file.rs"));
        assert_eq!(files[0].new_path, PathBuf::from("src/my file.rs"));
        assert_eq!(files[0].hunks[0].file_path, PathBuf::from("src/my file.rs"));
    }

    // Smoke test against a fixture file on disk: parsing succeeds and every
    // file either has hunks or is a rename.
    #[test]
    fn real_world_fixture() {
        let diff = include_str!("../tests/fixtures/simple.diff");
        let files = parse_unified_diff(diff).unwrap();
        assert!(!files.is_empty());
        for file in &files {
            assert!(!file.hunks.is_empty() || file.is_rename);
        }
    }
}