1use std::fmt;
2use std::path::PathBuf;
3
4use argus_core::{ArgusError, ChangeType, DiffHunk};
5
/// The changes recorded for a single file inside a unified diff.
///
/// `parse_unified_diff` creates one value per `diff --git` header and fills
/// the fields in as it reads the lines that follow that header.
#[derive(Debug, Clone)]
pub struct FileDiff {
    /// Path on the old side of the diff; starts out empty and is
    /// `/dev/null` when the `--- ` header names `/dev/null`.
    pub old_path: PathBuf,
    /// Path on the new side of the diff; starts out empty and is
    /// `/dev/null` when the `+++ ` header names `/dev/null`.
    pub new_path: PathBuf,
    /// Hunks belonging to this file, in the order they appear in the diff.
    pub hunks: Vec<DiffHunk>,
    /// Set when the diff carries a `new file mode` line for this file.
    pub is_new_file: bool,
    /// Set when the diff carries a `deleted file mode` line for this file or
    /// its `+++ ` header names `/dev/null`.
    pub is_deleted_file: bool,
    /// Set when the diff carries a `rename from` or `rename to` line.
    pub is_rename: bool,
}
39
40impl fmt::Display for FileDiff {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 write!(
43 f,
44 "{} ({} hunks)",
45 self.new_path.display(),
46 self.hunks.len()
47 )
48 }
49}
50
51pub fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, ArgusError> {
69 let mut files: Vec<FileDiff> = Vec::new();
70 let mut current: Option<FileDiff> = None;
71 let mut current_hunk: Option<DiffHunk> = None;
72 let mut is_binary = false;
73
74 for line in input.lines() {
75 if line.starts_with("diff --git ") {
76 flush_hunk(&mut current, &mut current_hunk);
77 if let Some(file) = current.take() {
78 if !is_binary {
79 files.push(file);
80 }
81 }
82 is_binary = false;
83 current = Some(FileDiff {
84 old_path: PathBuf::new(),
85 new_path: PathBuf::new(),
86 hunks: Vec::new(),
87 is_new_file: false,
88 is_deleted_file: false,
89 is_rename: false,
90 });
91 continue;
92 }
93
94 let Some(file) = current.as_mut() else {
95 continue;
96 };
97
98 if line.starts_with("Binary files ") && line.ends_with(" differ") {
99 is_binary = true;
100 continue;
101 }
102
103 if line.starts_with("new file mode") {
104 file.is_new_file = true;
105 continue;
106 }
107
108 if line.starts_with("deleted file mode") {
109 file.is_deleted_file = true;
110 continue;
111 }
112
113 if line.starts_with("rename from ") || line.starts_with("rename to ") {
114 file.is_rename = true;
115 continue;
116 }
117
118 if line.starts_with("index ") || line.starts_with("similarity index") {
119 continue;
120 }
121
122 if let Some(path) = line.strip_prefix("--- ") {
123 file.old_path = parse_path(path);
124 continue;
125 }
126
127 if let Some(path) = line.strip_prefix("+++ ") {
128 file.new_path = parse_path(path);
129 if path == "/dev/null" {
130 file.is_deleted_file = true;
131 }
132 continue;
133 }
134
135 if line.starts_with("@@ ") {
136 flush_hunk(&mut current, &mut current_hunk);
137 let file = current.as_ref().unwrap();
139 let file_path = if file.is_deleted_file {
140 file.old_path.clone()
141 } else {
142 file.new_path.clone()
143 };
144 let (old_start, old_lines, new_start, new_lines) = parse_hunk_header(line)?;
145 let change_type = if file.is_new_file || old_lines == 0 {
146 ChangeType::Add
147 } else if file.is_deleted_file || new_lines == 0 {
148 ChangeType::Delete
149 } else {
150 ChangeType::Modify
151 };
152 current_hunk = Some(DiffHunk {
153 file_path,
154 old_start,
155 old_lines,
156 new_start,
157 new_lines,
158 content: String::new(),
159 change_type,
160 });
161 continue;
162 }
163
164 if line == "\\ No newline at end of file" {
165 continue;
166 }
167
168 if let Some(hunk) = current_hunk.as_mut() {
169 if line.starts_with('+') || line.starts_with('-') || line.starts_with(' ') {
170 hunk.content.push_str(line);
171 hunk.content.push('\n');
172 }
173 }
174 }
175
176 flush_hunk(&mut current, &mut current_hunk);
177 if let Some(file) = current.take() {
178 if !is_binary {
179 files.push(file);
180 }
181 }
182
183 Ok(files)
184}
185
186fn flush_hunk(current: &mut Option<FileDiff>, hunk: &mut Option<DiffHunk>) {
187 if let Some(h) = hunk.take() {
188 if let Some(file) = current.as_mut() {
189 file.hunks.push(h);
190 }
191 }
192}
193
/// Converts the path text of a `--- ` / `+++ ` header into a [`PathBuf`].
///
/// `/dev/null` is returned unchanged. Otherwise one leading `a/` (checked
/// first) or `b/` is removed if present; text with neither prefix is used
/// as-is.
fn parse_path(raw: &str) -> PathBuf {
    let without_prefix = match raw {
        "/dev/null" => raw,
        _ => ["a/", "b/"]
            .iter()
            .find_map(|prefix| raw.strip_prefix(*prefix))
            .unwrap_or(raw),
    };
    PathBuf::from(without_prefix)
}
204
205fn parse_hunk_header(line: &str) -> Result<(u32, u32, u32, u32), ArgusError> {
206 let inner = line
207 .strip_prefix("@@ ")
208 .and_then(|s| {
209 let end = s.find(" @@")?;
210 Some(&s[..end])
211 })
212 .ok_or_else(|| ArgusError::Parse(format!("invalid hunk header: {line}")))?;
213
214 let parts: Vec<&str> = inner.split(' ').collect();
215 if parts.len() != 2 {
216 return Err(ArgusError::Parse(format!("invalid hunk header: {line}")));
217 }
218
219 let old = parts[0]
220 .strip_prefix('-')
221 .ok_or_else(|| ArgusError::Parse(format!("invalid old range in hunk: {line}")))?;
222 let new = parts[1]
223 .strip_prefix('+')
224 .ok_or_else(|| ArgusError::Parse(format!("invalid new range in hunk: {line}")))?;
225
226 let (old_start, old_lines) = parse_range(old, line)?;
227 let (new_start, new_lines) = parse_range(new, line)?;
228
229 Ok((old_start, old_lines, new_start, new_lines))
230}
231
232fn parse_range(range: &str, context: &str) -> Result<(u32, u32), ArgusError> {
233 if let Some((start, count)) = range.split_once(',') {
234 let s = start
235 .parse()
236 .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
237 let c = count
238 .parse()
239 .map_err(|_| ArgusError::Parse(format!("invalid range count in: {context}")))?;
240 Ok((s, c))
241 } else {
242 let s = range
243 .parse()
244 .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
245 Ok((s, 1))
246 }
247}
248
// Unit tests for `parse_unified_diff`, each driven by a small inline diff.
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input yields no files and no error.
    #[test]
    fn empty_diff_returns_empty_vec() {
        let files = parse_unified_diff("").unwrap();
        assert!(files.is_empty());
    }

    // One file with one modifying hunk: checks path, ranges, change type and
    // that the added line ends up in the hunk content.
    #[test]
    fn single_file_single_hunk() {
        let diff = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+ println!(\"hello\");
 let x = 1;
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("src/main.rs"));
        assert_eq!(files[0].hunks.len(), 1);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[0].old_lines, 3);
        assert_eq!(files[0].hunks[0].new_start, 1);
        assert_eq!(files[0].hunks[0].new_lines, 4);
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Modify);
        assert!(files[0].hunks[0].content.contains("+ println!"));
    }

    // Two `@@` headers under one file produce two hunks in order.
    #[test]
    fn single_file_multiple_hunks() {
        let diff = "\
diff --git a/lib.rs b/lib.rs
--- a/lib.rs
+++ b/lib.rs
@@ -1,3 +1,4 @@
 fn foo() {
+ bar();
 }
@@ -10,3 +11,4 @@
 fn baz() {
+ qux();
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].hunks.len(), 2);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[1].old_start, 10);
    }

    // Each `diff --git` header starts a separate file entry.
    #[test]
    fn multiple_files() {
        let diff = "\
diff --git a/a.rs b/a.rs
--- a/a.rs
+++ b/a.rs
@@ -1 +1,2 @@
 line1
+line2
diff --git a/b.rs b/b.rs
--- a/b.rs
+++ b/b.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].new_path, PathBuf::from("a.rs"));
        assert_eq!(files[1].new_path, PathBuf::from("b.rs"));
    }

    // A created file: flagged as new, old side is `/dev/null`, hunk is an Add.
    #[test]
    fn new_file() {
        let diff = "\
diff --git a/new.rs b/new.rs
new file mode 100644
--- /dev/null
+++ b/new.rs
@@ -0,0 +1,3 @@
+fn hello() {
+ println!(\"new\");
+}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_new_file);
        assert_eq!(files[0].old_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].new_path, PathBuf::from("new.rs"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
    }

    // A removed file: flagged as deleted, new side is `/dev/null`, hunk is a
    // Delete.
    #[test]
    fn deleted_file() {
        let diff = "\
diff --git a/old.rs b/old.rs
deleted file mode 100644
--- a/old.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn goodbye() {
- println!(\"old\");
-}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_deleted_file);
        assert_eq!(files[0].new_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
    }

    // A pure rename has no hunks but is still reported and flagged.
    #[test]
    fn renamed_file() {
        let diff = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 100%
rename from old_name.rs
rename to new_name.rs
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_rename);
    }

    // An empty old range marks the hunk as an Add even for an existing file.
    #[test]
    fn hunk_only_additions() {
        let diff = "\
diff --git a/add.rs b/add.rs
--- a/add.rs
+++ b/add.rs
@@ -5,0 +6,3 @@
+line1
+line2
+line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
        assert_eq!(files[0].hunks[0].old_lines, 0);
        assert_eq!(files[0].hunks[0].new_lines, 3);
    }

    // An empty new range marks the hunk as a Delete even for a surviving file.
    #[test]
    fn hunk_only_deletions() {
        let diff = "\
diff --git a/del.rs b/del.rs
--- a/del.rs
+++ b/del.rs
@@ -1,3 +0,0 @@
-line1
-line2
-line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
        assert_eq!(files[0].hunks[0].new_lines, 0);
    }

    // A section with a `Binary files ... differ` line is left out of the
    // result; the text file after it is still parsed.
    #[test]
    fn binary_files_skipped() {
        let diff = "\
diff --git a/image.png b/image.png
Binary files a/image.png and b/image.png differ
diff --git a/code.rs b/code.rs
--- a/code.rs
+++ b/code.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("code.rs"));
    }

    // `\ No newline at end of file` markers must not leak into hunk content.
    #[test]
    fn no_newline_at_eof_handled() {
        let diff = "\
diff --git a/f.rs b/f.rs
--- a/f.rs
+++ b/f.rs
@@ -1 +1 @@
-old
\\ No newline at end of file
+new
\\ No newline at end of file
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        let content = &files[0].hunks[0].content;
        assert!(!content.contains("No newline"));
        assert!(content.contains("-old"));
        assert!(content.contains("+new"));
    }

    // Parses the checked-in fixture; every resulting file must either have
    // hunks or be a rename.
    #[test]
    fn real_world_fixture() {
        let diff = include_str!("../tests/fixtures/simple.diff");
        let files = parse_unified_diff(diff).unwrap();
        assert!(!files.is_empty());
        for file in &files {
            assert!(!file.hunks.is_empty() || file.is_rename);
        }
    }
}