1use std::fmt;
2use std::path::PathBuf;
3
4use argus_core::{ArgusError, ChangeType, DiffHunk};
5
/// The parsed changes for one file section of a unified diff.
///
/// Values of this type are produced by `parse_unified_diff`, one per
/// non-binary file section found in the input.
#[derive(Debug, Clone)]
pub struct FileDiff {
    /// Path taken from the `--- ` header line (`/dev/null` is kept verbatim).
    pub old_path: PathBuf,
    /// Path taken from the `+++ ` header line (`/dev/null` is kept verbatim).
    pub new_path: PathBuf,
    /// Hunks belonging to this file, in the order they appear in the diff.
    pub hunks: Vec<DiffHunk>,
    /// Set when the diff marks the file as newly created
    /// (e.g. a `new file mode` header line).
    pub is_new_file: bool,
    /// Set when the diff marks the file as removed
    /// (e.g. a `deleted file mode` header line or a `+++ /dev/null` header).
    pub is_deleted_file: bool,
    /// Set when a `rename from` or `rename to` header line is present.
    pub is_rename: bool,
}
39
40impl fmt::Display for FileDiff {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 write!(
43 f,
44 "{} ({} hunks)",
45 self.new_path.display(),
46 self.hunks.len()
47 )
48 }
49}
50
51pub fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, ArgusError> {
69 let mut files: Vec<FileDiff> = Vec::new();
70 let mut current: Option<FileDiff> = None;
71 let mut current_hunk: Option<DiffHunk> = None;
72 let mut is_binary = false;
73
74 for line in input.lines() {
75 if line.starts_with("diff --git ") {
76 flush_hunk(&mut current, &mut current_hunk);
77 if let Some(file) = current.take() {
78 if !is_binary {
79 files.push(file);
80 }
81 }
82 is_binary = false;
83 current = Some(FileDiff {
84 old_path: PathBuf::new(),
85 new_path: PathBuf::new(),
86 hunks: Vec::new(),
87 is_new_file: false,
88 is_deleted_file: false,
89 is_rename: false,
90 });
91 continue;
92 }
93
94 if line.starts_with("--- ") && current.is_none() {
97 current = Some(FileDiff {
98 old_path: PathBuf::new(),
99 new_path: PathBuf::new(),
100 hunks: Vec::new(),
101 is_new_file: false,
102 is_deleted_file: false,
103 is_rename: false,
104 });
105 }
106
107 let Some(file) = current.as_mut() else {
108 continue;
109 };
110
111 if line.starts_with("Binary files ") && line.ends_with(" differ") {
112 is_binary = true;
113 continue;
114 }
115
116 if line.starts_with("new file mode") {
117 file.is_new_file = true;
118 continue;
119 }
120
121 if line.starts_with("deleted file mode") {
122 file.is_deleted_file = true;
123 continue;
124 }
125
126 if line.starts_with("rename from ") || line.starts_with("rename to ") {
127 file.is_rename = true;
128 continue;
129 }
130
131 if line.starts_with("index ") || line.starts_with("similarity index") {
132 continue;
133 }
134
135 if let Some(path) = line.strip_prefix("--- ") {
136 file.old_path = parse_path(path);
137 continue;
138 }
139
140 if let Some(path) = line.strip_prefix("+++ ") {
141 file.new_path = parse_path(path);
142 if path == "/dev/null" {
143 file.is_deleted_file = true;
144 }
145 continue;
146 }
147
148 if line.starts_with("@@ ") {
149 flush_hunk(&mut current, &mut current_hunk);
150 let file = current.as_ref().unwrap();
152 let file_path = if file.is_deleted_file {
153 file.old_path.clone()
154 } else {
155 file.new_path.clone()
156 };
157 let (old_start, old_lines, new_start, new_lines) = parse_hunk_header(line)?;
158 let change_type = if file.is_new_file || old_lines == 0 {
159 ChangeType::Add
160 } else if file.is_deleted_file || new_lines == 0 {
161 ChangeType::Delete
162 } else {
163 ChangeType::Modify
164 };
165 current_hunk = Some(DiffHunk {
166 file_path,
167 old_start,
168 old_lines,
169 new_start,
170 new_lines,
171 content: String::new(),
172 change_type,
173 });
174 continue;
175 }
176
177 if line == "\\ No newline at end of file" {
178 continue;
179 }
180
181 if let Some(hunk) = current_hunk.as_mut() {
182 if line.starts_with('+') || line.starts_with('-') || line.starts_with(' ') {
183 hunk.content.push_str(line);
184 hunk.content.push('\n');
185 }
186 }
187 }
188
189 flush_hunk(&mut current, &mut current_hunk);
190 if let Some(file) = current.take() {
191 if !is_binary {
192 files.push(file);
193 }
194 }
195
196 Ok(files)
197}
198
199fn flush_hunk(current: &mut Option<FileDiff>, hunk: &mut Option<DiffHunk>) {
200 if let Some(h) = hunk.take() {
201 if let Some(file) = current.as_mut() {
202 file.hunks.push(h);
203 }
204 }
205}
206
/// Extracts the file path from the text following `--- ` or `+++ ` in a diff
/// header.
///
/// Anything from the first tab onwards is discarded: plain `diff -u` appends a
/// tab-separated timestamp to the file name, and git appends a bare tab when
/// the name contains spaces. `/dev/null` is returned unchanged; otherwise one
/// leading `a/` or `b/` prefix is removed.
fn parse_path(raw: &str) -> PathBuf {
    // The tab separates the file name from trailing metadata.
    let name = raw.split_once('\t').map_or(raw, |(name, _)| name);
    if name == "/dev/null" {
        return PathBuf::from("/dev/null");
    }
    let stripped = name
        .strip_prefix("a/")
        .or_else(|| name.strip_prefix("b/"))
        .unwrap_or(name);
    PathBuf::from(stripped)
}
217
218fn parse_hunk_header(line: &str) -> Result<(u32, u32, u32, u32), ArgusError> {
219 let inner = line
220 .strip_prefix("@@ ")
221 .and_then(|s| {
222 let end = s.find(" @@")?;
223 Some(&s[..end])
224 })
225 .ok_or_else(|| ArgusError::Parse(format!("invalid hunk header: {line}")))?;
226
227 let parts: Vec<&str> = inner.split(' ').collect();
228 if parts.len() != 2 {
229 return Err(ArgusError::Parse(format!("invalid hunk header: {line}")));
230 }
231
232 let old = parts[0]
233 .strip_prefix('-')
234 .ok_or_else(|| ArgusError::Parse(format!("invalid old range in hunk: {line}")))?;
235 let new = parts[1]
236 .strip_prefix('+')
237 .ok_or_else(|| ArgusError::Parse(format!("invalid new range in hunk: {line}")))?;
238
239 let (old_start, old_lines) = parse_range(old, line)?;
240 let (new_start, new_lines) = parse_range(new, line)?;
241
242 Ok((old_start, old_lines, new_start, new_lines))
243}
244
245fn parse_range(range: &str, context: &str) -> Result<(u32, u32), ArgusError> {
246 if let Some((start, count)) = range.split_once(',') {
247 let s = start
248 .parse()
249 .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
250 let c = count
251 .parse()
252 .map_err(|_| ArgusError::Parse(format!("invalid range count in: {context}")))?;
253 Ok((s, c))
254 } else {
255 let s = range
256 .parse()
257 .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
258 Ok((s, 1))
259 }
260}
261
// Unit tests for `parse_unified_diff`, driven by small inline diffs and one
// fixture file.
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input yields no files.
    #[test]
    fn empty_diff_returns_empty_vec() {
        let files = parse_unified_diff("").unwrap();
        assert!(files.is_empty());
    }

    // One git-style section with one hunk: checks the path, the four header
    // numbers, the change type and that added lines are kept in the content.
    #[test]
    fn single_file_single_hunk() {
        let diff = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+ println!(\"hello\");
 let x = 1;
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("src/main.rs"));
        assert_eq!(files[0].hunks.len(), 1);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[0].old_lines, 3);
        assert_eq!(files[0].hunks[0].new_start, 1);
        assert_eq!(files[0].hunks[0].new_lines, 4);
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Modify);
        assert!(files[0].hunks[0].content.contains("+ println!"));
    }

    // Two `@@` headers in one section produce two hunks in order.
    #[test]
    fn single_file_multiple_hunks() {
        let diff = "\
diff --git a/lib.rs b/lib.rs
--- a/lib.rs
+++ b/lib.rs
@@ -1,3 +1,4 @@
 fn foo() {
+ bar();
 }
@@ -10,3 +11,4 @@
 fn baz() {
+ qux();
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].hunks.len(), 2);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[1].old_start, 10);
    }

    // Each `diff --git` line starts a separate file entry.
    #[test]
    fn multiple_files() {
        let diff = "\
diff --git a/a.rs b/a.rs
--- a/a.rs
+++ b/a.rs
@@ -1 +1,2 @@
 line1
+line2
diff --git a/b.rs b/b.rs
--- a/b.rs
+++ b/b.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].new_path, PathBuf::from("a.rs"));
        assert_eq!(files[1].new_path, PathBuf::from("b.rs"));
    }

    // `new file mode` sets the new-file flag, `/dev/null` is kept as the old
    // path, and the hunk is classified as an addition.
    #[test]
    fn new_file() {
        let diff = "\
diff --git a/new.rs b/new.rs
new file mode 100644
--- /dev/null
+++ b/new.rs
@@ -0,0 +1,3 @@
+fn hello() {
+ println!(\"new\");
+}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_new_file);
        assert_eq!(files[0].old_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].new_path, PathBuf::from("new.rs"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
    }

    // `deleted file mode` sets the deleted flag, `/dev/null` is kept as the
    // new path, and the hunk is classified as a deletion.
    #[test]
    fn deleted_file() {
        let diff = "\
diff --git a/old.rs b/old.rs
deleted file mode 100644
--- a/old.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn goodbye() {
- println!(\"old\");
-}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_deleted_file);
        assert_eq!(files[0].new_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
    }

    // A rename without content changes has no hunks but is still reported,
    // with the rename flag set.
    #[test]
    fn renamed_file() {
        let diff = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 100%
rename from old_name.rs
rename to new_name.rs
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_rename);
    }

    // A hunk whose old side has zero lines is an addition even when the file
    // itself is not new.
    #[test]
    fn hunk_only_additions() {
        let diff = "\
diff --git a/add.rs b/add.rs
--- a/add.rs
+++ b/add.rs
@@ -5,0 +6,3 @@
+line1
+line2
+line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
        assert_eq!(files[0].hunks[0].old_lines, 0);
        assert_eq!(files[0].hunks[0].new_lines, 3);
    }

    // A hunk whose new side has zero lines is a deletion even when the file
    // itself is not deleted.
    #[test]
    fn hunk_only_deletions() {
        let diff = "\
diff --git a/del.rs b/del.rs
--- a/del.rs
+++ b/del.rs
@@ -1,3 +0,0 @@
-line1
-line2
-line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
        assert_eq!(files[0].hunks[0].new_lines, 0);
    }

    // A section marked `Binary files … differ` is omitted; the text file that
    // follows it is still returned.
    #[test]
    fn binary_files_skipped() {
        let diff = "\
diff --git a/image.png b/image.png
Binary files a/image.png and b/image.png differ
diff --git a/code.rs b/code.rs
--- a/code.rs
+++ b/code.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("code.rs"));
    }

    // The "\ No newline at end of file" marker is not copied into the hunk
    // content, while the lines around it are.
    #[test]
    fn no_newline_at_eof_handled() {
        let diff = "\
diff --git a/f.rs b/f.rs
--- a/f.rs
+++ b/f.rs
@@ -1 +1 @@
-old
\\ No newline at end of file
+new
\\ No newline at end of file
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        let content = &files[0].hunks[0].content;
        assert!(!content.contains("No newline"));
        assert!(content.contains("-old"));
        assert!(content.contains("+new"));
    }

    // Parses the fixture at `../tests/fixtures/simple.diff` and checks that
    // every returned file has at least one hunk unless it is a rename.
    #[test]
    fn real_world_fixture() {
        let diff = include_str!("../tests/fixtures/simple.diff");
        let files = parse_unified_diff(diff).unwrap();
        assert!(!files.is_empty());
        for file in &files {
            assert!(!file.hunks.is_empty() || file.is_rename);
        }
    }
}