Skip to main content

argus_difflens/
parser.rs

1use std::fmt;
2use std::path::PathBuf;
3
4use argus_core::{ArgusError, ChangeType, DiffHunk};
5
6/// A complete diff for a single file, containing one or more hunks.
7///
8/// # Examples
9///
10/// ```
11/// use argus_difflens::parser::{parse_unified_diff, FileDiff};
12///
13/// let diff = "diff --git a/hello.rs b/hello.rs\n\
14///             --- a/hello.rs\n\
15///             +++ b/hello.rs\n\
16///             @@ -1,3 +1,4 @@\n\
17///              fn main() {\n\
18///             +    println!(\"hello\");\n\
19///              }\n";
20/// let files = parse_unified_diff(diff).unwrap();
21/// assert_eq!(files.len(), 1);
22/// assert_eq!(files[0].hunks.len(), 1);
23/// ```
24#[derive(Debug, Clone)]
25pub struct FileDiff {
26    /// Path in the old version.
27    pub old_path: PathBuf,
28    /// Path in the new version.
29    pub new_path: PathBuf,
30    /// Parsed hunks for this file.
31    pub hunks: Vec<DiffHunk>,
32    /// Whether this is a newly created file.
33    pub is_new_file: bool,
34    /// Whether this file was deleted.
35    pub is_deleted_file: bool,
36    /// Whether this file was renamed.
37    pub is_rename: bool,
38}
39
40impl fmt::Display for FileDiff {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        write!(
43            f,
44            "{} ({} hunks)",
45            self.new_path.display(),
46            self.hunks.len()
47        )
48    }
49}
50
51/// Parse a unified diff string (as produced by `git diff`) into structured [`FileDiff`] entries.
52///
53/// Handles standard unified diff format including new files, deleted files,
54/// renamed files, and binary files (which are skipped).
55///
56/// # Errors
57///
58/// Returns [`ArgusError::Parse`] if a hunk header is malformed.
59///
60/// # Examples
61///
62/// ```
63/// use argus_difflens::parser::parse_unified_diff;
64///
65/// let files = parse_unified_diff("").unwrap();
66/// assert!(files.is_empty());
67/// ```
68pub fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, ArgusError> {
69    let mut files: Vec<FileDiff> = Vec::new();
70    let mut current: Option<FileDiff> = None;
71    let mut current_hunk: Option<DiffHunk> = None;
72    let mut is_binary = false;
73
74    for line in input.lines() {
75        if line.starts_with("diff --git ") {
76            flush_hunk(&mut current, &mut current_hunk);
77            if let Some(file) = current.take() {
78                if !is_binary {
79                    files.push(file);
80                }
81            }
82            is_binary = false;
83            current = Some(FileDiff {
84                old_path: PathBuf::new(),
85                new_path: PathBuf::new(),
86                hunks: Vec::new(),
87                is_new_file: false,
88                is_deleted_file: false,
89                is_rename: false,
90            });
91            continue;
92        }
93
94        // Implicitly start a file if we see a header but have no current file
95        // This handles standard patches that lack the "diff --git" command line
96        if line.starts_with("--- ") && current.is_none() {
97            current = Some(FileDiff {
98                old_path: PathBuf::new(),
99                new_path: PathBuf::new(),
100                hunks: Vec::new(),
101                is_new_file: false,
102                is_deleted_file: false,
103                is_rename: false,
104            });
105        }
106
107        let Some(file) = current.as_mut() else {
108            continue;
109        };
110
111        if line.starts_with("Binary files ") && line.ends_with(" differ") {
112            is_binary = true;
113            continue;
114        }
115
116        if line.starts_with("new file mode") {
117            file.is_new_file = true;
118            continue;
119        }
120
121        if line.starts_with("deleted file mode") {
122            file.is_deleted_file = true;
123            continue;
124        }
125
126        if line.starts_with("rename from ") || line.starts_with("rename to ") {
127            file.is_rename = true;
128            continue;
129        }
130
131        if line.starts_with("index ") || line.starts_with("similarity index") {
132            continue;
133        }
134
135        if let Some(path) = line.strip_prefix("--- ") {
136            file.old_path = parse_path(path);
137            continue;
138        }
139
140        if let Some(path) = line.strip_prefix("+++ ") {
141            file.new_path = parse_path(path);
142            if file.new_path == std::path::Path::new("/dev/null") {
143                file.is_deleted_file = true;
144            }
145            continue;
146        }
147
148        if line.starts_with("@@ ") {
149            flush_hunk(&mut current, &mut current_hunk);
150            // Re-borrow after flush
151            let file = current.as_ref().unwrap();
152            let file_path = if file.is_deleted_file {
153                file.old_path.clone()
154            } else {
155                file.new_path.clone()
156            };
157            let (old_start, old_lines, new_start, new_lines) = parse_hunk_header(line)?;
158            let change_type = if file.is_new_file || old_lines == 0 {
159                ChangeType::Add
160            } else if file.is_deleted_file || new_lines == 0 {
161                ChangeType::Delete
162            } else {
163                ChangeType::Modify
164            };
165            current_hunk = Some(DiffHunk {
166                file_path,
167                old_start,
168                old_lines,
169                new_start,
170                new_lines,
171                content: String::new(),
172                change_type,
173            });
174            continue;
175        }
176
177        if line == "\\ No newline at end of file" {
178            continue;
179        }
180
181        if let Some(hunk) = current_hunk.as_mut() {
182            if line.starts_with('+') || line.starts_with('-') || line.starts_with(' ') {
183                hunk.content.push_str(line);
184                hunk.content.push('\n');
185            }
186        }
187    }
188
189    flush_hunk(&mut current, &mut current_hunk);
190    if let Some(file) = current.take() {
191        if !is_binary {
192            files.push(file);
193        }
194    }
195
196    Ok(files)
197}
198
199fn flush_hunk(current: &mut Option<FileDiff>, hunk: &mut Option<DiffHunk>) {
200    if let Some(h) = hunk.take() {
201        if let Some(file) = current.as_mut() {
202            file.hunks.push(h);
203        }
204    }
205}
206
/// Turns the text following `--- ` / `+++ ` in a file header into a path.
///
/// Steps, in order:
/// 1. Everything from the first tab onwards is discarded. `diff -u` puts a
///    tab and a timestamp after the file name, and git emits a bare trailing
///    tab after names that contain spaces.
/// 2. Surrounding double quotes are removed (escape sequences inside a quoted
///    name are not decoded).
/// 3. `/dev/null` is returned unchanged.
/// 4. A leading `a/` or `b/` prefix is stripped.
fn parse_path(raw: &str) -> PathBuf {
    // A literal tab separates the name from trailing metadata.
    let name = raw.split_once('\t').map_or(raw, |(name, _)| name);
    let normalized = name.trim_matches('"');

    if normalized == "/dev/null" {
        return PathBuf::from("/dev/null");
    }

    let stripped = normalized
        .strip_prefix("a/")
        .or_else(|| normalized.strip_prefix("b/"))
        .unwrap_or(normalized);

    PathBuf::from(stripped)
}
221
222fn parse_hunk_header(line: &str) -> Result<(u32, u32, u32, u32), ArgusError> {
223    let inner = line
224        .strip_prefix("@@ ")
225        .and_then(|s| {
226            let end = s.find(" @@")?;
227            Some(&s[..end])
228        })
229        .ok_or_else(|| ArgusError::Parse(format!("invalid hunk header: {line}")))?;
230
231    let parts: Vec<&str> = inner.split(' ').collect();
232    if parts.len() != 2 {
233        return Err(ArgusError::Parse(format!("invalid hunk header: {line}")));
234    }
235
236    let old = parts[0]
237        .strip_prefix('-')
238        .ok_or_else(|| ArgusError::Parse(format!("invalid old range in hunk: {line}")))?;
239    let new = parts[1]
240        .strip_prefix('+')
241        .ok_or_else(|| ArgusError::Parse(format!("invalid new range in hunk: {line}")))?;
242
243    let (old_start, old_lines) = parse_range(old, line)?;
244    let (new_start, new_lines) = parse_range(new, line)?;
245
246    Ok((old_start, old_lines, new_start, new_lines))
247}
248
249fn parse_range(range: &str, context: &str) -> Result<(u32, u32), ArgusError> {
250    if let Some((start, count)) = range.split_once(',') {
251        let s = start
252            .parse()
253            .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
254        let c = count
255            .parse()
256            .map_err(|_| ArgusError::Parse(format!("invalid range count in: {context}")))?;
257        Ok((s, c))
258    } else {
259        let s = range
260            .parse()
261            .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
262        Ok((s, 1))
263    }
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser and panics if it reports an error.
    fn parse(diff: &str) -> Vec<FileDiff> {
        parse_unified_diff(diff).unwrap()
    }

    #[test]
    fn empty_diff_returns_empty_vec() {
        assert!(parse("").is_empty());
    }

    #[test]
    fn single_file_single_hunk() {
        let diff = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+    println!(\"hello\");
     let x = 1;
 }
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert_eq!(file.new_path, PathBuf::from("src/main.rs"));
        assert_eq!(file.hunks.len(), 1);

        let hunk = &file.hunks[0];
        assert_eq!(hunk.old_start, 1);
        assert_eq!(hunk.old_lines, 3);
        assert_eq!(hunk.new_start, 1);
        assert_eq!(hunk.new_lines, 4);
        assert_eq!(hunk.change_type, ChangeType::Modify);
        assert!(hunk.content.contains("+    println!"));
    }

    #[test]
    fn single_file_multiple_hunks() {
        let diff = "\
diff --git a/lib.rs b/lib.rs
--- a/lib.rs
+++ b/lib.rs
@@ -1,3 +1,4 @@
 fn foo() {
+    bar();
 }
@@ -10,3 +11,4 @@
 fn baz() {
+    qux();
 }
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);

        let hunks = &parsed[0].hunks;
        assert_eq!(hunks.len(), 2);
        assert_eq!(hunks[0].old_start, 1);
        assert_eq!(hunks[1].old_start, 10);
    }

    #[test]
    fn multiple_files() {
        let diff = "\
diff --git a/a.rs b/a.rs
--- a/a.rs
+++ b/a.rs
@@ -1 +1,2 @@
 line1
+line2
diff --git a/b.rs b/b.rs
--- a/b.rs
+++ b/b.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].new_path, PathBuf::from("a.rs"));
        assert_eq!(parsed[1].new_path, PathBuf::from("b.rs"));
    }

    #[test]
    fn new_file() {
        let diff = "\
diff --git a/new.rs b/new.rs
new file mode 100644
--- /dev/null
+++ b/new.rs
@@ -0,0 +1,3 @@
+fn hello() {
+    println!(\"new\");
+}
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert!(file.is_new_file);
        assert_eq!(file.old_path, PathBuf::from("/dev/null"));
        assert_eq!(file.new_path, PathBuf::from("new.rs"));
        assert_eq!(file.hunks[0].change_type, ChangeType::Add);
    }

    #[test]
    fn deleted_file() {
        let diff = "\
diff --git a/old.rs b/old.rs
deleted file mode 100644
--- a/old.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn goodbye() {
-    println!(\"old\");
-}
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert!(file.is_deleted_file);
        assert_eq!(file.new_path, PathBuf::from("/dev/null"));
        assert_eq!(file.hunks[0].change_type, ChangeType::Delete);
    }

    #[test]
    fn renamed_file() {
        let diff = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 100%
rename from old_name.rs
rename to new_name.rs
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);
        assert!(parsed[0].is_rename);
    }

    #[test]
    fn hunk_only_additions() {
        let diff = "\
diff --git a/add.rs b/add.rs
--- a/add.rs
+++ b/add.rs
@@ -5,0 +6,3 @@
+line1
+line2
+line3
";
        let parsed = parse(diff);
        let hunk = &parsed[0].hunks[0];
        assert_eq!(hunk.change_type, ChangeType::Add);
        assert_eq!(hunk.old_lines, 0);
        assert_eq!(hunk.new_lines, 3);
    }

    #[test]
    fn hunk_only_deletions() {
        let diff = "\
diff --git a/del.rs b/del.rs
--- a/del.rs
+++ b/del.rs
@@ -1,3 +0,0 @@
-line1
-line2
-line3
";
        let parsed = parse(diff);
        let hunk = &parsed[0].hunks[0];
        assert_eq!(hunk.change_type, ChangeType::Delete);
        assert_eq!(hunk.new_lines, 0);
    }

    #[test]
    fn binary_files_skipped() {
        let diff = "\
diff --git a/image.png b/image.png
Binary files a/image.png and b/image.png differ
diff --git a/code.rs b/code.rs
--- a/code.rs
+++ b/code.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].new_path, PathBuf::from("code.rs"));
    }

    #[test]
    fn no_newline_at_eof_handled() {
        let diff = "\
diff --git a/f.rs b/f.rs
--- a/f.rs
+++ b/f.rs
@@ -1 +1 @@
-old
\\ No newline at end of file
+new
\\ No newline at end of file
";
        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);

        // The marker lines must not leak into the hunk body.
        let body = &parsed[0].hunks[0].content;
        assert!(!body.contains("No newline"));
        assert!(body.contains("-old"));
        assert!(body.contains("+new"));
    }

    #[test]
    fn parse_path_handles_quoted_paths() {
        let expected = PathBuf::from("src/my file.rs");
        assert_eq!(parse_path("\"a/src/my file.rs\""), expected);
        assert_eq!(parse_path("\"b/src/my file.rs\""), expected);
    }

    #[test]
    fn quoted_paths_are_parsed_in_unified_diff() {
        let diff = r#"--- "a/src/my file.rs"
+++ "b/src/my file.rs"
@@ -1 +1,2 @@
 old
+new
"#;

        let parsed = parse(diff);
        assert_eq!(parsed.len(), 1);

        let expected = PathBuf::from("src/my file.rs");
        let file = &parsed[0];
        assert_eq!(file.old_path, expected);
        assert_eq!(file.new_path, expected);
        assert_eq!(file.hunks[0].file_path, expected);
    }

    #[test]
    fn real_world_fixture() {
        let parsed = parse(include_str!("../tests/fixtures/simple.diff"));
        assert!(!parsed.is_empty());
        // Every file must carry at least one hunk unless it is a rename.
        assert!(parsed
            .iter()
            .all(|file| file.is_rename || !file.hunks.is_empty()));
    }
}