Skip to main content

argus_difflens/
parser.rs

1use std::fmt;
2use std::path::PathBuf;
3
4use argus_core::{ArgusError, ChangeType, DiffHunk};
5
6/// A complete diff for a single file, containing one or more hunks.
7///
8/// # Examples
9///
10/// ```
11/// use argus_difflens::parser::{parse_unified_diff, FileDiff};
12///
13/// let diff = "diff --git a/hello.rs b/hello.rs\n\
14///             --- a/hello.rs\n\
15///             +++ b/hello.rs\n\
16///             @@ -1,3 +1,4 @@\n\
17///              fn main() {\n\
18///             +    println!(\"hello\");\n\
19///              }\n";
20/// let files = parse_unified_diff(diff).unwrap();
21/// assert_eq!(files.len(), 1);
22/// assert_eq!(files[0].hunks.len(), 1);
23/// ```
24#[derive(Debug, Clone)]
25pub struct FileDiff {
26    /// Path in the old version.
27    pub old_path: PathBuf,
28    /// Path in the new version.
29    pub new_path: PathBuf,
30    /// Parsed hunks for this file.
31    pub hunks: Vec<DiffHunk>,
32    /// Whether this is a newly created file.
33    pub is_new_file: bool,
34    /// Whether this file was deleted.
35    pub is_deleted_file: bool,
36    /// Whether this file was renamed.
37    pub is_rename: bool,
38}
39
40impl fmt::Display for FileDiff {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        write!(
43            f,
44            "{} ({} hunks)",
45            self.new_path.display(),
46            self.hunks.len()
47        )
48    }
49}
50
51/// Parse a unified diff string (as produced by `git diff`) into structured [`FileDiff`] entries.
52///
53/// Handles standard unified diff format including new files, deleted files,
54/// renamed files, and binary files (which are skipped).
55///
56/// # Errors
57///
58/// Returns [`ArgusError::Parse`] if a hunk header is malformed.
59///
60/// # Examples
61///
62/// ```
63/// use argus_difflens::parser::parse_unified_diff;
64///
65/// let files = parse_unified_diff("").unwrap();
66/// assert!(files.is_empty());
67/// ```
68pub fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, ArgusError> {
69    let mut files: Vec<FileDiff> = Vec::new();
70    let mut current: Option<FileDiff> = None;
71    let mut current_hunk: Option<DiffHunk> = None;
72    let mut is_binary = false;
73
74    for line in input.lines() {
75        if line.starts_with("diff --git ") {
76            flush_hunk(&mut current, &mut current_hunk);
77            if let Some(file) = current.take() {
78                if !is_binary {
79                    files.push(file);
80                }
81            }
82            is_binary = false;
83            current = Some(FileDiff {
84                old_path: PathBuf::new(),
85                new_path: PathBuf::new(),
86                hunks: Vec::new(),
87                is_new_file: false,
88                is_deleted_file: false,
89                is_rename: false,
90            });
91            continue;
92        }
93
94        // Implicitly start a file if we see a header but have no current file
95        // This handles standard patches that lack the "diff --git" command line
96        if line.starts_with("--- ") && current.is_none() {
97            current = Some(FileDiff {
98                old_path: PathBuf::new(),
99                new_path: PathBuf::new(),
100                hunks: Vec::new(),
101                is_new_file: false,
102                is_deleted_file: false,
103                is_rename: false,
104            });
105        }
106
107        let Some(file) = current.as_mut() else {
108            continue;
109        };
110
111        if line.starts_with("Binary files ") && line.ends_with(" differ") {
112            is_binary = true;
113            continue;
114        }
115
116        if line.starts_with("new file mode") {
117            file.is_new_file = true;
118            continue;
119        }
120
121        if line.starts_with("deleted file mode") {
122            file.is_deleted_file = true;
123            continue;
124        }
125
126        if line.starts_with("rename from ") || line.starts_with("rename to ") {
127            file.is_rename = true;
128            continue;
129        }
130
131        if line.starts_with("index ") || line.starts_with("similarity index") {
132            continue;
133        }
134
135        if let Some(path) = line.strip_prefix("--- ") {
136            file.old_path = parse_path(path);
137            continue;
138        }
139
140        if let Some(path) = line.strip_prefix("+++ ") {
141            file.new_path = parse_path(path);
142            if path == "/dev/null" {
143                file.is_deleted_file = true;
144            }
145            continue;
146        }
147
148        if line.starts_with("@@ ") {
149            flush_hunk(&mut current, &mut current_hunk);
150            // Re-borrow after flush
151            let file = current.as_ref().unwrap();
152            let file_path = if file.is_deleted_file {
153                file.old_path.clone()
154            } else {
155                file.new_path.clone()
156            };
157            let (old_start, old_lines, new_start, new_lines) = parse_hunk_header(line)?;
158            let change_type = if file.is_new_file || old_lines == 0 {
159                ChangeType::Add
160            } else if file.is_deleted_file || new_lines == 0 {
161                ChangeType::Delete
162            } else {
163                ChangeType::Modify
164            };
165            current_hunk = Some(DiffHunk {
166                file_path,
167                old_start,
168                old_lines,
169                new_start,
170                new_lines,
171                content: String::new(),
172                change_type,
173            });
174            continue;
175        }
176
177        if line == "\\ No newline at end of file" {
178            continue;
179        }
180
181        if let Some(hunk) = current_hunk.as_mut() {
182            if line.starts_with('+') || line.starts_with('-') || line.starts_with(' ') {
183                hunk.content.push_str(line);
184                hunk.content.push('\n');
185            }
186        }
187    }
188
189    flush_hunk(&mut current, &mut current_hunk);
190    if let Some(file) = current.take() {
191        if !is_binary {
192            files.push(file);
193        }
194    }
195
196    Ok(files)
197}
198
199fn flush_hunk(current: &mut Option<FileDiff>, hunk: &mut Option<DiffHunk>) {
200    if let Some(h) = hunk.take() {
201        if let Some(file) = current.as_mut() {
202            file.hunks.push(h);
203        }
204    }
205}
206
/// Turn the path portion of a `---` / `+++` header line into a [`PathBuf`].
///
/// Everything from the first tab onward is discarded. Header lines may carry a
/// tab after the file name: plain `diff -u` output follows the name with a
/// tab-separated timestamp, and git emits a bare trailing tab when the name
/// contains a space. Neither is part of the path.
///
/// `/dev/null` is returned unchanged. For any other name a single leading
/// `a/` or `b/` prefix is removed if present.
fn parse_path(raw: &str) -> PathBuf {
    let name = raw.split_once('\t').map_or(raw, |(name, _)| name);
    if name == "/dev/null" {
        return PathBuf::from(name);
    }
    let stripped = name
        .strip_prefix("a/")
        .or_else(|| name.strip_prefix("b/"))
        .unwrap_or(name);
    PathBuf::from(stripped)
}
217
218fn parse_hunk_header(line: &str) -> Result<(u32, u32, u32, u32), ArgusError> {
219    let inner = line
220        .strip_prefix("@@ ")
221        .and_then(|s| {
222            let end = s.find(" @@")?;
223            Some(&s[..end])
224        })
225        .ok_or_else(|| ArgusError::Parse(format!("invalid hunk header: {line}")))?;
226
227    let parts: Vec<&str> = inner.split(' ').collect();
228    if parts.len() != 2 {
229        return Err(ArgusError::Parse(format!("invalid hunk header: {line}")));
230    }
231
232    let old = parts[0]
233        .strip_prefix('-')
234        .ok_or_else(|| ArgusError::Parse(format!("invalid old range in hunk: {line}")))?;
235    let new = parts[1]
236        .strip_prefix('+')
237        .ok_or_else(|| ArgusError::Parse(format!("invalid new range in hunk: {line}")))?;
238
239    let (old_start, old_lines) = parse_range(old, line)?;
240    let (new_start, new_lines) = parse_range(new, line)?;
241
242    Ok((old_start, old_lines, new_start, new_lines))
243}
244
245fn parse_range(range: &str, context: &str) -> Result<(u32, u32), ArgusError> {
246    if let Some((start, count)) = range.split_once(',') {
247        let s = start
248            .parse()
249            .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
250        let c = count
251            .parse()
252            .map_err(|_| ArgusError::Parse(format!("invalid range count in: {context}")))?;
253        Ok((s, c))
254    } else {
255        let s = range
256            .parse()
257            .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
258        Ok((s, 1))
259    }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser and panics if it reports an error.
    fn parse(diff: &str) -> Vec<FileDiff> {
        parse_unified_diff(diff).unwrap()
    }

    /// Empty input produces no files.
    #[test]
    fn empty_diff_returns_empty_vec() {
        assert!(parse("").is_empty());
    }

    /// One file with one hunk: path, ranges, change type and content are all recorded.
    #[test]
    fn single_file_single_hunk() {
        let parsed = parse(
            "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+    println!(\"hello\");
     let x = 1;
 }
",
        );
        assert_eq!(parsed.len(), 1);
        let file = &parsed[0];
        assert_eq!(file.new_path, PathBuf::from("src/main.rs"));
        assert_eq!(file.hunks.len(), 1);
        let hunk = &file.hunks[0];
        assert_eq!(hunk.old_start, 1);
        assert_eq!(hunk.old_lines, 3);
        assert_eq!(hunk.new_start, 1);
        assert_eq!(hunk.new_lines, 4);
        assert_eq!(hunk.change_type, ChangeType::Modify);
        assert!(hunk.content.contains("+    println!"));
    }

    /// Consecutive `@@` headers under one file become separate hunks.
    #[test]
    fn single_file_multiple_hunks() {
        let parsed = parse(
            "\
diff --git a/lib.rs b/lib.rs
--- a/lib.rs
+++ b/lib.rs
@@ -1,3 +1,4 @@
 fn foo() {
+    bar();
 }
@@ -10,3 +11,4 @@
 fn baz() {
+    qux();
 }
",
        );
        assert_eq!(parsed.len(), 1);
        let hunks = &parsed[0].hunks;
        assert_eq!(hunks.len(), 2);
        assert_eq!(hunks[0].old_start, 1);
        assert_eq!(hunks[1].old_start, 10);
    }

    /// Each `diff --git` line starts a new file entry.
    #[test]
    fn multiple_files() {
        let parsed = parse(
            "\
diff --git a/a.rs b/a.rs
--- a/a.rs
+++ b/a.rs
@@ -1 +1,2 @@
 line1
+line2
diff --git a/b.rs b/b.rs
--- a/b.rs
+++ b/b.rs
@@ -1 +1,2 @@
 line1
+line2
",
        );
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].new_path, PathBuf::from("a.rs"));
        assert_eq!(parsed[1].new_path, PathBuf::from("b.rs"));
    }

    /// A `new file mode` diff is flagged as new and its hunk is an addition.
    #[test]
    fn new_file() {
        let parsed = parse(
            "\
diff --git a/new.rs b/new.rs
new file mode 100644
--- /dev/null
+++ b/new.rs
@@ -0,0 +1,3 @@
+fn hello() {
+    println!(\"new\");
+}
",
        );
        assert_eq!(parsed.len(), 1);
        let file = &parsed[0];
        assert!(file.is_new_file);
        assert_eq!(file.old_path, PathBuf::from("/dev/null"));
        assert_eq!(file.new_path, PathBuf::from("new.rs"));
        assert_eq!(file.hunks[0].change_type, ChangeType::Add);
    }

    /// A `deleted file mode` diff is flagged as deleted and its hunk is a deletion.
    #[test]
    fn deleted_file() {
        let parsed = parse(
            "\
diff --git a/old.rs b/old.rs
deleted file mode 100644
--- a/old.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn goodbye() {
-    println!(\"old\");
-}
",
        );
        assert_eq!(parsed.len(), 1);
        let file = &parsed[0];
        assert!(file.is_deleted_file);
        assert_eq!(file.new_path, PathBuf::from("/dev/null"));
        assert_eq!(file.hunks[0].change_type, ChangeType::Delete);
    }

    /// A rename with no hunks still yields a file entry flagged as a rename.
    #[test]
    fn renamed_file() {
        let parsed = parse(
            "\
diff --git a/old_name.rs b/new_name.rs
similarity index 100%
rename from old_name.rs
rename to new_name.rs
",
        );
        assert_eq!(parsed.len(), 1);
        assert!(parsed[0].is_rename);
    }

    /// A hunk whose old side has zero lines is classified as an addition.
    #[test]
    fn hunk_only_additions() {
        let parsed = parse(
            "\
diff --git a/add.rs b/add.rs
--- a/add.rs
+++ b/add.rs
@@ -5,0 +6,3 @@
+line1
+line2
+line3
",
        );
        let hunk = &parsed[0].hunks[0];
        assert_eq!(hunk.change_type, ChangeType::Add);
        assert_eq!(hunk.old_lines, 0);
        assert_eq!(hunk.new_lines, 3);
    }

    /// A hunk whose new side has zero lines is classified as a deletion.
    #[test]
    fn hunk_only_deletions() {
        let parsed = parse(
            "\
diff --git a/del.rs b/del.rs
--- a/del.rs
+++ b/del.rs
@@ -1,3 +0,0 @@
-line1
-line2
-line3
",
        );
        let hunk = &parsed[0].hunks[0];
        assert_eq!(hunk.change_type, ChangeType::Delete);
        assert_eq!(hunk.new_lines, 0);
    }

    /// Files reported as binary are left out of the result.
    #[test]
    fn binary_files_skipped() {
        let parsed = parse(
            "\
diff --git a/image.png b/image.png
Binary files a/image.png and b/image.png differ
diff --git a/code.rs b/code.rs
--- a/code.rs
+++ b/code.rs
@@ -1 +1,2 @@
 line1
+line2
",
        );
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].new_path, PathBuf::from("code.rs"));
    }

    /// The "No newline at end of file" marker is not copied into hunk content.
    #[test]
    fn no_newline_at_eof_handled() {
        let parsed = parse(
            "\
diff --git a/f.rs b/f.rs
--- a/f.rs
+++ b/f.rs
@@ -1 +1 @@
-old
\\ No newline at end of file
+new
\\ No newline at end of file
",
        );
        assert_eq!(parsed.len(), 1);
        let body = &parsed[0].hunks[0].content;
        assert!(!body.contains("No newline"));
        assert!(body.contains("-old"));
        assert!(body.contains("+new"));
    }

    /// Every file in the checked-in fixture either has hunks or is a rename.
    #[test]
    fn real_world_fixture() {
        let parsed = parse(include_str!("../tests/fixtures/simple.diff"));
        assert!(!parsed.is_empty());
        for file in &parsed {
            assert!(!file.hunks.is_empty() || file.is_rename);
        }
    }
}