Skip to main content

argus_difflens/
parser.rs

1use std::fmt;
2use std::path::PathBuf;
3
4use argus_core::{ArgusError, ChangeType, DiffHunk};
5
6/// A complete diff for a single file, containing one or more hunks.
7///
8/// # Examples
9///
10/// ```
11/// use argus_difflens::parser::{parse_unified_diff, FileDiff};
12///
13/// let diff = "diff --git a/hello.rs b/hello.rs\n\
14///             --- a/hello.rs\n\
15///             +++ b/hello.rs\n\
16///             @@ -1,3 +1,4 @@\n\
17///              fn main() {\n\
18///             +    println!(\"hello\");\n\
19///              }\n";
20/// let files = parse_unified_diff(diff).unwrap();
21/// assert_eq!(files.len(), 1);
22/// assert_eq!(files[0].hunks.len(), 1);
23/// ```
24#[derive(Debug, Clone)]
25pub struct FileDiff {
26    /// Path in the old version.
27    pub old_path: PathBuf,
28    /// Path in the new version.
29    pub new_path: PathBuf,
30    /// Parsed hunks for this file.
31    pub hunks: Vec<DiffHunk>,
32    /// Whether this is a newly created file.
33    pub is_new_file: bool,
34    /// Whether this file was deleted.
35    pub is_deleted_file: bool,
36    /// Whether this file was renamed.
37    pub is_rename: bool,
38}
39
40impl fmt::Display for FileDiff {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        write!(
43            f,
44            "{} ({} hunks)",
45            self.new_path.display(),
46            self.hunks.len()
47        )
48    }
49}
50
51/// Parse a unified diff string (as produced by `git diff`) into structured [`FileDiff`] entries.
52///
53/// Handles standard unified diff format including new files, deleted files,
54/// renamed files, and binary files (which are skipped).
55///
56/// # Errors
57///
58/// Returns [`ArgusError::Parse`] if a hunk header is malformed.
59///
60/// # Examples
61///
62/// ```
63/// use argus_difflens::parser::parse_unified_diff;
64///
65/// let files = parse_unified_diff("").unwrap();
66/// assert!(files.is_empty());
67/// ```
68pub fn parse_unified_diff(input: &str) -> Result<Vec<FileDiff>, ArgusError> {
69    let mut files: Vec<FileDiff> = Vec::new();
70    let mut current: Option<FileDiff> = None;
71    let mut current_hunk: Option<DiffHunk> = None;
72    let mut is_binary = false;
73
74    for line in input.lines() {
75        if line.starts_with("diff --git ") {
76            flush_hunk(&mut current, &mut current_hunk);
77            if let Some(file) = current.take() {
78                if !is_binary {
79                    files.push(file);
80                }
81            }
82            is_binary = false;
83            current = Some(FileDiff {
84                old_path: PathBuf::new(),
85                new_path: PathBuf::new(),
86                hunks: Vec::new(),
87                is_new_file: false,
88                is_deleted_file: false,
89                is_rename: false,
90            });
91            continue;
92        }
93
94        let Some(file) = current.as_mut() else {
95            continue;
96        };
97
98        if line.starts_with("Binary files ") && line.ends_with(" differ") {
99            is_binary = true;
100            continue;
101        }
102
103        if line.starts_with("new file mode") {
104            file.is_new_file = true;
105            continue;
106        }
107
108        if line.starts_with("deleted file mode") {
109            file.is_deleted_file = true;
110            continue;
111        }
112
113        if line.starts_with("rename from ") || line.starts_with("rename to ") {
114            file.is_rename = true;
115            continue;
116        }
117
118        if line.starts_with("index ") || line.starts_with("similarity index") {
119            continue;
120        }
121
122        if let Some(path) = line.strip_prefix("--- ") {
123            file.old_path = parse_path(path);
124            continue;
125        }
126
127        if let Some(path) = line.strip_prefix("+++ ") {
128            file.new_path = parse_path(path);
129            if path == "/dev/null" {
130                file.is_deleted_file = true;
131            }
132            continue;
133        }
134
135        if line.starts_with("@@ ") {
136            flush_hunk(&mut current, &mut current_hunk);
137            // Re-borrow after flush
138            let file = current.as_ref().unwrap();
139            let file_path = if file.is_deleted_file {
140                file.old_path.clone()
141            } else {
142                file.new_path.clone()
143            };
144            let (old_start, old_lines, new_start, new_lines) = parse_hunk_header(line)?;
145            let change_type = if file.is_new_file || old_lines == 0 {
146                ChangeType::Add
147            } else if file.is_deleted_file || new_lines == 0 {
148                ChangeType::Delete
149            } else {
150                ChangeType::Modify
151            };
152            current_hunk = Some(DiffHunk {
153                file_path,
154                old_start,
155                old_lines,
156                new_start,
157                new_lines,
158                content: String::new(),
159                change_type,
160            });
161            continue;
162        }
163
164        if line == "\\ No newline at end of file" {
165            continue;
166        }
167
168        if let Some(hunk) = current_hunk.as_mut() {
169            if line.starts_with('+') || line.starts_with('-') || line.starts_with(' ') {
170                hunk.content.push_str(line);
171                hunk.content.push('\n');
172            }
173        }
174    }
175
176    flush_hunk(&mut current, &mut current_hunk);
177    if let Some(file) = current.take() {
178        if !is_binary {
179            files.push(file);
180        }
181    }
182
183    Ok(files)
184}
185
186fn flush_hunk(current: &mut Option<FileDiff>, hunk: &mut Option<DiffHunk>) {
187    if let Some(h) = hunk.take() {
188        if let Some(file) = current.as_mut() {
189            file.hunks.push(h);
190        }
191    }
192}
193
/// Turn the text following `--- ` / `+++ ` into a path.
///
/// Everything from the first TAB onwards is discarded: `git` appends a bare
/// TAB after names that contain spaces, and plain `diff -u` appends a TAB
/// followed by a timestamp. `/dev/null` is returned unchanged; otherwise a
/// single leading `a/` or `b/` prefix is removed when present.
fn parse_path(raw: &str) -> PathBuf {
    let name = raw.split_once('\t').map_or(raw, |(name, _)| name);
    if name == "/dev/null" {
        return PathBuf::from("/dev/null");
    }
    let stripped = name
        .strip_prefix("a/")
        .or_else(|| name.strip_prefix("b/"))
        .unwrap_or(name);
    PathBuf::from(stripped)
}
204
205fn parse_hunk_header(line: &str) -> Result<(u32, u32, u32, u32), ArgusError> {
206    let inner = line
207        .strip_prefix("@@ ")
208        .and_then(|s| {
209            let end = s.find(" @@")?;
210            Some(&s[..end])
211        })
212        .ok_or_else(|| ArgusError::Parse(format!("invalid hunk header: {line}")))?;
213
214    let parts: Vec<&str> = inner.split(' ').collect();
215    if parts.len() != 2 {
216        return Err(ArgusError::Parse(format!("invalid hunk header: {line}")));
217    }
218
219    let old = parts[0]
220        .strip_prefix('-')
221        .ok_or_else(|| ArgusError::Parse(format!("invalid old range in hunk: {line}")))?;
222    let new = parts[1]
223        .strip_prefix('+')
224        .ok_or_else(|| ArgusError::Parse(format!("invalid new range in hunk: {line}")))?;
225
226    let (old_start, old_lines) = parse_range(old, line)?;
227    let (new_start, new_lines) = parse_range(new, line)?;
228
229    Ok((old_start, old_lines, new_start, new_lines))
230}
231
232fn parse_range(range: &str, context: &str) -> Result<(u32, u32), ArgusError> {
233    if let Some((start, count)) = range.split_once(',') {
234        let s = start
235            .parse()
236            .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
237        let c = count
238            .parse()
239            .map_err(|_| ArgusError::Parse(format!("invalid range count in: {context}")))?;
240        Ok((s, c))
241    } else {
242        let s = range
243            .parse()
244            .map_err(|_| ArgusError::Parse(format!("invalid range number in: {context}")))?;
245        Ok((s, 1))
246    }
247}
248
// Unit tests for `parse_unified_diff`. Each test feeds an inline diff (or a
// fixture file) through the parser and checks the resulting `FileDiff` values.
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input yields no files and no error.
    #[test]
    fn empty_diff_returns_empty_vec() {
        let files = parse_unified_diff("").unwrap();
        assert!(files.is_empty());
    }

    // One file with one hunk: path, header numbers, change type and content
    // are all checked.
    #[test]
    fn single_file_single_hunk() {
        let diff = "\
diff --git a/src/main.rs b/src/main.rs
index abc1234..def5678 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
 fn main() {
+    println!(\"hello\");
     let x = 1;
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("src/main.rs"));
        assert_eq!(files[0].hunks.len(), 1);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[0].old_lines, 3);
        assert_eq!(files[0].hunks[0].new_start, 1);
        assert_eq!(files[0].hunks[0].new_lines, 4);
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Modify);
        assert!(files[0].hunks[0].content.contains("+    println!"));
    }

    // A second `@@` header inside the same file starts a second hunk.
    #[test]
    fn single_file_multiple_hunks() {
        let diff = "\
diff --git a/lib.rs b/lib.rs
--- a/lib.rs
+++ b/lib.rs
@@ -1,3 +1,4 @@
 fn foo() {
+    bar();
 }
@@ -10,3 +11,4 @@
 fn baz() {
+    qux();
 }
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].hunks.len(), 2);
        assert_eq!(files[0].hunks[0].old_start, 1);
        assert_eq!(files[0].hunks[1].old_start, 10);
    }

    // Each `diff --git` line starts a new file entry; input order is kept.
    #[test]
    fn multiple_files() {
        let diff = "\
diff --git a/a.rs b/a.rs
--- a/a.rs
+++ b/a.rs
@@ -1 +1,2 @@
 line1
+line2
diff --git a/b.rs b/b.rs
--- a/b.rs
+++ b/b.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].new_path, PathBuf::from("a.rs"));
        assert_eq!(files[1].new_path, PathBuf::from("b.rs"));
    }

    // `new file mode` sets `is_new_file`; the old path stays `/dev/null` and
    // the hunk is classified as an addition.
    #[test]
    fn new_file() {
        let diff = "\
diff --git a/new.rs b/new.rs
new file mode 100644
--- /dev/null
+++ b/new.rs
@@ -0,0 +1,3 @@
+fn hello() {
+    println!(\"new\");
+}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_new_file);
        assert_eq!(files[0].old_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].new_path, PathBuf::from("new.rs"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
    }

    // `deleted file mode` sets `is_deleted_file`; the new path stays
    // `/dev/null` and the hunk is classified as a deletion.
    #[test]
    fn deleted_file() {
        let diff = "\
diff --git a/old.rs b/old.rs
deleted file mode 100644
--- a/old.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn goodbye() {
-    println!(\"old\");
-}
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_deleted_file);
        assert_eq!(files[0].new_path, PathBuf::from("/dev/null"));
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
    }

    // A rename without content changes has no hunks but is still reported.
    #[test]
    fn renamed_file() {
        let diff = "\
diff --git a/old_name.rs b/new_name.rs
similarity index 100%
rename from old_name.rs
rename to new_name.rs
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].is_rename);
    }

    // A hunk whose old-side count is 0 is an addition even in an existing file.
    #[test]
    fn hunk_only_additions() {
        let diff = "\
diff --git a/add.rs b/add.rs
--- a/add.rs
+++ b/add.rs
@@ -5,0 +6,3 @@
+line1
+line2
+line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Add);
        assert_eq!(files[0].hunks[0].old_lines, 0);
        assert_eq!(files[0].hunks[0].new_lines, 3);
    }

    // A hunk whose new-side count is 0 is a deletion even if the file remains.
    #[test]
    fn hunk_only_deletions() {
        let diff = "\
diff --git a/del.rs b/del.rs
--- a/del.rs
+++ b/del.rs
@@ -1,3 +0,0 @@
-line1
-line2
-line3
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files[0].hunks[0].change_type, ChangeType::Delete);
        assert_eq!(files[0].hunks[0].new_lines, 0);
    }

    // The binary entry is dropped from the result; only the text file remains.
    #[test]
    fn binary_files_skipped() {
        let diff = "\
diff --git a/image.png b/image.png
Binary files a/image.png and b/image.png differ
diff --git a/code.rs b/code.rs
--- a/code.rs
+++ b/code.rs
@@ -1 +1,2 @@
 line1
+line2
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].new_path, PathBuf::from("code.rs"));
    }

    // The "\ No newline at end of file" marker must not leak into hunk content.
    #[test]
    fn no_newline_at_eof_handled() {
        let diff = "\
diff --git a/f.rs b/f.rs
--- a/f.rs
+++ b/f.rs
@@ -1 +1 @@
-old
\\ No newline at end of file
+new
\\ No newline at end of file
";
        let files = parse_unified_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        let content = &files[0].hunks[0].content;
        assert!(!content.contains("No newline"));
        assert!(content.contains("-old"));
        assert!(content.contains("+new"));
    }

    // Fixture-based smoke test: every parsed file must carry at least one
    // hunk unless it is flagged as a rename.
    #[test]
    fn real_world_fixture() {
        let diff = include_str!("../tests/fixtures/simple.diff");
        let files = parse_unified_diff(diff).unwrap();
        assert!(!files.is_empty());
        for file in &files {
            assert!(!file.hunks.is_empty() || file.is_rename);
        }
    }
}