rust_diff_analyzer/git/
diff_parser.rs

1// SPDX-FileCopyrightText: 2025 RAprogramm <andrey.rozanov.vl@gmail.com>
2// SPDX-License-Identifier: MIT
3
4use std::path::PathBuf;
5
6use masterror::AppError;
7
8use super::hunk::{Hunk, HunkLine};
9use crate::error::DiffParseError;
10
11/// A file diff containing all hunks for a single file
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct FileDiff {
14    /// Path to the file (new path if renamed)
15    pub path: PathBuf,
16    /// Original path (if renamed)
17    pub old_path: Option<PathBuf>,
18    /// Hunks in this file diff
19    pub hunks: Vec<Hunk>,
20}
21
22impl FileDiff {
23    /// Creates a new file diff
24    ///
25    /// # Arguments
26    ///
27    /// * `path` - Path to the file
28    ///
29    /// # Returns
30    ///
31    /// A new FileDiff with empty hunks
32    ///
33    /// # Examples
34    ///
35    /// ```
36    /// use std::path::PathBuf;
37    ///
38    /// use rust_diff_analyzer::git::FileDiff;
39    ///
40    /// let diff = FileDiff::new(PathBuf::from("src/lib.rs"));
41    /// assert!(diff.hunks.is_empty());
42    /// ```
43    pub fn new(path: PathBuf) -> Self {
44        Self {
45            path,
46            old_path: None,
47            hunks: Vec::new(),
48        }
49    }
50
51    /// Returns total number of added lines
52    ///
53    /// # Returns
54    ///
55    /// Sum of added lines across all hunks
56    ///
57    /// # Examples
58    ///
59    /// ```
60    /// use std::path::PathBuf;
61    ///
62    /// use rust_diff_analyzer::git::FileDiff;
63    ///
64    /// let diff = FileDiff::new(PathBuf::from("src/lib.rs"));
65    /// assert_eq!(diff.total_added(), 0);
66    /// ```
67    pub fn total_added(&self) -> usize {
68        self.hunks.iter().map(|h| h.added_count()).sum()
69    }
70
71    /// Returns total number of removed lines
72    ///
73    /// # Returns
74    ///
75    /// Sum of removed lines across all hunks
76    ///
77    /// # Examples
78    ///
79    /// ```
80    /// use std::path::PathBuf;
81    ///
82    /// use rust_diff_analyzer::git::FileDiff;
83    ///
84    /// let diff = FileDiff::new(PathBuf::from("src/lib.rs"));
85    /// assert_eq!(diff.total_removed(), 0);
86    /// ```
87    pub fn total_removed(&self) -> usize {
88        self.hunks.iter().map(|h| h.removed_count()).sum()
89    }
90
91    /// Returns all added line numbers
92    ///
93    /// # Returns
94    ///
95    /// Vector of all added line numbers
96    ///
97    /// # Examples
98    ///
99    /// ```
100    /// use std::path::PathBuf;
101    ///
102    /// use rust_diff_analyzer::git::FileDiff;
103    ///
104    /// let diff = FileDiff::new(PathBuf::from("src/lib.rs"));
105    /// assert!(diff.all_added_lines().is_empty());
106    /// ```
107    pub fn all_added_lines(&self) -> Vec<usize> {
108        self.hunks.iter().flat_map(|h| h.added_lines()).collect()
109    }
110
111    /// Returns all removed line numbers
112    ///
113    /// # Returns
114    ///
115    /// Vector of all removed line numbers
116    ///
117    /// # Examples
118    ///
119    /// ```
120    /// use std::path::PathBuf;
121    ///
122    /// use rust_diff_analyzer::git::FileDiff;
123    ///
124    /// let diff = FileDiff::new(PathBuf::from("src/lib.rs"));
125    /// assert!(diff.all_removed_lines().is_empty());
126    /// ```
127    pub fn all_removed_lines(&self) -> Vec<usize> {
128        self.hunks.iter().flat_map(|h| h.removed_lines()).collect()
129    }
130
131    /// Checks if file path ends with .rs extension
132    ///
133    /// # Returns
134    ///
135    /// `true` if file is a Rust source file
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use std::path::PathBuf;
141    ///
142    /// use rust_diff_analyzer::git::FileDiff;
143    ///
144    /// let diff = FileDiff::new(PathBuf::from("src/lib.rs"));
145    /// assert!(diff.is_rust_file());
146    ///
147    /// let diff = FileDiff::new(PathBuf::from("README.md"));
148    /// assert!(!diff.is_rust_file());
149    /// ```
150    pub fn is_rust_file(&self) -> bool {
151        self.path
152            .extension()
153            .map(|ext| ext == "rs")
154            .unwrap_or(false)
155    }
156}
157
158/// Parses unified diff format into structured file diffs
159///
160/// # Arguments
161///
162/// * `input` - Unified diff content as string
163///
164/// # Returns
165///
166/// Vector of file diffs or parse error
167///
168/// # Errors
169///
170/// Returns error if diff format is invalid
171///
172/// # Examples
173///
174/// ```
175/// use rust_diff_analyzer::git::parse_diff;
176///
177/// let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
178/// index 1234567..abcdefg 100644
179/// --- a/src/lib.rs
180/// +++ b/src/lib.rs
181/// @@ -1,3 +1,4 @@
182///  fn main() {
183/// +    println!("Hello");
184///  }
185/// "#;
186///
187/// let files = parse_diff(diff).unwrap();
188/// assert_eq!(files.len(), 1);
189/// ```
190pub fn parse_diff(input: &str) -> Result<Vec<FileDiff>, AppError> {
191    let mut files = Vec::new();
192    let mut current_file: Option<FileDiff> = None;
193    let mut current_hunk: Option<Hunk> = None;
194    let mut old_line = 0;
195    let mut new_line = 0;
196
197    for line in input.lines() {
198        if line.starts_with("diff --git") {
199            if let Some(mut file) = current_file.take() {
200                if let Some(hunk) = current_hunk.take() {
201                    file.hunks.push(hunk);
202                }
203                files.push(file);
204            }
205
206            let path = parse_diff_header(line)?;
207            current_file = Some(FileDiff::new(path));
208            current_hunk = None;
209        } else if line.starts_with("@@") {
210            if let Some(ref mut file) = current_file {
211                if let Some(hunk) = current_hunk.take() {
212                    file.hunks.push(hunk);
213                }
214
215                let (old_start, old_count, new_start, new_count) = parse_hunk_header(line)?;
216                current_hunk = Some(Hunk::new(old_start, old_count, new_start, new_count));
217                old_line = old_start;
218                new_line = new_start;
219            }
220        } else if let Some(ref mut hunk) = current_hunk
221            && let Some(first_char) = line.chars().next()
222        {
223            let content = if line.len() > 1 {
224                line[1..].to_string()
225            } else {
226                String::new()
227            };
228
229            match first_char {
230                '+' => {
231                    hunk.lines.push(HunkLine::added(new_line, content));
232                    new_line += 1;
233                }
234                '-' => {
235                    hunk.lines.push(HunkLine::removed(old_line, content));
236                    old_line += 1;
237                }
238                ' ' => {
239                    hunk.lines
240                        .push(HunkLine::context(old_line, new_line, content));
241                    old_line += 1;
242                    new_line += 1;
243                }
244                '\\' => {}
245                _ => {}
246            }
247        }
248    }
249
250    if let Some(mut file) = current_file {
251        if let Some(hunk) = current_hunk {
252            file.hunks.push(hunk);
253        }
254        files.push(file);
255    }
256
257    Ok(files)
258}
259
260fn parse_diff_header(line: &str) -> Result<PathBuf, AppError> {
261    let parts: Vec<&str> = line.split_whitespace().collect();
262    if parts.len() < 4 {
263        return Err(DiffParseError {
264            message: format!("invalid diff header: {}", line),
265        }
266        .into());
267    }
268
269    let b_path = parts[3];
270    let path = b_path.strip_prefix("b/").unwrap_or(b_path);
271    Ok(PathBuf::from(path))
272}
273
274fn parse_hunk_header(line: &str) -> Result<(usize, usize, usize, usize), AppError> {
275    let line = line
276        .strip_prefix("@@")
277        .and_then(|s| s.split("@@").next())
278        .ok_or_else(|| {
279            AppError::from(DiffParseError {
280                message: format!("invalid hunk header: {}", line),
281            })
282        })?
283        .trim();
284
285    let parts: Vec<&str> = line.split_whitespace().collect();
286    if parts.len() < 2 {
287        return Err(DiffParseError {
288            message: format!("invalid hunk header: {}", line),
289        }
290        .into());
291    }
292
293    let old_range = parts[0].strip_prefix('-').ok_or_else(|| {
294        AppError::from(DiffParseError {
295            message: format!("invalid old range: {}", parts[0]),
296        })
297    })?;
298
299    let new_range = parts[1].strip_prefix('+').ok_or_else(|| {
300        AppError::from(DiffParseError {
301            message: format!("invalid new range: {}", parts[1]),
302        })
303    })?;
304
305    let (old_start, old_count) = parse_range(old_range)?;
306    let (new_start, new_count) = parse_range(new_range)?;
307
308    Ok((old_start, old_count, new_start, new_count))
309}
310
311fn parse_range(range: &str) -> Result<(usize, usize), AppError> {
312    let parts: Vec<&str> = range.split(',').collect();
313
314    let start = parts[0].parse::<usize>().map_err(|_| {
315        AppError::from(DiffParseError {
316            message: format!("invalid line number: {}", parts[0]),
317        })
318    })?;
319
320    let count = if parts.len() > 1 {
321        parts[1].parse::<usize>().map_err(|_| {
322            AppError::from(DiffParseError {
323                message: format!("invalid line count: {}", parts[1]),
324            })
325        })?
326    } else {
327        1
328    };
329
330    Ok((start, count))
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// One file with a single hunk that adds one line
    #[test]
    fn test_parse_simple_diff() {
        let input = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1234567..abcdefg 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
 fn main() {
+    println!("Hello");
 }
"#;

        let parsed = parse_diff(input).expect("parse should succeed");
        assert_eq!(parsed.len(), 1);

        let file = &parsed[0];
        assert_eq!(file.path, PathBuf::from("src/lib.rs"));
        assert_eq!(file.hunks.len(), 1);
        assert_eq!(file.total_added(), 1);
    }

    /// Two hunks in the same file are both attached to that file
    #[test]
    fn test_parse_multiple_hunks() {
        let input = r#"diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
 fn main() {
+    println!("Hello");
 }
@@ -10,2 +11,3 @@
 fn test() {
+    assert!(true);
 }
"#;

        let parsed = parse_diff(input).expect("parse should succeed");
        let file = &parsed[0];
        assert_eq!(file.hunks.len(), 2);
        assert_eq!(file.total_added(), 2);
    }

    /// Each `diff --git` header yields its own file entry, in input order
    #[test]
    fn test_parse_multiple_files() {
        let input = r#"diff --git a/src/a.rs b/src/a.rs
--- a/src/a.rs
+++ b/src/a.rs
@@ -1,1 +1,2 @@
 fn a() {}
+fn a2() {}
diff --git a/src/b.rs b/src/b.rs
--- a/src/b.rs
+++ b/src/b.rs
@@ -1,1 +1,2 @@
 fn b() {}
+fn b2() {}
"#;

        let parsed = parse_diff(input).expect("parse should succeed");
        assert_eq!(parsed.len(), 2);

        let (first, second) = (&parsed[0], &parsed[1]);
        assert_eq!(first.path, PathBuf::from("src/a.rs"));
        assert_eq!(second.path, PathBuf::from("src/b.rs"));
    }

    /// Only paths with the `rs` extension count as Rust files
    #[test]
    fn test_is_rust_file() {
        let rust_source = FileDiff::new(PathBuf::from("src/lib.rs"));
        let markdown = FileDiff::new(PathBuf::from("README.md"));

        assert!(rust_source.is_rust_file());
        assert!(!markdown.is_rust_file());
    }
}