Skip to main content

sqry_core/git/
parser.rs

1//! Parsers for git command output
2//!
3//! This module provides parsers for git porcelain and diff output formats.
4//! All output is expected to be null-terminated (-z flag) to handle filenames
5//! with spaces, newlines, and other special characters.
6
7use super::{ChangeSet, GitError, Result};
8use std::iter::Peekable;
9use std::path::PathBuf;
10use std::str::Split;
11
12type NullSplit<'a> = Peekable<Split<'a, char>>;
13
14fn parse_porcelain_entry(
15    entry: &str,
16    entries: &mut NullSplit<'_>,
17    changeset: &mut ChangeSet,
18) -> Result<()> {
19    if entry.len() < 3 {
20        return Err(GitError::InvalidOutput(format!(
21            "Status entry too short: '{entry}'"
22        )));
23    }
24
25    let status = &entry[..2];
26    if entry.as_bytes().get(2).copied() != Some(b' ') {
27        return Err(GitError::InvalidOutput(format!(
28            "Missing space separator after status in entry: '{entry}'"
29        )));
30    }
31    let filename = &entry[3..];
32    if filename.is_empty() {
33        return Err(GitError::InvalidOutput(
34            "Missing filename in status entry".to_string(),
35        ));
36    }
37
38    match status {
39        "A " | " A" | "AM" | "??" => {
40            changeset.added.push(PathBuf::from(filename));
41        }
42        "M " | " M" | "MM" | "UU" | "AA" | "AU" | "UA" => {
43            changeset.modified.push(PathBuf::from(filename));
44        }
45        "D " | " D" | "DD" | "DU" | "UD" => {
46            changeset.deleted.push(PathBuf::from(filename));
47        }
48        "R " | "RM" => {
49            let old_path = PathBuf::from(filename);
50            let Some(new_filename) = entries.next() else {
51                return Err(GitError::InvalidOutput(format!(
52                    "Rename entry missing new filename: '{entry}'"
53                )));
54            };
55            if new_filename.is_empty() {
56                return Err(GitError::InvalidOutput(
57                    "Rename entry missing new filename".to_string(),
58                ));
59            }
60            changeset
61                .renamed
62                .push((old_path, PathBuf::from(new_filename)));
63        }
64        "C " | "CM" => {
65            if entries.next().is_none() {
66                return Err(GitError::InvalidOutput(
67                    "Copy entry missing old filename".to_string(),
68                ));
69            }
70            changeset.added.push(PathBuf::from(filename));
71        }
72        "!!" => {}
73        _ => {
74            log::warn!("Unknown git status code: '{status}' for file '{filename}'");
75        }
76    }
77
78    Ok(())
79}
80
81fn parse_diff_entry(
82    status_code: &str,
83    entries: &mut NullSplit<'_>,
84    changeset: &mut ChangeSet,
85) -> Result<()> {
86    let status_char = status_code.chars().next().ok_or_else(|| {
87        GitError::InvalidOutput(format!(
88            "Missing status character in diff entry: '{status_code}'"
89        ))
90    })?;
91
92    match status_char {
93        'A' => {
94            let Some(filename) = entries.next() else {
95                return Err(GitError::InvalidOutput(format!(
96                    "Added entry missing filename after status '{status_code}'"
97                )));
98            };
99            if filename.is_empty() {
100                return Err(GitError::InvalidOutput(
101                    "Added entry missing filename".to_string(),
102                ));
103            }
104            changeset.added.push(PathBuf::from(filename));
105        }
106        'M' | 'T' | 'U' | 'X' => {
107            let Some(filename) = entries.next() else {
108                return Err(GitError::InvalidOutput(format!(
109                    "Modified entry missing filename after status '{status_code}'"
110                )));
111            };
112            if filename.is_empty() {
113                return Err(GitError::InvalidOutput(
114                    "Modified entry missing filename".to_string(),
115                ));
116            }
117            changeset.modified.push(PathBuf::from(filename));
118        }
119        'D' => {
120            let Some(filename) = entries.next() else {
121                return Err(GitError::InvalidOutput(format!(
122                    "Deleted entry missing filename after status '{status_code}'"
123                )));
124            };
125            if filename.is_empty() {
126                return Err(GitError::InvalidOutput(
127                    "Deleted entry missing filename".to_string(),
128                ));
129            }
130            changeset.deleted.push(PathBuf::from(filename));
131        }
132        'R' => {
133            let Some(old_filename) = entries.next() else {
134                return Err(GitError::InvalidOutput(format!(
135                    "Rename entry missing old filename after status '{status_code}'"
136                )));
137            };
138            if old_filename.is_empty() {
139                return Err(GitError::InvalidOutput(
140                    "Rename entry missing old filename".to_string(),
141                ));
142            }
143            let Some(new_filename) = entries.next() else {
144                return Err(GitError::InvalidOutput(format!(
145                    "Rename entry missing new filename after old '{old_filename}'"
146                )));
147            };
148            if new_filename.is_empty() {
149                return Err(GitError::InvalidOutput(
150                    "Rename entry missing new filename".to_string(),
151                ));
152            }
153            changeset
154                .renamed
155                .push((PathBuf::from(old_filename), PathBuf::from(new_filename)));
156        }
157        'C' => {
158            let Some(old_filename) = entries.next() else {
159                return Err(GitError::InvalidOutput(format!(
160                    "Copy entry missing old filename after status '{status_code}'"
161                )));
162            };
163            if old_filename.is_empty() {
164                return Err(GitError::InvalidOutput(
165                    "Copy entry missing old filename".to_string(),
166                ));
167            }
168            let Some(new_filename) = entries.next() else {
169                return Err(GitError::InvalidOutput(
170                    "Copy entry missing new filename".to_string(),
171                ));
172            };
173            if new_filename.is_empty() {
174                return Err(GitError::InvalidOutput(
175                    "Copy entry missing new filename".to_string(),
176                ));
177            }
178            changeset.added.push(PathBuf::from(new_filename));
179        }
180        _ => {
181            return Err(GitError::InvalidOutput(format!(
182                "Unknown diff status code: '{status_code}'"
183            )));
184        }
185    }
186
187    Ok(())
188}
189
190/// Parse `git status --porcelain=v1 -z` output
191///
192/// Format: `XY filename\0` where:
193/// - X = index status
194/// - Y = working tree status
195/// - filename = path relative to repo root
196///
197/// Special cases:
198/// - Renames: `R  old\0new\0`
199/// - Copies: `C  old\0new\0`
200/// - Untracked: `?? filename\0`
201/// - Ignored: `!! filename\0`
202/// - Merge conflicts: `UU filename\0`, `AA filename\0`, etc.
203///
204/// See: <https://git-scm.com/docs/git-status#_short_format>
205///
206/// # Errors
207///
208/// Returns [`GitError::InvalidOutput`] when the porcelain stream is malformed
209/// (missing status codes, filenames, or rename/copy metadata).
210pub fn parse_porcelain(output: &str) -> Result<ChangeSet> {
211    let mut changeset = ChangeSet::new();
212
213    if output.is_empty() {
214        return Ok(changeset);
215    }
216
217    // Split by null terminator
218    let mut entries: NullSplit<'_> = output.split('\0').peekable();
219
220    while let Some(entry) = entries.next() {
221        if entry.is_empty() {
222            // Trailing NUL can produce an empty entry; skip it.
223            continue;
224        }
225
226        parse_porcelain_entry(entry, &mut entries, &mut changeset)?;
227    }
228
229    Ok(changeset)
230}
231
232/// Parse `git diff --name-status -z` output
233///
234/// Format: `status\0filename\0` or `status\0old\0new\0` for renames
235///
236/// Status codes:
237/// - A = added
238/// - M = modified
239/// - D = deleted
240/// - `R<similarity>` = renamed (e.g., R075)
241/// - `C<similarity>` = copied
242/// - T = type change (treat as modified)
243/// - U = unmerged (treat as modified)
244/// - X = unknown (treat as modified)
245///
246/// See: <https://git-scm.com/docs/git-diff#_diff_format>
247///
248/// # Errors
249///
250/// Returns [`GitError::InvalidOutput`] when the diff stream is truncated or
251/// otherwise malformed (missing filenames, rename metadata, or status codes).
252pub fn parse_diff_name_status(output: &str) -> Result<ChangeSet> {
253    let mut changeset = ChangeSet::new();
254
255    if output.is_empty() {
256        return Ok(changeset);
257    }
258
259    // Split by null terminator
260    let mut entries: NullSplit<'_> = output.split('\0').peekable();
261
262    while let Some(status_code) = entries.next() {
263        if status_code.is_empty() {
264            // Trailing NUL yields empty entries; skip
265            continue;
266        }
267
268        parse_diff_entry(status_code, &mut entries, &mut changeset)?;
269    }
270
271    Ok(changeset)
272}
273
/// Unit tests for the porcelain and name-status parsers.
///
/// Inputs mimic `-z` output: every field is terminated by `\0`.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- parse_porcelain: well-formed input ----

    #[test]
    fn test_parse_porcelain_empty() {
        let changes = parse_porcelain("").unwrap();
        assert!(changes.is_empty());
    }

    #[test]
    fn test_parse_porcelain_modified() {
        // Staged (`M `) and unstaged (` M`) modifications are both reported.
        let changes = parse_porcelain("M  file1.rs\0 M file2.rs\0").unwrap();
        assert_eq!(changes.modified.len(), 2);
        assert_eq!(changes.modified[0], PathBuf::from("file1.rs"));
        assert_eq!(changes.modified[1], PathBuf::from("file2.rs"));
    }

    #[test]
    fn test_parse_porcelain_added() {
        // Untracked files count as added.
        let changes = parse_porcelain("A  new.rs\0?? untracked.rs\0").unwrap();
        assert_eq!(changes.added.len(), 2);
        assert_eq!(changes.added[0], PathBuf::from("new.rs"));
        assert_eq!(changes.added[1], PathBuf::from("untracked.rs"));
    }

    #[test]
    fn test_parse_porcelain_deleted() {
        let changes = parse_porcelain("D  deleted.rs\0 D removed.rs\0").unwrap();
        assert_eq!(changes.deleted.len(), 2);
        assert_eq!(changes.deleted[0], PathBuf::from("deleted.rs"));
        assert_eq!(changes.deleted[1], PathBuf::from("removed.rs"));
    }

    #[test]
    fn test_parse_porcelain_renamed() {
        // Pins the current pairing: (entry path, following field).
        // NOTE(review): the git-status manual describes the `-z` rename
        // layout as destination first; confirm this fixture matches real
        // `git status -z` output.
        let changes = parse_porcelain("R  old.rs\0new.rs\0").unwrap();
        assert_eq!(changes.renamed.len(), 1);
        assert_eq!(changes.renamed[0].0, PathBuf::from("old.rs"));
        assert_eq!(changes.renamed[0].1, PathBuf::from("new.rs"));
    }

    #[test]
    fn test_parse_porcelain_copied() {
        // Only the first path of a copy record is recorded, as an addition.
        let changes = parse_porcelain("C  copy.rs\0orig.rs\0").unwrap();
        assert_eq!(changes.added.len(), 1);
        assert_eq!(changes.added[0], PathBuf::from("copy.rs"));
        assert!(changes.renamed.is_empty());
    }

    #[test]
    fn test_parse_porcelain_ignored() {
        // Ignored entries never reach the change set.
        let changes = parse_porcelain("!! target/\0").unwrap();
        assert!(changes.is_empty());
    }

    #[test]
    fn test_parse_porcelain_mixed() {
        let output = "A  a.rs\0 M b.rs\0D  c.rs\0?? d.rs\0";
        let changes = parse_porcelain(output).unwrap();
        assert_eq!(changes.added.len(), 2);
        assert_eq!(changes.modified.len(), 1);
        assert_eq!(changes.deleted.len(), 1);
        assert!(changes.renamed.is_empty());
    }

    #[test]
    fn test_parse_porcelain_merge_conflicts_uu() {
        let changes = parse_porcelain("UU conflict.rs\0").unwrap();
        assert_eq!(changes.modified.len(), 1);
        assert_eq!(changes.modified[0], PathBuf::from("conflict.rs"));
    }

    #[test]
    fn test_parse_porcelain_merge_conflicts_aa() {
        let changes = parse_porcelain("AA both_added.rs\0").unwrap();
        assert_eq!(changes.modified.len(), 1);
        assert_eq!(changes.modified[0], PathBuf::from("both_added.rs"));
    }

    #[test]
    fn test_parse_porcelain_spaces_in_filename() {
        let changes = parse_porcelain("M  file with spaces.rs\0").unwrap();
        assert_eq!(changes.modified.len(), 1);
        assert_eq!(changes.modified[0], PathBuf::from("file with spaces.rs"));
    }

    #[test]
    fn test_parse_porcelain_newlines_in_filename() {
        // NUL-terminated output keeps embedded newlines inside one field.
        let changes = parse_porcelain("M  file\nwith\nnewlines.rs\0").unwrap();
        assert_eq!(changes.modified.len(), 1);
        assert_eq!(
            changes.modified[0],
            PathBuf::from("file\nwith\nnewlines.rs")
        );
    }

    // ---- parse_porcelain: malformed input ----

    #[test]
    fn test_parse_porcelain_malformed_too_short() {
        // Missing space and filename.
        let result = parse_porcelain("M\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    #[test]
    fn test_parse_porcelain_malformed_missing_separator() {
        // Third byte must be the space that separates status from path.
        let result = parse_porcelain("MMfile.rs\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    #[test]
    fn test_parse_porcelain_malformed_rename_missing_new() {
        // Second field of the rename is absent (only the trailing NUL).
        let result = parse_porcelain("R  old.rs\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    #[test]
    fn test_parse_porcelain_malformed_empty_filename() {
        // Status and separator present, but no path after them.
        let result = parse_porcelain("M  \0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    // ---- parse_diff_name_status: well-formed input ----

    #[test]
    fn test_parse_diff_empty() {
        let changes = parse_diff_name_status("").unwrap();
        assert!(changes.is_empty());
    }

    #[test]
    fn test_parse_diff_added() {
        let changes = parse_diff_name_status("A\0new.rs\0").unwrap();
        assert_eq!(changes.added.len(), 1);
        assert_eq!(changes.added[0], PathBuf::from("new.rs"));
    }

    #[test]
    fn test_parse_diff_modified() {
        let changes = parse_diff_name_status("M\0file.rs\0").unwrap();
        assert_eq!(changes.modified.len(), 1);
        assert_eq!(changes.modified[0], PathBuf::from("file.rs"));
    }

    #[test]
    fn test_parse_diff_type_change_and_unmerged() {
        // `T` and `U` are folded into the modified bucket.
        let changes = parse_diff_name_status("T\0link\0U\0conflict.rs\0").unwrap();
        assert_eq!(changes.modified.len(), 2);
        assert_eq!(changes.modified[0], PathBuf::from("link"));
        assert_eq!(changes.modified[1], PathBuf::from("conflict.rs"));
    }

    #[test]
    fn test_parse_diff_deleted() {
        let changes = parse_diff_name_status("D\0old.rs\0").unwrap();
        assert_eq!(changes.deleted.len(), 1);
        assert_eq!(changes.deleted[0], PathBuf::from("old.rs"));
    }

    #[test]
    fn test_parse_diff_renamed() {
        // The similarity score after `R` is accepted and ignored.
        let changes = parse_diff_name_status("R075\0old.rs\0new.rs\0").unwrap();
        assert_eq!(changes.renamed.len(), 1);
        assert_eq!(changes.renamed[0].0, PathBuf::from("old.rs"));
        assert_eq!(changes.renamed[0].1, PathBuf::from("new.rs"));
    }

    #[test]
    fn test_parse_diff_copied() {
        // A copy contributes only its destination, as an addition.
        let changes = parse_diff_name_status("C100\0src.rs\0dst.rs\0").unwrap();
        assert_eq!(changes.added.len(), 1);
        assert_eq!(changes.added[0], PathBuf::from("dst.rs"));
        assert!(changes.renamed.is_empty());
    }

    #[test]
    fn test_parse_diff_complex() {
        let output = "A\0added.rs\0M\0modified.rs\0D\0deleted.rs\0R050\0old.rs\0new.rs\0";
        let changes = parse_diff_name_status(output).unwrap();
        assert_eq!(changes.added.len(), 1);
        assert_eq!(changes.modified.len(), 1);
        assert_eq!(changes.deleted.len(), 1);
        assert_eq!(changes.renamed.len(), 1);
    }

    // ---- parse_diff_name_status: malformed input ----

    #[test]
    fn test_parse_diff_invalid_status() {
        // `Z` is not a recognised status character.
        let result = parse_diff_name_status("Z\0file.rs\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    #[test]
    fn test_parse_diff_malformed_missing_filename() {
        let result = parse_diff_name_status("A\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    #[test]
    fn test_parse_diff_malformed_rename_missing_new() {
        let result = parse_diff_name_status("R075\0old.rs\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    #[test]
    fn test_parse_diff_malformed_copy_missing_new() {
        let result = parse_diff_name_status("C100\0src.rs\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }

    #[test]
    fn test_parse_diff_malformed_empty_filename() {
        // Status is followed by an empty path field.
        let result = parse_diff_name_status("A\0\0");
        assert!(matches!(result, Err(GitError::InvalidOutput(_))));
    }
}
461}