Skip to main content

git_lfs_git/
diff_index.rs

1//! `git diff-index -z` parser.
2//!
3//! Used by `git lfs status` to enumerate staged + unstaged changes
4//! against HEAD. The `-z` form is mandatory for correctness: paths can
5//! contain spaces, newlines, and quoting metacharacters; without `-z`,
6//! git would render those quoted and we'd have to undo the encoding.
7//!
8//! Output format (one record per change):
9//! ```text
10//! :<src-mode> <dst-mode> <src-sha> <dst-sha> <status>\0<src>\0[<dst>\0]
11//! ```
12//! `<status>` is a single letter A/M/D/R/C/T/U/X, optionally followed by
13//! a 1–3 digit similarity score for `R` and `C`. The trailing `<dst>`
14//! field is only present for renames and copies.
15
16use std::path::Path;
17use std::process::Command;
18
19use crate::Error;
20
/// A single change record decoded from `git diff-index -z` output.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DiffEntry {
    /// Object id from the `<src-sha>` metadata column.
    pub src_sha: String,
    /// Object id from the `<dst-sha>` metadata column.
    pub dst_sha: String,
    /// Leading character of the `<status>` column (`A`, `M`, `D`, `R`, ...).
    pub status: char,
    /// Similarity score git appended to the status letter. Set for `R`
    /// (rename) and `C` (copy) records; `None` when no score follows.
    pub similarity: Option<u16>,
    /// First path of the record.
    pub src_name: String,
    /// Second path of the record; only `R` and `C` records have one.
    pub dst_name: Option<String>,
}
33
34impl DiffEntry {
35    /// The "current" path of this entry — `dst_name` for renames/copies
36    /// (which is the path the diff lands at), `src_name` otherwise.
37    pub fn path(&self) -> &str {
38        self.dst_name.as_deref().unwrap_or(&self.src_name)
39    }
40}
41
42/// Run `git diff-index -z [--cached] <ref>` and return the parsed entries.
43///
44/// `cached = true` reports staged changes (HEAD vs index); `cached = false`
45/// reports working-tree changes (HEAD vs working tree, including unstaged).
46pub fn diff_index(cwd: &Path, refname: &str, cached: bool) -> Result<Vec<DiffEntry>, Error> {
47    let mut cmd = Command::new("git");
48    cmd.arg("-C").arg(cwd).args(["diff-index", "-z"]);
49    if cached {
50        cmd.arg("--cached");
51    }
52    cmd.arg(refname);
53    let out = cmd.output()?;
54    if !out.status.success() {
55        return Err(Error::Failed(format!(
56            "git diff-index failed: {}",
57            String::from_utf8_lossy(&out.stderr).trim()
58        )));
59    }
60    parse(&out.stdout)
61}
62
63fn parse(bytes: &[u8]) -> Result<Vec<DiffEntry>, Error> {
64    // Strip the trailing NUL git always emits so the iterator below
65    // doesn't see a phantom empty token at the end.
66    let trimmed = bytes.strip_suffix(b"\0").unwrap_or(bytes);
67    if trimmed.is_empty() {
68        return Ok(Vec::new());
69    }
70
71    let mut tokens = trimmed.split(|&b| b == 0);
72    let mut entries = Vec::new();
73    while let Some(meta) = tokens.next() {
74        let meta_s = std::str::from_utf8(meta)
75            .map_err(|e| Error::Failed(format!("diff-index: non-utf8 metadata: {e}")))?;
76        let body = meta_s
77            .strip_prefix(':')
78            .ok_or_else(|| Error::Failed(format!("diff-index: missing ':' in {meta_s:?}")))?;
79        let parts: Vec<&str> = body.split_whitespace().collect();
80        if parts.len() != 5 {
81            return Err(Error::Failed(format!(
82                "diff-index: expected 5 metadata fields in {meta_s:?}, got {}",
83                parts.len()
84            )));
85        }
86        let src_sha = parts[2].to_owned();
87        let dst_sha = parts[3].to_owned();
88        let status_field = parts[4];
89        let status = status_field
90            .chars()
91            .next()
92            .ok_or_else(|| Error::Failed(format!("diff-index: empty status in {meta_s:?}")))?;
93        let similarity = if status_field.len() > 1 {
94            status_field[1..].parse::<u16>().ok()
95        } else {
96            None
97        };
98
99        let src = tokens.next().ok_or_else(|| {
100            Error::Failed(format!("diff-index: missing src name for {meta_s:?}"))
101        })?;
102        let src_name = std::str::from_utf8(src)
103            .map_err(|e| Error::Failed(format!("diff-index: non-utf8 src name: {e}")))?
104            .to_owned();
105
106        let dst_name = if matches!(status, 'R' | 'C') {
107            let dst = tokens.next().ok_or_else(|| {
108                Error::Failed(format!(
109                    "diff-index: missing dst name for {status} record {meta_s:?}"
110                ))
111            })?;
112            Some(
113                std::str::from_utf8(dst)
114                    .map_err(|e| Error::Failed(format!("diff-index: non-utf8 dst name: {e}")))?
115                    .to_owned(),
116            )
117        } else {
118            None
119        };
120
121        entries.push(DiffEntry {
122            src_sha,
123            dst_sha,
124            status,
125            similarity,
126            src_name,
127            dst_name,
128        });
129    }
130    Ok(entries)
131}
132
#[cfg(test)]
mod tests {
    //! Unit tests for the `-z` record parser, plus one end-to-end test
    //! that runs `git diff-index` against a scratch repository.

    use super::*;

    #[test]
    fn parse_empty_input() {
        assert!(parse(b"").unwrap().is_empty());
        assert!(parse(b"\0").unwrap().is_empty());
    }

    #[test]
    fn parse_modification() {
        let raw = b":100644 100644 abc 123 M\0file.txt\0";
        let entries = parse(raw).unwrap();
        assert_eq!(entries.len(), 1);
        let e = &entries[0];
        assert_eq!(e.src_sha, "abc");
        assert_eq!(e.dst_sha, "123");
        assert_eq!(e.status, 'M');
        assert_eq!(e.similarity, None);
        assert_eq!(e.src_name, "file.txt");
        assert_eq!(e.dst_name, None);
    }

    #[test]
    fn parse_addition_has_zero_src_sha() {
        let raw = b":000000 100644 0000000 1234567 A\0new.bin\0";
        let entries = parse(raw).unwrap();
        assert_eq!(entries[0].status, 'A');
        assert_eq!(entries[0].src_sha, "0000000");
        assert_eq!(entries[0].dst_sha, "1234567");
    }

    #[test]
    fn parse_rename_with_score_and_two_paths() {
        let raw = b":100644 100644 abc 123 R86\0old/path.txt\0new/path.txt\0";
        let entries = parse(raw).unwrap();
        let e = &entries[0];
        assert_eq!(e.status, 'R');
        assert_eq!(e.similarity, Some(86));
        assert_eq!(e.src_name, "old/path.txt");
        assert_eq!(e.dst_name.as_deref(), Some("new/path.txt"));
        assert_eq!(e.path(), "new/path.txt");
    }

    #[test]
    fn parse_multiple_records() {
        let raw = b":100644 100644 a 1 M\0a.txt\0\
                   :100644 100644 b 2 M\0b.txt\0\
                   :100644 100644 c 3 R100\0c.txt\0d.txt\0";
        let entries = parse(raw).unwrap();
        assert_eq!(entries.len(), 3);
        assert_eq!(entries[0].src_name, "a.txt");
        assert_eq!(entries[1].src_name, "b.txt");
        assert_eq!(entries[2].status, 'R');
        assert_eq!(entries[2].dst_name.as_deref(), Some("d.txt"));
    }

    #[test]
    fn parse_path_with_embedded_special_chars() {
        // With -z, paths are emitted raw — including newlines and tabs
        // that would normally be quote-escaped without -z.
        let raw = b":100644 100644 a 1 M\0name with\nnewline\0";
        let entries = parse(raw).unwrap();
        assert_eq!(entries[0].src_name, "name with\nnewline");
    }

    #[test]
    fn parse_missing_colon_errors() {
        let raw = b"100644 100644 a 1 M\0file\0";
        assert!(parse(raw).is_err());
    }

    #[test]
    fn parse_truncated_record_errors() {
        // Status R, but no dst name follows — malformed.
        let raw = b":100644 100644 a 1 R86\0only-src\0";
        assert!(parse(raw).is_err());
    }

    #[test]
    fn diff_index_against_real_repo_finds_staged_modification() {
        use crate::tests::commit_helper::*;
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"first");
        // Modify and stage.
        std::fs::write(repo.path().join("a.txt"), b"second").unwrap();
        let added = std::process::Command::new("git")
            .arg("-C")
            .arg(repo.path())
            .args(["add", "a.txt"])
            .status()
            .unwrap();
        // Fail here, with a clear message, rather than on a later
        // assertion if staging itself did not work.
        assert!(added.success(), "git add failed: {added}");

        let staged = diff_index(repo.path(), "HEAD", true).unwrap();
        assert_eq!(staged.len(), 1, "{staged:?}");
        assert_eq!(staged[0].status, 'M');
        assert_eq!(staged[0].src_name, "a.txt");

        // Without --cached, diff_index compares HEAD against the working
        // tree. The working tree matches the index here, but both differ
        // from HEAD, so the staged change is reported again — the caller
        // is responsible for deduping (which `status` does).
        let worktree = diff_index(repo.path(), "HEAD", false).unwrap();
        assert_eq!(worktree.len(), 1);
    }
}