Skip to main content

git_lfs_git/
rev_list.rs

//! `git rev-list --objects --do-walk --stdin` wrapper.
//!
//! Walks history reachable from `include` refs but not from `exclude`
//! refs, emitting every commit + tree + blob OID along the way (with the
//! object's path appended for blobs and trees that have a name in their
//! parent tree). This is the entry point upstream uses to find every
//! object that *could* be an LFS pointer; we then narrow with
//! `cat-file --batch-check` and read the survivors with `cat-file --batch`.
//!
//! Output format from `git rev-list --objects` is one object per line,
//! either `<oid>` (commit) or `<oid> <name>` (tree/blob with a path). A
//! root tree has no name and is printed as `<oid>` followed by a single
//! trailing space.
12
13use std::io::{BufRead, BufReader, Write};
14use std::path::Path;
15use std::process::{Command, Stdio};
16
17use crate::Error;
18
/// A single object reported by [`rev_list`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RevListEntry {
    /// Object ID, stored exactly as it appeared in git's output.
    pub oid: String,
    /// Name of the object inside its parent tree: `Some` for trees and
    /// blobs that have one, `None` for commits and root trees.
    pub name: Option<String>,
}
27
28/// Run `git rev-list --objects --do-walk --stdin -- ` against `cwd` with
29/// the given include/exclude refs and collect every emitted object.
30///
31/// Refs are passed via stdin (one per line) so we don't blow the
32/// command-line length limit on big refspecs. Excludes are prefixed with
33/// `^` per `git-rev-list(1)`.
34///
35/// Returns OIDs in the order git emitted them. Callers that need
36/// deduplication should layer it on top.
37pub fn rev_list(
38    cwd: &Path,
39    include: &[&str],
40    exclude: &[&str],
41) -> Result<Vec<RevListEntry>, Error> {
42    let mut child = Command::new("git")
43        .arg("-C")
44        .arg(cwd)
45        .args(["rev-list", "--objects", "--do-walk", "--stdin", "--"])
46        .stdin(Stdio::piped())
47        .stdout(Stdio::piped())
48        .stderr(Stdio::piped())
49        .spawn()?;
50
51    {
52        let mut stdin = child.stdin.take().expect("piped");
53        for r in include {
54            writeln!(stdin, "{r}")?;
55        }
56        for r in exclude {
57            writeln!(stdin, "^{r}")?;
58        }
59        // Dropping `stdin` closes the pipe so rev-list can finish reading.
60    }
61
62    let stdout = child.stdout.take().expect("piped");
63    let mut entries = Vec::new();
64    for line in BufReader::new(stdout).lines() {
65        let line = line?;
66        if line.is_empty() {
67            continue;
68        }
69        entries.push(parse_line(&line));
70    }
71
72    let status = child.wait()?;
73    if !status.success() {
74        let mut stderr = String::new();
75        if let Some(mut s) = child.stderr {
76            use std::io::Read;
77            let _ = s.read_to_string(&mut stderr);
78        }
79        return Err(Error::Failed(format!(
80            "git rev-list failed: {}",
81            stderr.trim()
82        )));
83    }
84    Ok(entries)
85}
86
87fn parse_line(line: &str) -> RevListEntry {
88    match line.split_once(' ') {
89        Some((oid, name)) => RevListEntry {
90            oid: oid.to_owned(),
91            name: Some(name.to_owned()),
92        },
93        None => RevListEntry {
94            oid: line.to_owned(),
95            name: None,
96        },
97    }
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::commit_helper::*;

    /// A line without a space is a bare OID and carries no name.
    #[test]
    fn parse_line_commit_only() {
        let entry = parse_line("1234567");
        assert_eq!(entry.oid, "1234567");
        assert!(entry.name.is_none());
    }

    /// The text after the first space is the object's path.
    #[test]
    fn parse_line_blob_with_path() {
        let entry = parse_line("1234567 path/to/file.bin");
        assert_eq!(entry.oid, "1234567");
        assert_eq!(entry.name.as_deref(), Some("path/to/file.bin"));
    }

    /// With no revisions supplied at all, the walk yields no objects.
    #[test]
    fn rev_list_empty_include_returns_nothing() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"hello");

        let listed = rev_list(repo.path(), &[], &[]).unwrap();

        assert!(listed.is_empty());
    }

    #[test]
    fn rev_list_one_commit_yields_commit_tree_and_blob() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"hello");

        let entries = rev_list(repo.path(), &["HEAD"], &[]).unwrap();

        // Exactly three objects: the commit, its root tree and the single
        // blob. Git decides the order, but the commit is expected first,
        // followed by the tree and then the blobs.
        assert_eq!(entries.len(), 3, "{entries:?}");
        assert!(entries[0].name.is_none(), "commit has no name");
        let has_blob = entries
            .iter()
            .any(|entry| entry.name.as_deref() == Some("a.txt"));
        assert!(has_blob, "no blob with path 'a.txt' in {entries:?}");
    }

    #[test]
    fn rev_list_excludes_filter_ancestors() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"first");
        let first = head_oid(&repo);
        commit_file(&repo, "b.txt", b"second");

        // Walking HEAD while excluding the first commit should leave only
        // the second commit, its tree and the newly added blob; a.txt did
        // not change, so it must not show up again.
        let entries = rev_list(repo.path(), &["HEAD"], &[&first]).unwrap();

        let mut names = Vec::new();
        for entry in &entries {
            if let Some(name) = entry.name.as_deref() {
                names.push(name);
            }
        }
        assert!(names.contains(&"b.txt"), "{entries:?}");
        assert!(!names.contains(&"a.txt"), "{entries:?}");
    }

    /// An unresolvable ref surfaces as `Error::Failed` carrying git's message.
    #[test]
    fn rev_list_unknown_ref_errors() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"x");

        let err = rev_list(repo.path(), &["does-not-exist"], &[]).unwrap_err();

        match &err {
            Error::Failed(msg) => {
                let mentions_ref = msg.contains("does-not-exist");
                let says_unknown = msg.contains("unknown");
                assert!(mentions_ref || says_unknown);
            }
            _ => panic!("expected Failed, got {err:?}"),
        }
    }
}