Skip to main content

git_lfs_git/
rev_list.rs

1//! `git rev-list --objects --do-walk --stdin` wrapper.
2//!
3//! Walks history reachable from `include` refs but not from `exclude`
4//! refs, emitting every commit + tree + blob OID along the way (with the
5//! blob's path appended for blobs and trees that have a name in the
6//! parent tree). This is the entry point upstream uses to find every
7//! object that *could* be an LFS pointer; we then narrow with
8//! `cat-file --batch-check` and read the survivors with `cat-file --batch`.
9//!
10//! Output format from `git rev-list --objects` is one object per line,
11//! either `<oid>` (commit) or `<oid> <name>` (tree/blob with a path).
12
13use std::io::{BufRead, BufReader, Write};
14use std::path::Path;
15use std::process::{Command, Stdio};
16
17use crate::Error;
18
/// A single object reported by [`rev_list`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RevListEntry {
    /// Object ID, copied verbatim from the start of the output line.
    pub oid: String,
    /// Path of the object inside its parent tree. `Some` for trees and
    /// blobs that have such a path; `None` for commits and root trees.
    pub name: Option<String>,
}
27
28/// Run `git rev-list --objects --do-walk --stdin -- ` against `cwd` with
29/// the given include/exclude refs and collect every emitted object.
30///
31/// Refs are passed via stdin (one per line) so we don't blow the
32/// command-line length limit on big refspecs. Excludes are prefixed with
33/// `^` per `git-rev-list(1)`.
34///
35/// Returns OIDs in the order git emitted them. Callers that need
36/// deduplication should layer it on top.
37pub fn rev_list(
38    cwd: &Path,
39    include: &[&str],
40    exclude: &[&str],
41) -> Result<Vec<RevListEntry>, Error> {
42    rev_list_with_args(cwd, include, exclude, &[])
43}
44
45/// [`rev_list`] with extra command-line args spliced before `--stdin`.
46///
47/// Used for the upstream `--not --remotes=<name>` optimization: pre-push
48/// invokes rev-list with that pair on the command line so the trace
49/// (`GIT_TRACE=1`) shows it verbatim — `t-pre-push.sh` greps for
50/// `rev-list.*--not --remotes=origin` to confirm the optimization
51/// kicked in for a `git push <url>` whose URL matches a configured
52/// remote.
53pub fn rev_list_with_args(
54    cwd: &Path,
55    include: &[&str],
56    exclude: &[&str],
57    extra_cmdline_args: &[&str],
58) -> Result<Vec<RevListEntry>, Error> {
59    let mut cmd = Command::new("git");
60    cmd.arg("-C").arg(cwd);
61    cmd.args(["rev-list", "--objects", "--do-walk"]);
62    cmd.args(extra_cmdline_args);
63    cmd.args(["--stdin", "--"]);
64    // Inherit stderr so `GIT_TRACE=1` users see the rev-list
65    // invocation. t-pre-push 37 greps the trace for a literal
66    // `rev-list.*--not --remotes=origin` to confirm the upstream
67    // optimization fired. The cost is failure messages no longer
68    // appear in our wrapped Error — exit status still tells us
69    // *that* it failed, just not why.
70    let mut child = cmd
71        .stdin(Stdio::piped())
72        .stdout(Stdio::piped())
73        .stderr(Stdio::inherit())
74        .spawn()?;
75
76    {
77        let mut stdin = child.stdin.take().expect("piped");
78        for r in include {
79            writeln!(stdin, "{r}")?;
80        }
81        for r in exclude {
82            writeln!(stdin, "^{r}")?;
83        }
84        // Dropping `stdin` closes the pipe so rev-list can finish reading.
85    }
86
87    let stdout = child.stdout.take().expect("piped");
88    let mut entries = Vec::new();
89    for line in BufReader::new(stdout).lines() {
90        let line = line?;
91        if line.is_empty() {
92            continue;
93        }
94        entries.push(parse_line(&line));
95    }
96
97    let status = child.wait()?;
98    if !status.success() {
99        return Err(Error::Failed(format!("git rev-list failed: {status}")));
100    }
101    Ok(entries)
102}
103
104fn parse_line(line: &str) -> RevListEntry {
105    match line.split_once(' ') {
106        Some((oid, name)) => RevListEntry {
107            oid: oid.to_owned(),
108            name: Some(name.to_owned()),
109        },
110        None => RevListEntry {
111            oid: line.to_owned(),
112            name: None,
113        },
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::commit_helper::*;

    /// A line holding only an OID parses to an entry without a name.
    #[test]
    fn parse_line_commit_only() {
        let entry = parse_line("1234567");
        assert_eq!(entry.oid, "1234567");
        assert!(entry.name.is_none());
    }

    /// `<oid> <path>` splits into the OID and the path.
    #[test]
    fn parse_line_blob_with_path() {
        let entry = parse_line("1234567 path/to/file.bin");
        assert_eq!(entry.oid, "1234567");
        assert_eq!(entry.name.as_deref(), Some("path/to/file.bin"));
    }

    /// With no refs on stdin there is nothing to walk.
    #[test]
    fn rev_list_empty_include_returns_nothing() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"hello");
        let entries = rev_list(repo.path(), &[], &[]).unwrap();
        assert!(entries.is_empty());
    }

    /// A single-commit history lists the commit, its tree and its blob.
    #[test]
    fn rev_list_one_commit_yields_commit_tree_and_blob() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"hello");
        let entries = rev_list(repo.path(), &["HEAD"], &[]).unwrap();

        // Three objects in total: the commit, the root tree and the one
        // blob. git chooses the order, but the commit is listed first.
        assert_eq!(entries.len(), 3, "{entries:?}");
        assert!(entries[0].name.is_none(), "commit has no name");
        let has_blob = entries
            .iter()
            .any(|entry| entry.name.as_deref() == Some("a.txt"));
        assert!(has_blob, "no blob with path 'a.txt' in {entries:?}");
    }

    /// Objects reachable from an excluded ref are left out of the walk.
    #[test]
    fn rev_list_excludes_filter_ancestors() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"first");
        let first = head_oid(&repo);
        commit_file(&repo, "b.txt", b"second");

        // Walking HEAD while excluding the first commit leaves only the
        // second commit, its tree and the newly added blob; a.txt did
        // not change, so it must not show up again.
        let entries = rev_list(repo.path(), &["HEAD"], &[&first]).unwrap();
        let has_path = |path: &str| {
            entries
                .iter()
                .any(|entry| entry.name.as_deref() == Some(path))
        };
        assert!(has_path("b.txt"), "{entries:?}");
        assert!(!has_path("a.txt"), "{entries:?}");
    }

    /// A ref git cannot resolve surfaces as `Error::Failed`.
    #[test]
    fn rev_list_unknown_ref_errors() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"x");
        // Only the error variant is checked: git's stderr goes straight
        // to the parent (so `GIT_TRACE=1` users see it directly), hence
        // the wrapped message does not contain git's own text.
        let outcome = rev_list(repo.path(), &["does-not-exist"], &[]);
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::Failed(_)), "got {err:?}");
    }
}