Skip to main content

git_lfs_git/
scanner.rs

//! Scanner: walk git history, find LFS pointer blobs.
//!
//! This is the entry point used by `git lfs fetch`/`pull`/`push` to
//! enumerate the LFS pointers reachable from a set of refs. The pipeline
//! mirrors upstream:
//!
//! 1. [`rev_list`](crate::rev_list::rev_list) emits every reachable object
//!    (commits, trees, blobs).
//! 2. [`CatFileBatchCheck`] filters those to blobs whose size could fit in
//!    a pointer file (smaller than [`MAX_POINTER_SIZE`]). This is a cheap
//!    header-only check: no blob content is read.
//! 3. [`CatFileBatch`] reads the surviving candidates' content. Each is
//!    parsed as a [`Pointer`]; non-pointers are silently skipped.
//! 4. The output is deduplicated by LFS OID (the pointer's content OID,
//!    not the git blob OID): the same LFS object can appear in many
//!    blobs/paths, but we only need to fetch it once.
17
18use std::path::{Path, PathBuf};
19use std::process::Command;
20
21use git_lfs_pointer::{Extension, MAX_POINTER_SIZE, Oid, Pointer};
22
23use crate::Error;
24use crate::cat_file::{CatFileBatch, CatFileBatchCheck, CatFileHeader};
25
26/// One LFS pointer discovered by the scanner.
27#[derive(Debug, Clone)]
28pub struct PointerEntry {
29    /// LFS object OID (the `oid sha256:...` field of the pointer file).
30    pub oid: Oid,
31    /// Object size in bytes (per the pointer's `size` field).
32    pub size: u64,
33    /// First working-tree path the pointer was found at. A single LFS
34    /// object can appear under many paths in history; we keep the first.
35    /// Useful for progress display ("downloading foo/bar.bin"); not the
36    /// authoritative source — caller should not rely on it for routing.
37    pub path: Option<PathBuf>,
38    /// Every working-tree path the pointer was seen at (across history
39    /// and refs). Callers that filter by path (`--include`/`--exclude`)
40    /// must check this set rather than just `path`, otherwise an LFS
41    /// OID shared between two paths gets filtered out whenever the
42    /// scanner happens to dedup down to the wrong one. Always
43    /// non-empty when `path` is `Some`.
44    pub paths: Vec<PathBuf>,
45    /// `true` if the pointer's source bytes were byte-canonical. Used by
46    /// `git lfs fsck --pointers` to flag pointers that parse but don't
47    /// match the canonical encoding.
48    pub canonical: bool,
49    /// Pointer extensions in priority-ascending order, mirroring
50    /// `Pointer::extensions`. Empty for plain pointers; non-empty when
51    /// the file was committed through a configured `lfs.extension.<n>`
52    /// chain. The materialize/checkout paths replay these in reverse to
53    /// reconstruct the working-tree content.
54    pub extensions: Vec<Extension>,
55}
56
57/// Walk history reachable from `include` minus `exclude`, return unique
58/// LFS pointers.
59///
60/// Order is undefined and should not be relied on. Callers that want a
61/// stable order should sort the result.
62///
63/// **History semantics**: matches upstream's `ScanRefs` — every blob in
64/// every commit's tree is examined, including blobs that have since been
65/// deleted or modified. This catches LFS objects from the full history
66/// of the named refs, which is what `git lfs fetch <ref>` is documented
67/// to do.
68pub fn scan_pointers(
69    cwd: &Path,
70    include: &[&str],
71    exclude: &[&str],
72) -> Result<Vec<PointerEntry>, Error> {
73    scan_pointers_with_args(cwd, include, exclude, &[])
74}
75
76/// [`scan_pointers`] with extra rev-list cmdline args. See
77/// [`rev_list_with_args`](crate::rev_list_with_args).
78pub fn scan_pointers_with_args(
79    cwd: &Path,
80    include: &[&str],
81    exclude: &[&str],
82    extra_cmdline_args: &[&str],
83) -> Result<Vec<PointerEntry>, Error> {
84    let entries = crate::rev_list::rev_list_with_args(cwd, include, exclude, extra_cmdline_args)?;
85
86    // Phase 1: header-only check. Filter to blobs whose size could plausibly
87    // be a pointer file. Tracking name alongside so we can report it.
88    let mut bcheck = CatFileBatchCheck::spawn(cwd)?;
89    let mut candidates: Vec<(String, Option<String>)> = Vec::new();
90    for entry in entries {
91        match bcheck.check(&entry.oid)? {
92            CatFileHeader::Found { kind, size, .. }
93                if kind == "blob" && (size as usize) < MAX_POINTER_SIZE =>
94            {
95                candidates.push((entry.oid, entry.name));
96            }
97            // Trees, commits, oversized blobs, missing — all skipped.
98            _ => {}
99        }
100    }
101    drop(bcheck);
102
103    // Phase 2: read content of each candidate, parse as pointer, dedup
104    // by LFS OID. Same LFS object referenced from multiple paths/commits
105    // collapses to one entry — but we accumulate every path it appeared
106    // at so include/exclude filters can match any of them.
107    let mut batch = CatFileBatch::spawn(cwd)?;
108    let mut by_oid: std::collections::HashMap<Oid, usize> = std::collections::HashMap::new();
109    let mut out: Vec<PointerEntry> = Vec::new();
110    for (oid, name) in candidates {
111        let Some(blob) = batch.read(&oid)? else {
112            continue;
113        };
114        let Ok(pointer) = Pointer::parse(&blob.content) else {
115            continue;
116        };
117        let path_buf = name.map(PathBuf::from);
118        if let Some(&idx) = by_oid.get(&pointer.oid) {
119            if let Some(p) = path_buf
120                && !out[idx].paths.contains(&p)
121            {
122                out[idx].paths.push(p);
123            }
124            continue;
125        }
126        let paths: Vec<PathBuf> = path_buf.iter().cloned().collect();
127        by_oid.insert(pointer.oid, out.len());
128        out.push(PointerEntry {
129            oid: pointer.oid,
130            size: pointer.size,
131            path: path_buf,
132            paths,
133            canonical: pointer.canonical,
134            extensions: pointer.extensions.clone(),
135        });
136    }
137    Ok(out)
138}
139
140/// Scan the index for LFS pointers via
141/// `git ls-files --stage -z -- :(attr:filter=lfs)`.
142///
143/// Honors sparse-checkout (only entries in the sparse cone are listed)
144/// and works in bare repos against whatever's been written into the
145/// index. Empty result when the index is empty or no path matches the
146/// `filter=lfs` attribute. Symlinks (mode 120000) are skipped — they
147/// can never be LFS pointers.
148///
149/// This is the discovery path upstream's pull / fetch use on Git 2.42+;
150/// it sidesteps the rev-list traversal that's expensive on partial
151/// clones with `--filter=tree:0` and over-broad in bare repos with no
152/// committed `.gitattributes` reachable via the index.
153pub fn scan_index_lfs(cwd: &Path) -> Result<Vec<PointerEntry>, Error> {
154    // Run from the work-tree top (or git-dir for bare): `git ls-files`
155    // from a subdir restricts output to that subdir's entries, so
156    // running from `repo/dir1/` would miss `repo/a.dat`. Resolve via
157    // `--show-toplevel` first; fall back to the git-dir for bare repos
158    // (which legitimately have no work tree).
159    let scan_cwd = match crate::run_git(cwd, &["rev-parse", "--show-toplevel"]) {
160        Ok(s) if !s.is_empty() => PathBuf::from(s),
161        _ => crate::run_git(cwd, &["rev-parse", "--absolute-git-dir"])
162            .map(PathBuf::from)
163            .unwrap_or_else(|_| cwd.to_path_buf()),
164    };
165    // Apply a parent-dir-existence filter only when there's a reason
166    // to: cone-mode sparse-checkout marks out-of-cone entries by
167    // omitting their working-tree parents, and bare repos have no
168    // working-tree subdirs at all. For ordinary checkouts where the
169    // user just `rm`'d a file, we want to fetch and restore — not
170    // skip — so the filter stays off.
171    let filter_by_parent_dir = is_bare_repo(&scan_cwd) || is_sparse_checkout(&scan_cwd);
172
173    let out = Command::new("git")
174        .arg("-C")
175        .arg(&scan_cwd)
176        .args(["ls-files", "--stage", "-z", "--", ":(attr:filter=lfs)"])
177        .output()?;
178    if !out.status.success() {
179        return Err(Error::Failed(
180            String::from_utf8_lossy(&out.stderr).trim().to_owned(),
181        ));
182    }
183
184    let mut candidates: Vec<(String, PathBuf)> = Vec::new();
185    for record in out.stdout.split(|&b| b == 0).filter(|s| !s.is_empty()) {
186        let s = match std::str::from_utf8(record) {
187            Ok(s) => s,
188            Err(_) => continue,
189        };
190        // `<mode> SP <oid> SP <stage>\t<path>`
191        let Some((meta, path)) = s.split_once('\t') else {
192            continue;
193        };
194        let parts: Vec<&str> = meta.split_whitespace().collect();
195        if parts.len() < 3 {
196            continue;
197        }
198        let mode = parts[0];
199        let oid = parts[1];
200        if mode == "120000" {
201            continue;
202        }
203        let path = PathBuf::from(path);
204        // Skip paths whose parent dir isn't materialized in the work
205        // tree: that's how cone-mode sparse-checkout marks out-of-cone
206        // entries when ls-files emits the *expanded* index (the trees
207        // are local but the working-tree dirs were never created).
208        // The same check naturally drops non-root entries in bare
209        // repos, where only the top-level scan_cwd exists as a
210        // directory. Skipped on plain checkouts so a user `rm`'d
211        // file still gets restored by `git lfs pull`.
212        if filter_by_parent_dir
213            && let Some(parent) = path.parent()
214            && !parent.as_os_str().is_empty()
215            && !scan_cwd.join(parent).is_dir()
216        {
217            continue;
218        }
219        candidates.push((oid.to_string(), path));
220    }
221    if candidates.is_empty() {
222        return Ok(Vec::new());
223    }
224
225    let mut batch = CatFileBatch::spawn(cwd)?;
226    let mut by_oid: std::collections::HashMap<Oid, usize> = std::collections::HashMap::new();
227    let mut out: Vec<PointerEntry> = Vec::new();
228    for (oid, path) in candidates {
229        let Some(blob) = batch.read(&oid)? else {
230            continue;
231        };
232        let Ok(pointer) = Pointer::parse(&blob.content) else {
233            continue;
234        };
235        if let Some(&idx) = by_oid.get(&pointer.oid) {
236            if !out[idx].paths.contains(&path) {
237                out[idx].paths.push(path);
238            }
239            continue;
240        }
241        by_oid.insert(pointer.oid, out.len());
242        out.push(PointerEntry {
243            oid: pointer.oid,
244            size: pointer.size,
245            path: Some(path.clone()),
246            paths: vec![path],
247            canonical: pointer.canonical,
248            extensions: pointer.extensions.clone(),
249        });
250    }
251    Ok(out)
252}
253
254fn is_bare_repo(cwd: &Path) -> bool {
255    crate::run_git(cwd, &["rev-parse", "--is-bare-repository"])
256        .map(|s| s.trim() == "true")
257        .unwrap_or(false)
258}
259
260fn is_sparse_checkout(cwd: &Path) -> bool {
261    crate::run_git(cwd, &["config", "--get", "core.sparseCheckout"])
262        .map(|s| s.trim().eq_ignore_ascii_case("true"))
263        .unwrap_or(false)
264}
265
/// A blob encountered during a tree walk, reported before any size
/// filtering or pointer parsing is applied. The path, OID and mode are
/// passed through exactly as `git ls-tree` printed them.
#[derive(Debug, Clone)]
pub struct TreeBlob {
    /// Path of the blob within the tree.
    pub path: PathBuf,
    /// Git object ID of the blob, as printed by `git ls-tree`.
    pub blob_oid: String,
    /// Blob size in bytes, as reported by `cat-file --batch-check`.
    pub size: u64,
    /// Tree-entry mode as an octal string (e.g. `100644`, `100755`, or
    /// `120000` for symlinks). Consumers that classify entries by mode —
    /// such as `fsck --pointers` skipping symlinks — read this field.
    pub mode: String,
}
282
283/// Walk the tree at `reference` and return *every* blob — no size filter,
284/// no pointer parsing. Used by `fsck --pointers` for its full-tree sweep
285/// when classifying paths against `.gitattributes`.
286pub fn scan_tree_blobs(cwd: &Path, reference: &str) -> Result<Vec<TreeBlob>, Error> {
287    // `git ls-tree` only takes a tree-ish, not a range. For a `<a>..<b>`
288    // reference (used by `git lfs fsck HEAD^..HEAD`), walk every commit
289    // in the range and union their tree blobs (deduped by path+oid). A
290    // bare ref still takes the cheap one-shot path.
291    if reference.contains("..") {
292        return scan_blobs_in_range(cwd, reference);
293    }
294    scan_tree_blobs_for_ref(cwd, reference)
295}
296
297fn scan_tree_blobs_for_ref(cwd: &Path, reference: &str) -> Result<Vec<TreeBlob>, Error> {
298    let out = Command::new("git")
299        .arg("-C")
300        .arg(cwd)
301        .args(["ls-tree", "--full-tree", "-r", "-z", reference])
302        .output()?;
303    if !out.status.success() {
304        return Err(Error::Failed(format!(
305            "git ls-tree failed: {}",
306            String::from_utf8_lossy(&out.stderr).trim()
307        )));
308    }
309    let mut bcheck = CatFileBatchCheck::spawn(cwd)?;
310    let mut blobs = Vec::new();
311    for record in out.stdout.split(|&b| b == 0).filter(|s| !s.is_empty()) {
312        let s = std::str::from_utf8(record)
313            .map_err(|e| Error::Failed(format!("ls-tree: non-utf8 record: {e}")))?;
314        let (header, path) = s
315            .split_once('\t')
316            .ok_or_else(|| Error::Failed(format!("ls-tree: malformed record {s:?}")))?;
317        let mut parts = header.split_whitespace();
318        let mode = parts
319            .next()
320            .ok_or_else(|| Error::Failed(format!("ls-tree: missing mode in {s:?}")))?;
321        let kind = parts.next();
322        let oid = parts
323            .next()
324            .ok_or_else(|| Error::Failed(format!("ls-tree: missing oid in {s:?}")))?;
325        if kind != Some("blob") {
326            continue;
327        }
328        if let CatFileHeader::Found { kind, size, .. } = bcheck.check(oid)?
329            && kind == "blob"
330        {
331            blobs.push(TreeBlob {
332                path: PathBuf::from(path),
333                blob_oid: oid.to_owned(),
334                size,
335                mode: mode.to_owned(),
336            });
337        }
338    }
339    Ok(blobs)
340}
341
342/// Expand a `<a>..<b>` rev-range into the concrete commits it names
343/// and union their tree blobs (deduped by path + blob OID). Mirrors
344/// upstream's behavior for `git lfs fsck HEAD^..HEAD`: every blob
345/// reachable from any commit in the range is checked once.
346fn scan_blobs_in_range(cwd: &Path, range: &str) -> Result<Vec<TreeBlob>, Error> {
347    let out = Command::new("git")
348        .arg("-C")
349        .arg(cwd)
350        .args(["rev-list", range])
351        .output()?;
352    if !out.status.success() {
353        return Err(Error::Failed(format!(
354            "git rev-list failed: {}",
355            String::from_utf8_lossy(&out.stderr).trim()
356        )));
357    }
358    let mut seen: std::collections::HashSet<(PathBuf, String)> = std::collections::HashSet::new();
359    let mut all = Vec::new();
360    for line in String::from_utf8_lossy(&out.stdout).lines() {
361        let commit = line.trim();
362        if commit.is_empty() {
363            continue;
364        }
365        for blob in scan_tree_blobs_for_ref(cwd, commit)? {
366            if seen.insert((blob.path.clone(), blob.blob_oid.clone())) {
367                all.push(blob);
368            }
369        }
370    }
371    Ok(all)
372}
373
374/// Walk the tree at `reference`, returning one entry per LFS pointer blob.
375///
376/// Unlike [`scan_pointers`], this does *not* walk history and does *not*
377/// dedupe by LFS OID — each path in the tree that points at an LFS
378/// pointer becomes its own entry. Multiple paths pointing at the same
379/// LFS object yield multiple entries, with their working-tree paths
380/// preserved. This matches upstream's `ScanTree` semantics, used by
381/// `ls-files` and `status`.
382///
383/// Paths are read from `git ls-tree -r -z` so embedded newlines or
384/// quoting metacharacters round-trip cleanly.
385pub fn scan_tree(cwd: &Path, reference: &str) -> Result<Vec<PointerEntry>, Error> {
386    let out = Command::new("git")
387        .arg("-C")
388        .arg(cwd)
389        .args(["ls-tree", "--full-tree", "-r", "-z", reference])
390        .output()?;
391    if !out.status.success() {
392        return Err(Error::Failed(format!(
393            "git ls-tree failed: {}",
394            String::from_utf8_lossy(&out.stderr).trim()
395        )));
396    }
397
398    // Phase 1: parse `<mode> <type> <oid>\t<path>` records, keep blobs
399    // small enough to be a pointer.
400    let mut bcheck = CatFileBatchCheck::spawn(cwd)?;
401    let mut candidates: Vec<(String, String)> = Vec::new();
402    for record in out.stdout.split(|&b| b == 0).filter(|s| !s.is_empty()) {
403        let s = std::str::from_utf8(record)
404            .map_err(|e| Error::Failed(format!("ls-tree: non-utf8 record: {e}")))?;
405        let (header, path) = s
406            .split_once('\t')
407            .ok_or_else(|| Error::Failed(format!("ls-tree: malformed record {s:?}")))?;
408        let mut parts = header.split_whitespace();
409        let _mode = parts.next();
410        let kind = parts.next();
411        let oid = parts
412            .next()
413            .ok_or_else(|| Error::Failed(format!("ls-tree: missing oid in {s:?}")))?;
414        if kind != Some("blob") {
415            continue;
416        }
417        if let CatFileHeader::Found { kind, size, .. } = bcheck.check(oid)?
418            && kind == "blob"
419            && (size as usize) < MAX_POINTER_SIZE
420        {
421            candidates.push((oid.to_owned(), path.to_owned()));
422        }
423    }
424    drop(bcheck);
425
426    // Phase 2: read each candidate blob, parse as pointer, emit one
427    // entry per path. No OID dedup — that's intentional, callers may
428    // want to know every path an object lives at in this tree.
429    let mut batch = CatFileBatch::spawn(cwd)?;
430    let mut entries = Vec::new();
431    for (oid, path) in candidates {
432        let Some(blob) = batch.read(&oid)? else {
433            continue;
434        };
435        let Ok(pointer) = Pointer::parse(&blob.content) else {
436            continue;
437        };
438        let path_buf = PathBuf::from(path);
439        entries.push(PointerEntry {
440            oid: pointer.oid,
441            size: pointer.size,
442            path: Some(path_buf.clone()),
443            paths: vec![path_buf],
444            canonical: pointer.canonical,
445            extensions: pointer.extensions.clone(),
446        });
447    }
448    Ok(entries)
449}
450
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::commit_helper::*;

    /// Render the canonical pointer file for `content`, i.e. what
    /// `git lfs clean` would produce. Built by hand so these tests do
    /// not have to pull in the filter crate.
    fn pointer_text(content: &[u8]) -> Vec<u8> {
        use sha2::{Digest, Sha256};
        use std::fmt::Write;

        let digest: [u8; 32] = Sha256::digest(content).into();
        let mut oid_hex = String::new();
        for byte in digest.iter() {
            // Formatting into a `String` cannot fail; ignore the result.
            let _ = write!(oid_hex, "{byte:02x}");
        }
        let size = content.len();
        let text =
            format!("version https://git-lfs.github.com/spec/v1\noid sha256:{oid_hex}\nsize {size}\n");
        text.into_bytes()
    }

    #[test]
    fn empty_repo_returns_no_pointers() {
        // A repo holding only plain content has no pointers to report.
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"plain content");

        let found = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert!(found.is_empty());
    }

    #[test]
    fn finds_pointer_blobs_skips_plain_blobs() {
        let repo = init_repo();
        // One plain file and one LFS pointer, committed side by side.
        commit_file(&repo, "plain.txt", b"just text");
        let pointer = pointer_text(b"this would be the actual binary content");
        commit_file(&repo, "big.bin", &pointer);

        let found = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert_eq!(found.len(), 1, "{found:?}");
        let entry = &found[0];
        assert_eq!(
            entry.size,
            b"this would be the actual binary content".len() as u64,
        );
        assert_eq!(entry.path.as_deref(), Some(Path::new("big.bin")));
    }

    #[test]
    fn dedups_same_lfs_oid_in_multiple_paths() {
        let repo = init_repo();
        let pointer = pointer_text(b"shared payload");
        for name in ["first.bin", "second.bin"] {
            commit_file(&repo, name, &pointer);
        }

        let found = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        // Identical content gives identical pointer text and therefore
        // one git blob OID; the scan must still report a single entry
        // at the LFS-OID level.
        assert_eq!(found.len(), 1, "{found:?}");
    }

    #[test]
    fn finds_pointers_in_history_not_just_tip() {
        let repo = init_repo();
        // The pointer is overwritten with plain content afterwards.
        // ScanRefs semantics still require reporting it, because the
        // older commit remains reachable from HEAD.
        let pointer = pointer_text(b"deleted later");
        commit_file(&repo, "x.bin", &pointer);
        commit_file(&repo, "x.bin", b"plain text now");

        let found = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].size, b"deleted later".len() as u64);
    }

    #[test]
    fn excludes_filter_history_walk() {
        let repo = init_repo();
        commit_file(&repo, "old.bin", &pointer_text(b"old payload"));
        let first = head_oid(&repo);
        commit_file(&repo, "new.bin", &pointer_text(b"new payload"));

        // Walking HEAD minus the first commit leaves only new.bin.
        let found = scan_pointers(repo.path(), &["HEAD"], &[&first]).unwrap();
        assert_eq!(found.len(), 1, "{found:?}");
        assert_eq!(found[0].size, b"new payload".len() as u64);
    }

    #[test]
    fn skips_blobs_that_look_like_pointers_but_dont_parse() {
        let repo = init_repo();
        // Small enough to be a candidate, but not a valid pointer.
        commit_file(&repo, "fake.bin", b"version foo\nbut not really a pointer");

        let found = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert!(found.is_empty(), "{found:?}");
    }

    #[test]
    fn scan_tree_returns_only_tree_entries_not_history() {
        let repo = init_repo();
        // A pointer that only exists in an older commit must NOT be
        // reported by scan_tree: the helper exists precisely so that
        // ls-files sees nothing but the named tree.
        let pointer = pointer_text(b"deleted later");
        commit_file(&repo, "x.bin", &pointer);
        commit_file(&repo, "x.bin", b"plain text now");

        let found = scan_tree(repo.path(), "HEAD").unwrap();
        assert!(found.is_empty(), "{found:?}");
    }

    #[test]
    fn scan_tree_emits_one_entry_per_path_not_per_oid() {
        let repo = init_repo();
        // The same pointer at two paths in the current tree must give
        // two entries (scan_pointers would collapse them into one).
        let pointer = pointer_text(b"shared payload");
        commit_file(&repo, "first.bin", &pointer);
        commit_file(&repo, "second.bin", &pointer);

        let mut found = scan_tree(repo.path(), "HEAD").unwrap();
        found.sort_by(|a, b| a.path.cmp(&b.path));
        assert_eq!(found.len(), 2, "{found:?}");

        let (first, second) = (&found[0], &found[1]);
        assert_eq!(first.path.as_deref(), Some(Path::new("first.bin")));
        assert_eq!(second.path.as_deref(), Some(Path::new("second.bin")));
        // Both paths refer to the same LFS object.
        assert_eq!(first.oid, second.oid);
    }

    #[test]
    fn scan_tree_skips_plain_blobs_and_keeps_pointers() {
        let repo = init_repo();
        commit_file(&repo, "plain.txt", b"just text");
        let pointer = pointer_text(b"binary content");
        commit_file(&repo, "big.bin", &pointer);

        let found = scan_tree(repo.path(), "HEAD").unwrap();
        assert_eq!(found.len(), 1, "{found:?}");
        assert_eq!(found[0].path.as_deref(), Some(Path::new("big.bin")));
    }

    #[test]
    fn scan_tree_unknown_ref_errors() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"x");

        let err = scan_tree(repo.path(), "does-not-exist").unwrap_err();
        match err {
            Error::Failed(msg) => {
                let mentions_ref = msg.contains("does-not-exist");
                assert!(
                    mentions_ref || msg.contains("Not a valid"),
                    "unexpected message: {msg}"
                );
            }
            _ => panic!("expected Failed, got {err:?}"),
        }
    }
}
607}