Skip to main content

grit_lib/
path_walk.rs

1//! Path-batched object graph walk matching Git's `walk_objects_by_path` / `test-tool path-walk`.
2//!
3//! Objects are grouped by repository-relative path (trees end with `/`) and emitted in batches
4//! following Git's priority queue ordering (tags, then blobs, then trees; ties by path).
5
6use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque};
7use std::io::Read;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::objects::{parse_commit, parse_tag, parse_tree, ObjectId, ObjectKind};
12use crate::refs;
13use crate::repo::Repository;
14use crate::rev_list::{
15    collect_revision_specs_with_stdin, date_order_walk, resolve_object_walk_roots,
16    resolve_revision_commits, walk_closure, walk_closure_ordered, CommitGraph, ObjectWalkRoot,
17};
18use crate::sparse_checkout::{path_in_sparse_checkout_patterns, ConePatterns};
19
20const ROOT_PATH: &str = "";
21const TAG_PATH: &str = "/tags";
22const TAGGED_BLOBS_PATH: &str = "/tagged-blobs";
23
24/// Options for [`walk_objects_by_path`], aligned with Git's `struct path_walk_info`.
25#[derive(Debug, Clone)]
26pub struct PathWalkOptions {
27    pub include_commits: bool,
28    pub include_trees: bool,
29    pub include_blobs: bool,
30    pub include_tags: bool,
31    pub prune_all_uninteresting: bool,
32    pub edge_aggressive: bool,
33    pub cone_patterns: Option<ConePatterns>,
34    /// Lines from `test-tool path-walk --stdin-pl` (trimmed, non-empty, non-comment).
35    pub sparse_pattern_lines: Option<Vec<String>>,
36}
37
38impl Default for PathWalkOptions {
39    fn default() -> Self {
40        Self {
41            include_commits: true,
42            include_trees: true,
43            include_blobs: true,
44            include_tags: true,
45            prune_all_uninteresting: false,
46            edge_aggressive: false,
47            cone_patterns: None,
48            sparse_pattern_lines: None,
49        }
50    }
51}
52
53/// One line of `test-tool path-walk` output (excluding trailing summary lines).
54#[derive(Debug, Clone)]
55pub struct PathWalkLine {
56    pub batch: u64,
57    pub object_kind: ObjectKind,
58    pub path: String,
59    pub oid: ObjectId,
60    pub uninteresting: bool,
61}
62
63#[derive(Debug, Clone, Default)]
64pub struct PathWalkCounts {
65    pub commits: u64,
66    pub trees: u64,
67    pub blobs: u64,
68    pub tags: u64,
69}
70
71/// Run a path walk for the given revision arguments and options.
72///
73/// `positive_specs` and `negative_specs` are raw revision strings (no interleaved `--not`;
74/// negatives are listed explicitly).
75pub fn walk_objects_by_path(
76    repo: &Repository,
77    positive_specs: &[String],
78    negative_specs: &[String],
79    stdin_all: bool,
80    boundary: bool,
81    opts: &PathWalkOptions,
82) -> Result<(Vec<PathWalkLine>, PathWalkCounts)> {
83    let mut graph = CommitGraph::new(repo, false);
84    let mut all_refs = stdin_all;
85    let mut filtered: Vec<String> = Vec::new();
86    let mut want_indexed_objects = false;
87    for p in positive_specs {
88        match p.as_str() {
89            "--all" => all_refs = true,
90            "--indexed-objects" => want_indexed_objects = true,
91            "--branches" => filtered.extend(expand_branches_refs(repo)?),
92            _ => filtered.push(p.clone()),
93        }
94    }
95    let mut neg_resolved: Vec<String> = Vec::new();
96    for n in negative_specs {
97        match n.as_str() {
98            "--all" => {
99                return Err(Error::InvalidRef(
100                    "--all is not valid in negative revision list".to_owned(),
101                ));
102            }
103            "--indexed-objects" => {
104                return Err(Error::InvalidRef(
105                    "--indexed-objects is not valid in negative revision list".to_owned(),
106                ));
107            }
108            "--branches" => neg_resolved.extend(expand_branches_refs(repo)?),
109            _ => neg_resolved.push(n.clone()),
110        }
111    }
112    if all_refs {
113        filtered.extend(all_ref_commits(repo)?);
114    }
115    if filtered.is_empty() && !want_indexed_objects && !all_refs {
116        return Err(Error::InvalidRef("no revisions specified".to_owned()));
117    }
118    let (commit_tips, mut object_roots) = if filtered.is_empty() {
119        (Vec::new(), Vec::new())
120    } else {
121        resolve_object_walk_roots(repo, &filtered)?
122    };
123    object_roots.extend(tag_object_roots_from_spec_names(repo, &filtered)?);
124    if want_indexed_objects {
125        object_roots.extend(indexed_blob_roots(repo)?);
126    }
127
128    let extra_tag_ref_targets: Vec<ObjectId> = if all_refs {
129        tag_ref_direct_targets(repo)?
130    } else {
131        Vec::new()
132    };
133    let exclude = resolve_revision_commits(repo, &neg_resolved)?;
134    let exclude_tips: HashSet<ObjectId> = exclude.iter().copied().collect();
135    let excluded: HashSet<ObjectId> = if exclude.is_empty() {
136        HashSet::new()
137    } else {
138        walk_closure(&mut graph, &exclude)?
139    };
140
141    let (included_commits, _) = if commit_tips.is_empty() {
142        (HashSet::new(), Vec::new())
143    } else {
144        walk_closure_ordered(&mut graph, &commit_tips)?
145    };
146    let mut interesting_commits: HashSet<ObjectId> = included_commits
147        .iter()
148        .copied()
149        .filter(|c| !excluded.contains(c))
150        .collect();
151
152    let boundary_commits: HashSet<ObjectId> = if boundary {
153        let inc: HashSet<ObjectId> = interesting_commits.iter().copied().collect();
154        let mut bset = HashSet::new();
155        for &oid in &interesting_commits {
156            for p in graph.parents_of(oid)? {
157                if !inc.contains(&p) && excluded.contains(&p) {
158                    bset.insert(p);
159                }
160            }
161        }
162        interesting_commits.extend(bset.iter().copied());
163        bset
164    } else {
165        HashSet::new()
166    };
167
168    let mut uninteresting_commits: HashSet<ObjectId> = excluded.iter().copied().collect();
169    uninteresting_commits.retain(|c| interesting_commits.contains(c));
170
171    let excluded_objects: HashSet<ObjectId> = tree_blob_closure_from_commits(repo, &excluded)?;
172    let excluded_blob_paths = excluded_blob_paths_from_commits(repo, &excluded)?;
173
174    let mut ctx = PathWalkContext {
175        repo,
176        opts,
177        paths: HashMap::new(),
178        heap: BinaryHeap::new(),
179        pushed: HashSet::new(),
180        seen_object: HashSet::new(),
181        uninteresting_object: HashSet::new(),
182        excluded_objects,
183        excluded_blob_paths,
184        heap_seq: 0,
185        batch: 0,
186        lines: Vec::new(),
187        counts: PathWalkCounts::default(),
188    };
189
190    mark_uninteresting_trees(
191        repo,
192        &mut graph,
193        &interesting_commits,
194        &excluded,
195        &exclude_tips,
196        opts,
197        &mut ctx,
198    )?;
199
200    if opts.include_trees {
201        ctx.ensure_root_list();
202        ctx.push_heap(ROOT_PATH);
203    }
204
205    setup_pending_objects(repo, &object_roots, &extra_tag_ref_targets, opts, &mut ctx)?;
206
207    // No user-supplied tip order here; equal-date ties fall through to the OID tiebreak.
208    let ordered_commits = date_order_walk(&mut graph, &interesting_commits, &[], false)?;
209
210    let mut commit_oids: Vec<ObjectId> = Vec::new();
211    for c in ordered_commits {
212        if !interesting_commits.contains(&c) {
213            continue;
214        }
215        if opts.include_commits {
216            commit_oids.push(c);
217        }
218        if !opts.include_trees && !opts.include_blobs {
219            continue;
220        }
221        let commit = load_commit_data(repo, c)?;
222        let tree_oid = commit.tree;
223        if ctx.seen_object.contains(&tree_oid) {
224            continue;
225        }
226        ctx.seen_object.insert(tree_oid);
227        if ctx.excluded_objects.contains(&tree_oid) {
228            ctx.uninteresting_object.insert(tree_oid);
229        }
230        ctx.append_root_tree(tree_oid)?;
231    }
232
233    if opts.edge_aggressive && opts.include_trees {
234        for &tip in &exclude_tips {
235            let commit = load_commit_data(repo, tip)?;
236            let tree_oid = commit.tree;
237            if ctx.seen_object.contains(&tree_oid) {
238                continue;
239            }
240            ctx.seen_object.insert(tree_oid);
241            ctx.uninteresting_object.insert(tree_oid);
242            ctx.append_root_tree(tree_oid)?;
243        }
244    }
245
246    if opts.include_commits && !commit_oids.is_empty() {
247        ctx.emit_commit_batch(&commit_oids, &uninteresting_commits, &boundary_commits)?;
248    }
249
250    ctx.drain_heap(&mut graph)?;
251
252    if !ctx.paths.is_empty() {
253        for key in ctx.paths.keys().cloned().collect::<Vec<_>>() {
254            ctx.push_heap(&key);
255        }
256        ctx.drain_heap(&mut graph)?;
257    }
258
259    Ok((ctx.lines, ctx.counts))
260}
261
262/// Direct `refs/tags/*` peel targets (tag object, commit, tree, or blob OID as stored in the ref).
263/// When a revision token names `refs/tags/<name>` and points at an annotated tag object, include
264/// that tag OID as a root (Git `setup_revisions` keeps the tag in pending).
265fn tag_object_roots_from_spec_names(
266    repo: &Repository,
267    specs: &[String],
268) -> Result<Vec<ObjectWalkRoot>> {
269    let mut out = Vec::new();
270    let mut seen = HashSet::new();
271    for spec in specs {
272        if spec.contains("..") || spec.starts_with('^') || spec == "HEAD" {
273            continue;
274        }
275        if spec.len() == 40 && spec.chars().all(|c| c.is_ascii_hexdigit()) {
276            continue;
277        }
278        let refname = if spec.starts_with("refs/") {
279            spec.clone()
280        } else {
281            format!("refs/tags/{spec}")
282        };
283        let Ok(oid) = refs::resolve_ref(&repo.git_dir, &refname) else {
284            continue;
285        };
286        let obj = repo.odb.read(&oid)?;
287        if obj.kind != ObjectKind::Tag {
288            continue;
289        }
290        if seen.insert(oid) {
291            out.push(ObjectWalkRoot {
292                oid,
293                input: spec.clone(),
294                root_path: None,
295            });
296        }
297    }
298    Ok(out)
299}
300
301fn tag_ref_direct_targets(repo: &Repository) -> Result<Vec<ObjectId>> {
302    let mut out = Vec::new();
303    for (name, _) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
304        let oid = refs::resolve_ref(&repo.git_dir, &name)?;
305        out.push(oid);
306    }
307    Ok(out)
308}
309
310fn all_ref_commits(repo: &Repository) -> Result<Vec<String>> {
311    let mut specs = Vec::new();
312    specs.push("HEAD".to_owned());
313    for (name, _) in refs::list_refs(&repo.git_dir, "refs/")? {
314        specs.push(name);
315    }
316    Ok(specs)
317}
318
319pub fn expand_branches_refs(repo: &Repository) -> Result<Vec<String>> {
320    let mut out = Vec::new();
321    for (_, oid) in refs::list_refs(&repo.git_dir, "refs/heads/")? {
322        out.push(oid.to_hex());
323    }
324    Ok(out)
325}
326
327/// Index roots for `--indexed-objects`: cache-tree root (if present), recovered subtree trees when
328/// the index matches `HEAD` under a prefix, and stage-0 file blobs (each with `:path`).
329fn indexed_blob_roots(repo: &Repository) -> Result<Vec<ObjectWalkRoot>> {
330    let Some(_) = &repo.work_tree else {
331        return Ok(Vec::new());
332    };
333    let index_path = repo.git_dir.join("index");
334    if !index_path.is_file() {
335        return Ok(Vec::new());
336    }
337    let idx = crate::index::Index::load(&index_path)?;
338    let mut out = Vec::new();
339    if let Some(root) = idx.cache_tree_root {
340        out.push(ObjectWalkRoot {
341            oid: root,
342            input: String::new(),
343            root_path: None,
344        });
345    }
346
347    let head_tree = head_tree_oid(repo).ok();
348    let mut file_entries: Vec<(String, ObjectId)> = Vec::new();
349    for e in &idx.entries {
350        if e.stage() != 0 {
351            continue;
352        }
353        if e.mode == 0o160000 || e.mode == crate::index::MODE_TREE {
354            continue;
355        }
356        let path_str = String::from_utf8_lossy(&e.path).into_owned();
357        file_entries.push((path_str.clone(), e.oid));
358        out.push(ObjectWalkRoot {
359            oid: e.oid,
360            input: format!(":{path_str}"),
361            root_path: Some(path_str),
362        });
363    }
364
365    let mut prefixes: BTreeSet<String> = BTreeSet::new();
366    for (path, _) in &file_entries {
367        let mut end = path.len();
368        while end > 0 {
369            if let Some(pos) = path[..end].rfind('/') {
370                prefixes.insert(path[..pos].to_string());
371                end = pos;
372            } else {
373                break;
374            }
375        }
376    }
377
378    let mut recovery_added: HashSet<String> = HashSet::new();
379    let Some(ht) = head_tree else {
380        return Ok(out);
381    };
382    for pref in prefixes {
383        if pref.is_empty() {
384            continue;
385        }
386        let mut any_under = false;
387        let mut all_match = true;
388        for (path, oid) in &file_entries {
389            let under = if path == &pref {
390                true
391            } else if let Some(rest) = path.strip_prefix(&pref) {
392                rest.starts_with('/')
393            } else {
394                false
395            };
396            if !under {
397                continue;
398            }
399            any_under = true;
400            match crate::rev_parse::resolve_treeish_path(repo, ht, path.as_str()) {
401                Ok(h) if h == *oid => {}
402                _ => {
403                    all_match = false;
404                    break;
405                }
406            }
407        }
408        if !any_under || !all_match {
409            continue;
410        }
411        let tree_oid = crate::rev_parse::resolve_treeish_path(repo, ht, pref.as_str())?;
412        if !recovery_added.insert(pref.clone()) {
413            continue;
414        }
415        out.push(ObjectWalkRoot {
416            oid: tree_oid,
417            input: String::new(),
418            root_path: Some(pref),
419        });
420    }
421
422    Ok(out)
423}
424
425fn head_tree_oid(repo: &Repository) -> Result<ObjectId> {
426    let head = refs::resolve_ref(&repo.git_dir, "HEAD")?;
427    let obj = repo.odb.read(&head)?;
428    if obj.kind != ObjectKind::Commit {
429        return Err(Error::InvalidRef("HEAD is not a commit".to_owned()));
430    }
431    let c = parse_commit(&obj.data)?;
432    Ok(c.tree)
433}
434
435struct TypeOidList {
436    kind: ObjectKind,
437    oids: Vec<ObjectId>,
438    maybe_interesting: bool,
439}
440
441struct PathWalkContext<'a> {
442    repo: &'a Repository,
443    opts: &'a PathWalkOptions,
444    paths: HashMap<String, TypeOidList>,
445    heap: BinaryHeap<std::cmp::Reverse<PathHeapItem>>,
446    pushed: HashSet<String>,
447    seen_object: HashSet<ObjectId>,
448    uninteresting_object: HashSet<ObjectId>,
449    excluded_objects: HashSet<ObjectId>,
450    excluded_blob_paths: HashSet<(String, ObjectId)>,
451    heap_seq: u64,
452    batch: u64,
453    lines: Vec<PathWalkLine>,
454    counts: PathWalkCounts,
455}
456
457/// Matches Git `path-walk.c` `compare_by_type` + `prio_queue` insertion counter tie-break.
458#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
459struct PathHeapItem {
460    type_rank: u8,
461    path: String,
462    seq: u64,
463}
464
465fn type_rank(kind: ObjectKind) -> u8 {
466    match kind {
467        ObjectKind::Tag => 0,
468        ObjectKind::Blob => 1,
469        ObjectKind::Tree => 2,
470        ObjectKind::Commit => 3,
471    }
472}
473
474impl<'a> PathWalkContext<'a> {
475    fn ensure_root_list(&mut self) {
476        self.paths
477            .entry(ROOT_PATH.to_string())
478            .or_insert_with(|| TypeOidList {
479                kind: ObjectKind::Tree,
480                oids: Vec::new(),
481                maybe_interesting: true,
482            });
483    }
484
485    fn push_heap(&mut self, path: &str) {
486        if !self.pushed.insert(path.to_string()) {
487            return;
488        }
489        let Some(list) = self.paths.get(path) else {
490            return;
491        };
492        let seq = self.heap_seq;
493        self.heap_seq = self.heap_seq.wrapping_add(1);
494        self.heap.push(std::cmp::Reverse(PathHeapItem {
495            type_rank: type_rank(list.kind),
496            path: path.to_string(),
497            seq,
498        }));
499    }
500
501    fn append_root_tree(&mut self, oid: ObjectId) -> Result<()> {
502        self.ensure_root_list();
503        let list = self
504            .paths
505            .get_mut(ROOT_PATH)
506            .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?;
507        list.oids.push(oid);
508        self.push_heap(ROOT_PATH);
509        Ok(())
510    }
511
512    fn add_path(
513        &mut self,
514        path: String,
515        kind: ObjectKind,
516        oid: ObjectId,
517        interesting: bool,
518    ) -> Result<()> {
519        self.add_path_inner(path, kind, oid, interesting, true)
520    }
521
522    /// Record an object at `path` without enqueueing it on the path stack.
523    ///
524    /// Git's `setup_pending_objects` only calls `add_path_to_list` for index objects; those paths
525    /// are pushed in the second `strmap_for_each_entry` phase after the main heap drain, so they
526    /// do not interleave before commit root trees (`t6601` branches + indexed-objects).
527    fn add_path_pending(
528        &mut self,
529        path: String,
530        kind: ObjectKind,
531        oid: ObjectId,
532        interesting: bool,
533    ) -> Result<()> {
534        self.add_path_inner(path, kind, oid, interesting, false)
535    }
536
537    fn add_path_inner(
538        &mut self,
539        path: String,
540        kind: ObjectKind,
541        oid: ObjectId,
542        interesting: bool,
543        enqueue: bool,
544    ) -> Result<()> {
545        let list = self
546            .paths
547            .entry(path.clone())
548            .or_insert_with(|| TypeOidList {
549                kind,
550                oids: Vec::new(),
551                maybe_interesting: false,
552            });
553        if list.kind != kind {
554            return Err(Error::CorruptObject(format!(
555                "path-walk: inconsistent types at path {path:?}"
556            )));
557        }
558        list.maybe_interesting |= interesting;
559        list.oids.push(oid);
560        if enqueue {
561            self.push_heap(&path);
562        }
563        Ok(())
564    }
565
566    fn cone_allows_tree_path(&self, path_with_slash: &str) -> bool {
567        if let Some(lines) = &self.opts.sparse_pattern_lines {
568            return path_in_sparse_checkout_patterns(path_with_slash, lines, true);
569        }
570        if let Some(cone) = &self.opts.cone_patterns {
571            let trimmed = path_with_slash.trim_end_matches('/');
572            if cone.path_included(trimmed) {
573                return true;
574            }
575            return cone.path_included(path_with_slash);
576        }
577        true
578    }
579
580    fn cone_allows_blob_path(&self, path: &str) -> bool {
581        if let Some(lines) = &self.opts.sparse_pattern_lines {
582            return path_in_sparse_checkout_patterns(path, lines, true);
583        }
584        if let Some(cone) = &self.opts.cone_patterns {
585            return cone.path_included(path);
586        }
587        true
588    }
589
590    fn emit_commit_batch(
591        &mut self,
592        oids: &[ObjectId],
593        uninteresting: &HashSet<ObjectId>,
594        boundary: &HashSet<ObjectId>,
595    ) -> Result<()> {
596        let batch = self.batch;
597        self.batch += 1;
598        for &oid in oids {
599            let u = uninteresting.contains(&oid) || boundary.contains(&oid);
600            self.lines.push(PathWalkLine {
601                batch,
602                object_kind: ObjectKind::Commit,
603                path: ROOT_PATH.to_string(),
604                oid,
605                uninteresting: u,
606            });
607            self.counts.commits += 1;
608        }
609        Ok(())
610    }
611
612    fn drain_heap(&mut self, graph: &mut CommitGraph<'_>) -> Result<()> {
613        while let Some(std::cmp::Reverse(item)) = self.heap.pop() {
614            self.walk_path(&item.path, graph)?;
615        }
616        Ok(())
617    }
618
619    fn walk_path(&mut self, path: &str, graph: &mut CommitGraph<'_>) -> Result<()> {
620        let Some(mut list) = self.paths.remove(path) else {
621            return Ok(());
622        };
623        if list.oids.is_empty() {
624            return Ok(());
625        }
626
627        if self.opts.prune_all_uninteresting {
628            if !list.maybe_interesting {
629                return Ok(());
630            }
631            list.maybe_interesting = false;
632            for oid in &list.oids {
633                if self.object_is_interesting_at_path(path, *oid, list.kind) {
634                    list.maybe_interesting = true;
635                    break;
636                }
637            }
638            if !list.maybe_interesting {
639                return Ok(());
640            }
641        }
642
643        let emit = match list.kind {
644            ObjectKind::Tree => self.opts.include_trees,
645            ObjectKind::Blob => self.opts.include_blobs,
646            ObjectKind::Tag => self.opts.include_tags,
647            _ => false,
648        };
649        let batch = if emit {
650            let b = self.batch;
651            self.batch += 1;
652            b
653        } else {
654            0
655        };
656
657        match list.kind {
658            ObjectKind::Tree if self.opts.include_trees => {
659                for &oid in &list.oids {
660                    let u = !self.tree_is_interesting(oid);
661                    self.lines.push(PathWalkLine {
662                        batch,
663                        object_kind: ObjectKind::Tree,
664                        path: path.to_string(),
665                        oid,
666                        uninteresting: u,
667                    });
668                    self.counts.trees += 1;
669                }
670            }
671            ObjectKind::Blob if self.opts.include_blobs => {
672                for &oid in &list.oids {
673                    let u = !self.blob_is_interesting_at_path(path, oid);
674                    self.lines.push(PathWalkLine {
675                        batch,
676                        object_kind: ObjectKind::Blob,
677                        path: path.to_string(),
678                        oid,
679                        uninteresting: u,
680                    });
681                    self.counts.blobs += 1;
682                }
683            }
684            ObjectKind::Tag if self.opts.include_tags => {
685                for &oid in &list.oids {
686                    self.lines.push(PathWalkLine {
687                        batch,
688                        object_kind: ObjectKind::Tag,
689                        path: path.to_string(),
690                        oid,
691                        uninteresting: false,
692                    });
693                    self.counts.tags += 1;
694                }
695            }
696            _ => {}
697        }
698
699        if list.kind == ObjectKind::Tree {
700            let tree_oids = list.oids.clone();
701            for tree_oid in tree_oids {
702                self.add_tree_entries(path, tree_oid, graph)?;
703            }
704        }
705
706        Ok(())
707    }
708
709    fn add_tree_entries(
710        &mut self,
711        base_path: &str,
712        tree_oid: ObjectId,
713        _graph: &mut CommitGraph<'_>,
714    ) -> Result<()> {
715        let obj = self.repo.odb.read(&tree_oid)?;
716        if obj.kind != ObjectKind::Tree {
717            return Err(Error::CorruptObject(format!("{tree_oid} is not a tree")));
718        }
719        let parent_uninteresting = !self.tree_is_interesting(tree_oid);
720        let entries = parse_tree(&obj.data)?;
721        for entry in entries {
722            if entry.mode == 0o160000 {
723                continue;
724            }
725            let is_tree = entry.mode == 0o040000;
726            if !self.opts.include_blobs && !is_tree {
727                continue;
728            }
729            let name = String::from_utf8_lossy(&entry.name);
730            let child_oid = entry.oid;
731            if self.seen_object.contains(&child_oid) {
732                continue;
733            }
734            self.seen_object.insert(child_oid);
735            if parent_uninteresting {
736                self.uninteresting_object.insert(child_oid);
737            }
738            if is_tree {
739                let rel = if base_path.is_empty() {
740                    format!("{name}/")
741                } else {
742                    format!("{base_path}{name}/")
743                };
744                if !self.cone_allows_tree_path(&rel) {
745                    continue;
746                }
747                self.add_path(
748                    rel,
749                    ObjectKind::Tree,
750                    child_oid,
751                    self.tree_is_interesting(child_oid) || !parent_uninteresting,
752                )?;
753            } else {
754                let rel = if base_path.is_empty() {
755                    name.into_owned()
756                } else {
757                    format!("{base_path}{}", name.as_ref())
758                };
759                if !self.cone_allows_blob_path(&rel) {
760                    continue;
761                }
762                let blob_interesting =
763                    self.blob_is_interesting_at_path(&rel, child_oid) || !parent_uninteresting;
764                self.add_path(rel, ObjectKind::Blob, child_oid, blob_interesting)?;
765            }
766        }
767        Ok(())
768    }
769
770    fn object_is_interesting_at_path(&self, path: &str, oid: ObjectId, kind: ObjectKind) -> bool {
771        match kind {
772            ObjectKind::Blob => self.blob_is_interesting_at_path(path, oid),
773            ObjectKind::Tree => self.tree_is_interesting(oid),
774            _ => true,
775        }
776    }
777
778    fn tree_is_interesting(&self, oid: ObjectId) -> bool {
779        !self.uninteresting_object.contains(&oid) && !self.excluded_objects.contains(&oid)
780    }
781
782    fn blob_is_interesting_at_path(&self, path: &str, oid: ObjectId) -> bool {
783        !self.uninteresting_object.contains(&oid)
784            && !self.excluded_blob_paths.contains(&(path.to_string(), oid))
785    }
786}
787
788/// Every `(blob_path, blob_oid)` reachable from excluded commits' trees.
789fn excluded_blob_paths_from_commits(
790    repo: &Repository,
791    commits: &HashSet<ObjectId>,
792) -> Result<HashSet<(String, ObjectId)>> {
793    let mut out = HashSet::new();
794    for &c in commits {
795        let commit = match load_commit_data(repo, c) {
796            Ok(co) => co,
797            Err(_) => continue,
798        };
799        collect_blob_paths(repo, commit.tree, "", &mut out)?;
800    }
801    Ok(out)
802}
803
804fn collect_blob_paths(
805    repo: &Repository,
806    tree_oid: ObjectId,
807    base: &str,
808    into: &mut HashSet<(String, ObjectId)>,
809) -> Result<()> {
810    let obj = repo.odb.read(&tree_oid)?;
811    if obj.kind != ObjectKind::Tree {
812        return Ok(());
813    }
814    let entries = parse_tree(&obj.data)?;
815    for e in entries {
816        if e.mode == 0o160000 {
817            continue;
818        }
819        let name = String::from_utf8_lossy(&e.name);
820        if e.mode == 0o040000 {
821            let next_base = if base.is_empty() {
822                format!("{name}/")
823            } else {
824                format!("{base}{name}/")
825            };
826            collect_blob_paths(repo, e.oid, &next_base, into)?;
827        } else {
828            let path = if base.is_empty() {
829                name.into_owned()
830            } else {
831                format!("{base}{}", name.as_ref())
832            };
833            into.insert((path, e.oid));
834        }
835    }
836    Ok(())
837}
838
839fn tree_blob_closure_from_commits(
840    repo: &Repository,
841    commits: &HashSet<ObjectId>,
842) -> Result<HashSet<ObjectId>> {
843    let mut out = HashSet::new();
844    for &c in commits {
845        let commit = match load_commit_data(repo, c) {
846            Ok(co) => co,
847            Err(_) => continue,
848        };
849        let mut stack = vec![commit.tree];
850        while let Some(t) = stack.pop() {
851            if !out.insert(t) {
852                continue;
853            }
854            let obj = repo.odb.read(&t)?;
855            if obj.kind != ObjectKind::Tree {
856                continue;
857            }
858            let entries = parse_tree(&obj.data)?;
859            for e in entries {
860                if e.mode == 0o160000 {
861                    continue;
862                }
863                out.insert(e.oid);
864                if e.mode == 0o040000 {
865                    stack.push(e.oid);
866                }
867            }
868        }
869    }
870    Ok(out)
871}
872
873fn load_commit_data(repo: &Repository, oid: ObjectId) -> Result<crate::objects::CommitData> {
874    let object = repo.odb.read(&oid)?;
875    if object.kind != ObjectKind::Commit {
876        return Err(Error::CorruptObject(format!("{oid} is not a commit")));
877    }
878    parse_commit(&object.data)
879}
880
881fn mark_uninteresting_trees(
882    repo: &Repository,
883    graph: &mut CommitGraph<'_>,
884    interesting: &HashSet<ObjectId>,
885    excluded: &HashSet<ObjectId>,
886    exclude_tips: &HashSet<ObjectId>,
887    opts: &PathWalkOptions,
888    ctx: &mut PathWalkContext<'_>,
889) -> Result<()> {
890    if !opts.prune_all_uninteresting && !opts.edge_aggressive {
891        return Ok(());
892    }
893    let mut queue: VecDeque<ObjectId> = interesting.iter().copied().collect();
894    let mut seen_edge_commit = HashSet::new();
895    while let Some(c) = queue.pop_front() {
896        let parents = graph.parents_of(c)?;
897        for p in parents {
898            if interesting.contains(&p) {
899                continue;
900            }
901            if !excluded.contains(&p) {
902                continue;
903            }
904            if !seen_edge_commit.insert(p) {
905                continue;
906            }
907            let commit = load_commit_data(repo, p)?;
908            ctx.uninteresting_object.insert(commit.tree);
909            queue.push_back(p);
910        }
911    }
912    // Git `mark_edges_uninteresting`: `edge_hint_aggressive` marks trees only for commits that
913    // appear on the revision cmdline as UNINTERESTING (negative tips), not the whole excluded
914    // ancestry closure.
915    if opts.edge_aggressive {
916        for &c in exclude_tips {
917            if seen_edge_commit.contains(&c) {
918                continue;
919            }
920            let Ok(commit) = load_commit_data(repo, c) else {
921                continue;
922            };
923            ctx.uninteresting_object.insert(commit.tree);
924        }
925    }
926    Ok(())
927}
928
929fn setup_pending_objects(
930    repo: &Repository,
931    roots: &[ObjectWalkRoot],
932    extra_tag_refs: &[ObjectId],
933    opts: &PathWalkOptions,
934    ctx: &mut PathWalkContext<'_>,
935) -> Result<()> {
936    let mut tag_oids: Vec<ObjectId> = Vec::new();
937    let mut tagged_blob_oids: Vec<ObjectId> = Vec::new();
938    let mut tag_seen: HashSet<ObjectId> = HashSet::new();
939
940    for &oid in extra_tag_refs {
941        if ctx.seen_object.contains(&oid) {
942            continue;
943        }
944        let obj = repo.odb.read(&oid)?;
945        match obj.kind {
946            ObjectKind::Tag | ObjectKind::Commit => {
947                if tag_seen.insert(oid) {
948                    tag_oids.push(oid);
949                }
950            }
951            ObjectKind::Tree if opts.include_trees => {
952                ctx.seen_object.insert(oid);
953                ctx.append_root_tree(oid)?;
954            }
955            ObjectKind::Blob if opts.include_blobs => {
956                ctx.seen_object.insert(oid);
957                tagged_blob_oids.push(oid);
958            }
959            _ => {}
960        }
961    }
962
963    for root in roots {
964        let mut oid = root.oid;
965        let mut obj = repo.odb.read(&oid)?;
966        while obj.kind == ObjectKind::Tag {
967            if ctx.seen_object.contains(&oid) {
968                break;
969            }
970            ctx.seen_object.insert(oid);
971            if opts.include_tags && tag_seen.insert(oid) {
972                tag_oids.push(oid);
973            }
974            let tag = parse_tag(&obj.data)?;
975            oid = tag.object;
976            obj = repo.odb.read(&oid)?;
977        }
978        if obj.kind == ObjectKind::Tag {
979            continue;
980        }
981        if !ctx.seen_object.insert(oid) {
982            continue;
983        }
984        match obj.kind {
985            ObjectKind::Tree if opts.include_trees => {
986                if let Some(p) = &root.root_path {
987                    let trimmed = p.trim_end_matches('/');
988                    let path = if trimmed.is_empty() {
989                        "/".to_string()
990                    } else {
991                        format!("{trimmed}/")
992                    };
993                    ctx.add_path_pending(path, ObjectKind::Tree, oid, true)?;
994                } else {
995                    ctx.ensure_root_list();
996                    ctx.paths
997                        .get_mut(ROOT_PATH)
998                        .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?
999                        .oids
1000                        .push(oid);
1001                    ctx.push_heap(ROOT_PATH);
1002                }
1003            }
1004            ObjectKind::Blob if opts.include_blobs => {
1005                if let Some(p) = &root.root_path {
1006                    ctx.add_path_pending(p.clone(), ObjectKind::Blob, oid, true)?;
1007                } else {
1008                    tagged_blob_oids.push(oid);
1009                }
1010            }
1011            ObjectKind::Commit => {}
1012            _ => {}
1013        }
1014    }
1015
1016    if opts.include_blobs && !tagged_blob_oids.is_empty() {
1017        let list = TypeOidList {
1018            kind: ObjectKind::Blob,
1019            oids: tagged_blob_oids,
1020            maybe_interesting: true,
1021        };
1022        ctx.paths.insert(TAGGED_BLOBS_PATH.to_string(), list);
1023        ctx.push_heap(TAGGED_BLOBS_PATH);
1024    }
1025    if opts.include_tags && !tag_oids.is_empty() {
1026        let list = TypeOidList {
1027            kind: ObjectKind::Tag,
1028            oids: tag_oids,
1029            maybe_interesting: true,
1030        };
1031        ctx.paths.insert(TAG_PATH.to_string(), list);
1032        ctx.push_heap(TAG_PATH);
1033    }
1034    Ok(())
1035}
1036
1037/// Parse `test-tool path-walk` argv after the subcommand name.
1038///
1039/// Returns options, positive revision specs, negative revision specs, stdin `--all`, and `--boundary`.
1040pub fn parse_path_walk_cli(
1041    git_dir: &Path,
1042    args: &[String],
1043) -> Result<(PathWalkOptions, Vec<String>, Vec<String>, bool, bool)> {
1044    let mut opts = PathWalkOptions::default();
1045    let mut positive = Vec::new();
1046    let mut negative = Vec::new();
1047    let mut stdin_all = false;
1048    let mut boundary_flag = false;
1049    let mut after_dd = false;
1050    let mut not_mode = false;
1051
1052    let mut i = 0usize;
1053    while i < args.len() {
1054        let a = &args[i];
1055        if !after_dd && a == "--" {
1056            after_dd = true;
1057            i += 1;
1058            continue;
1059        }
1060        if !after_dd {
1061            match a.as_str() {
1062                "--stdin-pl" => {
1063                    let mut buf = String::new();
1064                    std::io::stdin().read_to_string(&mut buf)?;
1065                    let lines: Vec<String> = buf
1066                        .lines()
1067                        .map(str::trim)
1068                        .filter(|l| !l.is_empty() && !l.starts_with('#'))
1069                        .map(String::from)
1070                        .collect();
1071                    if !lines.is_empty() {
1072                        opts.sparse_pattern_lines = Some(lines);
1073                    }
1074                }
1075                "--prune" => opts.prune_all_uninteresting = true,
1076                "--edge-aggressive" => opts.edge_aggressive = true,
1077                "--no-blobs" => opts.include_blobs = false,
1078                "--no-trees" => opts.include_trees = false,
1079                "--no-commits" => opts.include_commits = false,
1080                "--no-tags" => opts.include_tags = false,
1081                "--blobs" => opts.include_blobs = true,
1082                "--trees" => opts.include_trees = true,
1083                "--commits" => opts.include_commits = true,
1084                "--tags" => opts.include_tags = true,
1085                "--stdin" => {
1086                    let (pos, neg, all, _stdin_paths) =
1087                        collect_revision_specs_with_stdin(git_dir, &[], true)?;
1088                    stdin_all |= all;
1089                    positive.extend(pos);
1090                    negative.extend(neg);
1091                }
1092                _ => {
1093                    return Err(Error::InvalidRef(format!(
1094                        "path-walk: unknown option '{a}'"
1095                    )));
1096                }
1097            }
1098        } else if a == "--not" {
1099            not_mode = !not_mode;
1100        } else if a == "--boundary" {
1101            boundary_flag = true;
1102        } else if matches!(a.as_str(), "--all" | "--indexed-objects" | "--branches") {
1103            if not_mode {
1104                negative.push(a.clone());
1105            } else {
1106                positive.push(a.clone());
1107            }
1108        } else {
1109            let (p, n) = crate::rev_list::split_revision_token(a);
1110            if not_mode {
1111                negative.extend(p);
1112                positive.extend(n);
1113            } else {
1114                positive.extend(p);
1115                negative.extend(n);
1116            }
1117        }
1118        i += 1;
1119    }
1120
1121    Ok((opts, positive, negative, stdin_all, boundary_flag))
1122}