Skip to main content

grit_lib/
path_walk.rs

1//! Path-batched object graph walk matching Git's `walk_objects_by_path` / `test-tool path-walk`.
2//!
3//! Objects are grouped by repository-relative path (trees end with `/`) and emitted in batches
4//! following Git's priority queue ordering (tags, then blobs, then trees; ties by path).
5
6use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque};
7use std::io::Read;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::objects::{parse_commit, parse_tag, parse_tree, ObjectId, ObjectKind};
12use crate::refs;
13use crate::repo::Repository;
14use crate::rev_list::{
15    collect_revision_specs_with_stdin, date_order_walk, resolve_object_walk_roots,
16    resolve_revision_commits, walk_closure, walk_closure_ordered, CommitGraph, ObjectWalkRoot,
17};
18use crate::sparse_checkout::{path_in_sparse_checkout_patterns, ConePatterns};
19
20const ROOT_PATH: &str = "";
21const TAG_PATH: &str = "/tags";
22const TAGGED_BLOBS_PATH: &str = "/tagged-blobs";
23
24/// Options for [`walk_objects_by_path`], aligned with Git's `struct path_walk_info`.
25#[derive(Debug, Clone)]
26pub struct PathWalkOptions {
27    pub include_commits: bool,
28    pub include_trees: bool,
29    pub include_blobs: bool,
30    pub include_tags: bool,
31    pub prune_all_uninteresting: bool,
32    pub edge_aggressive: bool,
33    pub cone_patterns: Option<ConePatterns>,
34    /// Lines from `test-tool path-walk --stdin-pl` (trimmed, non-empty, non-comment).
35    pub sparse_pattern_lines: Option<Vec<String>>,
36}
37
38impl Default for PathWalkOptions {
39    fn default() -> Self {
40        Self {
41            include_commits: true,
42            include_trees: true,
43            include_blobs: true,
44            include_tags: true,
45            prune_all_uninteresting: false,
46            edge_aggressive: false,
47            cone_patterns: None,
48            sparse_pattern_lines: None,
49        }
50    }
51}
52
53/// One line of `test-tool path-walk` output (excluding trailing summary lines).
54#[derive(Debug, Clone)]
55pub struct PathWalkLine {
56    pub batch: u64,
57    pub object_kind: ObjectKind,
58    pub path: String,
59    pub oid: ObjectId,
60    pub uninteresting: bool,
61}
62
63#[derive(Debug, Clone, Default)]
64pub struct PathWalkCounts {
65    pub commits: u64,
66    pub trees: u64,
67    pub blobs: u64,
68    pub tags: u64,
69}
70
71/// Run a path walk for the given revision arguments and options.
72///
73/// `positive_specs` and `negative_specs` are raw revision strings (no interleaved `--not`;
74/// negatives are listed explicitly).
75pub fn walk_objects_by_path(
76    repo: &Repository,
77    positive_specs: &[String],
78    negative_specs: &[String],
79    stdin_all: bool,
80    boundary: bool,
81    opts: &PathWalkOptions,
82) -> Result<(Vec<PathWalkLine>, PathWalkCounts)> {
83    let mut graph = CommitGraph::new(repo, false);
84    let mut all_refs = stdin_all;
85    let mut filtered: Vec<String> = Vec::new();
86    let mut want_indexed_objects = false;
87    for p in positive_specs {
88        match p.as_str() {
89            "--all" => all_refs = true,
90            "--indexed-objects" => want_indexed_objects = true,
91            "--branches" => filtered.extend(expand_branches_refs(repo)?),
92            _ => filtered.push(p.clone()),
93        }
94    }
95    let mut neg_resolved: Vec<String> = Vec::new();
96    for n in negative_specs {
97        match n.as_str() {
98            "--all" => {
99                return Err(Error::InvalidRef(
100                    "--all is not valid in negative revision list".to_owned(),
101                ));
102            }
103            "--indexed-objects" => {
104                return Err(Error::InvalidRef(
105                    "--indexed-objects is not valid in negative revision list".to_owned(),
106                ));
107            }
108            "--branches" => neg_resolved.extend(expand_branches_refs(repo)?),
109            _ => neg_resolved.push(n.clone()),
110        }
111    }
112    if all_refs {
113        filtered.extend(all_ref_commits(repo)?);
114    }
115    if filtered.is_empty() && !want_indexed_objects && !all_refs {
116        return Err(Error::InvalidRef("no revisions specified".to_owned()));
117    }
118    let (commit_tips, mut object_roots) = if filtered.is_empty() {
119        (Vec::new(), Vec::new())
120    } else {
121        resolve_object_walk_roots(repo, &filtered)?
122    };
123    object_roots.extend(tag_object_roots_from_spec_names(repo, &filtered)?);
124    if want_indexed_objects {
125        object_roots.extend(indexed_blob_roots(repo)?);
126    }
127
128    let extra_tag_ref_targets: Vec<ObjectId> = if all_refs {
129        tag_ref_direct_targets(repo)?
130    } else {
131        Vec::new()
132    };
133    let exclude = resolve_revision_commits(repo, &neg_resolved)?;
134    let exclude_tips: HashSet<ObjectId> = exclude.iter().copied().collect();
135    let excluded: HashSet<ObjectId> = if exclude.is_empty() {
136        HashSet::new()
137    } else {
138        walk_closure(&mut graph, &exclude)?
139    };
140
141    let (included_commits, _) = if commit_tips.is_empty() {
142        (HashSet::new(), Vec::new())
143    } else {
144        walk_closure_ordered(&mut graph, &commit_tips)?
145    };
146    let mut interesting_commits: HashSet<ObjectId> = included_commits
147        .iter()
148        .copied()
149        .filter(|c| !excluded.contains(c))
150        .collect();
151
152    let boundary_commits: HashSet<ObjectId> = if boundary {
153        let inc: HashSet<ObjectId> = interesting_commits.iter().copied().collect();
154        let mut bset = HashSet::new();
155        for &oid in &interesting_commits {
156            for p in graph.parents_of(oid)? {
157                if !inc.contains(&p) && excluded.contains(&p) {
158                    bset.insert(p);
159                }
160            }
161        }
162        interesting_commits.extend(bset.iter().copied());
163        bset
164    } else {
165        HashSet::new()
166    };
167
168    let mut uninteresting_commits: HashSet<ObjectId> = excluded.iter().copied().collect();
169    uninteresting_commits.retain(|c| interesting_commits.contains(c));
170
171    let excluded_objects: HashSet<ObjectId> = tree_blob_closure_from_commits(repo, &excluded)?;
172    let excluded_blob_paths = excluded_blob_paths_from_commits(repo, &excluded)?;
173
174    let mut ctx = PathWalkContext {
175        repo,
176        opts,
177        paths: HashMap::new(),
178        heap: BinaryHeap::new(),
179        pushed: HashSet::new(),
180        seen_object: HashSet::new(),
181        uninteresting_object: HashSet::new(),
182        excluded_objects,
183        excluded_blob_paths,
184        heap_seq: 0,
185        batch: 0,
186        lines: Vec::new(),
187        counts: PathWalkCounts::default(),
188    };
189
190    mark_uninteresting_trees(
191        repo,
192        &mut graph,
193        &interesting_commits,
194        &excluded,
195        &exclude_tips,
196        opts,
197        &mut ctx,
198    )?;
199
200    if opts.include_trees {
201        ctx.ensure_root_list();
202        ctx.push_heap(ROOT_PATH);
203    }
204
205    setup_pending_objects(repo, &object_roots, &extra_tag_ref_targets, opts, &mut ctx)?;
206
207    let ordered_commits = date_order_walk(&mut graph, &interesting_commits, false)?;
208
209    let mut commit_oids: Vec<ObjectId> = Vec::new();
210    for c in ordered_commits {
211        if !interesting_commits.contains(&c) {
212            continue;
213        }
214        if opts.include_commits {
215            commit_oids.push(c);
216        }
217        if !opts.include_trees && !opts.include_blobs {
218            continue;
219        }
220        let commit = load_commit_data(repo, c)?;
221        let tree_oid = commit.tree;
222        if ctx.seen_object.contains(&tree_oid) {
223            continue;
224        }
225        ctx.seen_object.insert(tree_oid);
226        if ctx.excluded_objects.contains(&tree_oid) {
227            ctx.uninteresting_object.insert(tree_oid);
228        }
229        ctx.append_root_tree(tree_oid)?;
230    }
231
232    if opts.edge_aggressive && opts.include_trees {
233        for &tip in &exclude_tips {
234            let commit = load_commit_data(repo, tip)?;
235            let tree_oid = commit.tree;
236            if ctx.seen_object.contains(&tree_oid) {
237                continue;
238            }
239            ctx.seen_object.insert(tree_oid);
240            ctx.uninteresting_object.insert(tree_oid);
241            ctx.append_root_tree(tree_oid)?;
242        }
243    }
244
245    if opts.include_commits && !commit_oids.is_empty() {
246        ctx.emit_commit_batch(&commit_oids, &uninteresting_commits, &boundary_commits)?;
247    }
248
249    ctx.drain_heap(&mut graph)?;
250
251    if !ctx.paths.is_empty() {
252        for key in ctx.paths.keys().cloned().collect::<Vec<_>>() {
253            ctx.push_heap(&key);
254        }
255        ctx.drain_heap(&mut graph)?;
256    }
257
258    Ok((ctx.lines, ctx.counts))
259}
260
261/// Direct `refs/tags/*` peel targets (tag object, commit, tree, or blob OID as stored in the ref).
262/// When a revision token names `refs/tags/<name>` and points at an annotated tag object, include
263/// that tag OID as a root (Git `setup_revisions` keeps the tag in pending).
264fn tag_object_roots_from_spec_names(
265    repo: &Repository,
266    specs: &[String],
267) -> Result<Vec<ObjectWalkRoot>> {
268    let mut out = Vec::new();
269    let mut seen = HashSet::new();
270    for spec in specs {
271        if spec.contains("..") || spec.starts_with('^') || spec == "HEAD" {
272            continue;
273        }
274        if spec.len() == 40 && spec.chars().all(|c| c.is_ascii_hexdigit()) {
275            continue;
276        }
277        let refname = if spec.starts_with("refs/") {
278            spec.clone()
279        } else {
280            format!("refs/tags/{spec}")
281        };
282        let Ok(oid) = refs::resolve_ref(&repo.git_dir, &refname) else {
283            continue;
284        };
285        let obj = repo.odb.read(&oid)?;
286        if obj.kind != ObjectKind::Tag {
287            continue;
288        }
289        if seen.insert(oid) {
290            out.push(ObjectWalkRoot {
291                oid,
292                input: spec.clone(),
293                root_path: None,
294            });
295        }
296    }
297    Ok(out)
298}
299
300fn tag_ref_direct_targets(repo: &Repository) -> Result<Vec<ObjectId>> {
301    let mut out = Vec::new();
302    for (name, _) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
303        let oid = refs::resolve_ref(&repo.git_dir, &name)?;
304        out.push(oid);
305    }
306    Ok(out)
307}
308
309fn all_ref_commits(repo: &Repository) -> Result<Vec<String>> {
310    let mut specs = Vec::new();
311    specs.push("HEAD".to_owned());
312    for (name, _) in refs::list_refs(&repo.git_dir, "refs/")? {
313        specs.push(name);
314    }
315    Ok(specs)
316}
317
318pub fn expand_branches_refs(repo: &Repository) -> Result<Vec<String>> {
319    let mut out = Vec::new();
320    for (_, oid) in refs::list_refs(&repo.git_dir, "refs/heads/")? {
321        out.push(oid.to_hex());
322    }
323    Ok(out)
324}
325
326/// Index roots for `--indexed-objects`: cache-tree root (if present), recovered subtree trees when
327/// the index matches `HEAD` under a prefix, and stage-0 file blobs (each with `:path`).
328fn indexed_blob_roots(repo: &Repository) -> Result<Vec<ObjectWalkRoot>> {
329    let Some(_) = &repo.work_tree else {
330        return Ok(Vec::new());
331    };
332    let index_path = repo.git_dir.join("index");
333    if !index_path.is_file() {
334        return Ok(Vec::new());
335    }
336    let idx = crate::index::Index::load(&index_path)?;
337    let mut out = Vec::new();
338    if let Some(root) = idx.cache_tree_root {
339        out.push(ObjectWalkRoot {
340            oid: root,
341            input: String::new(),
342            root_path: None,
343        });
344    }
345
346    let head_tree = head_tree_oid(repo).ok();
347    let mut file_entries: Vec<(String, ObjectId)> = Vec::new();
348    for e in &idx.entries {
349        if e.stage() != 0 {
350            continue;
351        }
352        if e.mode == 0o160000 || e.mode == crate::index::MODE_TREE {
353            continue;
354        }
355        let path_str = String::from_utf8_lossy(&e.path).into_owned();
356        file_entries.push((path_str.clone(), e.oid));
357        out.push(ObjectWalkRoot {
358            oid: e.oid,
359            input: format!(":{path_str}"),
360            root_path: Some(path_str),
361        });
362    }
363
364    let mut prefixes: BTreeSet<String> = BTreeSet::new();
365    for (path, _) in &file_entries {
366        let mut end = path.len();
367        while end > 0 {
368            if let Some(pos) = path[..end].rfind('/') {
369                prefixes.insert(path[..pos].to_string());
370                end = pos;
371            } else {
372                break;
373            }
374        }
375    }
376
377    let mut recovery_added: HashSet<String> = HashSet::new();
378    let Some(ht) = head_tree else {
379        return Ok(out);
380    };
381    for pref in prefixes {
382        if pref.is_empty() {
383            continue;
384        }
385        let mut any_under = false;
386        let mut all_match = true;
387        for (path, oid) in &file_entries {
388            let under = if path == &pref {
389                true
390            } else if let Some(rest) = path.strip_prefix(&pref) {
391                rest.starts_with('/')
392            } else {
393                false
394            };
395            if !under {
396                continue;
397            }
398            any_under = true;
399            match crate::rev_parse::resolve_treeish_path(repo, ht, path.as_str()) {
400                Ok(h) if h == *oid => {}
401                _ => {
402                    all_match = false;
403                    break;
404                }
405            }
406        }
407        if !any_under || !all_match {
408            continue;
409        }
410        let tree_oid = crate::rev_parse::resolve_treeish_path(repo, ht, pref.as_str())?;
411        if !recovery_added.insert(pref.clone()) {
412            continue;
413        }
414        out.push(ObjectWalkRoot {
415            oid: tree_oid,
416            input: String::new(),
417            root_path: Some(pref),
418        });
419    }
420
421    Ok(out)
422}
423
424fn head_tree_oid(repo: &Repository) -> Result<ObjectId> {
425    let head = refs::resolve_ref(&repo.git_dir, "HEAD")?;
426    let obj = repo.odb.read(&head)?;
427    if obj.kind != ObjectKind::Commit {
428        return Err(Error::InvalidRef("HEAD is not a commit".to_owned()));
429    }
430    let c = parse_commit(&obj.data)?;
431    Ok(c.tree)
432}
433
434struct TypeOidList {
435    kind: ObjectKind,
436    oids: Vec<ObjectId>,
437    maybe_interesting: bool,
438}
439
440struct PathWalkContext<'a> {
441    repo: &'a Repository,
442    opts: &'a PathWalkOptions,
443    paths: HashMap<String, TypeOidList>,
444    heap: BinaryHeap<std::cmp::Reverse<PathHeapItem>>,
445    pushed: HashSet<String>,
446    seen_object: HashSet<ObjectId>,
447    uninteresting_object: HashSet<ObjectId>,
448    excluded_objects: HashSet<ObjectId>,
449    excluded_blob_paths: HashSet<(String, ObjectId)>,
450    heap_seq: u64,
451    batch: u64,
452    lines: Vec<PathWalkLine>,
453    counts: PathWalkCounts,
454}
455
456/// Matches Git `path-walk.c` `compare_by_type` + `prio_queue` insertion counter tie-break.
457#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
458struct PathHeapItem {
459    type_rank: u8,
460    path: String,
461    seq: u64,
462}
463
464fn type_rank(kind: ObjectKind) -> u8 {
465    match kind {
466        ObjectKind::Tag => 0,
467        ObjectKind::Blob => 1,
468        ObjectKind::Tree => 2,
469        ObjectKind::Commit => 3,
470    }
471}
472
473impl<'a> PathWalkContext<'a> {
474    fn ensure_root_list(&mut self) {
475        self.paths
476            .entry(ROOT_PATH.to_string())
477            .or_insert_with(|| TypeOidList {
478                kind: ObjectKind::Tree,
479                oids: Vec::new(),
480                maybe_interesting: true,
481            });
482    }
483
484    fn push_heap(&mut self, path: &str) {
485        if !self.pushed.insert(path.to_string()) {
486            return;
487        }
488        let Some(list) = self.paths.get(path) else {
489            return;
490        };
491        let seq = self.heap_seq;
492        self.heap_seq = self.heap_seq.wrapping_add(1);
493        self.heap.push(std::cmp::Reverse(PathHeapItem {
494            type_rank: type_rank(list.kind),
495            path: path.to_string(),
496            seq,
497        }));
498    }
499
500    fn append_root_tree(&mut self, oid: ObjectId) -> Result<()> {
501        self.ensure_root_list();
502        let list = self.paths.get_mut(ROOT_PATH).expect("root list");
503        list.oids.push(oid);
504        self.push_heap(ROOT_PATH);
505        Ok(())
506    }
507
508    fn add_path(
509        &mut self,
510        path: String,
511        kind: ObjectKind,
512        oid: ObjectId,
513        interesting: bool,
514    ) -> Result<()> {
515        self.add_path_inner(path, kind, oid, interesting, true)
516    }
517
518    /// Record an object at `path` without enqueueing it on the path stack.
519    ///
520    /// Git's `setup_pending_objects` only calls `add_path_to_list` for index objects; those paths
521    /// are pushed in the second `strmap_for_each_entry` phase after the main heap drain, so they
522    /// do not interleave before commit root trees (`t6601` branches + indexed-objects).
523    fn add_path_pending(
524        &mut self,
525        path: String,
526        kind: ObjectKind,
527        oid: ObjectId,
528        interesting: bool,
529    ) -> Result<()> {
530        self.add_path_inner(path, kind, oid, interesting, false)
531    }
532
533    fn add_path_inner(
534        &mut self,
535        path: String,
536        kind: ObjectKind,
537        oid: ObjectId,
538        interesting: bool,
539        enqueue: bool,
540    ) -> Result<()> {
541        let list = self
542            .paths
543            .entry(path.clone())
544            .or_insert_with(|| TypeOidList {
545                kind,
546                oids: Vec::new(),
547                maybe_interesting: false,
548            });
549        if list.kind != kind {
550            return Err(Error::CorruptObject(format!(
551                "path-walk: inconsistent types at path {path:?}"
552            )));
553        }
554        list.maybe_interesting |= interesting;
555        list.oids.push(oid);
556        if enqueue {
557            self.push_heap(&path);
558        }
559        Ok(())
560    }
561
562    fn cone_allows_tree_path(&self, path_with_slash: &str) -> bool {
563        if let Some(lines) = &self.opts.sparse_pattern_lines {
564            return path_in_sparse_checkout_patterns(path_with_slash, lines, true);
565        }
566        if let Some(cone) = &self.opts.cone_patterns {
567            let trimmed = path_with_slash.trim_end_matches('/');
568            if cone.path_included(trimmed) {
569                return true;
570            }
571            return cone.path_included(path_with_slash);
572        }
573        true
574    }
575
576    fn cone_allows_blob_path(&self, path: &str) -> bool {
577        if let Some(lines) = &self.opts.sparse_pattern_lines {
578            return path_in_sparse_checkout_patterns(path, lines, true);
579        }
580        if let Some(cone) = &self.opts.cone_patterns {
581            return cone.path_included(path);
582        }
583        true
584    }
585
586    fn emit_commit_batch(
587        &mut self,
588        oids: &[ObjectId],
589        uninteresting: &HashSet<ObjectId>,
590        boundary: &HashSet<ObjectId>,
591    ) -> Result<()> {
592        let batch = self.batch;
593        self.batch += 1;
594        for &oid in oids {
595            let u = uninteresting.contains(&oid) || boundary.contains(&oid);
596            self.lines.push(PathWalkLine {
597                batch,
598                object_kind: ObjectKind::Commit,
599                path: ROOT_PATH.to_string(),
600                oid,
601                uninteresting: u,
602            });
603            self.counts.commits += 1;
604        }
605        Ok(())
606    }
607
608    fn drain_heap(&mut self, graph: &mut CommitGraph<'_>) -> Result<()> {
609        while let Some(std::cmp::Reverse(item)) = self.heap.pop() {
610            self.walk_path(&item.path, graph)?;
611        }
612        Ok(())
613    }
614
615    fn walk_path(&mut self, path: &str, graph: &mut CommitGraph<'_>) -> Result<()> {
616        let Some(mut list) = self.paths.remove(path) else {
617            return Ok(());
618        };
619        if list.oids.is_empty() {
620            return Ok(());
621        }
622
623        if self.opts.prune_all_uninteresting {
624            if !list.maybe_interesting {
625                return Ok(());
626            }
627            list.maybe_interesting = false;
628            for oid in &list.oids {
629                if self.object_is_interesting_at_path(path, *oid, list.kind) {
630                    list.maybe_interesting = true;
631                    break;
632                }
633            }
634            if !list.maybe_interesting {
635                return Ok(());
636            }
637        }
638
639        let emit = match list.kind {
640            ObjectKind::Tree => self.opts.include_trees,
641            ObjectKind::Blob => self.opts.include_blobs,
642            ObjectKind::Tag => self.opts.include_tags,
643            _ => false,
644        };
645        let batch = if emit {
646            let b = self.batch;
647            self.batch += 1;
648            b
649        } else {
650            0
651        };
652
653        match list.kind {
654            ObjectKind::Tree if self.opts.include_trees => {
655                for &oid in &list.oids {
656                    let u = !self.tree_is_interesting(oid);
657                    self.lines.push(PathWalkLine {
658                        batch,
659                        object_kind: ObjectKind::Tree,
660                        path: path.to_string(),
661                        oid,
662                        uninteresting: u,
663                    });
664                    self.counts.trees += 1;
665                }
666            }
667            ObjectKind::Blob if self.opts.include_blobs => {
668                for &oid in &list.oids {
669                    let u = !self.blob_is_interesting_at_path(path, oid);
670                    self.lines.push(PathWalkLine {
671                        batch,
672                        object_kind: ObjectKind::Blob,
673                        path: path.to_string(),
674                        oid,
675                        uninteresting: u,
676                    });
677                    self.counts.blobs += 1;
678                }
679            }
680            ObjectKind::Tag if self.opts.include_tags => {
681                for &oid in &list.oids {
682                    self.lines.push(PathWalkLine {
683                        batch,
684                        object_kind: ObjectKind::Tag,
685                        path: path.to_string(),
686                        oid,
687                        uninteresting: false,
688                    });
689                    self.counts.tags += 1;
690                }
691            }
692            _ => {}
693        }
694
695        if list.kind == ObjectKind::Tree {
696            let tree_oids = list.oids.clone();
697            for tree_oid in tree_oids {
698                self.add_tree_entries(path, tree_oid, graph)?;
699            }
700        }
701
702        Ok(())
703    }
704
705    fn add_tree_entries(
706        &mut self,
707        base_path: &str,
708        tree_oid: ObjectId,
709        _graph: &mut CommitGraph<'_>,
710    ) -> Result<()> {
711        let obj = self.repo.odb.read(&tree_oid)?;
712        if obj.kind != ObjectKind::Tree {
713            return Err(Error::CorruptObject(format!("{tree_oid} is not a tree")));
714        }
715        let parent_uninteresting = !self.tree_is_interesting(tree_oid);
716        let entries = parse_tree(&obj.data)?;
717        for entry in entries {
718            if entry.mode == 0o160000 {
719                continue;
720            }
721            let is_tree = entry.mode == 0o040000;
722            if !self.opts.include_blobs && !is_tree {
723                continue;
724            }
725            let name = String::from_utf8_lossy(&entry.name);
726            let child_oid = entry.oid;
727            if self.seen_object.contains(&child_oid) {
728                continue;
729            }
730            self.seen_object.insert(child_oid);
731            if parent_uninteresting {
732                self.uninteresting_object.insert(child_oid);
733            }
734            if is_tree {
735                let rel = if base_path.is_empty() {
736                    format!("{name}/")
737                } else {
738                    format!("{base_path}{name}/")
739                };
740                if !self.cone_allows_tree_path(&rel) {
741                    continue;
742                }
743                self.add_path(
744                    rel,
745                    ObjectKind::Tree,
746                    child_oid,
747                    self.tree_is_interesting(child_oid) || !parent_uninteresting,
748                )?;
749            } else {
750                let rel = if base_path.is_empty() {
751                    name.into_owned()
752                } else {
753                    format!("{base_path}{}", name.as_ref())
754                };
755                if !self.cone_allows_blob_path(&rel) {
756                    continue;
757                }
758                let blob_interesting =
759                    self.blob_is_interesting_at_path(&rel, child_oid) || !parent_uninteresting;
760                self.add_path(rel, ObjectKind::Blob, child_oid, blob_interesting)?;
761            }
762        }
763        Ok(())
764    }
765
766    fn object_is_interesting_at_path(&self, path: &str, oid: ObjectId, kind: ObjectKind) -> bool {
767        match kind {
768            ObjectKind::Blob => self.blob_is_interesting_at_path(path, oid),
769            ObjectKind::Tree => self.tree_is_interesting(oid),
770            _ => true,
771        }
772    }
773
774    fn tree_is_interesting(&self, oid: ObjectId) -> bool {
775        !self.uninteresting_object.contains(&oid) && !self.excluded_objects.contains(&oid)
776    }
777
778    fn blob_is_interesting_at_path(&self, path: &str, oid: ObjectId) -> bool {
779        !self.uninteresting_object.contains(&oid)
780            && !self.excluded_blob_paths.contains(&(path.to_string(), oid))
781    }
782}
783
784/// Every `(blob_path, blob_oid)` reachable from excluded commits' trees.
785fn excluded_blob_paths_from_commits(
786    repo: &Repository,
787    commits: &HashSet<ObjectId>,
788) -> Result<HashSet<(String, ObjectId)>> {
789    let mut out = HashSet::new();
790    for &c in commits {
791        let commit = match load_commit_data(repo, c) {
792            Ok(co) => co,
793            Err(_) => continue,
794        };
795        collect_blob_paths(repo, commit.tree, "", &mut out)?;
796    }
797    Ok(out)
798}
799
800fn collect_blob_paths(
801    repo: &Repository,
802    tree_oid: ObjectId,
803    base: &str,
804    into: &mut HashSet<(String, ObjectId)>,
805) -> Result<()> {
806    let obj = repo.odb.read(&tree_oid)?;
807    if obj.kind != ObjectKind::Tree {
808        return Ok(());
809    }
810    let entries = parse_tree(&obj.data)?;
811    for e in entries {
812        if e.mode == 0o160000 {
813            continue;
814        }
815        let name = String::from_utf8_lossy(&e.name);
816        if e.mode == 0o040000 {
817            let next_base = if base.is_empty() {
818                format!("{name}/")
819            } else {
820                format!("{base}{name}/")
821            };
822            collect_blob_paths(repo, e.oid, &next_base, into)?;
823        } else {
824            let path = if base.is_empty() {
825                name.into_owned()
826            } else {
827                format!("{base}{}", name.as_ref())
828            };
829            into.insert((path, e.oid));
830        }
831    }
832    Ok(())
833}
834
835fn tree_blob_closure_from_commits(
836    repo: &Repository,
837    commits: &HashSet<ObjectId>,
838) -> Result<HashSet<ObjectId>> {
839    let mut out = HashSet::new();
840    for &c in commits {
841        let commit = match load_commit_data(repo, c) {
842            Ok(co) => co,
843            Err(_) => continue,
844        };
845        let mut stack = vec![commit.tree];
846        while let Some(t) = stack.pop() {
847            if !out.insert(t) {
848                continue;
849            }
850            let obj = repo.odb.read(&t)?;
851            if obj.kind != ObjectKind::Tree {
852                continue;
853            }
854            let entries = parse_tree(&obj.data)?;
855            for e in entries {
856                if e.mode == 0o160000 {
857                    continue;
858                }
859                out.insert(e.oid);
860                if e.mode == 0o040000 {
861                    stack.push(e.oid);
862                }
863            }
864        }
865    }
866    Ok(out)
867}
868
869fn load_commit_data(repo: &Repository, oid: ObjectId) -> Result<crate::objects::CommitData> {
870    let object = repo.odb.read(&oid)?;
871    if object.kind != ObjectKind::Commit {
872        return Err(Error::CorruptObject(format!("{oid} is not a commit")));
873    }
874    parse_commit(&object.data)
875}
876
877fn mark_uninteresting_trees(
878    repo: &Repository,
879    graph: &mut CommitGraph<'_>,
880    interesting: &HashSet<ObjectId>,
881    excluded: &HashSet<ObjectId>,
882    exclude_tips: &HashSet<ObjectId>,
883    opts: &PathWalkOptions,
884    ctx: &mut PathWalkContext<'_>,
885) -> Result<()> {
886    if !opts.prune_all_uninteresting && !opts.edge_aggressive {
887        return Ok(());
888    }
889    let mut queue: VecDeque<ObjectId> = interesting.iter().copied().collect();
890    let mut seen_edge_commit = HashSet::new();
891    while let Some(c) = queue.pop_front() {
892        let parents = graph.parents_of(c)?;
893        for p in parents {
894            if interesting.contains(&p) {
895                continue;
896            }
897            if !excluded.contains(&p) {
898                continue;
899            }
900            if !seen_edge_commit.insert(p) {
901                continue;
902            }
903            let commit = load_commit_data(repo, p)?;
904            ctx.uninteresting_object.insert(commit.tree);
905            queue.push_back(p);
906        }
907    }
908    // Git `mark_edges_uninteresting`: `edge_hint_aggressive` marks trees only for commits that
909    // appear on the revision cmdline as UNINTERESTING (negative tips), not the whole excluded
910    // ancestry closure.
911    if opts.edge_aggressive {
912        for &c in exclude_tips {
913            if seen_edge_commit.contains(&c) {
914                continue;
915            }
916            let Ok(commit) = load_commit_data(repo, c) else {
917                continue;
918            };
919            ctx.uninteresting_object.insert(commit.tree);
920        }
921    }
922    Ok(())
923}
924
925fn setup_pending_objects(
926    repo: &Repository,
927    roots: &[ObjectWalkRoot],
928    extra_tag_refs: &[ObjectId],
929    opts: &PathWalkOptions,
930    ctx: &mut PathWalkContext<'_>,
931) -> Result<()> {
932    let mut tag_oids: Vec<ObjectId> = Vec::new();
933    let mut tagged_blob_oids: Vec<ObjectId> = Vec::new();
934    let mut tag_seen: HashSet<ObjectId> = HashSet::new();
935
936    for &oid in extra_tag_refs {
937        if ctx.seen_object.contains(&oid) {
938            continue;
939        }
940        let obj = repo.odb.read(&oid)?;
941        match obj.kind {
942            ObjectKind::Tag | ObjectKind::Commit => {
943                if tag_seen.insert(oid) {
944                    tag_oids.push(oid);
945                }
946            }
947            ObjectKind::Tree if opts.include_trees => {
948                ctx.seen_object.insert(oid);
949                ctx.append_root_tree(oid)?;
950            }
951            ObjectKind::Blob if opts.include_blobs => {
952                ctx.seen_object.insert(oid);
953                tagged_blob_oids.push(oid);
954            }
955            _ => {}
956        }
957    }
958
959    for root in roots {
960        let mut oid = root.oid;
961        let mut obj = repo.odb.read(&oid)?;
962        while obj.kind == ObjectKind::Tag {
963            if ctx.seen_object.contains(&oid) {
964                break;
965            }
966            ctx.seen_object.insert(oid);
967            if opts.include_tags && tag_seen.insert(oid) {
968                tag_oids.push(oid);
969            }
970            let tag = parse_tag(&obj.data)?;
971            oid = tag.object;
972            obj = repo.odb.read(&oid)?;
973        }
974        if obj.kind == ObjectKind::Tag {
975            continue;
976        }
977        if !ctx.seen_object.insert(oid) {
978            continue;
979        }
980        match obj.kind {
981            ObjectKind::Tree if opts.include_trees => {
982                if let Some(p) = &root.root_path {
983                    let trimmed = p.trim_end_matches('/');
984                    let path = if trimmed.is_empty() {
985                        "/".to_string()
986                    } else {
987                        format!("{trimmed}/")
988                    };
989                    ctx.add_path_pending(path, ObjectKind::Tree, oid, true)?;
990                } else {
991                    ctx.ensure_root_list();
992                    ctx.paths.get_mut(ROOT_PATH).expect("root").oids.push(oid);
993                    ctx.push_heap(ROOT_PATH);
994                }
995            }
996            ObjectKind::Blob if opts.include_blobs => {
997                if let Some(p) = &root.root_path {
998                    ctx.add_path_pending(p.clone(), ObjectKind::Blob, oid, true)?;
999                } else {
1000                    tagged_blob_oids.push(oid);
1001                }
1002            }
1003            ObjectKind::Commit => {}
1004            _ => {}
1005        }
1006    }
1007
1008    if opts.include_blobs && !tagged_blob_oids.is_empty() {
1009        let list = TypeOidList {
1010            kind: ObjectKind::Blob,
1011            oids: tagged_blob_oids,
1012            maybe_interesting: true,
1013        };
1014        ctx.paths.insert(TAGGED_BLOBS_PATH.to_string(), list);
1015        ctx.push_heap(TAGGED_BLOBS_PATH);
1016    }
1017    if opts.include_tags && !tag_oids.is_empty() {
1018        let list = TypeOidList {
1019            kind: ObjectKind::Tag,
1020            oids: tag_oids,
1021            maybe_interesting: true,
1022        };
1023        ctx.paths.insert(TAG_PATH.to_string(), list);
1024        ctx.push_heap(TAG_PATH);
1025    }
1026    Ok(())
1027}
1028
1029/// Parse `test-tool path-walk` argv after the subcommand name.
1030///
1031/// Returns options, positive revision specs, negative revision specs, stdin `--all`, and `--boundary`.
1032pub fn parse_path_walk_cli(
1033    git_dir: &Path,
1034    args: &[String],
1035) -> Result<(PathWalkOptions, Vec<String>, Vec<String>, bool, bool)> {
1036    let mut opts = PathWalkOptions::default();
1037    let mut positive = Vec::new();
1038    let mut negative = Vec::new();
1039    let mut stdin_all = false;
1040    let mut boundary_flag = false;
1041    let mut after_dd = false;
1042    let mut not_mode = false;
1043
1044    let mut i = 0usize;
1045    while i < args.len() {
1046        let a = &args[i];
1047        if !after_dd && a == "--" {
1048            after_dd = true;
1049            i += 1;
1050            continue;
1051        }
1052        if !after_dd {
1053            match a.as_str() {
1054                "--stdin-pl" => {
1055                    let mut buf = String::new();
1056                    std::io::stdin().read_to_string(&mut buf)?;
1057                    let lines: Vec<String> = buf
1058                        .lines()
1059                        .map(str::trim)
1060                        .filter(|l| !l.is_empty() && !l.starts_with('#'))
1061                        .map(String::from)
1062                        .collect();
1063                    if !lines.is_empty() {
1064                        opts.sparse_pattern_lines = Some(lines);
1065                    }
1066                }
1067                "--prune" => opts.prune_all_uninteresting = true,
1068                "--edge-aggressive" => opts.edge_aggressive = true,
1069                "--no-blobs" => opts.include_blobs = false,
1070                "--no-trees" => opts.include_trees = false,
1071                "--no-commits" => opts.include_commits = false,
1072                "--no-tags" => opts.include_tags = false,
1073                "--blobs" => opts.include_blobs = true,
1074                "--trees" => opts.include_trees = true,
1075                "--commits" => opts.include_commits = true,
1076                "--tags" => opts.include_tags = true,
1077                "--stdin" => {
1078                    let (pos, neg, all, _stdin_paths) =
1079                        collect_revision_specs_with_stdin(git_dir, &[], true)?;
1080                    stdin_all |= all;
1081                    positive.extend(pos);
1082                    negative.extend(neg);
1083                }
1084                _ => {
1085                    return Err(Error::InvalidRef(format!(
1086                        "path-walk: unknown option '{a}'"
1087                    )));
1088                }
1089            }
1090        } else if a == "--not" {
1091            not_mode = !not_mode;
1092        } else if a == "--boundary" {
1093            boundary_flag = true;
1094        } else if matches!(a.as_str(), "--all" | "--indexed-objects" | "--branches") {
1095            if not_mode {
1096                negative.push(a.clone());
1097            } else {
1098                positive.push(a.clone());
1099            }
1100        } else {
1101            let (p, n) = crate::rev_list::split_revision_token(a);
1102            if not_mode {
1103                negative.extend(p);
1104                positive.extend(n);
1105            } else {
1106                positive.extend(p);
1107                negative.extend(n);
1108            }
1109        }
1110        i += 1;
1111    }
1112
1113    Ok((opts, positive, negative, stdin_all, boundary_flag))
1114}