Skip to main content

grit_lib/
path_walk.rs

1//! Path-batched object graph walk matching Git's `walk_objects_by_path` / `test-tool path-walk`.
2//!
3//! Objects are grouped by repository-relative path (trees end with `/`) and emitted in batches
4//! following Git's priority queue ordering (tags, then blobs, then trees; ties by path).
5
6use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque};
7use std::io::Read;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::objects::{parse_commit, parse_tag, parse_tree, ObjectId, ObjectKind};
12use crate::refs;
13use crate::repo::Repository;
14use crate::rev_list::{
15    collect_revision_specs_with_stdin, date_order_walk, resolve_object_walk_roots,
16    resolve_revision_commits, walk_closure, walk_closure_ordered, CommitGraph, ObjectWalkRoot,
17};
18use crate::sparse_checkout::{path_in_sparse_checkout_patterns, ConePatterns};
19
20const ROOT_PATH: &str = "";
21const TAG_PATH: &str = "/tags";
22const TAGGED_BLOBS_PATH: &str = "/tagged-blobs";
23
24/// Options for [`walk_objects_by_path`], aligned with Git's `struct path_walk_info`.
25#[derive(Debug, Clone)]
26pub struct PathWalkOptions {
27    pub include_commits: bool,
28    pub include_trees: bool,
29    pub include_blobs: bool,
30    pub include_tags: bool,
31    pub prune_all_uninteresting: bool,
32    pub edge_aggressive: bool,
33    pub cone_patterns: Option<ConePatterns>,
34    /// Lines from `test-tool path-walk --stdin-pl` (trimmed, non-empty, non-comment).
35    pub sparse_pattern_lines: Option<Vec<String>>,
36}
37
38impl Default for PathWalkOptions {
39    fn default() -> Self {
40        Self {
41            include_commits: true,
42            include_trees: true,
43            include_blobs: true,
44            include_tags: true,
45            prune_all_uninteresting: false,
46            edge_aggressive: false,
47            cone_patterns: None,
48            sparse_pattern_lines: None,
49        }
50    }
51}
52
53/// One line of `test-tool path-walk` output (excluding trailing summary lines).
54#[derive(Debug, Clone)]
55pub struct PathWalkLine {
56    pub batch: u64,
57    pub object_kind: ObjectKind,
58    pub path: String,
59    pub oid: ObjectId,
60    pub uninteresting: bool,
61}
62
63#[derive(Debug, Clone, Default)]
64pub struct PathWalkCounts {
65    pub commits: u64,
66    pub trees: u64,
67    pub blobs: u64,
68    pub tags: u64,
69}
70
71/// Run a path walk for the given revision arguments and options.
72///
73/// `positive_specs` and `negative_specs` are raw revision strings (no interleaved `--not`;
74/// negatives are listed explicitly).
75pub fn walk_objects_by_path(
76    repo: &Repository,
77    positive_specs: &[String],
78    negative_specs: &[String],
79    stdin_all: bool,
80    boundary: bool,
81    opts: &PathWalkOptions,
82) -> Result<(Vec<PathWalkLine>, PathWalkCounts)> {
83    let mut graph = CommitGraph::new(repo, false);
84    let mut all_refs = stdin_all;
85    let mut filtered: Vec<String> = Vec::new();
86    let mut want_indexed_objects = false;
87    for p in positive_specs {
88        match p.as_str() {
89            "--all" => all_refs = true,
90            "--indexed-objects" => want_indexed_objects = true,
91            "--branches" => filtered.extend(expand_branches_refs(repo)?),
92            _ => filtered.push(p.clone()),
93        }
94    }
95    let mut neg_resolved: Vec<String> = Vec::new();
96    for n in negative_specs {
97        match n.as_str() {
98            "--all" => {
99                return Err(Error::InvalidRef(
100                    "--all is not valid in negative revision list".to_owned(),
101                ));
102            }
103            "--indexed-objects" => {
104                return Err(Error::InvalidRef(
105                    "--indexed-objects is not valid in negative revision list".to_owned(),
106                ));
107            }
108            "--branches" => neg_resolved.extend(expand_branches_refs(repo)?),
109            _ => neg_resolved.push(n.clone()),
110        }
111    }
112    if all_refs {
113        filtered.extend(all_ref_commits(repo)?);
114    }
115    if filtered.is_empty() && !want_indexed_objects && !all_refs {
116        return Err(Error::InvalidRef("no revisions specified".to_owned()));
117    }
118    let (commit_tips, mut object_roots) = if filtered.is_empty() {
119        (Vec::new(), Vec::new())
120    } else {
121        resolve_object_walk_roots(repo, &filtered)?
122    };
123    object_roots.extend(tag_object_roots_from_spec_names(repo, &filtered)?);
124    if want_indexed_objects {
125        object_roots.extend(indexed_blob_roots(repo)?);
126    }
127
128    let extra_tag_ref_targets: Vec<ObjectId> = if all_refs {
129        tag_ref_direct_targets(repo)?
130    } else {
131        Vec::new()
132    };
133    let exclude = resolve_revision_commits(repo, &neg_resolved)?;
134    let exclude_tips: HashSet<ObjectId> = exclude.iter().copied().collect();
135    let excluded: HashSet<ObjectId> = if exclude.is_empty() {
136        HashSet::new()
137    } else {
138        walk_closure(&mut graph, &exclude)?
139    };
140
141    let (included_commits, _) = if commit_tips.is_empty() {
142        (HashSet::new(), Vec::new())
143    } else {
144        walk_closure_ordered(&mut graph, &commit_tips)?
145    };
146    let mut interesting_commits: HashSet<ObjectId> = included_commits
147        .iter()
148        .copied()
149        .filter(|c| !excluded.contains(c))
150        .collect();
151
152    let boundary_commits: HashSet<ObjectId> = if boundary {
153        let inc: HashSet<ObjectId> = interesting_commits.iter().copied().collect();
154        let mut bset = HashSet::new();
155        for &oid in &interesting_commits {
156            for p in graph.parents_of(oid)? {
157                if !inc.contains(&p) && excluded.contains(&p) {
158                    bset.insert(p);
159                }
160            }
161        }
162        interesting_commits.extend(bset.iter().copied());
163        bset
164    } else {
165        HashSet::new()
166    };
167
168    let mut uninteresting_commits: HashSet<ObjectId> = excluded.iter().copied().collect();
169    uninteresting_commits.retain(|c| interesting_commits.contains(c));
170
171    let excluded_objects: HashSet<ObjectId> = tree_blob_closure_from_commits(repo, &excluded)?;
172    let excluded_blob_paths = excluded_blob_paths_from_commits(repo, &excluded)?;
173
174    let mut ctx = PathWalkContext {
175        repo,
176        opts,
177        paths: HashMap::new(),
178        heap: BinaryHeap::new(),
179        pushed: HashSet::new(),
180        seen_object: HashSet::new(),
181        uninteresting_object: HashSet::new(),
182        excluded_objects,
183        excluded_blob_paths,
184        heap_seq: 0,
185        batch: 0,
186        lines: Vec::new(),
187        counts: PathWalkCounts::default(),
188    };
189
190    mark_uninteresting_trees(
191        repo,
192        &mut graph,
193        &interesting_commits,
194        &excluded,
195        &exclude_tips,
196        opts,
197        &mut ctx,
198    )?;
199
200    if opts.include_trees {
201        ctx.ensure_root_list();
202        ctx.push_heap(ROOT_PATH);
203    }
204
205    setup_pending_objects(repo, &object_roots, &extra_tag_ref_targets, opts, &mut ctx)?;
206
207    let ordered_commits = date_order_walk(&mut graph, &interesting_commits, false)?;
208
209    let mut commit_oids: Vec<ObjectId> = Vec::new();
210    for c in ordered_commits {
211        if !interesting_commits.contains(&c) {
212            continue;
213        }
214        if opts.include_commits {
215            commit_oids.push(c);
216        }
217        if !opts.include_trees && !opts.include_blobs {
218            continue;
219        }
220        let commit = load_commit_data(repo, c)?;
221        let tree_oid = commit.tree;
222        if ctx.seen_object.contains(&tree_oid) {
223            continue;
224        }
225        ctx.seen_object.insert(tree_oid);
226        if ctx.excluded_objects.contains(&tree_oid) {
227            ctx.uninteresting_object.insert(tree_oid);
228        }
229        ctx.append_root_tree(tree_oid)?;
230    }
231
232    if opts.edge_aggressive && opts.include_trees {
233        for &tip in &exclude_tips {
234            let commit = load_commit_data(repo, tip)?;
235            let tree_oid = commit.tree;
236            if ctx.seen_object.contains(&tree_oid) {
237                continue;
238            }
239            ctx.seen_object.insert(tree_oid);
240            ctx.uninteresting_object.insert(tree_oid);
241            ctx.append_root_tree(tree_oid)?;
242        }
243    }
244
245    if opts.include_commits && !commit_oids.is_empty() {
246        ctx.emit_commit_batch(&commit_oids, &uninteresting_commits, &boundary_commits)?;
247    }
248
249    ctx.drain_heap(&mut graph)?;
250
251    if !ctx.paths.is_empty() {
252        for key in ctx.paths.keys().cloned().collect::<Vec<_>>() {
253            ctx.push_heap(&key);
254        }
255        ctx.drain_heap(&mut graph)?;
256    }
257
258    Ok((ctx.lines, ctx.counts))
259}
260
261/// Direct `refs/tags/*` peel targets (tag object, commit, tree, or blob OID as stored in the ref).
262/// When a revision token names `refs/tags/<name>` and points at an annotated tag object, include
263/// that tag OID as a root (Git `setup_revisions` keeps the tag in pending).
264fn tag_object_roots_from_spec_names(
265    repo: &Repository,
266    specs: &[String],
267) -> Result<Vec<ObjectWalkRoot>> {
268    let mut out = Vec::new();
269    let mut seen = HashSet::new();
270    for spec in specs {
271        if spec.contains("..") || spec.starts_with('^') || spec == "HEAD" {
272            continue;
273        }
274        if spec.len() == 40 && spec.chars().all(|c| c.is_ascii_hexdigit()) {
275            continue;
276        }
277        let refname = if spec.starts_with("refs/") {
278            spec.clone()
279        } else {
280            format!("refs/tags/{spec}")
281        };
282        let Ok(oid) = refs::resolve_ref(&repo.git_dir, &refname) else {
283            continue;
284        };
285        let obj = repo.odb.read(&oid)?;
286        if obj.kind != ObjectKind::Tag {
287            continue;
288        }
289        if seen.insert(oid) {
290            out.push(ObjectWalkRoot {
291                oid,
292                input: spec.clone(),
293                root_path: None,
294            });
295        }
296    }
297    Ok(out)
298}
299
300fn tag_ref_direct_targets(repo: &Repository) -> Result<Vec<ObjectId>> {
301    let mut out = Vec::new();
302    for (name, _) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
303        let oid = refs::resolve_ref(&repo.git_dir, &name)?;
304        out.push(oid);
305    }
306    Ok(out)
307}
308
309fn all_ref_commits(repo: &Repository) -> Result<Vec<String>> {
310    let mut specs = Vec::new();
311    specs.push("HEAD".to_owned());
312    for (name, _) in refs::list_refs(&repo.git_dir, "refs/")? {
313        specs.push(name);
314    }
315    Ok(specs)
316}
317
318pub fn expand_branches_refs(repo: &Repository) -> Result<Vec<String>> {
319    let mut out = Vec::new();
320    for (_, oid) in refs::list_refs(&repo.git_dir, "refs/heads/")? {
321        out.push(oid.to_hex());
322    }
323    Ok(out)
324}
325
326/// Index roots for `--indexed-objects`: cache-tree root (if present), recovered subtree trees when
327/// the index matches `HEAD` under a prefix, and stage-0 file blobs (each with `:path`).
328fn indexed_blob_roots(repo: &Repository) -> Result<Vec<ObjectWalkRoot>> {
329    let Some(_) = &repo.work_tree else {
330        return Ok(Vec::new());
331    };
332    let index_path = repo.git_dir.join("index");
333    if !index_path.is_file() {
334        return Ok(Vec::new());
335    }
336    let idx = crate::index::Index::load(&index_path)?;
337    let mut out = Vec::new();
338    if let Some(root) = idx.cache_tree_root {
339        out.push(ObjectWalkRoot {
340            oid: root,
341            input: String::new(),
342            root_path: None,
343        });
344    }
345
346    let head_tree = head_tree_oid(repo).ok();
347    let mut file_entries: Vec<(String, ObjectId)> = Vec::new();
348    for e in &idx.entries {
349        if e.stage() != 0 {
350            continue;
351        }
352        if e.mode == 0o160000 || e.mode == crate::index::MODE_TREE {
353            continue;
354        }
355        let path_str = String::from_utf8_lossy(&e.path).into_owned();
356        file_entries.push((path_str.clone(), e.oid));
357        out.push(ObjectWalkRoot {
358            oid: e.oid,
359            input: format!(":{path_str}"),
360            root_path: Some(path_str),
361        });
362    }
363
364    let mut prefixes: BTreeSet<String> = BTreeSet::new();
365    for (path, _) in &file_entries {
366        let mut end = path.len();
367        while end > 0 {
368            if let Some(pos) = path[..end].rfind('/') {
369                prefixes.insert(path[..pos].to_string());
370                end = pos;
371            } else {
372                break;
373            }
374        }
375    }
376
377    let mut recovery_added: HashSet<String> = HashSet::new();
378    let Some(ht) = head_tree else {
379        return Ok(out);
380    };
381    for pref in prefixes {
382        if pref.is_empty() {
383            continue;
384        }
385        let mut any_under = false;
386        let mut all_match = true;
387        for (path, oid) in &file_entries {
388            let under = if path == &pref {
389                true
390            } else if let Some(rest) = path.strip_prefix(&pref) {
391                rest.starts_with('/')
392            } else {
393                false
394            };
395            if !under {
396                continue;
397            }
398            any_under = true;
399            match crate::rev_parse::resolve_treeish_path(repo, ht, path.as_str()) {
400                Ok(h) if h == *oid => {}
401                _ => {
402                    all_match = false;
403                    break;
404                }
405            }
406        }
407        if !any_under || !all_match {
408            continue;
409        }
410        let tree_oid = crate::rev_parse::resolve_treeish_path(repo, ht, pref.as_str())?;
411        if !recovery_added.insert(pref.clone()) {
412            continue;
413        }
414        out.push(ObjectWalkRoot {
415            oid: tree_oid,
416            input: String::new(),
417            root_path: Some(pref),
418        });
419    }
420
421    Ok(out)
422}
423
424fn head_tree_oid(repo: &Repository) -> Result<ObjectId> {
425    let head = refs::resolve_ref(&repo.git_dir, "HEAD")?;
426    let obj = repo.odb.read(&head)?;
427    if obj.kind != ObjectKind::Commit {
428        return Err(Error::InvalidRef("HEAD is not a commit".to_owned()));
429    }
430    let c = parse_commit(&obj.data)?;
431    Ok(c.tree)
432}
433
434struct TypeOidList {
435    kind: ObjectKind,
436    oids: Vec<ObjectId>,
437    maybe_interesting: bool,
438}
439
440struct PathWalkContext<'a> {
441    repo: &'a Repository,
442    opts: &'a PathWalkOptions,
443    paths: HashMap<String, TypeOidList>,
444    heap: BinaryHeap<std::cmp::Reverse<PathHeapItem>>,
445    pushed: HashSet<String>,
446    seen_object: HashSet<ObjectId>,
447    uninteresting_object: HashSet<ObjectId>,
448    excluded_objects: HashSet<ObjectId>,
449    excluded_blob_paths: HashSet<(String, ObjectId)>,
450    heap_seq: u64,
451    batch: u64,
452    lines: Vec<PathWalkLine>,
453    counts: PathWalkCounts,
454}
455
456/// Matches Git `path-walk.c` `compare_by_type` + `prio_queue` insertion counter tie-break.
457#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
458struct PathHeapItem {
459    type_rank: u8,
460    path: String,
461    seq: u64,
462}
463
464fn type_rank(kind: ObjectKind) -> u8 {
465    match kind {
466        ObjectKind::Tag => 0,
467        ObjectKind::Blob => 1,
468        ObjectKind::Tree => 2,
469        ObjectKind::Commit => 3,
470    }
471}
472
473impl<'a> PathWalkContext<'a> {
474    fn ensure_root_list(&mut self) {
475        self.paths
476            .entry(ROOT_PATH.to_string())
477            .or_insert_with(|| TypeOidList {
478                kind: ObjectKind::Tree,
479                oids: Vec::new(),
480                maybe_interesting: true,
481            });
482    }
483
484    fn push_heap(&mut self, path: &str) {
485        if !self.pushed.insert(path.to_string()) {
486            return;
487        }
488        let Some(list) = self.paths.get(path) else {
489            return;
490        };
491        let seq = self.heap_seq;
492        self.heap_seq = self.heap_seq.wrapping_add(1);
493        self.heap.push(std::cmp::Reverse(PathHeapItem {
494            type_rank: type_rank(list.kind),
495            path: path.to_string(),
496            seq,
497        }));
498    }
499
500    fn append_root_tree(&mut self, oid: ObjectId) -> Result<()> {
501        self.ensure_root_list();
502        let list = self
503            .paths
504            .get_mut(ROOT_PATH)
505            .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?;
506        list.oids.push(oid);
507        self.push_heap(ROOT_PATH);
508        Ok(())
509    }
510
511    fn add_path(
512        &mut self,
513        path: String,
514        kind: ObjectKind,
515        oid: ObjectId,
516        interesting: bool,
517    ) -> Result<()> {
518        self.add_path_inner(path, kind, oid, interesting, true)
519    }
520
521    /// Record an object at `path` without enqueueing it on the path stack.
522    ///
523    /// Git's `setup_pending_objects` only calls `add_path_to_list` for index objects; those paths
524    /// are pushed in the second `strmap_for_each_entry` phase after the main heap drain, so they
525    /// do not interleave before commit root trees (`t6601` branches + indexed-objects).
526    fn add_path_pending(
527        &mut self,
528        path: String,
529        kind: ObjectKind,
530        oid: ObjectId,
531        interesting: bool,
532    ) -> Result<()> {
533        self.add_path_inner(path, kind, oid, interesting, false)
534    }
535
536    fn add_path_inner(
537        &mut self,
538        path: String,
539        kind: ObjectKind,
540        oid: ObjectId,
541        interesting: bool,
542        enqueue: bool,
543    ) -> Result<()> {
544        let list = self
545            .paths
546            .entry(path.clone())
547            .or_insert_with(|| TypeOidList {
548                kind,
549                oids: Vec::new(),
550                maybe_interesting: false,
551            });
552        if list.kind != kind {
553            return Err(Error::CorruptObject(format!(
554                "path-walk: inconsistent types at path {path:?}"
555            )));
556        }
557        list.maybe_interesting |= interesting;
558        list.oids.push(oid);
559        if enqueue {
560            self.push_heap(&path);
561        }
562        Ok(())
563    }
564
565    fn cone_allows_tree_path(&self, path_with_slash: &str) -> bool {
566        if let Some(lines) = &self.opts.sparse_pattern_lines {
567            return path_in_sparse_checkout_patterns(path_with_slash, lines, true);
568        }
569        if let Some(cone) = &self.opts.cone_patterns {
570            let trimmed = path_with_slash.trim_end_matches('/');
571            if cone.path_included(trimmed) {
572                return true;
573            }
574            return cone.path_included(path_with_slash);
575        }
576        true
577    }
578
579    fn cone_allows_blob_path(&self, path: &str) -> bool {
580        if let Some(lines) = &self.opts.sparse_pattern_lines {
581            return path_in_sparse_checkout_patterns(path, lines, true);
582        }
583        if let Some(cone) = &self.opts.cone_patterns {
584            return cone.path_included(path);
585        }
586        true
587    }
588
589    fn emit_commit_batch(
590        &mut self,
591        oids: &[ObjectId],
592        uninteresting: &HashSet<ObjectId>,
593        boundary: &HashSet<ObjectId>,
594    ) -> Result<()> {
595        let batch = self.batch;
596        self.batch += 1;
597        for &oid in oids {
598            let u = uninteresting.contains(&oid) || boundary.contains(&oid);
599            self.lines.push(PathWalkLine {
600                batch,
601                object_kind: ObjectKind::Commit,
602                path: ROOT_PATH.to_string(),
603                oid,
604                uninteresting: u,
605            });
606            self.counts.commits += 1;
607        }
608        Ok(())
609    }
610
611    fn drain_heap(&mut self, graph: &mut CommitGraph<'_>) -> Result<()> {
612        while let Some(std::cmp::Reverse(item)) = self.heap.pop() {
613            self.walk_path(&item.path, graph)?;
614        }
615        Ok(())
616    }
617
618    fn walk_path(&mut self, path: &str, graph: &mut CommitGraph<'_>) -> Result<()> {
619        let Some(mut list) = self.paths.remove(path) else {
620            return Ok(());
621        };
622        if list.oids.is_empty() {
623            return Ok(());
624        }
625
626        if self.opts.prune_all_uninteresting {
627            if !list.maybe_interesting {
628                return Ok(());
629            }
630            list.maybe_interesting = false;
631            for oid in &list.oids {
632                if self.object_is_interesting_at_path(path, *oid, list.kind) {
633                    list.maybe_interesting = true;
634                    break;
635                }
636            }
637            if !list.maybe_interesting {
638                return Ok(());
639            }
640        }
641
642        let emit = match list.kind {
643            ObjectKind::Tree => self.opts.include_trees,
644            ObjectKind::Blob => self.opts.include_blobs,
645            ObjectKind::Tag => self.opts.include_tags,
646            _ => false,
647        };
648        let batch = if emit {
649            let b = self.batch;
650            self.batch += 1;
651            b
652        } else {
653            0
654        };
655
656        match list.kind {
657            ObjectKind::Tree if self.opts.include_trees => {
658                for &oid in &list.oids {
659                    let u = !self.tree_is_interesting(oid);
660                    self.lines.push(PathWalkLine {
661                        batch,
662                        object_kind: ObjectKind::Tree,
663                        path: path.to_string(),
664                        oid,
665                        uninteresting: u,
666                    });
667                    self.counts.trees += 1;
668                }
669            }
670            ObjectKind::Blob if self.opts.include_blobs => {
671                for &oid in &list.oids {
672                    let u = !self.blob_is_interesting_at_path(path, oid);
673                    self.lines.push(PathWalkLine {
674                        batch,
675                        object_kind: ObjectKind::Blob,
676                        path: path.to_string(),
677                        oid,
678                        uninteresting: u,
679                    });
680                    self.counts.blobs += 1;
681                }
682            }
683            ObjectKind::Tag if self.opts.include_tags => {
684                for &oid in &list.oids {
685                    self.lines.push(PathWalkLine {
686                        batch,
687                        object_kind: ObjectKind::Tag,
688                        path: path.to_string(),
689                        oid,
690                        uninteresting: false,
691                    });
692                    self.counts.tags += 1;
693                }
694            }
695            _ => {}
696        }
697
698        if list.kind == ObjectKind::Tree {
699            let tree_oids = list.oids.clone();
700            for tree_oid in tree_oids {
701                self.add_tree_entries(path, tree_oid, graph)?;
702            }
703        }
704
705        Ok(())
706    }
707
708    fn add_tree_entries(
709        &mut self,
710        base_path: &str,
711        tree_oid: ObjectId,
712        _graph: &mut CommitGraph<'_>,
713    ) -> Result<()> {
714        let obj = self.repo.odb.read(&tree_oid)?;
715        if obj.kind != ObjectKind::Tree {
716            return Err(Error::CorruptObject(format!("{tree_oid} is not a tree")));
717        }
718        let parent_uninteresting = !self.tree_is_interesting(tree_oid);
719        let entries = parse_tree(&obj.data)?;
720        for entry in entries {
721            if entry.mode == 0o160000 {
722                continue;
723            }
724            let is_tree = entry.mode == 0o040000;
725            if !self.opts.include_blobs && !is_tree {
726                continue;
727            }
728            let name = String::from_utf8_lossy(&entry.name);
729            let child_oid = entry.oid;
730            if self.seen_object.contains(&child_oid) {
731                continue;
732            }
733            self.seen_object.insert(child_oid);
734            if parent_uninteresting {
735                self.uninteresting_object.insert(child_oid);
736            }
737            if is_tree {
738                let rel = if base_path.is_empty() {
739                    format!("{name}/")
740                } else {
741                    format!("{base_path}{name}/")
742                };
743                if !self.cone_allows_tree_path(&rel) {
744                    continue;
745                }
746                self.add_path(
747                    rel,
748                    ObjectKind::Tree,
749                    child_oid,
750                    self.tree_is_interesting(child_oid) || !parent_uninteresting,
751                )?;
752            } else {
753                let rel = if base_path.is_empty() {
754                    name.into_owned()
755                } else {
756                    format!("{base_path}{}", name.as_ref())
757                };
758                if !self.cone_allows_blob_path(&rel) {
759                    continue;
760                }
761                let blob_interesting =
762                    self.blob_is_interesting_at_path(&rel, child_oid) || !parent_uninteresting;
763                self.add_path(rel, ObjectKind::Blob, child_oid, blob_interesting)?;
764            }
765        }
766        Ok(())
767    }
768
769    fn object_is_interesting_at_path(&self, path: &str, oid: ObjectId, kind: ObjectKind) -> bool {
770        match kind {
771            ObjectKind::Blob => self.blob_is_interesting_at_path(path, oid),
772            ObjectKind::Tree => self.tree_is_interesting(oid),
773            _ => true,
774        }
775    }
776
777    fn tree_is_interesting(&self, oid: ObjectId) -> bool {
778        !self.uninteresting_object.contains(&oid) && !self.excluded_objects.contains(&oid)
779    }
780
781    fn blob_is_interesting_at_path(&self, path: &str, oid: ObjectId) -> bool {
782        !self.uninteresting_object.contains(&oid)
783            && !self.excluded_blob_paths.contains(&(path.to_string(), oid))
784    }
785}
786
787/// Every `(blob_path, blob_oid)` reachable from excluded commits' trees.
788fn excluded_blob_paths_from_commits(
789    repo: &Repository,
790    commits: &HashSet<ObjectId>,
791) -> Result<HashSet<(String, ObjectId)>> {
792    let mut out = HashSet::new();
793    for &c in commits {
794        let commit = match load_commit_data(repo, c) {
795            Ok(co) => co,
796            Err(_) => continue,
797        };
798        collect_blob_paths(repo, commit.tree, "", &mut out)?;
799    }
800    Ok(out)
801}
802
803fn collect_blob_paths(
804    repo: &Repository,
805    tree_oid: ObjectId,
806    base: &str,
807    into: &mut HashSet<(String, ObjectId)>,
808) -> Result<()> {
809    let obj = repo.odb.read(&tree_oid)?;
810    if obj.kind != ObjectKind::Tree {
811        return Ok(());
812    }
813    let entries = parse_tree(&obj.data)?;
814    for e in entries {
815        if e.mode == 0o160000 {
816            continue;
817        }
818        let name = String::from_utf8_lossy(&e.name);
819        if e.mode == 0o040000 {
820            let next_base = if base.is_empty() {
821                format!("{name}/")
822            } else {
823                format!("{base}{name}/")
824            };
825            collect_blob_paths(repo, e.oid, &next_base, into)?;
826        } else {
827            let path = if base.is_empty() {
828                name.into_owned()
829            } else {
830                format!("{base}{}", name.as_ref())
831            };
832            into.insert((path, e.oid));
833        }
834    }
835    Ok(())
836}
837
838fn tree_blob_closure_from_commits(
839    repo: &Repository,
840    commits: &HashSet<ObjectId>,
841) -> Result<HashSet<ObjectId>> {
842    let mut out = HashSet::new();
843    for &c in commits {
844        let commit = match load_commit_data(repo, c) {
845            Ok(co) => co,
846            Err(_) => continue,
847        };
848        let mut stack = vec![commit.tree];
849        while let Some(t) = stack.pop() {
850            if !out.insert(t) {
851                continue;
852            }
853            let obj = repo.odb.read(&t)?;
854            if obj.kind != ObjectKind::Tree {
855                continue;
856            }
857            let entries = parse_tree(&obj.data)?;
858            for e in entries {
859                if e.mode == 0o160000 {
860                    continue;
861                }
862                out.insert(e.oid);
863                if e.mode == 0o040000 {
864                    stack.push(e.oid);
865                }
866            }
867        }
868    }
869    Ok(out)
870}
871
872fn load_commit_data(repo: &Repository, oid: ObjectId) -> Result<crate::objects::CommitData> {
873    let object = repo.odb.read(&oid)?;
874    if object.kind != ObjectKind::Commit {
875        return Err(Error::CorruptObject(format!("{oid} is not a commit")));
876    }
877    parse_commit(&object.data)
878}
879
880fn mark_uninteresting_trees(
881    repo: &Repository,
882    graph: &mut CommitGraph<'_>,
883    interesting: &HashSet<ObjectId>,
884    excluded: &HashSet<ObjectId>,
885    exclude_tips: &HashSet<ObjectId>,
886    opts: &PathWalkOptions,
887    ctx: &mut PathWalkContext<'_>,
888) -> Result<()> {
889    if !opts.prune_all_uninteresting && !opts.edge_aggressive {
890        return Ok(());
891    }
892    let mut queue: VecDeque<ObjectId> = interesting.iter().copied().collect();
893    let mut seen_edge_commit = HashSet::new();
894    while let Some(c) = queue.pop_front() {
895        let parents = graph.parents_of(c)?;
896        for p in parents {
897            if interesting.contains(&p) {
898                continue;
899            }
900            if !excluded.contains(&p) {
901                continue;
902            }
903            if !seen_edge_commit.insert(p) {
904                continue;
905            }
906            let commit = load_commit_data(repo, p)?;
907            ctx.uninteresting_object.insert(commit.tree);
908            queue.push_back(p);
909        }
910    }
911    // Git `mark_edges_uninteresting`: `edge_hint_aggressive` marks trees only for commits that
912    // appear on the revision cmdline as UNINTERESTING (negative tips), not the whole excluded
913    // ancestry closure.
914    if opts.edge_aggressive {
915        for &c in exclude_tips {
916            if seen_edge_commit.contains(&c) {
917                continue;
918            }
919            let Ok(commit) = load_commit_data(repo, c) else {
920                continue;
921            };
922            ctx.uninteresting_object.insert(commit.tree);
923        }
924    }
925    Ok(())
926}
927
928fn setup_pending_objects(
929    repo: &Repository,
930    roots: &[ObjectWalkRoot],
931    extra_tag_refs: &[ObjectId],
932    opts: &PathWalkOptions,
933    ctx: &mut PathWalkContext<'_>,
934) -> Result<()> {
935    let mut tag_oids: Vec<ObjectId> = Vec::new();
936    let mut tagged_blob_oids: Vec<ObjectId> = Vec::new();
937    let mut tag_seen: HashSet<ObjectId> = HashSet::new();
938
939    for &oid in extra_tag_refs {
940        if ctx.seen_object.contains(&oid) {
941            continue;
942        }
943        let obj = repo.odb.read(&oid)?;
944        match obj.kind {
945            ObjectKind::Tag | ObjectKind::Commit => {
946                if tag_seen.insert(oid) {
947                    tag_oids.push(oid);
948                }
949            }
950            ObjectKind::Tree if opts.include_trees => {
951                ctx.seen_object.insert(oid);
952                ctx.append_root_tree(oid)?;
953            }
954            ObjectKind::Blob if opts.include_blobs => {
955                ctx.seen_object.insert(oid);
956                tagged_blob_oids.push(oid);
957            }
958            _ => {}
959        }
960    }
961
962    for root in roots {
963        let mut oid = root.oid;
964        let mut obj = repo.odb.read(&oid)?;
965        while obj.kind == ObjectKind::Tag {
966            if ctx.seen_object.contains(&oid) {
967                break;
968            }
969            ctx.seen_object.insert(oid);
970            if opts.include_tags && tag_seen.insert(oid) {
971                tag_oids.push(oid);
972            }
973            let tag = parse_tag(&obj.data)?;
974            oid = tag.object;
975            obj = repo.odb.read(&oid)?;
976        }
977        if obj.kind == ObjectKind::Tag {
978            continue;
979        }
980        if !ctx.seen_object.insert(oid) {
981            continue;
982        }
983        match obj.kind {
984            ObjectKind::Tree if opts.include_trees => {
985                if let Some(p) = &root.root_path {
986                    let trimmed = p.trim_end_matches('/');
987                    let path = if trimmed.is_empty() {
988                        "/".to_string()
989                    } else {
990                        format!("{trimmed}/")
991                    };
992                    ctx.add_path_pending(path, ObjectKind::Tree, oid, true)?;
993                } else {
994                    ctx.ensure_root_list();
995                    ctx.paths
996                        .get_mut(ROOT_PATH)
997                        .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?
998                        .oids
999                        .push(oid);
1000                    ctx.push_heap(ROOT_PATH);
1001                }
1002            }
1003            ObjectKind::Blob if opts.include_blobs => {
1004                if let Some(p) = &root.root_path {
1005                    ctx.add_path_pending(p.clone(), ObjectKind::Blob, oid, true)?;
1006                } else {
1007                    tagged_blob_oids.push(oid);
1008                }
1009            }
1010            ObjectKind::Commit => {}
1011            _ => {}
1012        }
1013    }
1014
1015    if opts.include_blobs && !tagged_blob_oids.is_empty() {
1016        let list = TypeOidList {
1017            kind: ObjectKind::Blob,
1018            oids: tagged_blob_oids,
1019            maybe_interesting: true,
1020        };
1021        ctx.paths.insert(TAGGED_BLOBS_PATH.to_string(), list);
1022        ctx.push_heap(TAGGED_BLOBS_PATH);
1023    }
1024    if opts.include_tags && !tag_oids.is_empty() {
1025        let list = TypeOidList {
1026            kind: ObjectKind::Tag,
1027            oids: tag_oids,
1028            maybe_interesting: true,
1029        };
1030        ctx.paths.insert(TAG_PATH.to_string(), list);
1031        ctx.push_heap(TAG_PATH);
1032    }
1033    Ok(())
1034}
1035
1036/// Parse `test-tool path-walk` argv after the subcommand name.
1037///
1038/// Returns options, positive revision specs, negative revision specs, stdin `--all`, and `--boundary`.
1039pub fn parse_path_walk_cli(
1040    git_dir: &Path,
1041    args: &[String],
1042) -> Result<(PathWalkOptions, Vec<String>, Vec<String>, bool, bool)> {
1043    let mut opts = PathWalkOptions::default();
1044    let mut positive = Vec::new();
1045    let mut negative = Vec::new();
1046    let mut stdin_all = false;
1047    let mut boundary_flag = false;
1048    let mut after_dd = false;
1049    let mut not_mode = false;
1050
1051    let mut i = 0usize;
1052    while i < args.len() {
1053        let a = &args[i];
1054        if !after_dd && a == "--" {
1055            after_dd = true;
1056            i += 1;
1057            continue;
1058        }
1059        if !after_dd {
1060            match a.as_str() {
1061                "--stdin-pl" => {
1062                    let mut buf = String::new();
1063                    std::io::stdin().read_to_string(&mut buf)?;
1064                    let lines: Vec<String> = buf
1065                        .lines()
1066                        .map(str::trim)
1067                        .filter(|l| !l.is_empty() && !l.starts_with('#'))
1068                        .map(String::from)
1069                        .collect();
1070                    if !lines.is_empty() {
1071                        opts.sparse_pattern_lines = Some(lines);
1072                    }
1073                }
1074                "--prune" => opts.prune_all_uninteresting = true,
1075                "--edge-aggressive" => opts.edge_aggressive = true,
1076                "--no-blobs" => opts.include_blobs = false,
1077                "--no-trees" => opts.include_trees = false,
1078                "--no-commits" => opts.include_commits = false,
1079                "--no-tags" => opts.include_tags = false,
1080                "--blobs" => opts.include_blobs = true,
1081                "--trees" => opts.include_trees = true,
1082                "--commits" => opts.include_commits = true,
1083                "--tags" => opts.include_tags = true,
1084                "--stdin" => {
1085                    let (pos, neg, all, _stdin_paths) =
1086                        collect_revision_specs_with_stdin(git_dir, &[], true)?;
1087                    stdin_all |= all;
1088                    positive.extend(pos);
1089                    negative.extend(neg);
1090                }
1091                _ => {
1092                    return Err(Error::InvalidRef(format!(
1093                        "path-walk: unknown option '{a}'"
1094                    )));
1095                }
1096            }
1097        } else if a == "--not" {
1098            not_mode = !not_mode;
1099        } else if a == "--boundary" {
1100            boundary_flag = true;
1101        } else if matches!(a.as_str(), "--all" | "--indexed-objects" | "--branches") {
1102            if not_mode {
1103                negative.push(a.clone());
1104            } else {
1105                positive.push(a.clone());
1106            }
1107        } else {
1108            let (p, n) = crate::rev_list::split_revision_token(a);
1109            if not_mode {
1110                negative.extend(p);
1111                positive.extend(n);
1112            } else {
1113                positive.extend(p);
1114                negative.extend(n);
1115            }
1116        }
1117        i += 1;
1118    }
1119
1120    Ok((opts, positive, negative, stdin_all, boundary_flag))
1121}