1use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque};
7use std::io::Read;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::objects::{parse_commit, parse_tag, parse_tree, ObjectId, ObjectKind};
12use crate::refs;
13use crate::repo::Repository;
14use crate::rev_list::{
15 collect_revision_specs_with_stdin, date_order_walk, resolve_object_walk_roots,
16 resolve_revision_commits, walk_closure, walk_closure_ordered, CommitGraph, ObjectWalkRoot,
17};
18use crate::sparse_checkout::{path_in_sparse_checkout_patterns, ConePatterns};
19
20const ROOT_PATH: &str = "";
21const TAG_PATH: &str = "/tags";
22const TAGGED_BLOBS_PATH: &str = "/tagged-blobs";
23
24#[derive(Debug, Clone)]
26pub struct PathWalkOptions {
27 pub include_commits: bool,
28 pub include_trees: bool,
29 pub include_blobs: bool,
30 pub include_tags: bool,
31 pub prune_all_uninteresting: bool,
32 pub edge_aggressive: bool,
33 pub cone_patterns: Option<ConePatterns>,
34 pub sparse_pattern_lines: Option<Vec<String>>,
36}
37
38impl Default for PathWalkOptions {
39 fn default() -> Self {
40 Self {
41 include_commits: true,
42 include_trees: true,
43 include_blobs: true,
44 include_tags: true,
45 prune_all_uninteresting: false,
46 edge_aggressive: false,
47 cone_patterns: None,
48 sparse_pattern_lines: None,
49 }
50 }
51}
52
53#[derive(Debug, Clone)]
55pub struct PathWalkLine {
56 pub batch: u64,
57 pub object_kind: ObjectKind,
58 pub path: String,
59 pub oid: ObjectId,
60 pub uninteresting: bool,
61}
62
63#[derive(Debug, Clone, Default)]
64pub struct PathWalkCounts {
65 pub commits: u64,
66 pub trees: u64,
67 pub blobs: u64,
68 pub tags: u64,
69}
70
71pub fn walk_objects_by_path(
76 repo: &Repository,
77 positive_specs: &[String],
78 negative_specs: &[String],
79 stdin_all: bool,
80 boundary: bool,
81 opts: &PathWalkOptions,
82) -> Result<(Vec<PathWalkLine>, PathWalkCounts)> {
83 let mut graph = CommitGraph::new(repo, false);
84 let mut all_refs = stdin_all;
85 let mut filtered: Vec<String> = Vec::new();
86 let mut want_indexed_objects = false;
87 for p in positive_specs {
88 match p.as_str() {
89 "--all" => all_refs = true,
90 "--indexed-objects" => want_indexed_objects = true,
91 "--branches" => filtered.extend(expand_branches_refs(repo)?),
92 _ => filtered.push(p.clone()),
93 }
94 }
95 let mut neg_resolved: Vec<String> = Vec::new();
96 for n in negative_specs {
97 match n.as_str() {
98 "--all" => {
99 return Err(Error::InvalidRef(
100 "--all is not valid in negative revision list".to_owned(),
101 ));
102 }
103 "--indexed-objects" => {
104 return Err(Error::InvalidRef(
105 "--indexed-objects is not valid in negative revision list".to_owned(),
106 ));
107 }
108 "--branches" => neg_resolved.extend(expand_branches_refs(repo)?),
109 _ => neg_resolved.push(n.clone()),
110 }
111 }
112 if all_refs {
113 filtered.extend(all_ref_commits(repo)?);
114 }
115 if filtered.is_empty() && !want_indexed_objects && !all_refs {
116 return Err(Error::InvalidRef("no revisions specified".to_owned()));
117 }
118 let (commit_tips, mut object_roots) = if filtered.is_empty() {
119 (Vec::new(), Vec::new())
120 } else {
121 resolve_object_walk_roots(repo, &filtered)?
122 };
123 object_roots.extend(tag_object_roots_from_spec_names(repo, &filtered)?);
124 if want_indexed_objects {
125 object_roots.extend(indexed_blob_roots(repo)?);
126 }
127
128 let extra_tag_ref_targets: Vec<ObjectId> = if all_refs {
129 tag_ref_direct_targets(repo)?
130 } else {
131 Vec::new()
132 };
133 let exclude = resolve_revision_commits(repo, &neg_resolved)?;
134 let exclude_tips: HashSet<ObjectId> = exclude.iter().copied().collect();
135 let excluded: HashSet<ObjectId> = if exclude.is_empty() {
136 HashSet::new()
137 } else {
138 walk_closure(&mut graph, &exclude)?
139 };
140
141 let (included_commits, _) = if commit_tips.is_empty() {
142 (HashSet::new(), Vec::new())
143 } else {
144 walk_closure_ordered(&mut graph, &commit_tips)?
145 };
146 let mut interesting_commits: HashSet<ObjectId> = included_commits
147 .iter()
148 .copied()
149 .filter(|c| !excluded.contains(c))
150 .collect();
151
152 let boundary_commits: HashSet<ObjectId> = if boundary {
153 let inc: HashSet<ObjectId> = interesting_commits.iter().copied().collect();
154 let mut bset = HashSet::new();
155 for &oid in &interesting_commits {
156 for p in graph.parents_of(oid)? {
157 if !inc.contains(&p) && excluded.contains(&p) {
158 bset.insert(p);
159 }
160 }
161 }
162 interesting_commits.extend(bset.iter().copied());
163 bset
164 } else {
165 HashSet::new()
166 };
167
168 let mut uninteresting_commits: HashSet<ObjectId> = excluded.iter().copied().collect();
169 uninteresting_commits.retain(|c| interesting_commits.contains(c));
170
171 let excluded_objects: HashSet<ObjectId> = tree_blob_closure_from_commits(repo, &excluded)?;
172 let excluded_blob_paths = excluded_blob_paths_from_commits(repo, &excluded)?;
173
174 let mut ctx = PathWalkContext {
175 repo,
176 opts,
177 paths: HashMap::new(),
178 heap: BinaryHeap::new(),
179 pushed: HashSet::new(),
180 seen_object: HashSet::new(),
181 uninteresting_object: HashSet::new(),
182 excluded_objects,
183 excluded_blob_paths,
184 heap_seq: 0,
185 batch: 0,
186 lines: Vec::new(),
187 counts: PathWalkCounts::default(),
188 };
189
190 mark_uninteresting_trees(
191 repo,
192 &mut graph,
193 &interesting_commits,
194 &excluded,
195 &exclude_tips,
196 opts,
197 &mut ctx,
198 )?;
199
200 if opts.include_trees {
201 ctx.ensure_root_list();
202 ctx.push_heap(ROOT_PATH);
203 }
204
205 setup_pending_objects(repo, &object_roots, &extra_tag_ref_targets, opts, &mut ctx)?;
206
207 let ordered_commits = date_order_walk(&mut graph, &interesting_commits, &[], false)?;
209
210 let mut commit_oids: Vec<ObjectId> = Vec::new();
211 for c in ordered_commits {
212 if !interesting_commits.contains(&c) {
213 continue;
214 }
215 if opts.include_commits {
216 commit_oids.push(c);
217 }
218 if !opts.include_trees && !opts.include_blobs {
219 continue;
220 }
221 let commit = load_commit_data(repo, c)?;
222 let tree_oid = commit.tree;
223 if ctx.seen_object.contains(&tree_oid) {
224 continue;
225 }
226 ctx.seen_object.insert(tree_oid);
227 if ctx.excluded_objects.contains(&tree_oid) {
228 ctx.uninteresting_object.insert(tree_oid);
229 }
230 ctx.append_root_tree(tree_oid)?;
231 }
232
233 if opts.edge_aggressive && opts.include_trees {
234 for &tip in &exclude_tips {
235 let commit = load_commit_data(repo, tip)?;
236 let tree_oid = commit.tree;
237 if ctx.seen_object.contains(&tree_oid) {
238 continue;
239 }
240 ctx.seen_object.insert(tree_oid);
241 ctx.uninteresting_object.insert(tree_oid);
242 ctx.append_root_tree(tree_oid)?;
243 }
244 }
245
246 if opts.include_commits && !commit_oids.is_empty() {
247 ctx.emit_commit_batch(&commit_oids, &uninteresting_commits, &boundary_commits)?;
248 }
249
250 ctx.drain_heap(&mut graph)?;
251
252 if !ctx.paths.is_empty() {
253 for key in ctx.paths.keys().cloned().collect::<Vec<_>>() {
254 ctx.push_heap(&key);
255 }
256 ctx.drain_heap(&mut graph)?;
257 }
258
259 Ok((ctx.lines, ctx.counts))
260}
261
262fn tag_object_roots_from_spec_names(
266 repo: &Repository,
267 specs: &[String],
268) -> Result<Vec<ObjectWalkRoot>> {
269 let mut out = Vec::new();
270 let mut seen = HashSet::new();
271 for spec in specs {
272 if spec.contains("..") || spec.starts_with('^') || spec == "HEAD" {
273 continue;
274 }
275 if spec.len() == 40 && spec.chars().all(|c| c.is_ascii_hexdigit()) {
276 continue;
277 }
278 let refname = if spec.starts_with("refs/") {
279 spec.clone()
280 } else {
281 format!("refs/tags/{spec}")
282 };
283 let Ok(oid) = refs::resolve_ref(&repo.git_dir, &refname) else {
284 continue;
285 };
286 let obj = repo.odb.read(&oid)?;
287 if obj.kind != ObjectKind::Tag {
288 continue;
289 }
290 if seen.insert(oid) {
291 out.push(ObjectWalkRoot {
292 oid,
293 input: spec.clone(),
294 root_path: None,
295 });
296 }
297 }
298 Ok(out)
299}
300
301fn tag_ref_direct_targets(repo: &Repository) -> Result<Vec<ObjectId>> {
302 let mut out = Vec::new();
303 for (name, _) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
304 let oid = refs::resolve_ref(&repo.git_dir, &name)?;
305 out.push(oid);
306 }
307 Ok(out)
308}
309
310fn all_ref_commits(repo: &Repository) -> Result<Vec<String>> {
311 let mut specs = Vec::new();
312 specs.push("HEAD".to_owned());
313 for (name, _) in refs::list_refs(&repo.git_dir, "refs/")? {
314 specs.push(name);
315 }
316 Ok(specs)
317}
318
319pub fn expand_branches_refs(repo: &Repository) -> Result<Vec<String>> {
320 let mut out = Vec::new();
321 for (_, oid) in refs::list_refs(&repo.git_dir, "refs/heads/")? {
322 out.push(oid.to_hex());
323 }
324 Ok(out)
325}
326
327fn indexed_blob_roots(repo: &Repository) -> Result<Vec<ObjectWalkRoot>> {
330 let Some(_) = &repo.work_tree else {
331 return Ok(Vec::new());
332 };
333 let index_path = repo.git_dir.join("index");
334 if !index_path.is_file() {
335 return Ok(Vec::new());
336 }
337 let idx = crate::index::Index::load(&index_path)?;
338 let mut out = Vec::new();
339 if let Some(root) = idx.cache_tree_root {
340 out.push(ObjectWalkRoot {
341 oid: root,
342 input: String::new(),
343 root_path: None,
344 });
345 }
346
347 let head_tree = head_tree_oid(repo).ok();
348 let mut file_entries: Vec<(String, ObjectId)> = Vec::new();
349 for e in &idx.entries {
350 if e.stage() != 0 {
351 continue;
352 }
353 if e.mode == 0o160000 || e.mode == crate::index::MODE_TREE {
354 continue;
355 }
356 let path_str = String::from_utf8_lossy(&e.path).into_owned();
357 file_entries.push((path_str.clone(), e.oid));
358 out.push(ObjectWalkRoot {
359 oid: e.oid,
360 input: format!(":{path_str}"),
361 root_path: Some(path_str),
362 });
363 }
364
365 let mut prefixes: BTreeSet<String> = BTreeSet::new();
366 for (path, _) in &file_entries {
367 let mut end = path.len();
368 while end > 0 {
369 if let Some(pos) = path[..end].rfind('/') {
370 prefixes.insert(path[..pos].to_string());
371 end = pos;
372 } else {
373 break;
374 }
375 }
376 }
377
378 let mut recovery_added: HashSet<String> = HashSet::new();
379 let Some(ht) = head_tree else {
380 return Ok(out);
381 };
382 for pref in prefixes {
383 if pref.is_empty() {
384 continue;
385 }
386 let mut any_under = false;
387 let mut all_match = true;
388 for (path, oid) in &file_entries {
389 let under = if path == &pref {
390 true
391 } else if let Some(rest) = path.strip_prefix(&pref) {
392 rest.starts_with('/')
393 } else {
394 false
395 };
396 if !under {
397 continue;
398 }
399 any_under = true;
400 match crate::rev_parse::resolve_treeish_path(repo, ht, path.as_str()) {
401 Ok(h) if h == *oid => {}
402 _ => {
403 all_match = false;
404 break;
405 }
406 }
407 }
408 if !any_under || !all_match {
409 continue;
410 }
411 let tree_oid = crate::rev_parse::resolve_treeish_path(repo, ht, pref.as_str())?;
412 if !recovery_added.insert(pref.clone()) {
413 continue;
414 }
415 out.push(ObjectWalkRoot {
416 oid: tree_oid,
417 input: String::new(),
418 root_path: Some(pref),
419 });
420 }
421
422 Ok(out)
423}
424
425fn head_tree_oid(repo: &Repository) -> Result<ObjectId> {
426 let head = refs::resolve_ref(&repo.git_dir, "HEAD")?;
427 let obj = repo.odb.read(&head)?;
428 if obj.kind != ObjectKind::Commit {
429 return Err(Error::InvalidRef("HEAD is not a commit".to_owned()));
430 }
431 let c = parse_commit(&obj.data)?;
432 Ok(c.tree)
433}
434
435struct TypeOidList {
436 kind: ObjectKind,
437 oids: Vec<ObjectId>,
438 maybe_interesting: bool,
439}
440
441struct PathWalkContext<'a> {
442 repo: &'a Repository,
443 opts: &'a PathWalkOptions,
444 paths: HashMap<String, TypeOidList>,
445 heap: BinaryHeap<std::cmp::Reverse<PathHeapItem>>,
446 pushed: HashSet<String>,
447 seen_object: HashSet<ObjectId>,
448 uninteresting_object: HashSet<ObjectId>,
449 excluded_objects: HashSet<ObjectId>,
450 excluded_blob_paths: HashSet<(String, ObjectId)>,
451 heap_seq: u64,
452 batch: u64,
453 lines: Vec<PathWalkLine>,
454 counts: PathWalkCounts,
455}
456
457#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
459struct PathHeapItem {
460 type_rank: u8,
461 path: String,
462 seq: u64,
463}
464
465fn type_rank(kind: ObjectKind) -> u8 {
466 match kind {
467 ObjectKind::Tag => 0,
468 ObjectKind::Blob => 1,
469 ObjectKind::Tree => 2,
470 ObjectKind::Commit => 3,
471 }
472}
473
474impl<'a> PathWalkContext<'a> {
475 fn ensure_root_list(&mut self) {
476 self.paths
477 .entry(ROOT_PATH.to_string())
478 .or_insert_with(|| TypeOidList {
479 kind: ObjectKind::Tree,
480 oids: Vec::new(),
481 maybe_interesting: true,
482 });
483 }
484
485 fn push_heap(&mut self, path: &str) {
486 if !self.pushed.insert(path.to_string()) {
487 return;
488 }
489 let Some(list) = self.paths.get(path) else {
490 return;
491 };
492 let seq = self.heap_seq;
493 self.heap_seq = self.heap_seq.wrapping_add(1);
494 self.heap.push(std::cmp::Reverse(PathHeapItem {
495 type_rank: type_rank(list.kind),
496 path: path.to_string(),
497 seq,
498 }));
499 }
500
501 fn append_root_tree(&mut self, oid: ObjectId) -> Result<()> {
502 self.ensure_root_list();
503 let list = self
504 .paths
505 .get_mut(ROOT_PATH)
506 .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?;
507 list.oids.push(oid);
508 self.push_heap(ROOT_PATH);
509 Ok(())
510 }
511
512 fn add_path(
513 &mut self,
514 path: String,
515 kind: ObjectKind,
516 oid: ObjectId,
517 interesting: bool,
518 ) -> Result<()> {
519 self.add_path_inner(path, kind, oid, interesting, true)
520 }
521
522 fn add_path_pending(
528 &mut self,
529 path: String,
530 kind: ObjectKind,
531 oid: ObjectId,
532 interesting: bool,
533 ) -> Result<()> {
534 self.add_path_inner(path, kind, oid, interesting, false)
535 }
536
537 fn add_path_inner(
538 &mut self,
539 path: String,
540 kind: ObjectKind,
541 oid: ObjectId,
542 interesting: bool,
543 enqueue: bool,
544 ) -> Result<()> {
545 let list = self
546 .paths
547 .entry(path.clone())
548 .or_insert_with(|| TypeOidList {
549 kind,
550 oids: Vec::new(),
551 maybe_interesting: false,
552 });
553 if list.kind != kind {
554 return Err(Error::CorruptObject(format!(
555 "path-walk: inconsistent types at path {path:?}"
556 )));
557 }
558 list.maybe_interesting |= interesting;
559 list.oids.push(oid);
560 if enqueue {
561 self.push_heap(&path);
562 }
563 Ok(())
564 }
565
566 fn cone_allows_tree_path(&self, path_with_slash: &str) -> bool {
567 if let Some(lines) = &self.opts.sparse_pattern_lines {
568 return path_in_sparse_checkout_patterns(path_with_slash, lines, true);
569 }
570 if let Some(cone) = &self.opts.cone_patterns {
571 let trimmed = path_with_slash.trim_end_matches('/');
572 if cone.path_included(trimmed) {
573 return true;
574 }
575 return cone.path_included(path_with_slash);
576 }
577 true
578 }
579
580 fn cone_allows_blob_path(&self, path: &str) -> bool {
581 if let Some(lines) = &self.opts.sparse_pattern_lines {
582 return path_in_sparse_checkout_patterns(path, lines, true);
583 }
584 if let Some(cone) = &self.opts.cone_patterns {
585 return cone.path_included(path);
586 }
587 true
588 }
589
590 fn emit_commit_batch(
591 &mut self,
592 oids: &[ObjectId],
593 uninteresting: &HashSet<ObjectId>,
594 boundary: &HashSet<ObjectId>,
595 ) -> Result<()> {
596 let batch = self.batch;
597 self.batch += 1;
598 for &oid in oids {
599 let u = uninteresting.contains(&oid) || boundary.contains(&oid);
600 self.lines.push(PathWalkLine {
601 batch,
602 object_kind: ObjectKind::Commit,
603 path: ROOT_PATH.to_string(),
604 oid,
605 uninteresting: u,
606 });
607 self.counts.commits += 1;
608 }
609 Ok(())
610 }
611
612 fn drain_heap(&mut self, graph: &mut CommitGraph<'_>) -> Result<()> {
613 while let Some(std::cmp::Reverse(item)) = self.heap.pop() {
614 self.walk_path(&item.path, graph)?;
615 }
616 Ok(())
617 }
618
619 fn walk_path(&mut self, path: &str, graph: &mut CommitGraph<'_>) -> Result<()> {
620 let Some(mut list) = self.paths.remove(path) else {
621 return Ok(());
622 };
623 if list.oids.is_empty() {
624 return Ok(());
625 }
626
627 if self.opts.prune_all_uninteresting {
628 if !list.maybe_interesting {
629 return Ok(());
630 }
631 list.maybe_interesting = false;
632 for oid in &list.oids {
633 if self.object_is_interesting_at_path(path, *oid, list.kind) {
634 list.maybe_interesting = true;
635 break;
636 }
637 }
638 if !list.maybe_interesting {
639 return Ok(());
640 }
641 }
642
643 let emit = match list.kind {
644 ObjectKind::Tree => self.opts.include_trees,
645 ObjectKind::Blob => self.opts.include_blobs,
646 ObjectKind::Tag => self.opts.include_tags,
647 _ => false,
648 };
649 let batch = if emit {
650 let b = self.batch;
651 self.batch += 1;
652 b
653 } else {
654 0
655 };
656
657 match list.kind {
658 ObjectKind::Tree if self.opts.include_trees => {
659 for &oid in &list.oids {
660 let u = !self.tree_is_interesting(oid);
661 self.lines.push(PathWalkLine {
662 batch,
663 object_kind: ObjectKind::Tree,
664 path: path.to_string(),
665 oid,
666 uninteresting: u,
667 });
668 self.counts.trees += 1;
669 }
670 }
671 ObjectKind::Blob if self.opts.include_blobs => {
672 for &oid in &list.oids {
673 let u = !self.blob_is_interesting_at_path(path, oid);
674 self.lines.push(PathWalkLine {
675 batch,
676 object_kind: ObjectKind::Blob,
677 path: path.to_string(),
678 oid,
679 uninteresting: u,
680 });
681 self.counts.blobs += 1;
682 }
683 }
684 ObjectKind::Tag if self.opts.include_tags => {
685 for &oid in &list.oids {
686 self.lines.push(PathWalkLine {
687 batch,
688 object_kind: ObjectKind::Tag,
689 path: path.to_string(),
690 oid,
691 uninteresting: false,
692 });
693 self.counts.tags += 1;
694 }
695 }
696 _ => {}
697 }
698
699 if list.kind == ObjectKind::Tree {
700 let tree_oids = list.oids.clone();
701 for tree_oid in tree_oids {
702 self.add_tree_entries(path, tree_oid, graph)?;
703 }
704 }
705
706 Ok(())
707 }
708
709 fn add_tree_entries(
710 &mut self,
711 base_path: &str,
712 tree_oid: ObjectId,
713 _graph: &mut CommitGraph<'_>,
714 ) -> Result<()> {
715 let obj = self.repo.odb.read(&tree_oid)?;
716 if obj.kind != ObjectKind::Tree {
717 return Err(Error::CorruptObject(format!("{tree_oid} is not a tree")));
718 }
719 let parent_uninteresting = !self.tree_is_interesting(tree_oid);
720 let entries = parse_tree(&obj.data)?;
721 for entry in entries {
722 if entry.mode == 0o160000 {
723 continue;
724 }
725 let is_tree = entry.mode == 0o040000;
726 if !self.opts.include_blobs && !is_tree {
727 continue;
728 }
729 let name = String::from_utf8_lossy(&entry.name);
730 let child_oid = entry.oid;
731 if self.seen_object.contains(&child_oid) {
732 continue;
733 }
734 self.seen_object.insert(child_oid);
735 if parent_uninteresting {
736 self.uninteresting_object.insert(child_oid);
737 }
738 if is_tree {
739 let rel = if base_path.is_empty() {
740 format!("{name}/")
741 } else {
742 format!("{base_path}{name}/")
743 };
744 if !self.cone_allows_tree_path(&rel) {
745 continue;
746 }
747 self.add_path(
748 rel,
749 ObjectKind::Tree,
750 child_oid,
751 self.tree_is_interesting(child_oid) || !parent_uninteresting,
752 )?;
753 } else {
754 let rel = if base_path.is_empty() {
755 name.into_owned()
756 } else {
757 format!("{base_path}{}", name.as_ref())
758 };
759 if !self.cone_allows_blob_path(&rel) {
760 continue;
761 }
762 let blob_interesting =
763 self.blob_is_interesting_at_path(&rel, child_oid) || !parent_uninteresting;
764 self.add_path(rel, ObjectKind::Blob, child_oid, blob_interesting)?;
765 }
766 }
767 Ok(())
768 }
769
770 fn object_is_interesting_at_path(&self, path: &str, oid: ObjectId, kind: ObjectKind) -> bool {
771 match kind {
772 ObjectKind::Blob => self.blob_is_interesting_at_path(path, oid),
773 ObjectKind::Tree => self.tree_is_interesting(oid),
774 _ => true,
775 }
776 }
777
778 fn tree_is_interesting(&self, oid: ObjectId) -> bool {
779 !self.uninteresting_object.contains(&oid) && !self.excluded_objects.contains(&oid)
780 }
781
782 fn blob_is_interesting_at_path(&self, path: &str, oid: ObjectId) -> bool {
783 !self.uninteresting_object.contains(&oid)
784 && !self.excluded_blob_paths.contains(&(path.to_string(), oid))
785 }
786}
787
788fn excluded_blob_paths_from_commits(
790 repo: &Repository,
791 commits: &HashSet<ObjectId>,
792) -> Result<HashSet<(String, ObjectId)>> {
793 let mut out = HashSet::new();
794 for &c in commits {
795 let commit = match load_commit_data(repo, c) {
796 Ok(co) => co,
797 Err(_) => continue,
798 };
799 collect_blob_paths(repo, commit.tree, "", &mut out)?;
800 }
801 Ok(out)
802}
803
804fn collect_blob_paths(
805 repo: &Repository,
806 tree_oid: ObjectId,
807 base: &str,
808 into: &mut HashSet<(String, ObjectId)>,
809) -> Result<()> {
810 let obj = repo.odb.read(&tree_oid)?;
811 if obj.kind != ObjectKind::Tree {
812 return Ok(());
813 }
814 let entries = parse_tree(&obj.data)?;
815 for e in entries {
816 if e.mode == 0o160000 {
817 continue;
818 }
819 let name = String::from_utf8_lossy(&e.name);
820 if e.mode == 0o040000 {
821 let next_base = if base.is_empty() {
822 format!("{name}/")
823 } else {
824 format!("{base}{name}/")
825 };
826 collect_blob_paths(repo, e.oid, &next_base, into)?;
827 } else {
828 let path = if base.is_empty() {
829 name.into_owned()
830 } else {
831 format!("{base}{}", name.as_ref())
832 };
833 into.insert((path, e.oid));
834 }
835 }
836 Ok(())
837}
838
839fn tree_blob_closure_from_commits(
840 repo: &Repository,
841 commits: &HashSet<ObjectId>,
842) -> Result<HashSet<ObjectId>> {
843 let mut out = HashSet::new();
844 for &c in commits {
845 let commit = match load_commit_data(repo, c) {
846 Ok(co) => co,
847 Err(_) => continue,
848 };
849 let mut stack = vec![commit.tree];
850 while let Some(t) = stack.pop() {
851 if !out.insert(t) {
852 continue;
853 }
854 let obj = repo.odb.read(&t)?;
855 if obj.kind != ObjectKind::Tree {
856 continue;
857 }
858 let entries = parse_tree(&obj.data)?;
859 for e in entries {
860 if e.mode == 0o160000 {
861 continue;
862 }
863 out.insert(e.oid);
864 if e.mode == 0o040000 {
865 stack.push(e.oid);
866 }
867 }
868 }
869 }
870 Ok(out)
871}
872
873fn load_commit_data(repo: &Repository, oid: ObjectId) -> Result<crate::objects::CommitData> {
874 let object = repo.odb.read(&oid)?;
875 if object.kind != ObjectKind::Commit {
876 return Err(Error::CorruptObject(format!("{oid} is not a commit")));
877 }
878 parse_commit(&object.data)
879}
880
881fn mark_uninteresting_trees(
882 repo: &Repository,
883 graph: &mut CommitGraph<'_>,
884 interesting: &HashSet<ObjectId>,
885 excluded: &HashSet<ObjectId>,
886 exclude_tips: &HashSet<ObjectId>,
887 opts: &PathWalkOptions,
888 ctx: &mut PathWalkContext<'_>,
889) -> Result<()> {
890 if !opts.prune_all_uninteresting && !opts.edge_aggressive {
891 return Ok(());
892 }
893 let mut queue: VecDeque<ObjectId> = interesting.iter().copied().collect();
894 let mut seen_edge_commit = HashSet::new();
895 while let Some(c) = queue.pop_front() {
896 let parents = graph.parents_of(c)?;
897 for p in parents {
898 if interesting.contains(&p) {
899 continue;
900 }
901 if !excluded.contains(&p) {
902 continue;
903 }
904 if !seen_edge_commit.insert(p) {
905 continue;
906 }
907 let commit = load_commit_data(repo, p)?;
908 ctx.uninteresting_object.insert(commit.tree);
909 queue.push_back(p);
910 }
911 }
912 if opts.edge_aggressive {
916 for &c in exclude_tips {
917 if seen_edge_commit.contains(&c) {
918 continue;
919 }
920 let Ok(commit) = load_commit_data(repo, c) else {
921 continue;
922 };
923 ctx.uninteresting_object.insert(commit.tree);
924 }
925 }
926 Ok(())
927}
928
929fn setup_pending_objects(
930 repo: &Repository,
931 roots: &[ObjectWalkRoot],
932 extra_tag_refs: &[ObjectId],
933 opts: &PathWalkOptions,
934 ctx: &mut PathWalkContext<'_>,
935) -> Result<()> {
936 let mut tag_oids: Vec<ObjectId> = Vec::new();
937 let mut tagged_blob_oids: Vec<ObjectId> = Vec::new();
938 let mut tag_seen: HashSet<ObjectId> = HashSet::new();
939
940 for &oid in extra_tag_refs {
941 if ctx.seen_object.contains(&oid) {
942 continue;
943 }
944 let obj = repo.odb.read(&oid)?;
945 match obj.kind {
946 ObjectKind::Tag | ObjectKind::Commit => {
947 if tag_seen.insert(oid) {
948 tag_oids.push(oid);
949 }
950 }
951 ObjectKind::Tree if opts.include_trees => {
952 ctx.seen_object.insert(oid);
953 ctx.append_root_tree(oid)?;
954 }
955 ObjectKind::Blob if opts.include_blobs => {
956 ctx.seen_object.insert(oid);
957 tagged_blob_oids.push(oid);
958 }
959 _ => {}
960 }
961 }
962
963 for root in roots {
964 let mut oid = root.oid;
965 let mut obj = repo.odb.read(&oid)?;
966 while obj.kind == ObjectKind::Tag {
967 if ctx.seen_object.contains(&oid) {
968 break;
969 }
970 ctx.seen_object.insert(oid);
971 if opts.include_tags && tag_seen.insert(oid) {
972 tag_oids.push(oid);
973 }
974 let tag = parse_tag(&obj.data)?;
975 oid = tag.object;
976 obj = repo.odb.read(&oid)?;
977 }
978 if obj.kind == ObjectKind::Tag {
979 continue;
980 }
981 if !ctx.seen_object.insert(oid) {
982 continue;
983 }
984 match obj.kind {
985 ObjectKind::Tree if opts.include_trees => {
986 if let Some(p) = &root.root_path {
987 let trimmed = p.trim_end_matches('/');
988 let path = if trimmed.is_empty() {
989 "/".to_string()
990 } else {
991 format!("{trimmed}/")
992 };
993 ctx.add_path_pending(path, ObjectKind::Tree, oid, true)?;
994 } else {
995 ctx.ensure_root_list();
996 ctx.paths
997 .get_mut(ROOT_PATH)
998 .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?
999 .oids
1000 .push(oid);
1001 ctx.push_heap(ROOT_PATH);
1002 }
1003 }
1004 ObjectKind::Blob if opts.include_blobs => {
1005 if let Some(p) = &root.root_path {
1006 ctx.add_path_pending(p.clone(), ObjectKind::Blob, oid, true)?;
1007 } else {
1008 tagged_blob_oids.push(oid);
1009 }
1010 }
1011 ObjectKind::Commit => {}
1012 _ => {}
1013 }
1014 }
1015
1016 if opts.include_blobs && !tagged_blob_oids.is_empty() {
1017 let list = TypeOidList {
1018 kind: ObjectKind::Blob,
1019 oids: tagged_blob_oids,
1020 maybe_interesting: true,
1021 };
1022 ctx.paths.insert(TAGGED_BLOBS_PATH.to_string(), list);
1023 ctx.push_heap(TAGGED_BLOBS_PATH);
1024 }
1025 if opts.include_tags && !tag_oids.is_empty() {
1026 let list = TypeOidList {
1027 kind: ObjectKind::Tag,
1028 oids: tag_oids,
1029 maybe_interesting: true,
1030 };
1031 ctx.paths.insert(TAG_PATH.to_string(), list);
1032 ctx.push_heap(TAG_PATH);
1033 }
1034 Ok(())
1035}
1036
1037pub fn parse_path_walk_cli(
1041 git_dir: &Path,
1042 args: &[String],
1043) -> Result<(PathWalkOptions, Vec<String>, Vec<String>, bool, bool)> {
1044 let mut opts = PathWalkOptions::default();
1045 let mut positive = Vec::new();
1046 let mut negative = Vec::new();
1047 let mut stdin_all = false;
1048 let mut boundary_flag = false;
1049 let mut after_dd = false;
1050 let mut not_mode = false;
1051
1052 let mut i = 0usize;
1053 while i < args.len() {
1054 let a = &args[i];
1055 if !after_dd && a == "--" {
1056 after_dd = true;
1057 i += 1;
1058 continue;
1059 }
1060 if !after_dd {
1061 match a.as_str() {
1062 "--stdin-pl" => {
1063 let mut buf = String::new();
1064 std::io::stdin().read_to_string(&mut buf)?;
1065 let lines: Vec<String> = buf
1066 .lines()
1067 .map(str::trim)
1068 .filter(|l| !l.is_empty() && !l.starts_with('#'))
1069 .map(String::from)
1070 .collect();
1071 if !lines.is_empty() {
1072 opts.sparse_pattern_lines = Some(lines);
1073 }
1074 }
1075 "--prune" => opts.prune_all_uninteresting = true,
1076 "--edge-aggressive" => opts.edge_aggressive = true,
1077 "--no-blobs" => opts.include_blobs = false,
1078 "--no-trees" => opts.include_trees = false,
1079 "--no-commits" => opts.include_commits = false,
1080 "--no-tags" => opts.include_tags = false,
1081 "--blobs" => opts.include_blobs = true,
1082 "--trees" => opts.include_trees = true,
1083 "--commits" => opts.include_commits = true,
1084 "--tags" => opts.include_tags = true,
1085 "--stdin" => {
1086 let (pos, neg, all, _stdin_paths) =
1087 collect_revision_specs_with_stdin(git_dir, &[], true)?;
1088 stdin_all |= all;
1089 positive.extend(pos);
1090 negative.extend(neg);
1091 }
1092 _ => {
1093 return Err(Error::InvalidRef(format!(
1094 "path-walk: unknown option '{a}'"
1095 )));
1096 }
1097 }
1098 } else if a == "--not" {
1099 not_mode = !not_mode;
1100 } else if a == "--boundary" {
1101 boundary_flag = true;
1102 } else if matches!(a.as_str(), "--all" | "--indexed-objects" | "--branches") {
1103 if not_mode {
1104 negative.push(a.clone());
1105 } else {
1106 positive.push(a.clone());
1107 }
1108 } else {
1109 let (p, n) = crate::rev_list::split_revision_token(a);
1110 if not_mode {
1111 negative.extend(p);
1112 positive.extend(n);
1113 } else {
1114 positive.extend(p);
1115 negative.extend(n);
1116 }
1117 }
1118 i += 1;
1119 }
1120
1121 Ok((opts, positive, negative, stdin_all, boundary_flag))
1122}