1use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque};
7use std::io::Read;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::objects::{parse_commit, parse_tag, parse_tree, ObjectId, ObjectKind};
12use crate::refs;
13use crate::repo::Repository;
14use crate::rev_list::{
15 collect_revision_specs_with_stdin, date_order_walk, resolve_object_walk_roots,
16 resolve_revision_commits, walk_closure, walk_closure_ordered, CommitGraph, ObjectWalkRoot,
17};
18use crate::sparse_checkout::{path_in_sparse_checkout_patterns, ConePatterns};
19
20const ROOT_PATH: &str = "";
21const TAG_PATH: &str = "/tags";
22const TAGGED_BLOBS_PATH: &str = "/tagged-blobs";
23
24#[derive(Debug, Clone)]
26pub struct PathWalkOptions {
27 pub include_commits: bool,
28 pub include_trees: bool,
29 pub include_blobs: bool,
30 pub include_tags: bool,
31 pub prune_all_uninteresting: bool,
32 pub edge_aggressive: bool,
33 pub cone_patterns: Option<ConePatterns>,
34 pub sparse_pattern_lines: Option<Vec<String>>,
36}
37
38impl Default for PathWalkOptions {
39 fn default() -> Self {
40 Self {
41 include_commits: true,
42 include_trees: true,
43 include_blobs: true,
44 include_tags: true,
45 prune_all_uninteresting: false,
46 edge_aggressive: false,
47 cone_patterns: None,
48 sparse_pattern_lines: None,
49 }
50 }
51}
52
53#[derive(Debug, Clone)]
55pub struct PathWalkLine {
56 pub batch: u64,
57 pub object_kind: ObjectKind,
58 pub path: String,
59 pub oid: ObjectId,
60 pub uninteresting: bool,
61}
62
63#[derive(Debug, Clone, Default)]
64pub struct PathWalkCounts {
65 pub commits: u64,
66 pub trees: u64,
67 pub blobs: u64,
68 pub tags: u64,
69}
70
71pub fn walk_objects_by_path(
76 repo: &Repository,
77 positive_specs: &[String],
78 negative_specs: &[String],
79 stdin_all: bool,
80 boundary: bool,
81 opts: &PathWalkOptions,
82) -> Result<(Vec<PathWalkLine>, PathWalkCounts)> {
83 let mut graph = CommitGraph::new(repo, false);
84 let mut all_refs = stdin_all;
85 let mut filtered: Vec<String> = Vec::new();
86 let mut want_indexed_objects = false;
87 for p in positive_specs {
88 match p.as_str() {
89 "--all" => all_refs = true,
90 "--indexed-objects" => want_indexed_objects = true,
91 "--branches" => filtered.extend(expand_branches_refs(repo)?),
92 _ => filtered.push(p.clone()),
93 }
94 }
95 let mut neg_resolved: Vec<String> = Vec::new();
96 for n in negative_specs {
97 match n.as_str() {
98 "--all" => {
99 return Err(Error::InvalidRef(
100 "--all is not valid in negative revision list".to_owned(),
101 ));
102 }
103 "--indexed-objects" => {
104 return Err(Error::InvalidRef(
105 "--indexed-objects is not valid in negative revision list".to_owned(),
106 ));
107 }
108 "--branches" => neg_resolved.extend(expand_branches_refs(repo)?),
109 _ => neg_resolved.push(n.clone()),
110 }
111 }
112 if all_refs {
113 filtered.extend(all_ref_commits(repo)?);
114 }
115 if filtered.is_empty() && !want_indexed_objects && !all_refs {
116 return Err(Error::InvalidRef("no revisions specified".to_owned()));
117 }
118 let (commit_tips, mut object_roots) = if filtered.is_empty() {
119 (Vec::new(), Vec::new())
120 } else {
121 resolve_object_walk_roots(repo, &filtered)?
122 };
123 object_roots.extend(tag_object_roots_from_spec_names(repo, &filtered)?);
124 if want_indexed_objects {
125 object_roots.extend(indexed_blob_roots(repo)?);
126 }
127
128 let extra_tag_ref_targets: Vec<ObjectId> = if all_refs {
129 tag_ref_direct_targets(repo)?
130 } else {
131 Vec::new()
132 };
133 let exclude = resolve_revision_commits(repo, &neg_resolved)?;
134 let exclude_tips: HashSet<ObjectId> = exclude.iter().copied().collect();
135 let excluded: HashSet<ObjectId> = if exclude.is_empty() {
136 HashSet::new()
137 } else {
138 walk_closure(&mut graph, &exclude)?
139 };
140
141 let (included_commits, _) = if commit_tips.is_empty() {
142 (HashSet::new(), Vec::new())
143 } else {
144 walk_closure_ordered(&mut graph, &commit_tips)?
145 };
146 let mut interesting_commits: HashSet<ObjectId> = included_commits
147 .iter()
148 .copied()
149 .filter(|c| !excluded.contains(c))
150 .collect();
151
152 let boundary_commits: HashSet<ObjectId> = if boundary {
153 let inc: HashSet<ObjectId> = interesting_commits.iter().copied().collect();
154 let mut bset = HashSet::new();
155 for &oid in &interesting_commits {
156 for p in graph.parents_of(oid)? {
157 if !inc.contains(&p) && excluded.contains(&p) {
158 bset.insert(p);
159 }
160 }
161 }
162 interesting_commits.extend(bset.iter().copied());
163 bset
164 } else {
165 HashSet::new()
166 };
167
168 let mut uninteresting_commits: HashSet<ObjectId> = excluded.iter().copied().collect();
169 uninteresting_commits.retain(|c| interesting_commits.contains(c));
170
171 let excluded_objects: HashSet<ObjectId> = tree_blob_closure_from_commits(repo, &excluded)?;
172 let excluded_blob_paths = excluded_blob_paths_from_commits(repo, &excluded)?;
173
174 let mut ctx = PathWalkContext {
175 repo,
176 opts,
177 paths: HashMap::new(),
178 heap: BinaryHeap::new(),
179 pushed: HashSet::new(),
180 seen_object: HashSet::new(),
181 uninteresting_object: HashSet::new(),
182 excluded_objects,
183 excluded_blob_paths,
184 heap_seq: 0,
185 batch: 0,
186 lines: Vec::new(),
187 counts: PathWalkCounts::default(),
188 };
189
190 mark_uninteresting_trees(
191 repo,
192 &mut graph,
193 &interesting_commits,
194 &excluded,
195 &exclude_tips,
196 opts,
197 &mut ctx,
198 )?;
199
200 if opts.include_trees {
201 ctx.ensure_root_list();
202 ctx.push_heap(ROOT_PATH);
203 }
204
205 setup_pending_objects(repo, &object_roots, &extra_tag_ref_targets, opts, &mut ctx)?;
206
207 let ordered_commits = date_order_walk(&mut graph, &interesting_commits, false)?;
208
209 let mut commit_oids: Vec<ObjectId> = Vec::new();
210 for c in ordered_commits {
211 if !interesting_commits.contains(&c) {
212 continue;
213 }
214 if opts.include_commits {
215 commit_oids.push(c);
216 }
217 if !opts.include_trees && !opts.include_blobs {
218 continue;
219 }
220 let commit = load_commit_data(repo, c)?;
221 let tree_oid = commit.tree;
222 if ctx.seen_object.contains(&tree_oid) {
223 continue;
224 }
225 ctx.seen_object.insert(tree_oid);
226 if ctx.excluded_objects.contains(&tree_oid) {
227 ctx.uninteresting_object.insert(tree_oid);
228 }
229 ctx.append_root_tree(tree_oid)?;
230 }
231
232 if opts.edge_aggressive && opts.include_trees {
233 for &tip in &exclude_tips {
234 let commit = load_commit_data(repo, tip)?;
235 let tree_oid = commit.tree;
236 if ctx.seen_object.contains(&tree_oid) {
237 continue;
238 }
239 ctx.seen_object.insert(tree_oid);
240 ctx.uninteresting_object.insert(tree_oid);
241 ctx.append_root_tree(tree_oid)?;
242 }
243 }
244
245 if opts.include_commits && !commit_oids.is_empty() {
246 ctx.emit_commit_batch(&commit_oids, &uninteresting_commits, &boundary_commits)?;
247 }
248
249 ctx.drain_heap(&mut graph)?;
250
251 if !ctx.paths.is_empty() {
252 for key in ctx.paths.keys().cloned().collect::<Vec<_>>() {
253 ctx.push_heap(&key);
254 }
255 ctx.drain_heap(&mut graph)?;
256 }
257
258 Ok((ctx.lines, ctx.counts))
259}
260
261fn tag_object_roots_from_spec_names(
265 repo: &Repository,
266 specs: &[String],
267) -> Result<Vec<ObjectWalkRoot>> {
268 let mut out = Vec::new();
269 let mut seen = HashSet::new();
270 for spec in specs {
271 if spec.contains("..") || spec.starts_with('^') || spec == "HEAD" {
272 continue;
273 }
274 if spec.len() == 40 && spec.chars().all(|c| c.is_ascii_hexdigit()) {
275 continue;
276 }
277 let refname = if spec.starts_with("refs/") {
278 spec.clone()
279 } else {
280 format!("refs/tags/{spec}")
281 };
282 let Ok(oid) = refs::resolve_ref(&repo.git_dir, &refname) else {
283 continue;
284 };
285 let obj = repo.odb.read(&oid)?;
286 if obj.kind != ObjectKind::Tag {
287 continue;
288 }
289 if seen.insert(oid) {
290 out.push(ObjectWalkRoot {
291 oid,
292 input: spec.clone(),
293 root_path: None,
294 });
295 }
296 }
297 Ok(out)
298}
299
300fn tag_ref_direct_targets(repo: &Repository) -> Result<Vec<ObjectId>> {
301 let mut out = Vec::new();
302 for (name, _) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
303 let oid = refs::resolve_ref(&repo.git_dir, &name)?;
304 out.push(oid);
305 }
306 Ok(out)
307}
308
309fn all_ref_commits(repo: &Repository) -> Result<Vec<String>> {
310 let mut specs = Vec::new();
311 specs.push("HEAD".to_owned());
312 for (name, _) in refs::list_refs(&repo.git_dir, "refs/")? {
313 specs.push(name);
314 }
315 Ok(specs)
316}
317
318pub fn expand_branches_refs(repo: &Repository) -> Result<Vec<String>> {
319 let mut out = Vec::new();
320 for (_, oid) in refs::list_refs(&repo.git_dir, "refs/heads/")? {
321 out.push(oid.to_hex());
322 }
323 Ok(out)
324}
325
326fn indexed_blob_roots(repo: &Repository) -> Result<Vec<ObjectWalkRoot>> {
329 let Some(_) = &repo.work_tree else {
330 return Ok(Vec::new());
331 };
332 let index_path = repo.git_dir.join("index");
333 if !index_path.is_file() {
334 return Ok(Vec::new());
335 }
336 let idx = crate::index::Index::load(&index_path)?;
337 let mut out = Vec::new();
338 if let Some(root) = idx.cache_tree_root {
339 out.push(ObjectWalkRoot {
340 oid: root,
341 input: String::new(),
342 root_path: None,
343 });
344 }
345
346 let head_tree = head_tree_oid(repo).ok();
347 let mut file_entries: Vec<(String, ObjectId)> = Vec::new();
348 for e in &idx.entries {
349 if e.stage() != 0 {
350 continue;
351 }
352 if e.mode == 0o160000 || e.mode == crate::index::MODE_TREE {
353 continue;
354 }
355 let path_str = String::from_utf8_lossy(&e.path).into_owned();
356 file_entries.push((path_str.clone(), e.oid));
357 out.push(ObjectWalkRoot {
358 oid: e.oid,
359 input: format!(":{path_str}"),
360 root_path: Some(path_str),
361 });
362 }
363
364 let mut prefixes: BTreeSet<String> = BTreeSet::new();
365 for (path, _) in &file_entries {
366 let mut end = path.len();
367 while end > 0 {
368 if let Some(pos) = path[..end].rfind('/') {
369 prefixes.insert(path[..pos].to_string());
370 end = pos;
371 } else {
372 break;
373 }
374 }
375 }
376
377 let mut recovery_added: HashSet<String> = HashSet::new();
378 let Some(ht) = head_tree else {
379 return Ok(out);
380 };
381 for pref in prefixes {
382 if pref.is_empty() {
383 continue;
384 }
385 let mut any_under = false;
386 let mut all_match = true;
387 for (path, oid) in &file_entries {
388 let under = if path == &pref {
389 true
390 } else if let Some(rest) = path.strip_prefix(&pref) {
391 rest.starts_with('/')
392 } else {
393 false
394 };
395 if !under {
396 continue;
397 }
398 any_under = true;
399 match crate::rev_parse::resolve_treeish_path(repo, ht, path.as_str()) {
400 Ok(h) if h == *oid => {}
401 _ => {
402 all_match = false;
403 break;
404 }
405 }
406 }
407 if !any_under || !all_match {
408 continue;
409 }
410 let tree_oid = crate::rev_parse::resolve_treeish_path(repo, ht, pref.as_str())?;
411 if !recovery_added.insert(pref.clone()) {
412 continue;
413 }
414 out.push(ObjectWalkRoot {
415 oid: tree_oid,
416 input: String::new(),
417 root_path: Some(pref),
418 });
419 }
420
421 Ok(out)
422}
423
424fn head_tree_oid(repo: &Repository) -> Result<ObjectId> {
425 let head = refs::resolve_ref(&repo.git_dir, "HEAD")?;
426 let obj = repo.odb.read(&head)?;
427 if obj.kind != ObjectKind::Commit {
428 return Err(Error::InvalidRef("HEAD is not a commit".to_owned()));
429 }
430 let c = parse_commit(&obj.data)?;
431 Ok(c.tree)
432}
433
434struct TypeOidList {
435 kind: ObjectKind,
436 oids: Vec<ObjectId>,
437 maybe_interesting: bool,
438}
439
440struct PathWalkContext<'a> {
441 repo: &'a Repository,
442 opts: &'a PathWalkOptions,
443 paths: HashMap<String, TypeOidList>,
444 heap: BinaryHeap<std::cmp::Reverse<PathHeapItem>>,
445 pushed: HashSet<String>,
446 seen_object: HashSet<ObjectId>,
447 uninteresting_object: HashSet<ObjectId>,
448 excluded_objects: HashSet<ObjectId>,
449 excluded_blob_paths: HashSet<(String, ObjectId)>,
450 heap_seq: u64,
451 batch: u64,
452 lines: Vec<PathWalkLine>,
453 counts: PathWalkCounts,
454}
455
456#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
458struct PathHeapItem {
459 type_rank: u8,
460 path: String,
461 seq: u64,
462}
463
464fn type_rank(kind: ObjectKind) -> u8 {
465 match kind {
466 ObjectKind::Tag => 0,
467 ObjectKind::Blob => 1,
468 ObjectKind::Tree => 2,
469 ObjectKind::Commit => 3,
470 }
471}
472
473impl<'a> PathWalkContext<'a> {
474 fn ensure_root_list(&mut self) {
475 self.paths
476 .entry(ROOT_PATH.to_string())
477 .or_insert_with(|| TypeOidList {
478 kind: ObjectKind::Tree,
479 oids: Vec::new(),
480 maybe_interesting: true,
481 });
482 }
483
484 fn push_heap(&mut self, path: &str) {
485 if !self.pushed.insert(path.to_string()) {
486 return;
487 }
488 let Some(list) = self.paths.get(path) else {
489 return;
490 };
491 let seq = self.heap_seq;
492 self.heap_seq = self.heap_seq.wrapping_add(1);
493 self.heap.push(std::cmp::Reverse(PathHeapItem {
494 type_rank: type_rank(list.kind),
495 path: path.to_string(),
496 seq,
497 }));
498 }
499
500 fn append_root_tree(&mut self, oid: ObjectId) -> Result<()> {
501 self.ensure_root_list();
502 let list = self
503 .paths
504 .get_mut(ROOT_PATH)
505 .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?;
506 list.oids.push(oid);
507 self.push_heap(ROOT_PATH);
508 Ok(())
509 }
510
511 fn add_path(
512 &mut self,
513 path: String,
514 kind: ObjectKind,
515 oid: ObjectId,
516 interesting: bool,
517 ) -> Result<()> {
518 self.add_path_inner(path, kind, oid, interesting, true)
519 }
520
521 fn add_path_pending(
527 &mut self,
528 path: String,
529 kind: ObjectKind,
530 oid: ObjectId,
531 interesting: bool,
532 ) -> Result<()> {
533 self.add_path_inner(path, kind, oid, interesting, false)
534 }
535
536 fn add_path_inner(
537 &mut self,
538 path: String,
539 kind: ObjectKind,
540 oid: ObjectId,
541 interesting: bool,
542 enqueue: bool,
543 ) -> Result<()> {
544 let list = self
545 .paths
546 .entry(path.clone())
547 .or_insert_with(|| TypeOidList {
548 kind,
549 oids: Vec::new(),
550 maybe_interesting: false,
551 });
552 if list.kind != kind {
553 return Err(Error::CorruptObject(format!(
554 "path-walk: inconsistent types at path {path:?}"
555 )));
556 }
557 list.maybe_interesting |= interesting;
558 list.oids.push(oid);
559 if enqueue {
560 self.push_heap(&path);
561 }
562 Ok(())
563 }
564
565 fn cone_allows_tree_path(&self, path_with_slash: &str) -> bool {
566 if let Some(lines) = &self.opts.sparse_pattern_lines {
567 return path_in_sparse_checkout_patterns(path_with_slash, lines, true);
568 }
569 if let Some(cone) = &self.opts.cone_patterns {
570 let trimmed = path_with_slash.trim_end_matches('/');
571 if cone.path_included(trimmed) {
572 return true;
573 }
574 return cone.path_included(path_with_slash);
575 }
576 true
577 }
578
579 fn cone_allows_blob_path(&self, path: &str) -> bool {
580 if let Some(lines) = &self.opts.sparse_pattern_lines {
581 return path_in_sparse_checkout_patterns(path, lines, true);
582 }
583 if let Some(cone) = &self.opts.cone_patterns {
584 return cone.path_included(path);
585 }
586 true
587 }
588
589 fn emit_commit_batch(
590 &mut self,
591 oids: &[ObjectId],
592 uninteresting: &HashSet<ObjectId>,
593 boundary: &HashSet<ObjectId>,
594 ) -> Result<()> {
595 let batch = self.batch;
596 self.batch += 1;
597 for &oid in oids {
598 let u = uninteresting.contains(&oid) || boundary.contains(&oid);
599 self.lines.push(PathWalkLine {
600 batch,
601 object_kind: ObjectKind::Commit,
602 path: ROOT_PATH.to_string(),
603 oid,
604 uninteresting: u,
605 });
606 self.counts.commits += 1;
607 }
608 Ok(())
609 }
610
611 fn drain_heap(&mut self, graph: &mut CommitGraph<'_>) -> Result<()> {
612 while let Some(std::cmp::Reverse(item)) = self.heap.pop() {
613 self.walk_path(&item.path, graph)?;
614 }
615 Ok(())
616 }
617
618 fn walk_path(&mut self, path: &str, graph: &mut CommitGraph<'_>) -> Result<()> {
619 let Some(mut list) = self.paths.remove(path) else {
620 return Ok(());
621 };
622 if list.oids.is_empty() {
623 return Ok(());
624 }
625
626 if self.opts.prune_all_uninteresting {
627 if !list.maybe_interesting {
628 return Ok(());
629 }
630 list.maybe_interesting = false;
631 for oid in &list.oids {
632 if self.object_is_interesting_at_path(path, *oid, list.kind) {
633 list.maybe_interesting = true;
634 break;
635 }
636 }
637 if !list.maybe_interesting {
638 return Ok(());
639 }
640 }
641
642 let emit = match list.kind {
643 ObjectKind::Tree => self.opts.include_trees,
644 ObjectKind::Blob => self.opts.include_blobs,
645 ObjectKind::Tag => self.opts.include_tags,
646 _ => false,
647 };
648 let batch = if emit {
649 let b = self.batch;
650 self.batch += 1;
651 b
652 } else {
653 0
654 };
655
656 match list.kind {
657 ObjectKind::Tree if self.opts.include_trees => {
658 for &oid in &list.oids {
659 let u = !self.tree_is_interesting(oid);
660 self.lines.push(PathWalkLine {
661 batch,
662 object_kind: ObjectKind::Tree,
663 path: path.to_string(),
664 oid,
665 uninteresting: u,
666 });
667 self.counts.trees += 1;
668 }
669 }
670 ObjectKind::Blob if self.opts.include_blobs => {
671 for &oid in &list.oids {
672 let u = !self.blob_is_interesting_at_path(path, oid);
673 self.lines.push(PathWalkLine {
674 batch,
675 object_kind: ObjectKind::Blob,
676 path: path.to_string(),
677 oid,
678 uninteresting: u,
679 });
680 self.counts.blobs += 1;
681 }
682 }
683 ObjectKind::Tag if self.opts.include_tags => {
684 for &oid in &list.oids {
685 self.lines.push(PathWalkLine {
686 batch,
687 object_kind: ObjectKind::Tag,
688 path: path.to_string(),
689 oid,
690 uninteresting: false,
691 });
692 self.counts.tags += 1;
693 }
694 }
695 _ => {}
696 }
697
698 if list.kind == ObjectKind::Tree {
699 let tree_oids = list.oids.clone();
700 for tree_oid in tree_oids {
701 self.add_tree_entries(path, tree_oid, graph)?;
702 }
703 }
704
705 Ok(())
706 }
707
708 fn add_tree_entries(
709 &mut self,
710 base_path: &str,
711 tree_oid: ObjectId,
712 _graph: &mut CommitGraph<'_>,
713 ) -> Result<()> {
714 let obj = self.repo.odb.read(&tree_oid)?;
715 if obj.kind != ObjectKind::Tree {
716 return Err(Error::CorruptObject(format!("{tree_oid} is not a tree")));
717 }
718 let parent_uninteresting = !self.tree_is_interesting(tree_oid);
719 let entries = parse_tree(&obj.data)?;
720 for entry in entries {
721 if entry.mode == 0o160000 {
722 continue;
723 }
724 let is_tree = entry.mode == 0o040000;
725 if !self.opts.include_blobs && !is_tree {
726 continue;
727 }
728 let name = String::from_utf8_lossy(&entry.name);
729 let child_oid = entry.oid;
730 if self.seen_object.contains(&child_oid) {
731 continue;
732 }
733 self.seen_object.insert(child_oid);
734 if parent_uninteresting {
735 self.uninteresting_object.insert(child_oid);
736 }
737 if is_tree {
738 let rel = if base_path.is_empty() {
739 format!("{name}/")
740 } else {
741 format!("{base_path}{name}/")
742 };
743 if !self.cone_allows_tree_path(&rel) {
744 continue;
745 }
746 self.add_path(
747 rel,
748 ObjectKind::Tree,
749 child_oid,
750 self.tree_is_interesting(child_oid) || !parent_uninteresting,
751 )?;
752 } else {
753 let rel = if base_path.is_empty() {
754 name.into_owned()
755 } else {
756 format!("{base_path}{}", name.as_ref())
757 };
758 if !self.cone_allows_blob_path(&rel) {
759 continue;
760 }
761 let blob_interesting =
762 self.blob_is_interesting_at_path(&rel, child_oid) || !parent_uninteresting;
763 self.add_path(rel, ObjectKind::Blob, child_oid, blob_interesting)?;
764 }
765 }
766 Ok(())
767 }
768
769 fn object_is_interesting_at_path(&self, path: &str, oid: ObjectId, kind: ObjectKind) -> bool {
770 match kind {
771 ObjectKind::Blob => self.blob_is_interesting_at_path(path, oid),
772 ObjectKind::Tree => self.tree_is_interesting(oid),
773 _ => true,
774 }
775 }
776
777 fn tree_is_interesting(&self, oid: ObjectId) -> bool {
778 !self.uninteresting_object.contains(&oid) && !self.excluded_objects.contains(&oid)
779 }
780
781 fn blob_is_interesting_at_path(&self, path: &str, oid: ObjectId) -> bool {
782 !self.uninteresting_object.contains(&oid)
783 && !self.excluded_blob_paths.contains(&(path.to_string(), oid))
784 }
785}
786
787fn excluded_blob_paths_from_commits(
789 repo: &Repository,
790 commits: &HashSet<ObjectId>,
791) -> Result<HashSet<(String, ObjectId)>> {
792 let mut out = HashSet::new();
793 for &c in commits {
794 let commit = match load_commit_data(repo, c) {
795 Ok(co) => co,
796 Err(_) => continue,
797 };
798 collect_blob_paths(repo, commit.tree, "", &mut out)?;
799 }
800 Ok(out)
801}
802
803fn collect_blob_paths(
804 repo: &Repository,
805 tree_oid: ObjectId,
806 base: &str,
807 into: &mut HashSet<(String, ObjectId)>,
808) -> Result<()> {
809 let obj = repo.odb.read(&tree_oid)?;
810 if obj.kind != ObjectKind::Tree {
811 return Ok(());
812 }
813 let entries = parse_tree(&obj.data)?;
814 for e in entries {
815 if e.mode == 0o160000 {
816 continue;
817 }
818 let name = String::from_utf8_lossy(&e.name);
819 if e.mode == 0o040000 {
820 let next_base = if base.is_empty() {
821 format!("{name}/")
822 } else {
823 format!("{base}{name}/")
824 };
825 collect_blob_paths(repo, e.oid, &next_base, into)?;
826 } else {
827 let path = if base.is_empty() {
828 name.into_owned()
829 } else {
830 format!("{base}{}", name.as_ref())
831 };
832 into.insert((path, e.oid));
833 }
834 }
835 Ok(())
836}
837
838fn tree_blob_closure_from_commits(
839 repo: &Repository,
840 commits: &HashSet<ObjectId>,
841) -> Result<HashSet<ObjectId>> {
842 let mut out = HashSet::new();
843 for &c in commits {
844 let commit = match load_commit_data(repo, c) {
845 Ok(co) => co,
846 Err(_) => continue,
847 };
848 let mut stack = vec![commit.tree];
849 while let Some(t) = stack.pop() {
850 if !out.insert(t) {
851 continue;
852 }
853 let obj = repo.odb.read(&t)?;
854 if obj.kind != ObjectKind::Tree {
855 continue;
856 }
857 let entries = parse_tree(&obj.data)?;
858 for e in entries {
859 if e.mode == 0o160000 {
860 continue;
861 }
862 out.insert(e.oid);
863 if e.mode == 0o040000 {
864 stack.push(e.oid);
865 }
866 }
867 }
868 }
869 Ok(out)
870}
871
872fn load_commit_data(repo: &Repository, oid: ObjectId) -> Result<crate::objects::CommitData> {
873 let object = repo.odb.read(&oid)?;
874 if object.kind != ObjectKind::Commit {
875 return Err(Error::CorruptObject(format!("{oid} is not a commit")));
876 }
877 parse_commit(&object.data)
878}
879
880fn mark_uninteresting_trees(
881 repo: &Repository,
882 graph: &mut CommitGraph<'_>,
883 interesting: &HashSet<ObjectId>,
884 excluded: &HashSet<ObjectId>,
885 exclude_tips: &HashSet<ObjectId>,
886 opts: &PathWalkOptions,
887 ctx: &mut PathWalkContext<'_>,
888) -> Result<()> {
889 if !opts.prune_all_uninteresting && !opts.edge_aggressive {
890 return Ok(());
891 }
892 let mut queue: VecDeque<ObjectId> = interesting.iter().copied().collect();
893 let mut seen_edge_commit = HashSet::new();
894 while let Some(c) = queue.pop_front() {
895 let parents = graph.parents_of(c)?;
896 for p in parents {
897 if interesting.contains(&p) {
898 continue;
899 }
900 if !excluded.contains(&p) {
901 continue;
902 }
903 if !seen_edge_commit.insert(p) {
904 continue;
905 }
906 let commit = load_commit_data(repo, p)?;
907 ctx.uninteresting_object.insert(commit.tree);
908 queue.push_back(p);
909 }
910 }
911 if opts.edge_aggressive {
915 for &c in exclude_tips {
916 if seen_edge_commit.contains(&c) {
917 continue;
918 }
919 let Ok(commit) = load_commit_data(repo, c) else {
920 continue;
921 };
922 ctx.uninteresting_object.insert(commit.tree);
923 }
924 }
925 Ok(())
926}
927
928fn setup_pending_objects(
929 repo: &Repository,
930 roots: &[ObjectWalkRoot],
931 extra_tag_refs: &[ObjectId],
932 opts: &PathWalkOptions,
933 ctx: &mut PathWalkContext<'_>,
934) -> Result<()> {
935 let mut tag_oids: Vec<ObjectId> = Vec::new();
936 let mut tagged_blob_oids: Vec<ObjectId> = Vec::new();
937 let mut tag_seen: HashSet<ObjectId> = HashSet::new();
938
939 for &oid in extra_tag_refs {
940 if ctx.seen_object.contains(&oid) {
941 continue;
942 }
943 let obj = repo.odb.read(&oid)?;
944 match obj.kind {
945 ObjectKind::Tag | ObjectKind::Commit => {
946 if tag_seen.insert(oid) {
947 tag_oids.push(oid);
948 }
949 }
950 ObjectKind::Tree if opts.include_trees => {
951 ctx.seen_object.insert(oid);
952 ctx.append_root_tree(oid)?;
953 }
954 ObjectKind::Blob if opts.include_blobs => {
955 ctx.seen_object.insert(oid);
956 tagged_blob_oids.push(oid);
957 }
958 _ => {}
959 }
960 }
961
962 for root in roots {
963 let mut oid = root.oid;
964 let mut obj = repo.odb.read(&oid)?;
965 while obj.kind == ObjectKind::Tag {
966 if ctx.seen_object.contains(&oid) {
967 break;
968 }
969 ctx.seen_object.insert(oid);
970 if opts.include_tags && tag_seen.insert(oid) {
971 tag_oids.push(oid);
972 }
973 let tag = parse_tag(&obj.data)?;
974 oid = tag.object;
975 obj = repo.odb.read(&oid)?;
976 }
977 if obj.kind == ObjectKind::Tag {
978 continue;
979 }
980 if !ctx.seen_object.insert(oid) {
981 continue;
982 }
983 match obj.kind {
984 ObjectKind::Tree if opts.include_trees => {
985 if let Some(p) = &root.root_path {
986 let trimmed = p.trim_end_matches('/');
987 let path = if trimmed.is_empty() {
988 "/".to_string()
989 } else {
990 format!("{trimmed}/")
991 };
992 ctx.add_path_pending(path, ObjectKind::Tree, oid, true)?;
993 } else {
994 ctx.ensure_root_list();
995 ctx.paths
996 .get_mut(ROOT_PATH)
997 .ok_or_else(|| Error::CorruptObject("root path list missing".to_owned()))?
998 .oids
999 .push(oid);
1000 ctx.push_heap(ROOT_PATH);
1001 }
1002 }
1003 ObjectKind::Blob if opts.include_blobs => {
1004 if let Some(p) = &root.root_path {
1005 ctx.add_path_pending(p.clone(), ObjectKind::Blob, oid, true)?;
1006 } else {
1007 tagged_blob_oids.push(oid);
1008 }
1009 }
1010 ObjectKind::Commit => {}
1011 _ => {}
1012 }
1013 }
1014
1015 if opts.include_blobs && !tagged_blob_oids.is_empty() {
1016 let list = TypeOidList {
1017 kind: ObjectKind::Blob,
1018 oids: tagged_blob_oids,
1019 maybe_interesting: true,
1020 };
1021 ctx.paths.insert(TAGGED_BLOBS_PATH.to_string(), list);
1022 ctx.push_heap(TAGGED_BLOBS_PATH);
1023 }
1024 if opts.include_tags && !tag_oids.is_empty() {
1025 let list = TypeOidList {
1026 kind: ObjectKind::Tag,
1027 oids: tag_oids,
1028 maybe_interesting: true,
1029 };
1030 ctx.paths.insert(TAG_PATH.to_string(), list);
1031 ctx.push_heap(TAG_PATH);
1032 }
1033 Ok(())
1034}
1035
1036pub fn parse_path_walk_cli(
1040 git_dir: &Path,
1041 args: &[String],
1042) -> Result<(PathWalkOptions, Vec<String>, Vec<String>, bool, bool)> {
1043 let mut opts = PathWalkOptions::default();
1044 let mut positive = Vec::new();
1045 let mut negative = Vec::new();
1046 let mut stdin_all = false;
1047 let mut boundary_flag = false;
1048 let mut after_dd = false;
1049 let mut not_mode = false;
1050
1051 let mut i = 0usize;
1052 while i < args.len() {
1053 let a = &args[i];
1054 if !after_dd && a == "--" {
1055 after_dd = true;
1056 i += 1;
1057 continue;
1058 }
1059 if !after_dd {
1060 match a.as_str() {
1061 "--stdin-pl" => {
1062 let mut buf = String::new();
1063 std::io::stdin().read_to_string(&mut buf)?;
1064 let lines: Vec<String> = buf
1065 .lines()
1066 .map(str::trim)
1067 .filter(|l| !l.is_empty() && !l.starts_with('#'))
1068 .map(String::from)
1069 .collect();
1070 if !lines.is_empty() {
1071 opts.sparse_pattern_lines = Some(lines);
1072 }
1073 }
1074 "--prune" => opts.prune_all_uninteresting = true,
1075 "--edge-aggressive" => opts.edge_aggressive = true,
1076 "--no-blobs" => opts.include_blobs = false,
1077 "--no-trees" => opts.include_trees = false,
1078 "--no-commits" => opts.include_commits = false,
1079 "--no-tags" => opts.include_tags = false,
1080 "--blobs" => opts.include_blobs = true,
1081 "--trees" => opts.include_trees = true,
1082 "--commits" => opts.include_commits = true,
1083 "--tags" => opts.include_tags = true,
1084 "--stdin" => {
1085 let (pos, neg, all, _stdin_paths) =
1086 collect_revision_specs_with_stdin(git_dir, &[], true)?;
1087 stdin_all |= all;
1088 positive.extend(pos);
1089 negative.extend(neg);
1090 }
1091 _ => {
1092 return Err(Error::InvalidRef(format!(
1093 "path-walk: unknown option '{a}'"
1094 )));
1095 }
1096 }
1097 } else if a == "--not" {
1098 not_mode = !not_mode;
1099 } else if a == "--boundary" {
1100 boundary_flag = true;
1101 } else if matches!(a.as_str(), "--all" | "--indexed-objects" | "--branches") {
1102 if not_mode {
1103 negative.push(a.clone());
1104 } else {
1105 positive.push(a.clone());
1106 }
1107 } else {
1108 let (p, n) = crate::rev_list::split_revision_token(a);
1109 if not_mode {
1110 negative.extend(p);
1111 positive.extend(n);
1112 } else {
1113 positive.extend(p);
1114 negative.extend(n);
1115 }
1116 }
1117 i += 1;
1118 }
1119
1120 Ok((opts, positive, negative, stdin_all, boundary_flag))
1121}