1use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque};
7use std::io::Read;
8use std::path::Path;
9
10use crate::error::{Error, Result};
11use crate::objects::{parse_commit, parse_tag, parse_tree, ObjectId, ObjectKind};
12use crate::refs;
13use crate::repo::Repository;
14use crate::rev_list::{
15 collect_revision_specs_with_stdin, date_order_walk, resolve_object_walk_roots,
16 resolve_revision_commits, walk_closure, walk_closure_ordered, CommitGraph, ObjectWalkRoot,
17};
18use crate::sparse_checkout::{path_in_sparse_checkout_patterns, ConePatterns};
19
20const ROOT_PATH: &str = "";
21const TAG_PATH: &str = "/tags";
22const TAGGED_BLOBS_PATH: &str = "/tagged-blobs";
23
24#[derive(Debug, Clone)]
26pub struct PathWalkOptions {
27 pub include_commits: bool,
28 pub include_trees: bool,
29 pub include_blobs: bool,
30 pub include_tags: bool,
31 pub prune_all_uninteresting: bool,
32 pub edge_aggressive: bool,
33 pub cone_patterns: Option<ConePatterns>,
34 pub sparse_pattern_lines: Option<Vec<String>>,
36}
37
38impl Default for PathWalkOptions {
39 fn default() -> Self {
40 Self {
41 include_commits: true,
42 include_trees: true,
43 include_blobs: true,
44 include_tags: true,
45 prune_all_uninteresting: false,
46 edge_aggressive: false,
47 cone_patterns: None,
48 sparse_pattern_lines: None,
49 }
50 }
51}
52
53#[derive(Debug, Clone)]
55pub struct PathWalkLine {
56 pub batch: u64,
57 pub object_kind: ObjectKind,
58 pub path: String,
59 pub oid: ObjectId,
60 pub uninteresting: bool,
61}
62
63#[derive(Debug, Clone, Default)]
64pub struct PathWalkCounts {
65 pub commits: u64,
66 pub trees: u64,
67 pub blobs: u64,
68 pub tags: u64,
69}
70
71pub fn walk_objects_by_path(
76 repo: &Repository,
77 positive_specs: &[String],
78 negative_specs: &[String],
79 stdin_all: bool,
80 boundary: bool,
81 opts: &PathWalkOptions,
82) -> Result<(Vec<PathWalkLine>, PathWalkCounts)> {
83 let mut graph = CommitGraph::new(repo, false);
84 let mut all_refs = stdin_all;
85 let mut filtered: Vec<String> = Vec::new();
86 let mut want_indexed_objects = false;
87 for p in positive_specs {
88 match p.as_str() {
89 "--all" => all_refs = true,
90 "--indexed-objects" => want_indexed_objects = true,
91 "--branches" => filtered.extend(expand_branches_refs(repo)?),
92 _ => filtered.push(p.clone()),
93 }
94 }
95 let mut neg_resolved: Vec<String> = Vec::new();
96 for n in negative_specs {
97 match n.as_str() {
98 "--all" => {
99 return Err(Error::InvalidRef(
100 "--all is not valid in negative revision list".to_owned(),
101 ));
102 }
103 "--indexed-objects" => {
104 return Err(Error::InvalidRef(
105 "--indexed-objects is not valid in negative revision list".to_owned(),
106 ));
107 }
108 "--branches" => neg_resolved.extend(expand_branches_refs(repo)?),
109 _ => neg_resolved.push(n.clone()),
110 }
111 }
112 if all_refs {
113 filtered.extend(all_ref_commits(repo)?);
114 }
115 if filtered.is_empty() && !want_indexed_objects && !all_refs {
116 return Err(Error::InvalidRef("no revisions specified".to_owned()));
117 }
118 let (commit_tips, mut object_roots) = if filtered.is_empty() {
119 (Vec::new(), Vec::new())
120 } else {
121 resolve_object_walk_roots(repo, &filtered)?
122 };
123 object_roots.extend(tag_object_roots_from_spec_names(repo, &filtered)?);
124 if want_indexed_objects {
125 object_roots.extend(indexed_blob_roots(repo)?);
126 }
127
128 let extra_tag_ref_targets: Vec<ObjectId> = if all_refs {
129 tag_ref_direct_targets(repo)?
130 } else {
131 Vec::new()
132 };
133 let exclude = resolve_revision_commits(repo, &neg_resolved)?;
134 let exclude_tips: HashSet<ObjectId> = exclude.iter().copied().collect();
135 let excluded: HashSet<ObjectId> = if exclude.is_empty() {
136 HashSet::new()
137 } else {
138 walk_closure(&mut graph, &exclude)?
139 };
140
141 let (included_commits, _) = if commit_tips.is_empty() {
142 (HashSet::new(), Vec::new())
143 } else {
144 walk_closure_ordered(&mut graph, &commit_tips)?
145 };
146 let mut interesting_commits: HashSet<ObjectId> = included_commits
147 .iter()
148 .copied()
149 .filter(|c| !excluded.contains(c))
150 .collect();
151
152 let boundary_commits: HashSet<ObjectId> = if boundary {
153 let inc: HashSet<ObjectId> = interesting_commits.iter().copied().collect();
154 let mut bset = HashSet::new();
155 for &oid in &interesting_commits {
156 for p in graph.parents_of(oid)? {
157 if !inc.contains(&p) && excluded.contains(&p) {
158 bset.insert(p);
159 }
160 }
161 }
162 interesting_commits.extend(bset.iter().copied());
163 bset
164 } else {
165 HashSet::new()
166 };
167
168 let mut uninteresting_commits: HashSet<ObjectId> = excluded.iter().copied().collect();
169 uninteresting_commits.retain(|c| interesting_commits.contains(c));
170
171 let excluded_objects: HashSet<ObjectId> = tree_blob_closure_from_commits(repo, &excluded)?;
172 let excluded_blob_paths = excluded_blob_paths_from_commits(repo, &excluded)?;
173
174 let mut ctx = PathWalkContext {
175 repo,
176 opts,
177 paths: HashMap::new(),
178 heap: BinaryHeap::new(),
179 pushed: HashSet::new(),
180 seen_object: HashSet::new(),
181 uninteresting_object: HashSet::new(),
182 excluded_objects,
183 excluded_blob_paths,
184 heap_seq: 0,
185 batch: 0,
186 lines: Vec::new(),
187 counts: PathWalkCounts::default(),
188 };
189
190 mark_uninteresting_trees(
191 repo,
192 &mut graph,
193 &interesting_commits,
194 &excluded,
195 &exclude_tips,
196 opts,
197 &mut ctx,
198 )?;
199
200 if opts.include_trees {
201 ctx.ensure_root_list();
202 ctx.push_heap(ROOT_PATH);
203 }
204
205 setup_pending_objects(repo, &object_roots, &extra_tag_ref_targets, opts, &mut ctx)?;
206
207 let ordered_commits = date_order_walk(&mut graph, &interesting_commits, false)?;
208
209 let mut commit_oids: Vec<ObjectId> = Vec::new();
210 for c in ordered_commits {
211 if !interesting_commits.contains(&c) {
212 continue;
213 }
214 if opts.include_commits {
215 commit_oids.push(c);
216 }
217 if !opts.include_trees && !opts.include_blobs {
218 continue;
219 }
220 let commit = load_commit_data(repo, c)?;
221 let tree_oid = commit.tree;
222 if ctx.seen_object.contains(&tree_oid) {
223 continue;
224 }
225 ctx.seen_object.insert(tree_oid);
226 if ctx.excluded_objects.contains(&tree_oid) {
227 ctx.uninteresting_object.insert(tree_oid);
228 }
229 ctx.append_root_tree(tree_oid)?;
230 }
231
232 if opts.edge_aggressive && opts.include_trees {
233 for &tip in &exclude_tips {
234 let commit = load_commit_data(repo, tip)?;
235 let tree_oid = commit.tree;
236 if ctx.seen_object.contains(&tree_oid) {
237 continue;
238 }
239 ctx.seen_object.insert(tree_oid);
240 ctx.uninteresting_object.insert(tree_oid);
241 ctx.append_root_tree(tree_oid)?;
242 }
243 }
244
245 if opts.include_commits && !commit_oids.is_empty() {
246 ctx.emit_commit_batch(&commit_oids, &uninteresting_commits, &boundary_commits)?;
247 }
248
249 ctx.drain_heap(&mut graph)?;
250
251 if !ctx.paths.is_empty() {
252 for key in ctx.paths.keys().cloned().collect::<Vec<_>>() {
253 ctx.push_heap(&key);
254 }
255 ctx.drain_heap(&mut graph)?;
256 }
257
258 Ok((ctx.lines, ctx.counts))
259}
260
261fn tag_object_roots_from_spec_names(
265 repo: &Repository,
266 specs: &[String],
267) -> Result<Vec<ObjectWalkRoot>> {
268 let mut out = Vec::new();
269 let mut seen = HashSet::new();
270 for spec in specs {
271 if spec.contains("..") || spec.starts_with('^') || spec == "HEAD" {
272 continue;
273 }
274 if spec.len() == 40 && spec.chars().all(|c| c.is_ascii_hexdigit()) {
275 continue;
276 }
277 let refname = if spec.starts_with("refs/") {
278 spec.clone()
279 } else {
280 format!("refs/tags/{spec}")
281 };
282 let Ok(oid) = refs::resolve_ref(&repo.git_dir, &refname) else {
283 continue;
284 };
285 let obj = repo.odb.read(&oid)?;
286 if obj.kind != ObjectKind::Tag {
287 continue;
288 }
289 if seen.insert(oid) {
290 out.push(ObjectWalkRoot {
291 oid,
292 input: spec.clone(),
293 root_path: None,
294 });
295 }
296 }
297 Ok(out)
298}
299
300fn tag_ref_direct_targets(repo: &Repository) -> Result<Vec<ObjectId>> {
301 let mut out = Vec::new();
302 for (name, _) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
303 let oid = refs::resolve_ref(&repo.git_dir, &name)?;
304 out.push(oid);
305 }
306 Ok(out)
307}
308
309fn all_ref_commits(repo: &Repository) -> Result<Vec<String>> {
310 let mut specs = Vec::new();
311 specs.push("HEAD".to_owned());
312 for (name, _) in refs::list_refs(&repo.git_dir, "refs/")? {
313 specs.push(name);
314 }
315 Ok(specs)
316}
317
318pub fn expand_branches_refs(repo: &Repository) -> Result<Vec<String>> {
319 let mut out = Vec::new();
320 for (_, oid) in refs::list_refs(&repo.git_dir, "refs/heads/")? {
321 out.push(oid.to_hex());
322 }
323 Ok(out)
324}
325
326fn indexed_blob_roots(repo: &Repository) -> Result<Vec<ObjectWalkRoot>> {
329 let Some(_) = &repo.work_tree else {
330 return Ok(Vec::new());
331 };
332 let index_path = repo.git_dir.join("index");
333 if !index_path.is_file() {
334 return Ok(Vec::new());
335 }
336 let idx = crate::index::Index::load(&index_path)?;
337 let mut out = Vec::new();
338 if let Some(root) = idx.cache_tree_root {
339 out.push(ObjectWalkRoot {
340 oid: root,
341 input: String::new(),
342 root_path: None,
343 });
344 }
345
346 let head_tree = head_tree_oid(repo).ok();
347 let mut file_entries: Vec<(String, ObjectId)> = Vec::new();
348 for e in &idx.entries {
349 if e.stage() != 0 {
350 continue;
351 }
352 if e.mode == 0o160000 || e.mode == crate::index::MODE_TREE {
353 continue;
354 }
355 let path_str = String::from_utf8_lossy(&e.path).into_owned();
356 file_entries.push((path_str.clone(), e.oid));
357 out.push(ObjectWalkRoot {
358 oid: e.oid,
359 input: format!(":{path_str}"),
360 root_path: Some(path_str),
361 });
362 }
363
364 let mut prefixes: BTreeSet<String> = BTreeSet::new();
365 for (path, _) in &file_entries {
366 let mut end = path.len();
367 while end > 0 {
368 if let Some(pos) = path[..end].rfind('/') {
369 prefixes.insert(path[..pos].to_string());
370 end = pos;
371 } else {
372 break;
373 }
374 }
375 }
376
377 let mut recovery_added: HashSet<String> = HashSet::new();
378 let Some(ht) = head_tree else {
379 return Ok(out);
380 };
381 for pref in prefixes {
382 if pref.is_empty() {
383 continue;
384 }
385 let mut any_under = false;
386 let mut all_match = true;
387 for (path, oid) in &file_entries {
388 let under = if path == &pref {
389 true
390 } else if let Some(rest) = path.strip_prefix(&pref) {
391 rest.starts_with('/')
392 } else {
393 false
394 };
395 if !under {
396 continue;
397 }
398 any_under = true;
399 match crate::rev_parse::resolve_treeish_path(repo, ht, path.as_str()) {
400 Ok(h) if h == *oid => {}
401 _ => {
402 all_match = false;
403 break;
404 }
405 }
406 }
407 if !any_under || !all_match {
408 continue;
409 }
410 let tree_oid = crate::rev_parse::resolve_treeish_path(repo, ht, pref.as_str())?;
411 if !recovery_added.insert(pref.clone()) {
412 continue;
413 }
414 out.push(ObjectWalkRoot {
415 oid: tree_oid,
416 input: String::new(),
417 root_path: Some(pref),
418 });
419 }
420
421 Ok(out)
422}
423
424fn head_tree_oid(repo: &Repository) -> Result<ObjectId> {
425 let head = refs::resolve_ref(&repo.git_dir, "HEAD")?;
426 let obj = repo.odb.read(&head)?;
427 if obj.kind != ObjectKind::Commit {
428 return Err(Error::InvalidRef("HEAD is not a commit".to_owned()));
429 }
430 let c = parse_commit(&obj.data)?;
431 Ok(c.tree)
432}
433
434struct TypeOidList {
435 kind: ObjectKind,
436 oids: Vec<ObjectId>,
437 maybe_interesting: bool,
438}
439
440struct PathWalkContext<'a> {
441 repo: &'a Repository,
442 opts: &'a PathWalkOptions,
443 paths: HashMap<String, TypeOidList>,
444 heap: BinaryHeap<std::cmp::Reverse<PathHeapItem>>,
445 pushed: HashSet<String>,
446 seen_object: HashSet<ObjectId>,
447 uninteresting_object: HashSet<ObjectId>,
448 excluded_objects: HashSet<ObjectId>,
449 excluded_blob_paths: HashSet<(String, ObjectId)>,
450 heap_seq: u64,
451 batch: u64,
452 lines: Vec<PathWalkLine>,
453 counts: PathWalkCounts,
454}
455
456#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
458struct PathHeapItem {
459 type_rank: u8,
460 path: String,
461 seq: u64,
462}
463
464fn type_rank(kind: ObjectKind) -> u8 {
465 match kind {
466 ObjectKind::Tag => 0,
467 ObjectKind::Blob => 1,
468 ObjectKind::Tree => 2,
469 ObjectKind::Commit => 3,
470 }
471}
472
473impl<'a> PathWalkContext<'a> {
474 fn ensure_root_list(&mut self) {
475 self.paths
476 .entry(ROOT_PATH.to_string())
477 .or_insert_with(|| TypeOidList {
478 kind: ObjectKind::Tree,
479 oids: Vec::new(),
480 maybe_interesting: true,
481 });
482 }
483
484 fn push_heap(&mut self, path: &str) {
485 if !self.pushed.insert(path.to_string()) {
486 return;
487 }
488 let Some(list) = self.paths.get(path) else {
489 return;
490 };
491 let seq = self.heap_seq;
492 self.heap_seq = self.heap_seq.wrapping_add(1);
493 self.heap.push(std::cmp::Reverse(PathHeapItem {
494 type_rank: type_rank(list.kind),
495 path: path.to_string(),
496 seq,
497 }));
498 }
499
500 fn append_root_tree(&mut self, oid: ObjectId) -> Result<()> {
501 self.ensure_root_list();
502 let list = self.paths.get_mut(ROOT_PATH).expect("root list");
503 list.oids.push(oid);
504 self.push_heap(ROOT_PATH);
505 Ok(())
506 }
507
508 fn add_path(
509 &mut self,
510 path: String,
511 kind: ObjectKind,
512 oid: ObjectId,
513 interesting: bool,
514 ) -> Result<()> {
515 self.add_path_inner(path, kind, oid, interesting, true)
516 }
517
518 fn add_path_pending(
524 &mut self,
525 path: String,
526 kind: ObjectKind,
527 oid: ObjectId,
528 interesting: bool,
529 ) -> Result<()> {
530 self.add_path_inner(path, kind, oid, interesting, false)
531 }
532
533 fn add_path_inner(
534 &mut self,
535 path: String,
536 kind: ObjectKind,
537 oid: ObjectId,
538 interesting: bool,
539 enqueue: bool,
540 ) -> Result<()> {
541 let list = self
542 .paths
543 .entry(path.clone())
544 .or_insert_with(|| TypeOidList {
545 kind,
546 oids: Vec::new(),
547 maybe_interesting: false,
548 });
549 if list.kind != kind {
550 return Err(Error::CorruptObject(format!(
551 "path-walk: inconsistent types at path {path:?}"
552 )));
553 }
554 list.maybe_interesting |= interesting;
555 list.oids.push(oid);
556 if enqueue {
557 self.push_heap(&path);
558 }
559 Ok(())
560 }
561
562 fn cone_allows_tree_path(&self, path_with_slash: &str) -> bool {
563 if let Some(lines) = &self.opts.sparse_pattern_lines {
564 return path_in_sparse_checkout_patterns(path_with_slash, lines, true);
565 }
566 if let Some(cone) = &self.opts.cone_patterns {
567 let trimmed = path_with_slash.trim_end_matches('/');
568 if cone.path_included(trimmed) {
569 return true;
570 }
571 return cone.path_included(path_with_slash);
572 }
573 true
574 }
575
576 fn cone_allows_blob_path(&self, path: &str) -> bool {
577 if let Some(lines) = &self.opts.sparse_pattern_lines {
578 return path_in_sparse_checkout_patterns(path, lines, true);
579 }
580 if let Some(cone) = &self.opts.cone_patterns {
581 return cone.path_included(path);
582 }
583 true
584 }
585
586 fn emit_commit_batch(
587 &mut self,
588 oids: &[ObjectId],
589 uninteresting: &HashSet<ObjectId>,
590 boundary: &HashSet<ObjectId>,
591 ) -> Result<()> {
592 let batch = self.batch;
593 self.batch += 1;
594 for &oid in oids {
595 let u = uninteresting.contains(&oid) || boundary.contains(&oid);
596 self.lines.push(PathWalkLine {
597 batch,
598 object_kind: ObjectKind::Commit,
599 path: ROOT_PATH.to_string(),
600 oid,
601 uninteresting: u,
602 });
603 self.counts.commits += 1;
604 }
605 Ok(())
606 }
607
608 fn drain_heap(&mut self, graph: &mut CommitGraph<'_>) -> Result<()> {
609 while let Some(std::cmp::Reverse(item)) = self.heap.pop() {
610 self.walk_path(&item.path, graph)?;
611 }
612 Ok(())
613 }
614
615 fn walk_path(&mut self, path: &str, graph: &mut CommitGraph<'_>) -> Result<()> {
616 let Some(mut list) = self.paths.remove(path) else {
617 return Ok(());
618 };
619 if list.oids.is_empty() {
620 return Ok(());
621 }
622
623 if self.opts.prune_all_uninteresting {
624 if !list.maybe_interesting {
625 return Ok(());
626 }
627 list.maybe_interesting = false;
628 for oid in &list.oids {
629 if self.object_is_interesting_at_path(path, *oid, list.kind) {
630 list.maybe_interesting = true;
631 break;
632 }
633 }
634 if !list.maybe_interesting {
635 return Ok(());
636 }
637 }
638
639 let emit = match list.kind {
640 ObjectKind::Tree => self.opts.include_trees,
641 ObjectKind::Blob => self.opts.include_blobs,
642 ObjectKind::Tag => self.opts.include_tags,
643 _ => false,
644 };
645 let batch = if emit {
646 let b = self.batch;
647 self.batch += 1;
648 b
649 } else {
650 0
651 };
652
653 match list.kind {
654 ObjectKind::Tree if self.opts.include_trees => {
655 for &oid in &list.oids {
656 let u = !self.tree_is_interesting(oid);
657 self.lines.push(PathWalkLine {
658 batch,
659 object_kind: ObjectKind::Tree,
660 path: path.to_string(),
661 oid,
662 uninteresting: u,
663 });
664 self.counts.trees += 1;
665 }
666 }
667 ObjectKind::Blob if self.opts.include_blobs => {
668 for &oid in &list.oids {
669 let u = !self.blob_is_interesting_at_path(path, oid);
670 self.lines.push(PathWalkLine {
671 batch,
672 object_kind: ObjectKind::Blob,
673 path: path.to_string(),
674 oid,
675 uninteresting: u,
676 });
677 self.counts.blobs += 1;
678 }
679 }
680 ObjectKind::Tag if self.opts.include_tags => {
681 for &oid in &list.oids {
682 self.lines.push(PathWalkLine {
683 batch,
684 object_kind: ObjectKind::Tag,
685 path: path.to_string(),
686 oid,
687 uninteresting: false,
688 });
689 self.counts.tags += 1;
690 }
691 }
692 _ => {}
693 }
694
695 if list.kind == ObjectKind::Tree {
696 let tree_oids = list.oids.clone();
697 for tree_oid in tree_oids {
698 self.add_tree_entries(path, tree_oid, graph)?;
699 }
700 }
701
702 Ok(())
703 }
704
705 fn add_tree_entries(
706 &mut self,
707 base_path: &str,
708 tree_oid: ObjectId,
709 _graph: &mut CommitGraph<'_>,
710 ) -> Result<()> {
711 let obj = self.repo.odb.read(&tree_oid)?;
712 if obj.kind != ObjectKind::Tree {
713 return Err(Error::CorruptObject(format!("{tree_oid} is not a tree")));
714 }
715 let parent_uninteresting = !self.tree_is_interesting(tree_oid);
716 let entries = parse_tree(&obj.data)?;
717 for entry in entries {
718 if entry.mode == 0o160000 {
719 continue;
720 }
721 let is_tree = entry.mode == 0o040000;
722 if !self.opts.include_blobs && !is_tree {
723 continue;
724 }
725 let name = String::from_utf8_lossy(&entry.name);
726 let child_oid = entry.oid;
727 if self.seen_object.contains(&child_oid) {
728 continue;
729 }
730 self.seen_object.insert(child_oid);
731 if parent_uninteresting {
732 self.uninteresting_object.insert(child_oid);
733 }
734 if is_tree {
735 let rel = if base_path.is_empty() {
736 format!("{name}/")
737 } else {
738 format!("{base_path}{name}/")
739 };
740 if !self.cone_allows_tree_path(&rel) {
741 continue;
742 }
743 self.add_path(
744 rel,
745 ObjectKind::Tree,
746 child_oid,
747 self.tree_is_interesting(child_oid) || !parent_uninteresting,
748 )?;
749 } else {
750 let rel = if base_path.is_empty() {
751 name.into_owned()
752 } else {
753 format!("{base_path}{}", name.as_ref())
754 };
755 if !self.cone_allows_blob_path(&rel) {
756 continue;
757 }
758 let blob_interesting =
759 self.blob_is_interesting_at_path(&rel, child_oid) || !parent_uninteresting;
760 self.add_path(rel, ObjectKind::Blob, child_oid, blob_interesting)?;
761 }
762 }
763 Ok(())
764 }
765
766 fn object_is_interesting_at_path(&self, path: &str, oid: ObjectId, kind: ObjectKind) -> bool {
767 match kind {
768 ObjectKind::Blob => self.blob_is_interesting_at_path(path, oid),
769 ObjectKind::Tree => self.tree_is_interesting(oid),
770 _ => true,
771 }
772 }
773
774 fn tree_is_interesting(&self, oid: ObjectId) -> bool {
775 !self.uninteresting_object.contains(&oid) && !self.excluded_objects.contains(&oid)
776 }
777
778 fn blob_is_interesting_at_path(&self, path: &str, oid: ObjectId) -> bool {
779 !self.uninteresting_object.contains(&oid)
780 && !self.excluded_blob_paths.contains(&(path.to_string(), oid))
781 }
782}
783
784fn excluded_blob_paths_from_commits(
786 repo: &Repository,
787 commits: &HashSet<ObjectId>,
788) -> Result<HashSet<(String, ObjectId)>> {
789 let mut out = HashSet::new();
790 for &c in commits {
791 let commit = match load_commit_data(repo, c) {
792 Ok(co) => co,
793 Err(_) => continue,
794 };
795 collect_blob_paths(repo, commit.tree, "", &mut out)?;
796 }
797 Ok(out)
798}
799
800fn collect_blob_paths(
801 repo: &Repository,
802 tree_oid: ObjectId,
803 base: &str,
804 into: &mut HashSet<(String, ObjectId)>,
805) -> Result<()> {
806 let obj = repo.odb.read(&tree_oid)?;
807 if obj.kind != ObjectKind::Tree {
808 return Ok(());
809 }
810 let entries = parse_tree(&obj.data)?;
811 for e in entries {
812 if e.mode == 0o160000 {
813 continue;
814 }
815 let name = String::from_utf8_lossy(&e.name);
816 if e.mode == 0o040000 {
817 let next_base = if base.is_empty() {
818 format!("{name}/")
819 } else {
820 format!("{base}{name}/")
821 };
822 collect_blob_paths(repo, e.oid, &next_base, into)?;
823 } else {
824 let path = if base.is_empty() {
825 name.into_owned()
826 } else {
827 format!("{base}{}", name.as_ref())
828 };
829 into.insert((path, e.oid));
830 }
831 }
832 Ok(())
833}
834
835fn tree_blob_closure_from_commits(
836 repo: &Repository,
837 commits: &HashSet<ObjectId>,
838) -> Result<HashSet<ObjectId>> {
839 let mut out = HashSet::new();
840 for &c in commits {
841 let commit = match load_commit_data(repo, c) {
842 Ok(co) => co,
843 Err(_) => continue,
844 };
845 let mut stack = vec![commit.tree];
846 while let Some(t) = stack.pop() {
847 if !out.insert(t) {
848 continue;
849 }
850 let obj = repo.odb.read(&t)?;
851 if obj.kind != ObjectKind::Tree {
852 continue;
853 }
854 let entries = parse_tree(&obj.data)?;
855 for e in entries {
856 if e.mode == 0o160000 {
857 continue;
858 }
859 out.insert(e.oid);
860 if e.mode == 0o040000 {
861 stack.push(e.oid);
862 }
863 }
864 }
865 }
866 Ok(out)
867}
868
869fn load_commit_data(repo: &Repository, oid: ObjectId) -> Result<crate::objects::CommitData> {
870 let object = repo.odb.read(&oid)?;
871 if object.kind != ObjectKind::Commit {
872 return Err(Error::CorruptObject(format!("{oid} is not a commit")));
873 }
874 parse_commit(&object.data)
875}
876
877fn mark_uninteresting_trees(
878 repo: &Repository,
879 graph: &mut CommitGraph<'_>,
880 interesting: &HashSet<ObjectId>,
881 excluded: &HashSet<ObjectId>,
882 exclude_tips: &HashSet<ObjectId>,
883 opts: &PathWalkOptions,
884 ctx: &mut PathWalkContext<'_>,
885) -> Result<()> {
886 if !opts.prune_all_uninteresting && !opts.edge_aggressive {
887 return Ok(());
888 }
889 let mut queue: VecDeque<ObjectId> = interesting.iter().copied().collect();
890 let mut seen_edge_commit = HashSet::new();
891 while let Some(c) = queue.pop_front() {
892 let parents = graph.parents_of(c)?;
893 for p in parents {
894 if interesting.contains(&p) {
895 continue;
896 }
897 if !excluded.contains(&p) {
898 continue;
899 }
900 if !seen_edge_commit.insert(p) {
901 continue;
902 }
903 let commit = load_commit_data(repo, p)?;
904 ctx.uninteresting_object.insert(commit.tree);
905 queue.push_back(p);
906 }
907 }
908 if opts.edge_aggressive {
912 for &c in exclude_tips {
913 if seen_edge_commit.contains(&c) {
914 continue;
915 }
916 let Ok(commit) = load_commit_data(repo, c) else {
917 continue;
918 };
919 ctx.uninteresting_object.insert(commit.tree);
920 }
921 }
922 Ok(())
923}
924
925fn setup_pending_objects(
926 repo: &Repository,
927 roots: &[ObjectWalkRoot],
928 extra_tag_refs: &[ObjectId],
929 opts: &PathWalkOptions,
930 ctx: &mut PathWalkContext<'_>,
931) -> Result<()> {
932 let mut tag_oids: Vec<ObjectId> = Vec::new();
933 let mut tagged_blob_oids: Vec<ObjectId> = Vec::new();
934 let mut tag_seen: HashSet<ObjectId> = HashSet::new();
935
936 for &oid in extra_tag_refs {
937 if ctx.seen_object.contains(&oid) {
938 continue;
939 }
940 let obj = repo.odb.read(&oid)?;
941 match obj.kind {
942 ObjectKind::Tag | ObjectKind::Commit => {
943 if tag_seen.insert(oid) {
944 tag_oids.push(oid);
945 }
946 }
947 ObjectKind::Tree if opts.include_trees => {
948 ctx.seen_object.insert(oid);
949 ctx.append_root_tree(oid)?;
950 }
951 ObjectKind::Blob if opts.include_blobs => {
952 ctx.seen_object.insert(oid);
953 tagged_blob_oids.push(oid);
954 }
955 _ => {}
956 }
957 }
958
959 for root in roots {
960 let mut oid = root.oid;
961 let mut obj = repo.odb.read(&oid)?;
962 while obj.kind == ObjectKind::Tag {
963 if ctx.seen_object.contains(&oid) {
964 break;
965 }
966 ctx.seen_object.insert(oid);
967 if opts.include_tags && tag_seen.insert(oid) {
968 tag_oids.push(oid);
969 }
970 let tag = parse_tag(&obj.data)?;
971 oid = tag.object;
972 obj = repo.odb.read(&oid)?;
973 }
974 if obj.kind == ObjectKind::Tag {
975 continue;
976 }
977 if !ctx.seen_object.insert(oid) {
978 continue;
979 }
980 match obj.kind {
981 ObjectKind::Tree if opts.include_trees => {
982 if let Some(p) = &root.root_path {
983 let trimmed = p.trim_end_matches('/');
984 let path = if trimmed.is_empty() {
985 "/".to_string()
986 } else {
987 format!("{trimmed}/")
988 };
989 ctx.add_path_pending(path, ObjectKind::Tree, oid, true)?;
990 } else {
991 ctx.ensure_root_list();
992 ctx.paths.get_mut(ROOT_PATH).expect("root").oids.push(oid);
993 ctx.push_heap(ROOT_PATH);
994 }
995 }
996 ObjectKind::Blob if opts.include_blobs => {
997 if let Some(p) = &root.root_path {
998 ctx.add_path_pending(p.clone(), ObjectKind::Blob, oid, true)?;
999 } else {
1000 tagged_blob_oids.push(oid);
1001 }
1002 }
1003 ObjectKind::Commit => {}
1004 _ => {}
1005 }
1006 }
1007
1008 if opts.include_blobs && !tagged_blob_oids.is_empty() {
1009 let list = TypeOidList {
1010 kind: ObjectKind::Blob,
1011 oids: tagged_blob_oids,
1012 maybe_interesting: true,
1013 };
1014 ctx.paths.insert(TAGGED_BLOBS_PATH.to_string(), list);
1015 ctx.push_heap(TAGGED_BLOBS_PATH);
1016 }
1017 if opts.include_tags && !tag_oids.is_empty() {
1018 let list = TypeOidList {
1019 kind: ObjectKind::Tag,
1020 oids: tag_oids,
1021 maybe_interesting: true,
1022 };
1023 ctx.paths.insert(TAG_PATH.to_string(), list);
1024 ctx.push_heap(TAG_PATH);
1025 }
1026 Ok(())
1027}
1028
1029pub fn parse_path_walk_cli(
1033 git_dir: &Path,
1034 args: &[String],
1035) -> Result<(PathWalkOptions, Vec<String>, Vec<String>, bool, bool)> {
1036 let mut opts = PathWalkOptions::default();
1037 let mut positive = Vec::new();
1038 let mut negative = Vec::new();
1039 let mut stdin_all = false;
1040 let mut boundary_flag = false;
1041 let mut after_dd = false;
1042 let mut not_mode = false;
1043
1044 let mut i = 0usize;
1045 while i < args.len() {
1046 let a = &args[i];
1047 if !after_dd && a == "--" {
1048 after_dd = true;
1049 i += 1;
1050 continue;
1051 }
1052 if !after_dd {
1053 match a.as_str() {
1054 "--stdin-pl" => {
1055 let mut buf = String::new();
1056 std::io::stdin().read_to_string(&mut buf)?;
1057 let lines: Vec<String> = buf
1058 .lines()
1059 .map(str::trim)
1060 .filter(|l| !l.is_empty() && !l.starts_with('#'))
1061 .map(String::from)
1062 .collect();
1063 if !lines.is_empty() {
1064 opts.sparse_pattern_lines = Some(lines);
1065 }
1066 }
1067 "--prune" => opts.prune_all_uninteresting = true,
1068 "--edge-aggressive" => opts.edge_aggressive = true,
1069 "--no-blobs" => opts.include_blobs = false,
1070 "--no-trees" => opts.include_trees = false,
1071 "--no-commits" => opts.include_commits = false,
1072 "--no-tags" => opts.include_tags = false,
1073 "--blobs" => opts.include_blobs = true,
1074 "--trees" => opts.include_trees = true,
1075 "--commits" => opts.include_commits = true,
1076 "--tags" => opts.include_tags = true,
1077 "--stdin" => {
1078 let (pos, neg, all, _stdin_paths) =
1079 collect_revision_specs_with_stdin(git_dir, &[], true)?;
1080 stdin_all |= all;
1081 positive.extend(pos);
1082 negative.extend(neg);
1083 }
1084 _ => {
1085 return Err(Error::InvalidRef(format!(
1086 "path-walk: unknown option '{a}'"
1087 )));
1088 }
1089 }
1090 } else if a == "--not" {
1091 not_mode = !not_mode;
1092 } else if a == "--boundary" {
1093 boundary_flag = true;
1094 } else if matches!(a.as_str(), "--all" | "--indexed-objects" | "--branches") {
1095 if not_mode {
1096 negative.push(a.clone());
1097 } else {
1098 positive.push(a.clone());
1099 }
1100 } else {
1101 let (p, n) = crate::rev_list::split_revision_token(a);
1102 if not_mode {
1103 negative.extend(p);
1104 positive.extend(n);
1105 } else {
1106 positive.extend(p);
1107 negative.extend(n);
1108 }
1109 }
1110 i += 1;
1111 }
1112
1113 Ok((opts, positive, negative, stdin_all, boundary_flag))
1114}