1use git_filter_tree::FilterTree as _;
2use git2::{Error, ErrorCode, Oid, Repository};
3
/// Tunables for operations that write to a metadata index.
#[derive(Clone, Debug)]
pub struct MetadataOptions {
    /// Number of two-hex-character fan-out directories placed in front of the
    /// leaf entry when a new index entry is created.
    pub shard_level: u8,
    /// When `true`, an already-existing entry or path is overwritten instead
    /// of being reported as an error.
    pub force: bool,
}

impl Default for MetadataOptions {
    /// One fan-out level, no overwriting.
    fn default() -> Self {
        MetadataOptions {
            force: false,
            shard_level: 1,
        }
    }
}
22
23#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct MetadataEntry {
26 pub path: String,
28 pub content: Option<Vec<u8>>,
30 pub oid: Oid,
32 pub is_tree: bool,
34}
35
36pub trait MetadataIndex {
42 fn metadata_list(&self, ref_name: &str) -> Result<Vec<(Oid, Oid)>, Error>;
45
46 fn metadata_get(&self, ref_name: &str, target: &Oid) -> Result<Option<Oid>, Error>;
49
50 fn metadata(
55 &self,
56 ref_name: &str,
57 target: &Oid,
58 tree: &Oid,
59 opts: &MetadataOptions,
60 ) -> Result<Oid, Error>;
61
62 fn metadata_commit(&self, ref_name: &str, root: Oid, message: &str) -> Result<Oid, Error>;
66
67 #[deprecated(since = "0.1.0", note = "use `metadata` + `metadata_commit` instead")]
74 fn metadata_set(
75 &self,
76 ref_name: &str,
77 target: &Oid,
78 tree: &Oid,
79 opts: &MetadataOptions,
80 ) -> Result<Oid, Error> {
81 #[allow(deprecated)]
82 let new_root = self.metadata(ref_name, target, tree, opts)?;
83 let msg = format!("metadata: set {} -> {}", target, tree);
84 self.metadata_commit(ref_name, new_root, &msg)?;
85 Ok(new_root)
86 }
87
88 fn metadata_show(&self, ref_name: &str, target: &Oid) -> Result<Vec<MetadataEntry>, Error>;
91
92 fn metadata_add(
99 &self,
100 ref_name: &str,
101 target: &Oid,
102 path: &str,
103 content: Option<&[u8]>,
104 opts: &MetadataOptions,
105 ) -> Result<Oid, Error>;
106
107 fn metadata_remove_paths(
113 &self,
114 ref_name: &str,
115 target: &Oid,
116 patterns: &[&str],
117 keep: bool,
118 ) -> Result<bool, Error>;
119
120 fn metadata_remove(&self, ref_name: &str, target: &Oid) -> Result<bool, Error>;
123
124 fn metadata_copy(
128 &self,
129 ref_name: &str,
130 from: &Oid,
131 to: &Oid,
132 opts: &MetadataOptions,
133 ) -> Result<Oid, Error>;
134
135 fn metadata_prune(&self, ref_name: &str, dry_run: bool) -> Result<Vec<Oid>, Error>;
138
139 fn metadata_get_ref(&self, ref_name: &str) -> String;
141}
142
143fn shard_oid(oid: &Oid, shard_level: u8) -> (Vec<String>, String) {
149 let hex = oid.to_string();
150 let mut segments = Vec::with_capacity(shard_level as usize);
151 let mut pos = 0;
152 for _ in 0..shard_level {
153 segments.push(hex[pos..pos + 2].to_string());
154 pos += 2;
155 }
156 let leaf = hex[pos..].to_string();
157 (segments, leaf)
158}
159
160fn resolve_root_tree<'r>(
162 repo: &'r Repository,
163 ref_name: &str,
164) -> Result<Option<git2::Tree<'r>>, Error> {
165 match repo.find_reference(ref_name) {
166 Ok(reference) => {
167 let commit = reference.peel_to_commit()?;
168 let tree = commit.tree()?;
169 Ok(Some(tree))
170 }
171 Err(e) if e.code() == ErrorCode::NotFound => Ok(None),
172 Err(e) => Err(e),
173 }
174}
175
176fn walk_tree<'a>(
178 repo: &'a Repository,
179 root: &git2::Tree<'a>,
180 segments: &[String],
181) -> Result<Option<git2::Tree<'a>>, Error> {
182 let mut current = root.clone();
183 for seg in segments {
184 let id = match current.get_name(seg) {
185 Some(entry) => entry.id(),
186 None => return Ok(None),
187 };
188 current = repo.find_tree(id)?;
189 }
190 Ok(Some(current))
191}
192
/// Returns `true` when `name` is exactly two ASCII hex digits (either case),
/// the shape of a fan-out directory name.
fn is_fanout_segment(name: &str) -> bool {
    matches!(
        name.as_bytes(),
        [hi, lo] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()
    )
}
197
198fn collect_entries(
200 repo: &Repository,
201 tree: &git2::Tree<'_>,
202 prefix: &str,
203) -> Result<Vec<(Oid, Oid)>, Error> {
204 let mut results = Vec::new();
205 for entry in tree.iter() {
206 let name = entry.name().unwrap_or("");
207 if entry.kind() != Some(git2::ObjectType::Tree) {
208 continue;
209 }
210 let full = format!("{prefix}{name}");
211 if is_fanout_segment(name) {
212 let subtree = repo.find_tree(entry.id())?;
213 results.extend(collect_entries(repo, &subtree, &full)?);
214 } else if let Ok(oid) = Oid::from_str(&full) {
215 if oid.to_string() == full {
216 results.push((oid, entry.id()));
217 }
218 }
219 }
220 Ok(results)
221}
222
223fn detect_fanout(
225 repo: &Repository,
226 root: &git2::Tree<'_>,
227 target: &Oid,
228) -> Result<Option<(Vec<String>, String, Oid)>, Error> {
229 let hex = target.to_string();
230 let max_depth = hex.len() / 2;
231 for depth in 0..max_depth {
232 let prefix_len = depth * 2;
233 let segments: Vec<String> = (0..depth)
234 .map(|i| hex[i * 2..i * 2 + 2].to_string())
235 .collect();
236 let leaf = &hex[prefix_len..];
237
238 if let Some(subtree) = walk_tree(repo, root, &segments)? {
239 if let Some(entry) = subtree.get_name(leaf) {
240 if entry.kind() == Some(git2::ObjectType::Tree) {
241 return Ok(Some((segments, leaf.to_string(), entry.id())));
242 }
243 }
244 }
245 }
246 Ok(None)
247}
248
249fn build_fanout(
251 repo: &Repository,
252 existing_root: Option<&git2::Tree<'_>>,
253 segments: &[String],
254 leaf: &str,
255 value_tree_oid: &Oid,
256) -> Result<Oid, Error> {
257 let mut existing_subtrees: Vec<Option<git2::Tree<'_>>> = Vec::new();
258 if let Some(root) = existing_root {
259 let mut current = Some(root.clone());
260 existing_subtrees.push(current.clone());
261 for seg in segments {
262 current = match ¤t {
263 Some(t) => match t.get_name(seg) {
264 Some(e) => Some(repo.find_tree(e.id())?),
265 None => None,
266 },
267 None => None,
268 };
269 existing_subtrees.push(current.clone());
270 }
271 } else {
272 for _ in 0..=segments.len() {
273 existing_subtrees.push(None);
274 }
275 }
276
277 let deepest_existing = existing_subtrees.last().and_then(|o| o.as_ref());
278 let mut builder = repo.treebuilder(deepest_existing)?;
279 builder.insert(leaf, *value_tree_oid, 0o040000)?;
280 let mut child_oid = builder.write()?;
281
282 for (i, seg) in segments.iter().enumerate().rev() {
283 let parent_existing = existing_subtrees[i].as_ref();
284 let mut builder = repo.treebuilder(parent_existing)?;
285 builder.insert(seg, child_oid, 0o040000)?;
286 child_oid = builder.write()?;
287 }
288
289 Ok(child_oid)
290}
291
292enum RemoveResult {
294 NotFound,
295 Empty,
296 Removed(Oid),
297}
298
299fn build_fanout_remove(
301 repo: &Repository,
302 root: &git2::Tree<'_>,
303 segments: &[String],
304 leaf: &str,
305) -> Result<RemoveResult, Error> {
306 let mut chain_oids: Vec<Oid> = vec![root.id()];
307 {
308 let mut current = root.clone();
309 for seg in segments {
310 let id = match current.get_name(seg) {
311 Some(e) => e.id(),
312 None => return Ok(RemoveResult::NotFound),
313 };
314 chain_oids.push(id);
315 current = repo.find_tree(id)?;
316 }
317 }
318
319 let deepest = repo.find_tree(*chain_oids.last().unwrap())?;
320 let mut builder = repo.treebuilder(Some(&deepest))?;
321 if builder.get(leaf)?.is_none() {
322 return Ok(RemoveResult::NotFound);
323 }
324 builder.remove(leaf)?;
325
326 let mut child_oid = if builder.len() == 0 {
327 None
328 } else {
329 Some(builder.write()?)
330 };
331
332 for (i, seg) in segments.iter().enumerate().rev() {
333 let parent = repo.find_tree(chain_oids[i])?;
334 let mut builder = repo.treebuilder(Some(&parent))?;
335 match child_oid {
336 Some(oid) => {
337 builder.insert(seg, oid, 0o040000)?;
338 }
339 None => {
340 builder.remove(seg)?;
341 }
342 }
343 child_oid = if builder.len() == 0 {
344 None
345 } else {
346 Some(builder.write()?)
347 };
348 }
349
350 match child_oid {
351 Some(oid) => Ok(RemoveResult::Removed(oid)),
352 None => Ok(RemoveResult::Empty),
353 }
354}
355
356fn commit_index(
358 repo: &Repository,
359 ref_name: &str,
360 tree_oid: Oid,
361 message: &str,
362) -> Result<Oid, Error> {
363 let tree = repo.find_tree(tree_oid)?;
364 let sig = repo.signature()?;
365
366 let parent = match repo.find_reference(ref_name) {
367 Ok(r) => Some(r.peel_to_commit()?),
368 Err(e) if e.code() == ErrorCode::NotFound => None,
369 Err(e) => return Err(e),
370 };
371
372 let parents: Vec<&git2::Commit<'_>> = parent.iter().collect();
373 let commit_oid = repo.commit(Some(ref_name), &sig, &sig, message, &tree, &parents)?;
374 Ok(commit_oid)
375}
376
377fn collect_tree_entries(
379 repo: &Repository,
380 tree: &git2::Tree<'_>,
381 prefix: &str,
382) -> Result<Vec<MetadataEntry>, Error> {
383 let mut results = Vec::new();
384 for entry in tree.iter() {
385 let name = entry.name().unwrap_or("");
386 let path = if prefix.is_empty() {
387 name.to_string()
388 } else {
389 format!("{prefix}/{name}")
390 };
391 match entry.kind() {
392 Some(git2::ObjectType::Tree) => {
393 let subtree = repo.find_tree(entry.id())?;
394 results.extend(collect_tree_entries(repo, &subtree, &path)?);
395 }
396 Some(git2::ObjectType::Blob) => {
397 let blob = repo.find_blob(entry.id())?;
398 results.push(MetadataEntry {
399 path,
400 content: Some(blob.content().to_vec()),
401 oid: entry.id(),
402 is_tree: false,
403 });
404 }
405 _ => {}
406 }
407 }
408 Ok(results)
409}
410
411fn insert_path_into_tree(
414 repo: &Repository,
415 existing: Option<&git2::Tree<'_>>,
416 path: &str,
417 blob_oid: Oid,
418) -> Result<Oid, Error> {
419 let components: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
420 if components.is_empty() {
421 return Err(Error::from_str("empty path"));
422 }
423 insert_path_recursive(repo, existing, &components, blob_oid)
424}
425
426fn insert_path_recursive(
427 repo: &Repository,
428 existing: Option<&git2::Tree<'_>>,
429 components: &[&str],
430 blob_oid: Oid,
431) -> Result<Oid, Error> {
432 assert!(!components.is_empty());
433
434 let name = components[0];
435
436 if components.len() == 1 {
437 let mut builder = repo.treebuilder(existing)?;
439 builder.insert(name, blob_oid, 0o100644)?;
440 return builder.write();
441 }
442
443 let sub_existing = match existing {
445 Some(tree) => match tree.get_name(name) {
446 Some(entry) if entry.kind() == Some(git2::ObjectType::Tree) => {
447 Some(repo.find_tree(entry.id())?)
448 }
449 _ => None,
450 },
451 None => None,
452 };
453
454 let child_oid = insert_path_recursive(repo, sub_existing.as_ref(), &components[1..], blob_oid)?;
455
456 let mut builder = repo.treebuilder(existing)?;
457 builder.insert(name, child_oid, 0o040000)?;
458 builder.write()
459}
460
461fn path_exists_in_tree(repo: &Repository, tree: &git2::Tree<'_>, path: &str) -> bool {
463 let components: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
464 if components.is_empty() {
465 return false;
466 }
467 path_exists_recursive(repo, tree, &components)
468}
469
470fn path_exists_recursive(repo: &Repository, tree: &git2::Tree<'_>, components: &[&str]) -> bool {
471 if components.is_empty() {
472 return false;
473 }
474 match tree.get_name(components[0]) {
475 None => false,
476 Some(entry) => {
477 if components.len() == 1 {
478 true
479 } else if entry.kind() == Some(git2::ObjectType::Tree) {
480 match repo.find_tree(entry.id()) {
481 Ok(subtree) => path_exists_recursive(repo, &subtree, &components[1..]),
482 Err(_) => false,
483 }
484 } else {
485 false
486 }
487 }
488 }
489}
490
/// Decides whether `path` is selected by `pattern`.
///
/// Both are split on `/` with empty components dropped. A match is any of:
///
/// * the two strings are identical;
/// * the pattern has no `*` / `**` component, is non-empty, and its
///   components are a leading run of the path's components (so a directory
///   name selects everything below it);
/// * the component-wise wildcard match of [`glob_match_recursive`] succeeds.
fn glob_matches(pattern: &str, path: &str) -> bool {
    fn components(text: &str) -> Vec<&str> {
        text.split('/').filter(|part| !part.is_empty()).collect()
    }

    if pattern == path {
        return true;
    }

    let pat_parts = components(pattern);
    let path_parts = components(path);

    let has_wildcard = pat_parts.iter().any(|part| matches!(*part, "*" | "**"));
    let literal_prefix =
        !pat_parts.is_empty() && !has_wildcard && path_parts.starts_with(&pat_parts);

    literal_prefix || glob_match_recursive(&pat_parts, &path_parts)
}

/// Matches path components against pattern components.
///
/// `*` stands for exactly one whole component, `**` for any number of
/// components (including none); every other pattern component must equal the
/// path component exactly. The whole path has to be consumed.
fn glob_match_recursive(pattern: &[&str], path: &[&str]) -> bool {
    match pattern.split_first() {
        None => path.is_empty(),
        Some((head, rest)) if *head == "**" => {
            // Try letting `**` swallow 0, 1, ... up to all remaining components.
            (0..=path.len()).any(|skip| glob_match_recursive(rest, &path[skip..]))
        }
        Some((head, rest)) => match path.split_first() {
            Some((first, tail)) => {
                (*head == "*" || *head == *first) && glob_match_recursive(rest, tail)
            }
            None => false,
        },
    }
}
541
542impl MetadataIndex for Repository {
547 fn metadata_list(&self, ref_name: &str) -> Result<Vec<(Oid, Oid)>, Error> {
548 let root = match resolve_root_tree(self, ref_name)? {
549 Some(t) => t,
550 None => return Ok(Vec::new()),
551 };
552 collect_entries(self, &root, "")
553 }
554
555 fn metadata_get(&self, ref_name: &str, target: &Oid) -> Result<Option<Oid>, Error> {
556 let root = match resolve_root_tree(self, ref_name)? {
557 Some(t) => t,
558 None => return Ok(None),
559 };
560 Ok(detect_fanout(self, &root, target)?.map(|(_, _, oid)| oid))
561 }
562
563 fn metadata(
564 &self,
565 ref_name: &str,
566 target: &Oid,
567 tree: &Oid,
568 opts: &MetadataOptions,
569 ) -> Result<Oid, Error> {
570 self.find_tree(*tree)?;
571
572 let (segments, leaf) = shard_oid(target, opts.shard_level);
573 let existing_root = resolve_root_tree(self, ref_name)?;
574
575 if !opts.force {
576 if let Some(ref root) = existing_root {
577 if detect_fanout(self, root, target)?.is_some() {
578 return Err(Error::from_str(
579 "metadata entry already exists (use force to overwrite)",
580 ));
581 }
582 }
583 }
584
585 build_fanout(self, existing_root.as_ref(), &segments, &leaf, tree)
586 }
587
588 fn metadata_commit(&self, ref_name: &str, root: Oid, message: &str) -> Result<Oid, Error> {
589 commit_index(self, ref_name, root, message)
590 }
591
592 fn metadata_show(&self, ref_name: &str, target: &Oid) -> Result<Vec<MetadataEntry>, Error> {
593 let root = match resolve_root_tree(self, ref_name)? {
594 Some(t) => t,
595 None => return Ok(Vec::new()),
596 };
597
598 let tree_oid = match detect_fanout(self, &root, target)? {
599 Some((_, _, oid)) => oid,
600 None => return Ok(Vec::new()),
601 };
602
603 let tree = self.find_tree(tree_oid)?;
604 collect_tree_entries(self, &tree, "")
605 }
606
607 fn metadata_add(
608 &self,
609 ref_name: &str,
610 target: &Oid,
611 path: &str,
612 content: Option<&[u8]>,
613 opts: &MetadataOptions,
614 ) -> Result<Oid, Error> {
615 let blob_oid = self.blob(content.unwrap_or(b""))?;
616
617 let existing_root = resolve_root_tree(self, ref_name)?;
618
619 let existing_meta_tree = match &existing_root {
621 Some(root) => match detect_fanout(self, root, target)? {
622 Some((_, _, oid)) => Some(self.find_tree(oid)?),
623 None => None,
624 },
625 None => None,
626 };
627
628 if !opts.force {
630 if let Some(ref meta_tree) = existing_meta_tree {
631 if path_exists_in_tree(self, meta_tree, path) {
632 return Err(Error::from_str(
633 "path already exists in metadata (use --force to overwrite)",
634 ));
635 }
636 }
637 }
638
639 let new_meta_tree_oid =
641 insert_path_into_tree(self, existing_meta_tree.as_ref(), path, blob_oid)?;
642
643 let (segments, leaf) = if existing_meta_tree.is_some() {
645 match &existing_root {
647 Some(root) => match detect_fanout(self, root, target)? {
648 Some((s, l, _)) => (s, l),
649 None => shard_oid(target, opts.shard_level),
650 },
651 None => shard_oid(target, opts.shard_level),
652 }
653 } else {
654 shard_oid(target, opts.shard_level)
655 };
656
657 let new_root = build_fanout(
658 self,
659 existing_root.as_ref(),
660 &segments,
661 &leaf,
662 &new_meta_tree_oid,
663 )?;
664
665 let msg = format!("metadata: add {} to {}", path, target);
666 commit_index(self, ref_name, new_root, &msg)?;
667
668 Ok(new_meta_tree_oid)
669 }
670
671 fn metadata_remove_paths(
672 &self,
673 ref_name: &str,
674 target: &Oid,
675 patterns: &[&str],
676 keep: bool,
677 ) -> Result<bool, Error> {
678 let root = match resolve_root_tree(self, ref_name)? {
679 Some(t) => t,
680 None => return Ok(false),
681 };
682
683 let (segments, leaf, meta_oid) = match detect_fanout(self, &root, target)? {
684 Some(t) => t,
685 None => return Ok(false),
686 };
687
688 let meta_tree = self.find_tree(meta_oid)?;
689 let patterns_owned: Vec<String> = patterns.iter().map(|s| s.to_string()).collect();
690 let new_meta_tree = self.filter_by_predicate(&meta_tree, |_repo, path| {
691 let path_str = path.to_str().unwrap_or("");
692 let matched = patterns_owned.iter().any(|p| glob_matches(p, path_str));
693 if keep { matched } else { !matched }
694 })?;
695
696 if new_meta_tree.is_empty() {
697 match build_fanout_remove(self, &root, &segments, &leaf)? {
699 RemoveResult::NotFound => Ok(false),
700 RemoveResult::Empty => {
701 let mut reference = self.find_reference(ref_name)?;
702 reference.delete()?;
703 Ok(true)
704 }
705 RemoveResult::Removed(new_root) => {
706 let msg = format!("metadata: remove paths from {}", target);
707 commit_index(self, ref_name, new_root, &msg)?;
708 Ok(true)
709 }
710 }
711 } else if new_meta_tree.id() == meta_oid {
712 Ok(false)
713 } else {
714 let new_root = build_fanout(self, Some(&root), &segments, &leaf, &new_meta_tree.id())?;
715 let msg = format!("metadata: remove paths from {}", target);
716 commit_index(self, ref_name, new_root, &msg)?;
717 Ok(true)
718 }
719 }
720
721 fn metadata_remove(&self, ref_name: &str, target: &Oid) -> Result<bool, Error> {
722 let root = match resolve_root_tree(self, ref_name)? {
723 Some(t) => t,
724 None => return Ok(false),
725 };
726
727 let (segments, leaf) = match detect_fanout(self, &root, target)? {
728 Some((segments, leaf, _)) => (segments, leaf),
729 None => return Ok(false),
730 };
731
732 match build_fanout_remove(self, &root, &segments, &leaf)? {
733 RemoveResult::NotFound => Ok(false),
734 RemoveResult::Empty => {
735 let mut reference = self.find_reference(ref_name)?;
736 reference.delete()?;
737 Ok(true)
738 }
739 RemoveResult::Removed(new_root) => {
740 let msg = format!("metadata: remove {}", target);
741 commit_index(self, ref_name, new_root, &msg)?;
742 Ok(true)
743 }
744 }
745 }
746
747 fn metadata_copy(
748 &self,
749 ref_name: &str,
750 from: &Oid,
751 to: &Oid,
752 opts: &MetadataOptions,
753 ) -> Result<Oid, Error> {
754 let root = match resolve_root_tree(self, ref_name)? {
755 Some(t) => t,
756 None => {
757 return Err(Error::from_str(&format!(
758 "no metadata entry for source {}",
759 from
760 )));
761 }
762 };
763
764 let source_tree_oid = match detect_fanout(self, &root, from)? {
765 Some((_, _, oid)) => oid,
766 None => {
767 return Err(Error::from_str(&format!(
768 "no metadata entry for source {}",
769 from
770 )));
771 }
772 };
773
774 if !opts.force {
775 if detect_fanout(self, &root, to)?.is_some() {
776 return Err(Error::from_str(
777 "metadata entry already exists for target (use --force to overwrite)",
778 ));
779 }
780 }
781
782 let (segments, leaf) = shard_oid(to, opts.shard_level);
783 let new_root = build_fanout(self, Some(&root), &segments, &leaf, &source_tree_oid)?;
784
785 let msg = format!("metadata: copy {} -> {}", from, to);
786 commit_index(self, ref_name, new_root, &msg)?;
787
788 Ok(source_tree_oid)
789 }
790
791 fn metadata_prune(&self, ref_name: &str, dry_run: bool) -> Result<Vec<Oid>, Error> {
792 let entries = self.metadata_list(ref_name)?;
793 let mut pruned = Vec::new();
794 let odb = self.odb()?;
795
796 for (target, _) in &entries {
797 if !odb.exists(*target) {
798 pruned.push(*target);
799 }
800 }
801
802 if !dry_run {
803 for target in &pruned {
804 self.metadata_remove(ref_name, target)?;
805 }
806 }
807
808 Ok(pruned)
809 }
810
811 fn metadata_get_ref(&self, ref_name: &str) -> String {
812 ref_name.to_string()
813 }
814}
815
816#[cfg(test)]
817mod tests;