1use git2::{Error, ErrorCode, Oid, Repository};
2
/// Options controlling how metadata entries are written.
#[derive(Debug, Clone)]
pub struct MetadataOptions {
    /// Number of two-hex-character fanout directories a target OID is split
    /// into when a new entry is created (consumed by `shard_oid`).
    pub shard_level: u8,
    /// When `false`, write operations fail if the entry (or path) already
    /// exists; when `true`, the existing value is overwritten.
    pub force: bool,
}
12
13impl Default for MetadataOptions {
14 fn default() -> Self {
15 Self {
16 shard_level: 1,
17 force: false,
18 }
19 }
20}
21
/// A single item found inside a target's metadata tree.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MetadataEntry {
    /// Slash-separated path of the item relative to the metadata tree root.
    pub path: String,
    /// Raw blob bytes; `collect_tree_entries` always fills this with `Some`.
    pub content: Option<Vec<u8>>,
    /// Object id of the tree entry.
    pub oid: Oid,
    /// Whether the item is a tree. `collect_tree_entries` only emits blobs,
    /// so it always sets this to `false`.
    pub is_tree: bool,
}
34
/// Operations on a metadata index: a ref whose commit tree maps target OIDs
/// to metadata trees, optionally spread over two-hex-character fanout
/// directories.
///
/// The behavior notes on each method describe the `Repository`
/// implementation found in this file.
pub trait MetadataIndex {
    /// Lists every `(target OID, metadata tree OID)` pair stored under
    /// `ref_name`. A missing ref yields an empty list.
    fn metadata_list(&self, ref_name: &str) -> Result<Vec<(Oid, Oid)>, Error>;

    /// Looks up the metadata tree OID recorded for `target`, or `None` when
    /// the ref or the entry does not exist.
    fn metadata_get(&self, ref_name: &str, target: &Oid) -> Result<Option<Oid>, Error>;

    /// Records `tree` as the metadata tree for `target` and commits the
    /// updated index to `ref_name`.
    ///
    /// Fails if `tree` cannot be looked up as a tree, or if an entry for
    /// `target` already exists and `opts.force` is `false`. Returns the OID
    /// of the new index root tree.
    fn metadata_set(
        &self,
        ref_name: &str,
        target: &Oid,
        tree: &Oid,
        opts: &MetadataOptions,
    ) -> Result<Oid, Error>;

    /// Returns the blobs contained in `target`'s metadata tree, recursively,
    /// with slash-separated paths. Empty when the ref or entry is missing.
    fn metadata_show(&self, ref_name: &str, target: &Oid) -> Result<Vec<MetadataEntry>, Error>;

    /// Writes `content` (an empty blob when `None`) at `path` inside
    /// `target`'s metadata tree, creating the entry if needed, and commits
    /// the updated index.
    ///
    /// Fails if `path` already exists and `opts.force` is `false`, or if
    /// `path` has no non-empty components. Returns the OID of the updated
    /// metadata tree.
    fn metadata_add(
        &self,
        ref_name: &str,
        target: &Oid,
        path: &str,
        content: Option<&[u8]>,
        opts: &MetadataOptions,
    ) -> Result<Oid, Error>;

    /// Removes paths from `target`'s metadata tree according to `patterns`.
    ///
    /// With `keep == false` matching paths are removed; with `keep == true`
    /// non-matching files are removed instead. If nothing is left, the whole
    /// entry is removed. Returns `true` when the index was changed and
    /// `false` when the ref or entry is missing or nothing matched.
    fn metadata_remove_paths(
        &self,
        ref_name: &str,
        target: &Oid,
        patterns: &[&str],
        keep: bool,
    ) -> Result<bool, Error>;

    /// Removes the whole entry for `target`. Returns `false` when the ref or
    /// the entry does not exist. If the index becomes empty the ref itself
    /// is deleted.
    fn metadata_remove(&self, ref_name: &str, target: &Oid) -> Result<bool, Error>;

    /// Makes `to` refer to the same metadata tree as `from` and commits the
    /// updated index.
    ///
    /// Fails when `from` has no entry, or when `to` already has one and
    /// `opts.force` is `false`. Returns the OID of the copied metadata tree.
    fn metadata_copy(
        &self,
        ref_name: &str,
        from: &Oid,
        to: &Oid,
        opts: &MetadataOptions,
    ) -> Result<Oid, Error>;

    /// Finds entries whose target object is missing from the object
    /// database and, unless `dry_run` is set, removes them. Returns the
    /// affected target OIDs in both modes.
    fn metadata_prune(&self, ref_name: &str, dry_run: bool) -> Result<Vec<Oid>, Error>;

    /// Returns the ref name to use for `ref_name`; the `Repository`
    /// implementation returns it unchanged.
    fn metadata_get_ref(&self, ref_name: &str) -> String;
}
113
114fn shard_oid(oid: &Oid, shard_level: u8) -> (Vec<String>, String) {
120 let hex = oid.to_string();
121 let mut segments = Vec::with_capacity(shard_level as usize);
122 let mut pos = 0;
123 for _ in 0..shard_level {
124 segments.push(hex[pos..pos + 2].to_string());
125 pos += 2;
126 }
127 let leaf = hex[pos..].to_string();
128 (segments, leaf)
129}
130
131fn resolve_root_tree<'r>(
133 repo: &'r Repository,
134 ref_name: &str,
135) -> Result<Option<git2::Tree<'r>>, Error> {
136 match repo.find_reference(ref_name) {
137 Ok(reference) => {
138 let commit = reference.peel_to_commit()?;
139 let tree = commit.tree()?;
140 Ok(Some(tree))
141 }
142 Err(e) if e.code() == ErrorCode::NotFound => Ok(None),
143 Err(e) => Err(e),
144 }
145}
146
147fn walk_tree<'a>(
149 repo: &'a Repository,
150 root: &git2::Tree<'a>,
151 segments: &[String],
152) -> Result<Option<git2::Tree<'a>>, Error> {
153 let mut current = root.clone();
154 for seg in segments {
155 let id = match current.get_name(seg) {
156 Some(entry) => entry.id(),
157 None => return Ok(None),
158 };
159 current = repo.find_tree(id)?;
160 }
161 Ok(Some(current))
162}
163
/// Returns `true` when `name` is exactly two ASCII hex digits (either case),
/// i.e. it has the shape of a fanout directory name.
fn is_fanout_segment(name: &str) -> bool {
    matches!(
        name.as_bytes(),
        [hi, lo] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()
    )
}
168
169fn collect_entries(
171 repo: &Repository,
172 tree: &git2::Tree<'_>,
173 prefix: &str,
174) -> Result<Vec<(Oid, Oid)>, Error> {
175 let mut results = Vec::new();
176 for entry in tree.iter() {
177 let name = entry.name().unwrap_or("");
178 if entry.kind() != Some(git2::ObjectType::Tree) {
179 continue;
180 }
181 let full = format!("{prefix}{name}");
182 if is_fanout_segment(name) {
183 let subtree = repo.find_tree(entry.id())?;
184 results.extend(collect_entries(repo, &subtree, &full)?);
185 } else if let Ok(oid) = Oid::from_str(&full) {
186 if oid.to_string() == full {
187 results.push((oid, entry.id()));
188 }
189 }
190 }
191 Ok(results)
192}
193
194fn detect_fanout(
196 repo: &Repository,
197 root: &git2::Tree<'_>,
198 target: &Oid,
199) -> Result<Option<(Vec<String>, String, Oid)>, Error> {
200 let hex = target.to_string();
201 let max_depth = hex.len() / 2;
202 for depth in 0..max_depth {
203 let prefix_len = depth * 2;
204 let segments: Vec<String> = (0..depth)
205 .map(|i| hex[i * 2..i * 2 + 2].to_string())
206 .collect();
207 let leaf = &hex[prefix_len..];
208
209 if let Some(subtree) = walk_tree(repo, root, &segments)? {
210 if let Some(entry) = subtree.get_name(leaf) {
211 if entry.kind() == Some(git2::ObjectType::Tree) {
212 return Ok(Some((segments, leaf.to_string(), entry.id())));
213 }
214 }
215 }
216 }
217 Ok(None)
218}
219
220fn build_fanout(
222 repo: &Repository,
223 existing_root: Option<&git2::Tree<'_>>,
224 segments: &[String],
225 leaf: &str,
226 value_tree_oid: &Oid,
227) -> Result<Oid, Error> {
228 let mut existing_subtrees: Vec<Option<git2::Tree<'_>>> = Vec::new();
229 if let Some(root) = existing_root {
230 let mut current = Some(root.clone());
231 existing_subtrees.push(current.clone());
232 for seg in segments {
233 current = match ¤t {
234 Some(t) => match t.get_name(seg) {
235 Some(e) => Some(repo.find_tree(e.id())?),
236 None => None,
237 },
238 None => None,
239 };
240 existing_subtrees.push(current.clone());
241 }
242 } else {
243 for _ in 0..=segments.len() {
244 existing_subtrees.push(None);
245 }
246 }
247
248 let deepest_existing = existing_subtrees.last().and_then(|o| o.as_ref());
249 let mut builder = repo.treebuilder(deepest_existing)?;
250 builder.insert(leaf, *value_tree_oid, 0o040000)?;
251 let mut child_oid = builder.write()?;
252
253 for (i, seg) in segments.iter().enumerate().rev() {
254 let parent_existing = existing_subtrees[i].as_ref();
255 let mut builder = repo.treebuilder(parent_existing)?;
256 builder.insert(seg, child_oid, 0o040000)?;
257 child_oid = builder.write()?;
258 }
259
260 Ok(child_oid)
261}
262
/// Outcome of `build_fanout_remove`.
enum RemoveResult {
    /// The fanout path or the leaf entry was not present; nothing was written.
    NotFound,
    /// The entry was removed and the root tree has no entries left.
    Empty,
    /// The entry was removed; holds the OID of the rewritten root tree.
    Removed(Oid),
}
269
270fn build_fanout_remove(
272 repo: &Repository,
273 root: &git2::Tree<'_>,
274 segments: &[String],
275 leaf: &str,
276) -> Result<RemoveResult, Error> {
277 let mut chain_oids: Vec<Oid> = vec![root.id()];
278 {
279 let mut current = root.clone();
280 for seg in segments {
281 let id = match current.get_name(seg) {
282 Some(e) => e.id(),
283 None => return Ok(RemoveResult::NotFound),
284 };
285 chain_oids.push(id);
286 current = repo.find_tree(id)?;
287 }
288 }
289
290 let deepest = repo.find_tree(*chain_oids.last().unwrap())?;
291 let mut builder = repo.treebuilder(Some(&deepest))?;
292 if builder.get(leaf)?.is_none() {
293 return Ok(RemoveResult::NotFound);
294 }
295 builder.remove(leaf)?;
296
297 let mut child_oid = if builder.len() == 0 {
298 None
299 } else {
300 Some(builder.write()?)
301 };
302
303 for (i, seg) in segments.iter().enumerate().rev() {
304 let parent = repo.find_tree(chain_oids[i])?;
305 let mut builder = repo.treebuilder(Some(&parent))?;
306 match child_oid {
307 Some(oid) => {
308 builder.insert(seg, oid, 0o040000)?;
309 }
310 None => {
311 builder.remove(seg)?;
312 }
313 }
314 child_oid = if builder.len() == 0 {
315 None
316 } else {
317 Some(builder.write()?)
318 };
319 }
320
321 match child_oid {
322 Some(oid) => Ok(RemoveResult::Removed(oid)),
323 None => Ok(RemoveResult::Empty),
324 }
325}
326
327fn commit_index(
329 repo: &Repository,
330 ref_name: &str,
331 tree_oid: Oid,
332 message: &str,
333) -> Result<Oid, Error> {
334 let tree = repo.find_tree(tree_oid)?;
335 let sig = repo.signature()?;
336
337 let parent = match repo.find_reference(ref_name) {
338 Ok(r) => Some(r.peel_to_commit()?),
339 Err(e) if e.code() == ErrorCode::NotFound => None,
340 Err(e) => return Err(e),
341 };
342
343 let parents: Vec<&git2::Commit<'_>> = parent.iter().collect();
344 let commit_oid = repo.commit(Some(ref_name), &sig, &sig, message, &tree, &parents)?;
345 Ok(commit_oid)
346}
347
348fn collect_tree_entries(
350 repo: &Repository,
351 tree: &git2::Tree<'_>,
352 prefix: &str,
353) -> Result<Vec<MetadataEntry>, Error> {
354 let mut results = Vec::new();
355 for entry in tree.iter() {
356 let name = entry.name().unwrap_or("");
357 let path = if prefix.is_empty() {
358 name.to_string()
359 } else {
360 format!("{prefix}/{name}")
361 };
362 match entry.kind() {
363 Some(git2::ObjectType::Tree) => {
364 let subtree = repo.find_tree(entry.id())?;
365 results.extend(collect_tree_entries(repo, &subtree, &path)?);
366 }
367 Some(git2::ObjectType::Blob) => {
368 let blob = repo.find_blob(entry.id())?;
369 results.push(MetadataEntry {
370 path,
371 content: Some(blob.content().to_vec()),
372 oid: entry.id(),
373 is_tree: false,
374 });
375 }
376 _ => {}
377 }
378 }
379 Ok(results)
380}
381
382fn insert_path_into_tree(
385 repo: &Repository,
386 existing: Option<&git2::Tree<'_>>,
387 path: &str,
388 blob_oid: Oid,
389) -> Result<Oid, Error> {
390 let components: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
391 if components.is_empty() {
392 return Err(Error::from_str("empty path"));
393 }
394 insert_path_recursive(repo, existing, &components, blob_oid)
395}
396
397fn insert_path_recursive(
398 repo: &Repository,
399 existing: Option<&git2::Tree<'_>>,
400 components: &[&str],
401 blob_oid: Oid,
402) -> Result<Oid, Error> {
403 assert!(!components.is_empty());
404
405 let name = components[0];
406
407 if components.len() == 1 {
408 let mut builder = repo.treebuilder(existing)?;
410 builder.insert(name, blob_oid, 0o100644)?;
411 return builder.write();
412 }
413
414 let sub_existing = match existing {
416 Some(tree) => match tree.get_name(name) {
417 Some(entry) if entry.kind() == Some(git2::ObjectType::Tree) => {
418 Some(repo.find_tree(entry.id())?)
419 }
420 _ => None,
421 },
422 None => None,
423 };
424
425 let child_oid = insert_path_recursive(repo, sub_existing.as_ref(), &components[1..], blob_oid)?;
426
427 let mut builder = repo.treebuilder(existing)?;
428 builder.insert(name, child_oid, 0o040000)?;
429 builder.write()
430}
431
432fn path_exists_in_tree(repo: &Repository, tree: &git2::Tree<'_>, path: &str) -> bool {
434 let components: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
435 if components.is_empty() {
436 return false;
437 }
438 path_exists_recursive(repo, tree, &components)
439}
440
441fn path_exists_recursive(repo: &Repository, tree: &git2::Tree<'_>, components: &[&str]) -> bool {
442 if components.is_empty() {
443 return false;
444 }
445 match tree.get_name(components[0]) {
446 None => false,
447 Some(entry) => {
448 if components.len() == 1 {
449 true
450 } else if entry.kind() == Some(git2::ObjectType::Tree) {
451 match repo.find_tree(entry.id()) {
452 Ok(subtree) => path_exists_recursive(repo, &subtree, &components[1..]),
453 Err(_) => false,
454 }
455 } else {
456 false
457 }
458 }
459 }
460}
461
462fn glob_matches(pattern: &str, path: &str) -> bool {
466 let pat_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
467 let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
468
469 if pattern == path {
471 return true;
472 }
473
474 if !pat_parts.is_empty()
476 && !pat_parts.iter().any(|p| *p == "*" || *p == "**")
477 && path_parts.starts_with(&pat_parts)
478 {
479 return true;
480 }
481
482 glob_match_recursive(&pat_parts, &path_parts)
483}
484
/// Matches a list of pattern components against a list of path components.
///
/// `**` matches any number of components (including none), `*` matches
/// exactly one component, and anything else must equal the path component
/// exactly. The whole path has to be consumed for a match.
fn glob_match_recursive(pattern: &[&str], path: &[&str]) -> bool {
    match pattern.split_first() {
        None => path.is_empty(),
        Some((head, rest)) if *head == "**" => {
            // Try letting `**` swallow 0, 1, ... up to all remaining components.
            (0..=path.len()).any(|skipped| glob_match_recursive(rest, &path[skipped..]))
        }
        Some((head, rest)) => match path.split_first() {
            Some((first, remaining)) if *head == "*" || head == first => {
                glob_match_recursive(rest, remaining)
            }
            _ => false,
        },
    }
}
512
513fn remove_matching_from_tree(
515 repo: &Repository,
516 tree: &git2::Tree<'_>,
517 patterns: &[&str],
518 keep: bool,
519 prefix: &str,
520) -> Result<Option<Oid>, Error> {
521 let mut builder = repo.treebuilder(None)?;
522 let mut any_change = false;
523
524 for entry in tree.iter() {
525 let name = entry.name().unwrap_or("");
526 let full_path = if prefix.is_empty() {
527 name.to_string()
528 } else {
529 format!("{prefix}/{name}")
530 };
531
532 let matched = patterns.iter().any(|p| glob_matches(p, &full_path));
533
534 if entry.kind() == Some(git2::ObjectType::Tree) {
535 if !keep && matched {
537 any_change = true;
538 continue;
539 }
540
541 let subtree = repo.find_tree(entry.id())?;
542 let child_result =
543 remove_matching_from_tree(repo, &subtree, patterns, keep, &full_path)?;
544
545 if let Some(new_oid) = child_result {
546 if new_oid != entry.id() {
547 any_change = true;
548 }
549 builder.insert(name, new_oid, 0o040000)?;
550 } else {
551 any_change = true;
553 }
554 } else {
555 let should_remove = if keep { !matched } else { matched };
556
557 if should_remove {
558 any_change = true;
559 } else {
560 let filemode = entry.filemode_raw();
561 builder.insert(name, entry.id(), filemode)?;
562 }
563 }
564 }
565
566 if builder.len() == 0 {
567 if any_change {
568 Ok(None)
569 } else {
570 Ok(Some(tree.id()))
572 }
573 } else if any_change {
574 Ok(Some(builder.write()?))
575 } else {
576 Ok(Some(tree.id()))
577 }
578}
579
580impl MetadataIndex for Repository {
585 fn metadata_list(&self, ref_name: &str) -> Result<Vec<(Oid, Oid)>, Error> {
586 let root = match resolve_root_tree(self, ref_name)? {
587 Some(t) => t,
588 None => return Ok(Vec::new()),
589 };
590 collect_entries(self, &root, "")
591 }
592
593 fn metadata_get(&self, ref_name: &str, target: &Oid) -> Result<Option<Oid>, Error> {
594 let root = match resolve_root_tree(self, ref_name)? {
595 Some(t) => t,
596 None => return Ok(None),
597 };
598 Ok(detect_fanout(self, &root, target)?.map(|(_, _, oid)| oid))
599 }
600
601 fn metadata_set(
602 &self,
603 ref_name: &str,
604 target: &Oid,
605 tree: &Oid,
606 opts: &MetadataOptions,
607 ) -> Result<Oid, Error> {
608 self.find_tree(*tree)?;
609
610 let (segments, leaf) = shard_oid(target, opts.shard_level);
611 let existing_root = resolve_root_tree(self, ref_name)?;
612
613 if !opts.force {
614 if let Some(ref root) = existing_root {
615 if detect_fanout(self, root, target)?.is_some() {
616 return Err(Error::from_str(
617 "metadata entry already exists (use force to overwrite)",
618 ));
619 }
620 }
621 }
622
623 let new_root = build_fanout(self, existing_root.as_ref(), &segments, &leaf, tree)?;
624
625 let msg = format!("metadata: set {} -> {}", target, tree);
626 commit_index(self, ref_name, new_root, &msg)?;
627
628 Ok(new_root)
629 }
630
631 fn metadata_show(&self, ref_name: &str, target: &Oid) -> Result<Vec<MetadataEntry>, Error> {
632 let root = match resolve_root_tree(self, ref_name)? {
633 Some(t) => t,
634 None => return Ok(Vec::new()),
635 };
636
637 let tree_oid = match detect_fanout(self, &root, target)? {
638 Some((_, _, oid)) => oid,
639 None => return Ok(Vec::new()),
640 };
641
642 let tree = self.find_tree(tree_oid)?;
643 collect_tree_entries(self, &tree, "")
644 }
645
646 fn metadata_add(
647 &self,
648 ref_name: &str,
649 target: &Oid,
650 path: &str,
651 content: Option<&[u8]>,
652 opts: &MetadataOptions,
653 ) -> Result<Oid, Error> {
654 let blob_oid = self.blob(content.unwrap_or(b""))?;
655
656 let existing_root = resolve_root_tree(self, ref_name)?;
657
658 let existing_meta_tree = match &existing_root {
660 Some(root) => match detect_fanout(self, root, target)? {
661 Some((_, _, oid)) => Some(self.find_tree(oid)?),
662 None => None,
663 },
664 None => None,
665 };
666
667 if !opts.force {
669 if let Some(ref meta_tree) = existing_meta_tree {
670 if path_exists_in_tree(self, meta_tree, path) {
671 return Err(Error::from_str(
672 "path already exists in metadata (use --force to overwrite)",
673 ));
674 }
675 }
676 }
677
678 let new_meta_tree_oid =
680 insert_path_into_tree(self, existing_meta_tree.as_ref(), path, blob_oid)?;
681
682 let (segments, leaf) = if existing_meta_tree.is_some() {
684 match &existing_root {
686 Some(root) => match detect_fanout(self, root, target)? {
687 Some((s, l, _)) => (s, l),
688 None => shard_oid(target, opts.shard_level),
689 },
690 None => shard_oid(target, opts.shard_level),
691 }
692 } else {
693 shard_oid(target, opts.shard_level)
694 };
695
696 let new_root = build_fanout(
697 self,
698 existing_root.as_ref(),
699 &segments,
700 &leaf,
701 &new_meta_tree_oid,
702 )?;
703
704 let msg = format!("metadata: add {} to {}", path, target);
705 commit_index(self, ref_name, new_root, &msg)?;
706
707 Ok(new_meta_tree_oid)
708 }
709
710 fn metadata_remove_paths(
711 &self,
712 ref_name: &str,
713 target: &Oid,
714 patterns: &[&str],
715 keep: bool,
716 ) -> Result<bool, Error> {
717 let root = match resolve_root_tree(self, ref_name)? {
718 Some(t) => t,
719 None => return Ok(false),
720 };
721
722 let (segments, leaf, meta_oid) = match detect_fanout(self, &root, target)? {
723 Some(t) => t,
724 None => return Ok(false),
725 };
726
727 let meta_tree = self.find_tree(meta_oid)?;
728 let new_meta = remove_matching_from_tree(self, &meta_tree, patterns, keep, "")?;
729
730 match new_meta {
731 None => {
732 match build_fanout_remove(self, &root, &segments, &leaf)? {
734 RemoveResult::NotFound => Ok(false),
735 RemoveResult::Empty => {
736 let mut reference = self.find_reference(ref_name)?;
737 reference.delete()?;
738 Ok(true)
739 }
740 RemoveResult::Removed(new_root) => {
741 let msg = format!("metadata: remove paths from {}", target);
742 commit_index(self, ref_name, new_root, &msg)?;
743 Ok(true)
744 }
745 }
746 }
747 Some(new_oid) if new_oid == meta_oid => Ok(false),
748 Some(new_oid) => {
749 let new_root = build_fanout(self, Some(&root), &segments, &leaf, &new_oid)?;
750 let msg = format!("metadata: remove paths from {}", target);
751 commit_index(self, ref_name, new_root, &msg)?;
752 Ok(true)
753 }
754 }
755 }
756
757 fn metadata_remove(&self, ref_name: &str, target: &Oid) -> Result<bool, Error> {
758 let root = match resolve_root_tree(self, ref_name)? {
759 Some(t) => t,
760 None => return Ok(false),
761 };
762
763 let (segments, leaf) = match detect_fanout(self, &root, target)? {
764 Some((segments, leaf, _)) => (segments, leaf),
765 None => return Ok(false),
766 };
767
768 match build_fanout_remove(self, &root, &segments, &leaf)? {
769 RemoveResult::NotFound => Ok(false),
770 RemoveResult::Empty => {
771 let mut reference = self.find_reference(ref_name)?;
772 reference.delete()?;
773 Ok(true)
774 }
775 RemoveResult::Removed(new_root) => {
776 let msg = format!("metadata: remove {}", target);
777 commit_index(self, ref_name, new_root, &msg)?;
778 Ok(true)
779 }
780 }
781 }
782
783 fn metadata_copy(
784 &self,
785 ref_name: &str,
786 from: &Oid,
787 to: &Oid,
788 opts: &MetadataOptions,
789 ) -> Result<Oid, Error> {
790 let root = match resolve_root_tree(self, ref_name)? {
791 Some(t) => t,
792 None => {
793 return Err(Error::from_str(&format!(
794 "no metadata entry for source {}",
795 from
796 )));
797 }
798 };
799
800 let source_tree_oid = match detect_fanout(self, &root, from)? {
801 Some((_, _, oid)) => oid,
802 None => {
803 return Err(Error::from_str(&format!(
804 "no metadata entry for source {}",
805 from
806 )));
807 }
808 };
809
810 if !opts.force {
811 if detect_fanout(self, &root, to)?.is_some() {
812 return Err(Error::from_str(
813 "metadata entry already exists for target (use --force to overwrite)",
814 ));
815 }
816 }
817
818 let (segments, leaf) = shard_oid(to, opts.shard_level);
819 let new_root = build_fanout(self, Some(&root), &segments, &leaf, &source_tree_oid)?;
820
821 let msg = format!("metadata: copy {} -> {}", from, to);
822 commit_index(self, ref_name, new_root, &msg)?;
823
824 Ok(source_tree_oid)
825 }
826
827 fn metadata_prune(&self, ref_name: &str, dry_run: bool) -> Result<Vec<Oid>, Error> {
828 let entries = self.metadata_list(ref_name)?;
829 let mut pruned = Vec::new();
830 let odb = self.odb()?;
831
832 for (target, _) in &entries {
833 if !odb.exists(*target) {
834 pruned.push(*target);
835 }
836 }
837
838 if !dry_run {
839 for target in &pruned {
840 self.metadata_remove(ref_name, target)?;
841 }
842 }
843
844 Ok(pruned)
845 }
846
847 fn metadata_get_ref(&self, ref_name: &str) -> String {
848 ref_name.to_string()
849 }
850}
851
852#[cfg(test)]
853mod tests;