1use super::types::{ManifestEntry, ShardReference};
38use crate::{Result, VoidError};
41
42use serde::{Deserialize, Serialize};
43
/// One immediate child of a directory, as produced by [`TreeManifest::list_dir`].
#[derive(Debug, Clone)]
pub struct DirChild {
    /// Name of the child relative to the listed directory (a single path component).
    pub name: String,
    /// `true` for a subdirectory, `false` for a file.
    pub is_dir: bool,
    /// Size copied from the manifest entry for files; `0` for directories.
    pub size: u64,
    /// Line count copied from the manifest entry for files; `0` for directories.
    pub lines: u32,
}
56
57#[derive(Serialize, Deserialize)]
59struct Wire {
60 offsets: Vec<u32>,
61 #[serde(with = "cbor_bytes")]
64 entries: Vec<u8>,
65 shards: Vec<ShardReference>,
66 paths_hash: [u8; 32],
67 total_files: u64,
68 total_bytes: u64,
69}
70
71mod cbor_bytes {
74 use serde::{Deserializer, Serializer};
75
76 pub fn serialize<S: Serializer>(bytes: &Vec<u8>, s: S) -> Result<S::Ok, S::Error> {
77 s.serialize_bytes(bytes)
78 }
79
80 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<u8>, D::Error> {
81 struct ByteVisitor;
82 impl<'de> serde::de::Visitor<'de> for ByteVisitor {
83 type Value = Vec<u8>;
84 fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
85 f.write_str("byte string")
86 }
87 fn visit_bytes<E: serde::de::Error>(self, v: &[u8]) -> Result<Vec<u8>, E> {
88 Ok(v.to_vec())
89 }
90 fn visit_byte_buf<E: serde::de::Error>(self, v: Vec<u8>) -> Result<Vec<u8>, E> {
91 Ok(v)
92 }
93 }
94 d.deserialize_byte_buf(ByteVisitor)
95 }
96}
97
98pub struct TreeManifest {
105 offsets: Vec<u32>,
107 entries: Vec<u8>,
109 shards: Vec<ShardReference>,
111 paths_hash: [u8; 32],
113 total_files: u64,
115 total_bytes: u64,
117 serialized: Vec<u8>,
119}
120
121impl TreeManifest {
122 pub fn builder() -> TreeManifestBuilder {
124 TreeManifestBuilder::default()
125 }
126
127 pub(crate) fn open(data: Vec<u8>) -> Result<Self> {
133 let wire: Wire = ciborium::from_reader(&data[..])
134 .map_err(|e| VoidError::Serialization(format!("tree manifest: {e}")))?;
135
136 Ok(Self {
137 offsets: wire.offsets,
138 entries: wire.entries,
139 shards: wire.shards,
140 paths_hash: wire.paths_hash,
141 total_files: wire.total_files,
142 total_bytes: wire.total_bytes,
143 serialized: data,
144 })
145 }
146
147 pub fn from_commit(
154 store: &impl crate::store::ObjectStoreExt,
155 commit: &crate::metadata::Commit,
156 reader: &crate::crypto::CommitReader,
157 ) -> Result<Option<Self>> {
158 let manifest_cid = match &commit.manifest_cid {
159 Some(cid) => cid,
160 None => return Ok(None),
161 };
162 let mcid = crate::cid::VoidCid::from_bytes(manifest_cid.as_bytes())?;
163 let encrypted: void_crypto::EncryptedManifest = store.get_blob(&mcid)?;
164 let decrypted = encrypted.decrypt(reader.content_key().as_bytes())?;
165 Self::open(decrypted).map(Some)
166 }
167
168 pub fn as_bytes(&self) -> &[u8] {
170 &self.serialized
171 }
172
173 pub fn lookup(&self, path: &str) -> Result<ManifestEntry> {
178 let n = self.offsets.len();
179 if n == 0 {
180 return Err(VoidError::NotFound(path.to_string()));
181 }
182
183 let mut lo = 0usize;
184 let mut hi = n;
185
186 while lo < hi {
187 let mid = lo + (hi - lo) / 2;
188 let entry = self.parse_entry_at(mid)?;
189
190 match entry.path.as_str().cmp(path) {
191 std::cmp::Ordering::Equal => return Ok(entry),
192 std::cmp::Ordering::Less => lo = mid + 1,
193 std::cmp::Ordering::Greater => hi = mid,
194 }
195 }
196
197 Err(VoidError::NotFound(path.to_string()))
198 }
199
200 pub fn iter(&self) -> ManifestIter<'_> {
202 ManifestIter {
203 manifest: self,
204 index: 0,
205 }
206 }
207
208 pub fn shards(&self) -> &[ShardReference] {
210 &self.shards
211 }
212
213 pub fn total_files(&self) -> u64 {
215 self.total_files
216 }
217
218 pub fn total_bytes(&self) -> u64 {
220 self.total_bytes
221 }
222
223 pub fn paths_hash(&self) -> &[u8; 32] {
225 &self.paths_hash
226 }
227
228 pub fn entry_count(&self) -> usize {
230 self.offsets.len()
231 }
232
233 pub fn list_dir(&self, path: &str) -> Result<Vec<DirChild>> {
241 let prefix = if path.is_empty() {
242 String::new()
243 } else {
244 format!("{}/", path.trim_end_matches('/'))
245 };
246 let mut children: std::collections::BTreeMap<String, DirChild> = std::collections::BTreeMap::new();
247
248 for entry_result in self.iter() {
249 let entry = entry_result?;
250
251 let relative = if prefix.is_empty() {
253 entry.path.as_str()
254 } else if let Some(rest) = entry.path.strip_prefix(&prefix) {
255 rest
256 } else {
257 continue;
258 };
259
260 if let Some(slash_pos) = relative.find('/') {
262 let dir_name = &relative[..slash_pos];
264 children
265 .entry(dir_name.to_string())
266 .or_insert_with(|| DirChild {
267 name: dir_name.to_string(),
268 is_dir: true,
269 size: 0,
270 lines: 0,
271 });
272 } else {
273 children.insert(
275 relative.to_string(),
276 DirChild {
277 name: relative.to_string(),
278 is_dir: false,
279 size: entry.size,
280 lines: entry.lines,
281 },
282 );
283 }
284 }
285
286 if children.is_empty() && !path.is_empty() {
287 let has_entries = self.iter().any(|r| {
289 r.ok()
290 .map(|e| e.path.starts_with(&prefix))
291 .unwrap_or(false)
292 });
293 if !has_entries {
294 return Err(VoidError::NotFound(format!("directory: {}", path)));
295 }
296 }
297
298 Ok(children.into_values().collect())
299 }
300
301 pub fn entries_by_shard(&self) -> Result<Vec<Vec<ManifestEntry>>> {
307 let mut groups: Vec<Vec<ManifestEntry>> = vec![Vec::new(); self.shards.len()];
308 for entry_result in self.iter() {
309 let entry = entry_result?;
310 let idx = entry.shard_index as usize;
311 if idx < groups.len() {
312 groups[idx].push(entry);
313 }
314 }
315 Ok(groups)
316 }
317
318 fn parse_entry_at(&self, index: usize) -> Result<ManifestEntry> {
320 let start = self.offsets[index] as usize;
321 let end = if index + 1 < self.offsets.len() {
322 self.offsets[index + 1] as usize
323 } else {
324 self.entries.len()
325 };
326
327 if start > self.entries.len() || end > self.entries.len() || start > end {
328 return Err(VoidError::Serialization(format!(
329 "manifest entry {index}: offset {start}..{end} out of range (entries len {})",
330 self.entries.len()
331 )));
332 }
333
334 ciborium::from_reader(&self.entries[start..end])
335 .map_err(|e| VoidError::Serialization(format!("manifest entry {index}: {e}")))
336 }
337}
338
339#[derive(Default)]
343pub struct TreeManifestBuilder {
344 files: Vec<ManifestEntry>,
345 shards: Vec<ShardReference>,
346 paths_hash: [u8; 32],
347 total_files: u64,
348 total_bytes: u64,
349}
350
351impl TreeManifestBuilder {
352 pub fn files(mut self, files: Vec<ManifestEntry>) -> Self {
354 self.files = files;
355 self
356 }
357
358 pub fn shards(mut self, shards: Vec<ShardReference>) -> Self {
360 self.shards = shards;
361 self
362 }
363
364 pub fn paths_hash(mut self, hash: [u8; 32]) -> Self {
366 self.paths_hash = hash;
367 self
368 }
369
370 pub fn total_files(mut self, n: u64) -> Self {
372 self.total_files = n;
373 self
374 }
375
376 pub fn total_bytes(mut self, n: u64) -> Self {
378 self.total_bytes = n;
379 self
380 }
381
382 pub fn build(mut self) -> Result<TreeManifest> {
384 self.files.sort_by(|a, b| a.path.cmp(&b.path));
386
387 let mut entries_blob = Vec::new();
389 let mut offsets = Vec::with_capacity(self.files.len());
390
391 for entry in &self.files {
392 offsets.push(entries_blob.len() as u32);
393 ciborium::into_writer(entry, &mut entries_blob)
394 .map_err(|e| VoidError::Serialization(format!("manifest entry: {e}")))?;
395 }
396
397 let wire = Wire {
398 offsets: offsets.clone(),
399 entries: entries_blob.clone(),
400 shards: self.shards.clone(),
401 paths_hash: self.paths_hash,
402 total_files: self.total_files,
403 total_bytes: self.total_bytes,
404 };
405
406 let mut serialized = Vec::new();
407 ciborium::into_writer(&wire, &mut serialized)
408 .map_err(|e| VoidError::Serialization(format!("tree manifest: {e}")))?;
409
410 Ok(TreeManifest {
411 offsets,
412 entries: entries_blob,
413 shards: self.shards,
414 paths_hash: self.paths_hash,
415 total_files: self.total_files,
416 total_bytes: self.total_bytes,
417 serialized,
418 })
419 }
420}
421
422pub struct ManifestIter<'a> {
424 manifest: &'a TreeManifest,
425 index: usize,
426}
427
428impl<'a> Iterator for ManifestIter<'a> {
429 type Item = Result<ManifestEntry>;
430
431 fn next(&mut self) -> Option<Self::Item> {
432 if self.index >= self.manifest.offsets.len() {
433 return None;
434 }
435 let result = self.manifest.parse_entry_at(self.index);
436 self.index += 1;
437 Some(result)
438 }
439
440 fn size_hint(&self) -> (usize, Option<usize>) {
441 let remaining = self.manifest.offsets.len() - self.index;
442 (remaining, Some(remaining))
443 }
444}
445
446impl<'a> ExactSizeIterator for ManifestIter<'a> {}
447
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ContentHash;
    use void_crypto::ShardCid;

    /// Entry fixture: zero content hash, mode `0o644`, `length == size`, no lines.
    fn make_entry(path: &str, shard_index: u32, offset: u64, size: u64) -> ManifestEntry {
        ManifestEntry {
            path: path.to_owned(),
            content_hash: ContentHash::ZERO,
            size,
            mode: 0o644,
            shard_index,
            offset,
            length: size,
            lines: 0,
            shard_count: 1,
        }
    }

    /// Shard fixture whose CID is the single byte `id`.
    fn make_shard_ref(id: u8) -> ShardReference {
        ShardReference {
            cid: ShardCid::from_bytes(vec![id]),
            size_compressed: 100,
            size_decompressed: 200,
            wrapped_key: None,
        }
    }

    /// Builds a manifest whose totals are derived from `files`.
    fn build_manifest(files: Vec<ManifestEntry>, shards: Vec<ShardReference>) -> TreeManifest {
        let file_count = files.len() as u64;
        let byte_count: u64 = files.iter().map(|f| f.size).sum();
        TreeManifest::builder()
            .files(files)
            .shards(shards)
            .paths_hash([0u8; 32])
            .total_files(file_count)
            .total_bytes(byte_count)
            .build()
            .unwrap()
    }

    /// Opens a fresh manifest from the serialized bytes of `manifest`.
    fn reopen(manifest: &TreeManifest) -> TreeManifest {
        TreeManifest::open(manifest.as_bytes().to_vec()).unwrap()
    }

    /// A shard list containing only shard `1`.
    fn one_shard() -> Vec<ShardReference> {
        vec![make_shard_ref(1)]
    }

    /// Manifest with `aaa`/`bbb`/`ccc` (sizes 10/20/30) in one shard.
    fn three_entry_manifest() -> TreeManifest {
        build_manifest(
            vec![
                make_entry("aaa", 0, 0, 10),
                make_entry("bbb", 0, 10, 20),
                make_entry("ccc", 0, 30, 30),
            ],
            one_shard(),
        )
    }

    #[test]
    fn round_trip_single_file() {
        let built = build_manifest(vec![make_entry("hello.txt", 0, 0, 42)], one_shard());
        let reopened = reopen(&built);

        assert_eq!(reopened.entry_count(), 1);
        assert_eq!(reopened.total_files(), 1);
        assert_eq!(reopened.total_bytes(), 42);
        assert_eq!(reopened.shards().len(), 1);

        let found = reopened.lookup("hello.txt").unwrap();
        assert_eq!(found.path, "hello.txt");
        assert_eq!(found.size, 42);
        assert_eq!(found.shard_index, 0);
    }

    #[test]
    fn round_trip_multiple_files() {
        let files = vec![
            make_entry("src/main.rs", 0, 0, 500),
            make_entry("Cargo.toml", 0, 500, 200),
            make_entry("README.md", 1, 0, 100),
            make_entry("src/lib.rs", 0, 700, 300),
            make_entry("tests/test.rs", 1, 100, 150),
        ];
        let built = build_manifest(files.clone(), vec![make_shard_ref(1), make_shard_ref(2)]);

        let reopened = reopen(&built);
        assert_eq!(reopened.entry_count(), 5);

        for expected in &files {
            let found = reopened.lookup(&expected.path).unwrap();
            assert_eq!(found.path, expected.path);
            assert_eq!(found.size, expected.size);
            assert_eq!(found.shard_index, expected.shard_index);
            assert_eq!(found.offset, expected.offset);
        }
    }

    #[test]
    fn round_trip_preserves_all_fields() {
        let original = ManifestEntry {
            path: "deep/nested/path/file.rs".to_string(),
            content_hash: ContentHash([0xAB; 32]),
            size: 9999,
            mode: 0o755,
            shard_index: 3,
            offset: 1234,
            length: 9999,
            lines: 42,
            shard_count: 1,
        };

        let built = build_manifest(vec![original.clone()], one_shard());
        let got = reopen(&built).lookup("deep/nested/path/file.rs").unwrap();

        assert_eq!(got.path, original.path);
        assert_eq!(got.content_hash, original.content_hash);
        assert_eq!(got.size, original.size);
        assert_eq!(got.mode, original.mode);
        assert_eq!(got.shard_index, original.shard_index);
        assert_eq!(got.offset, original.offset);
        assert_eq!(got.length, original.length);
        assert_eq!(got.lines, original.lines);
    }

    #[test]
    fn builder_sorts_unsorted_input() {
        let manifest = build_manifest(
            vec![
                make_entry("c.txt", 0, 0, 10),
                make_entry("a.txt", 0, 10, 20),
                make_entry("b.txt", 0, 30, 30),
            ],
            one_shard(),
        );

        // Binary search only succeeds if the builder sorted the entries.
        for (path, size) in [("a.txt", 20u64), ("b.txt", 30), ("c.txt", 10)] {
            assert_eq!(manifest.lookup(path).unwrap().size, size);
        }
    }

    #[test]
    fn iter_returns_sorted_order() {
        let manifest = build_manifest(
            vec![
                make_entry("z.txt", 0, 0, 10),
                make_entry("a.txt", 0, 10, 20),
                make_entry("m.txt", 0, 30, 30),
            ],
            one_shard(),
        );

        let mut paths: Vec<String> = Vec::new();
        for item in manifest.iter() {
            paths.push(item.unwrap().path);
        }
        assert_eq!(paths, vec!["a.txt", "m.txt", "z.txt"]);
    }

    #[test]
    fn iter_exact_size() {
        let manifest = build_manifest(
            vec![
                make_entry("a.txt", 0, 0, 10),
                make_entry("b.txt", 0, 10, 20),
                make_entry("c.txt", 0, 30, 30),
            ],
            one_shard(),
        );

        assert_eq!(manifest.iter().len(), 3);
    }

    #[test]
    fn lookup_nonexistent_returns_not_found() {
        let manifest = build_manifest(vec![make_entry("a.txt", 0, 0, 10)], one_shard());

        let err = manifest.lookup("nonexistent.txt").unwrap_err();
        assert!(matches!(err, VoidError::NotFound(_)));
    }

    #[test]
    fn empty_manifest() {
        let manifest = build_manifest(Vec::new(), Vec::new());

        assert_eq!(manifest.entry_count(), 0);
        assert_eq!(manifest.total_files(), 0);
        assert_eq!(manifest.total_bytes(), 0);
        assert_eq!(manifest.shards().len(), 0);
        assert_eq!(manifest.iter().count(), 0);

        let err = manifest.lookup("anything").unwrap_err();
        assert!(matches!(err, VoidError::NotFound(_)));
    }

    #[test]
    fn lookup_first_entry() {
        assert_eq!(three_entry_manifest().lookup("aaa").unwrap().size, 10);
    }

    #[test]
    fn lookup_last_entry() {
        assert_eq!(three_entry_manifest().lookup("ccc").unwrap().size, 30);
    }

    #[test]
    fn lookup_middle_entry() {
        assert_eq!(three_entry_manifest().lookup("bbb").unwrap().size, 20);
    }

    #[test]
    fn lookup_100_files() {
        let files: Vec<ManifestEntry> = (0..100)
            .map(|i| make_entry(&format!("file_{:04}.txt", i), i % 4, (i as u64) * 100, 100))
            .collect();
        let shards: Vec<ShardReference> = (1..=4).map(make_shard_ref).collect();

        let manifest = build_manifest(files.clone(), shards);
        assert_eq!(manifest.entry_count(), 100);

        for expected in &files {
            let found = manifest.lookup(&expected.path).unwrap();
            assert_eq!(found.path, expected.path);
            assert_eq!(found.shard_index, expected.shard_index);
        }

        assert!(manifest.lookup("file_9999.txt").is_err());
    }

    #[test]
    fn lookup_deep_nested_paths() {
        let manifest = build_manifest(
            vec![
                make_entry("a/b/c/d/e/f/g.txt", 0, 0, 10),
                make_entry("a/b/c/d/e/f/h.txt", 0, 10, 20),
                make_entry("x/y/z.txt", 0, 30, 30),
            ],
            one_shard(),
        );

        let expectations = [
            ("a/b/c/d/e/f/g.txt", 10u64),
            ("a/b/c/d/e/f/h.txt", 20),
            ("x/y/z.txt", 30),
        ];
        for (path, size) in expectations {
            assert_eq!(manifest.lookup(path).unwrap().size, size);
        }
    }

    #[test]
    fn preserves_shards_and_metadata() {
        let plain = ShardReference {
            cid: ShardCid::from_bytes(vec![1, 2, 3]),
            size_compressed: 500,
            size_decompressed: 1000,
            wrapped_key: None,
        };
        let wrapped = ShardReference {
            cid: ShardCid::from_bytes(vec![4, 5, 6]),
            size_compressed: 300,
            size_decompressed: 600,
            wrapped_key: Some(void_crypto::WrappedKey::from_bytes(vec![7, 8, 9])),
        };

        let manifest = TreeManifest::builder()
            .files(vec![make_entry("test.txt", 0, 0, 100)])
            .shards(vec![plain, wrapped])
            .paths_hash([0xAB; 32])
            .total_files(1)
            .total_bytes(100)
            .build()
            .unwrap();

        assert_eq!(manifest.paths_hash(), &[0xAB; 32]);
        assert_eq!(manifest.total_files(), 1);
        assert_eq!(manifest.total_bytes(), 100);

        let stored = manifest.shards();
        assert_eq!(stored.len(), 2);
        assert_eq!(stored[0].size_compressed, 500);
        assert_eq!(stored[1].size_compressed, 300);
        assert!(stored[1].wrapped_key.is_some());
    }

    #[test]
    fn entries_by_shard_groups_correctly() {
        let manifest = build_manifest(
            vec![
                make_entry("a.txt", 0, 0, 10),
                make_entry("b.txt", 1, 0, 20),
                make_entry("c.txt", 0, 10, 30),
                make_entry("d.txt", 2, 0, 40),
                make_entry("e.txt", 1, 20, 50),
            ],
            vec![make_shard_ref(1), make_shard_ref(2), make_shard_ref(3)],
        );

        let groups = manifest.entries_by_shard().unwrap();
        assert_eq!(groups.len(), 3);

        // Comparing whole path lists checks both group sizes and ordering.
        let paths_in = |shard: usize| -> Vec<&str> {
            groups[shard].iter().map(|e| e.path.as_str()).collect()
        };
        assert_eq!(paths_in(0), vec!["a.txt", "c.txt"]);
        assert_eq!(paths_in(1), vec!["b.txt", "e.txt"]);
        assert_eq!(paths_in(2), vec!["d.txt"]);
    }

    #[test]
    fn entries_by_shard_empty_manifest() {
        let groups = build_manifest(Vec::new(), Vec::new())
            .entries_by_shard()
            .unwrap();
        assert_eq!(groups.len(), 0);
    }

    #[test]
    fn open_corrupt_data_returns_error() {
        assert!(TreeManifest::open(vec![0xFF, 0xFF, 0xFF]).is_err());
    }

    #[test]
    fn open_empty_data_returns_error() {
        assert!(TreeManifest::open(Vec::new()).is_err());
    }

    #[test]
    fn as_bytes_produces_valid_manifest() {
        let built = build_manifest(
            vec![
                make_entry("foo.txt", 0, 0, 50),
                make_entry("bar.txt", 1, 0, 75),
            ],
            vec![make_shard_ref(1), make_shard_ref(2)],
        );

        let reopened = reopen(&built);

        assert_eq!(reopened.entry_count(), 2);
        assert_eq!(reopened.lookup("foo.txt").unwrap().size, 50);
        assert_eq!(reopened.lookup("bar.txt").unwrap().size, 75);
        assert_eq!(reopened.shards().len(), 2);
    }
}