Skip to main content

void_core/metadata/
manifest_tree.rs

1//! Indexed tree manifest with O(log N) single-file lookup.
2//!
3//! `TreeManifest` is the single type for both building and reading manifests.
4//! It stores file entries in an indexed CBOR format that supports binary search
5//! without deserializing the entire file list.
6//!
7//! # Format
8//!
9//! ```text
10//! CBOR map {
11//!   "offsets":     [u32, ...]            // byte offsets into entries blob, sorted by path
12//!   "entries":     <byte string>         // concatenated per-entry CBOR, sorted by path
13//!   "shards":      [ShardReference, ...] // shard references (parsed eagerly)
14//!   "paths_hash":  <32 bytes>            // SHA256(sorted paths joined by \0)
15//!   "total_files": u64
16//!   "total_bytes": u64
17//! }
18//! ```
19//!
20//! # Usage
21//!
22//! ```ignore
23//! // Build (seal pipeline):
24//! let manifest = TreeManifest::builder()
25//!     .files(files)
26//!     .shards(shards)
27//!     .paths_hash(hash)
28//!     .total_files(n)
29//!     .total_bytes(b)
30//!     .build()?;
31//!
32//! // Read (from decrypted bytes):
33//! let manifest = TreeManifest::open(bytes)?;
34//! let entry = manifest.lookup("src/main.rs")?;
35//! ```
36
37use super::types::{ManifestEntry, ShardReference};
38// Note: types.rs still has the old TreeManifest struct. Once all consumers
39// migrate to this module's TreeManifest, the old struct will be removed.
40use crate::{Result, VoidError};
41
42use serde::{Deserialize, Serialize};
43
/// One immediate child of a directory, as produced by [`TreeManifest::list_dir`].
///
/// Comparable with `==` so listings can be asserted on directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DirChild {
    /// Final path component only (file or directory name, never a full path).
    pub name: String,
    /// `true` when this child is a directory.
    pub is_dir: bool,
    /// File size in bytes; `0` for directories.
    pub size: u64,
    /// Line count; `0` for directories or when unknown.
    pub lines: u32,
}
56
57/// CBOR wire format for the indexed manifest.
58#[derive(Serialize, Deserialize)]
59struct Wire {
60    offsets: Vec<u32>,
61    /// Concatenated per-entry CBOR. Serialized as CBOR byte string for efficiency
62    /// (avoids encoding each byte as a separate CBOR integer).
63    #[serde(with = "cbor_bytes")]
64    entries: Vec<u8>,
65    shards: Vec<ShardReference>,
66    paths_hash: [u8; 32],
67    total_files: u64,
68    total_bytes: u64,
69}
70
71/// Serde helper to serialize `Vec<u8>` as a CBOR byte string instead of an
72/// array of integers. Equivalent to `serde_bytes` but without the dependency.
73mod cbor_bytes {
74    use serde::{Deserializer, Serializer};
75
76    pub fn serialize<S: Serializer>(bytes: &Vec<u8>, s: S) -> Result<S::Ok, S::Error> {
77        s.serialize_bytes(bytes)
78    }
79
80    pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<u8>, D::Error> {
81        struct ByteVisitor;
82        impl<'de> serde::de::Visitor<'de> for ByteVisitor {
83            type Value = Vec<u8>;
84            fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
85                f.write_str("byte string")
86            }
87            fn visit_bytes<E: serde::de::Error>(self, v: &[u8]) -> Result<Vec<u8>, E> {
88                Ok(v.to_vec())
89            }
90            fn visit_byte_buf<E: serde::de::Error>(self, v: Vec<u8>) -> Result<Vec<u8>, E> {
91                Ok(v)
92            }
93        }
94        d.deserialize_byte_buf(ByteVisitor)
95    }
96}
97
98/// Indexed tree manifest.
99///
100/// Holds file entries as raw CBOR bytes with a sorted offset table for
101/// O(log N) single-file lookup. Metadata (shards, totals) is parsed eagerly.
102///
103/// Constructed via [`TreeManifestBuilder`] or [`TreeManifest::open`].
104pub struct TreeManifest {
105    /// Byte offsets into `entries` for each file, sorted by path.
106    offsets: Vec<u32>,
107    /// Concatenated CBOR-serialized `ManifestEntry` values, sorted by path.
108    entries: Vec<u8>,
109    /// Shard references (parsed eagerly).
110    shards: Vec<ShardReference>,
111    /// SHA256 of sorted paths joined by \0.
112    paths_hash: [u8; 32],
113    /// Total file count.
114    total_files: u64,
115    /// Total bytes across all files.
116    total_bytes: u64,
117    /// Serialized CBOR bytes (cached from build or open).
118    serialized: Vec<u8>,
119}
120
121impl TreeManifest {
122    /// Create a builder for constructing a new manifest.
123    pub fn builder() -> TreeManifestBuilder {
124        TreeManifestBuilder::default()
125    }
126
127    /// Open a manifest from decrypted CBOR bytes.
128    ///
129    /// Parses the outer structure (offsets, shards, totals) but does NOT
130    /// parse individual file entries — those stay as raw bytes until
131    /// accessed via `lookup()` or `iter()`.
132    pub(crate) fn open(data: Vec<u8>) -> Result<Self> {
133        let wire: Wire = ciborium::from_reader(&data[..])
134            .map_err(|e| VoidError::Serialization(format!("tree manifest: {e}")))?;
135
136        Ok(Self {
137            offsets: wire.offsets,
138            entries: wire.entries,
139            shards: wire.shards,
140            paths_hash: wire.paths_hash,
141            total_files: wire.total_files,
142            total_bytes: wire.total_bytes,
143            serialized: data,
144        })
145    }
146
147    /// Load and decrypt the TreeManifest from a commit.
148    ///
149    /// Extracts the manifest CID from the commit, fetches the encrypted blob
150    /// from the store, decrypts it with the reader's content key, and parses it.
151    ///
152    /// Returns `None` if the commit has no `manifest_cid`.
153    pub fn from_commit(
154        store: &impl crate::store::ObjectStoreExt,
155        commit: &crate::metadata::Commit,
156        reader: &crate::crypto::CommitReader,
157    ) -> Result<Option<Self>> {
158        let manifest_cid = match &commit.manifest_cid {
159            Some(cid) => cid,
160            None => return Ok(None),
161        };
162        let mcid = crate::cid::VoidCid::from_bytes(manifest_cid.as_bytes())?;
163        let encrypted: void_crypto::EncryptedManifest = store.get_blob(&mcid)?;
164        let decrypted = encrypted.decrypt(reader.content_key().as_bytes())?;
165        Self::open(decrypted).map(Some)
166    }
167
168    /// Returns the serialized CBOR bytes for encryption and storage.
169    pub fn as_bytes(&self) -> &[u8] {
170        &self.serialized
171    }
172
173    /// Look up a single file by path. O(log N).
174    ///
175    /// Binary searches the offset table, parsing one entry per probe to
176    /// compare paths.
177    pub fn lookup(&self, path: &str) -> Result<ManifestEntry> {
178        let n = self.offsets.len();
179        if n == 0 {
180            return Err(VoidError::NotFound(path.to_string()));
181        }
182
183        let mut lo = 0usize;
184        let mut hi = n;
185
186        while lo < hi {
187            let mid = lo + (hi - lo) / 2;
188            let entry = self.parse_entry_at(mid)?;
189
190            match entry.path.as_str().cmp(path) {
191                std::cmp::Ordering::Equal => return Ok(entry),
192                std::cmp::Ordering::Less => lo = mid + 1,
193                std::cmp::Ordering::Greater => hi = mid,
194            }
195        }
196
197        Err(VoidError::NotFound(path.to_string()))
198    }
199
200    /// Iterate all file entries in path-sorted order. O(N).
201    pub fn iter(&self) -> ManifestIter<'_> {
202        ManifestIter {
203            manifest: self,
204            index: 0,
205        }
206    }
207
208    /// Returns the shard references.
209    pub fn shards(&self) -> &[ShardReference] {
210        &self.shards
211    }
212
213    /// Total number of files.
214    pub fn total_files(&self) -> u64 {
215        self.total_files
216    }
217
218    /// Total bytes across all files.
219    pub fn total_bytes(&self) -> u64 {
220        self.total_bytes
221    }
222
223    /// SHA256 hash of sorted paths.
224    pub fn paths_hash(&self) -> &[u8; 32] {
225        &self.paths_hash
226    }
227
228    /// Number of file entries.
229    pub fn entry_count(&self) -> usize {
230        self.offsets.len()
231    }
232
233    /// List the immediate children of a directory.
234    ///
235    /// Returns files and subdirectories directly under `path`.
236    /// Use `""` for the root directory.
237    ///
238    /// Entries are sorted by path, so we can efficiently find the range of
239    /// entries under a prefix and derive the directory listing.
240    pub fn list_dir(&self, path: &str) -> Result<Vec<DirChild>> {
241        let prefix = if path.is_empty() {
242            String::new()
243        } else {
244            format!("{}/", path.trim_end_matches('/'))
245        };
246        let mut children: std::collections::BTreeMap<String, DirChild> = std::collections::BTreeMap::new();
247
248        for entry_result in self.iter() {
249            let entry = entry_result?;
250
251            // Check if entry is under this directory
252            let relative = if prefix.is_empty() {
253                entry.path.as_str()
254            } else if let Some(rest) = entry.path.strip_prefix(&prefix) {
255                rest
256            } else {
257                continue;
258            };
259
260            // Get the immediate child name
261            if let Some(slash_pos) = relative.find('/') {
262                // Subdirectory — register as a dir child
263                let dir_name = &relative[..slash_pos];
264                children
265                    .entry(dir_name.to_string())
266                    .or_insert_with(|| DirChild {
267                        name: dir_name.to_string(),
268                        is_dir: true,
269                        size: 0,
270                        lines: 0,
271                    });
272            } else {
273                // Direct file child
274                children.insert(
275                    relative.to_string(),
276                    DirChild {
277                        name: relative.to_string(),
278                        is_dir: false,
279                        size: entry.size,
280                        lines: entry.lines,
281                    },
282                );
283            }
284        }
285
286        if children.is_empty() && !path.is_empty() {
287            // Check if this directory even exists by looking for any entry with the prefix
288            let has_entries = self.iter().any(|r| {
289                r.ok()
290                    .map(|e| e.path.starts_with(&prefix))
291                    .unwrap_or(false)
292            });
293            if !has_entries {
294                return Err(VoidError::NotFound(format!("directory: {}", path)));
295            }
296        }
297
298        Ok(children.into_values().collect())
299    }
300
301    /// Group file entries by shard index.
302    ///
303    /// Returns a vec indexed by shard position, where each element contains the
304    /// `ManifestEntry` items stored in that shard. Callers can iterate shard-by-shard
305    /// to decrypt each shard once and extract all files from it.
306    pub fn entries_by_shard(&self) -> Result<Vec<Vec<ManifestEntry>>> {
307        let mut groups: Vec<Vec<ManifestEntry>> = vec![Vec::new(); self.shards.len()];
308        for entry_result in self.iter() {
309            let entry = entry_result?;
310            let idx = entry.shard_index as usize;
311            if idx < groups.len() {
312                groups[idx].push(entry);
313            }
314        }
315        Ok(groups)
316    }
317
318    /// Parse the entry at the given index in the offset table.
319    fn parse_entry_at(&self, index: usize) -> Result<ManifestEntry> {
320        let start = self.offsets[index] as usize;
321        let end = if index + 1 < self.offsets.len() {
322            self.offsets[index + 1] as usize
323        } else {
324            self.entries.len()
325        };
326
327        if start > self.entries.len() || end > self.entries.len() || start > end {
328            return Err(VoidError::Serialization(format!(
329                "manifest entry {index}: offset {start}..{end} out of range (entries len {})",
330                self.entries.len()
331            )));
332        }
333
334        ciborium::from_reader(&self.entries[start..end])
335            .map_err(|e| VoidError::Serialization(format!("manifest entry {index}: {e}")))
336    }
337}
338
339/// Builder for constructing a [`TreeManifest`].
340///
341/// Sorts entries by path and serializes into the indexed CBOR format.
342#[derive(Default)]
343pub struct TreeManifestBuilder {
344    files: Vec<ManifestEntry>,
345    shards: Vec<ShardReference>,
346    paths_hash: [u8; 32],
347    total_files: u64,
348    total_bytes: u64,
349}
350
351impl TreeManifestBuilder {
352    /// Set the file entries. Will be sorted by path during `build()`.
353    pub fn files(mut self, files: Vec<ManifestEntry>) -> Self {
354        self.files = files;
355        self
356    }
357
358    /// Set the shard references.
359    pub fn shards(mut self, shards: Vec<ShardReference>) -> Self {
360        self.shards = shards;
361        self
362    }
363
364    /// Set the paths hash (SHA256 of sorted paths joined by \0).
365    pub fn paths_hash(mut self, hash: [u8; 32]) -> Self {
366        self.paths_hash = hash;
367        self
368    }
369
370    /// Set the total file count.
371    pub fn total_files(mut self, n: u64) -> Self {
372        self.total_files = n;
373        self
374    }
375
376    /// Set the total bytes across all files.
377    pub fn total_bytes(mut self, n: u64) -> Self {
378        self.total_bytes = n;
379        self
380    }
381
382    /// Build the manifest, sorting entries and serializing to indexed CBOR.
383    pub fn build(mut self) -> Result<TreeManifest> {
384        // Sort entries by path for binary search
385        self.files.sort_by(|a, b| a.path.cmp(&b.path));
386
387        // Serialize each entry individually into a contiguous byte blob
388        let mut entries_blob = Vec::new();
389        let mut offsets = Vec::with_capacity(self.files.len());
390
391        for entry in &self.files {
392            offsets.push(entries_blob.len() as u32);
393            ciborium::into_writer(entry, &mut entries_blob)
394                .map_err(|e| VoidError::Serialization(format!("manifest entry: {e}")))?;
395        }
396
397        let wire = Wire {
398            offsets: offsets.clone(),
399            entries: entries_blob.clone(),
400            shards: self.shards.clone(),
401            paths_hash: self.paths_hash,
402            total_files: self.total_files,
403            total_bytes: self.total_bytes,
404        };
405
406        let mut serialized = Vec::new();
407        ciborium::into_writer(&wire, &mut serialized)
408            .map_err(|e| VoidError::Serialization(format!("tree manifest: {e}")))?;
409
410        Ok(TreeManifest {
411            offsets,
412            entries: entries_blob,
413            shards: self.shards,
414            paths_hash: self.paths_hash,
415            total_files: self.total_files,
416            total_bytes: self.total_bytes,
417            serialized,
418        })
419    }
420}
421
422/// Iterator over manifest entries in path-sorted order.
423pub struct ManifestIter<'a> {
424    manifest: &'a TreeManifest,
425    index: usize,
426}
427
428impl<'a> Iterator for ManifestIter<'a> {
429    type Item = Result<ManifestEntry>;
430
431    fn next(&mut self) -> Option<Self::Item> {
432        if self.index >= self.manifest.offsets.len() {
433            return None;
434        }
435        let result = self.manifest.parse_entry_at(self.index);
436        self.index += 1;
437        Some(result)
438    }
439
440    fn size_hint(&self) -> (usize, Option<usize>) {
441        let remaining = self.manifest.offsets.len() - self.index;
442        (remaining, Some(remaining))
443    }
444}
445
446impl<'a> ExactSizeIterator for ManifestIter<'a> {}
447
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ContentHash;
    use void_crypto::ShardCid;

    /// Entry fixture: zero content hash, mode 0o644, `length == size`,
    /// no line count, a single shard.
    fn make_entry(path: &str, shard_index: u32, offset: u64, size: u64) -> ManifestEntry {
        ManifestEntry {
            path: path.to_string(),
            content_hash: ContentHash::ZERO,
            size,
            mode: 0o644,
            shard_index,
            offset,
            length: size,
            lines: 0,
            shard_count: 1,
        }
    }

    /// Shard reference fixture whose CID is the single byte `id`.
    fn make_shard_ref(id: u8) -> ShardReference {
        ShardReference {
            cid: ShardCid::from_bytes(vec![id]),
            size_compressed: 100,
            size_decompressed: 200,
            wrapped_key: None,
        }
    }

    /// Build a manifest via the builder, deriving the totals from `files`.
    fn build_manifest(files: Vec<ManifestEntry>, shards: Vec<ShardReference>) -> TreeManifest {
        let total_files = files.len() as u64;
        let total_bytes = files.iter().map(|f| f.size).sum();
        TreeManifest::builder()
            .files(files)
            .shards(shards)
            .paths_hash([0u8; 32])
            .total_files(total_files)
            .total_bytes(total_bytes)
            .build()
            .unwrap()
    }

    /// Three-entry fixture shared by the binary-search boundary tests.
    fn abc_manifest() -> TreeManifest {
        build_manifest(
            vec![
                make_entry("aaa", 0, 0, 10),
                make_entry("bbb", 0, 10, 20),
                make_entry("ccc", 0, 30, 30),
            ],
            vec![make_shard_ref(1)],
        )
    }

    /// Small tree with nesting and look-alike sibling names for `list_dir`.
    fn tree_manifest() -> TreeManifest {
        build_manifest(
            vec![
                make_entry("Cargo.toml", 0, 0, 200),
                make_entry("src/main.rs", 0, 200, 500),
                make_entry("src/util/mod.rs", 0, 700, 50),
                make_entry("src/util/io.rs", 0, 750, 60),
                make_entry("src-old/x.rs", 0, 810, 5),
                make_entry("src.txt", 0, 815, 7),
            ],
            vec![make_shard_ref(1)],
        )
    }

    /// Reduce a listing to `(name, is_dir, size)` triples for easy comparison.
    fn summarize(children: &[DirChild]) -> Vec<(&str, bool, u64)> {
        children
            .iter()
            .map(|c| (c.name.as_str(), c.is_dir, c.size))
            .collect()
    }

    // -- Round-trip: build → as_bytes → open → lookup --

    #[test]
    fn round_trip_single_file() {
        let manifest = build_manifest(
            vec![make_entry("hello.txt", 0, 0, 42)],
            vec![make_shard_ref(1)],
        );

        let reopened = TreeManifest::open(manifest.as_bytes().to_vec()).unwrap();

        assert_eq!(reopened.entry_count(), 1);
        assert_eq!(reopened.total_files(), 1);
        assert_eq!(reopened.total_bytes(), 42);
        assert_eq!(reopened.shards().len(), 1);

        let entry = reopened.lookup("hello.txt").unwrap();
        assert_eq!(entry.path, "hello.txt");
        assert_eq!(entry.size, 42);
        assert_eq!(entry.shard_index, 0);
    }

    #[test]
    fn round_trip_multiple_files() {
        let files = vec![
            make_entry("src/main.rs", 0, 0, 500),
            make_entry("Cargo.toml", 0, 500, 200),
            make_entry("README.md", 1, 0, 100),
            make_entry("src/lib.rs", 0, 700, 300),
            make_entry("tests/test.rs", 1, 100, 150),
        ];
        let manifest = build_manifest(files.clone(), vec![make_shard_ref(1), make_shard_ref(2)]);

        let reopened = TreeManifest::open(manifest.as_bytes().to_vec()).unwrap();
        assert_eq!(reopened.entry_count(), 5);

        for file in &files {
            let entry = reopened.lookup(&file.path).unwrap();
            assert_eq!(entry.path, file.path);
            assert_eq!(entry.size, file.size);
            assert_eq!(entry.shard_index, file.shard_index);
            assert_eq!(entry.offset, file.offset);
        }
    }

    #[test]
    fn round_trip_preserves_all_fields() {
        let original = ManifestEntry {
            path: "deep/nested/path/file.rs".to_string(),
            content_hash: ContentHash([0xAB; 32]),
            size: 9999,
            mode: 0o755,
            shard_index: 3,
            offset: 1234,
            length: 9999,
            lines: 42,
            shard_count: 1,
        };

        let manifest = build_manifest(vec![original.clone()], vec![make_shard_ref(1)]);
        let reopened = TreeManifest::open(manifest.as_bytes().to_vec()).unwrap();

        let got = reopened.lookup("deep/nested/path/file.rs").unwrap();
        assert_eq!(got.path, original.path);
        assert_eq!(got.content_hash, original.content_hash);
        assert_eq!(got.size, original.size);
        assert_eq!(got.mode, original.mode);
        assert_eq!(got.shard_index, original.shard_index);
        assert_eq!(got.offset, original.offset);
        assert_eq!(got.length, original.length);
        assert_eq!(got.lines, original.lines);
        assert_eq!(got.shard_count, original.shard_count);
    }

    // -- Builder sorts unsorted input --

    #[test]
    fn builder_sorts_unsorted_input() {
        let manifest = build_manifest(
            vec![
                make_entry("c.txt", 0, 0, 10),
                make_entry("a.txt", 0, 10, 20),
                make_entry("b.txt", 0, 30, 30),
            ],
            vec![make_shard_ref(1)],
        );

        assert_eq!(manifest.lookup("a.txt").unwrap().size, 20);
        assert_eq!(manifest.lookup("b.txt").unwrap().size, 30);
        assert_eq!(manifest.lookup("c.txt").unwrap().size, 10);
    }

    // -- Iterator --

    #[test]
    fn iter_returns_sorted_order() {
        let manifest = build_manifest(
            vec![
                make_entry("z.txt", 0, 0, 10),
                make_entry("a.txt", 0, 10, 20),
                make_entry("m.txt", 0, 30, 30),
            ],
            vec![make_shard_ref(1)],
        );

        let paths: Vec<String> = manifest.iter().map(|r| r.unwrap().path).collect();
        assert_eq!(paths, vec!["a.txt", "m.txt", "z.txt"]);
    }

    #[test]
    fn iter_exact_size() {
        let manifest = build_manifest(
            vec![
                make_entry("a.txt", 0, 0, 10),
                make_entry("b.txt", 0, 10, 20),
                make_entry("c.txt", 0, 30, 30),
            ],
            vec![make_shard_ref(1)],
        );

        assert_eq!(manifest.iter().len(), 3);
    }

    // -- Lookup miss --

    #[test]
    fn lookup_nonexistent_returns_not_found() {
        let manifest = build_manifest(
            vec![make_entry("a.txt", 0, 0, 10)],
            vec![make_shard_ref(1)],
        );

        let err = manifest.lookup("nonexistent.txt").unwrap_err();
        assert!(matches!(err, VoidError::NotFound(_)));
    }

    // -- Empty manifest --

    #[test]
    fn empty_manifest() {
        let manifest = build_manifest(vec![], vec![]);

        assert_eq!(manifest.entry_count(), 0);
        assert_eq!(manifest.total_files(), 0);
        assert_eq!(manifest.total_bytes(), 0);
        assert_eq!(manifest.shards().len(), 0);
        assert_eq!(manifest.iter().count(), 0);

        let err = manifest.lookup("anything").unwrap_err();
        assert!(matches!(err, VoidError::NotFound(_)));
    }

    // -- Binary search boundary --

    #[test]
    fn lookup_first_entry() {
        assert_eq!(abc_manifest().lookup("aaa").unwrap().size, 10);
    }

    #[test]
    fn lookup_last_entry() {
        assert_eq!(abc_manifest().lookup("ccc").unwrap().size, 30);
    }

    #[test]
    fn lookup_middle_entry() {
        assert_eq!(abc_manifest().lookup("bbb").unwrap().size, 20);
    }

    // -- Scale --

    #[test]
    fn lookup_100_files() {
        let files: Vec<ManifestEntry> = (0..100)
            .map(|i| make_entry(&format!("file_{:04}.txt", i), i % 4, (i as u64) * 100, 100))
            .collect();

        let manifest = build_manifest(
            files.clone(),
            vec![
                make_shard_ref(1),
                make_shard_ref(2),
                make_shard_ref(3),
                make_shard_ref(4),
            ],
        );

        assert_eq!(manifest.entry_count(), 100);

        for file in &files {
            let entry = manifest.lookup(&file.path).unwrap();
            assert_eq!(entry.path, file.path);
            assert_eq!(entry.shard_index, file.shard_index);
        }

        assert!(manifest.lookup("file_9999.txt").is_err());
    }

    // -- Deep paths --

    #[test]
    fn lookup_deep_nested_paths() {
        let manifest = build_manifest(
            vec![
                make_entry("a/b/c/d/e/f/g.txt", 0, 0, 10),
                make_entry("a/b/c/d/e/f/h.txt", 0, 10, 20),
                make_entry("x/y/z.txt", 0, 30, 30),
            ],
            vec![make_shard_ref(1)],
        );

        assert_eq!(manifest.lookup("a/b/c/d/e/f/g.txt").unwrap().size, 10);
        assert_eq!(manifest.lookup("a/b/c/d/e/f/h.txt").unwrap().size, 20);
        assert_eq!(manifest.lookup("x/y/z.txt").unwrap().size, 30);
    }

    // -- Metadata preservation --

    #[test]
    fn preserves_shards_and_metadata() {
        let shards = vec![
            ShardReference {
                cid: ShardCid::from_bytes(vec![1, 2, 3]),
                size_compressed: 500,
                size_decompressed: 1000,
                wrapped_key: None,
            },
            ShardReference {
                cid: ShardCid::from_bytes(vec![4, 5, 6]),
                size_compressed: 300,
                size_decompressed: 600,
                wrapped_key: Some(void_crypto::WrappedKey::from_bytes(vec![7, 8, 9])),
            },
        ];

        let manifest = TreeManifest::builder()
            .files(vec![make_entry("test.txt", 0, 0, 100)])
            .shards(shards)
            .paths_hash([0xAB; 32])
            .total_files(1)
            .total_bytes(100)
            .build()
            .unwrap();

        assert_eq!(manifest.paths_hash(), &[0xAB; 32]);
        assert_eq!(manifest.total_files(), 1);
        assert_eq!(manifest.total_bytes(), 100);
        assert_eq!(manifest.shards().len(), 2);
        assert_eq!(manifest.shards()[0].size_compressed, 500);
        assert_eq!(manifest.shards()[1].size_compressed, 300);
        assert!(manifest.shards()[1].wrapped_key.is_some());
    }

    // -- list_dir --

    #[test]
    fn list_dir_root_lists_files_and_dirs() {
        let manifest = tree_manifest();
        let root = manifest.list_dir("").unwrap();

        assert_eq!(
            summarize(&root),
            vec![
                ("Cargo.toml", false, 200),
                ("src", true, 0),
                ("src-old", true, 0),
                ("src.txt", false, 7),
            ]
        );
    }

    #[test]
    fn list_dir_subdirectory_excludes_lookalike_siblings() {
        let manifest = tree_manifest();
        let expected = vec![("main.rs", false, 500), ("util", true, 0)];

        // "src-old/..." and "src.txt" share the "src" stem but are not under "src/".
        assert_eq!(summarize(&manifest.list_dir("src").unwrap()), expected);
        // A trailing slash names the same directory.
        assert_eq!(summarize(&manifest.list_dir("src/").unwrap()), expected);
    }

    #[test]
    fn list_dir_nested_directory() {
        let manifest = tree_manifest();
        let listing = manifest.list_dir("src/util").unwrap();

        assert_eq!(
            summarize(&listing),
            vec![("io.rs", false, 60), ("mod.rs", false, 50)]
        );
    }

    #[test]
    fn list_dir_missing_directory_returns_not_found() {
        let manifest = tree_manifest();

        let err = manifest.list_dir("does/not/exist").unwrap_err();
        assert!(matches!(err, VoidError::NotFound(_)));

        // A file path is not a directory.
        let err = manifest.list_dir("Cargo.toml").unwrap_err();
        assert!(matches!(err, VoidError::NotFound(_)));
    }

    #[test]
    fn list_dir_root_of_empty_manifest_is_empty() {
        let manifest = build_manifest(vec![], vec![]);
        assert!(manifest.list_dir("").unwrap().is_empty());
    }

    // -- entries_by_shard --

    #[test]
    fn entries_by_shard_groups_correctly() {
        let manifest = build_manifest(
            vec![
                make_entry("a.txt", 0, 0, 10),
                make_entry("b.txt", 1, 0, 20),
                make_entry("c.txt", 0, 10, 30),
                make_entry("d.txt", 2, 0, 40),
                make_entry("e.txt", 1, 20, 50),
            ],
            vec![make_shard_ref(1), make_shard_ref(2), make_shard_ref(3)],
        );

        let groups = manifest.entries_by_shard().unwrap();
        assert_eq!(groups.len(), 3);

        // Shard 0: a.txt, c.txt
        assert_eq!(groups[0].len(), 2);
        assert_eq!(groups[0][0].path, "a.txt");
        assert_eq!(groups[0][1].path, "c.txt");

        // Shard 1: b.txt, e.txt
        assert_eq!(groups[1].len(), 2);
        assert_eq!(groups[1][0].path, "b.txt");
        assert_eq!(groups[1][1].path, "e.txt");

        // Shard 2: d.txt
        assert_eq!(groups[2].len(), 1);
        assert_eq!(groups[2][0].path, "d.txt");
    }

    #[test]
    fn entries_by_shard_empty_manifest() {
        let manifest = build_manifest(vec![], vec![]);
        let groups = manifest.entries_by_shard().unwrap();
        assert_eq!(groups.len(), 0);
    }

    // -- Corrupt data --

    #[test]
    fn open_corrupt_data_returns_error() {
        let result = TreeManifest::open(vec![0xFF, 0xFF, 0xFF]);
        assert!(result.is_err());
    }

    #[test]
    fn open_empty_data_returns_error() {
        let result = TreeManifest::open(vec![]);
        assert!(result.is_err());
    }

    // -- as_bytes round-trip --

    #[test]
    fn as_bytes_produces_valid_manifest() {
        let manifest = build_manifest(
            vec![
                make_entry("foo.txt", 0, 0, 50),
                make_entry("bar.txt", 1, 0, 75),
            ],
            vec![make_shard_ref(1), make_shard_ref(2)],
        );

        // Simulate encrypt → store → load → decrypt cycle
        let bytes = manifest.as_bytes().to_vec();
        let reopened = TreeManifest::open(bytes).unwrap();

        assert_eq!(reopened.entry_count(), 2);
        assert_eq!(reopened.lookup("foo.txt").unwrap().size, 50);
        assert_eq!(reopened.lookup("bar.txt").unwrap().size, 75);
        assert_eq!(reopened.shards().len(), 2);
    }
}