// hashtree_core/src/reader.rs

1//! Tree reader and traversal utilities
2//!
3//! Read files and directories from content-addressed storage
4
5use std::collections::HashMap;
6use std::sync::Arc;
7
8use crate::codec::{decode_tree_node, is_directory_node, is_tree_node, try_decode_tree_node};
9use crate::store::Store;
10use crate::types::{to_hex, Cid, Hash, Link, LinkType, TreeNode};
11
12use crate::crypto::{decrypt_chk, EncryptionKey};
13
14/// Tree entry for directory listings
/// Tree entry for directory listings
///
/// One named child of a directory node, as returned by
/// `TreeReader::list_directory`.
#[derive(Debug, Clone)]
pub struct TreeEntry {
    // Entry name; falls back to the hex hash when the link is unnamed.
    pub name: String,
    // Content hash of the child.
    pub hash: Hash,
    // Size in bytes as advertised by the link.
    pub size: u64,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
    /// Optional metadata (createdAt, mimeType, thumbnail, etc.)
    pub meta: Option<HashMap<String, serde_json::Value>>,
}
27
/// Walk entry for tree traversal
///
/// One node visited by `TreeReader::walk`; `path` is slash-joined from the
/// link names along the way.
#[derive(Debug, Clone)]
pub struct WalkEntry {
    // Slash-separated path from the walk root ("" for the root itself).
    pub path: String,
    // Content hash of this node.
    pub hash: Hash,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    // Byte size: blob length for leaves, sum of link sizes for tree nodes.
    pub size: u64,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
}
39
/// TreeReader - reads and traverses merkle trees
///
/// Thin, read-only wrapper around a shared content-addressed [`Store`].
pub struct TreeReader<S: Store> {
    // Shared handle to the backing store; cloned `Arc`s are cheap.
    store: Arc<S>,
}
44
45impl<S: Store> TreeReader<S> {
    /// Create a reader over the given content-addressed store.
    pub fn new(store: Arc<S>) -> Self {
        Self { store }
    }
49
50    /// Get raw data by hash
51    pub async fn get_blob(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
52        self.store
53            .get(hash)
54            .await
55            .map_err(|e| ReaderError::Store(e.to_string()))
56    }
57
58    /// Get and decode a tree node
59    pub async fn get_tree_node(&self, hash: &Hash) -> Result<Option<TreeNode>, ReaderError> {
60        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
61            Some(d) => d,
62            None => return Ok(None),
63        };
64
65        if !is_tree_node(&data) {
66            return Ok(None); // It's a blob, not a tree
67        }
68
69        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
70        Ok(Some(node))
71    }
72
73    /// Check if hash points to a tree node or blob
74    pub async fn is_tree(&self, hash: &Hash) -> Result<bool, ReaderError> {
75        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
76            Some(d) => d,
77            None => return Ok(false),
78        };
79        Ok(is_tree_node(&data))
80    }
81
82    /// Check if hash points to a directory (tree with named links)
83    /// vs a chunked file (tree with unnamed links) or raw blob
84    pub async fn is_directory(&self, hash: &Hash) -> Result<bool, ReaderError> {
85        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
86            Some(d) => d,
87            None => return Ok(false),
88        };
89        Ok(is_directory_node(&data))
90    }
91
92    /// Read content by CID (handles both encrypted and public content)
93    ///
94    /// This is the unified read method that handles decryption automatically
95    /// when the CID contains an encryption key.
96    pub async fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>, ReaderError> {
97        if let Some(key) = cid.key {
98            self.get_encrypted(&cid.hash, &key).await
99        } else {
100            self.read_file(&cid.hash).await
101        }
102    }
103
104    /// Read encrypted content by hash and key (internal)
105    async fn get_encrypted(
106        &self,
107        hash: &Hash,
108        key: &EncryptionKey,
109    ) -> Result<Option<Vec<u8>>, ReaderError> {
110        let encrypted_data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
111            Some(d) => d,
112            None => return Ok(None),
113        };
114
115        // Decrypt the data
116        let decrypted = decrypt_chk(&encrypted_data, key)
117            .map_err(|e| ReaderError::Decryption(e.to_string()))?;
118
119        // Check if it's a tree node
120        if is_tree_node(&decrypted) {
121            let node = decode_tree_node(&decrypted)?;
122            let assembled = self.assemble_encrypted_chunks(&node).await?;
123            return Ok(Some(assembled));
124        }
125
126        // Single chunk data
127        Ok(Some(decrypted))
128    }
129
130    /// Assemble encrypted chunks from tree
131    async fn assemble_encrypted_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
132        let mut parts: Vec<Vec<u8>> = Vec::new();
133
134        for link in &node.links {
135            let chunk_key = link.key.ok_or(ReaderError::MissingKey)?;
136
137            let encrypted_child = self
138                .store
139                .get(&link.hash)
140                .await
141                .map_err(|e| ReaderError::Store(e.to_string()))?
142                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
143
144            let decrypted = decrypt_chk(&encrypted_child, &chunk_key)
145                .map_err(|e| ReaderError::Decryption(e.to_string()))?;
146
147            if is_tree_node(&decrypted) {
148                // Intermediate tree node - recurse
149                let child_node = decode_tree_node(&decrypted)?;
150                let child_data = Box::pin(self.assemble_encrypted_chunks(&child_node)).await?;
151                parts.push(child_data);
152            } else {
153                // Leaf data chunk
154                parts.push(decrypted);
155            }
156        }
157
158        let total_len: usize = parts.iter().map(|p| p.len()).sum();
159        let mut result = Vec::with_capacity(total_len);
160        for part in parts {
161            result.extend_from_slice(&part);
162        }
163
164        Ok(result)
165    }
166
167    /// Read a complete file (reassemble chunks if needed)
168    /// For unencrypted content only - use `get()` for unified access
169    pub async fn read_file(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
170        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
171            Some(d) => d,
172            None => return Ok(None),
173        };
174
175        // Check if it's a tree (chunked file) or raw blob
176        if !is_tree_node(&data) {
177            return Ok(Some(data)); // Direct blob
178        }
179
180        // It's a tree - reassemble chunks
181        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
182        let assembled = self.assemble_chunks(&node).await?;
183        Ok(Some(assembled))
184    }
185
186    /// Read a byte range from a file (fetches only necessary chunks)
187    ///
188    /// - `start`: Starting byte offset (inclusive)
189    /// - `end`: Ending byte offset (exclusive), or None to read to end
190    ///
191    /// For unencrypted content only - encrypted range reads not yet supported.
192    pub async fn read_file_range(
193        &self,
194        hash: &Hash,
195        start: u64,
196        end: Option<u64>,
197    ) -> Result<Option<Vec<u8>>, ReaderError> {
198        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
199            Some(d) => d,
200            None => return Ok(None),
201        };
202
203        // Single blob - just slice it
204        if !is_tree_node(&data) {
205            let start_idx = start as usize;
206            let end_idx = end.map(|e| e as usize).unwrap_or(data.len());
207            if start_idx >= data.len() {
208                return Ok(Some(vec![]));
209            }
210            let end_idx = end_idx.min(data.len());
211            return Ok(Some(data[start_idx..end_idx].to_vec()));
212        }
213
214        // It's a chunked file - fetch only needed chunks
215        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
216        let range_data = self.assemble_chunks_range(&node, start, end).await?;
217        Ok(Some(range_data))
218    }
219
    /// Assemble only the chunks needed for a byte range
    ///
    /// Flattens the tree into ordered `(hash, offset, size)` leaf records,
    /// then fetches only the chunks overlapping `[start, end)`. `end` is
    /// clamped to the total size; `None` means "to the end of the file".
    /// Empty or inverted ranges return an empty vector.
    async fn assemble_chunks_range(
        &self,
        node: &TreeNode,
        start: u64,
        end: Option<u64>,
    ) -> Result<Vec<u8>, ReaderError> {
        // First, flatten the tree to get all leaf chunks with their byte offsets
        let chunks_info = self.collect_chunk_offsets(node).await?;

        if chunks_info.is_empty() {
            return Ok(vec![]);
        }

        // Calculate total size and actual end
        let total_size: u64 = chunks_info.iter().map(|(_, _, size)| size).sum();
        let actual_end = end.unwrap_or(total_size).min(total_size);

        // Degenerate range (start past EOF, or end <= start) - nothing to read.
        if start >= actual_end {
            return Ok(vec![]);
        }

        // Find chunks that overlap with [start, actual_end)
        let mut result = Vec::with_capacity((actual_end - start) as usize);
        let mut current_offset = 0u64;

        for (chunk_hash, _chunk_offset, chunk_size) in &chunks_info {
            // Byte interval covered by this chunk: [chunk_start, chunk_end).
            let chunk_start = current_offset;
            let chunk_end = current_offset + chunk_size;

            // Check if this chunk overlaps with our range
            if chunk_end > start && chunk_start < actual_end {
                // Fetch this chunk
                let chunk_data = self
                    .store
                    .get(chunk_hash)
                    .await
                    .map_err(|e| ReaderError::Store(e.to_string()))?
                    .ok_or_else(|| ReaderError::MissingChunk(to_hex(chunk_hash)))?;

                // Calculate slice bounds within this chunk
                let slice_start = if start > chunk_start {
                    (start - chunk_start) as usize
                } else {
                    0
                };
                let slice_end = if actual_end < chunk_end {
                    (actual_end - chunk_start) as usize
                } else {
                    chunk_data.len()
                };

                result.extend_from_slice(&chunk_data[slice_start..slice_end]);
            }

            current_offset = chunk_end;

            // Early exit if we've passed the requested range
            if current_offset >= actual_end {
                break;
            }
        }

        Ok(result)
    }
285
286    /// Collect all leaf chunk hashes with their byte offsets
287    /// Returns Vec<(hash, offset, size)>
288    async fn collect_chunk_offsets(
289        &self,
290        node: &TreeNode,
291    ) -> Result<Vec<(Hash, u64, u64)>, ReaderError> {
292        let mut chunks = Vec::new();
293        let mut offset = 0u64;
294        self.collect_chunk_offsets_recursive(node, &mut chunks, &mut offset).await?;
295        Ok(chunks)
296    }
297
    /// Depth-first worker for `collect_chunk_offsets`.
    ///
    /// Appends `(hash, offset, size)` for every leaf chunk under `node`,
    /// advancing `offset` by each leaf's length so offsets are cumulative
    /// across the whole file.
    /// NOTE(review): this fetches each chunk's bytes just to learn its
    /// length rather than trusting `link.size` - confirm that is intended.
    async fn collect_chunk_offsets_recursive(
        &self,
        node: &TreeNode,
        chunks: &mut Vec<(Hash, u64, u64)>,
        offset: &mut u64,
    ) -> Result<(), ReaderError> {
        for link in &node.links {
            let child_data = self
                .store
                .get(&link.hash)
                .await
                .map_err(|e| ReaderError::Store(e.to_string()))?
                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;

            if is_tree_node(&child_data) {
                // Intermediate node - recurse (Box::pin for async recursion)
                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
                Box::pin(self.collect_chunk_offsets_recursive(&child_node, chunks, offset)).await?;
            } else {
                // Leaf chunk
                let size = child_data.len() as u64;
                chunks.push((link.hash, *offset, size));
                *offset += size;
            }
        }
        Ok(())
    }
325
326    /// Recursively assemble chunks from tree (unencrypted)
327    async fn assemble_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
328        let mut parts: Vec<Vec<u8>> = Vec::new();
329
330        for link in &node.links {
331            let child_data = self
332                .store
333                .get(&link.hash)
334                .await
335                .map_err(|e| ReaderError::Store(e.to_string()))?
336                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
337
338            if is_tree_node(&child_data) {
339                // Nested tree - recurse
340                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
341                parts.push(Box::pin(self.assemble_chunks(&child_node)).await?);
342            } else {
343                // Leaf blob
344                parts.push(child_data);
345            }
346        }
347
348        // Concatenate all parts
349        let total_length: usize = parts.iter().map(|p| p.len()).sum();
350        let mut result = Vec::with_capacity(total_length);
351        for part in parts {
352            result.extend_from_slice(&part);
353        }
354
355        Ok(result)
356    }
357
358    /// Read a file with streaming (returns chunks as vec)
359    pub async fn read_file_chunks(&self, hash: &Hash) -> Result<Vec<Vec<u8>>, ReaderError> {
360        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
361            Some(d) => d,
362            None => return Ok(vec![]),
363        };
364
365        if !is_tree_node(&data) {
366            return Ok(vec![data]);
367        }
368
369        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
370        self.collect_chunks(&node).await
371    }
372
373    /// Recursively collect chunks
374    async fn collect_chunks(&self, node: &TreeNode) -> Result<Vec<Vec<u8>>, ReaderError> {
375        let mut chunks = Vec::new();
376
377        for link in &node.links {
378            let child_data = self
379                .store
380                .get(&link.hash)
381                .await
382                .map_err(|e| ReaderError::Store(e.to_string()))?
383                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
384
385            if is_tree_node(&child_data) {
386                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
387                chunks.extend(Box::pin(self.collect_chunks(&child_node)).await?);
388            } else {
389                chunks.push(child_data);
390            }
391        }
392
393        Ok(chunks)
394    }
395
396    /// List directory entries
397    pub async fn list_directory(&self, hash: &Hash) -> Result<Vec<TreeEntry>, ReaderError> {
398        let node = match self.get_tree_node(hash).await? {
399            Some(n) => n,
400            None => return Ok(vec![]),
401        };
402
403        let mut entries = Vec::new();
404
405        for link in &node.links {
406            // Skip internal chunk nodes (names starting with _chunk_)
407            if let Some(ref name) = link.name {
408                if name.starts_with("_chunk_") {
409                    // This is an internal split - recurse into it
410                    let sub_entries = Box::pin(self.list_directory(&link.hash)).await?;
411                    entries.extend(sub_entries);
412                    continue;
413                }
414
415                // Skip internal group nodes (names starting with _ but not _chunk_)
416                if name.starts_with('_') {
417                    let sub_entries = Box::pin(self.list_directory(&link.hash)).await?;
418                    entries.extend(sub_entries);
419                    continue;
420                }
421            }
422
423            entries.push(TreeEntry {
424                name: link.name.clone().unwrap_or_else(|| to_hex(&link.hash)),
425                hash: link.hash,
426                size: link.size,
427                link_type: link.link_type,
428                key: link.key,
429                meta: link.meta.clone(),
430            });
431        }
432
433        Ok(entries)
434    }
435
436    /// Resolve a path within a tree
437    /// e.g., resolve_path("root/foo/bar.txt")
438    pub async fn resolve_path(&self, root_hash: &Hash, path: &str) -> Result<Option<Hash>, ReaderError> {
439        let parts: Vec<&str> = path.split('/').filter(|p| !p.is_empty()).collect();
440
441        let mut current_hash = *root_hash;
442
443        for part in parts {
444            let node = match self.get_tree_node(&current_hash).await? {
445                Some(n) => n,
446                None => return Ok(None),
447            };
448
449            if let Some(link) = self.find_link(&node, part) {
450                current_hash = link.hash;
451            } else {
452                // Check internal nodes
453                match self.find_in_subtrees(&node, part).await? {
454                    Some(hash) => current_hash = hash,
455                    None => return Ok(None),
456                }
457            }
458        }
459
460        Ok(Some(current_hash))
461    }
462
463    /// Find a link by name in a tree node
464    fn find_link(&self, node: &TreeNode, name: &str) -> Option<Link> {
465        node.links
466            .iter()
467            .find(|l| l.name.as_deref() == Some(name))
468            .cloned()
469    }
470
    /// Search for name in internal subtrees
    ///
    /// For each `_`-prefixed internal link, look for `name` among that
    /// subtree's direct links, then recurse deeper. Subtrees whose node
    /// cannot be loaded are skipped silently. Returns the first match.
    async fn find_in_subtrees(&self, node: &TreeNode, name: &str) -> Result<Option<Hash>, ReaderError> {
        for link in &node.links {
            // Only search internal nodes
            if !link.name.as_ref().map(|n| n.starts_with('_')).unwrap_or(false) {
                continue;
            }

            let sub_node = match self.get_tree_node(&link.hash).await? {
                Some(n) => n,
                None => continue,
            };

            if let Some(found) = self.find_link(&sub_node, name) {
                return Ok(Some(found.hash));
            }

            // Recurse deeper
            if let Some(deep_found) = Box::pin(self.find_in_subtrees(&sub_node, name)).await? {
                return Ok(Some(deep_found));
            }
        }

        Ok(None)
    }
496
497    /// Get total size of a tree
498    pub async fn get_size(&self, hash: &Hash) -> Result<u64, ReaderError> {
499        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
500            Some(d) => d,
501            None => return Ok(0),
502        };
503
504        if !is_tree_node(&data) {
505            return Ok(data.len() as u64);
506        }
507
508        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
509        // Calculate from children
510        let mut total = 0u64;
511        for link in &node.links {
512            total += link.size;
513        }
514        Ok(total)
515    }
516
517    /// Walk entire tree depth-first
518    pub async fn walk(&self, hash: &Hash, path: &str) -> Result<Vec<WalkEntry>, ReaderError> {
519        let mut entries = Vec::new();
520        self.walk_recursive(hash, path, &mut entries).await?;
521        Ok(entries)
522    }
523
524    async fn walk_recursive(
525        &self,
526        hash: &Hash,
527        path: &str,
528        entries: &mut Vec<WalkEntry>,
529    ) -> Result<(), ReaderError> {
530        let data = match self.store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
531            Some(d) => d,
532            None => return Ok(()),
533        };
534
535        let node = match try_decode_tree_node(&data) {
536            Some(n) => n,
537            None => {
538                entries.push(WalkEntry {
539                    path: path.to_string(),
540                    hash: *hash,
541                    link_type: LinkType::Blob,
542                    size: data.len() as u64,
543                    key: None, // TreeReader doesn't track keys
544                });
545                return Ok(());
546            }
547        };
548
549        let node_size: u64 = node.links.iter().map(|l| l.size).sum();
550        entries.push(WalkEntry {
551            path: path.to_string(),
552            hash: *hash,
553            link_type: node.node_type,
554            size: node_size,
555            key: None, // directories are not encrypted
556        });
557
558        for link in &node.links {
559            let child_path = match &link.name {
560                Some(name) => {
561                    // Skip internal chunk nodes in path
562                    if name.starts_with("_chunk_") || name.starts_with('_') {
563                        Box::pin(self.walk_recursive(&link.hash, path, entries)).await?;
564                        continue;
565                    }
566                    if path.is_empty() {
567                        name.clone()
568                    } else {
569                        format!("{}/{}", path, name)
570                    }
571                }
572                None => path.to_string(),
573            };
574
575            Box::pin(self.walk_recursive(&link.hash, &child_path, entries)).await?;
576        }
577
578        Ok(())
579    }
580}
581
582/// Verify tree integrity
583/// Checks that all referenced hashes exist
584pub async fn verify_tree<S: Store>(store: Arc<S>, root_hash: &Hash) -> Result<VerifyResult, ReaderError> {
585    let mut missing = Vec::new();
586    let mut visited = std::collections::HashSet::new();
587
588    verify_recursive(store, root_hash, &mut missing, &mut visited).await?;
589
590    Ok(VerifyResult {
591        valid: missing.is_empty(),
592        missing,
593    })
594}
595
/// Recursive worker for `verify_tree`.
///
/// `visited` holds hex-encoded hashes already checked so shared subtrees
/// (and any cycles) are verified only once; absent hashes are appended to
/// `missing` instead of aborting the walk.
async fn verify_recursive<S: Store>(
    store: Arc<S>,
    hash: &Hash,
    missing: &mut Vec<Hash>,
    visited: &mut std::collections::HashSet<String>,
) -> Result<(), ReaderError> {
    // Deduplicate: each hash is verified at most once.
    let hex = to_hex(hash);
    if visited.contains(&hex) {
        return Ok(());
    }
    visited.insert(hex);

    let data = match store.get(hash).await.map_err(|e| ReaderError::Store(e.to_string()))? {
        Some(d) => d,
        None => {
            // Record the gap and keep verifying the rest of the tree.
            missing.push(*hash);
            return Ok(());
        }
    };

    if is_tree_node(&data) {
        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
        for link in &node.links {
            // Box::pin: async recursion needs a heap-pinned future.
            Box::pin(verify_recursive(store.clone(), &link.hash, missing, visited)).await?;
        }
    }

    Ok(())
}
625
/// Result of tree verification
#[derive(Debug, Clone)]
pub struct VerifyResult {
    // True when every reachable referenced hash was present in the store.
    pub valid: bool,
    // Hashes that were referenced but not found.
    pub missing: Vec<Hash>,
}
632
/// Reader error type
#[derive(Debug, thiserror::Error)]
pub enum ReaderError {
    /// The underlying store failed; carries the store's own error text.
    #[error("Store error: {0}")]
    Store(String),
    /// A tree node could not be decoded.
    #[error("Codec error: {0}")]
    Codec(#[from] crate::codec::CodecError),
    /// A referenced chunk (hex hash in the message) was absent from the store.
    #[error("Missing chunk: {0}")]
    MissingChunk(String),
    /// CHK decryption of fetched content failed.
    #[error("Decryption error: {0}")]
    Decryption(String),
    /// An encrypted link carried no per-chunk key.
    #[error("Missing decryption key")]
    MissingKey,
}
647
648#[cfg(test)]
649mod tests {
650    use super::*;
651    use crate::builder::{BuilderConfig, TreeBuilder};
652    use crate::store::MemoryStore;
653    use crate::types::DirEntry;
654
    // Fresh in-memory store shared between builder and reader in each test.
    fn make_store() -> Arc<MemoryStore> {
        Arc::new(MemoryStore::new())
    }
658
    // Round-trip: a blob written through the builder is readable by hash.
    #[tokio::test]
    async fn test_get_blob() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let data = vec![1u8, 2, 3, 4, 5];
        let hash = builder.put_blob(&data).await.unwrap();

        let result = reader.get_blob(&hash).await.unwrap();
        assert_eq!(result, Some(data));
    }

    // An unknown hash yields Ok(None), not an error.
    #[tokio::test]
    async fn test_get_blob_missing() {
        let store = make_store();
        let reader = TreeReader::new(store);

        let hash = [0u8; 32];
        let result = reader.get_blob(&hash).await.unwrap();
        assert!(result.is_none());
    }

    // A directory hash decodes to a tree node with its links.
    #[tokio::test]
    async fn test_get_tree_node() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
        let dir_hash = builder
            .put_directory(vec![DirEntry::new("test.txt", file_hash).with_size(1)])
            .await
            .unwrap();

        let node = reader.get_tree_node(&dir_hash).await.unwrap();
        assert!(node.is_some());
        assert_eq!(node.unwrap().links.len(), 1);
    }

    // Raw blobs are not tree nodes: get_tree_node reports None.
    #[tokio::test]
    async fn test_get_tree_node_returns_none_for_blob() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let hash = builder.put_blob(&[1u8, 2, 3]).await.unwrap();
        let node = reader.get_tree_node(&hash).await.unwrap();
        assert!(node.is_none());
    }

    // is_tree distinguishes directory nodes from raw blobs.
    #[tokio::test]
    async fn test_is_tree() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
        let dir_hash = builder
            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
            .await
            .unwrap();

        assert!(reader.is_tree(&dir_hash).await.unwrap());
        assert!(!reader.is_tree(&file_hash).await.unwrap());
    }
725
    // A file smaller than the chunk size round-trips as a single blob.
    #[tokio::test]
    async fn test_read_file_small() {
        let store = make_store();
        // Use public() for tests that check raw data storage
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
        let reader = TreeReader::new(store);

        let data = vec![1u8, 2, 3, 4, 5];
        let (cid, _size) = builder.put(&data).await.unwrap();

        let result = reader.read_file(&cid.hash).await.unwrap();
        assert_eq!(result, Some(data));
    }

    // A 350-byte file with 100-byte chunks is split and reassembled intact.
    #[tokio::test]
    async fn test_read_file_chunked() {
        let store = make_store();
        let config = BuilderConfig::new(store.clone()).with_chunk_size(100).public();
        let builder = TreeBuilder::new(config);
        let reader = TreeReader::new(store);

        let mut data = vec![0u8; 350];
        for i in 0..data.len() {
            data[i] = (i % 256) as u8;
        }

        let (cid, _size) = builder.put(&data).await.unwrap();
        let result = reader.read_file(&cid.hash).await.unwrap();

        assert_eq!(result, Some(data));
    }

    // Directory listing returns all named entries.
    #[tokio::test]
    async fn test_list_directory() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let h1 = builder.put_blob(&[1u8]).await.unwrap();
        let h2 = builder.put_blob(&[2u8]).await.unwrap();

        let dir_hash = builder
            .put_directory(
                vec![
                    DirEntry::new("first.txt", h1).with_size(1),
                    DirEntry::new("second.txt", h2).with_size(1),
                ],
            )
            .await
            .unwrap();

        let entries = reader.list_directory(&dir_hash).await.unwrap();

        assert_eq!(entries.len(), 2);
        assert!(entries.iter().any(|e| e.name == "first.txt"));
        assert!(entries.iter().any(|e| e.name == "second.txt"));
    }
783
    // A single-segment path resolves to the linked file's hash.
    #[tokio::test]
    async fn test_resolve_path() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let file_data = vec![1u8, 2, 3];
        let file_hash = builder.put_blob(&file_data).await.unwrap();

        let dir_hash = builder
            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
            .await
            .unwrap();

        let resolved = reader.resolve_path(&dir_hash, "test.txt").await.unwrap();
        assert_eq!(resolved, Some(file_hash));
    }

    // Multi-segment paths descend through nested directories.
    #[tokio::test]
    async fn test_resolve_path_nested() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let file_hash = builder.put_blob(&[1u8]).await.unwrap();

        let sub_sub_dir = builder
            .put_directory(vec![DirEntry::new("deep.txt", file_hash)])
            .await
            .unwrap();

        let sub_dir = builder
            .put_directory(vec![DirEntry::new("level2", sub_sub_dir)])
            .await
            .unwrap();

        let root_dir = builder
            .put_directory(vec![DirEntry::new("level1", sub_dir)])
            .await
            .unwrap();

        let resolved = reader
            .resolve_path(&root_dir, "level1/level2/deep.txt")
            .await
            .unwrap();
        assert_eq!(resolved, Some(file_hash));
    }

    // A raw blob's size is its byte length.
    #[tokio::test]
    async fn test_get_size() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let data = vec![0u8; 123];
        let hash = builder.put_blob(&data).await.unwrap();

        assert_eq!(reader.get_size(&hash).await.unwrap(), 123);
    }
843
    // Depth-first walk visits the root, files, and nested directories
    // with slash-joined paths.
    #[tokio::test]
    async fn test_walk() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let f1 = builder.put_blob(&[1u8]).await.unwrap();
        let f2 = builder.put_blob(&[2u8, 3]).await.unwrap();

        let sub_dir = builder
            .put_directory(vec![DirEntry::new("nested.txt", f2).with_size(2)])
            .await
            .unwrap();

        let root_dir = builder
            .put_directory(
                vec![
                    DirEntry::new("root.txt", f1).with_size(1),
                    DirEntry::new("sub", sub_dir),
                ],
            )
            .await
            .unwrap();

        let entries = reader.walk(&root_dir, "").await.unwrap();
        let paths: Vec<_> = entries.iter().map(|e| e.path.as_str()).collect();

        assert!(paths.contains(&""));
        assert!(paths.contains(&"root.txt"));
        assert!(paths.contains(&"sub"));
        assert!(paths.contains(&"sub/nested.txt"));
    }

    // A freshly built chunked file verifies with nothing missing.
    #[tokio::test]
    async fn test_verify_tree_valid() {
        let store = make_store();
        let config = BuilderConfig::new(store.clone()).with_chunk_size(100).public();
        let builder = TreeBuilder::new(config);

        let data = vec![0u8; 350];
        let (cid, _size) = builder.put(&data).await.unwrap();

        let result = verify_tree(store, &cid.hash).await.unwrap();
        assert!(result.valid);
        assert!(result.missing.is_empty());
    }

    // Deleting a chunk makes verification report it as missing.
    #[tokio::test]
    async fn test_verify_tree_missing() {
        let store = make_store();
        let config = BuilderConfig::new(store.clone()).with_chunk_size(100).public();
        let builder = TreeBuilder::new(config);

        let data = vec![0u8; 350];
        let (cid, _size) = builder.put(&data).await.unwrap();

        // Delete one of the chunks
        let keys = store.keys();
        if let Some(chunk_to_delete) = keys.iter().find(|k| **k != cid.hash) {
            store.delete(chunk_to_delete).await.unwrap();
        }

        let result = verify_tree(store, &cid.hash).await.unwrap();
        assert!(!result.valid);
        assert!(!result.missing.is_empty());
    }
910
911    #[tokio::test]
912    async fn test_read_file_range_small_blob() {
913        let store = make_store();
914        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
915        let reader = TreeReader::new(store);
916
917        let data = b"Hello, World!";
918        let hash = builder.put_blob(data).await.unwrap();
919
920        // Read middle portion
921        let result = reader.read_file_range(&hash, 7, Some(12)).await.unwrap();
922        assert_eq!(result, Some(b"World".to_vec()));
923
924        // Read from start
925        let result = reader.read_file_range(&hash, 0, Some(5)).await.unwrap();
926        assert_eq!(result, Some(b"Hello".to_vec()));
927
928        // Read to end (no end specified)
929        let result = reader.read_file_range(&hash, 7, None).await.unwrap();
930        assert_eq!(result, Some(b"World!".to_vec()));
931    }
932
933    #[tokio::test]
934    async fn test_read_file_range_chunked() {
935        let store = make_store();
936        // Small chunk size to force chunking
937        let config = BuilderConfig::new(store.clone()).with_chunk_size(100).public();
938        let builder = TreeBuilder::new(config);
939        let reader = TreeReader::new(store);
940
941        // Create 350 bytes of sequential data
942        let mut data = vec![0u8; 350];
943        for i in 0..data.len() {
944            data[i] = (i % 256) as u8;
945        }
946
947        let (cid, _size) = builder.put(&data).await.unwrap();
948
949        // Read bytes 50-150 (spans chunk boundary at 100)
950        let result = reader.read_file_range(&cid.hash, 50, Some(150)).await.unwrap().unwrap();
951        assert_eq!(result.len(), 100);
952        assert_eq!(result, data[50..150].to_vec());
953
954        // Read bytes 200-300 (within third and fourth chunks)
955        let result = reader.read_file_range(&cid.hash, 200, Some(300)).await.unwrap().unwrap();
956        assert_eq!(result.len(), 100);
957        assert_eq!(result, data[200..300].to_vec());
958
959        // Read last 50 bytes
960        let result = reader.read_file_range(&cid.hash, 300, None).await.unwrap().unwrap();
961        assert_eq!(result.len(), 50);
962        assert_eq!(result, data[300..].to_vec());
963    }
964
965    #[tokio::test]
966    async fn test_read_file_range_entire_file() {
967        let store = make_store();
968        let config = BuilderConfig::new(store.clone()).with_chunk_size(100).public();
969        let builder = TreeBuilder::new(config);
970        let reader = TreeReader::new(store);
971
972        let mut data = vec![0u8; 350];
973        for i in 0..data.len() {
974            data[i] = (i % 256) as u8;
975        }
976
977        let (cid, _size) = builder.put(&data).await.unwrap();
978
979        // Read entire file using range
980        let result = reader.read_file_range(&cid.hash, 0, None).await.unwrap().unwrap();
981        assert_eq!(result, data);
982    }
983
984    #[tokio::test]
985    async fn test_read_file_range_out_of_bounds() {
986        let store = make_store();
987        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
988        let reader = TreeReader::new(store);
989
990        let data = b"Short";
991        let hash = builder.put_blob(data).await.unwrap();
992
993        // Start past end of file
994        let result = reader.read_file_range(&hash, 100, Some(200)).await.unwrap();
995        assert_eq!(result, Some(vec![]));
996
997        // End past file length (should clamp)
998        let result = reader.read_file_range(&hash, 0, Some(100)).await.unwrap();
999        assert_eq!(result, Some(b"Short".to_vec()));
1000    }
1001
1002    #[tokio::test]
1003    async fn test_read_file_range_single_byte() {
1004        let store = make_store();
1005        let config = BuilderConfig::new(store.clone()).with_chunk_size(100).public();
1006        let builder = TreeBuilder::new(config);
1007        let reader = TreeReader::new(store);
1008
1009        let mut data = vec![0u8; 350];
1010        for i in 0..data.len() {
1011            data[i] = (i % 256) as u8;
1012        }
1013
1014        let (cid, _size) = builder.put(&data).await.unwrap();
1015
1016        // Read single byte at chunk boundary
1017        let result = reader.read_file_range(&cid.hash, 100, Some(101)).await.unwrap().unwrap();
1018        assert_eq!(result.len(), 1);
1019        assert_eq!(result[0], 100);
1020    }
1021}