// hashtree_core/reader.rs
1//! Tree reader and traversal utilities
2//!
3//! Read files and directories from content-addressed storage
4
5use std::collections::HashMap;
6use std::sync::Arc;
7
8use crate::codec::{decode_tree_node, is_directory_node, is_tree_node, try_decode_tree_node};
9use crate::store::Store;
10use crate::types::{to_hex, Cid, Hash, Link, LinkType, TreeNode};
11
12use crate::crypto::{decrypt_chk, EncryptionKey};
13
/// Tree entry for directory listings
#[derive(Debug, Clone)]
pub struct TreeEntry {
    /// Entry name; `list_directory` falls back to the hex hash for unnamed links
    pub name: String,
    /// Content hash this entry points to
    pub hash: Hash,
    /// Size in bytes as recorded on the link
    pub size: u64,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
    /// Optional metadata (createdAt, mimeType, thumbnail, etc.)
    pub meta: Option<HashMap<String, serde_json::Value>>,
}
27
/// Walk entry for tree traversal
#[derive(Debug, Clone)]
pub struct WalkEntry {
    /// Slash-joined path from the walk root
    pub path: String,
    /// Content hash of the visited node
    pub hash: Hash,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    /// Byte size (blob length, or sum of link sizes for tree nodes)
    pub size: u64,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
}
39
/// TreeReader - reads and traverses merkle trees
///
/// Generic over any backing [`Store`]; the store is shared via `Arc` so a
/// reader can coexist with builders using the same storage.
pub struct TreeReader<S: Store> {
    store: Arc<S>,
}
44
45impl<S: Store> TreeReader<S> {
46    pub fn new(store: Arc<S>) -> Self {
47        Self { store }
48    }
49
50    /// Get raw data by hash
51    pub async fn get_blob(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
52        self.store
53            .get(hash)
54            .await
55            .map_err(|e| ReaderError::Store(e.to_string()))
56    }
57
58    /// Get and decode a tree node
59    pub async fn get_tree_node(&self, hash: &Hash) -> Result<Option<TreeNode>, ReaderError> {
60        let data = match self
61            .store
62            .get(hash)
63            .await
64            .map_err(|e| ReaderError::Store(e.to_string()))?
65        {
66            Some(d) => d,
67            None => return Ok(None),
68        };
69
70        if !is_tree_node(&data) {
71            return Ok(None); // It's a blob, not a tree
72        }
73
74        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
75        Ok(Some(node))
76    }
77
78    /// Check if hash points to a tree node or blob
79    pub async fn is_tree(&self, hash: &Hash) -> Result<bool, ReaderError> {
80        let data = match self
81            .store
82            .get(hash)
83            .await
84            .map_err(|e| ReaderError::Store(e.to_string()))?
85        {
86            Some(d) => d,
87            None => return Ok(false),
88        };
89        Ok(is_tree_node(&data))
90    }
91
92    /// Check if hash points to a directory (tree with named links)
93    /// vs a chunked file (tree with unnamed links) or raw blob
94    pub async fn is_directory(&self, hash: &Hash) -> Result<bool, ReaderError> {
95        let data = match self
96            .store
97            .get(hash)
98            .await
99            .map_err(|e| ReaderError::Store(e.to_string()))?
100        {
101            Some(d) => d,
102            None => return Ok(false),
103        };
104        Ok(is_directory_node(&data))
105    }
106
107    /// Read content by CID (handles both encrypted and public content)
108    ///
109    /// This is the unified read method that handles decryption automatically
110    /// when the CID contains an encryption key.
111    pub async fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>, ReaderError> {
112        if let Some(key) = cid.key {
113            self.get_encrypted(&cid.hash, &key).await
114        } else {
115            self.read_file(&cid.hash).await
116        }
117    }
118
119    /// Read encrypted content by hash and key (internal)
120    async fn get_encrypted(
121        &self,
122        hash: &Hash,
123        key: &EncryptionKey,
124    ) -> Result<Option<Vec<u8>>, ReaderError> {
125        let encrypted_data = match self
126            .store
127            .get(hash)
128            .await
129            .map_err(|e| ReaderError::Store(e.to_string()))?
130        {
131            Some(d) => d,
132            None => return Ok(None),
133        };
134
135        // Decrypt the data
136        let decrypted = decrypt_chk(&encrypted_data, key)
137            .map_err(|e| ReaderError::Decryption(e.to_string()))?;
138
139        // Check if it's a tree node
140        if is_tree_node(&decrypted) {
141            let node = decode_tree_node(&decrypted)?;
142            let assembled = self.assemble_encrypted_chunks(&node).await?;
143            return Ok(Some(assembled));
144        }
145
146        // Single chunk data
147        Ok(Some(decrypted))
148    }
149
150    /// Assemble encrypted chunks from tree
151    async fn assemble_encrypted_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
152        let mut parts: Vec<Vec<u8>> = Vec::new();
153
154        for link in &node.links {
155            let chunk_key = link.key.ok_or(ReaderError::MissingKey)?;
156
157            let encrypted_child = self
158                .store
159                .get(&link.hash)
160                .await
161                .map_err(|e| ReaderError::Store(e.to_string()))?
162                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
163
164            let decrypted = decrypt_chk(&encrypted_child, &chunk_key)
165                .map_err(|e| ReaderError::Decryption(e.to_string()))?;
166
167            if is_tree_node(&decrypted) {
168                // Intermediate tree node - recurse
169                let child_node = decode_tree_node(&decrypted)?;
170                let child_data = Box::pin(self.assemble_encrypted_chunks(&child_node)).await?;
171                parts.push(child_data);
172            } else {
173                // Leaf data chunk
174                parts.push(decrypted);
175            }
176        }
177
178        let total_len: usize = parts.iter().map(|p| p.len()).sum();
179        let mut result = Vec::with_capacity(total_len);
180        for part in parts {
181            result.extend_from_slice(&part);
182        }
183
184        Ok(result)
185    }
186
187    /// Read a complete file (reassemble chunks if needed)
188    /// For unencrypted content only - use `get()` for unified access
189    pub async fn read_file(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
190        let data = match self
191            .store
192            .get(hash)
193            .await
194            .map_err(|e| ReaderError::Store(e.to_string()))?
195        {
196            Some(d) => d,
197            None => return Ok(None),
198        };
199
200        // Check if it's a tree (chunked file) or raw blob
201        if !is_tree_node(&data) {
202            return Ok(Some(data)); // Direct blob
203        }
204
205        // It's a tree - reassemble chunks
206        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
207        let assembled = self.assemble_chunks(&node).await?;
208        Ok(Some(assembled))
209    }
210
211    /// Read a byte range from a file (fetches only necessary chunks)
212    ///
213    /// - `start`: Starting byte offset (inclusive)
214    /// - `end`: Ending byte offset (exclusive), or None to read to end
215    ///
216    /// For unencrypted content only - encrypted range reads not yet supported.
217    pub async fn read_file_range(
218        &self,
219        hash: &Hash,
220        start: u64,
221        end: Option<u64>,
222    ) -> Result<Option<Vec<u8>>, ReaderError> {
223        let data = match self
224            .store
225            .get(hash)
226            .await
227            .map_err(|e| ReaderError::Store(e.to_string()))?
228        {
229            Some(d) => d,
230            None => return Ok(None),
231        };
232
233        // Single blob - just slice it
234        if !is_tree_node(&data) {
235            let start_idx = start as usize;
236            let end_idx = end.map(|e| e as usize).unwrap_or(data.len());
237            if start_idx >= data.len() {
238                return Ok(Some(vec![]));
239            }
240            let end_idx = end_idx.min(data.len());
241            return Ok(Some(data[start_idx..end_idx].to_vec()));
242        }
243
244        // It's a chunked file - fetch only needed chunks
245        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
246        let range_data = self.assemble_chunks_range(&node, start, end).await?;
247        Ok(Some(range_data))
248    }
249
    /// Assemble only the chunks needed for a byte range
    ///
    /// `start` is inclusive, `end` exclusive; `None` reads to the end of the
    /// content. Requests at or beyond the end return an empty vec.
    ///
    /// NOTE(review): `collect_chunk_offsets` learns each leaf's size by
    /// fetching its bytes, so a range read currently downloads every chunk
    /// once before re-fetching the overlapping ones - confirm whether
    /// `link.size` could supply the sizes instead.
    async fn assemble_chunks_range(
        &self,
        node: &TreeNode,
        start: u64,
        end: Option<u64>,
    ) -> Result<Vec<u8>, ReaderError> {
        // First, flatten the tree to get all leaf chunks with their byte offsets
        let chunks_info = self.collect_chunk_offsets(node).await?;

        if chunks_info.is_empty() {
            return Ok(vec![]);
        }

        // Calculate total size and actual end
        let total_size: u64 = chunks_info.iter().map(|(_, _, size)| size).sum();
        let actual_end = end.unwrap_or(total_size).min(total_size);

        if start >= actual_end {
            return Ok(vec![]);
        }

        // Find chunks that overlap with [start, actual_end)
        let mut result = Vec::with_capacity((actual_end - start) as usize);
        // Running byte position; recomputed here rather than taken from the
        // recorded per-chunk offsets (which are therefore unused below).
        let mut current_offset = 0u64;

        for (chunk_hash, _chunk_offset, chunk_size) in &chunks_info {
            let chunk_start = current_offset;
            let chunk_end = current_offset + chunk_size;

            // Check if this chunk overlaps with our range
            if chunk_end > start && chunk_start < actual_end {
                // Fetch this chunk
                let chunk_data = self
                    .store
                    .get(chunk_hash)
                    .await
                    .map_err(|e| ReaderError::Store(e.to_string()))?
                    .ok_or_else(|| ReaderError::MissingChunk(to_hex(chunk_hash)))?;

                // Calculate slice bounds within this chunk
                let slice_start = if start > chunk_start {
                    (start - chunk_start) as usize
                } else {
                    0
                };
                let slice_end = if actual_end < chunk_end {
                    (actual_end - chunk_start) as usize
                } else {
                    chunk_data.len()
                };

                result.extend_from_slice(&chunk_data[slice_start..slice_end]);
            }

            current_offset = chunk_end;

            // Early exit if we've passed the requested range
            if current_offset >= actual_end {
                break;
            }
        }

        Ok(result)
    }
315
316    /// Collect all leaf chunk hashes with their byte offsets
317    /// Returns Vec<(hash, offset, size)>
318    async fn collect_chunk_offsets(
319        &self,
320        node: &TreeNode,
321    ) -> Result<Vec<(Hash, u64, u64)>, ReaderError> {
322        let mut chunks = Vec::new();
323        let mut offset = 0u64;
324        self.collect_chunk_offsets_recursive(node, &mut chunks, &mut offset)
325            .await?;
326        Ok(chunks)
327    }
328
    /// Depth-first helper for `collect_chunk_offsets`.
    ///
    /// Appends `(hash, offset, size)` for every leaf chunk in link order and
    /// advances `offset` by each leaf's length; intermediate tree nodes are
    /// recursed into (via Box::pin, required for async recursion) and not
    /// recorded themselves. Leaf sizes come from fetching the chunk bytes.
    async fn collect_chunk_offsets_recursive(
        &self,
        node: &TreeNode,
        chunks: &mut Vec<(Hash, u64, u64)>,
        offset: &mut u64,
    ) -> Result<(), ReaderError> {
        for link in &node.links {
            let child_data = self
                .store
                .get(&link.hash)
                .await
                .map_err(|e| ReaderError::Store(e.to_string()))?
                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;

            if is_tree_node(&child_data) {
                // Intermediate node - recurse
                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
                Box::pin(self.collect_chunk_offsets_recursive(&child_node, chunks, offset)).await?;
            } else {
                // Leaf chunk
                let size = child_data.len() as u64;
                chunks.push((link.hash, *offset, size));
                *offset += size;
            }
        }
        Ok(())
    }
356
357    /// Recursively assemble chunks from tree (unencrypted)
358    async fn assemble_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
359        let mut parts: Vec<Vec<u8>> = Vec::new();
360
361        for link in &node.links {
362            let child_data = self
363                .store
364                .get(&link.hash)
365                .await
366                .map_err(|e| ReaderError::Store(e.to_string()))?
367                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
368
369            if is_tree_node(&child_data) {
370                // Nested tree - recurse
371                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
372                parts.push(Box::pin(self.assemble_chunks(&child_node)).await?);
373            } else {
374                // Leaf blob
375                parts.push(child_data);
376            }
377        }
378
379        // Concatenate all parts
380        let total_length: usize = parts.iter().map(|p| p.len()).sum();
381        let mut result = Vec::with_capacity(total_length);
382        for part in parts {
383            result.extend_from_slice(&part);
384        }
385
386        Ok(result)
387    }
388
389    /// Read a file with streaming (returns chunks as vec)
390    pub async fn read_file_chunks(&self, hash: &Hash) -> Result<Vec<Vec<u8>>, ReaderError> {
391        let data = match self
392            .store
393            .get(hash)
394            .await
395            .map_err(|e| ReaderError::Store(e.to_string()))?
396        {
397            Some(d) => d,
398            None => return Ok(vec![]),
399        };
400
401        if !is_tree_node(&data) {
402            return Ok(vec![data]);
403        }
404
405        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
406        self.collect_chunks(&node).await
407    }
408
409    /// Recursively collect chunks
410    async fn collect_chunks(&self, node: &TreeNode) -> Result<Vec<Vec<u8>>, ReaderError> {
411        let mut chunks = Vec::new();
412
413        for link in &node.links {
414            let child_data = self
415                .store
416                .get(&link.hash)
417                .await
418                .map_err(|e| ReaderError::Store(e.to_string()))?
419                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
420
421            if is_tree_node(&child_data) {
422                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
423                chunks.extend(Box::pin(self.collect_chunks(&child_node)).await?);
424            } else {
425                chunks.push(child_data);
426            }
427        }
428
429        Ok(chunks)
430    }
431
432    /// List directory entries
433    pub async fn list_directory(&self, hash: &Hash) -> Result<Vec<TreeEntry>, ReaderError> {
434        let node = match self.get_tree_node(hash).await? {
435            Some(n) => n,
436            None => return Ok(vec![]),
437        };
438
439        let mut entries = Vec::new();
440
441        for link in &node.links {
442            // Skip internal chunk nodes (names starting with _chunk_)
443            if let Some(ref name) = link.name {
444                if name.starts_with("_chunk_") {
445                    // This is an internal split - recurse into it
446                    let sub_entries = Box::pin(self.list_directory(&link.hash)).await?;
447                    entries.extend(sub_entries);
448                    continue;
449                }
450
451                // Skip internal group nodes (names starting with _ but not _chunk_)
452                if name.starts_with('_') {
453                    let sub_entries = Box::pin(self.list_directory(&link.hash)).await?;
454                    entries.extend(sub_entries);
455                    continue;
456                }
457            }
458
459            entries.push(TreeEntry {
460                name: link.name.clone().unwrap_or_else(|| to_hex(&link.hash)),
461                hash: link.hash,
462                size: link.size,
463                link_type: link.link_type,
464                key: link.key,
465                meta: link.meta.clone(),
466            });
467        }
468
469        Ok(entries)
470    }
471
472    /// Resolve a path within a tree
473    /// e.g., resolve_path("root/foo/bar.txt")
474    pub async fn resolve_path(
475        &self,
476        root_hash: &Hash,
477        path: &str,
478    ) -> Result<Option<Hash>, ReaderError> {
479        let parts: Vec<&str> = path.split('/').filter(|p| !p.is_empty()).collect();
480
481        let mut current_hash = *root_hash;
482
483        for part in parts {
484            let node = match self.get_tree_node(&current_hash).await? {
485                Some(n) => n,
486                None => return Ok(None),
487            };
488
489            if let Some(link) = self.find_link(&node, part) {
490                current_hash = link.hash;
491            } else {
492                // Check internal nodes
493                match self.find_in_subtrees(&node, part).await? {
494                    Some(hash) => current_hash = hash,
495                    None => return Ok(None),
496                }
497            }
498        }
499
500        Ok(Some(current_hash))
501    }
502
503    /// Find a link by name in a tree node
504    fn find_link(&self, node: &TreeNode, name: &str) -> Option<Link> {
505        node.links
506            .iter()
507            .find(|l| l.name.as_deref() == Some(name))
508            .cloned()
509    }
510
511    /// Search for name in internal subtrees
512    async fn find_in_subtrees(
513        &self,
514        node: &TreeNode,
515        name: &str,
516    ) -> Result<Option<Hash>, ReaderError> {
517        for link in &node.links {
518            // Only search internal nodes
519            if !link
520                .name
521                .as_ref()
522                .map(|n| n.starts_with('_'))
523                .unwrap_or(false)
524            {
525                continue;
526            }
527
528            let sub_node = match self.get_tree_node(&link.hash).await? {
529                Some(n) => n,
530                None => continue,
531            };
532
533            if let Some(found) = self.find_link(&sub_node, name) {
534                return Ok(Some(found.hash));
535            }
536
537            // Recurse deeper
538            if let Some(deep_found) = Box::pin(self.find_in_subtrees(&sub_node, name)).await? {
539                return Ok(Some(deep_found));
540            }
541        }
542
543        Ok(None)
544    }
545
546    /// Get total size of a tree
547    pub async fn get_size(&self, hash: &Hash) -> Result<u64, ReaderError> {
548        let data = match self
549            .store
550            .get(hash)
551            .await
552            .map_err(|e| ReaderError::Store(e.to_string()))?
553        {
554            Some(d) => d,
555            None => return Ok(0),
556        };
557
558        if !is_tree_node(&data) {
559            return Ok(data.len() as u64);
560        }
561
562        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
563        // Calculate from children
564        let mut total = 0u64;
565        for link in &node.links {
566            total += link.size;
567        }
568        Ok(total)
569    }
570
571    /// Walk entire tree depth-first
572    pub async fn walk(&self, hash: &Hash, path: &str) -> Result<Vec<WalkEntry>, ReaderError> {
573        let mut entries = Vec::new();
574        self.walk_recursive(hash, path, &mut entries).await?;
575        Ok(entries)
576    }
577
578    async fn walk_recursive(
579        &self,
580        hash: &Hash,
581        path: &str,
582        entries: &mut Vec<WalkEntry>,
583    ) -> Result<(), ReaderError> {
584        let data = match self
585            .store
586            .get(hash)
587            .await
588            .map_err(|e| ReaderError::Store(e.to_string()))?
589        {
590            Some(d) => d,
591            None => return Ok(()),
592        };
593
594        let node = match try_decode_tree_node(&data) {
595            Some(n) => n,
596            None => {
597                entries.push(WalkEntry {
598                    path: path.to_string(),
599                    hash: *hash,
600                    link_type: LinkType::Blob,
601                    size: data.len() as u64,
602                    key: None, // TreeReader doesn't track keys
603                });
604                return Ok(());
605            }
606        };
607
608        let node_size: u64 = node.links.iter().map(|l| l.size).sum();
609        entries.push(WalkEntry {
610            path: path.to_string(),
611            hash: *hash,
612            link_type: node.node_type,
613            size: node_size,
614            key: None, // directories are not encrypted
615        });
616
617        for link in &node.links {
618            let child_path = match &link.name {
619                Some(name) => {
620                    // Skip internal chunk nodes in path
621                    if name.starts_with("_chunk_") || name.starts_with('_') {
622                        Box::pin(self.walk_recursive(&link.hash, path, entries)).await?;
623                        continue;
624                    }
625                    if path.is_empty() {
626                        name.clone()
627                    } else {
628                        format!("{}/{}", path, name)
629                    }
630                }
631                None => path.to_string(),
632            };
633
634            Box::pin(self.walk_recursive(&link.hash, &child_path, entries)).await?;
635        }
636
637        Ok(())
638    }
639}
640
641/// Verify tree integrity
642/// Checks that all referenced hashes exist
643pub async fn verify_tree<S: Store>(
644    store: Arc<S>,
645    root_hash: &Hash,
646) -> Result<VerifyResult, ReaderError> {
647    let mut missing = Vec::new();
648    let mut visited = std::collections::HashSet::new();
649
650    verify_recursive(store, root_hash, &mut missing, &mut visited).await?;
651
652    Ok(VerifyResult {
653        valid: missing.is_empty(),
654        missing,
655    })
656}
657
658async fn verify_recursive<S: Store>(
659    store: Arc<S>,
660    hash: &Hash,
661    missing: &mut Vec<Hash>,
662    visited: &mut std::collections::HashSet<String>,
663) -> Result<(), ReaderError> {
664    let hex = to_hex(hash);
665    if visited.contains(&hex) {
666        return Ok(());
667    }
668    visited.insert(hex);
669
670    let data = match store
671        .get(hash)
672        .await
673        .map_err(|e| ReaderError::Store(e.to_string()))?
674    {
675        Some(d) => d,
676        None => {
677            missing.push(*hash);
678            return Ok(());
679        }
680    };
681
682    if is_tree_node(&data) {
683        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
684        for link in &node.links {
685            Box::pin(verify_recursive(
686                store.clone(),
687                &link.hash,
688                missing,
689                visited,
690            ))
691            .await?;
692        }
693    }
694
695    Ok(())
696}
697
/// Result of tree verification
#[derive(Debug, Clone)]
pub struct VerifyResult {
    /// True when every referenced hash was found in the store
    pub valid: bool,
    /// Hashes that were referenced by the tree but absent from the store
    pub missing: Vec<Hash>,
}
704
/// Reader error type
#[derive(Debug, thiserror::Error)]
pub enum ReaderError {
    /// Underlying store failed (stringified store error)
    #[error("Store error: {0}")]
    Store(String),
    /// Tree node bytes could not be decoded
    #[error("Codec error: {0}")]
    Codec(#[from] crate::codec::CodecError),
    /// A referenced chunk (hex-encoded hash) was not found in the store
    #[error("Missing chunk: {0}")]
    MissingChunk(String),
    /// CHK decryption failed (stringified crypto error)
    #[error("Decryption error: {0}")]
    Decryption(String),
    /// An encrypted link did not carry its per-chunk decryption key
    #[error("Missing decryption key")]
    MissingKey,
}
719
720#[cfg(test)]
721mod tests {
722    use super::*;
723    use crate::builder::{BuilderConfig, TreeBuilder};
724    use crate::store::MemoryStore;
725    use crate::types::DirEntry;
726
    /// Fresh in-memory store shared by builder and reader in each test.
    fn make_store() -> Arc<MemoryStore> {
        Arc::new(MemoryStore::new())
    }
730
    #[tokio::test]
    async fn test_get_blob() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        // Round-trip: a stored blob comes back byte-identical.
        let data = vec![1u8, 2, 3, 4, 5];
        let hash = builder.put_blob(&data).await.unwrap();

        let result = reader.get_blob(&hash).await.unwrap();
        assert_eq!(result, Some(data));
    }
743
    #[tokio::test]
    async fn test_get_blob_missing() {
        let store = make_store();
        let reader = TreeReader::new(store);

        // Nothing was stored, so an arbitrary hash resolves to None.
        let hash = [0u8; 32];
        let result = reader.get_blob(&hash).await.unwrap();
        assert!(result.is_none());
    }
753
    #[tokio::test]
    async fn test_get_tree_node() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        // A one-entry directory decodes to a tree node with one link.
        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
        let dir_hash = builder
            .put_directory(vec![DirEntry::new("test.txt", file_hash).with_size(1)])
            .await
            .unwrap();

        let node = reader.get_tree_node(&dir_hash).await.unwrap();
        assert!(node.is_some());
        assert_eq!(node.unwrap().links.len(), 1);
    }
770
    #[tokio::test]
    async fn test_get_tree_node_returns_none_for_blob() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        // Raw blobs are not tree nodes, so decoding yields None (not an error).
        let hash = builder.put_blob(&[1u8, 2, 3]).await.unwrap();
        let node = reader.get_tree_node(&hash).await.unwrap();
        assert!(node.is_none());
    }
781
    #[tokio::test]
    async fn test_is_tree() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        // Directories are tree nodes; their blob children are not.
        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
        let dir_hash = builder
            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
            .await
            .unwrap();

        assert!(reader.is_tree(&dir_hash).await.unwrap());
        assert!(!reader.is_tree(&file_hash).await.unwrap());
    }
797
    #[tokio::test]
    async fn test_read_file_small() {
        let store = make_store();
        // Use public() for tests that check raw data storage
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
        let reader = TreeReader::new(store);

        // Small content fits in one blob; read_file returns it unchanged.
        let data = vec![1u8, 2, 3, 4, 5];
        let (cid, _size) = builder.put(&data).await.unwrap();

        let result = reader.read_file(&cid.hash).await.unwrap();
        assert_eq!(result, Some(data));
    }
811
    #[tokio::test]
    async fn test_read_file_chunked() {
        let store = make_store();
        // 350 bytes with a 100-byte chunk size forces a multi-chunk tree.
        let config = BuilderConfig::new(store.clone())
            .with_chunk_size(100)
            .public();
        let builder = TreeBuilder::new(config);
        let reader = TreeReader::new(store);

        let mut data = vec![0u8; 350];
        for i in 0..data.len() {
            data[i] = (i % 256) as u8;
        }

        // read_file must reassemble the chunks in order.
        let (cid, _size) = builder.put(&data).await.unwrap();
        let result = reader.read_file(&cid.hash).await.unwrap();

        assert_eq!(result, Some(data));
    }
831
    #[tokio::test]
    async fn test_list_directory() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let h1 = builder.put_blob(&[1u8]).await.unwrap();
        let h2 = builder.put_blob(&[2u8]).await.unwrap();

        let dir_hash = builder
            .put_directory(vec![
                DirEntry::new("first.txt", h1).with_size(1),
                DirEntry::new("second.txt", h2).with_size(1),
            ])
            .await
            .unwrap();

        // Both named entries are listed (order not asserted).
        let entries = reader.list_directory(&dir_hash).await.unwrap();

        assert_eq!(entries.len(), 2);
        assert!(entries.iter().any(|e| e.name == "first.txt"));
        assert!(entries.iter().any(|e| e.name == "second.txt"));
    }
855
    #[tokio::test]
    async fn test_resolve_path() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let file_data = vec![1u8, 2, 3];
        let file_hash = builder.put_blob(&file_data).await.unwrap();

        let dir_hash = builder
            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
            .await
            .unwrap();

        // A single-component path resolves to the child's hash.
        let resolved = reader.resolve_path(&dir_hash, "test.txt").await.unwrap();
        assert_eq!(resolved, Some(file_hash));
    }
873
    #[tokio::test]
    async fn test_resolve_path_nested() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let file_hash = builder.put_blob(&[1u8]).await.unwrap();

        // Build a three-level hierarchy: root/level1/level2/deep.txt.
        let sub_sub_dir = builder
            .put_directory(vec![DirEntry::new("deep.txt", file_hash)])
            .await
            .unwrap();

        let sub_dir = builder
            .put_directory(vec![DirEntry::new("level2", sub_sub_dir)])
            .await
            .unwrap();

        let root_dir = builder
            .put_directory(vec![DirEntry::new("level1", sub_dir)])
            .await
            .unwrap();

        // Multi-component paths walk each level in turn.
        let resolved = reader
            .resolve_path(&root_dir, "level1/level2/deep.txt")
            .await
            .unwrap();
        assert_eq!(resolved, Some(file_hash));
    }
903
    #[tokio::test]
    async fn test_get_size() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        // A blob's size is its byte length.
        let data = vec![0u8; 123];
        let hash = builder.put_blob(&data).await.unwrap();

        assert_eq!(reader.get_size(&hash).await.unwrap(), 123);
    }
915
    #[tokio::test]
    async fn test_walk() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
        let reader = TreeReader::new(store);

        let f1 = builder.put_blob(&[1u8]).await.unwrap();
        let f2 = builder.put_blob(&[2u8, 3]).await.unwrap();

        let sub_dir = builder
            .put_directory(vec![DirEntry::new("nested.txt", f2).with_size(2)])
            .await
            .unwrap();

        let root_dir = builder
            .put_directory(vec![
                DirEntry::new("root.txt", f1).with_size(1),
                DirEntry::new("sub", sub_dir),
            ])
            .await
            .unwrap();

        // Walking with an empty prefix yields "" for the root and
        // slash-joined paths for every descendant.
        let entries = reader.walk(&root_dir, "").await.unwrap();
        let paths: Vec<_> = entries.iter().map(|e| e.path.as_str()).collect();

        assert!(paths.contains(&""));
        assert!(paths.contains(&"root.txt"));
        assert!(paths.contains(&"sub"));
        assert!(paths.contains(&"sub/nested.txt"));
    }
946
947    #[tokio::test]
948    async fn test_verify_tree_valid() {
949        let store = make_store();
950        let config = BuilderConfig::new(store.clone())
951            .with_chunk_size(100)
952            .public();
953        let builder = TreeBuilder::new(config);
954
955        let data = vec![0u8; 350];
956        let (cid, _size) = builder.put(&data).await.unwrap();
957
958        let result = verify_tree(store, &cid.hash).await.unwrap();
959        assert!(result.valid);
960        assert!(result.missing.is_empty());
961    }
962
963    #[tokio::test]
964    async fn test_verify_tree_missing() {
965        let store = make_store();
966        let config = BuilderConfig::new(store.clone())
967            .with_chunk_size(100)
968            .public();
969        let builder = TreeBuilder::new(config);
970
971        let data = vec![0u8; 350];
972        let (cid, _size) = builder.put(&data).await.unwrap();
973
974        // Delete one of the chunks
975        let keys = store.keys();
976        if let Some(chunk_to_delete) = keys.iter().find(|k| **k != cid.hash) {
977            store.delete(chunk_to_delete).await.unwrap();
978        }
979
980        let result = verify_tree(store, &cid.hash).await.unwrap();
981        assert!(!result.valid);
982        assert!(!result.missing.is_empty());
983    }
984
985    #[tokio::test]
986    async fn test_read_file_range_small_blob() {
987        let store = make_store();
988        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
989        let reader = TreeReader::new(store);
990
991        let data = b"Hello, World!";
992        let hash = builder.put_blob(data).await.unwrap();
993
994        // Read middle portion
995        let result = reader.read_file_range(&hash, 7, Some(12)).await.unwrap();
996        assert_eq!(result, Some(b"World".to_vec()));
997
998        // Read from start
999        let result = reader.read_file_range(&hash, 0, Some(5)).await.unwrap();
1000        assert_eq!(result, Some(b"Hello".to_vec()));
1001
1002        // Read to end (no end specified)
1003        let result = reader.read_file_range(&hash, 7, None).await.unwrap();
1004        assert_eq!(result, Some(b"World!".to_vec()));
1005    }
1006
1007    #[tokio::test]
1008    async fn test_read_file_range_chunked() {
1009        let store = make_store();
1010        // Small chunk size to force chunking
1011        let config = BuilderConfig::new(store.clone())
1012            .with_chunk_size(100)
1013            .public();
1014        let builder = TreeBuilder::new(config);
1015        let reader = TreeReader::new(store);
1016
1017        // Create 350 bytes of sequential data
1018        let mut data = vec![0u8; 350];
1019        for i in 0..data.len() {
1020            data[i] = (i % 256) as u8;
1021        }
1022
1023        let (cid, _size) = builder.put(&data).await.unwrap();
1024
1025        // Read bytes 50-150 (spans chunk boundary at 100)
1026        let result = reader
1027            .read_file_range(&cid.hash, 50, Some(150))
1028            .await
1029            .unwrap()
1030            .unwrap();
1031        assert_eq!(result.len(), 100);
1032        assert_eq!(result, data[50..150].to_vec());
1033
1034        // Read bytes 200-300 (within third and fourth chunks)
1035        let result = reader
1036            .read_file_range(&cid.hash, 200, Some(300))
1037            .await
1038            .unwrap()
1039            .unwrap();
1040        assert_eq!(result.len(), 100);
1041        assert_eq!(result, data[200..300].to_vec());
1042
1043        // Read last 50 bytes
1044        let result = reader
1045            .read_file_range(&cid.hash, 300, None)
1046            .await
1047            .unwrap()
1048            .unwrap();
1049        assert_eq!(result.len(), 50);
1050        assert_eq!(result, data[300..].to_vec());
1051    }
1052
1053    #[tokio::test]
1054    async fn test_read_file_range_entire_file() {
1055        let store = make_store();
1056        let config = BuilderConfig::new(store.clone())
1057            .with_chunk_size(100)
1058            .public();
1059        let builder = TreeBuilder::new(config);
1060        let reader = TreeReader::new(store);
1061
1062        let mut data = vec![0u8; 350];
1063        for i in 0..data.len() {
1064            data[i] = (i % 256) as u8;
1065        }
1066
1067        let (cid, _size) = builder.put(&data).await.unwrap();
1068
1069        // Read entire file using range
1070        let result = reader
1071            .read_file_range(&cid.hash, 0, None)
1072            .await
1073            .unwrap()
1074            .unwrap();
1075        assert_eq!(result, data);
1076    }
1077
1078    #[tokio::test]
1079    async fn test_read_file_range_out_of_bounds() {
1080        let store = make_store();
1081        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
1082        let reader = TreeReader::new(store);
1083
1084        let data = b"Short";
1085        let hash = builder.put_blob(data).await.unwrap();
1086
1087        // Start past end of file
1088        let result = reader.read_file_range(&hash, 100, Some(200)).await.unwrap();
1089        assert_eq!(result, Some(vec![]));
1090
1091        // End past file length (should clamp)
1092        let result = reader.read_file_range(&hash, 0, Some(100)).await.unwrap();
1093        assert_eq!(result, Some(b"Short".to_vec()));
1094    }
1095
1096    #[tokio::test]
1097    async fn test_read_file_range_single_byte() {
1098        let store = make_store();
1099        let config = BuilderConfig::new(store.clone())
1100            .with_chunk_size(100)
1101            .public();
1102        let builder = TreeBuilder::new(config);
1103        let reader = TreeReader::new(store);
1104
1105        let mut data = vec![0u8; 350];
1106        for i in 0..data.len() {
1107            data[i] = (i % 256) as u8;
1108        }
1109
1110        let (cid, _size) = builder.put(&data).await.unwrap();
1111
1112        // Read single byte at chunk boundary
1113        let result = reader
1114            .read_file_range(&cid.hash, 100, Some(101))
1115            .await
1116            .unwrap()
1117            .unwrap();
1118        assert_eq!(result.len(), 1);
1119        assert_eq!(result[0], 100);
1120    }
1121}