Skip to main content

hashtree_core/
reader.rs

1//! Tree reader and traversal utilities
2//!
3//! Read files and directories from content-addressed storage
4
5use std::collections::HashMap;
6use std::sync::Arc;
7
8use crate::codec::{decode_tree_node, is_directory_node, is_tree_node, try_decode_tree_node};
9use crate::hash::sha256;
10use crate::store::Store;
11use crate::types::{to_hex, Cid, Hash, Link, LinkType, TreeNode};
12
13use crate::crypto::{decrypt_chk, EncryptionKey};
14
/// Tree entry for directory listings.
///
/// `TreeReader::list_directory` builds one of these per visible link by copying
/// the link's fields.
#[derive(Debug, Clone)]
pub struct TreeEntry {
    /// Link name; `list_directory` falls back to the hex-encoded hash for unnamed links
    pub name: String,
    /// Hash of the linked blob or node
    pub hash: Hash,
    /// Size as recorded on the link (copied verbatim, not recomputed from the store)
    pub size: u64,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
    /// Optional metadata (createdAt, mimeType, thumbnail, etc.)
    pub meta: Option<HashMap<String, serde_json::Value>>,
}
28
/// Walk entry for tree traversal.
///
/// `TreeReader::walk` emits one entry for every blob and every tree node it visits.
#[derive(Debug, Clone)]
pub struct WalkEntry {
    /// Slash-separated path of link names leading to this entry
    pub path: String,
    /// Hash the entry was fetched by
    pub hash: Hash,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    /// Byte length for blobs; sum of the direct links' recorded sizes for tree nodes
    pub size: u64,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
}
40
/// TreeReader - reads and traverses merkle trees
///
/// Generic over the backing `Store`; the shared store handle is the only state it holds.
pub struct TreeReader<S: Store> {
    /// Shared handle to the store that blobs and tree nodes are fetched from
    store: Arc<S>,
}
45
46impl<S: Store> TreeReader<S> {
47    fn internal_chunk_start(name: &str) -> Option<usize> {
48        let suffix = name.strip_prefix("_chunk_")?;
49        if suffix.is_empty() || !suffix.bytes().all(|byte| byte.is_ascii_digit()) {
50            return None;
51        }
52        suffix.parse().ok()
53    }
54
55    fn node_uses_directory_fanout(node: &TreeNode) -> bool {
56        !node.links.is_empty()
57            && node.links.iter().all(|link| {
58                let Some(name) = link.name.as_deref() else {
59                    return false;
60                };
61                Self::internal_chunk_start(name).is_some() && link.link_type == LinkType::Dir
62            })
63    }
64
65    fn is_internal_directory_link(node: &TreeNode, link: &Link) -> bool {
66        if !Self::node_uses_directory_fanout(node) || link.link_type != LinkType::Dir {
67            return false;
68        }
69
70        let Some(name) = link.name.as_deref() else {
71            return false;
72        };
73        Self::internal_chunk_start(name).is_some()
74    }
75
76    pub fn new(store: Arc<S>) -> Self {
77        Self { store }
78    }
79
80    /// Get raw data by hash
81    pub async fn get_blob(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
82        self.store
83            .get(hash)
84            .await
85            .map_err(|e| ReaderError::Store(e.to_string()))
86    }
87
88    /// Get and decode a tree node
89    pub async fn get_tree_node(&self, hash: &Hash) -> Result<Option<TreeNode>, ReaderError> {
90        let data = match self
91            .store
92            .get(hash)
93            .await
94            .map_err(|e| ReaderError::Store(e.to_string()))?
95        {
96            Some(d) => d,
97            None => return Ok(None),
98        };
99
100        if !is_tree_node(&data) {
101            return Ok(None); // It's a blob, not a tree
102        }
103
104        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
105        Ok(Some(node))
106    }
107
108    /// Check if hash points to a tree node or blob
109    pub async fn is_tree(&self, hash: &Hash) -> Result<bool, ReaderError> {
110        let data = match self
111            .store
112            .get(hash)
113            .await
114            .map_err(|e| ReaderError::Store(e.to_string()))?
115        {
116            Some(d) => d,
117            None => return Ok(false),
118        };
119        Ok(is_tree_node(&data))
120    }
121
122    /// Check if hash points to a directory (tree with named links)
123    /// vs a chunked file (tree with unnamed links) or raw blob
124    pub async fn is_directory(&self, hash: &Hash) -> Result<bool, ReaderError> {
125        let data = match self
126            .store
127            .get(hash)
128            .await
129            .map_err(|e| ReaderError::Store(e.to_string()))?
130        {
131            Some(d) => d,
132            None => return Ok(false),
133        };
134        Ok(is_directory_node(&data))
135    }
136
137    /// Read content by CID (handles both encrypted and public content)
138    ///
139    /// This is the unified read method that handles decryption automatically
140    /// when the CID contains an encryption key.
141    pub async fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>, ReaderError> {
142        if let Some(key) = cid.key {
143            self.get_encrypted(&cid.hash, &key).await
144        } else {
145            self.read_file(&cid.hash).await
146        }
147    }
148
149    /// Read encrypted content by hash and key (internal)
150    async fn get_encrypted(
151        &self,
152        hash: &Hash,
153        key: &EncryptionKey,
154    ) -> Result<Option<Vec<u8>>, ReaderError> {
155        let encrypted_data = match self
156            .store
157            .get(hash)
158            .await
159            .map_err(|e| ReaderError::Store(e.to_string()))?
160        {
161            Some(d) => d,
162            None => return Ok(None),
163        };
164
165        // Decrypt the data
166        let decrypted = decrypt_chk(&encrypted_data, key)
167            .map_err(|e| ReaderError::Decryption(e.to_string()))?;
168
169        // Check if it's a tree node
170        if is_tree_node(&decrypted) {
171            let node = decode_tree_node(&decrypted)?;
172            let assembled = self.assemble_encrypted_chunks(&node).await?;
173            return Ok(Some(assembled));
174        }
175
176        // Single chunk data
177        Ok(Some(decrypted))
178    }
179
180    /// Assemble encrypted chunks from tree
181    async fn assemble_encrypted_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
182        let mut parts: Vec<Vec<u8>> = Vec::new();
183
184        for link in &node.links {
185            let chunk_key = link.key.ok_or(ReaderError::MissingKey)?;
186
187            let encrypted_child = self
188                .store
189                .get(&link.hash)
190                .await
191                .map_err(|e| ReaderError::Store(e.to_string()))?
192                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
193
194            let decrypted = decrypt_chk(&encrypted_child, &chunk_key)
195                .map_err(|e| ReaderError::Decryption(e.to_string()))?;
196
197            if is_tree_node(&decrypted) {
198                // Intermediate tree node - recurse
199                let child_node = decode_tree_node(&decrypted)?;
200                let child_data = Box::pin(self.assemble_encrypted_chunks(&child_node)).await?;
201                parts.push(child_data);
202            } else {
203                // Leaf data chunk
204                parts.push(decrypted);
205            }
206        }
207
208        let total_len: usize = parts.iter().map(|p| p.len()).sum();
209        let mut result = Vec::with_capacity(total_len);
210        for part in parts {
211            result.extend_from_slice(&part);
212        }
213
214        Ok(result)
215    }
216
217    /// Read a complete file (reassemble chunks if needed)
218    /// For unencrypted content only - use `get()` for unified access
219    pub async fn read_file(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
220        let data = match self
221            .store
222            .get(hash)
223            .await
224            .map_err(|e| ReaderError::Store(e.to_string()))?
225        {
226            Some(d) => d,
227            None => return Ok(None),
228        };
229
230        // Check if it's a tree (chunked file) or raw blob
231        if !is_tree_node(&data) {
232            return Ok(Some(data)); // Direct blob
233        }
234
235        // It's a tree - reassemble chunks
236        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
237        let assembled = self.assemble_chunks(&node).await?;
238        Ok(Some(assembled))
239    }
240
241    /// Read a byte range from a file (fetches only necessary chunks)
242    ///
243    /// - `start`: Starting byte offset (inclusive)
244    /// - `end`: Ending byte offset (exclusive), or None to read to end
245    ///
246    /// For unencrypted content only - encrypted range reads not yet supported.
247    pub async fn read_file_range(
248        &self,
249        hash: &Hash,
250        start: u64,
251        end: Option<u64>,
252    ) -> Result<Option<Vec<u8>>, ReaderError> {
253        let data = match self
254            .store
255            .get(hash)
256            .await
257            .map_err(|e| ReaderError::Store(e.to_string()))?
258        {
259            Some(d) => d,
260            None => return Ok(None),
261        };
262
263        // Single blob - just slice it
264        if !is_tree_node(&data) {
265            let start_idx = start as usize;
266            let end_idx = end.map(|e| e as usize).unwrap_or(data.len());
267            if start_idx >= data.len() {
268                return Ok(Some(vec![]));
269            }
270            let end_idx = end_idx.min(data.len());
271            return Ok(Some(data[start_idx..end_idx].to_vec()));
272        }
273
274        // It's a chunked file - fetch only needed chunks
275        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
276        let range_data = self.assemble_chunks_range(&node, start, end).await?;
277        Ok(Some(range_data))
278    }
279
280    /// Assemble only the chunks needed for a byte range
281    async fn assemble_chunks_range(
282        &self,
283        node: &TreeNode,
284        start: u64,
285        end: Option<u64>,
286    ) -> Result<Vec<u8>, ReaderError> {
287        // First, flatten the tree to get all leaf chunks with their byte offsets
288        let chunks_info = self.collect_chunk_offsets(node).await?;
289
290        if chunks_info.is_empty() {
291            return Ok(vec![]);
292        }
293
294        // Calculate total size and actual end
295        let total_size: u64 = chunks_info.iter().map(|(_, _, size)| size).sum();
296        let actual_end = end.unwrap_or(total_size).min(total_size);
297
298        if start >= actual_end {
299            return Ok(vec![]);
300        }
301
302        // Find chunks that overlap with [start, actual_end)
303        let mut result = Vec::with_capacity((actual_end - start) as usize);
304        let mut current_offset = 0u64;
305
306        for (chunk_hash, _chunk_offset, chunk_size) in &chunks_info {
307            let chunk_start = current_offset;
308            let chunk_end = current_offset + chunk_size;
309
310            // Check if this chunk overlaps with our range
311            if chunk_end > start && chunk_start < actual_end {
312                // Fetch this chunk
313                let chunk_data = self
314                    .store
315                    .get(chunk_hash)
316                    .await
317                    .map_err(|e| ReaderError::Store(e.to_string()))?
318                    .ok_or_else(|| ReaderError::MissingChunk(to_hex(chunk_hash)))?;
319
320                // Calculate slice bounds within this chunk
321                let slice_start = if start > chunk_start {
322                    (start - chunk_start) as usize
323                } else {
324                    0
325                };
326                let slice_end = if actual_end < chunk_end {
327                    (actual_end - chunk_start) as usize
328                } else {
329                    chunk_data.len()
330                };
331
332                result.extend_from_slice(&chunk_data[slice_start..slice_end]);
333            }
334
335            current_offset = chunk_end;
336
337            // Early exit if we've passed the requested range
338            if current_offset >= actual_end {
339                break;
340            }
341        }
342
343        Ok(result)
344    }
345
346    /// Collect all leaf chunk hashes with their byte offsets
347    /// Returns Vec<(hash, offset, size)>
348    async fn collect_chunk_offsets(
349        &self,
350        node: &TreeNode,
351    ) -> Result<Vec<(Hash, u64, u64)>, ReaderError> {
352        let mut chunks = Vec::new();
353        let mut offset = 0u64;
354        self.collect_chunk_offsets_recursive(node, &mut chunks, &mut offset)
355            .await?;
356        Ok(chunks)
357    }
358
359    async fn collect_chunk_offsets_recursive(
360        &self,
361        node: &TreeNode,
362        chunks: &mut Vec<(Hash, u64, u64)>,
363        offset: &mut u64,
364    ) -> Result<(), ReaderError> {
365        for link in &node.links {
366            let child_data = self
367                .store
368                .get(&link.hash)
369                .await
370                .map_err(|e| ReaderError::Store(e.to_string()))?
371                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
372
373            if is_tree_node(&child_data) {
374                // Intermediate node - recurse
375                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
376                Box::pin(self.collect_chunk_offsets_recursive(&child_node, chunks, offset)).await?;
377            } else {
378                // Leaf chunk
379                let size = child_data.len() as u64;
380                chunks.push((link.hash, *offset, size));
381                *offset += size;
382            }
383        }
384        Ok(())
385    }
386
387    /// Recursively assemble chunks from tree (unencrypted)
388    async fn assemble_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
389        let mut parts: Vec<Vec<u8>> = Vec::new();
390
391        for link in &node.links {
392            let child_data = self
393                .store
394                .get(&link.hash)
395                .await
396                .map_err(|e| ReaderError::Store(e.to_string()))?
397                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
398
399            if is_tree_node(&child_data) {
400                // Nested tree - recurse
401                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
402                parts.push(Box::pin(self.assemble_chunks(&child_node)).await?);
403            } else {
404                // Leaf blob
405                parts.push(child_data);
406            }
407        }
408
409        // Concatenate all parts
410        let total_length: usize = parts.iter().map(|p| p.len()).sum();
411        let mut result = Vec::with_capacity(total_length);
412        for part in parts {
413            result.extend_from_slice(&part);
414        }
415
416        Ok(result)
417    }
418
419    /// Read a file with streaming (returns chunks as vec)
420    pub async fn read_file_chunks(&self, hash: &Hash) -> Result<Vec<Vec<u8>>, ReaderError> {
421        let data = match self
422            .store
423            .get(hash)
424            .await
425            .map_err(|e| ReaderError::Store(e.to_string()))?
426        {
427            Some(d) => d,
428            None => return Ok(vec![]),
429        };
430
431        if !is_tree_node(&data) {
432            return Ok(vec![data]);
433        }
434
435        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
436        self.collect_chunks(&node).await
437    }
438
439    /// Recursively collect chunks
440    async fn collect_chunks(&self, node: &TreeNode) -> Result<Vec<Vec<u8>>, ReaderError> {
441        let mut chunks = Vec::new();
442
443        for link in &node.links {
444            let child_data = self
445                .store
446                .get(&link.hash)
447                .await
448                .map_err(|e| ReaderError::Store(e.to_string()))?
449                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
450
451            if is_tree_node(&child_data) {
452                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
453                chunks.extend(Box::pin(self.collect_chunks(&child_node)).await?);
454            } else {
455                chunks.push(child_data);
456            }
457        }
458
459        Ok(chunks)
460    }
461
462    /// List directory entries
463    pub async fn list_directory(&self, hash: &Hash) -> Result<Vec<TreeEntry>, ReaderError> {
464        let node = match self.get_tree_node(hash).await? {
465            Some(n) => n,
466            None => return Ok(vec![]),
467        };
468
469        let mut entries = Vec::new();
470
471        for link in &node.links {
472            // Skip internal chunk nodes (names starting with _chunk_)
473            if Self::is_internal_directory_link(&node, link) {
474                let sub_entries = Box::pin(self.list_directory(&link.hash)).await?;
475                entries.extend(sub_entries);
476                continue;
477            }
478
479            entries.push(TreeEntry {
480                name: link.name.clone().unwrap_or_else(|| to_hex(&link.hash)),
481                hash: link.hash,
482                size: link.size,
483                link_type: link.link_type,
484                key: link.key,
485                meta: link.meta.clone(),
486            });
487        }
488
489        Ok(entries)
490    }
491
492    /// Resolve a path within a tree
493    /// e.g., resolve_path("root/foo/bar.txt")
494    pub async fn resolve_path(
495        &self,
496        root_hash: &Hash,
497        path: &str,
498    ) -> Result<Option<Hash>, ReaderError> {
499        let parts: Vec<&str> = path.split('/').filter(|p| !p.is_empty()).collect();
500
501        let mut current_hash = *root_hash;
502
503        for part in parts {
504            let node = match self.get_tree_node(&current_hash).await? {
505                Some(n) => n,
506                None => return Ok(None),
507            };
508
509            if let Some(link) = self.find_link(&node, part) {
510                current_hash = link.hash;
511            } else {
512                // Check internal nodes
513                match self.find_in_subtrees(&node, part).await? {
514                    Some(hash) => current_hash = hash,
515                    None => return Ok(None),
516                }
517            }
518        }
519
520        Ok(Some(current_hash))
521    }
522
523    /// Find a link by name in a tree node
524    fn find_link(&self, node: &TreeNode, name: &str) -> Option<Link> {
525        node.links
526            .iter()
527            .find(|l| l.name.as_deref() == Some(name))
528            .cloned()
529    }
530
531    /// Search for name in internal subtrees
532    async fn find_in_subtrees(
533        &self,
534        node: &TreeNode,
535        name: &str,
536    ) -> Result<Option<Hash>, ReaderError> {
537        for link in &node.links {
538            // Only search internal nodes
539            if !Self::is_internal_directory_link(node, link) {
540                continue;
541            }
542
543            let sub_node = match self.get_tree_node(&link.hash).await? {
544                Some(n) => n,
545                None => continue,
546            };
547
548            if let Some(found) = self.find_link(&sub_node, name) {
549                return Ok(Some(found.hash));
550            }
551
552            // Recurse deeper
553            if let Some(deep_found) = Box::pin(self.find_in_subtrees(&sub_node, name)).await? {
554                return Ok(Some(deep_found));
555            }
556        }
557
558        Ok(None)
559    }
560
561    /// Get total size of a tree
562    pub async fn get_size(&self, hash: &Hash) -> Result<u64, ReaderError> {
563        let data = match self
564            .store
565            .get(hash)
566            .await
567            .map_err(|e| ReaderError::Store(e.to_string()))?
568        {
569            Some(d) => d,
570            None => return Ok(0),
571        };
572
573        if !is_tree_node(&data) {
574            return Ok(data.len() as u64);
575        }
576
577        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
578        // Calculate from children
579        let mut total = 0u64;
580        for link in &node.links {
581            total += link.size;
582        }
583        Ok(total)
584    }
585
586    /// Walk entire tree depth-first
587    pub async fn walk(&self, hash: &Hash, path: &str) -> Result<Vec<WalkEntry>, ReaderError> {
588        let mut entries = Vec::new();
589        self.walk_recursive(hash, path, &mut entries).await?;
590        Ok(entries)
591    }
592
593    async fn walk_recursive(
594        &self,
595        hash: &Hash,
596        path: &str,
597        entries: &mut Vec<WalkEntry>,
598    ) -> Result<(), ReaderError> {
599        let data = match self
600            .store
601            .get(hash)
602            .await
603            .map_err(|e| ReaderError::Store(e.to_string()))?
604        {
605            Some(d) => d,
606            None => return Ok(()),
607        };
608
609        let node = match try_decode_tree_node(&data) {
610            Some(n) => n,
611            None => {
612                entries.push(WalkEntry {
613                    path: path.to_string(),
614                    hash: *hash,
615                    link_type: LinkType::Blob,
616                    size: data.len() as u64,
617                    key: None, // TreeReader doesn't track keys
618                });
619                return Ok(());
620            }
621        };
622
623        let node_size: u64 = node.links.iter().map(|l| l.size).sum();
624        entries.push(WalkEntry {
625            path: path.to_string(),
626            hash: *hash,
627            link_type: node.node_type,
628            size: node_size,
629            key: None, // directories are not encrypted
630        });
631
632        for link in &node.links {
633            let child_path = match &link.name {
634                Some(name) => {
635                    // Skip internal chunk nodes in path
636                    if Self::is_internal_directory_link(&node, link) {
637                        Box::pin(self.walk_recursive(&link.hash, path, entries)).await?;
638                        continue;
639                    }
640                    if path.is_empty() {
641                        name.clone()
642                    } else {
643                        format!("{}/{}", path, name)
644                    }
645                }
646                None => path.to_string(),
647            };
648
649            Box::pin(self.walk_recursive(&link.hash, &child_path, entries)).await?;
650        }
651
652        Ok(())
653    }
654}
655
656/// Verify tree integrity
657/// Checks that all referenced hashes exist
658pub async fn verify_tree<S: Store>(
659    store: Arc<S>,
660    root_hash: &Hash,
661) -> Result<VerifyResult, ReaderError> {
662    let mut missing = Vec::new();
663    let mut visited = std::collections::HashSet::new();
664
665    verify_recursive(store, root_hash, &mut missing, &mut visited).await?;
666
667    Ok(VerifyResult {
668        valid: missing.is_empty(),
669        missing,
670    })
671}
672
673async fn verify_recursive<S: Store>(
674    store: Arc<S>,
675    hash: &Hash,
676    missing: &mut Vec<Hash>,
677    visited: &mut std::collections::HashSet<String>,
678) -> Result<(), ReaderError> {
679    let hex = to_hex(hash);
680    if visited.contains(&hex) {
681        return Ok(());
682    }
683    visited.insert(hex);
684
685    let data = match store
686        .get(hash)
687        .await
688        .map_err(|e| ReaderError::Store(e.to_string()))?
689    {
690        Some(d) => d,
691        None => {
692            missing.push(*hash);
693            return Ok(());
694        }
695    };
696
697    if is_tree_node(&data) {
698        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
699        for link in &node.links {
700            Box::pin(verify_recursive(
701                store.clone(),
702                &link.hash,
703                missing,
704                visited,
705            ))
706            .await?;
707        }
708    }
709
710    Ok(())
711}
712
713/// Verify tree integrity and content addresses.
714///
715/// Checks that:
716/// - all referenced hashes exist
717/// - every fetched blob/node satisfies `sha256(bytes) == referenced_hash`
718pub async fn verify_tree_integrity<S: Store>(
719    store: Arc<S>,
720    root_hash: &Hash,
721) -> Result<VerifyIntegrityResult, ReaderError> {
722    let mut missing = Vec::new();
723    let mut corrupted = Vec::new();
724    let mut visited = std::collections::HashSet::new();
725
726    verify_integrity_recursive(store, root_hash, &mut missing, &mut corrupted, &mut visited)
727        .await?;
728
729    Ok(VerifyIntegrityResult {
730        valid: missing.is_empty() && corrupted.is_empty(),
731        missing,
732        corrupted,
733    })
734}
735
736async fn verify_integrity_recursive<S: Store>(
737    store: Arc<S>,
738    hash: &Hash,
739    missing: &mut Vec<Hash>,
740    corrupted: &mut Vec<Hash>,
741    visited: &mut std::collections::HashSet<String>,
742) -> Result<(), ReaderError> {
743    let hex = to_hex(hash);
744    if visited.contains(&hex) {
745        return Ok(());
746    }
747    visited.insert(hex);
748
749    let data = match store
750        .get(hash)
751        .await
752        .map_err(|e| ReaderError::Store(e.to_string()))?
753    {
754        Some(d) => d,
755        None => {
756            missing.push(*hash);
757            return Ok(());
758        }
759    };
760
761    // Strong integrity check: referenced hash must match fetched bytes.
762    if sha256(&data) != *hash {
763        corrupted.push(*hash);
764        return Ok(());
765    }
766
767    if is_tree_node(&data) {
768        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
769        for link in &node.links {
770            Box::pin(verify_integrity_recursive(
771                store.clone(),
772                &link.hash,
773                missing,
774                corrupted,
775                visited,
776            ))
777            .await?;
778        }
779    }
780
781    Ok(())
782}
783
/// Result of tree verification, as returned by `verify_tree`.
#[derive(Debug, Clone)]
pub struct VerifyResult {
    /// `true` when `missing` is empty
    pub valid: bool,
    /// Hashes that were referenced but not found in the store
    pub missing: Vec<Hash>,
}
790
/// Result of strong tree integrity verification, as returned by `verify_tree_integrity`.
#[derive(Debug, Clone)]
pub struct VerifyIntegrityResult {
    /// `true` when both `missing` and `corrupted` are empty
    pub valid: bool,
    /// Hashes that were referenced but not found in the store
    pub missing: Vec<Hash>,
    /// Hashes whose stored bytes do not hash (SHA-256) back to the referenced hash
    pub corrupted: Vec<Hash>,
}
798
/// Reader error type
#[derive(Debug, thiserror::Error)]
pub enum ReaderError {
    /// The backing store returned an error; carries the error rendered as a string
    #[error("Store error: {0}")]
    Store(String),
    /// Stored bytes could not be decoded as a tree node
    #[error("Codec error: {0}")]
    Codec(#[from] crate::codec::CodecError),
    /// A linked child was absent from the store; carries the hex-encoded hash
    #[error("Missing chunk: {0}")]
    MissingChunk(String),
    /// Decryption failed; carries the underlying error rendered as a string
    #[error("Decryption error: {0}")]
    Decryption(String),
    /// A link inside encrypted content carried no key for its child
    #[error("Missing decryption key")]
    MissingKey,
}
813
814#[cfg(test)]
815mod tests {
816    use super::*;
817    use crate::builder::{BuilderConfig, TreeBuilder};
818    use crate::store::MemoryStore;
819    use crate::types::DirEntry;
820
821    fn make_store() -> Arc<MemoryStore> {
822        Arc::new(MemoryStore::new())
823    }
824
825    #[tokio::test]
826    async fn test_get_blob() {
827        let store = make_store();
828        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
829        let reader = TreeReader::new(store);
830
831        let data = vec![1u8, 2, 3, 4, 5];
832        let hash = builder.put_blob(&data).await.unwrap();
833
834        let result = reader.get_blob(&hash).await.unwrap();
835        assert_eq!(result, Some(data));
836    }
837
838    #[tokio::test]
839    async fn test_get_blob_missing() {
840        let store = make_store();
841        let reader = TreeReader::new(store);
842
843        let hash = [0u8; 32];
844        let result = reader.get_blob(&hash).await.unwrap();
845        assert!(result.is_none());
846    }
847
848    #[tokio::test]
849    async fn test_get_tree_node() {
850        let store = make_store();
851        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
852        let reader = TreeReader::new(store);
853
854        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
855        let dir_hash = builder
856            .put_directory(vec![DirEntry::new("test.txt", file_hash).with_size(1)])
857            .await
858            .unwrap();
859
860        let node = reader.get_tree_node(&dir_hash).await.unwrap();
861        assert!(node.is_some());
862        assert_eq!(node.unwrap().links.len(), 1);
863    }
864
865    #[tokio::test]
866    async fn test_get_tree_node_returns_none_for_blob() {
867        let store = make_store();
868        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
869        let reader = TreeReader::new(store);
870
871        let hash = builder.put_blob(&[1u8, 2, 3]).await.unwrap();
872        let node = reader.get_tree_node(&hash).await.unwrap();
873        assert!(node.is_none());
874    }
875
876    #[tokio::test]
877    async fn test_is_tree() {
878        let store = make_store();
879        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
880        let reader = TreeReader::new(store);
881
882        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
883        let dir_hash = builder
884            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
885            .await
886            .unwrap();
887
888        assert!(reader.is_tree(&dir_hash).await.unwrap());
889        assert!(!reader.is_tree(&file_hash).await.unwrap());
890    }
891
892    #[tokio::test]
893    async fn test_read_file_small() {
894        let store = make_store();
895        // Use public() for tests that check raw data storage
896        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
897        let reader = TreeReader::new(store);
898
899        let data = vec![1u8, 2, 3, 4, 5];
900        let (cid, _size) = builder.put(&data).await.unwrap();
901
902        let result = reader.read_file(&cid.hash).await.unwrap();
903        assert_eq!(result, Some(data));
904    }
905
906    #[tokio::test]
907    async fn test_read_file_chunked() {
908        let store = make_store();
909        let config = BuilderConfig::new(store.clone())
910            .with_chunk_size(100)
911            .public();
912        let builder = TreeBuilder::new(config);
913        let reader = TreeReader::new(store);
914
915        let mut data = vec![0u8; 350];
916        for i in 0..data.len() {
917            data[i] = (i % 256) as u8;
918        }
919
920        let (cid, _size) = builder.put(&data).await.unwrap();
921        let result = reader.read_file(&cid.hash).await.unwrap();
922
923        assert_eq!(result, Some(data));
924    }
925
926    #[tokio::test]
927    async fn test_list_directory() {
928        let store = make_store();
929        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
930        let reader = TreeReader::new(store);
931
932        let h1 = builder.put_blob(&[1u8]).await.unwrap();
933        let h2 = builder.put_blob(&[2u8]).await.unwrap();
934
935        let dir_hash = builder
936            .put_directory(vec![
937                DirEntry::new("first.txt", h1).with_size(1),
938                DirEntry::new("second.txt", h2).with_size(1),
939            ])
940            .await
941            .unwrap();
942
943        let entries = reader.list_directory(&dir_hash).await.unwrap();
944
945        assert_eq!(entries.len(), 2);
946        assert!(entries.iter().any(|e| e.name == "first.txt"));
947        assert!(entries.iter().any(|e| e.name == "second.txt"));
948    }
949
950    #[tokio::test]
951    async fn test_resolve_path() {
952        let store = make_store();
953        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
954        let reader = TreeReader::new(store);
955
956        let file_data = vec![1u8, 2, 3];
957        let file_hash = builder.put_blob(&file_data).await.unwrap();
958
959        let dir_hash = builder
960            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
961            .await
962            .unwrap();
963
964        let resolved = reader.resolve_path(&dir_hash, "test.txt").await.unwrap();
965        assert_eq!(resolved, Some(file_hash));
966    }
967
968    #[tokio::test]
969    async fn test_resolve_path_nested() {
970        let store = make_store();
971        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
972        let reader = TreeReader::new(store);
973
974        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
975
976        let sub_sub_dir = builder
977            .put_directory(vec![DirEntry::new("deep.txt", file_hash)])
978            .await
979            .unwrap();
980
981        let sub_dir = builder
982            .put_directory(vec![DirEntry::new("level2", sub_sub_dir)])
983            .await
984            .unwrap();
985
986        let root_dir = builder
987            .put_directory(vec![DirEntry::new("level1", sub_dir)])
988            .await
989            .unwrap();
990
991        let resolved = reader
992            .resolve_path(&root_dir, "level1/level2/deep.txt")
993            .await
994            .unwrap();
995        assert_eq!(resolved, Some(file_hash));
996    }
997
998    #[tokio::test]
999    async fn test_get_size() {
1000        let store = make_store();
1001        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
1002        let reader = TreeReader::new(store);
1003
1004        let data = vec![0u8; 123];
1005        let hash = builder.put_blob(&data).await.unwrap();
1006
1007        assert_eq!(reader.get_size(&hash).await.unwrap(), 123);
1008    }
1009
1010    #[tokio::test]
1011    async fn test_walk() {
1012        let store = make_store();
1013        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
1014        let reader = TreeReader::new(store);
1015
1016        let f1 = builder.put_blob(&[1u8]).await.unwrap();
1017        let f2 = builder.put_blob(&[2u8, 3]).await.unwrap();
1018
1019        let sub_dir = builder
1020            .put_directory(vec![DirEntry::new("nested.txt", f2).with_size(2)])
1021            .await
1022            .unwrap();
1023
1024        let root_dir = builder
1025            .put_directory(vec![
1026                DirEntry::new("root.txt", f1).with_size(1),
1027                DirEntry::new("sub", sub_dir),
1028            ])
1029            .await
1030            .unwrap();
1031
1032        let entries = reader.walk(&root_dir, "").await.unwrap();
1033        let paths: Vec<_> = entries.iter().map(|e| e.path.as_str()).collect();
1034
1035        assert!(paths.contains(&""));
1036        assert!(paths.contains(&"root.txt"));
1037        assert!(paths.contains(&"sub"));
1038        assert!(paths.contains(&"sub/nested.txt"));
1039    }
1040
1041    #[tokio::test]
1042    async fn test_verify_tree_valid() {
1043        let store = make_store();
1044        let config = BuilderConfig::new(store.clone())
1045            .with_chunk_size(100)
1046            .public();
1047        let builder = TreeBuilder::new(config);
1048
1049        let data = vec![0u8; 350];
1050        let (cid, _size) = builder.put(&data).await.unwrap();
1051
1052        let result = verify_tree(store, &cid.hash).await.unwrap();
1053        assert!(result.valid);
1054        assert!(result.missing.is_empty());
1055    }
1056
1057    #[tokio::test]
1058    async fn test_verify_tree_missing() {
1059        let store = make_store();
1060        let config = BuilderConfig::new(store.clone())
1061            .with_chunk_size(100)
1062            .public();
1063        let builder = TreeBuilder::new(config);
1064
1065        let data = vec![0u8; 350];
1066        let (cid, _size) = builder.put(&data).await.unwrap();
1067
1068        // Delete one of the chunks
1069        let keys = store.keys();
1070        if let Some(chunk_to_delete) = keys.iter().find(|k| **k != cid.hash) {
1071            store.delete(chunk_to_delete).await.unwrap();
1072        }
1073
1074        let result = verify_tree(store, &cid.hash).await.unwrap();
1075        assert!(!result.valid);
1076        assert!(!result.missing.is_empty());
1077    }
1078
1079    #[tokio::test]
1080    async fn test_verify_tree_integrity_valid() {
1081        let store = make_store();
1082        let config = BuilderConfig::new(store.clone())
1083            .with_chunk_size(100)
1084            .public();
1085        let builder = TreeBuilder::new(config);
1086
1087        let data = vec![0u8; 350];
1088        let (cid, _size) = builder.put(&data).await.unwrap();
1089
1090        let result = verify_tree_integrity(store, &cid.hash).await.unwrap();
1091        assert!(result.valid);
1092        assert!(result.missing.is_empty());
1093        assert!(result.corrupted.is_empty());
1094    }
1095
1096    #[tokio::test]
1097    async fn test_verify_tree_integrity_missing() {
1098        let store = make_store();
1099        let config = BuilderConfig::new(store.clone())
1100            .with_chunk_size(100)
1101            .public();
1102        let builder = TreeBuilder::new(config);
1103
1104        let data = vec![0u8; 350];
1105        let (cid, _size) = builder.put(&data).await.unwrap();
1106
1107        // Delete one of the chunks
1108        let keys = store.keys();
1109        if let Some(chunk_to_delete) = keys.iter().find(|k| **k != cid.hash) {
1110            store.delete(chunk_to_delete).await.unwrap();
1111        }
1112
1113        let result = verify_tree_integrity(store, &cid.hash).await.unwrap();
1114        assert!(!result.valid);
1115        assert!(!result.missing.is_empty());
1116        assert!(result.corrupted.is_empty());
1117    }
1118
1119    #[tokio::test]
1120    async fn test_verify_tree_integrity_corrupted_hash_mismatch() {
1121        let store = make_store();
1122        let config = BuilderConfig::new(store.clone())
1123            .with_chunk_size(100)
1124            .public();
1125        let builder = TreeBuilder::new(config);
1126
1127        let data = vec![0u8; 350];
1128        let (cid, _size) = builder.put(&data).await.unwrap();
1129
1130        // Pick a leaf chunk (non-root in this shape) and mutate bytes without changing key.
1131        let keys = store.keys();
1132        let target = keys
1133            .iter()
1134            .find(|k| **k != cid.hash)
1135            .copied()
1136            .expect("expected at least one child chunk");
1137
1138        let mut corrupted = store.get(&target).await.unwrap().unwrap();
1139        corrupted[0] ^= 0xff;
1140        store.delete(&target).await.unwrap();
1141        store.put(target, corrupted).await.unwrap();
1142
1143        // Legacy verifier checks only existence, so this still appears valid.
1144        let legacy = verify_tree(store.clone(), &cid.hash).await.unwrap();
1145        assert!(legacy.valid);
1146
1147        let strict = verify_tree_integrity(store, &cid.hash).await.unwrap();
1148        assert!(!strict.valid);
1149        assert!(strict.missing.is_empty());
1150        assert!(!strict.corrupted.is_empty());
1151        assert!(strict.corrupted.contains(&target));
1152    }
1153
1154    #[tokio::test]
1155    async fn test_read_file_range_small_blob() {
1156        let store = make_store();
1157        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
1158        let reader = TreeReader::new(store);
1159
1160        let data = b"Hello, World!";
1161        let hash = builder.put_blob(data).await.unwrap();
1162
1163        // Read middle portion
1164        let result = reader.read_file_range(&hash, 7, Some(12)).await.unwrap();
1165        assert_eq!(result, Some(b"World".to_vec()));
1166
1167        // Read from start
1168        let result = reader.read_file_range(&hash, 0, Some(5)).await.unwrap();
1169        assert_eq!(result, Some(b"Hello".to_vec()));
1170
1171        // Read to end (no end specified)
1172        let result = reader.read_file_range(&hash, 7, None).await.unwrap();
1173        assert_eq!(result, Some(b"World!".to_vec()));
1174    }
1175
1176    #[tokio::test]
1177    async fn test_read_file_range_chunked() {
1178        let store = make_store();
1179        // Small chunk size to force chunking
1180        let config = BuilderConfig::new(store.clone())
1181            .with_chunk_size(100)
1182            .public();
1183        let builder = TreeBuilder::new(config);
1184        let reader = TreeReader::new(store);
1185
1186        // Create 350 bytes of sequential data
1187        let mut data = vec![0u8; 350];
1188        for i in 0..data.len() {
1189            data[i] = (i % 256) as u8;
1190        }
1191
1192        let (cid, _size) = builder.put(&data).await.unwrap();
1193
1194        // Read bytes 50-150 (spans chunk boundary at 100)
1195        let result = reader
1196            .read_file_range(&cid.hash, 50, Some(150))
1197            .await
1198            .unwrap()
1199            .unwrap();
1200        assert_eq!(result.len(), 100);
1201        assert_eq!(result, data[50..150].to_vec());
1202
1203        // Read bytes 200-300 (within third and fourth chunks)
1204        let result = reader
1205            .read_file_range(&cid.hash, 200, Some(300))
1206            .await
1207            .unwrap()
1208            .unwrap();
1209        assert_eq!(result.len(), 100);
1210        assert_eq!(result, data[200..300].to_vec());
1211
1212        // Read last 50 bytes
1213        let result = reader
1214            .read_file_range(&cid.hash, 300, None)
1215            .await
1216            .unwrap()
1217            .unwrap();
1218        assert_eq!(result.len(), 50);
1219        assert_eq!(result, data[300..].to_vec());
1220    }
1221
1222    #[tokio::test]
1223    async fn test_read_file_range_entire_file() {
1224        let store = make_store();
1225        let config = BuilderConfig::new(store.clone())
1226            .with_chunk_size(100)
1227            .public();
1228        let builder = TreeBuilder::new(config);
1229        let reader = TreeReader::new(store);
1230
1231        let mut data = vec![0u8; 350];
1232        for i in 0..data.len() {
1233            data[i] = (i % 256) as u8;
1234        }
1235
1236        let (cid, _size) = builder.put(&data).await.unwrap();
1237
1238        // Read entire file using range
1239        let result = reader
1240            .read_file_range(&cid.hash, 0, None)
1241            .await
1242            .unwrap()
1243            .unwrap();
1244        assert_eq!(result, data);
1245    }
1246
1247    #[tokio::test]
1248    async fn test_read_file_range_out_of_bounds() {
1249        let store = make_store();
1250        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
1251        let reader = TreeReader::new(store);
1252
1253        let data = b"Short";
1254        let hash = builder.put_blob(data).await.unwrap();
1255
1256        // Start past end of file
1257        let result = reader.read_file_range(&hash, 100, Some(200)).await.unwrap();
1258        assert_eq!(result, Some(vec![]));
1259
1260        // End past file length (should clamp)
1261        let result = reader.read_file_range(&hash, 0, Some(100)).await.unwrap();
1262        assert_eq!(result, Some(b"Short".to_vec()));
1263    }
1264
1265    #[tokio::test]
1266    async fn test_read_file_range_single_byte() {
1267        let store = make_store();
1268        let config = BuilderConfig::new(store.clone())
1269            .with_chunk_size(100)
1270            .public();
1271        let builder = TreeBuilder::new(config);
1272        let reader = TreeReader::new(store);
1273
1274        let mut data = vec![0u8; 350];
1275        for i in 0..data.len() {
1276            data[i] = (i % 256) as u8;
1277        }
1278
1279        let (cid, _size) = builder.put(&data).await.unwrap();
1280
1281        // Read single byte at chunk boundary
1282        let result = reader
1283            .read_file_range(&cid.hash, 100, Some(101))
1284            .await
1285            .unwrap()
1286            .unwrap();
1287        assert_eq!(result.len(), 1);
1288        assert_eq!(result[0], 100);
1289    }
1290}