Skip to main content

hashtree_core/
reader.rs

1//! Tree reader and traversal utilities
2//!
3//! Read files and directories from content-addressed storage
4
5use std::collections::HashMap;
6use std::sync::Arc;
7
8use crate::codec::{decode_tree_node, is_directory_node, is_tree_node, try_decode_tree_node};
9use crate::hash::sha256;
10use crate::store::Store;
11use crate::types::{to_hex, Cid, Hash, Link, LinkType, TreeNode};
12
13use crate::crypto::{decrypt_chk, EncryptionKey};
14
/// Tree entry for directory listings
#[derive(Debug, Clone)]
pub struct TreeEntry {
    /// Entry name from the directory link; `list_directory` falls back to
    /// the hex hash when the link is unnamed.
    pub name: String,
    /// Content hash this entry points at.
    pub hash: Hash,
    /// Size in bytes as recorded on the link.
    pub size: u64,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
    /// Optional metadata (createdAt, mimeType, thumbnail, etc.)
    pub meta: Option<HashMap<String, serde_json::Value>>,
}
28
/// Walk entry for tree traversal
#[derive(Debug, Clone)]
pub struct WalkEntry {
    /// Slash-separated path from the walk root.
    pub path: String,
    /// Content hash of this node.
    pub hash: Hash,
    /// Type of content this entry points to (Blob, File, or Dir)
    pub link_type: LinkType,
    /// Size in bytes (blob length for leaves; sum of link sizes for tree nodes).
    pub size: u64,
    /// Optional decryption key (for encrypted content)
    pub key: Option<[u8; 32]>,
}
40
/// TreeReader - reads and traverses merkle trees
pub struct TreeReader<S: Store> {
    /// Shared handle to the underlying content-addressed store.
    store: Arc<S>,
}
45
46impl<S: Store> TreeReader<S> {
47    fn is_legacy_internal_group_name(name: &str) -> bool {
48        name.starts_with('_') && !name.starts_with("_chunk_") && name.chars().count() == 2
49    }
50
51    fn node_uses_legacy_directory_fanout(node: &TreeNode) -> bool {
52        !node.links.is_empty()
53            && node.links.iter().all(|link| {
54                let Some(name) = link.name.as_deref() else {
55                    return false;
56                };
57                Self::is_legacy_internal_group_name(name) && link.link_type == LinkType::Dir
58            })
59    }
60
61    fn is_internal_directory_link(node: &TreeNode, link: &Link) -> bool {
62        let Some(name) = link.name.as_deref() else {
63            return false;
64        };
65
66        if name.starts_with("_chunk_") {
67            return true;
68        }
69
70        Self::node_uses_legacy_directory_fanout(node)
71            && Self::is_legacy_internal_group_name(name)
72            && link.link_type == LinkType::Dir
73    }
74
    /// Create a `TreeReader` backed by the given shared store handle.
    pub fn new(store: Arc<S>) -> Self {
        Self { store }
    }
78
79    /// Get raw data by hash
80    pub async fn get_blob(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
81        self.store
82            .get(hash)
83            .await
84            .map_err(|e| ReaderError::Store(e.to_string()))
85    }
86
87    /// Get and decode a tree node
88    pub async fn get_tree_node(&self, hash: &Hash) -> Result<Option<TreeNode>, ReaderError> {
89        let data = match self
90            .store
91            .get(hash)
92            .await
93            .map_err(|e| ReaderError::Store(e.to_string()))?
94        {
95            Some(d) => d,
96            None => return Ok(None),
97        };
98
99        if !is_tree_node(&data) {
100            return Ok(None); // It's a blob, not a tree
101        }
102
103        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
104        Ok(Some(node))
105    }
106
107    /// Check if hash points to a tree node or blob
108    pub async fn is_tree(&self, hash: &Hash) -> Result<bool, ReaderError> {
109        let data = match self
110            .store
111            .get(hash)
112            .await
113            .map_err(|e| ReaderError::Store(e.to_string()))?
114        {
115            Some(d) => d,
116            None => return Ok(false),
117        };
118        Ok(is_tree_node(&data))
119    }
120
121    /// Check if hash points to a directory (tree with named links)
122    /// vs a chunked file (tree with unnamed links) or raw blob
123    pub async fn is_directory(&self, hash: &Hash) -> Result<bool, ReaderError> {
124        let data = match self
125            .store
126            .get(hash)
127            .await
128            .map_err(|e| ReaderError::Store(e.to_string()))?
129        {
130            Some(d) => d,
131            None => return Ok(false),
132        };
133        Ok(is_directory_node(&data))
134    }
135
136    /// Read content by CID (handles both encrypted and public content)
137    ///
138    /// This is the unified read method that handles decryption automatically
139    /// when the CID contains an encryption key.
140    pub async fn get(&self, cid: &Cid) -> Result<Option<Vec<u8>>, ReaderError> {
141        if let Some(key) = cid.key {
142            self.get_encrypted(&cid.hash, &key).await
143        } else {
144            self.read_file(&cid.hash).await
145        }
146    }
147
    /// Read encrypted content by hash and key (internal)
    ///
    /// Fetches the ciphertext, decrypts it with the CHK key, and — if the
    /// plaintext turns out to be an encoded tree node — reassembles the
    /// encrypted chunk tree. Returns `Ok(None)` when `hash` is not stored.
    async fn get_encrypted(
        &self,
        hash: &Hash,
        key: &EncryptionKey,
    ) -> Result<Option<Vec<u8>>, ReaderError> {
        let encrypted_data = match self
            .store
            .get(hash)
            .await
            .map_err(|e| ReaderError::Store(e.to_string()))?
        {
            Some(d) => d,
            None => return Ok(None),
        };

        // Decrypt the data
        let decrypted = decrypt_chk(&encrypted_data, key)
            .map_err(|e| ReaderError::Decryption(e.to_string()))?;

        // Check if it's a tree node
        if is_tree_node(&decrypted) {
            let node = decode_tree_node(&decrypted)?;
            let assembled = self.assemble_encrypted_chunks(&node).await?;
            return Ok(Some(assembled));
        }

        // Single chunk data
        Ok(Some(decrypted))
    }
178
179    /// Assemble encrypted chunks from tree
180    async fn assemble_encrypted_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
181        let mut parts: Vec<Vec<u8>> = Vec::new();
182
183        for link in &node.links {
184            let chunk_key = link.key.ok_or(ReaderError::MissingKey)?;
185
186            let encrypted_child = self
187                .store
188                .get(&link.hash)
189                .await
190                .map_err(|e| ReaderError::Store(e.to_string()))?
191                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
192
193            let decrypted = decrypt_chk(&encrypted_child, &chunk_key)
194                .map_err(|e| ReaderError::Decryption(e.to_string()))?;
195
196            if is_tree_node(&decrypted) {
197                // Intermediate tree node - recurse
198                let child_node = decode_tree_node(&decrypted)?;
199                let child_data = Box::pin(self.assemble_encrypted_chunks(&child_node)).await?;
200                parts.push(child_data);
201            } else {
202                // Leaf data chunk
203                parts.push(decrypted);
204            }
205        }
206
207        let total_len: usize = parts.iter().map(|p| p.len()).sum();
208        let mut result = Vec::with_capacity(total_len);
209        for part in parts {
210            result.extend_from_slice(&part);
211        }
212
213        Ok(result)
214    }
215
216    /// Read a complete file (reassemble chunks if needed)
217    /// For unencrypted content only - use `get()` for unified access
218    pub async fn read_file(&self, hash: &Hash) -> Result<Option<Vec<u8>>, ReaderError> {
219        let data = match self
220            .store
221            .get(hash)
222            .await
223            .map_err(|e| ReaderError::Store(e.to_string()))?
224        {
225            Some(d) => d,
226            None => return Ok(None),
227        };
228
229        // Check if it's a tree (chunked file) or raw blob
230        if !is_tree_node(&data) {
231            return Ok(Some(data)); // Direct blob
232        }
233
234        // It's a tree - reassemble chunks
235        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
236        let assembled = self.assemble_chunks(&node).await?;
237        Ok(Some(assembled))
238    }
239
240    /// Read a byte range from a file (fetches only necessary chunks)
241    ///
242    /// - `start`: Starting byte offset (inclusive)
243    /// - `end`: Ending byte offset (exclusive), or None to read to end
244    ///
245    /// For unencrypted content only - encrypted range reads not yet supported.
246    pub async fn read_file_range(
247        &self,
248        hash: &Hash,
249        start: u64,
250        end: Option<u64>,
251    ) -> Result<Option<Vec<u8>>, ReaderError> {
252        let data = match self
253            .store
254            .get(hash)
255            .await
256            .map_err(|e| ReaderError::Store(e.to_string()))?
257        {
258            Some(d) => d,
259            None => return Ok(None),
260        };
261
262        // Single blob - just slice it
263        if !is_tree_node(&data) {
264            let start_idx = start as usize;
265            let end_idx = end.map(|e| e as usize).unwrap_or(data.len());
266            if start_idx >= data.len() {
267                return Ok(Some(vec![]));
268            }
269            let end_idx = end_idx.min(data.len());
270            return Ok(Some(data[start_idx..end_idx].to_vec()));
271        }
272
273        // It's a chunked file - fetch only needed chunks
274        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
275        let range_data = self.assemble_chunks_range(&node, start, end).await?;
276        Ok(Some(range_data))
277    }
278
    /// Assemble only the chunks needed for a byte range
    ///
    /// `end == None` means "to end of file"; `end` is clamped to the total
    /// size and ranges with `start >= end` yield an empty vec.
    ///
    /// NOTE(review): `collect_chunk_offsets` fetches every chunk's bytes
    /// from the store just to learn leaf sizes, so a small range read still
    /// touches the whole file — consider deriving sizes from `link.size`
    /// metadata instead; confirm that metadata is reliable first.
    async fn assemble_chunks_range(
        &self,
        node: &TreeNode,
        start: u64,
        end: Option<u64>,
    ) -> Result<Vec<u8>, ReaderError> {
        // First, flatten the tree to get all leaf chunks with their byte offsets
        let chunks_info = self.collect_chunk_offsets(node).await?;

        if chunks_info.is_empty() {
            return Ok(vec![]);
        }

        // Calculate total size and actual end
        let total_size: u64 = chunks_info.iter().map(|(_, _, size)| size).sum();
        let actual_end = end.unwrap_or(total_size).min(total_size);

        if start >= actual_end {
            return Ok(vec![]);
        }

        // Find chunks that overlap with [start, actual_end)
        let mut result = Vec::with_capacity((actual_end - start) as usize);
        let mut current_offset = 0u64;

        for (chunk_hash, _chunk_offset, chunk_size) in &chunks_info {
            // Byte interval covered by this chunk: [chunk_start, chunk_end).
            let chunk_start = current_offset;
            let chunk_end = current_offset + chunk_size;

            // Check if this chunk overlaps with our range
            if chunk_end > start && chunk_start < actual_end {
                // Fetch this chunk
                let chunk_data = self
                    .store
                    .get(chunk_hash)
                    .await
                    .map_err(|e| ReaderError::Store(e.to_string()))?
                    .ok_or_else(|| ReaderError::MissingChunk(to_hex(chunk_hash)))?;

                // Calculate slice bounds within this chunk
                let slice_start = if start > chunk_start {
                    (start - chunk_start) as usize
                } else {
                    0
                };
                let slice_end = if actual_end < chunk_end {
                    (actual_end - chunk_start) as usize
                } else {
                    chunk_data.len()
                };

                result.extend_from_slice(&chunk_data[slice_start..slice_end]);
            }

            current_offset = chunk_end;

            // Early exit if we've passed the requested range
            if current_offset >= actual_end {
                break;
            }
        }

        Ok(result)
    }
344
345    /// Collect all leaf chunk hashes with their byte offsets
346    /// Returns Vec<(hash, offset, size)>
347    async fn collect_chunk_offsets(
348        &self,
349        node: &TreeNode,
350    ) -> Result<Vec<(Hash, u64, u64)>, ReaderError> {
351        let mut chunks = Vec::new();
352        let mut offset = 0u64;
353        self.collect_chunk_offsets_recursive(node, &mut chunks, &mut offset)
354            .await?;
355        Ok(chunks)
356    }
357
    /// Depth-first worker for `collect_chunk_offsets`.
    ///
    /// `offset` is a running byte cursor advanced past each leaf, so every
    /// pushed tuple records (hash, absolute offset, size). Leaf sizes are
    /// measured by fetching the child's bytes; missing children are errors.
    async fn collect_chunk_offsets_recursive(
        &self,
        node: &TreeNode,
        chunks: &mut Vec<(Hash, u64, u64)>,
        offset: &mut u64,
    ) -> Result<(), ReaderError> {
        for link in &node.links {
            let child_data = self
                .store
                .get(&link.hash)
                .await
                .map_err(|e| ReaderError::Store(e.to_string()))?
                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;

            if is_tree_node(&child_data) {
                // Intermediate node - recurse
                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
                Box::pin(self.collect_chunk_offsets_recursive(&child_node, chunks, offset)).await?;
            } else {
                // Leaf chunk
                let size = child_data.len() as u64;
                chunks.push((link.hash, *offset, size));
                *offset += size;
            }
        }
        Ok(())
    }
385
386    /// Recursively assemble chunks from tree (unencrypted)
387    async fn assemble_chunks(&self, node: &TreeNode) -> Result<Vec<u8>, ReaderError> {
388        let mut parts: Vec<Vec<u8>> = Vec::new();
389
390        for link in &node.links {
391            let child_data = self
392                .store
393                .get(&link.hash)
394                .await
395                .map_err(|e| ReaderError::Store(e.to_string()))?
396                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
397
398            if is_tree_node(&child_data) {
399                // Nested tree - recurse
400                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
401                parts.push(Box::pin(self.assemble_chunks(&child_node)).await?);
402            } else {
403                // Leaf blob
404                parts.push(child_data);
405            }
406        }
407
408        // Concatenate all parts
409        let total_length: usize = parts.iter().map(|p| p.len()).sum();
410        let mut result = Vec::with_capacity(total_length);
411        for part in parts {
412            result.extend_from_slice(&part);
413        }
414
415        Ok(result)
416    }
417
418    /// Read a file with streaming (returns chunks as vec)
419    pub async fn read_file_chunks(&self, hash: &Hash) -> Result<Vec<Vec<u8>>, ReaderError> {
420        let data = match self
421            .store
422            .get(hash)
423            .await
424            .map_err(|e| ReaderError::Store(e.to_string()))?
425        {
426            Some(d) => d,
427            None => return Ok(vec![]),
428        };
429
430        if !is_tree_node(&data) {
431            return Ok(vec![data]);
432        }
433
434        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
435        self.collect_chunks(&node).await
436    }
437
438    /// Recursively collect chunks
439    async fn collect_chunks(&self, node: &TreeNode) -> Result<Vec<Vec<u8>>, ReaderError> {
440        let mut chunks = Vec::new();
441
442        for link in &node.links {
443            let child_data = self
444                .store
445                .get(&link.hash)
446                .await
447                .map_err(|e| ReaderError::Store(e.to_string()))?
448                .ok_or_else(|| ReaderError::MissingChunk(to_hex(&link.hash)))?;
449
450            if is_tree_node(&child_data) {
451                let child_node = decode_tree_node(&child_data).map_err(ReaderError::Codec)?;
452                chunks.extend(Box::pin(self.collect_chunks(&child_node)).await?);
453            } else {
454                chunks.push(child_data);
455            }
456        }
457
458        Ok(chunks)
459    }
460
    /// List directory entries
    ///
    /// Internal fanout links (`_chunk_*` or legacy `_x` groups) are not
    /// returned directly; their child entries are recursively flattened
    /// into the listing. Returns an empty vec when `hash` is missing or
    /// points at a raw blob.
    pub async fn list_directory(&self, hash: &Hash) -> Result<Vec<TreeEntry>, ReaderError> {
        let node = match self.get_tree_node(hash).await? {
            Some(n) => n,
            None => return Ok(vec![]),
        };

        let mut entries = Vec::new();

        for link in &node.links {
            // Skip internal chunk nodes (names starting with _chunk_)
            if Self::is_internal_directory_link(&node, link) {
                // Flatten the internal subtree's entries into this listing.
                let sub_entries = Box::pin(self.list_directory(&link.hash)).await?;
                entries.extend(sub_entries);
                continue;
            }

            entries.push(TreeEntry {
                // Unnamed links are surfaced under their hex hash.
                name: link.name.clone().unwrap_or_else(|| to_hex(&link.hash)),
                hash: link.hash,
                size: link.size,
                link_type: link.link_type,
                key: link.key,
                meta: link.meta.clone(),
            });
        }

        Ok(entries)
    }
490
491    /// Resolve a path within a tree
492    /// e.g., resolve_path("root/foo/bar.txt")
493    pub async fn resolve_path(
494        &self,
495        root_hash: &Hash,
496        path: &str,
497    ) -> Result<Option<Hash>, ReaderError> {
498        let parts: Vec<&str> = path.split('/').filter(|p| !p.is_empty()).collect();
499
500        let mut current_hash = *root_hash;
501
502        for part in parts {
503            let node = match self.get_tree_node(&current_hash).await? {
504                Some(n) => n,
505                None => return Ok(None),
506            };
507
508            if let Some(link) = self.find_link(&node, part) {
509                current_hash = link.hash;
510            } else {
511                // Check internal nodes
512                match self.find_in_subtrees(&node, part).await? {
513                    Some(hash) => current_hash = hash,
514                    None => return Ok(None),
515                }
516            }
517        }
518
519        Ok(Some(current_hash))
520    }
521
522    /// Find a link by name in a tree node
523    fn find_link(&self, node: &TreeNode, name: &str) -> Option<Link> {
524        node.links
525            .iter()
526            .find(|l| l.name.as_deref() == Some(name))
527            .cloned()
528    }
529
    /// Search for name in internal subtrees
    ///
    /// Walks only internal fanout links (`_chunk_*` / legacy groups),
    /// depth-first, and returns the hash of the first child named `name`.
    async fn find_in_subtrees(
        &self,
        node: &TreeNode,
        name: &str,
    ) -> Result<Option<Hash>, ReaderError> {
        for link in &node.links {
            // Only search internal nodes
            if !Self::is_internal_directory_link(node, link) {
                continue;
            }

            let sub_node = match self.get_tree_node(&link.hash).await? {
                Some(n) => n,
                // Missing or non-tree child: nothing to search here.
                None => continue,
            };

            if let Some(found) = self.find_link(&sub_node, name) {
                return Ok(Some(found.hash));
            }

            // Recurse deeper
            if let Some(deep_found) = Box::pin(self.find_in_subtrees(&sub_node, name)).await? {
                return Ok(Some(deep_found));
            }
        }

        Ok(None)
    }
559
560    /// Get total size of a tree
561    pub async fn get_size(&self, hash: &Hash) -> Result<u64, ReaderError> {
562        let data = match self
563            .store
564            .get(hash)
565            .await
566            .map_err(|e| ReaderError::Store(e.to_string()))?
567        {
568            Some(d) => d,
569            None => return Ok(0),
570        };
571
572        if !is_tree_node(&data) {
573            return Ok(data.len() as u64);
574        }
575
576        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
577        // Calculate from children
578        let mut total = 0u64;
579        for link in &node.links {
580            total += link.size;
581        }
582        Ok(total)
583    }
584
585    /// Walk entire tree depth-first
586    pub async fn walk(&self, hash: &Hash, path: &str) -> Result<Vec<WalkEntry>, ReaderError> {
587        let mut entries = Vec::new();
588        self.walk_recursive(hash, path, &mut entries).await?;
589        Ok(entries)
590    }
591
    /// Depth-first worker for `walk`.
    ///
    /// Emits one `WalkEntry` per reachable node: blobs use `LinkType::Blob`
    /// and their byte length; tree nodes use their `node_type` and the sum
    /// of their link sizes. Path construction per link:
    /// - named links extend the path with `/name`
    /// - internal fanout links are recursed under the parent's own path
    ///   (the fanout node itself still emits an entry at that path)
    /// - unnamed links (e.g. file chunks) reuse the parent path unchanged
    ///
    /// Hashes missing from the store are skipped silently.
    async fn walk_recursive(
        &self,
        hash: &Hash,
        path: &str,
        entries: &mut Vec<WalkEntry>,
    ) -> Result<(), ReaderError> {
        let data = match self
            .store
            .get(hash)
            .await
            .map_err(|e| ReaderError::Store(e.to_string()))?
        {
            Some(d) => d,
            None => return Ok(()),
        };

        let node = match try_decode_tree_node(&data) {
            Some(n) => n,
            None => {
                // Leaf blob: record it and stop descending.
                entries.push(WalkEntry {
                    path: path.to_string(),
                    hash: *hash,
                    link_type: LinkType::Blob,
                    size: data.len() as u64,
                    key: None, // TreeReader doesn't track keys
                });
                return Ok(());
            }
        };

        let node_size: u64 = node.links.iter().map(|l| l.size).sum();
        entries.push(WalkEntry {
            path: path.to_string(),
            hash: *hash,
            link_type: node.node_type,
            size: node_size,
            key: None, // directories are not encrypted
        });

        for link in &node.links {
            let child_path = match &link.name {
                Some(name) => {
                    // Skip internal chunk nodes in path
                    if Self::is_internal_directory_link(&node, link) {
                        // Walk the fanout subtree under the parent path and
                        // skip the normal recursion below.
                        Box::pin(self.walk_recursive(&link.hash, path, entries)).await?;
                        continue;
                    }
                    if path.is_empty() {
                        name.clone()
                    } else {
                        format!("{}/{}", path, name)
                    }
                }
                None => path.to_string(),
            };

            Box::pin(self.walk_recursive(&link.hash, &child_path, entries)).await?;
        }

        Ok(())
    }
653}
654
655/// Verify tree integrity
656/// Checks that all referenced hashes exist
657pub async fn verify_tree<S: Store>(
658    store: Arc<S>,
659    root_hash: &Hash,
660) -> Result<VerifyResult, ReaderError> {
661    let mut missing = Vec::new();
662    let mut visited = std::collections::HashSet::new();
663
664    verify_recursive(store, root_hash, &mut missing, &mut visited).await?;
665
666    Ok(VerifyResult {
667        valid: missing.is_empty(),
668        missing,
669    })
670}
671
672async fn verify_recursive<S: Store>(
673    store: Arc<S>,
674    hash: &Hash,
675    missing: &mut Vec<Hash>,
676    visited: &mut std::collections::HashSet<String>,
677) -> Result<(), ReaderError> {
678    let hex = to_hex(hash);
679    if visited.contains(&hex) {
680        return Ok(());
681    }
682    visited.insert(hex);
683
684    let data = match store
685        .get(hash)
686        .await
687        .map_err(|e| ReaderError::Store(e.to_string()))?
688    {
689        Some(d) => d,
690        None => {
691            missing.push(*hash);
692            return Ok(());
693        }
694    };
695
696    if is_tree_node(&data) {
697        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
698        for link in &node.links {
699            Box::pin(verify_recursive(
700                store.clone(),
701                &link.hash,
702                missing,
703                visited,
704            ))
705            .await?;
706        }
707    }
708
709    Ok(())
710}
711
712/// Verify tree integrity and content addresses.
713///
714/// Checks that:
715/// - all referenced hashes exist
716/// - every fetched blob/node satisfies `sha256(bytes) == referenced_hash`
717pub async fn verify_tree_integrity<S: Store>(
718    store: Arc<S>,
719    root_hash: &Hash,
720) -> Result<VerifyIntegrityResult, ReaderError> {
721    let mut missing = Vec::new();
722    let mut corrupted = Vec::new();
723    let mut visited = std::collections::HashSet::new();
724
725    verify_integrity_recursive(store, root_hash, &mut missing, &mut corrupted, &mut visited)
726        .await?;
727
728    Ok(VerifyIntegrityResult {
729        valid: missing.is_empty() && corrupted.is_empty(),
730        missing,
731        corrupted,
732    })
733}
734
735async fn verify_integrity_recursive<S: Store>(
736    store: Arc<S>,
737    hash: &Hash,
738    missing: &mut Vec<Hash>,
739    corrupted: &mut Vec<Hash>,
740    visited: &mut std::collections::HashSet<String>,
741) -> Result<(), ReaderError> {
742    let hex = to_hex(hash);
743    if visited.contains(&hex) {
744        return Ok(());
745    }
746    visited.insert(hex);
747
748    let data = match store
749        .get(hash)
750        .await
751        .map_err(|e| ReaderError::Store(e.to_string()))?
752    {
753        Some(d) => d,
754        None => {
755            missing.push(*hash);
756            return Ok(());
757        }
758    };
759
760    // Strong integrity check: referenced hash must match fetched bytes.
761    if sha256(&data) != *hash {
762        corrupted.push(*hash);
763        return Ok(());
764    }
765
766    if is_tree_node(&data) {
767        let node = decode_tree_node(&data).map_err(ReaderError::Codec)?;
768        for link in &node.links {
769            Box::pin(verify_integrity_recursive(
770                store.clone(),
771                &link.hash,
772                missing,
773                corrupted,
774                visited,
775            ))
776            .await?;
777        }
778    }
779
780    Ok(())
781}
782
/// Result of tree verification
#[derive(Debug, Clone)]
pub struct VerifyResult {
    /// True when no referenced hash was absent from the store.
    pub valid: bool,
    /// Hashes referenced by the tree but not present in the store.
    pub missing: Vec<Hash>,
}
789
/// Result of strong tree integrity verification.
#[derive(Debug, Clone)]
pub struct VerifyIntegrityResult {
    /// True when nothing was missing and nothing was corrupted.
    pub valid: bool,
    /// Hashes referenced by the tree but not present in the store.
    pub missing: Vec<Hash>,
    /// Hashes whose stored bytes do not hash back to the referenced value.
    pub corrupted: Vec<Hash>,
}
797
/// Reader error type
#[derive(Debug, thiserror::Error)]
pub enum ReaderError {
    /// Underlying store failure (stringified store error).
    #[error("Store error: {0}")]
    Store(String),
    /// Failed to decode an encoded tree node.
    #[error("Codec error: {0}")]
    Codec(#[from] crate::codec::CodecError),
    /// A referenced chunk (hex hash in payload) was absent from the store.
    #[error("Missing chunk: {0}")]
    MissingChunk(String),
    /// CHK decryption failed (stringified crypto error).
    #[error("Decryption error: {0}")]
    Decryption(String),
    /// An encrypted link lacked its per-chunk decryption key.
    #[error("Missing decryption key")]
    MissingKey,
}
812
813#[cfg(test)]
814mod tests {
815    use super::*;
816    use crate::builder::{BuilderConfig, TreeBuilder};
817    use crate::store::MemoryStore;
818    use crate::types::DirEntry;
819
820    fn make_store() -> Arc<MemoryStore> {
821        Arc::new(MemoryStore::new())
822    }
823
824    #[tokio::test]
825    async fn test_get_blob() {
826        let store = make_store();
827        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
828        let reader = TreeReader::new(store);
829
830        let data = vec![1u8, 2, 3, 4, 5];
831        let hash = builder.put_blob(&data).await.unwrap();
832
833        let result = reader.get_blob(&hash).await.unwrap();
834        assert_eq!(result, Some(data));
835    }
836
837    #[tokio::test]
838    async fn test_get_blob_missing() {
839        let store = make_store();
840        let reader = TreeReader::new(store);
841
842        let hash = [0u8; 32];
843        let result = reader.get_blob(&hash).await.unwrap();
844        assert!(result.is_none());
845    }
846
847    #[tokio::test]
848    async fn test_get_tree_node() {
849        let store = make_store();
850        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
851        let reader = TreeReader::new(store);
852
853        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
854        let dir_hash = builder
855            .put_directory(vec![DirEntry::new("test.txt", file_hash).with_size(1)])
856            .await
857            .unwrap();
858
859        let node = reader.get_tree_node(&dir_hash).await.unwrap();
860        assert!(node.is_some());
861        assert_eq!(node.unwrap().links.len(), 1);
862    }
863
864    #[tokio::test]
865    async fn test_get_tree_node_returns_none_for_blob() {
866        let store = make_store();
867        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
868        let reader = TreeReader::new(store);
869
870        let hash = builder.put_blob(&[1u8, 2, 3]).await.unwrap();
871        let node = reader.get_tree_node(&hash).await.unwrap();
872        assert!(node.is_none());
873    }
874
875    #[tokio::test]
876    async fn test_is_tree() {
877        let store = make_store();
878        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
879        let reader = TreeReader::new(store);
880
881        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
882        let dir_hash = builder
883            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
884            .await
885            .unwrap();
886
887        assert!(reader.is_tree(&dir_hash).await.unwrap());
888        assert!(!reader.is_tree(&file_hash).await.unwrap());
889    }
890
891    #[tokio::test]
892    async fn test_read_file_small() {
893        let store = make_store();
894        // Use public() for tests that check raw data storage
895        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
896        let reader = TreeReader::new(store);
897
898        let data = vec![1u8, 2, 3, 4, 5];
899        let (cid, _size) = builder.put(&data).await.unwrap();
900
901        let result = reader.read_file(&cid.hash).await.unwrap();
902        assert_eq!(result, Some(data));
903    }
904
905    #[tokio::test]
906    async fn test_read_file_chunked() {
907        let store = make_store();
908        let config = BuilderConfig::new(store.clone())
909            .with_chunk_size(100)
910            .public();
911        let builder = TreeBuilder::new(config);
912        let reader = TreeReader::new(store);
913
914        let mut data = vec![0u8; 350];
915        for i in 0..data.len() {
916            data[i] = (i % 256) as u8;
917        }
918
919        let (cid, _size) = builder.put(&data).await.unwrap();
920        let result = reader.read_file(&cid.hash).await.unwrap();
921
922        assert_eq!(result, Some(data));
923    }
924
925    #[tokio::test]
926    async fn test_list_directory() {
927        let store = make_store();
928        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
929        let reader = TreeReader::new(store);
930
931        let h1 = builder.put_blob(&[1u8]).await.unwrap();
932        let h2 = builder.put_blob(&[2u8]).await.unwrap();
933
934        let dir_hash = builder
935            .put_directory(vec![
936                DirEntry::new("first.txt", h1).with_size(1),
937                DirEntry::new("second.txt", h2).with_size(1),
938            ])
939            .await
940            .unwrap();
941
942        let entries = reader.list_directory(&dir_hash).await.unwrap();
943
944        assert_eq!(entries.len(), 2);
945        assert!(entries.iter().any(|e| e.name == "first.txt"));
946        assert!(entries.iter().any(|e| e.name == "second.txt"));
947    }
948
949    #[tokio::test]
950    async fn test_resolve_path() {
951        let store = make_store();
952        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
953        let reader = TreeReader::new(store);
954
955        let file_data = vec![1u8, 2, 3];
956        let file_hash = builder.put_blob(&file_data).await.unwrap();
957
958        let dir_hash = builder
959            .put_directory(vec![DirEntry::new("test.txt", file_hash)])
960            .await
961            .unwrap();
962
963        let resolved = reader.resolve_path(&dir_hash, "test.txt").await.unwrap();
964        assert_eq!(resolved, Some(file_hash));
965    }
966
967    #[tokio::test]
968    async fn test_resolve_path_nested() {
969        let store = make_store();
970        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
971        let reader = TreeReader::new(store);
972
973        let file_hash = builder.put_blob(&[1u8]).await.unwrap();
974
975        let sub_sub_dir = builder
976            .put_directory(vec![DirEntry::new("deep.txt", file_hash)])
977            .await
978            .unwrap();
979
980        let sub_dir = builder
981            .put_directory(vec![DirEntry::new("level2", sub_sub_dir)])
982            .await
983            .unwrap();
984
985        let root_dir = builder
986            .put_directory(vec![DirEntry::new("level1", sub_dir)])
987            .await
988            .unwrap();
989
990        let resolved = reader
991            .resolve_path(&root_dir, "level1/level2/deep.txt")
992            .await
993            .unwrap();
994        assert_eq!(resolved, Some(file_hash));
995    }
996
997    #[tokio::test]
998    async fn test_get_size() {
999        let store = make_store();
1000        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
1001        let reader = TreeReader::new(store);
1002
1003        let data = vec![0u8; 123];
1004        let hash = builder.put_blob(&data).await.unwrap();
1005
1006        assert_eq!(reader.get_size(&hash).await.unwrap(), 123);
1007    }
1008
1009    #[tokio::test]
1010    async fn test_walk() {
1011        let store = make_store();
1012        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));
1013        let reader = TreeReader::new(store);
1014
1015        let f1 = builder.put_blob(&[1u8]).await.unwrap();
1016        let f2 = builder.put_blob(&[2u8, 3]).await.unwrap();
1017
1018        let sub_dir = builder
1019            .put_directory(vec![DirEntry::new("nested.txt", f2).with_size(2)])
1020            .await
1021            .unwrap();
1022
1023        let root_dir = builder
1024            .put_directory(vec![
1025                DirEntry::new("root.txt", f1).with_size(1),
1026                DirEntry::new("sub", sub_dir),
1027            ])
1028            .await
1029            .unwrap();
1030
1031        let entries = reader.walk(&root_dir, "").await.unwrap();
1032        let paths: Vec<_> = entries.iter().map(|e| e.path.as_str()).collect();
1033
1034        assert!(paths.contains(&""));
1035        assert!(paths.contains(&"root.txt"));
1036        assert!(paths.contains(&"sub"));
1037        assert!(paths.contains(&"sub/nested.txt"));
1038    }
1039
1040    #[tokio::test]
1041    async fn test_verify_tree_valid() {
1042        let store = make_store();
1043        let config = BuilderConfig::new(store.clone())
1044            .with_chunk_size(100)
1045            .public();
1046        let builder = TreeBuilder::new(config);
1047
1048        let data = vec![0u8; 350];
1049        let (cid, _size) = builder.put(&data).await.unwrap();
1050
1051        let result = verify_tree(store, &cid.hash).await.unwrap();
1052        assert!(result.valid);
1053        assert!(result.missing.is_empty());
1054    }
1055
1056    #[tokio::test]
1057    async fn test_verify_tree_missing() {
1058        let store = make_store();
1059        let config = BuilderConfig::new(store.clone())
1060            .with_chunk_size(100)
1061            .public();
1062        let builder = TreeBuilder::new(config);
1063
1064        let data = vec![0u8; 350];
1065        let (cid, _size) = builder.put(&data).await.unwrap();
1066
1067        // Delete one of the chunks
1068        let keys = store.keys();
1069        if let Some(chunk_to_delete) = keys.iter().find(|k| **k != cid.hash) {
1070            store.delete(chunk_to_delete).await.unwrap();
1071        }
1072
1073        let result = verify_tree(store, &cid.hash).await.unwrap();
1074        assert!(!result.valid);
1075        assert!(!result.missing.is_empty());
1076    }
1077
1078    #[tokio::test]
1079    async fn test_verify_tree_integrity_valid() {
1080        let store = make_store();
1081        let config = BuilderConfig::new(store.clone())
1082            .with_chunk_size(100)
1083            .public();
1084        let builder = TreeBuilder::new(config);
1085
1086        let data = vec![0u8; 350];
1087        let (cid, _size) = builder.put(&data).await.unwrap();
1088
1089        let result = verify_tree_integrity(store, &cid.hash).await.unwrap();
1090        assert!(result.valid);
1091        assert!(result.missing.is_empty());
1092        assert!(result.corrupted.is_empty());
1093    }
1094
1095    #[tokio::test]
1096    async fn test_verify_tree_integrity_missing() {
1097        let store = make_store();
1098        let config = BuilderConfig::new(store.clone())
1099            .with_chunk_size(100)
1100            .public();
1101        let builder = TreeBuilder::new(config);
1102
1103        let data = vec![0u8; 350];
1104        let (cid, _size) = builder.put(&data).await.unwrap();
1105
1106        // Delete one of the chunks
1107        let keys = store.keys();
1108        if let Some(chunk_to_delete) = keys.iter().find(|k| **k != cid.hash) {
1109            store.delete(chunk_to_delete).await.unwrap();
1110        }
1111
1112        let result = verify_tree_integrity(store, &cid.hash).await.unwrap();
1113        assert!(!result.valid);
1114        assert!(!result.missing.is_empty());
1115        assert!(result.corrupted.is_empty());
1116    }
1117
1118    #[tokio::test]
1119    async fn test_verify_tree_integrity_corrupted_hash_mismatch() {
1120        let store = make_store();
1121        let config = BuilderConfig::new(store.clone())
1122            .with_chunk_size(100)
1123            .public();
1124        let builder = TreeBuilder::new(config);
1125
1126        let data = vec![0u8; 350];
1127        let (cid, _size) = builder.put(&data).await.unwrap();
1128
1129        // Pick a leaf chunk (non-root in this shape) and mutate bytes without changing key.
1130        let keys = store.keys();
1131        let target = keys
1132            .iter()
1133            .find(|k| **k != cid.hash)
1134            .copied()
1135            .expect("expected at least one child chunk");
1136
1137        let mut corrupted = store.get(&target).await.unwrap().unwrap();
1138        corrupted[0] ^= 0xff;
1139        store.delete(&target).await.unwrap();
1140        store.put(target, corrupted).await.unwrap();
1141
1142        // Legacy verifier checks only existence, so this still appears valid.
1143        let legacy = verify_tree(store.clone(), &cid.hash).await.unwrap();
1144        assert!(legacy.valid);
1145
1146        let strict = verify_tree_integrity(store, &cid.hash).await.unwrap();
1147        assert!(!strict.valid);
1148        assert!(strict.missing.is_empty());
1149        assert!(!strict.corrupted.is_empty());
1150        assert!(strict.corrupted.contains(&target));
1151    }
1152
1153    #[tokio::test]
1154    async fn test_read_file_range_small_blob() {
1155        let store = make_store();
1156        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
1157        let reader = TreeReader::new(store);
1158
1159        let data = b"Hello, World!";
1160        let hash = builder.put_blob(data).await.unwrap();
1161
1162        // Read middle portion
1163        let result = reader.read_file_range(&hash, 7, Some(12)).await.unwrap();
1164        assert_eq!(result, Some(b"World".to_vec()));
1165
1166        // Read from start
1167        let result = reader.read_file_range(&hash, 0, Some(5)).await.unwrap();
1168        assert_eq!(result, Some(b"Hello".to_vec()));
1169
1170        // Read to end (no end specified)
1171        let result = reader.read_file_range(&hash, 7, None).await.unwrap();
1172        assert_eq!(result, Some(b"World!".to_vec()));
1173    }
1174
1175    #[tokio::test]
1176    async fn test_read_file_range_chunked() {
1177        let store = make_store();
1178        // Small chunk size to force chunking
1179        let config = BuilderConfig::new(store.clone())
1180            .with_chunk_size(100)
1181            .public();
1182        let builder = TreeBuilder::new(config);
1183        let reader = TreeReader::new(store);
1184
1185        // Create 350 bytes of sequential data
1186        let mut data = vec![0u8; 350];
1187        for i in 0..data.len() {
1188            data[i] = (i % 256) as u8;
1189        }
1190
1191        let (cid, _size) = builder.put(&data).await.unwrap();
1192
1193        // Read bytes 50-150 (spans chunk boundary at 100)
1194        let result = reader
1195            .read_file_range(&cid.hash, 50, Some(150))
1196            .await
1197            .unwrap()
1198            .unwrap();
1199        assert_eq!(result.len(), 100);
1200        assert_eq!(result, data[50..150].to_vec());
1201
1202        // Read bytes 200-300 (within third and fourth chunks)
1203        let result = reader
1204            .read_file_range(&cid.hash, 200, Some(300))
1205            .await
1206            .unwrap()
1207            .unwrap();
1208        assert_eq!(result.len(), 100);
1209        assert_eq!(result, data[200..300].to_vec());
1210
1211        // Read last 50 bytes
1212        let result = reader
1213            .read_file_range(&cid.hash, 300, None)
1214            .await
1215            .unwrap()
1216            .unwrap();
1217        assert_eq!(result.len(), 50);
1218        assert_eq!(result, data[300..].to_vec());
1219    }
1220
1221    #[tokio::test]
1222    async fn test_read_file_range_entire_file() {
1223        let store = make_store();
1224        let config = BuilderConfig::new(store.clone())
1225            .with_chunk_size(100)
1226            .public();
1227        let builder = TreeBuilder::new(config);
1228        let reader = TreeReader::new(store);
1229
1230        let mut data = vec![0u8; 350];
1231        for i in 0..data.len() {
1232            data[i] = (i % 256) as u8;
1233        }
1234
1235        let (cid, _size) = builder.put(&data).await.unwrap();
1236
1237        // Read entire file using range
1238        let result = reader
1239            .read_file_range(&cid.hash, 0, None)
1240            .await
1241            .unwrap()
1242            .unwrap();
1243        assert_eq!(result, data);
1244    }
1245
1246    #[tokio::test]
1247    async fn test_read_file_range_out_of_bounds() {
1248        let store = make_store();
1249        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());
1250        let reader = TreeReader::new(store);
1251
1252        let data = b"Short";
1253        let hash = builder.put_blob(data).await.unwrap();
1254
1255        // Start past end of file
1256        let result = reader.read_file_range(&hash, 100, Some(200)).await.unwrap();
1257        assert_eq!(result, Some(vec![]));
1258
1259        // End past file length (should clamp)
1260        let result = reader.read_file_range(&hash, 0, Some(100)).await.unwrap();
1261        assert_eq!(result, Some(b"Short".to_vec()));
1262    }
1263
1264    #[tokio::test]
1265    async fn test_read_file_range_single_byte() {
1266        let store = make_store();
1267        let config = BuilderConfig::new(store.clone())
1268            .with_chunk_size(100)
1269            .public();
1270        let builder = TreeBuilder::new(config);
1271        let reader = TreeReader::new(store);
1272
1273        let mut data = vec![0u8; 350];
1274        for i in 0..data.len() {
1275            data[i] = (i % 256) as u8;
1276        }
1277
1278        let (cid, _size) = builder.put(&data).await.unwrap();
1279
1280        // Read single byte at chunk boundary
1281        let result = reader
1282            .read_file_range(&cid.hash, 100, Some(101))
1283            .await
1284            .unwrap()
1285            .unwrap();
1286        assert_eq!(result.len(), 1);
1287        assert_eq!(result[0], 100);
1288    }
1289}