Skip to main content

hashtree_core/
builder.rs

1//! Tree builder with chunking and fanout support
2//!
3//! - Large files are split into chunks
4//! - Large directories are split into sub-trees
5//! - Supports streaming appends
6//! - Encryption enabled by default (CHK - Content Hash Key)
7
8use std::collections::HashMap;
9use std::sync::Arc;
10
11use crate::codec::encode_and_hash;
12use crate::hash::sha256;
13use crate::store::Store;
14use crate::types::{Cid, DirEntry, Hash, Link, LinkType, TreeNode};
15
16use crate::crypto::{encrypt_chk, EncryptionKey};
17
/// Default chunk size: 2MB (optimized for blossom uploads, matches hashtree-ts)
pub const DEFAULT_CHUNK_SIZE: usize = 2 * 1024 * 1024;

/// BEP52 chunk size: 16KB
/// (BitTorrent v2 leaf-block size; provided for interop with BEP52 trees.)
pub const BEP52_CHUNK_SIZE: usize = 16 * 1024;

/// Default max links per tree node (fanout)
/// Nodes with more links than this are split into sub-trees by the builders.
pub const DEFAULT_MAX_LINKS: usize = 174;
26
/// Builder configuration
///
/// Shared by [`TreeBuilder`] and [`StreamBuilder`]; construct with
/// [`BuilderConfig::new`] and adjust via the `with_*`/`public`/`encrypted`
/// builder methods.
#[derive(Clone)]
pub struct BuilderConfig<S: Store> {
    /// Backing block store the built nodes/chunks are written to.
    pub store: Arc<S>,
    /// Maximum bytes per leaf chunk.
    pub chunk_size: usize,
    /// Maximum links per tree node (fanout).
    pub max_links: usize,
    /// Whether to encrypt content (default: true when encryption feature enabled)
    pub encrypted: bool,
}
36
37impl<S: Store> BuilderConfig<S> {
38    pub fn new(store: Arc<S>) -> Self {
39        Self {
40            store,
41            chunk_size: DEFAULT_CHUNK_SIZE,
42            max_links: DEFAULT_MAX_LINKS,
43            encrypted: true,
44        }
45    }
46
47    pub fn with_chunk_size(mut self, chunk_size: usize) -> Self {
48        self.chunk_size = chunk_size;
49        self
50    }
51
52    pub fn with_max_links(mut self, max_links: usize) -> Self {
53        self.max_links = max_links;
54        self
55    }
56
57    /// Disable encryption (store content publicly)
58    pub fn public(mut self) -> Self {
59        self.encrypted = false;
60        self
61    }
62
63    /// Enable encryption (CHK - Content Hash Key)
64    pub fn encrypted(mut self) -> Self {
65        self.encrypted = true;
66        self
67    }
68}
69
/// TreeBuilder - builds content-addressed merkle trees
///
/// Files larger than `chunk_size` are split into chunks; nodes with more
/// than `max_links` links are split into sub-trees.
pub struct TreeBuilder<S: Store> {
    /// Backing block store.
    store: Arc<S>,
    /// Maximum bytes per leaf chunk.
    chunk_size: usize,
    /// Maximum links per tree node (fanout).
    max_links: usize,
    /// When true, chunks and tree nodes are CHK encrypted before storage.
    encrypted: bool,
}
77
78impl<S: Store> TreeBuilder<S> {
79    pub fn new(config: BuilderConfig<S>) -> Self {
80        Self {
81            store: config.store,
82            chunk_size: config.chunk_size,
83            max_links: config.max_links,
84            encrypted: config.encrypted,
85        }
86    }
87
    /// Check if encryption is enabled
    ///
    /// Reflects the `encrypted` flag carried over from [`BuilderConfig`].
    pub fn is_encrypted(&self) -> bool {
        self.encrypted
    }
92
93    /// Store a blob directly (small data, no encryption)
94    /// Returns the content hash
95    pub async fn put_blob(&self, data: &[u8]) -> Result<Hash, BuilderError> {
96        let hash = sha256(data);
97        self.store
98            .put(hash, data.to_vec())
99            .await
100            .map_err(|e| BuilderError::Store(e.to_string()))?;
101        Ok(hash)
102    }
103
104    /// Store a chunk with optional encryption
105    /// Returns (hash, optional_key) where hash is of stored data
106    async fn put_chunk_internal(
107        &self,
108        data: &[u8],
109    ) -> Result<(Hash, Option<EncryptionKey>), BuilderError> {
110        if self.encrypted {
111            let (encrypted, key) =
112                encrypt_chk(data).map_err(|e| BuilderError::Encryption(e.to_string()))?;
113            let hash = sha256(&encrypted);
114            self.store
115                .put(hash, encrypted)
116                .await
117                .map_err(|e| BuilderError::Store(e.to_string()))?;
118            Ok((hash, Some(key)))
119        } else {
120            let hash = self.put_blob(data).await?;
121            Ok((hash, None))
122        }
123    }
124
125    /// Store a file, chunking if necessary
126    /// Returns (Cid, size) where Cid contains hash and optional encryption key
127    ///
128    /// When encryption is enabled (default), each chunk is CHK encrypted
129    /// and the result contains the decryption key.
130    pub async fn put(&self, data: &[u8]) -> Result<(Cid, u64), BuilderError> {
131        let size = data.len() as u64;
132
133        // Small file - store as single chunk
134        if data.len() <= self.chunk_size {
135            let (hash, key) = self.put_chunk_internal(data).await?;
136            return Ok((Cid { hash, key }, size));
137        }
138
139        // Large file - chunk it
140        let mut links: Vec<Link> = Vec::new();
141        let mut offset = 0;
142
143        while offset < data.len() {
144            let end = (offset + self.chunk_size).min(data.len());
145            let chunk = &data[offset..end];
146            let chunk_size = chunk.len() as u64;
147            let (hash, key) = self.put_chunk_internal(chunk).await?;
148            links.push(Link {
149                hash,
150                name: None,
151                size: chunk_size,
152                key,
153                link_type: LinkType::Blob, // leaf chunk
154                meta: None,
155            });
156            offset = end;
157        }
158
159        // Build tree from chunks
160        let (root_hash, root_key) = self.build_tree_internal(links, Some(size)).await?;
161
162        Ok((
163            Cid {
164                hash: root_hash,
165                key: root_key,
166            },
167            size,
168        ))
169    }
170
171    /// Build tree and return (hash, optional_key)
172    /// When encrypted, tree nodes are also CHK encrypted
173    async fn build_tree_internal(
174        &self,
175        links: Vec<Link>,
176        total_size: Option<u64>,
177    ) -> Result<(Hash, Option<[u8; 32]>), BuilderError> {
178        // Single link with matching size - return directly
179        if links.len() == 1 {
180            if let Some(ts) = total_size {
181                if links[0].size == ts {
182                    return Ok((links[0].hash, links[0].key));
183                }
184            }
185        }
186
187        if links.len() <= self.max_links {
188            let node = TreeNode {
189                node_type: LinkType::File,
190                links,
191            };
192            let (data, _) = encode_and_hash(&node)?;
193
194            if self.encrypted {
195                let (encrypted, key) =
196                    encrypt_chk(&data).map_err(|e| BuilderError::Encryption(e.to_string()))?;
197                let hash = sha256(&encrypted);
198                self.store
199                    .put(hash, encrypted)
200                    .await
201                    .map_err(|e| BuilderError::Store(e.to_string()))?;
202                return Ok((hash, Some(key)));
203            }
204
205            // Unencrypted path
206            let hash = sha256(&data);
207            self.store
208                .put(hash, data)
209                .await
210                .map_err(|e| BuilderError::Store(e.to_string()))?;
211            return Ok((hash, None));
212        }
213
214        // Too many links - create subtrees
215        let mut sub_links = Vec::new();
216        for batch in links.chunks(self.max_links) {
217            let batch_size: u64 = batch.iter().map(|l| l.size).sum();
218            let (hash, key) =
219                Box::pin(self.build_tree_internal(batch.to_vec(), Some(batch_size))).await?;
220            sub_links.push(Link {
221                hash,
222                name: None,
223                size: batch_size,
224                key,
225                link_type: LinkType::File, // subtree
226                meta: None,
227            });
228        }
229
230        Box::pin(self.build_tree_internal(sub_links, total_size)).await
231    }
232
233    /// Build a balanced tree from links
234    /// Handles fanout by creating intermediate nodes
235    #[allow(dead_code)]
236    async fn build_tree(
237        &self,
238        links: Vec<Link>,
239        total_size: Option<u64>,
240    ) -> Result<Hash, BuilderError> {
241        // Single link with matching size - return it directly
242        if links.len() == 1 {
243            if let Some(ts) = total_size {
244                if links[0].size == ts {
245                    return Ok(links[0].hash);
246                }
247            }
248        }
249
250        // Fits in one node
251        if links.len() <= self.max_links {
252            let node = TreeNode {
253                node_type: LinkType::File,
254                links,
255            };
256            let (data, hash) = encode_and_hash(&node)?;
257            self.store
258                .put(hash, data)
259                .await
260                .map_err(|e| BuilderError::Store(e.to_string()))?;
261            return Ok(hash);
262        }
263
264        // Need to split into sub-trees
265        let mut sub_trees: Vec<Link> = Vec::new();
266
267        for batch in links.chunks(self.max_links) {
268            let batch_size: u64 = batch.iter().map(|l| l.size).sum();
269
270            let node = TreeNode {
271                node_type: LinkType::File,
272                links: batch.to_vec(),
273            };
274            let (data, hash) = encode_and_hash(&node)?;
275            self.store
276                .put(hash, data)
277                .await
278                .map_err(|e| BuilderError::Store(e.to_string()))?;
279
280            sub_trees.push(Link {
281                hash,
282                name: None,
283                size: batch_size,
284                key: None,
285                link_type: LinkType::File, // subtree
286                meta: None,
287            });
288        }
289
290        // Recursively build parent level
291        Box::pin(self.build_tree(sub_trees, total_size)).await
292    }
293
    /// Build a directory from entries
    /// Entries can be files or subdirectories
    ///
    /// Entries are sorted by name first so the resulting hash is
    /// deterministic. Directories with more than `max_links` entries are
    /// split into sub-trees: first grouped by the lowercased first
    /// character of the entry name; if grouping does not balance things,
    /// it falls back to fixed-size numeric chunks.
    ///
    /// NOTE(review): synthetic sub-tree names are "_{c}" (and the chunk
    /// path uses "_chunk_{i}"), which could collide with real entries
    /// whose names start with '_' — verify against the reader's handling
    /// of internal nodes.
    pub async fn put_directory(&self, entries: Vec<DirEntry>) -> Result<Hash, BuilderError> {
        // Sort entries by name for deterministic hashing
        let mut sorted = entries;
        sorted.sort_by(|a, b| a.name.cmp(&b.name));

        // Convert entries into links, carrying over per-entry key/type/meta.
        let links: Vec<Link> = sorted
            .into_iter()
            .map(|e| Link {
                hash: e.hash,
                name: Some(e.name),
                size: e.size,
                key: e.key,
                link_type: e.link_type,
                meta: e.meta,
            })
            .collect();

        let total_size: u64 = links.iter().map(|l| l.size).sum();

        // Fits in one node
        if links.len() <= self.max_links {
            let node = TreeNode {
                node_type: LinkType::Dir,
                links,
            };
            let (data, hash) = encode_and_hash(&node)?;
            self.store
                .put(hash, data)
                .await
                .map_err(|e| BuilderError::Store(e.to_string()))?;
            return Ok(hash);
        }

        // Large directory - create sub-trees
        // Group by first character for balanced distribution
        let mut groups: HashMap<char, Vec<Link>> = HashMap::new();

        for link in &links {
            // Unnamed links fall into the '\0' bucket.
            let key = link
                .name
                .as_ref()
                .and_then(|n| n.chars().next())
                .map(|c| c.to_ascii_lowercase())
                .unwrap_or('\0');
            groups.entry(key).or_default().push(link.clone());
        }

        // If groups are still too large, split numerically
        let max_group_size = groups.values().map(|g| g.len()).max().unwrap_or(0);
        if groups.len() == 1 || max_group_size > self.max_links {
            return self.build_directory_by_chunks(links, total_size).await;
        }

        // Build sub-tree for each group
        // (groups are sorted by character so iteration order - and hence
        // the final hash - is deterministic despite HashMap ordering)
        let mut sub_dirs: Vec<DirEntry> = Vec::new();
        let mut sorted_groups: Vec<_> = groups.into_iter().collect();
        sorted_groups.sort_by(|a, b| a.0.cmp(&b.0));

        for (key, group_links) in sorted_groups {
            let group_size: u64 = group_links.iter().map(|l| l.size).sum();

            if group_links.len() <= self.max_links {
                let node = TreeNode {
                    node_type: LinkType::Dir,
                    links: group_links,
                };
                let (data, hash) = encode_and_hash(&node)?;
                self.store
                    .put(hash, data)
                    .await
                    .map_err(|e| BuilderError::Store(e.to_string()))?;
                sub_dirs.push(DirEntry {
                    name: format!("_{}", key),
                    hash,
                    size: group_size,
                    key: None,
                    link_type: LinkType::Dir, // Internal chunk node
                    meta: None,
                });
            } else {
                // Recursively split this group
                let hash = self
                    .build_directory_by_chunks(group_links, group_size)
                    .await?;
                sub_dirs.push(DirEntry {
                    name: format!("_{}", key),
                    hash,
                    size: group_size,
                    key: None,
                    link_type: LinkType::Dir, // Internal chunk node
                    meta: None,
                });
            }
        }

        // Recurse: the per-group directories themselves form a directory.
        Box::pin(self.put_directory(sub_dirs)).await
    }
393
394    /// Split directory into numeric chunks when grouping doesn't help
395    async fn build_directory_by_chunks(
396        &self,
397        links: Vec<Link>,
398        total_size: u64,
399    ) -> Result<Hash, BuilderError> {
400        let mut sub_trees: Vec<Link> = Vec::new();
401
402        for (i, batch) in links.chunks(self.max_links).enumerate() {
403            let batch_size: u64 = batch.iter().map(|l| l.size).sum();
404
405            let node = TreeNode {
406                node_type: LinkType::Dir,
407                links: batch.to_vec(),
408            };
409            let (data, hash) = encode_and_hash(&node)?;
410            self.store
411                .put(hash, data)
412                .await
413                .map_err(|e| BuilderError::Store(e.to_string()))?;
414
415            sub_trees.push(Link {
416                hash,
417                name: Some(format!("_chunk_{}", i * self.max_links)),
418                size: batch_size,
419                key: None,
420                link_type: LinkType::Dir, // Internal chunk node
421                meta: None,
422            });
423        }
424
425        if sub_trees.len() <= self.max_links {
426            let node = TreeNode {
427                node_type: LinkType::Dir,
428                links: sub_trees,
429            };
430            let (data, hash) = encode_and_hash(&node)?;
431            self.store
432                .put(hash, data)
433                .await
434                .map_err(|e| BuilderError::Store(e.to_string()))?;
435            return Ok(hash);
436        }
437
438        // Recursively build more levels
439        Box::pin(self.build_directory_by_chunks(sub_trees, total_size)).await
440    }
441
442    /// Create a tree node
443    pub async fn put_tree_node(&self, links: Vec<Link>) -> Result<Hash, BuilderError> {
444        let node = TreeNode {
445            node_type: LinkType::Dir,
446            links,
447        };
448
449        let (data, hash) = encode_and_hash(&node)?;
450        self.store
451            .put(hash, data)
452            .await
453            .map_err(|e| BuilderError::Store(e.to_string()))?;
454        Ok(hash)
455    }
456}
457
/// StreamBuilder - supports incremental appends
///
/// Buffers appended bytes until `chunk_size` is reached, stores each
/// full chunk, and builds the tree on `finalize`. Note: this path stores
/// chunks unencrypted; the config's `encrypted` flag is not consulted.
pub struct StreamBuilder<S: Store> {
    store: Arc<S>,
    chunk_size: usize,
    max_links: usize,

    // Current partial chunk being built
    buffer: Vec<u8>,

    // Completed chunks
    chunks: Vec<Link>,
    // Total bytes appended so far (buffered + flushed).
    total_size: u64,
}
471
472impl<S: Store> StreamBuilder<S> {
473    pub fn new(config: BuilderConfig<S>) -> Self {
474        Self {
475            store: config.store,
476            chunk_size: config.chunk_size,
477            max_links: config.max_links,
478            buffer: Vec::with_capacity(config.chunk_size),
479            chunks: Vec::new(),
480            total_size: 0,
481        }
482    }
483
484    /// Append data to the stream
485    pub async fn append(&mut self, data: &[u8]) -> Result<(), BuilderError> {
486        let mut offset = 0;
487
488        while offset < data.len() {
489            let space = self.chunk_size - self.buffer.len();
490            let to_write = space.min(data.len() - offset);
491
492            self.buffer
493                .extend_from_slice(&data[offset..offset + to_write]);
494            offset += to_write;
495
496            // Flush full chunk
497            if self.buffer.len() == self.chunk_size {
498                self.flush_chunk().await?;
499            }
500        }
501
502        self.total_size += data.len() as u64;
503        Ok(())
504    }
505
506    /// Flush current buffer as a chunk
507    async fn flush_chunk(&mut self) -> Result<(), BuilderError> {
508        if self.buffer.is_empty() {
509            return Ok(());
510        }
511
512        let chunk = std::mem::take(&mut self.buffer);
513        let hash = sha256(&chunk);
514        self.store
515            .put(hash, chunk.clone())
516            .await
517            .map_err(|e| BuilderError::Store(e.to_string()))?;
518
519        self.chunks.push(Link {
520            hash,
521            name: None,
522            size: chunk.len() as u64,
523            key: None,
524            link_type: LinkType::Blob, // Leaf chunk (raw blob)
525            meta: None,
526        });
527
528        self.buffer = Vec::with_capacity(self.chunk_size);
529        Ok(())
530    }
531
532    /// Get current root hash without finalizing
533    /// Useful for checkpoints
534    pub async fn current_root(&mut self) -> Result<Option<Hash>, BuilderError> {
535        if self.chunks.is_empty() && self.buffer.is_empty() {
536            return Ok(None);
537        }
538
539        // Temporarily include buffer
540        let mut temp_chunks = self.chunks.clone();
541        if !self.buffer.is_empty() {
542            let chunk = self.buffer.clone();
543            let hash = sha256(&chunk);
544            self.store
545                .put(hash, chunk.clone())
546                .await
547                .map_err(|e| BuilderError::Store(e.to_string()))?;
548            temp_chunks.push(Link {
549                hash,
550                name: None,
551                size: chunk.len() as u64,
552                key: None,
553                link_type: LinkType::Blob, // Leaf chunk (raw blob)
554                meta: None,
555            });
556        }
557
558        let hash = self
559            .build_tree_from_chunks(&temp_chunks, self.total_size)
560            .await?;
561        Ok(Some(hash))
562    }
563
564    /// Finalize the stream and return root hash
565    pub async fn finalize(mut self) -> Result<(Hash, u64), BuilderError> {
566        // Flush remaining buffer
567        self.flush_chunk().await?;
568
569        if self.chunks.is_empty() {
570            // Empty stream - return hash of empty data
571            let empty_hash = sha256(&[]);
572            self.store
573                .put(empty_hash, vec![])
574                .await
575                .map_err(|e| BuilderError::Store(e.to_string()))?;
576            return Ok((empty_hash, 0));
577        }
578
579        let hash = self
580            .build_tree_from_chunks(&self.chunks, self.total_size)
581            .await?;
582        Ok((hash, self.total_size))
583    }
584
585    /// Build balanced tree from chunks
586    async fn build_tree_from_chunks(
587        &self,
588        chunks: &[Link],
589        total_size: u64,
590    ) -> Result<Hash, BuilderError> {
591        if chunks.len() == 1 {
592            return Ok(chunks[0].hash);
593        }
594
595        if chunks.len() <= self.max_links {
596            let node = TreeNode {
597                node_type: LinkType::File,
598                links: chunks.to_vec(),
599            };
600            let (data, hash) = encode_and_hash(&node)?;
601            self.store
602                .put(hash, data)
603                .await
604                .map_err(|e| BuilderError::Store(e.to_string()))?;
605            return Ok(hash);
606        }
607
608        // Build intermediate level
609        let mut sub_trees: Vec<Link> = Vec::new();
610        for batch in chunks.chunks(self.max_links) {
611            let batch_size: u64 = batch.iter().map(|l| l.size).sum();
612
613            let node = TreeNode {
614                node_type: LinkType::File,
615                links: batch.to_vec(),
616            };
617            let (data, hash) = encode_and_hash(&node)?;
618            self.store
619                .put(hash, data)
620                .await
621                .map_err(|e| BuilderError::Store(e.to_string()))?;
622
623            sub_trees.push(Link {
624                hash,
625                name: None,
626                size: batch_size,
627                key: None,
628                link_type: LinkType::File, // Internal tree node
629                meta: None,
630            });
631        }
632
633        Box::pin(self.build_tree_from_chunks(&sub_trees, total_size)).await
634    }
635
636    /// Get stats
637    pub fn stats(&self) -> StreamStats {
638        StreamStats {
639            chunks: self.chunks.len(),
640            buffered: self.buffer.len(),
641            total_size: self.total_size,
642        }
643    }
644}
645
/// Stream progress counters returned by `StreamBuilder::stats`.
#[derive(Debug, Clone, PartialEq)]
pub struct StreamStats {
    /// Number of completed (flushed) chunks.
    pub chunks: usize,
    /// Bytes currently held in the partial chunk buffer.
    pub buffered: usize,
    /// Total bytes appended so far.
    pub total_size: u64,
}
652
/// Builder error type
#[derive(Debug, thiserror::Error)]
pub enum BuilderError {
    /// Underlying block-store failure (message stringified at call site).
    #[error("Store error: {0}")]
    Store(String),
    /// Tree-node encode/decode failure (converted via `From`).
    #[error("Codec error: {0}")]
    Codec(#[from] crate::codec::CodecError),
    /// CHK encryption failure.
    #[error("Encryption error: {0}")]
    Encryption(String),
}
663
#[cfg(test)]
mod tests {
    use super::*;
    use crate::store::MemoryStore;
    use crate::types::to_hex;

    // All tests run against an in-memory store; no disk or network I/O.
    fn make_store() -> Arc<MemoryStore> {
        Arc::new(MemoryStore::new())
    }

    #[tokio::test]
    async fn test_put_blob() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));

        let data = vec![1u8, 2, 3, 4, 5];
        let hash = builder.put_blob(&data).await.unwrap();

        assert_eq!(hash.len(), 32);
        assert!(store.has(&hash).await.unwrap());

        let retrieved = store.get(&hash).await.unwrap();
        assert_eq!(retrieved, Some(data));
    }

    #[tokio::test]
    async fn test_put_blob_correct_hash() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store));

        let data = vec![1u8, 2, 3];
        let hash = builder.put_blob(&data).await.unwrap();
        let expected_hash = sha256(&data);

        assert_eq!(to_hex(&hash), to_hex(&expected_hash));
    }

    #[tokio::test]
    async fn test_put_small() {
        let store = make_store();
        // Use public() to disable encryption for this test
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()).public());

        let data = vec![1u8, 2, 3, 4, 5];
        let (cid, size) = builder.put(&data).await.unwrap();

        assert_eq!(size, 5);
        assert!(cid.key.is_none()); // public content
        let retrieved = store.get(&cid.hash).await.unwrap();
        assert_eq!(retrieved, Some(data));
    }

    #[tokio::test]
    async fn test_put_chunked() {
        let store = make_store();
        // Small chunk size forces the multi-chunk tree path.
        let config = BuilderConfig::new(store.clone())
            .with_chunk_size(1024)
            .public();
        let builder = TreeBuilder::new(config);

        let mut data = vec![0u8; 1024 * 2 + 100];
        for i in 0..data.len() {
            data[i] = (i % 256) as u8;
        }

        let (_cid, size) = builder.put(&data).await.unwrap();
        assert_eq!(size, data.len() as u64);

        // Verify store has multiple items (chunks + tree node)
        assert!(store.size() > 1);
    }

    #[tokio::test]
    async fn test_put_directory() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));

        let file1 = vec![1u8, 2, 3];
        let file2 = vec![4u8, 5, 6, 7];

        let hash1 = builder.put_blob(&file1).await.unwrap();
        let hash2 = builder.put_blob(&file2).await.unwrap();

        let dir_hash = builder
            .put_directory(vec![
                DirEntry::new("a.txt", hash1).with_size(file1.len() as u64),
                DirEntry::new("b.txt", hash2).with_size(file2.len() as u64),
            ])
            .await
            .unwrap();

        assert!(store.has(&dir_hash).await.unwrap());
    }

    #[tokio::test]
    async fn test_put_directory_sorted() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));

        let hash = builder.put_blob(&[1u8]).await.unwrap();

        // Entries are supplied out of order; put_directory must sort them.
        let dir_hash = builder
            .put_directory(vec![
                DirEntry::new("zebra", hash),
                DirEntry::new("apple", hash),
                DirEntry::new("mango", hash),
            ])
            .await
            .unwrap();

        let data = store.get(&dir_hash).await.unwrap().unwrap();
        let node = crate::codec::decode_tree_node(&data).unwrap();

        let names: Vec<_> = node.links.iter().filter_map(|l| l.name.clone()).collect();
        assert_eq!(names, vec!["apple", "mango", "zebra"]);
    }

    #[tokio::test]
    async fn test_put_tree_node_with_link_meta() {
        let store = make_store();
        let builder = TreeBuilder::new(BuilderConfig::new(store.clone()));

        let hash = builder.put_blob(&[1u8]).await.unwrap();

        let mut meta = HashMap::new();
        meta.insert("version".to_string(), serde_json::json!(2));
        meta.insert("created".to_string(), serde_json::json!("2024-01-01"));

        let node_hash = builder
            .put_tree_node(vec![Link {
                hash,
                name: Some("test".to_string()),
                size: 1,
                key: None,
                link_type: LinkType::Blob,
                meta: Some(meta.clone()),
            }])
            .await
            .unwrap();

        // Metadata must survive the encode/store/decode round trip.
        let data = store.get(&node_hash).await.unwrap().unwrap();
        let node = crate::codec::decode_tree_node(&data).unwrap();

        assert!(node.links[0].meta.is_some());
        let m = node.links[0].meta.as_ref().unwrap();
        assert_eq!(m.get("version"), Some(&serde_json::json!(2)));
    }

    #[tokio::test]
    async fn test_stream_builder() {
        let store = make_store();
        let config = BuilderConfig::new(store.clone()).with_chunk_size(100);
        let mut stream = StreamBuilder::new(config);

        stream.append(&[1u8, 2, 3]).await.unwrap();
        stream.append(&[4u8, 5]).await.unwrap();
        stream.append(&[6u8, 7, 8, 9]).await.unwrap();

        let (hash, size) = stream.finalize().await.unwrap();

        assert_eq!(size, 9);
        assert!(store.has(&hash).await.unwrap());
    }

    #[tokio::test]
    async fn test_stream_stats() {
        let store = make_store();
        let config = BuilderConfig::new(store).with_chunk_size(100);
        let mut stream = StreamBuilder::new(config);

        assert_eq!(stream.stats().chunks, 0);
        assert_eq!(stream.stats().buffered, 0);
        assert_eq!(stream.stats().total_size, 0);

        stream.append(&[0u8; 50]).await.unwrap();
        assert_eq!(stream.stats().buffered, 50);
        assert_eq!(stream.stats().total_size, 50);

        stream.append(&[0u8; 60]).await.unwrap(); // Crosses boundary
        assert_eq!(stream.stats().chunks, 1);
        assert_eq!(stream.stats().buffered, 10);
        assert_eq!(stream.stats().total_size, 110);
    }

    #[tokio::test]
    async fn test_stream_current_root() {
        let store = make_store();
        let config = BuilderConfig::new(store).with_chunk_size(100);
        let mut stream = StreamBuilder::new(config);

        stream.append(&[1u8, 2, 3]).await.unwrap();
        let root1 = stream.current_root().await.unwrap();

        stream.append(&[4u8, 5, 6]).await.unwrap();
        let root2 = stream.current_root().await.unwrap();

        // Roots should be different
        assert_ne!(to_hex(&root1.unwrap()), to_hex(&root2.unwrap()));
    }

    #[tokio::test]
    async fn test_stream_empty() {
        let store = make_store();
        let config = BuilderConfig::new(store.clone());
        let stream = StreamBuilder::new(config);

        // Empty streams finalize to the hash of zero bytes.
        let (hash, size) = stream.finalize().await.unwrap();
        assert_eq!(size, 0);
        assert!(store.has(&hash).await.unwrap());
    }

    #[tokio::test]
    async fn test_unified_put_public() {
        let store = make_store();
        // Use .public() to disable encryption
        let config = BuilderConfig::new(store.clone()).public();
        let builder = TreeBuilder::new(config);

        let data = b"Hello, World!";
        let (cid, size) = builder.put(data).await.unwrap();

        assert_eq!(size, data.len() as u64);
        assert!(cid.key.is_none()); // No encryption key for public content
        assert!(store.has(&cid.hash).await.unwrap());
    }

    #[tokio::test]
    async fn test_unified_put_encrypted() {
        use crate::reader::TreeReader;

        let store = make_store();
        // Default config has encryption enabled
        let config = BuilderConfig::new(store.clone());
        let builder = TreeBuilder::new(config);

        let data = b"Hello, encrypted world!";
        let (cid, size) = builder.put(data).await.unwrap();

        assert_eq!(size, data.len() as u64);
        assert!(cid.key.is_some()); // Has encryption key

        // Verify we can read it back
        let reader = TreeReader::new(store);
        let retrieved = reader.get(&cid).await.unwrap().unwrap();
        assert_eq!(retrieved, data);
    }

    #[tokio::test]
    async fn test_unified_put_encrypted_chunked() {
        use crate::reader::TreeReader;

        let store = make_store();
        let config = BuilderConfig::new(store.clone()).with_chunk_size(100);
        let builder = TreeBuilder::new(config);

        // Data larger than chunk size
        let data: Vec<u8> = (0..500).map(|i| (i % 256) as u8).collect();
        let (cid, size) = builder.put(&data).await.unwrap();

        assert_eq!(size, data.len() as u64);
        assert!(cid.key.is_some());

        // Verify roundtrip
        let reader = TreeReader::new(store);
        let retrieved = reader.get(&cid).await.unwrap().unwrap();
        assert_eq!(retrieved, data);
    }

    #[tokio::test]
    async fn test_cid_deterministic() {
        let store = make_store();
        let config = BuilderConfig::new(store.clone());
        let builder = TreeBuilder::new(config);

        let data = b"Same content produces same CID";

        let (cid1, _) = builder.put(data).await.unwrap();
        let (cid2, _) = builder.put(data).await.unwrap();

        // CHK: same content = same hash AND same key
        assert_eq!(cid1.hash, cid2.hash);
        assert_eq!(cid1.key, cid2.key);
        assert_eq!(cid1.to_string(), cid2.to_string());
    }
}