Skip to main content

aleph_cid/
folder_hash.rs

1//! Local UnixFS DAG construction for folder uploads.
2//!
3//! Computes the same root CID kubo produces via `ipfs add -r`, given the
4//! same flat list of file paths. Used by `AlephClient::upload_folder_to_ipfs`
5//! to verify that the IPFS gateway returns the expected CID after upload.
6//!
7//! Two public entry points share the same walk:
8//! `hash_folder_root` discards the block bytes and returns just the root
9//! CID; `build_folder_dag` invokes a caller-supplied sink for each
10//! `(cid_bytes, block_bytes)` pair as the walk progresses, which the
11//! authenticated CAR upload path uses to stream the DAG into a tempfile.
12//!
13//! # Test goldens
14//!
15//! Golden CIDs in `tests/folder_hash.rs` are regenerated against real kubo
16//! via `tests/regen-folder-hash-goldens.sh`. Run it after any change that
17//! could affect output bytes.
18
19use crate::cid::Cid;
20use crate::proto::{merkledag, unixfs};
21use crate::verify::{DAG_PB_CODEC, DagNode, Hasher, encode_multihash, encode_pbnode_canonical};
22use crate::{CidVersion, FolderEntry, UploadFolderOptions};
23use prost::Message;
24use sha2::{Digest, Sha256};
25use std::collections::BTreeMap;
26use std::fs::File;
27use std::io::Read;
28use std::path::PathBuf;
29
30#[derive(Debug, thiserror::Error)]
31pub enum FolderHashError {
32    #[error("I/O error reading {path}: {source}")]
33    Io {
34        path: PathBuf,
35        #[source]
36        source: std::io::Error,
37    },
38    #[error("HAMT recursion exceeded 8 levels (impossible hash collision)")]
39    HamtDepthExceeded,
40    #[error("sink error: {0}")]
41    Sink(#[source] std::io::Error),
42}
43
44/// Visitor sink for `build_folder_dag`.
45///
46/// Called once per emitted block as `(cid_bytes, block_bytes)`. For dag-pb
47/// nodes `block_bytes` is the canonical pbnode encoding; for raw leaves it is
48/// the raw chunk. The root block is always emitted last.
49pub type BlockSink<'a> = dyn FnMut(&[u8], &[u8]) -> std::io::Result<()> + 'a;
50
51/// A node in the in-memory folder tree, before hashing.
52#[derive(Debug)]
53enum TreeNode {
54    File(PathBuf),
55    Dir(BTreeMap<String, TreeNode>),
56}
57
58/// Build a nested tree from a flat list of `FolderEntry` whose `relative_path`
59/// is forward-slash separated. The returned map represents the root directory.
60fn build_tree(entries: &[FolderEntry]) -> BTreeMap<String, TreeNode> {
61    let mut root: BTreeMap<String, TreeNode> = BTreeMap::new();
62    for entry in entries {
63        let mut cursor = &mut root;
64        let segments: Vec<&str> = entry.relative_path.split('/').collect();
65        let (last, parents) = segments.split_last().expect("non-empty relative path");
66        for part in parents {
67            let next = cursor
68                .entry((*part).to_string())
69                .or_insert_with(|| TreeNode::Dir(BTreeMap::new()));
70            cursor = match next {
71                TreeNode::Dir(map) => map,
72                TreeNode::File(_) => {
73                    panic!(
74                        "folder tree: {} is both a file and a directory prefix",
75                        part
76                    );
77                }
78            };
79        }
80        cursor.insert(
81            (*last).to_string(),
82            TreeNode::File(entry.absolute_path.clone()),
83        );
84    }
85    root
86}
87
88/// One entry to attach under a directory or HAMT node.
89#[derive(Debug, Clone)]
90pub(crate) struct ChildLink {
91    pub name: String,
92    /// CID bytes as stored in PBLink.Hash. CIDv0: bare multihash. CIDv1: full binary.
93    pub cid_bytes: Vec<u8>,
94    pub cumulative_size: u64,
95}
96
97/// Wrap a sha2-256 multihash as either a CIDv1 dag-pb binary (version byte
98/// 0x01 + codec 0x70 + multihash) or a bare CIDv0 multihash.
99fn build_cid_bytes(multihash: Vec<u8>, cid_v1: bool) -> Vec<u8> {
100    if cid_v1 {
101        let mut cid = Vec::with_capacity(2 + multihash.len());
102        cid.push(0x01);
103        cid.push(DAG_PB_CODEC as u8);
104        cid.extend_from_slice(&multihash);
105        cid
106    } else {
107        multihash
108    }
109}
110
111/// Build a plain UnixFS Directory node from a list of children.
112///
113/// `cid_v1`: when `true`, returns a CIDv1 dag-pb CID (codec 0x70). When `false`,
114/// returns a bare multihash (CIDv0).
115///
116/// Children must already be sorted by `name` (callers use a `BTreeMap` so this
117/// is automatic).
118// .expect() is safe here: the only fallible path in the sinkful variant is
119// the sink itself, and a no-op sink never errors. (build_hamt_root cannot
120// take the same shape because its sinkful variant can also fail with
121// HamtDepthExceeded.)
122#[cfg_attr(not(test), allow(dead_code))]
123pub(crate) fn build_plain_directory(children: &[ChildLink], cid_v1: bool) -> DagNode {
124    build_plain_directory_with_sink(children, cid_v1, &mut |_, _| Ok(()))
125        .expect("no-op sink cannot fail")
126}
127
128fn build_plain_directory_with_sink(
129    children: &[ChildLink],
130    cid_v1: bool,
131    sink: &mut BlockSink<'_>,
132) -> Result<DagNode, FolderHashError> {
133    let links: Vec<merkledag::PbLink> = children
134        .iter()
135        .map(|c| merkledag::PbLink {
136            hash: Some(c.cid_bytes.clone()),
137            name: Some(c.name.clone()),
138            tsize: Some(c.cumulative_size),
139        })
140        .collect();
141
142    let dir_data = unixfs::Data {
143        r#type: unixfs::DataType::Directory as i32,
144        data: None,
145        filesize: None,
146        blocksizes: vec![],
147        hash_type: None,
148        fanout: None,
149    };
150    let mut dir_bytes = Vec::new();
151    dir_data
152        .encode(&mut dir_bytes)
153        .expect("protobuf encoding cannot fail");
154
155    let node = merkledag::PbNode {
156        links,
157        data: Some(dir_bytes),
158    };
159    let node_bytes = encode_pbnode_canonical(&node);
160    let digest = Sha256::digest(&node_bytes);
161    let mh = encode_multihash(&digest);
162    let cid_bytes = build_cid_bytes(mh, cid_v1);
163
164    let node_size = node_bytes.len() as u64;
165    let children_cumulative: u64 = children.iter().map(|c| c.cumulative_size).sum();
166
167    let dag_node = DagNode {
168        cid_bytes,
169        cumulative_size: node_size + children_cumulative,
170        data_size: 0, // directories don't expose a payload size
171    };
172    sink(&dag_node.cid_bytes, &node_bytes).map_err(FolderHashError::Sink)?;
173    Ok(dag_node)
174}
175
176/// Kubo's HAMT sharding threshold.
177///
178/// Source: `github.com/ipfs/boxo/ipld/unixfs/io.HAMTShardingSize = 256 * 1024`.
179const HAMT_SHARDING_SIZE: usize = 256 * 1024; // 262144 bytes
180
181/// Estimate the bare-directory size as kubo does.
182///
183/// Source: boxo `io/directory.go` `productionLinkSize`:
184/// `len(linkName) + linkCid.ByteLen()` — name bytes plus the binary CID size.
185pub(crate) fn estimated_dir_size(children: &[ChildLink]) -> usize {
186    children
187        .iter()
188        .map(|c| c.name.len() + c.cid_bytes.len())
189        .sum()
190}
191
192/// Returns true when the directory should be encoded as a HAMT shard.
193pub(crate) fn should_shard(children: &[ChildLink]) -> bool {
194    estimated_dir_size(children) >= HAMT_SHARDING_SIZE
195}
196
197/// HAMT bitfield: set the bit for slot index `slot`.
198///
199/// Bit ordering matches go-unixfs HAMT:
200/// - Bits within each byte are LSB-first (slot % 8 = 0 → bit 0, i.e. 0x01).
201/// - Bytes are stored in reverse order: byte at serialized index 0 holds the
202///   highest-numbered slots (slots 248–255), byte at index 31 holds slots 0–7.
203///   Equivalently: serialized_byte_index = 31 - (slot / 8).
204pub(crate) fn set_bit(bf: &mut [u8; 32], slot: u8) {
205    let byte = 31 - (slot / 8) as usize;
206    let bit = slot % 8;
207    bf[byte] |= 1 << bit;
208}
209
210/// Test a HAMT bitfield bit. Same ordering as `set_bit`.
211#[cfg(test)]
212pub(crate) fn test_bit(bf: &[u8; 32], slot: u8) -> bool {
213    let byte = 31 - (slot / 8) as usize;
214    let bit = slot % 8;
215    (bf[byte] & (1 << bit)) != 0
216}
217
218/// HAMT fanout: 256 buckets per shard.
219const HAMT_FANOUT: u32 = 256;
220
221/// UnixFS hashType code for murmur3-x64-64.
222const HAMT_HASH_TYPE: u64 = 0x22;
223
224/// Murmur3 seed used by go-ipfs HAMT.
225const HAMT_MURMUR_SEED: u32 = 0;
226
227/// Hash a directory entry name to a 64-bit murmur3 value (kubo-compatible).
228///
229/// Kubo uses spaolacci/murmur3 `Sum64`, which returns the LOW 64 bits of the
230/// x64_128 hash. We reproduce that by computing x64_128 and downcasting.
231fn hamt_hash_name(name: &str) -> u64 {
232    use murmur3::murmur3_x64_128;
233    use std::io::Cursor;
234    let h128 = murmur3_x64_128(&mut Cursor::new(name.as_bytes()), HAMT_MURMUR_SEED)
235        .expect("in-memory cursor read cannot fail");
236    h128 as u64
237}
238
239/// Extract the 8-bit slot index for HAMT level `level` from a 64-bit hash.
240///
241/// Kubo consumes bits from the HIGH end of the hash first — level 0 uses
242/// bits [56..64), level 1 uses bits [48..56), etc.
243///
244/// This matches go-unixfs `hashBits.Next(8)` which reads:
245///   `(h.val >> (maxHashLen - consumed)) & mask`
246/// where `maxHashLen = 64` and `consumed` advances by 8 per level.
247fn hamt_slot(hash: u64, level: u32) -> u8 {
248    ((hash >> (56 - 8 * level)) & 0xff) as u8
249}
250
251/// Build the root of a HAMT shard tree from `children`.
252#[cfg_attr(not(test), allow(dead_code))]
253pub(crate) fn build_hamt_root(
254    children: &[ChildLink],
255    cid_v1: bool,
256) -> Result<DagNode, FolderHashError> {
257    build_hamt_root_with_sink(children, cid_v1, &mut |_, _| Ok(()))
258}
259
260fn build_hamt_root_with_sink(
261    children: &[ChildLink],
262    cid_v1: bool,
263    sink: &mut BlockSink<'_>,
264) -> Result<DagNode, FolderHashError> {
265    let hashed: Vec<(u64, ChildLink)> = children
266        .iter()
267        .map(|c| (hamt_hash_name(&c.name), c.clone()))
268        .collect();
269    build_hamt_node_with_sink(&hashed, 0, cid_v1, sink)
270}
271
272fn build_hamt_node_with_sink(
273    entries: &[(u64, ChildLink)],
274    level: u32,
275    cid_v1: bool,
276    sink: &mut BlockSink<'_>,
277) -> Result<DagNode, FolderHashError> {
278    if level >= 8 {
279        return Err(FolderHashError::HamtDepthExceeded);
280    }
281
282    // Bucket entries by slot.
283    let mut buckets: [Vec<(u64, ChildLink)>; 256] = std::array::from_fn(|_| Vec::new());
284    for (h, c) in entries {
285        let slot = hamt_slot(*h, level) as usize;
286        buckets[slot].push((*h, c.clone()));
287    }
288
289    let mut bitfield = [0u8; 32];
290    let mut links: Vec<merkledag::PbLink> = Vec::new();
291    let mut children_cumulative: u64 = 0;
292
293    for (slot_idx, bucket) in buckets.iter().enumerate() {
294        if bucket.is_empty() {
295            continue;
296        }
297        set_bit(&mut bitfield, slot_idx as u8);
298
299        let (link_name, child_dag) = if bucket.len() == 1 {
300            // Leaf: link directly to the entry's CID with name "{slot_hex}{actual}".
301            let (_, c) = &bucket[0];
302            let name = format!("{:02X}{}", slot_idx, c.name);
303            (
304                name,
305                DagNode {
306                    cid_bytes: c.cid_bytes.clone(),
307                    cumulative_size: c.cumulative_size,
308                    data_size: 0,
309                },
310            )
311        } else {
312            // Subshard: recurse, link with name "{slot_hex}".
313            let sub = build_hamt_node_with_sink(bucket, level + 1, cid_v1, sink)?;
314            (format!("{slot_idx:02X}"), sub)
315        };
316
317        children_cumulative += child_dag.cumulative_size;
318        links.push(merkledag::PbLink {
319            hash: Some(child_dag.cid_bytes),
320            name: Some(link_name),
321            tsize: Some(child_dag.cumulative_size),
322        });
323    }
324
325    // Sort links by name to match kubo. Since prefix is fixed 2-hex-char
326    // width, this equals sort-by-slot.
327    links.sort_by(|a, b| a.name.cmp(&b.name));
328
329    // Serialize the bitfield like go-bitfield's Bytes(): trim leading zero bytes
330    // (bytes at low indices, which hold the highest-numbered slots).
331    let bf_vec: Vec<u8> = {
332        let start = bitfield.iter().position(|&b| b != 0).unwrap_or(32);
333        bitfield[start..].to_vec()
334    };
335
336    let unixfs_data = unixfs::Data {
337        r#type: unixfs::DataType::HamtShard as i32,
338        data: Some(bf_vec),
339        filesize: None,
340        blocksizes: vec![],
341        hash_type: Some(HAMT_HASH_TYPE),
342        fanout: Some(HAMT_FANOUT as u64),
343    };
344    let mut data_bytes = Vec::new();
345    unixfs_data
346        .encode(&mut data_bytes)
347        .expect("protobuf encoding cannot fail");
348
349    let node = merkledag::PbNode {
350        links,
351        data: Some(data_bytes),
352    };
353    let node_bytes = encode_pbnode_canonical(&node);
354    let digest = Sha256::digest(&node_bytes);
355    let mh = encode_multihash(&digest);
356    let cid_bytes = build_cid_bytes(mh, cid_v1);
357
358    let node_size = node_bytes.len() as u64;
359
360    let dag_node = DagNode {
361        cid_bytes,
362        cumulative_size: node_size + children_cumulative,
363        data_size: 0,
364    };
365    sink(&dag_node.cid_bytes, &node_bytes).map_err(FolderHashError::Sink)?;
366    Ok(dag_node)
367}
368
369/// Walk the folder DAG, emitting every block to `sink`. Returns the root CID.
370///
371/// `sink` is invoked once per emitted block as `(cid_bytes, block_bytes)`. For
372/// dag-pb nodes `block_bytes` is the canonical pbnode encoding; for raw leaves
373/// it is the raw chunk. The root block is emitted last.
374pub fn build_folder_dag(
375    entries: &[FolderEntry],
376    opts: &UploadFolderOptions,
377    sink: &mut BlockSink<'_>,
378) -> Result<Cid, FolderHashError> {
379    let tree = build_tree(entries);
380    let cid_v1 = matches!(opts.cid_version, CidVersion::V1);
381    let root = hash_dir_with_sink(&tree, cid_v1, sink)?;
382    Ok(cid_bytes_to_cid(&root.cid_bytes))
383}
384
385/// Build the local UnixFS root CID for `entries`, matching what kubo's
386/// HTTP `/api/v0/add?wrap-with-directory=true` produces given the same flat
387/// list of files via multipart.
388///
389/// This is a thin wrapper over `build_folder_dag` that discards block bytes.
390///
391/// # Symlinks
392///
393/// This function hashes only the byte content of `entries` — there is no
394/// notion of a UnixFS `Symlink` node. With `follow_symlinks=true` (the
395/// `collect_folder_files` default), symlinks are dereferenced by the walker
396/// and their target's bytes are uploaded as regular files. The resulting CID
397/// matches `ipfs add -r --no-symlinks` on the same source, NOT plain
398/// `ipfs add -r` (which would create UnixFS `Symlink` nodes for each
399/// symlink and produce a different CID).
400///
401/// This is a deliberate tradeoff that mirrors the multipart upload path; if
402/// symlink-preservation semantics are needed, both the walker and the upload
403/// would have to switch to `application/x-symlink` parts together.
404pub fn hash_folder_root(
405    entries: &[FolderEntry],
406    opts: &UploadFolderOptions,
407) -> Result<Cid, FolderHashError> {
408    build_folder_dag(entries, opts, &mut |_, _| Ok(()))
409}
410
411fn hash_dir_with_sink(
412    tree: &BTreeMap<String, TreeNode>,
413    cid_v1: bool,
414    sink: &mut BlockSink<'_>,
415) -> Result<DagNode, FolderHashError> {
416    let mut children: Vec<ChildLink> = Vec::with_capacity(tree.len());
417    for (name, node) in tree {
418        let dag = match node {
419            TreeNode::File(path) => hash_file_with_sink(path, cid_v1, sink)?,
420            TreeNode::Dir(sub) => hash_dir_with_sink(sub, cid_v1, sink)?,
421        };
422        children.push(ChildLink {
423            name: name.clone(),
424            cid_bytes: dag.cid_bytes,
425            cumulative_size: dag.cumulative_size,
426        });
427    }
428
429    if should_shard(&children) {
430        build_hamt_root_with_sink(&children, cid_v1, sink)
431    } else {
432        build_plain_directory_with_sink(&children, cid_v1, sink)
433    }
434}
435
436fn hash_file_with_sink(
437    path: &std::path::Path,
438    cid_v1: bool,
439    sink: &mut BlockSink<'_>,
440) -> Result<DagNode, FolderHashError> {
441    let mut hasher = if cid_v1 {
442        Hasher::for_ipfs_v1_raw_leaves()
443    } else {
444        Hasher::for_ipfs()
445    };
446
447    let mut f = File::open(path).map_err(|e| FolderHashError::Io {
448        path: path.to_path_buf(),
449        source: e,
450    })?;
451    let mut buf = vec![0u8; 64 * 1024];
452    loop {
453        let n = f.read(&mut buf).map_err(|e| FolderHashError::Io {
454            path: path.to_path_buf(),
455            source: e,
456        })?;
457        if n == 0 {
458            break;
459        }
460        hasher
461            .update_with_sink(&buf[..n], sink)
462            .map_err(FolderHashError::Sink)?;
463    }
464
465    // finalize_with_sink returns the root DagNode with the correct cumulative_size
466    // (i.e. the sum of all block sizes in the subtree, not just the raw file bytes).
467    // This is what kubo stores in PBLink.Tsize for the parent directory's link.
468    hasher
469        .finalize_with_sink(sink)
470        .map_err(FolderHashError::Sink)
471}
472
473fn cid_bytes_to_cid(bytes: &[u8]) -> Cid {
474    // CIDv1 binary starts with 0x01 (version byte); CIDv0 binary is a bare
475    // sha2-256 multihash starting with 0x12 (multihash code). Mirrors the
476    // byte-sniff in `verify::Hasher::finalize`.
477    let cid_str = if bytes.first() == Some(&0x01) {
478        let parsed = ::cid::Cid::try_from(bytes).expect("hash_folder_root produces valid CIDs");
479        parsed.to_string()
480    } else {
481        let mh = multihash::Multihash::<64>::from_bytes(bytes)
482            .expect("CIDv0 multihash bytes must parse");
483        let parsed = ::cid::Cid::new_v0(mh).expect("valid sha2-256 multihash");
484        parsed.to_string()
485    };
486    Cid::try_from(cid_str.as_str()).expect("round-trip CID parse")
487}
488
489#[cfg(test)]
490mod tests {
491    use super::*;
492
493    fn entry(rel: &str) -> FolderEntry {
494        FolderEntry {
495            relative_path: rel.to_string(),
496            absolute_path: PathBuf::from(format!("/abs/{rel}")),
497        }
498    }
499
500    #[test]
501    fn build_tree_flat() {
502        let entries = vec![entry("a.txt"), entry("b.txt")];
503        let tree = build_tree(&entries);
504        assert_eq!(tree.len(), 2);
505        assert!(matches!(tree.get("a.txt"), Some(TreeNode::File(_))));
506        assert!(matches!(tree.get("b.txt"), Some(TreeNode::File(_))));
507    }
508
509    #[test]
510    fn build_tree_nested() {
511        let entries = vec![
512            entry("top.txt"),
513            entry("sub/inner.txt"),
514            entry("sub/deeper/leaf.txt"),
515        ];
516        let tree = build_tree(&entries);
517        assert!(matches!(tree.get("top.txt"), Some(TreeNode::File(_))));
518        let sub = match tree.get("sub") {
519            Some(TreeNode::Dir(m)) => m,
520            _ => panic!("sub must be a Dir"),
521        };
522        assert!(matches!(sub.get("inner.txt"), Some(TreeNode::File(_))));
523        let deeper = match sub.get("deeper") {
524            Some(TreeNode::Dir(m)) => m,
525            _ => panic!("deeper must be a Dir"),
526        };
527        assert!(matches!(deeper.get("leaf.txt"), Some(TreeNode::File(_))));
528    }
529
530    #[test]
531    fn build_tree_alphabetic_via_btreemap() {
532        let entries = vec![entry("zebra.txt"), entry("apple.txt"), entry("mango.txt")];
533        let tree = build_tree(&entries);
534        let names: Vec<&String> = tree.keys().collect();
535        assert_eq!(names, vec!["apple.txt", "mango.txt", "zebra.txt"]);
536    }
537
538    #[test]
539    fn plain_dir_two_entries_canonical_encoding() {
540        // Two children with stub CIDv1 raw CIDs (32 bytes of repeated content
541        // for the multihash). Verifies: structure builds, CIDv1 dag-pb prefix,
542        // cumulative_size accounts for child contributions.
543        let child_a = ChildLink {
544            name: "a.txt".to_string(),
545            cid_bytes: stub_cidv1_raw(0x00),
546            cumulative_size: 1,
547        };
548        let child_b = ChildLink {
549            name: "b.txt".to_string(),
550            cid_bytes: stub_cidv1_raw(0x01),
551            cumulative_size: 1,
552        };
553
554        let dag = build_plain_directory(&[child_a.clone(), child_b.clone()], true);
555
556        assert_eq!(dag.cid_bytes[0], 0x01); // CIDv1
557        assert_eq!(dag.cid_bytes[1], 0x70); // dag-pb codec
558
559        // Cumulative size = node serialized bytes + sum of child cumulative sizes.
560        assert!(dag.cumulative_size > child_a.cumulative_size + child_b.cumulative_size);
561    }
562
563    fn stub_cidv1_raw(b: u8) -> Vec<u8> {
564        // CIDv1 raw codec wrapping a sha2-256 multihash filled with byte `b`.
565        let mut v = vec![0x01u8, 0x55, 0x12, 0x20];
566        v.extend(std::iter::repeat_n(b, 32));
567        v
568    }
569
570    #[test]
571    fn estimated_dir_size_short_names() {
572        // Per boxo productionLinkSize: len(name) + cid.ByteLen().
573        // Two entries with 10-char names and 36-byte CIDv1 each:
574        // 2 * (10 + 36) = 92.
575        let children = vec![
576            ChildLink {
577                name: "a".repeat(10),
578                cid_bytes: stub_cidv1_raw(0x00),
579                cumulative_size: 0,
580            },
581            ChildLink {
582                name: "b".repeat(10),
583                cid_bytes: stub_cidv1_raw(0x01),
584                cumulative_size: 0,
585            },
586        ];
587        assert_eq!(estimated_dir_size(&children), 92);
588    }
589
590    #[test]
591    fn shard_threshold_below_and_above() {
592        // Threshold is 256 * 1024 = 262144.
593        // boxo productionLinkSize = len(name) + cid.ByteLen()
594        // For 8-char names with 36-byte CIDv1: 8 + 36 = 44 bytes per entry.
595        // 5957 entries * 44 = 262108 -> below
596        // 5958 entries * 44 = 262152 -> above (matches kubo v0.30.0)
597        //
598        // Stub CIDv1 raw = 36 bytes (version=1, codec=0x55, sha256 multihash).
599        let stub_cid = stub_cidv1_raw(0x00);
600        let small: Vec<ChildLink> = (0..5957)
601            .map(|i| ChildLink {
602                name: format!("{i:08}"),
603                cid_bytes: stub_cid.clone(),
604                cumulative_size: 0,
605            })
606            .collect();
607        let big: Vec<ChildLink> = (0..5958)
608            .map(|i| ChildLink {
609                name: format!("{i:08}"),
610                cid_bytes: stub_cid.clone(),
611                cumulative_size: 0,
612            })
613            .collect();
614        assert!(!should_shard(&small));
615        assert!(should_shard(&big));
616    }
617
618    #[test]
619    fn bitfield_set_and_test() {
620        let mut bf = [0u8; 32];
621        set_bit(&mut bf, 0);
622        set_bit(&mut bf, 7);
623        set_bit(&mut bf, 8);
624        set_bit(&mut bf, 255);
625        assert!(test_bit(&bf, 0));
626        assert!(test_bit(&bf, 7));
627        assert!(test_bit(&bf, 8));
628        assert!(test_bit(&bf, 255));
629        assert!(!test_bit(&bf, 1));
630        assert!(!test_bit(&bf, 254));
631    }
632
633    #[test]
634    fn hash_folder_root_single_file_v1_raw() {
635        use crate::{CidVersion, UploadFolderOptions};
636        use std::io::Write;
637        use tempfile::TempDir;
638
639        let dir = TempDir::new().unwrap();
640        let path = dir.path().join("hello.txt");
641        let mut f = std::fs::File::create(&path).unwrap();
642        f.write_all(b"hello\n").unwrap();
643        drop(f);
644
645        let entries = vec![FolderEntry {
646            relative_path: "hello.txt".to_string(),
647            absolute_path: path,
648        }];
649        let opts = UploadFolderOptions {
650            cid_version: CidVersion::V1,
651            ..Default::default()
652        };
653
654        let cid = hash_folder_root(&entries, &opts).expect("hash should succeed");
655
656        let s = cid.to_string();
657        assert!(s.starts_with("bafy"), "expected CIDv1 dag-pb root, got {s}");
658    }
659
660    #[test]
661    fn hamt_single_level_two_entries() {
662        // Two entries with names that go into different root-level slots.
663        // The HAMT root should:
664        //   - Be a CIDv1 dag-pb (codec 0x70)
665        //   - Have non-zero cumulative_size
666        //   - Build successfully without errors
667        let kids = vec![
668            ChildLink {
669                name: "alpha".to_string(),
670                cid_bytes: stub_cidv1_raw(0xaa),
671                cumulative_size: 1,
672            },
673            ChildLink {
674                name: "bravo".to_string(),
675                cid_bytes: stub_cidv1_raw(0xbb),
676                cumulative_size: 1,
677            },
678        ];
679        let dag = build_hamt_root(&kids, true).expect("HAMT must build");
680
681        assert_eq!(dag.cid_bytes[0], 0x01);
682        assert_eq!(dag.cid_bytes[1], 0x70);
683        assert!(dag.cumulative_size > 0);
684    }
685}