aleph-cid 0.1.0

kubo-compatible IPFS CID computation for Aleph Cloud: UnixFS dag-pb hashing, HAMT-sharded directories and CARv1 framing.
Documentation
//! Golden-CID integration tests for `folder_hash::hash_folder_root`.
//!
//! Each test builds a deterministic folder structure on disk, runs the local
//! hasher, and asserts the result equals a golden CID generated from real kubo
//! via `tests/regen-folder-hash-goldens.sh`.
//!
//! The fixture-builder functions below mirror the bash fixtures in the regen
//! script — keep them in sync.

use aleph_cid::{CidVersion, FolderEntry, UploadFolderOptions, collect_folder_files};
use std::fs;
use std::io::Write;
use std::path::Path;
use tempfile::TempDir;

// === Fixtures ===

/// Populates `root` with a single file, `hello.txt`, holding the bytes `hello\n`.
///
/// Panics if the file cannot be written.
fn fixture_single_file_small(root: &Path) {
    let target = root.join("hello.txt");
    fs::write(target, b"hello\n").unwrap();
}

/// Creates `big.bin` under `root`: 1024 blocks of 1024 bytes (1 MiB total),
/// every byte being ASCII `x`.
///
/// Panics if the file cannot be created or written.
fn fixture_single_file_multi_chunk(root: &Path) {
    // The regen script pipes zeros through `tr '\0' '\xab'`; GNU tr reads that
    // replacement as the plain letter 'x' (0x78) rather than the byte 0xAB, so
    // the fixture deliberately reproduces 'x' to stay in step with the golden.
    const BLOCK: [u8; 1024] = [b'x'; 1024];

    let mut out = fs::File::create(root.join("big.bin")).unwrap();
    (0..1024).for_each(|_| out.write_all(&BLOCK).unwrap());
}

/// Writes ten files `a.txt` through `j.txt` into `root`; each file contains
/// exactly one byte, the ASCII letter that names it.
///
/// Panics if any file cannot be written.
fn fixture_flat_dir_small(root: &Path) {
    for byte in b'a'..=b'j' {
        let name = format!("{}.txt", char::from(byte));
        fs::write(root.join(name), [byte]).unwrap();
    }
}

/// Builds a three-level tree under `root`:
///
/// * `top.txt` (`top\n`)
/// * `sub/inner.txt` (`inner\n`)
/// * `sub/deeper/leaf.txt` (`leaf\n`)
///
/// Panics if a directory already exists or any write fails.
fn fixture_nested_dir(root: &Path) {
    let sub_dir = root.join("sub");
    let deeper_dir = sub_dir.join("deeper");

    fs::write(root.join("top.txt"), b"top\n").unwrap();

    fs::create_dir(&sub_dir).unwrap();
    fs::write(sub_dir.join("inner.txt"), b"inner\n").unwrap();

    fs::create_dir(&deeper_dir).unwrap();
    fs::write(deeper_dir.join("leaf.txt"), b"leaf\n").unwrap();
}

/// Writes 6000 one-byte files (`x`) into `root`, named `00000000` through
/// `00005999`.
///
/// Panics if any file cannot be written.
fn fixture_hamt_short_names(root: &Path) {
    // Size estimate: 6000 entries * (8-char name + 36-byte CIDv1) = 264000,
    // which exceeds the 262144 HAMT threshold.
    (0..6000u32)
        .map(|idx| root.join(format!("{idx:08}")))
        .for_each(|path| fs::write(path, b"x").unwrap());
}

/// Writes 920 one-byte files (`x`) into `root`, each with a 250-character name:
/// a zero-padded 4-digit index followed by 246 `z` characters.
///
/// Panics if any file cannot be written.
fn fixture_hamt_long_names(root: &Path) {
    // Size estimate: 920 entries * (250-char name + 36-byte CIDv1) = 263120,
    // which exceeds the 262144 HAMT threshold. The 4 + 246 split keeps each
    // name inside the Linux 255-char filename limit while still forcing
    // HAMT sharding.
    let tail = "z".repeat(246);
    (0..920u32)
        .map(|idx| root.join(format!("{idx:04}{tail}")))
        .for_each(|path| fs::write(path, b"x").unwrap());
}

/// Creates a zero-length file named `empty` directly under `root`.
///
/// Panics if the file cannot be written.
fn fixture_empty_file(root: &Path) {
    let nothing: &[u8] = &[];
    fs::write(root.join("empty"), nothing).unwrap();
}

/// Writes three tiny files into `root` whose names contain non-ASCII
/// characters of increasing UTF-8 width.
///
/// Panics if any file cannot be written.
fn fixture_utf8_names(root: &Path) {
    // One name per encoding width: 2-byte (latin extended), 3-byte (CJK),
    // 4-byte (emoji).
    const FILES: [(&str, &[u8]); 3] = [
        ("café.txt", b"a\n"),
        ("日本.txt", b"b\n"),
        ("🚀.txt", b"c\n"),
    ];

    for (name, body) in FILES {
        fs::write(root.join(name), body).unwrap();
    }
}

// Threshold boundary fixtures: 8-char names, 1-byte content.
// boxo productionLinkSize = name.len() + cid.ByteLen() = 8 + 36 = 44.
// 5957 * 44 = 262108 -> BasicDirectory (below 262144 HAMT threshold).
// 5958 * 44 = 262152 -> HAMTDirectory (above threshold).
/// Writes 5957 one-byte files (`x`) into `root`, named `00000000` through
/// `00005956`.
///
/// Panics if any file cannot be written.
fn fixture_threshold_below(root: &Path) {
    (0..5957u32).for_each(|n| {
        let name = format!("{n:08}");
        fs::write(root.join(name), b"x").unwrap();
    });
}

/// Writes 5958 one-byte files (`x`) into `root`, named `00000000` through
/// `00005957`.
///
/// Panics if any file cannot be written.
fn fixture_threshold_above(root: &Path) {
    (0..5958u32).for_each(|n| {
        let name = format!("{n:08}");
        fs::write(root.join(name), b"x").unwrap();
    });
}

/// Writes up to three one-byte files into `root`.
///
/// Two file names are read at compile time from `hamt_collision_a.txt` and
/// `hamt_collision_b.txt` (whitespace-trimmed). When both are non-empty they
/// are written with contents `a` and `b`; if either is empty, neither is
/// written. A file named `Z` containing `c` is always written last.
///
/// NOTE(review): judging by the file names, the two included names are
/// presumably chosen to collide in the HAMT — confirm against the regen script.
///
/// Panics if any file cannot be written.
fn fixture_hamt_multi_level(root: &Path) {
    let first = include_str!("hamt_collision_a.txt").trim();
    let second = include_str!("hamt_collision_b.txt").trim();

    let have_both = !(first.is_empty() || second.is_empty());
    if have_both {
        for (name, body) in [(first, b"a"), (second, b"b")] {
            fs::write(root.join(name), body).unwrap();
        }
    }

    fs::write(root.join("Z"), b"c").unwrap();
}

// === Helper: run the hasher over a built fixture ===

/// Collects the files under `root`, hashes them with the requested CID
/// `version`, and returns the resulting root CID rendered as a string.
///
/// All other options are left at `UploadFolderOptions::default()`.
///
/// Panics if collection fails, or with "hashing must succeed" if hashing fails.
fn hash_root(root: &Path, version: CidVersion) -> String {
    // NOTE(review): the meaning of the `true` flag is defined by
    // `collect_folder_files`; it is not visible from this file.
    let files = collect_folder_files(root, true).unwrap();

    let mut options = UploadFolderOptions::default();
    options.cid_version = version;

    let root_cid = aleph_cid::folder_hash::hash_folder_root(&files, &options)
        .expect("hashing must succeed");
    root_cid.to_string()
}

// === Goldens ===
//
// Generated by tests/regen-folder-hash-goldens.sh against kubo v0.30.0.
// Regenerate after any change to folder_hash.rs that could affect output bytes.

// Expected CIDv1 root for the tree built by `fixture_single_file_small`.
const GOLDEN_SINGLE_FILE_SMALL_V1: &str =
    "bafybeigdcg7pksx2zk5336vrfsktjodlr4rbfz37qr3koc5xboxe5ekv24";
// Expected CIDv1 root for the tree built by `fixture_single_file_multi_chunk`.
const GOLDEN_SINGLE_FILE_MULTI_CHUNK_V1: &str =
    "bafybeiconxawjaqtqwur53yryub7awouqco4rlqqfbv6jypyfzeewmhrbe";
// Expected CIDv1 root for the tree built by `fixture_flat_dir_small`.
const GOLDEN_FLAT_DIR_SMALL_V1: &str =
    "bafybeic44rqkymydh3gvookwnqasv5ydbk5owkl7l2pkvgmh4stny4cdly";
// Expected CIDv1 root for the tree built by `fixture_nested_dir`; also used as
// the cross-check in `order_independence_v1`.
const GOLDEN_NESTED_DIR_V1: &str = "bafybeidcclyz24mrl4furbaf4ecb3ks52dbfer7r6dxqavd4wrqg7bp7lu";
// Expected CIDv1 root for the tree built by `fixture_hamt_short_names`.
const GOLDEN_HAMT_SHORT_NAMES_V1: &str =
    "bafybeidk3a4hr6msgcv24pkutwrethttydqzx56m724lsg75fzgeu3yzn4";
// Expected CIDv1 root for the tree built by `fixture_hamt_long_names`.
const GOLDEN_HAMT_LONG_NAMES_V1: &str =
    "bafybeidurl753n2ka5wvgx6qyifmilikuh2lb5eucgdpo3pu253pkx7r4e";
// Expected CIDv1 root for the tree built by `fixture_hamt_multi_level`.
const GOLDEN_HAMT_MULTI_LEVEL_V1: &str =
    "bafybeiegiotpslrdicvq2duzx4gmigbxyocs7fw37q7hmwa3sseiz2tj7y";
// Expected CIDv0 root for the tree built by `fixture_flat_dir_small` (the only
// V0 golden in this file).
const GOLDEN_FLAT_DIR_SMALL_V0: &str = "QmaVbVDQrEVXH6EAQQExN82Xt44VmrQWkkey4S8eYcTNRs";
// Expected CIDv1 root when the hasher is given an empty entry list.
const GOLDEN_EMPTY_DIRECTORY_V1: &str =
    "bafybeiczsscdsbs7ffqz55asqdf3smv6klcw3gofszvwlyarci47bgf354";
// Expected CIDv1 root for the tree built by `fixture_empty_file`.
const GOLDEN_EMPTY_FILE_V1: &str = "bafybeigalvxqcgqiogiv6bimmxedgxbhu7cikwhwha2c2iqzdbnabewjai";
// Expected CIDv1 root for the tree built by `fixture_utf8_names`.
const GOLDEN_UTF8_NAMES_V1: &str = "bafybeib5csmg24msmu4jmaczufo6s5cciojvsct4nb56db46ozy6mz4bji";
// Expected CIDv1 root for the tree built by `fixture_threshold_below`.
const GOLDEN_THRESHOLD_BELOW_V1: &str =
    "bafybeibqvqqd4yvkpt5olhxoayzzevdxkw5ucbk2hlnjrttpo6psbjwx4u";
// Expected CIDv1 root for the tree built by `fixture_threshold_above`.
const GOLDEN_THRESHOLD_ABOVE_V1: &str =
    "bafybeidzmazxweqx7qun4ezjil2iqmn7u23225yfxhekm5zysnwmmtbohe";

/// The single-small-file fixture must hash to its CIDv1 golden.
#[test]
fn golden_single_file_small_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_single_file_small(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_SINGLE_FILE_SMALL_V1);
}

/// The multi-chunk single-file fixture must hash to its CIDv1 golden.
#[test]
fn golden_single_file_multi_chunk_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_single_file_multi_chunk(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_SINGLE_FILE_MULTI_CHUNK_V1);
}

/// The small flat-directory fixture must hash to its CIDv1 golden.
#[test]
fn golden_flat_dir_small_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_flat_dir_small(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_FLAT_DIR_SMALL_V1);
}

/// The nested-directory fixture must hash to its CIDv1 golden.
#[test]
fn golden_nested_dir_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_nested_dir(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_NESTED_DIR_V1);
}

/// The short-name HAMT fixture must hash to its CIDv1 golden.
#[test]
fn golden_hamt_short_names_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_hamt_short_names(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_HAMT_SHORT_NAMES_V1);
}

/// The long-name HAMT fixture must hash to its CIDv1 golden.
#[test]
fn golden_hamt_long_names_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_hamt_long_names(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_HAMT_LONG_NAMES_V1);
}

/// The multi-level HAMT fixture must hash to its CIDv1 golden.
#[test]
fn golden_hamt_multi_level_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_hamt_multi_level(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_HAMT_MULTI_LEVEL_V1);
}

/// The small flat-directory fixture, hashed as CIDv0, must match its V0 golden.
#[test]
fn golden_flat_dir_small_v0() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_flat_dir_small(root);

    let actual = hash_root(root, CidVersion::V0);
    assert_eq!(actual, GOLDEN_FLAT_DIR_SMALL_V0);
}

/// Hashing an empty entry list must produce the empty-directory CIDv1 golden.
#[test]
fn golden_empty_directory_v1() {
    // `collect_folder_files` rejects an empty root, so the hasher is fed an
    // empty entry list directly instead of going through `hash_root`.
    let no_entries: Vec<FolderEntry> = Vec::new();

    let mut options = UploadFolderOptions::default();
    options.cid_version = CidVersion::V1;

    let root_cid = aleph_cid::folder_hash::hash_folder_root(&no_entries, &options)
        .expect("hashing must succeed");
    assert_eq!(root_cid.to_string(), GOLDEN_EMPTY_DIRECTORY_V1);
}

/// The empty-file fixture must hash to its CIDv1 golden.
#[test]
fn golden_empty_file_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_empty_file(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_EMPTY_FILE_V1);
}

/// The non-ASCII file-name fixture must hash to its CIDv1 golden.
#[test]
fn golden_utf8_names_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_utf8_names(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_UTF8_NAMES_V1);
}

/// Just under the HAMT threshold: 5957 entries with 8-char names are expected
/// to remain a BasicDirectory and must hash to the matching CIDv1 golden.
#[test]
fn golden_threshold_below_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_threshold_below(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_THRESHOLD_BELOW_V1);
}

/// Just over the HAMT threshold: 5958 entries are expected to flip the
/// directory to a HAMTDirectory. The result must equal its own golden and must
/// not equal the below-threshold golden, even though only one file was added.
#[test]
fn golden_threshold_above_v1() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    fixture_threshold_above(root);

    let actual = hash_root(root, CidVersion::V1);
    assert_eq!(actual, GOLDEN_THRESHOLD_ABOVE_V1);
    assert_ne!(
        actual, GOLDEN_THRESHOLD_BELOW_V1,
        "below/above must differ: the +1 entry crosses the HAMT threshold"
    );
}

/// Hashing the same tree from two differently ordered entry lists must yield
/// the same CID.
///
/// This pins the BTreeMap-based tree assembly: neither callers nor future
/// walker changes may make the result depend on the order entries arrive in.
#[test]
fn order_independence_v1() {
    let tmp = TempDir::new().unwrap();
    fixture_nested_dir(tmp.path());

    // Collect twice so each listing is independently owned, then flip one.
    let forward = collect_folder_files(tmp.path(), true).unwrap();
    let backward = {
        let mut listing = collect_folder_files(tmp.path(), true).unwrap();
        listing.reverse();
        listing
    };

    // Sanity-check the setup: the two listings really are ordered differently.
    let forward_paths: Vec<&str> = forward
        .iter()
        .map(|entry| entry.relative_path.as_str())
        .collect();
    let backward_paths: Vec<&str> = backward
        .iter()
        .map(|entry| entry.relative_path.as_str())
        .collect();
    assert_ne!(
        forward_paths, backward_paths,
        "test setup: the two input orderings must differ"
    );

    let mut options = UploadFolderOptions::default();
    options.cid_version = CidVersion::V1;

    let forward_cid = aleph_cid::folder_hash::hash_folder_root(&forward, &options)
        .unwrap()
        .to_string();
    let backward_cid = aleph_cid::folder_hash::hash_folder_root(&backward, &options)
        .unwrap()
        .to_string();
    assert_eq!(forward_cid, backward_cid);

    // Anchor to the canonical golden so both orderings cannot drift together.
    assert_eq!(forward_cid, GOLDEN_NESTED_DIR_V1);
}

/// `build_folder_dag` with a sink that discards every block must return the
/// same root as `hash_folder_root` for the same entries and options.
#[test]
fn build_folder_dag_matches_hash_folder_root_with_no_op_sink() {
    use aleph_cid::UploadFolderOptions;
    use aleph_cid::folder_hash::{build_folder_dag, hash_folder_root};

    let scratch = tempfile::tempdir().unwrap();
    let root = scratch.path();
    std::fs::write(root.join("a.txt"), b"hello").unwrap();
    std::fs::write(root.join("b.txt"), b"world").unwrap();

    let options = UploadFolderOptions::default();
    let files = aleph_cid::collect_folder_files(root, true).unwrap();

    let from_dag = build_folder_dag(&files, &options, &mut |_, _| Ok(())).unwrap();
    let from_hash = hash_folder_root(&files, &options).unwrap();
    assert_eq!(from_dag, from_hash);
}

/// Records every `(cid, block)` pair handed to the sink and checks that the
/// CID of the last recorded block, once parsed, equals the root returned by
/// `build_folder_dag`.
#[test]
fn build_folder_dag_blocks_self_consistent() {
    use aleph_cid::UploadFolderOptions;
    use aleph_cid::folder_hash::build_folder_dag;

    let scratch = tempfile::tempdir().unwrap();
    let root_dir = scratch.path();
    std::fs::write(root_dir.join("a.txt"), b"abc").unwrap();
    std::fs::write(root_dir.join("b.txt"), b"defgh").unwrap();

    let options = UploadFolderOptions::default();
    let files = aleph_cid::collect_folder_files(root_dir, true).unwrap();

    // Each sink call is copied into an owned (cid bytes, block bytes) pair.
    let mut emitted: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
    let root_cid = build_folder_dag(&files, &options, &mut |cid, block| {
        emitted.push((cid.to_vec(), block.to_vec()));
        Ok(())
    })
    .unwrap();

    assert!(!emitted.is_empty());
    let (final_cid_bytes, _) = emitted.last().unwrap();
    let reparsed = ::cid::Cid::try_from(final_cid_bytes.as_slice()).unwrap();
    assert_eq!(reparsed.to_string(), root_cid.to_string());
}

/// A sink that fails must make `build_folder_dag` return
/// `FolderHashError::Sink`.
#[test]
fn build_folder_dag_propagates_sink_error() {
    use aleph_cid::UploadFolderOptions;
    use aleph_cid::folder_hash::{FolderHashError, build_folder_dag};

    let scratch = tempfile::tempdir().unwrap();
    std::fs::write(scratch.path().join("a.txt"), b"abc").unwrap();

    let options = UploadFolderOptions::default();
    let files = aleph_cid::collect_folder_files(scratch.path(), true).unwrap();

    // The sink rejects every block with an I/O error.
    let outcome = build_folder_dag(&files, &options, &mut |_, _| {
        Err(std::io::Error::new(std::io::ErrorKind::WriteZero, "test"))
    });

    let failure = outcome.unwrap_err();
    assert!(matches!(failure, FolderHashError::Sink(_)));
}