// trazaeo 0.5.0
//
// Open-source provenance SDK and specification for verifiable EO and climate data workflows.
// Documentation
use std::fs::File;
use std::io;
use std::path::Path;

use crate::chunker::ChunkingConfig;
use crate::content::{build_content_descriptor, ContentDescriptor, ContentDescriptorInput};
use memmap2::Mmap;

/// Hash a file through a read-only memory map and return its content descriptor.
///
/// The whole file is hashed in a single `blake3::hash` call over the mapped
/// bytes. `cfg.chunk_size` does not influence the digest; it is only used to
/// derive the reported leaf count, `ceil(byte_length / chunk_size)`.
///
/// # Parameters
///
/// - `path`: file to hash. Symlinks and anything that is not a regular file are
///   rejected.
/// - `cfg`: chunking configuration; `chunk_size` must be non-zero.
/// - `_threads`: accepted for signature compatibility; currently unused.
/// - `artifact_id`, `media_type`, `created_at`: forwarded unchanged into the
///   descriptor input.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` when `cfg.chunk_size` is zero, when
/// `path` is a symlink, or when `path` (or the handle obtained by opening it)
/// is not a regular file. Any I/O error from reading metadata, opening the
/// file, or creating the memory map is propagated as-is.
///
/// # Caveats
///
/// The symlink check is performed on the path before the file is opened, so it
/// is best-effort against a concurrent writer replacing the path. The file
/// must also not be truncated or modified while it is being hashed (see the
/// `SAFETY` note in the body).
pub fn hash_file_content_descriptor_zero_copy<P: AsRef<Path>>(
    path: P,
    cfg: &ChunkingConfig,
    _threads: usize,
    artifact_id: &str,
    media_type: &str,
    created_at: &str,
) -> io::Result<ContentDescriptor> {
    // A zero chunk size would make the leaf-count division below panic.
    if cfg.chunk_size == 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "chunk_size must be greater than zero",
        ));
    }

    let path_ref = path.as_ref();
    // `symlink_metadata` does not follow links, so a symlink is seen as such.
    let meta = std::fs::symlink_metadata(path_ref)?;
    if meta.file_type().is_symlink() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "refusing to hash symlink path",
        ));
    }
    if !meta.is_file() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "path must reference a regular file",
        ));
    }

    let file = File::open(path_ref)?;
    // Re-validate on the opened handle: the path-based checks above describe
    // whatever was at `path_ref` earlier, not necessarily what was just opened.
    let opened_meta = file.metadata()?;
    if !opened_meta.is_file() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "path must reference a regular file",
        ));
    }

    // An empty file has nothing to map; hash an empty slice directly so the
    // result does not depend on how the mapping layer treats zero-length files.
    let mmap = if opened_meta.len() == 0 {
        None
    } else {
        // SAFETY: the mapping is read-only and is dropped before this function
        // returns. Soundness additionally relies on the underlying file not
        // being truncated or modified by another process while it is mapped;
        // this function cannot enforce that, so callers must only pass files
        // that are stable for the duration of the call.
        Some(unsafe { Mmap::map(&file)? })
    };
    let bytes: &[u8] = mmap.as_deref().unwrap_or(&[]);

    // Number of `chunk_size`-sized leaves needed to cover the file (0 if empty).
    let chunk_count = bytes.len().div_ceil(cfg.chunk_size);

    Ok(build_content_descriptor(ContentDescriptorInput {
        artifact_id,
        root: crate::utils::Hash(*blake3::hash(bytes).as_bytes()),
        chunk_size: cfg.chunk_size,
        leaf_count: chunk_count,
        byte_length: bytes.len() as u64,
        media_type,
        created_at,
    }))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Hashing a 16-byte temporary file with a 4-byte chunk size succeeds and
    /// yields a descriptor with four leaves, the supplied artifact id, and a
    /// non-empty content root hash.
    #[test]
    fn hash_file_content_descriptor_zero_copy_returns_descriptor() {
        // 16 bytes of payload split into 4-byte chunks gives exactly 4 leaves.
        let payload: &[u8] = b"0123456789abcdef";
        let chunking = ChunkingConfig { chunk_size: 4 };

        let mut tmp = NamedTempFile::new().expect("temp file");
        tmp.write_all(payload).expect("write");

        let outcome = hash_file_content_descriptor_zero_copy(
            tmp.path(),
            &chunking,
            2,
            "artifact-1",
            "application/octet-stream",
            "2026-01-01T00:00:00Z",
        );
        let descriptor = outcome.expect("zero copy hashing should succeed");

        assert!(!descriptor.content_root_hash.is_empty());
        assert_eq!(descriptor.artifact_id, "artifact-1");
        assert_eq!(descriptor.leaf_count, 4);
    }
}