use std::fs::File;
use std::io;
use std::path::Path;
use crate::chunker::ChunkingConfig;
use crate::content::{build_content_descriptor, ContentDescriptor, ContentDescriptorInput};
use memmap2::Mmap;
/// Builds a [`ContentDescriptor`] for the regular file at `path` without
/// copying its contents into a heap buffer.
///
/// The file is memory-mapped read-only and the mapped bytes are hashed in a
/// single BLAKE3 pass; that digest becomes the descriptor's `root`. The
/// descriptor's `leaf_count` is `ceil(byte_length / cfg.chunk_size)`, so an
/// empty file yields a leaf count of zero.
///
/// `_threads` is currently unused by this function.
/// NOTE(review): presumably kept for signature parity with other hashing
/// entry points — confirm before removing.
///
/// `artifact_id`, `media_type` and `created_at` are forwarded unchanged to
/// [`build_content_descriptor`].
///
/// # Errors
///
/// Returns [`io::ErrorKind::InvalidInput`] when:
/// - `cfg.chunk_size` is zero,
/// - `path` is a symbolic link,
/// - `path` (or the handle obtained by opening it) is not a regular file.
///
/// Any error from reading metadata, opening the file, or creating the memory
/// map is propagated as-is.
pub fn hash_file_content_descriptor_zero_copy<P: AsRef<Path>>(
    path: P,
    cfg: &ChunkingConfig,
    _threads: usize,
    artifact_id: &str,
    media_type: &str,
    created_at: &str,
) -> io::Result<ContentDescriptor> {
    if cfg.chunk_size == 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "chunk_size must be greater than zero",
        ));
    }

    let path_ref = path.as_ref();

    // `symlink_metadata` does not follow links, so a symlink is detected
    // here instead of being silently resolved by `File::open` below.
    let meta = std::fs::symlink_metadata(path_ref)?;
    if meta.file_type().is_symlink() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "refusing to hash symlink path",
        ));
    }
    if !meta.is_file() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "path must reference a regular file",
        ));
    }

    let file = File::open(path_ref)?;

    // The path may have been replaced between the check above and the open.
    // Re-validate on the handle itself so that whatever actually gets mapped
    // is a regular file. (A symlink swapped in during that window is still
    // followed by `open`; closing that gap needs platform-specific flags.)
    let opened_meta = file.metadata()?;
    if !opened_meta.is_file() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "path must reference a regular file",
        ));
    }

    // A zero-length mapping is rejected by mmap(2) on some platforms, and
    // there is nothing to map anyway: hash the empty slice directly.
    let mmap = if opened_meta.len() == 0 {
        None
    } else {
        // SAFETY: the mapping is read-only, backed by a regular file that we
        // hold open for the lifetime of the map, and it is dropped before
        // this function returns. Soundness additionally relies on no other
        // party truncating or rewriting the file while it is mapped, which
        // cannot be enforced from here.
        Some(unsafe { Mmap::map(&file)? })
    };
    let bytes: &[u8] = mmap.as_deref().unwrap_or(&[]);

    let chunk_count = bytes.len().div_ceil(cfg.chunk_size);

    Ok(build_content_descriptor(ContentDescriptorInput {
        artifact_id,
        root: crate::utils::Hash(*blake3::hash(bytes).as_bytes()),
        chunk_size: cfg.chunk_size,
        leaf_count: chunk_count,
        // usize -> u64 is lossless on every target Rust supports.
        byte_length: bytes.len() as u64,
        media_type,
        created_at,
    }))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Hashing a 16-byte temporary file with a 4-byte chunk size must succeed
    /// and report four leaves, the supplied artifact id, and a non-empty
    /// content root hash.
    #[test]
    fn hash_file_content_descriptor_zero_copy_returns_descriptor() {
        // Fixture: 16 bytes on disk, i.e. exactly four chunks of 4 bytes.
        let chunking = ChunkingConfig { chunk_size: 4 };
        let mut fixture = NamedTempFile::new().expect("temp file");
        fixture.write_all(b"0123456789abcdef").expect("write");

        let outcome = hash_file_content_descriptor_zero_copy(
            fixture.path(),
            &chunking,
            2,
            "artifact-1",
            "application/octet-stream",
            "2026-01-01T00:00:00Z",
        );
        let descriptor = outcome.expect("zero copy hashing should succeed");

        assert_eq!(descriptor.leaf_count, 4);
        assert_eq!(descriptor.artifact_id, "artifact-1");
        assert!(!descriptor.content_root_hash.is_empty());
    }
}