Skip to main content

chkpt_core/store/
blob.rs

1use crate::error::Result;
2use memmap2::Mmap;
3use std::io::{BufReader, Read};
4use std::path::Path;
5
/// Minimum file size, in bytes, at which files are memory-mapped instead of
/// being read through a buffer.
const HASH_FILE_MMAP_THRESHOLD: u64 = 1 << 18; // 262_144 bytes = 256 KiB
7
8/// Zero-copy file content: small files are heap-allocated, large files are memory-mapped.
9pub enum FileContent {
10    Vec(Vec<u8>),
11    Mmap(Mmap),
12}
13
14impl AsRef<[u8]> for FileContent {
15    fn as_ref(&self) -> &[u8] {
16        match self {
17            FileContent::Vec(v) => v.as_slice(),
18            FileContent::Mmap(m) => m.as_ref(),
19        }
20    }
21}
22
23/// Read a file into a `FileContent`. Files >= 256 KB are memory-mapped; smaller files
24/// are read into a heap-allocated `Vec<u8>`.
25pub fn read_or_mmap(path: &Path) -> Result<FileContent> {
26    let file = std::fs::File::open(path)?;
27    let metadata = file.metadata()?;
28    if metadata.len() >= HASH_FILE_MMAP_THRESHOLD {
29        // SAFETY: the file is opened read-only and we do not mutate it through the mapping.
30        let mmap = unsafe { Mmap::map(&file) }?;
31        #[cfg(unix)]
32        {
33            let _ = mmap.advise(memmap2::Advice::Sequential);
34        }
35        return Ok(FileContent::Mmap(mmap));
36    }
37    let mut buf = Vec::with_capacity(metadata.len() as usize);
38    let mut file = file;
39    file.read_to_end(&mut buf)?;
40    Ok(FileContent::Vec(buf))
41}
42
43/// Compute XXH3-128 hash of content as raw bytes.
44pub fn hash_content_bytes(content: &[u8]) -> [u8; 16] {
45    xxhash_rust::xxh3::xxh3_128(content).to_le_bytes()
46}
47
48/// Compute XXH3-128 hash of content, return 32-char hex string.
49pub fn hash_content(content: &[u8]) -> String {
50    bytes_to_hex(&hash_content_bytes(content))
51}
52
53/// Compute XXH3-128 hash of a file without loading the full file into memory.
54pub fn hash_file_bytes(path: &Path) -> Result<[u8; 16]> {
55    if let Ok(metadata) = std::fs::metadata(path) {
56        if metadata.len() >= HASH_FILE_MMAP_THRESHOLD {
57            if let Ok(file) = std::fs::File::open(path) {
58                // SAFETY: file is opened read-only and kept alive alongside the mmap.
59                if let Ok(mmap) = unsafe { Mmap::map(&file) } {
60                    return Ok(xxhash_rust::xxh3::xxh3_128(&mmap).to_le_bytes());
61                }
62            }
63        }
64    }
65
66    let file = std::fs::File::open(path)?;
67    let mut reader = BufReader::new(file);
68    let mut hasher = xxhash_rust::xxh3::Xxh3::new();
69    let mut buffer = [0u8; 64 * 1024];
70
71    loop {
72        let bytes_read = reader.read(&mut buffer)?;
73        if bytes_read == 0 {
74            break;
75        }
76        hasher.update(&buffer[..bytes_read]);
77    }
78
79    Ok(hasher.digest128().to_le_bytes())
80}
81
82fn read_link_bytes(path: &Path) -> Result<Vec<u8>> {
83    let target = std::fs::read_link(path)?;
84
85    #[cfg(unix)]
86    {
87        use std::os::unix::ffi::OsStrExt;
88        return Ok(target.as_os_str().as_bytes().to_vec());
89    }
90
91    #[cfg(not(unix))]
92    {
93        Ok(target.to_string_lossy().into_owned().into_bytes())
94    }
95}
96
97pub fn read_path_bytes(path: &Path, is_symlink: bool) -> Result<Vec<u8>> {
98    if is_symlink {
99        return read_link_bytes(path);
100    }
101
102    let mut file = std::fs::File::open(path)?;
103    let metadata = file.metadata()?;
104    let mut buf = Vec::with_capacity(metadata.len() as usize);
105    file.read_to_end(&mut buf)?;
106    Ok(buf)
107}
108
109pub fn hash_path_bytes(path: &Path, is_symlink: bool) -> Result<[u8; 16]> {
110    if is_symlink {
111        return Ok(hash_content_bytes(&read_link_bytes(path)?));
112    }
113
114    hash_file_bytes(path)
115}
116
117/// Convert a 32-char hex string to [u8; 16].
118pub fn hex_to_bytes(hex: &str) -> Result<[u8; 16]> {
119    let mut bytes = [0u8; 16];
120    if hex.len() != 32 {
121        return Err(crate::error::ChkpttError::Other(format!(
122            "Invalid hash length: {}",
123            hex.len()
124        )));
125    }
126    for i in 0..16 {
127        let slice = hex.get(i * 2..i * 2 + 2).ok_or_else(|| {
128            crate::error::ChkpttError::Other("Invalid hex: byte index out of bounds".into())
129        })?;
130        bytes[i] = u8::from_str_radix(slice, 16)
131            .map_err(|_| crate::error::ChkpttError::Other("Invalid hex".into()))?;
132    }
133    Ok(bytes)
134}
135
/// Render a 16-byte value as a 32-character lowercase hex string,
/// two digits per byte, most significant nibble first.
pub fn bytes_to_hex(bytes: &[u8; 16]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(32);
    for &value in bytes {
        out.push(char::from(DIGITS[usize::from(value >> 4)]));
        out.push(char::from(DIGITS[usize::from(value & 0x0f)]));
    }
    out
}