Skip to main content

chkpt_core/store/
tree.rs

1use crate::error::{ChkpttError, Result};
2use crate::store::blob::hex_to_bytes;
3use bitcode::{Decode, Encode};
4use memmap2::Mmap;
5use std::io::{BufWriter, Seek, SeekFrom, Write};
6use std::path::PathBuf;
7use tempfile::NamedTempFile;
8
9const TREE_PACK_MAGIC: &[u8; 4] = b"CKTL";
10const TREE_IDX_ENTRY_SIZE: usize = 16 + 8 + 8; // hash(16) + offset(8) + size(8)
11const TREE_HEADER_SIZE: u64 = 8;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Encode, Decode)]
14pub enum EntryType {
15    File,
16    Dir,
17    Symlink,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)]
21pub struct TreeEntry {
22    pub name: String,
23    pub entry_type: EntryType,
24    pub hash: [u8; 16],
25    pub size: u64,
26    pub mode: u32,
27}
28
29/// Index entry for a tree in the pack.
30#[derive(Debug, Clone)]
31struct TreeIdxEntry {
32    hash: [u8; 16],
33    offset: u64,
34    size: u64,
35}
36
37pub struct TreeStore {
38    base_dir: PathBuf,
39    /// mmap'd tree pack data file (if exists)
40    pack_dat: Option<Mmap>,
41    /// mmap'd tree pack index file (if exists)
42    pack_idx: Option<Mmap>,
43    pack_entry_count: usize,
44}
45
46impl TreeStore {
47    pub fn new(base_dir: PathBuf) -> Self {
48        let dat_path = base_dir.join("trees.dat");
49        let idx_path = base_dir.join("trees.idx");
50
51        let (pack_dat, pack_idx, pack_entry_count) = match (
52            std::fs::File::open(&dat_path),
53            std::fs::File::open(&idx_path),
54        ) {
55            // SAFETY: files are opened read-only and kept alive alongside the mmaps.
56            (Ok(dat_file), Ok(idx_file)) => match (unsafe { Mmap::map(&dat_file) }, unsafe {
57                Mmap::map(&idx_file)
58            }) {
59                (Ok(dat), Ok(idx)) => {
60                    #[cfg(unix)]
61                    {
62                        let _ = dat.advise(memmap2::Advice::Sequential);
63                        let _ = idx.advise(memmap2::Advice::Random);
64                    }
65                    let count = idx.len() / TREE_IDX_ENTRY_SIZE;
66                    (Some(dat), Some(idx), count)
67                }
68                _ => (None, None, 0),
69            },
70            (Err(dat_error), _) if dat_error.kind() == std::io::ErrorKind::NotFound => {
71                (None, None, 0)
72            }
73            (_, Err(idx_error)) if idx_error.kind() == std::io::ErrorKind::NotFound => {
74                (None, None, 0)
75            }
76            _ => (None, None, 0),
77        };
78
79        Self {
80            base_dir,
81            pack_dat,
82            pack_idx,
83            pack_entry_count,
84        }
85    }
86
87    /// Write tree entries (sorted by name). Returns hash hex.
88    pub fn write(&self, entries: &[TreeEntry]) -> Result<String> {
89        let mut sorted = entries.to_vec();
90        sorted.sort_unstable_by(|a, b| a.name.cmp(&b.name));
91        let encoded = bitcode::encode(&sorted);
92        let hash_bytes = xxhash_rust::xxh3::xxh3_128(&encoded).to_le_bytes();
93        let hash_hex = crate::store::blob::bytes_to_hex(&hash_bytes);
94        self.write_pack(&[(hash_hex.clone(), encoded)])?;
95        Ok(hash_hex)
96    }
97
98    /// Write a batch of pre-computed trees to a pack file.
99    /// Each entry is (hash_hex, encoded_data).
100    pub fn write_pack(&self, entries: &[(String, Vec<u8>)]) -> Result<()> {
101        if entries.is_empty() {
102            return Ok(());
103        }
104
105        std::fs::create_dir_all(&self.base_dir)?;
106
107        let dat_path = self.base_dir.join("trees.dat");
108        let idx_path = self.base_dir.join("trees.idx");
109
110        // Collect existing idx entries
111        let mut all_idx_entries: Vec<TreeIdxEntry> = Vec::new();
112        let mut existing_hashes: std::collections::HashSet<[u8; 16]> =
113            std::collections::HashSet::new();
114
115        let existing_dat_len = if let (Some(dat), Some(idx)) = (&self.pack_dat, &self.pack_idx) {
116            for i in 0..self.pack_entry_count {
117                let pos = i * TREE_IDX_ENTRY_SIZE;
118                let mut hash = [0u8; 16];
119                hash.copy_from_slice(&idx[pos..pos + 16]);
120                let offset = u64::from_le_bytes(idx[pos + 16..pos + 24].try_into().unwrap());
121                let size = u64::from_le_bytes(idx[pos + 24..pos + 32].try_into().unwrap());
122                existing_hashes.insert(hash);
123                all_idx_entries.push(TreeIdxEntry { hash, offset, size });
124            }
125            dat.len() as u64
126        } else {
127            TREE_HEADER_SIZE
128        };
129
130        // Write new .dat
131        let mut dat_tmp = NamedTempFile::new_in(&self.base_dir)?;
132        {
133            let mut writer = BufWriter::with_capacity(256 * 1024, &mut dat_tmp);
134
135            if let Some(dat) = &self.pack_dat {
136                writer.write_all(dat)?;
137            } else {
138                writer.write_all(&[0u8; TREE_HEADER_SIZE as usize])?;
139            }
140
141            let mut offset = existing_dat_len;
142
143            for (hash_hex, encoded) in entries {
144                let hash = hex_to_bytes(hash_hex)?;
145                if existing_hashes.contains(&hash) {
146                    continue;
147                }
148                let data_len = encoded.len() as u64;
149                // Write: hash(16) + size(8) + data(N)
150                writer.write_all(&hash)?;
151                writer.write_all(&data_len.to_le_bytes())?;
152                writer.write_all(encoded)?;
153
154                all_idx_entries.push(TreeIdxEntry {
155                    hash,
156                    offset,
157                    size: data_len,
158                });
159                offset += 16 + 8 + data_len;
160            }
161
162            writer.flush()?;
163        }
164
165        // Write header
166        let total_count = all_idx_entries.len() as u32;
167        dat_tmp.seek(SeekFrom::Start(0))?;
168        dat_tmp.write_all(TREE_PACK_MAGIC)?;
169        dat_tmp.write_all(&total_count.to_le_bytes())?;
170        dat_tmp.flush()?;
171
172        // Persist .dat
173        dat_tmp
174            .persist(&dat_path)
175            .map_err(|e| ChkpttError::Other(e.error.to_string()))?;
176
177        // Sort idx and write
178        all_idx_entries.sort_unstable_by(|a, b| a.hash.cmp(&b.hash));
179        let mut idx_buf: Vec<u8> = Vec::with_capacity(all_idx_entries.len() * TREE_IDX_ENTRY_SIZE);
180        for entry in &all_idx_entries {
181            idx_buf.extend_from_slice(&entry.hash);
182            idx_buf.extend_from_slice(&entry.offset.to_le_bytes());
183            idx_buf.extend_from_slice(&entry.size.to_le_bytes());
184        }
185        let idx_tmp_path = idx_path.with_extension("idx.tmp");
186        std::fs::write(&idx_tmp_path, &idx_buf)?;
187        std::fs::rename(&idx_tmp_path, &idx_path)?;
188
189        Ok(())
190    }
191
192    /// Read tree entries by hash from the tree pack.
193    pub fn read(&self, hash_hex: &str) -> Result<Vec<TreeEntry>> {
194        if let Some(data) = self
195            .read_from_pack(hash_hex)
196            .or_else(|| self.read_from_current_pack_files(hash_hex))
197        {
198            let entries: Vec<TreeEntry> = bitcode::decode(&data)?;
199            return Ok(entries);
200        }
201
202        Err(ChkpttError::ObjectNotFound(hash_hex.to_string()))
203    }
204
205    /// Read raw data from the tree pack by hash.
206    fn read_from_pack(&self, hash_hex: &str) -> Option<Vec<u8>> {
207        let idx = self.pack_idx.as_ref()?;
208        let dat = self.pack_dat.as_ref()?;
209        Self::read_from_pack_bytes(hash_hex, dat, idx, self.pack_entry_count)
210    }
211
212    fn read_from_current_pack_files(&self, hash_hex: &str) -> Option<Vec<u8>> {
213        let dat = std::fs::read(self.base_dir.join("trees.dat")).ok()?;
214        let idx = std::fs::read(self.base_dir.join("trees.idx")).ok()?;
215        let entry_count = idx.len() / TREE_IDX_ENTRY_SIZE;
216        Self::read_from_pack_bytes(hash_hex, &dat, &idx, entry_count)
217    }
218
219    fn read_from_pack_bytes(
220        hash_hex: &str,
221        dat: &[u8],
222        idx: &[u8],
223        entry_count: usize,
224    ) -> Option<Vec<u8>> {
225        let hash_bytes = hex_to_bytes(hash_hex).ok()?;
226
227        // Binary search in idx
228        let mut lo = 0usize;
229        let mut hi = entry_count;
230        while lo < hi {
231            let mid = lo + (hi - lo) / 2;
232            let pos = mid * TREE_IDX_ENTRY_SIZE;
233            let mid_hash = &idx[pos..pos + 16];
234            match mid_hash.cmp(&hash_bytes) {
235                std::cmp::Ordering::Equal => {
236                    let offset = u64::from_le_bytes(idx[pos + 16..pos + 24].try_into().unwrap());
237                    let size = u64::from_le_bytes(idx[pos + 24..pos + 32].try_into().unwrap());
238                    let data_start = offset as usize + 16 + 8;
239                    let data_end = data_start + size as usize;
240                    if data_end > dat.len() {
241                        return None;
242                    }
243                    return Some(dat[data_start..data_end].to_vec());
244                }
245                std::cmp::Ordering::Less => lo = mid + 1,
246                std::cmp::Ordering::Greater => hi = mid,
247            }
248        }
249        None
250    }
251}