1use crate::error::{ChkpttError, Result};
2use crate::store::blob::hex_to_bytes;
3use bitcode::{Decode, Encode};
4use memmap2::Mmap;
5use std::io::{BufWriter, Seek, SeekFrom, Write};
6use std::path::PathBuf;
7use tempfile::NamedTempFile;
8
9const TREE_PACK_MAGIC: &[u8; 4] = b"CKTL";
10const TREE_IDX_ENTRY_SIZE: usize = 16 + 8 + 8; const TREE_HEADER_SIZE: u64 = 8;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Encode, Decode)]
14pub enum EntryType {
15 File,
16 Dir,
17 Symlink,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)]
21pub struct TreeEntry {
22 pub name: String,
23 pub entry_type: EntryType,
24 pub hash: [u8; 16],
25 pub size: u64,
26 pub mode: u32,
27}
28
29#[derive(Debug, Clone)]
31struct TreeIdxEntry {
32 hash: [u8; 16],
33 offset: u64,
34 size: u64,
35}
36
37pub struct TreeStore {
38 base_dir: PathBuf,
39 pack_dat: Option<Mmap>,
41 pack_idx: Option<Mmap>,
43 pack_entry_count: usize,
44}
45
46impl TreeStore {
47 pub fn new(base_dir: PathBuf) -> Self {
48 let dat_path = base_dir.join("trees.dat");
49 let idx_path = base_dir.join("trees.idx");
50
51 let (pack_dat, pack_idx, pack_entry_count) = match (
52 std::fs::File::open(&dat_path),
53 std::fs::File::open(&idx_path),
54 ) {
55 (Ok(dat_file), Ok(idx_file)) => match (unsafe { Mmap::map(&dat_file) }, unsafe {
57 Mmap::map(&idx_file)
58 }) {
59 (Ok(dat), Ok(idx)) => {
60 #[cfg(unix)]
61 {
62 let _ = dat.advise(memmap2::Advice::Sequential);
63 let _ = idx.advise(memmap2::Advice::Random);
64 }
65 let count = idx.len() / TREE_IDX_ENTRY_SIZE;
66 (Some(dat), Some(idx), count)
67 }
68 _ => (None, None, 0),
69 },
70 (Err(dat_error), _) if dat_error.kind() == std::io::ErrorKind::NotFound => {
71 (None, None, 0)
72 }
73 (_, Err(idx_error)) if idx_error.kind() == std::io::ErrorKind::NotFound => {
74 (None, None, 0)
75 }
76 _ => (None, None, 0),
77 };
78
79 Self {
80 base_dir,
81 pack_dat,
82 pack_idx,
83 pack_entry_count,
84 }
85 }
86
87 fn tree_path(&self, hash_hex: &str) -> PathBuf {
88 let (prefix, rest) = hash_hex.split_at(2);
89 self.base_dir.join(prefix).join(rest)
90 }
91
92 pub fn write(&self, entries: &[TreeEntry]) -> Result<String> {
95 let mut sorted = entries.to_vec();
96 sorted.sort_unstable_by(|a, b| a.name.cmp(&b.name));
97 let encoded = bitcode::encode(&sorted);
98 let hash_bytes = xxhash_rust::xxh3::xxh3_128(&encoded).to_le_bytes();
99 let hash_hex = crate::store::blob::bytes_to_hex(&hash_bytes);
100 let path = self.tree_path(&hash_hex);
101 let parent = path
102 .parent()
103 .ok_or_else(|| ChkpttError::Other("Tree path missing parent directory".into()))?;
104 std::fs::create_dir_all(parent)?;
105
106 let mut tmp = NamedTempFile::new_in(parent)?;
107 tmp.write_all(&encoded)?;
108 tmp.flush()?;
109
110 match tmp.persist_noclobber(&path) {
111 Ok(_) => Ok(hash_hex),
112 Err(error) if error.error.kind() == std::io::ErrorKind::AlreadyExists => Ok(hash_hex),
113 Err(error) => Err(error.error.into()),
114 }
115 }
116
117 pub fn write_pack(&self, entries: &[(String, Vec<u8>)]) -> Result<()> {
120 if entries.is_empty() {
121 return Ok(());
122 }
123
124 std::fs::create_dir_all(&self.base_dir)?;
125
126 let dat_path = self.base_dir.join("trees.dat");
127 let idx_path = self.base_dir.join("trees.idx");
128
129 let mut all_idx_entries: Vec<TreeIdxEntry> = Vec::new();
131 let mut existing_hashes: std::collections::HashSet<[u8; 16]> =
132 std::collections::HashSet::new();
133
134 let existing_dat_len = if let (Some(dat), Some(idx)) = (&self.pack_dat, &self.pack_idx) {
135 for i in 0..self.pack_entry_count {
136 let pos = i * TREE_IDX_ENTRY_SIZE;
137 let mut hash = [0u8; 16];
138 hash.copy_from_slice(&idx[pos..pos + 16]);
139 let offset = u64::from_le_bytes(idx[pos + 16..pos + 24].try_into().unwrap());
140 let size = u64::from_le_bytes(idx[pos + 24..pos + 32].try_into().unwrap());
141 existing_hashes.insert(hash);
142 all_idx_entries.push(TreeIdxEntry { hash, offset, size });
143 }
144 dat.len() as u64
145 } else {
146 TREE_HEADER_SIZE
147 };
148
149 let mut dat_tmp = NamedTempFile::new_in(&self.base_dir)?;
151 {
152 let mut writer = BufWriter::with_capacity(256 * 1024, &mut dat_tmp);
153
154 if let Some(dat) = &self.pack_dat {
155 writer.write_all(dat)?;
156 } else {
157 writer.write_all(&[0u8; TREE_HEADER_SIZE as usize])?;
158 }
159
160 let mut offset = existing_dat_len;
161
162 for (hash_hex, encoded) in entries {
163 let hash = hex_to_bytes(hash_hex)?;
164 if existing_hashes.contains(&hash) {
165 continue;
166 }
167 let data_len = encoded.len() as u64;
168 writer.write_all(&hash)?;
170 writer.write_all(&data_len.to_le_bytes())?;
171 writer.write_all(encoded)?;
172
173 all_idx_entries.push(TreeIdxEntry {
174 hash,
175 offset,
176 size: data_len,
177 });
178 offset += 16 + 8 + data_len;
179 }
180
181 writer.flush()?;
182 }
183
184 let total_count = all_idx_entries.len() as u32;
186 dat_tmp.seek(SeekFrom::Start(0))?;
187 dat_tmp.write_all(TREE_PACK_MAGIC)?;
188 dat_tmp.write_all(&total_count.to_le_bytes())?;
189 dat_tmp.flush()?;
190
191 dat_tmp
193 .persist(&dat_path)
194 .map_err(|e| ChkpttError::Other(e.error.to_string()))?;
195
196 all_idx_entries.sort_unstable_by(|a, b| a.hash.cmp(&b.hash));
198 let mut idx_buf: Vec<u8> = Vec::with_capacity(all_idx_entries.len() * TREE_IDX_ENTRY_SIZE);
199 for entry in &all_idx_entries {
200 idx_buf.extend_from_slice(&entry.hash);
201 idx_buf.extend_from_slice(&entry.offset.to_le_bytes());
202 idx_buf.extend_from_slice(&entry.size.to_le_bytes());
203 }
204 let idx_tmp_path = idx_path.with_extension("idx.tmp");
205 std::fs::write(&idx_tmp_path, &idx_buf)?;
206 std::fs::rename(&idx_tmp_path, &idx_path)?;
207
208 Ok(())
209 }
210
211 pub fn read(&self, hash_hex: &str) -> Result<Vec<TreeEntry>> {
213 if let Some(data) = self.read_from_pack(hash_hex) {
215 let entries: Vec<TreeEntry> = bitcode::decode(&data)?;
216 return Ok(entries);
217 }
218
219 let path = self.tree_path(hash_hex);
221 let data = match std::fs::read(&path) {
222 Ok(data) => data,
223 Err(error) if error.kind() == std::io::ErrorKind::NotFound => {
224 return Err(ChkpttError::ObjectNotFound(hash_hex.to_string()));
225 }
226 Err(error) => return Err(error.into()),
227 };
228 let entries: Vec<TreeEntry> = bitcode::decode(&data)?;
229 Ok(entries)
230 }
231
232 fn read_from_pack(&self, hash_hex: &str) -> Option<Vec<u8>> {
234 let idx = self.pack_idx.as_ref()?;
235 let dat = self.pack_dat.as_ref()?;
236 let hash_bytes = hex_to_bytes(hash_hex).ok()?;
237
238 let mut lo = 0usize;
240 let mut hi = self.pack_entry_count;
241 while lo < hi {
242 let mid = lo + (hi - lo) / 2;
243 let pos = mid * TREE_IDX_ENTRY_SIZE;
244 let mid_hash = &idx[pos..pos + 16];
245 match mid_hash.cmp(&hash_bytes) {
246 std::cmp::Ordering::Equal => {
247 let offset = u64::from_le_bytes(idx[pos + 16..pos + 24].try_into().unwrap());
248 let size = u64::from_le_bytes(idx[pos + 24..pos + 32].try_into().unwrap());
249 let data_start = offset as usize + 16 + 8;
250 let data_end = data_start + size as usize;
251 if data_end > dat.len() {
252 return None;
253 }
254 return Some(dat[data_start..data_end].to_vec());
255 }
256 std::cmp::Ordering::Less => lo = mid + 1,
257 std::cmp::Ordering::Greater => hi = mid,
258 }
259 }
260 None
261 }
262}