spacetimedb_fs_utils/dir_trie.rs
1//! Shallow byte-partitied directory tries.
2//!
3//! This is an implementation of the same on-disk data structure as
4//! [the git object store](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects),
5//! used for storing objects in an object store keyed on a content-addressed 32-byte hash.
6//!
7//! Objects' location in the trie is computed based on the (64 character) hexadecimal encoding of their (32 byte) Blake3 hash.
8//! The leading 2 digits of this hash are the name of the directory,
9//! and the remaining 62 digits are the name of the file itself.
10//!
11//! Storing files in this way is beneficial because directories internally are unsorted arrays of file entries,
12//! so searching for a file within a directory is O(directory_size).
13//! The trie structure implemented here is still O(n), but with a drastically reduced constant factor (1/128th),
14//! which we expect to shrink the linear lookups to an acceptable size.
15
16use crate::compression::{AsyncCompressReader, CompressReader};
17use std::fs::File;
18use std::io::BufWriter;
19use std::{
20 fs::{create_dir_all, OpenOptions},
21 io::{self, Read, Write},
22 path::{Path, PathBuf},
23};
24
25/// [`OpenOptions`] corresponding to opening a file with `O_EXCL`,
26/// i.e. creating a new writeable file, failing if it already exists.
27pub fn o_excl() -> OpenOptions {
28 let mut options = OpenOptions::new();
29 options.create_new(true).write(true);
30 options
31}
32
33/// [`OpenOptions`] corresponding to opening a file with `O_RDONLY`,
34/// i.e. opening an existing file for reading.
35pub fn o_rdonly() -> OpenOptions {
36 let mut options = OpenOptions::new();
37 options.read(true);
38 options
39}
40
41/// [`OpenOptions`] corresponding to opening a file with `O_RDONLY`,
42/// i.e. opening an existing file for reading.
43pub fn o_rdonly_async() -> tokio::fs::OpenOptions {
44 let mut options = tokio::fs::OpenOptions::new();
45 options.read(true);
46 options
47}
48
49/// Counter for objects written newly to disk versus hardlinked,
50/// for diagnostic purposes with operations that may hardlink or write.
51///
52/// See [`DirTrie::hardlink_or_write`].
53#[derive(Default, Debug)]
54pub struct CountCreated {
55 pub objects_written: u64,
56 pub objects_hardlinked: u64,
57}
58
59/// A directory trie.
60pub struct DirTrie {
61 /// The directory name at which the dir trie is stored.
62 root: PathBuf,
63}
64
65const FILE_ID_BYTES: usize = 32;
66const FILE_ID_HEX_CHARS: usize = FILE_ID_BYTES * 2;
67
68type FileId = [u8; FILE_ID_BYTES];
69
70/// The number of leading hex chars taken from a `FileId` as the subdirectory name.
71const DIR_HEX_CHARS: usize = 2;
72
73impl DirTrie {
74 /// Returns the root of this `DirTrie` on disk.
75 pub fn root(&self) -> &Path {
76 &self.root
77 }
78
79 /// Open the directory trie at `root`, creating the root directory if it doesn't exist.
80 ///
81 /// Returns an error if the `root` cannot be created as a directory.
82 /// See documentation on [`create_dir_all`] for more details.
83 pub fn open(root: PathBuf) -> Result<Self, io::Error> {
84 create_dir_all(&root)?;
85 Ok(Self { root })
86 }
87
88 pub fn file_path(&self, file_id: &FileId) -> PathBuf {
89 // TODO(perf, bikeshedding): avoid allocating a `String`.
90 let file_id_hex = hex::encode(file_id);
91
92 let mut file_path = self.root.clone();
93 // Two additional chars for slashes.
94 file_path.reserve(FILE_ID_HEX_CHARS + 2);
95
96 // The path will look like `root/xx/yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy`,
97 // where `xx` are the leading hex characters of the file id,
98 // and the `y`s are the remaining 62 hex characters.
99 file_path.push(&file_id_hex[..DIR_HEX_CHARS]);
100 file_path.push(&file_id_hex[DIR_HEX_CHARS..]);
101
102 file_path
103 }
104
105 /// Hardlink the entry for `file_id` from `src_repo` into `self`.
106 ///
107 /// Hardlinking makes the object shared between both [`DirTrie`]s
108 /// without copying the data on-disk.
109 /// Note that this is only possible within a single file system;
110 /// this method will likely return an error if `self` and `src_repo`
111 /// are in different file systems.
112 /// See [Wikipedia](https://en.wikipedia.org/wiki/Hard_link) for more information on hard links.
113 ///
114 /// Returns `Ok(true)` if the `file_id` existed in `src_repo` and was successfully linked into `self`,
115 /// `Ok(false)` if the `file_id` did not exist in `src_repo`,
116 /// or an `Err` if a filesystem operation failed.
117 ///
118 /// The object's hash is not verified against its `file_id`,
119 /// so if `file_id` is corrupted within `src_repo`,
120 /// the corrupted object will be hardlinked into `self`.
121 pub fn try_hardlink_from(&self, src_repo: &DirTrie, file_id: &FileId) -> Result<bool, io::Error> {
122 let src_file = src_repo.file_path(file_id);
123 if src_file.is_file() {
124 let dst_file = self.file_path(file_id);
125 Self::create_parent(&dst_file)?;
126 std::fs::hard_link(src_file, dst_file)?;
127 Ok(true)
128 } else {
129 Ok(false)
130 }
131 }
132
133 /// True if `file_id` is the key for an object present in this trie.
134 ///
135 /// Internally calls [`Path::is_file`]; see documentation on that method for detailed semantics.
136 pub fn contains_entry(&self, file_id: &FileId) -> bool {
137 let path = self.file_path(file_id);
138
139 path.is_file()
140 }
141
142 fn create_parent(file: &Path) -> Result<(), io::Error> {
143 // Known to succeed because `self.file_path` creates a path with a parent.
144 let dir = file.parent().unwrap();
145
146 create_dir_all(dir)?;
147 Ok(())
148 }
149
150 /// Hardlink `file_id` from `src_repo` into `self`, or create it in `self` containing `contents`.
151 ///
152 /// `contents` is a thunk which will be called only if the `src_repo` does not contain `file_id`
153 /// in order to compute the file contents.
154 /// This allows callers to avoid expensive serialization if the object already exists in `src_repo`.
155 ///
156 /// If the source file exists but the hardlink operation fails, this method returns an error.
157 /// In this case, the destination file is not created.
158 /// See [`Self::try_hardlink_from`].
159 pub fn hardlink_or_write<Bytes: AsRef<[u8]>>(
160 &self,
161 src_repo: Option<&DirTrie>,
162 file_id: &FileId,
163 contents: impl FnOnce() -> Bytes,
164 counter: &mut CountCreated,
165 ) -> Result<(), io::Error> {
166 if self.contains_entry(file_id) {
167 return Ok(());
168 }
169
170 if let Some(src_repo) = src_repo {
171 if self.try_hardlink_from(src_repo, file_id)? {
172 counter.objects_hardlinked += 1;
173 return Ok(());
174 }
175 }
176
177 let mut file = self.open_entry_writer(file_id)?;
178 let contents = contents();
179 file.write_all(contents.as_ref())?;
180 file.flush()?;
181 counter.objects_written += 1;
182 Ok(())
183 }
184
185 /// Open the file keyed with `file_id` for reading.
186 ///
187 /// It will be decompressed based on the file's magic bytes.
188 ///
189 /// It will be opened with [`o_rdonly`].
190 pub fn open_entry_reader(&self, file_id: &FileId) -> Result<CompressReader, io::Error> {
191 let path = self.file_path(file_id);
192 Self::create_parent(&path)?;
193 CompressReader::new(o_rdonly().open(path)?)
194 }
195
196 /// Open the file keyed with `file_id` for reading.
197 ///
198 /// It will be decompressed based on the file's magic bytes.
199 ///
200 /// It will be opened with [`o_rdonly_async`].
201 pub async fn open_entry_reader_async(
202 &self,
203 file_id: &FileId,
204 ) -> Result<AsyncCompressReader<tokio::fs::File>, io::Error> {
205 let path = self.file_path(file_id);
206 Self::create_parent(&path)?;
207 AsyncCompressReader::new(o_rdonly_async().open(path).await?).await
208 }
209
210 /// Open the file keyed with `file_id` for writing.
211 ///
212 /// If `compress_type` is [`CompressType::None`], the file will be written uncompressed.
213 ///
214 /// The file will be opened with [`o_excl`].
215 pub fn open_entry_writer(&self, file_id: &FileId) -> Result<BufWriter<File>, io::Error> {
216 let path = self.file_path(file_id);
217 Self::create_parent(&path)?;
218 Ok(BufWriter::new(o_excl().open(path)?))
219 }
220
221 /// Open the entry keyed with `file_id` and read it into a `Vec<u8>`.
222 pub fn read_entry(&self, file_id: &FileId) -> Result<Vec<u8>, io::Error> {
223 let mut file = self.open_entry_reader(file_id)?;
224 let mut buf = Vec::with_capacity(file.file_size()?);
225 // TODO(perf): Async IO?
226 file.read_to_end(&mut buf)?;
227 Ok(buf)
228 }
229}
230
231#[cfg(test)]
232mod test {
233 use super::*;
234 use std::io::Read;
235
236 const TEST_ID: FileId = [0xa5; FILE_ID_BYTES];
237 const TEST_STRING: &[u8] = b"test string";
238
239 fn with_test_dir_trie(f: impl FnOnce(DirTrie)) {
240 let root = tempdir::TempDir::new("test_dir_trie").unwrap();
241 let trie = DirTrie::open(root.path().to_path_buf()).unwrap();
242 f(trie)
243 }
244
245 /// Write the [`TEST_STRING`] into the entry [`TEST_ID`].
246 fn write_test_string(trie: &DirTrie) {
247 let mut file = trie.open_entry_writer(&TEST_ID).unwrap();
248 file.write_all(TEST_STRING).unwrap();
249 }
250
251 /// Read the entry [`TEST_ID`] and assert that its contents match the [`TEST_STRING`].
252 fn read_test_string(trie: &DirTrie) {
253 let mut file = trie.open_entry_reader(&TEST_ID).unwrap();
254 let mut contents = Vec::new();
255 file.read_to_end(&mut contents).unwrap();
256 assert_eq!(&contents, TEST_STRING);
257 }
258
259 #[test]
260 fn create_retrieve() {
261 with_test_dir_trie(|trie| {
262 // The trie starts empty, so it doesn't contain the `TEST_ID`'s file.
263 assert!(!trie.contains_entry(&TEST_ID));
264
265 // Create an entry in the trie and write some data to it.
266 write_test_string(&trie);
267
268 // The trie now has that entry.
269 assert!(trie.contains_entry(&TEST_ID));
270
271 // Open the entry and read its data back.
272 read_test_string(&trie);
273 })
274 }
275
276 #[test]
277 fn hardlink() {
278 with_test_dir_trie(|src| {
279 with_test_dir_trie(|dst| {
280 // Both tries starts empty, so they don't contain the `TEST_ID`'s file.
281 assert!(!src.contains_entry(&TEST_ID));
282 assert!(!dst.contains_entry(&TEST_ID));
283
284 // Create an entry in `src` and write some data to it.
285 write_test_string(&src);
286
287 // The `src` now contains the entry, but the `dst` still doesn't.
288 assert!(src.contains_entry(&TEST_ID));
289 assert!(!dst.contains_entry(&TEST_ID));
290
291 // Hardlink the entry from `src` into `dst`.
292 assert!(dst.try_hardlink_from(&src, &TEST_ID).unwrap());
293
294 // After hardlinking, the file is now in `dst`.
295 assert!(dst.contains_entry(&TEST_ID));
296 // Open the entry in `dst` and read its data back.
297 read_test_string(&dst);
298
299 // The file is still also in `src`, and its data hasn't changed.
300 assert!(src.contains_entry(&TEST_ID));
301 read_test_string(&src);
302 })
303 })
304 }
305
306 #[test]
307 fn open_options() {
308 with_test_dir_trie(|trie| {
309 // The trie starts empty, so it doesn't contain the `TEST_ID`'s file.
310 assert!(!trie.contains_entry(&TEST_ID));
311
312 // Because the file isn't there, we can't open it.
313 assert!(trie.open_entry_reader(&TEST_ID).is_err());
314
315 // Create an entry in the trie and write some data to it.
316 write_test_string(&trie);
317
318 // The trie now has that entry.
319 assert!(trie.contains_entry(&TEST_ID));
320
321 // Because the file is there, we can't create it.
322 assert!(trie.open_entry_writer(&TEST_ID).is_err());
323 })
324 }
325}