spacetimedb_fs_utils/
dir_trie.rs

1//! Shallow byte-partitied directory tries.
2//!
3//! This is an implementation of the same on-disk data structure as
4//! [the git object store](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects),
5//! used for storing objects in an object store keyed on a content-addressed 32-byte hash.
6//!
7//! Objects' location in the trie is computed based on the (64 character) hexadecimal encoding of their (32 byte) Blake3 hash.
8//! The leading 2 digits of this hash are the name of the directory,
9//! and the remaining 62 digits are the name of the file itself.
10//!
11//! Storing files in this way is beneficial because directories internally are unsorted arrays of file entries,
12//! so searching for a file within a directory is O(directory_size).
13//! The trie structure implemented here is still O(n), but with a drastically reduced constant factor (1/128th),
14//! which we expect to shrink the linear lookups to an acceptable size.
15
16use crate::compression::{AsyncCompressReader, CompressReader};
17use std::fs::File;
18use std::io::BufWriter;
19use std::{
20    fs::{create_dir_all, OpenOptions},
21    io::{self, Read, Write},
22    path::{Path, PathBuf},
23};
24
25/// [`OpenOptions`] corresponding to opening a file with `O_EXCL`,
26/// i.e. creating a new writeable file, failing if it already exists.
27pub fn o_excl() -> OpenOptions {
28    let mut options = OpenOptions::new();
29    options.create_new(true).write(true);
30    options
31}
32
33/// [`OpenOptions`] corresponding to opening a file with `O_RDONLY`,
34/// i.e. opening an existing file for reading.
35pub fn o_rdonly() -> OpenOptions {
36    let mut options = OpenOptions::new();
37    options.read(true);
38    options
39}
40
41/// [`OpenOptions`] corresponding to opening a file with `O_RDONLY`,
42/// i.e. opening an existing file for reading.
43pub fn o_rdonly_async() -> tokio::fs::OpenOptions {
44    let mut options = tokio::fs::OpenOptions::new();
45    options.read(true);
46    options
47}
48
49/// Counter for objects written newly to disk versus hardlinked,
50/// for diagnostic purposes with operations that may hardlink or write.
51///
52/// See [`DirTrie::hardlink_or_write`].
53#[derive(Default, Debug)]
54pub struct CountCreated {
55    pub objects_written: u64,
56    pub objects_hardlinked: u64,
57}
58
59/// A directory trie.
60pub struct DirTrie {
61    /// The directory name at which the dir trie is stored.
62    root: PathBuf,
63}
64
65const FILE_ID_BYTES: usize = 32;
66const FILE_ID_HEX_CHARS: usize = FILE_ID_BYTES * 2;
67
68type FileId = [u8; FILE_ID_BYTES];
69
70/// The number of leading hex chars taken from a `FileId` as the subdirectory name.
71const DIR_HEX_CHARS: usize = 2;
72
73impl DirTrie {
74    /// Returns the root of this `DirTrie` on disk.
75    pub fn root(&self) -> &Path {
76        &self.root
77    }
78
79    /// Open the directory trie at `root`, creating the root directory if it doesn't exist.
80    ///
81    /// Returns an error if the `root` cannot be created as a directory.
82    /// See documentation on [`create_dir_all`] for more details.
83    pub fn open(root: PathBuf) -> Result<Self, io::Error> {
84        create_dir_all(&root)?;
85        Ok(Self { root })
86    }
87
88    pub fn file_path(&self, file_id: &FileId) -> PathBuf {
89        // TODO(perf, bikeshedding): avoid allocating a `String`.
90        let file_id_hex = hex::encode(file_id);
91
92        let mut file_path = self.root.clone();
93        // Two additional chars for slashes.
94        file_path.reserve(FILE_ID_HEX_CHARS + 2);
95
96        // The path will look like `root/xx/yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy`,
97        // where `xx` are the leading hex characters of the file id,
98        // and the `y`s are the remaining 62 hex characters.
99        file_path.push(&file_id_hex[..DIR_HEX_CHARS]);
100        file_path.push(&file_id_hex[DIR_HEX_CHARS..]);
101
102        file_path
103    }
104
105    /// Hardlink the entry for `file_id` from `src_repo` into `self`.
106    ///
107    /// Hardlinking makes the object shared between both [`DirTrie`]s
108    /// without copying the data on-disk.
109    /// Note that this is only possible within a single file system;
110    /// this method will likely return an error if `self` and `src_repo`
111    /// are in different file systems.
112    /// See [Wikipedia](https://en.wikipedia.org/wiki/Hard_link) for more information on hard links.
113    ///
114    /// Returns `Ok(true)` if the `file_id` existed in `src_repo` and was successfully linked into `self`,
115    /// `Ok(false)` if the `file_id` did not exist in `src_repo`,
116    /// or an `Err` if a filesystem operation failed.
117    ///
118    /// The object's hash is not verified against its `file_id`,
119    /// so if `file_id` is corrupted within `src_repo`,
120    /// the corrupted object will be hardlinked into `self`.
121    pub fn try_hardlink_from(&self, src_repo: &DirTrie, file_id: &FileId) -> Result<bool, io::Error> {
122        let src_file = src_repo.file_path(file_id);
123        if src_file.is_file() {
124            let dst_file = self.file_path(file_id);
125            Self::create_parent(&dst_file)?;
126            std::fs::hard_link(src_file, dst_file)?;
127            Ok(true)
128        } else {
129            Ok(false)
130        }
131    }
132
133    /// True if `file_id` is the key for an object present in this trie.
134    ///
135    /// Internally calls [`Path::is_file`]; see documentation on that method for detailed semantics.
136    pub fn contains_entry(&self, file_id: &FileId) -> bool {
137        let path = self.file_path(file_id);
138
139        path.is_file()
140    }
141
142    fn create_parent(file: &Path) -> Result<(), io::Error> {
143        // Known to succeed because `self.file_path` creates a path with a parent.
144        let dir = file.parent().unwrap();
145
146        create_dir_all(dir)?;
147        Ok(())
148    }
149
150    /// Hardlink `file_id` from `src_repo` into `self`, or create it in `self` containing `contents`.
151    ///
152    /// `contents` is a thunk which will be called only if the `src_repo` does not contain `file_id`
153    /// in order to compute the file contents.
154    /// This allows callers to avoid expensive serialization if the object already exists in `src_repo`.
155    ///
156    /// If the source file exists but the hardlink operation fails, this method returns an error.
157    /// In this case, the destination file is not created.
158    /// See [`Self::try_hardlink_from`].
159    pub fn hardlink_or_write<Bytes: AsRef<[u8]>>(
160        &self,
161        src_repo: Option<&DirTrie>,
162        file_id: &FileId,
163        contents: impl FnOnce() -> Bytes,
164        counter: &mut CountCreated,
165    ) -> Result<(), io::Error> {
166        if self.contains_entry(file_id) {
167            return Ok(());
168        }
169
170        if let Some(src_repo) = src_repo {
171            if self.try_hardlink_from(src_repo, file_id)? {
172                counter.objects_hardlinked += 1;
173                return Ok(());
174            }
175        }
176
177        let mut file = self.open_entry_writer(file_id)?;
178        let contents = contents();
179        file.write_all(contents.as_ref())?;
180        file.flush()?;
181        counter.objects_written += 1;
182        Ok(())
183    }
184
185    /// Open the file keyed with `file_id` for reading.
186    ///
187    /// It will be decompressed based on the file's magic bytes.
188    ///
189    /// It will be opened with [`o_rdonly`].
190    pub fn open_entry_reader(&self, file_id: &FileId) -> Result<CompressReader, io::Error> {
191        let path = self.file_path(file_id);
192        Self::create_parent(&path)?;
193        CompressReader::new(o_rdonly().open(path)?)
194    }
195
196    /// Open the file keyed with `file_id` for reading.
197    ///
198    /// It will be decompressed based on the file's magic bytes.
199    ///
200    /// It will be opened with [`o_rdonly_async`].
201    pub async fn open_entry_reader_async(
202        &self,
203        file_id: &FileId,
204    ) -> Result<AsyncCompressReader<tokio::fs::File>, io::Error> {
205        let path = self.file_path(file_id);
206        Self::create_parent(&path)?;
207        AsyncCompressReader::new(o_rdonly_async().open(path).await?).await
208    }
209
210    /// Open the file keyed with `file_id` for writing.
211    ///
212    /// If `compress_type` is [`CompressType::None`], the file will be written uncompressed.
213    ///
214    /// The file will be opened with [`o_excl`].
215    pub fn open_entry_writer(&self, file_id: &FileId) -> Result<BufWriter<File>, io::Error> {
216        let path = self.file_path(file_id);
217        Self::create_parent(&path)?;
218        Ok(BufWriter::new(o_excl().open(path)?))
219    }
220
221    /// Open the entry keyed with `file_id` and read it into a `Vec<u8>`.
222    pub fn read_entry(&self, file_id: &FileId) -> Result<Vec<u8>, io::Error> {
223        let mut file = self.open_entry_reader(file_id)?;
224        let mut buf = Vec::with_capacity(file.file_size()?);
225        // TODO(perf): Async IO?
226        file.read_to_end(&mut buf)?;
227        Ok(buf)
228    }
229}
230
231#[cfg(test)]
232mod test {
233    use super::*;
234    use std::io::Read;
235
236    const TEST_ID: FileId = [0xa5; FILE_ID_BYTES];
237    const TEST_STRING: &[u8] = b"test string";
238
239    fn with_test_dir_trie(f: impl FnOnce(DirTrie)) {
240        let root = tempdir::TempDir::new("test_dir_trie").unwrap();
241        let trie = DirTrie::open(root.path().to_path_buf()).unwrap();
242        f(trie)
243    }
244
245    /// Write the [`TEST_STRING`] into the entry [`TEST_ID`].
246    fn write_test_string(trie: &DirTrie) {
247        let mut file = trie.open_entry_writer(&TEST_ID).unwrap();
248        file.write_all(TEST_STRING).unwrap();
249    }
250
251    /// Read the entry [`TEST_ID`] and assert that its contents match the [`TEST_STRING`].
252    fn read_test_string(trie: &DirTrie) {
253        let mut file = trie.open_entry_reader(&TEST_ID).unwrap();
254        let mut contents = Vec::new();
255        file.read_to_end(&mut contents).unwrap();
256        assert_eq!(&contents, TEST_STRING);
257    }
258
259    #[test]
260    fn create_retrieve() {
261        with_test_dir_trie(|trie| {
262            // The trie starts empty, so it doesn't contain the `TEST_ID`'s file.
263            assert!(!trie.contains_entry(&TEST_ID));
264
265            // Create an entry in the trie and write some data to it.
266            write_test_string(&trie);
267
268            // The trie now has that entry.
269            assert!(trie.contains_entry(&TEST_ID));
270
271            // Open the entry and read its data back.
272            read_test_string(&trie);
273        })
274    }
275
276    #[test]
277    fn hardlink() {
278        with_test_dir_trie(|src| {
279            with_test_dir_trie(|dst| {
280                // Both tries starts empty, so they don't contain the `TEST_ID`'s file.
281                assert!(!src.contains_entry(&TEST_ID));
282                assert!(!dst.contains_entry(&TEST_ID));
283
284                // Create an entry in `src` and write some data to it.
285                write_test_string(&src);
286
287                // The `src` now contains the entry, but the `dst` still doesn't.
288                assert!(src.contains_entry(&TEST_ID));
289                assert!(!dst.contains_entry(&TEST_ID));
290
291                // Hardlink the entry from `src` into `dst`.
292                assert!(dst.try_hardlink_from(&src, &TEST_ID).unwrap());
293
294                // After hardlinking, the file is now in `dst`.
295                assert!(dst.contains_entry(&TEST_ID));
296                // Open the entry in `dst` and read its data back.
297                read_test_string(&dst);
298
299                // The file is still also in `src`, and its data hasn't changed.
300                assert!(src.contains_entry(&TEST_ID));
301                read_test_string(&src);
302            })
303        })
304    }
305
306    #[test]
307    fn open_options() {
308        with_test_dir_trie(|trie| {
309            // The trie starts empty, so it doesn't contain the `TEST_ID`'s file.
310            assert!(!trie.contains_entry(&TEST_ID));
311
312            // Because the file isn't there, we can't open it.
313            assert!(trie.open_entry_reader(&TEST_ID).is_err());
314
315            // Create an entry in the trie and write some data to it.
316            write_test_string(&trie);
317
318            // The trie now has that entry.
319            assert!(trie.contains_entry(&TEST_ID));
320
321            // Because the file is there, we can't create it.
322            assert!(trie.open_entry_writer(&TEST_ID).is_err());
323        })
324    }
325}