mmap_cache/
lib.rs

1//! A low-level API for a memory-mapped cache of a read-only key-value store.
2//!
3//! ## Design
4//!
5//! The [`Cache`] index is an [`fst::Map`], which maps from arbitrary byte sequences to [`u64`]. We use the [`u64`] as byte
6//! offsets into a memory-mapped file that stores arbitrary values. This read-only cache is very compact and simple to
7//! construct, requiring only constant memory for serializing arbitrarily large maps.
8//!
9//! By using Finite State Transducers from the [`fst`] crate, we get a highly compressed mapping that performs key --> offset
10//! lookups in O(key length) time.
11//!
12//! ## Example
13//!
14//! ```
15//! # use mmap_cache::Error;
16//! # fn example() -> Result<(), Error> {
17//! use mmap_cache::{FileBuilder, MmapCache};
18//!
19//! const INDEX_PATH: &str = "/tmp/mmap_cache_index";
20//! const VALUES_PATH: &str = "/tmp/mmap_cache_values";
21//!
22//! // Serialize to files. As required by the finite state transducer (FST) builder,
23//! // keys must be provided in sorted (lexicographical) order.
24//! let mut builder = FileBuilder::create_files(INDEX_PATH, VALUES_PATH)?;
25//! builder.insert(b"abc", b"def")?;
26//! builder.insert(b"foo", b"bar")?;
27//! builder.finish()?;
28//!
29//! // Map the files into memory.
30//! let cache = unsafe { MmapCache::map_paths(INDEX_PATH, VALUES_PATH) }?;
31//! let value = unsafe { cache.get_transmuted_value(b"foo") };
32//! assert_eq!(value, Some(b"bar"));
33//! # Ok(())
34//! # }
35//! # example().unwrap();
36//! ```
37//!
38//! ## IO Concurrency
39//!
40//! When using [`memmap2`] on a large file, it's likely that accessing values from the cache will cause the thread to block in
41//! the operating system scheduler while the page cache is filled from the file system. To achieve concurrency IO up to some
42//! maximum concurrency N, you could dispatch your IOs in a thread pool of N threads.
43
44mod builder;
45mod cache;
46mod error;
47
48pub use builder::*;
49pub use cache::*;
50pub use error::*;
51
52pub use fst;
53pub use memmap2;
54
55#[cfg(test)]
56mod tests {
57    use super::*;
58
59    use bytemuck::cast_slice;
60    use fst::{IntoStreamer, Streamer};
61
62    #[test]
63    fn serialize_and_read_range() {
64        serialize_example();
65
66        let cache = unsafe { MmapCache::map_paths(INDEX_PATH, VALUES_PATH) }.unwrap();
67        let dog: &[u8] = b"dog";
68        let gator: &[u8] = b"gator";
69        let mut stream = cache.range(dog..=gator).into_stream();
70        let mut key_values = Vec::new();
71        while let Some((key, offset)) = stream.next() {
72            let offset = offset as usize;
73            let value: &[i32; 3] = unsafe { std::mem::transmute(&cache.value_bytes()[offset]) };
74            key_values.push((key.to_vec(), value));
75        }
76
77        assert_eq!(
78            key_values,
79            [
80                (b"dog".to_vec(), &PAIRS[1].1),
81                (b"doggy".to_vec(), &PAIRS[2].1),
82                (b"frog".to_vec(), &PAIRS[3].1)
83            ]
84        );
85    }
86
87    #[test]
88    fn key_lookups() {
89        serialize_example();
90
91        let cache = unsafe { MmapCache::map_paths(INDEX_PATH, VALUES_PATH) }.unwrap();
92
93        let (first_key, first_offset) = cache.first().unwrap();
94        assert_eq!(&first_key, b"cat");
95        assert_eq!(first_offset, 0);
96
97        let (last_key, last_offset) = cache.last().unwrap();
98        assert_eq!(&last_key, b"goose");
99        assert_eq!(last_offset, 48);
100
101        // Equal.
102        let (le_key, le_offset) = cache.last_le::<4>(b"frog").unwrap();
103        assert_eq!(&le_key, b"frog");
104        assert_eq!(le_offset, 36);
105
106        // Lesser, same length.
107        let (le_key, le_offset) = cache.last_le::<4>(b"full").unwrap();
108        assert_eq!(&le_key, b"frog");
109        assert_eq!(le_offset, 36);
110
111        // Lesser, same length, different starting letter.
112        let (le_key, le_offset) = cache.last_le::<4>(b"goon").unwrap();
113        assert_eq!(&le_key, b"frog");
114        assert_eq!(le_offset, 36);
115
116        // Lesser, longer.
117        let (le_key, le_offset) = cache.last_le::<4>(b"goony").unwrap();
118        assert_eq!(&le_key, b"frog");
119        assert_eq!(le_offset, 36);
120
121        // Lesser, longer, superstring.
122        let (le_key, le_offset) = cache.last_le::<3>(b"doge").unwrap();
123        assert_eq!(le_key.as_ref(), b"dog");
124        assert_eq!(le_offset, 12);
125
126        // Lesser, same length, substring matches greater key.
127        let (le_key, le_offset) = cache.last_le::<4>(b"goos").unwrap();
128        assert_eq!(&le_key, b"frog");
129        assert_eq!(le_offset, 36);
130
131        // Lesser, shorter.
132        let (le_key, le_offset) = cache.last_le::<4>(b"fry").unwrap();
133        assert_eq!(&le_key, b"frog");
134        assert_eq!(le_offset, 36);
135        let (le_key, le_offset) = cache.last_le::<3>(b"do").unwrap();
136        assert_eq!(le_key.as_ref(), b"cat");
137        assert_eq!(le_offset, 0);
138        let (le_key, le_offset) = cache.last_le::<5>(b"food").unwrap();
139        assert_eq!(le_key.as_ref(), b"doggy");
140        assert_eq!(le_offset, 24);
141
142        // Lesser, shorter, substring matches greater key.
143        let (le_key, le_offset) = cache.last_le::<5>(b"fro").unwrap();
144        assert_eq!(&le_key, b"doggy");
145        assert_eq!(le_offset, 24);
146
147        // No LE keys.
148        let result = cache.last_le::<4>(b"candy");
149        assert_eq!(result, None);
150    }
151
152    const INDEX_PATH: &str = "/tmp/mmap_cache_index";
153    const VALUES_PATH: &str = "/tmp/mmap_cache_values";
154
155    const PAIRS: [(&[u8], [i32; 3]); 5] = [
156        (b"cat", [1, 2, 3]),
157        (b"dog", [2, 3, 4]),
158        (b"doggy", [3, 4, 5]),
159        (b"frog", [4, 5, 6]),
160        (b"goose", [5, 6, 7]),
161    ];
162
163    fn serialize_example() {
164        let mut builder = FileBuilder::create_files(INDEX_PATH, VALUES_PATH).unwrap();
165        for (key, value) in PAIRS {
166            builder.insert(key, cast_slice(&value)).unwrap();
167        }
168        builder.finish().unwrap();
169    }
170}