//! `mmap_cache/cache.rs` — a read-only byte cache backed by an FST index and a flat value region.

1use crate::Error;
2
3use fst::raw::Node;
4use fst::raw::Transition;
5use fst::Streamer;
6use memmap2::Mmap;
7use std::cmp::Ordering;
8use std::fs;
9use std::ops::{Bound, RangeBounds};
10use std::path::Path;
11
/// A cache, mapping `[u8]` keys to `[u8]` values.
///
/// This cache wraps generic byte storage that implements `AsRef<[u8]>`. This is most commonly a memory-mapped file, [`Mmap`].
///
/// For serializing a stream of (key, value) pairs, see [`Builder`](crate::Builder).
pub struct Cache<DK, DV> {
    // FST map from keys to byte offsets into the value region.
    index: fst::Map<DK>,
    // Contiguous storage for all values; offsets produced by `index` point into this slice.
    value_bytes: DV,
}
21
22impl<DK, DV> Cache<DK, DV>
23where
24    DK: AsRef<[u8]>,
25    DV: AsRef<[u8]>,
26{
27    pub fn new(index_bytes: DK, value_bytes: DV) -> Result<Self, Error> {
28        Ok(Self {
29            index: fst::Map::new(index_bytes)?,
30            value_bytes,
31        })
32    }
33
34    /// Access the internal [`fst::Map`] used for mapping keys to value offsets.
35    pub fn index(&self) -> &fst::Map<DK> {
36        &self.index
37    }
38
39    /// The entire byte slice storing all values.
40    pub fn value_bytes(&self) -> &[u8] {
41        self.value_bytes.as_ref()
42    }
43
44    /// Returns the byte offset of the value for `key`, if it exists.
45    ///
46    /// The returned offset can be used with the `value_at_offset` method.
47    pub fn get_value_offset(&self, key: &[u8]) -> Option<u64> {
48        self.index.get(key)
49    }
50
51    /// Transmutes the bytes starting at `offset` into a `T` reference.
52    ///
53    /// # Safety
54    ///
55    /// `offset` must point to a valid representation of `T` in the `value_bytes` region of memory.
56    pub unsafe fn offset_transmuted_value<T>(&self, offset: usize) -> &T {
57        std::mem::transmute(&self.value_bytes()[offset])
58    }
59
60    /// Transmutes the bytes pointed to by `key` (if any) into a `T` reference.
61    ///
62    /// # Safety
63    ///
64    /// `key` must point to a valid representation of `T` in the `value_bytes` region of memory.
65    pub unsafe fn get_transmuted_value<T>(&self, key: &[u8]) -> Option<&T> {
66        self.get_value_offset(key)
67            .map(|offset| self.offset_transmuted_value(offset.try_into().unwrap()))
68    }
69
70    /// Returns a streaming iterator over (key, value offset) pairs.
71    ///
72    /// The offset is a byte offset pointing to the start of the value for that key.
73    pub fn range<K, R>(&self, key_range: R) -> fst::map::StreamBuilder
74    where
75        K: AsRef<[u8]>,
76        R: RangeBounds<K>,
77    {
78        let builder = self.index.range();
79        let builder = match key_range.start_bound() {
80            Bound::Unbounded => builder,
81            Bound::Excluded(b) => builder.gt(b),
82            Bound::Included(b) => builder.ge(b),
83        };
84        match key_range.end_bound() {
85            Bound::Unbounded => builder,
86            Bound::Excluded(b) => builder.lt(b),
87            Bound::Included(b) => builder.le(b),
88        }
89    }
90
91    /// Returns the (lexicographical) first (key, value) pair.
92    ///
93    /// # Panics
94    ///
95    /// If the actual first key is longer than `N`.
96    pub fn first<const N: usize>(&self) -> Option<([u8; N], u64)> {
97        self.index.stream().next().map(|(k, offset)| {
98            let mut key = [0; N];
99            key.copy_from_slice(k);
100            (key, offset)
101        })
102    }
103
104    /// Returns the (lexicographical) last (key, value) pair.
105    ///
106    /// # Panics
107    ///
108    /// If the actual last key is longer than `N`.
109    pub fn last<const N: usize>(&self) -> Option<([u8; N], u64)> {
110        let raw = self.index.as_fst();
111        let mut key = [0; N];
112        let mut n = raw.root();
113        let mut i = 0;
114        let mut offset = 0;
115        while !n.is_final() || !n.is_empty() {
116            let last = n.transition(n.len() - 1);
117            key[i] = last.inp;
118            n = raw.node(last.addr);
119            i += 1;
120            offset += last.out.value();
121        }
122        (i == N).then(|| (key, offset))
123    }
124
125    /// Finds the (lexicographical) greatest key `k` such that `k <= upper_bound`.
126    ///
127    /// # Panics
128    ///
129    /// If the found key is longer than `N`.
130    pub fn last_le<const N: usize>(&self, upper_bound: &[u8]) -> Option<([u8; N], u64)> {
131        let raw = self.index.as_fst();
132        let mut key = [0; N];
133        let offset = self.last_le_recursive(raw, upper_bound, LastLeSearch::initial(raw), &mut key);
134        offset.map(|o| (key, o))
135    }
136
137    fn last_le_recursive<const N: usize>(
138        &self,
139        raw: &fst::raw::Fst<DK>,
140        upper_bound: &[u8],
141        state: LastLeSearch,
142        key: &mut [u8; N],
143    ) -> Option<u64> {
144        if let Ordering::Greater = state.parent_ordering {
145            return None;
146        }
147
148        let le_found = if !state.node.is_empty() {
149            match state.parent_ordering {
150                Ordering::Greater => unreachable!(),
151                Ordering::Equal => {
152                    if state.byte_i < upper_bound.len() {
153                        // We need to backtrack if the least terminal key is GREATER than upper_bound.
154                        find_last_le_transition(state.node, upper_bound[state.byte_i]).and_then(
155                            |(t_i, t)| {
156                                key[state.byte_i] = t.inp;
157                                let next_state = state.next(raw, upper_bound, t);
158                                self.last_le_recursive(raw, upper_bound, next_state, key)
159                                    .or_else(|| {
160                                        // Backtrack. We should only need to move to the next greatest key.
161                                        if t_i > 0 {
162                                            let t = state.node.transition(t_i - 1);
163                                            key[state.byte_i] = t.inp;
164                                            let next_state =
165                                                state.next_with_ordering(raw, t, Ordering::Less);
166                                            self.last_le_recursive(
167                                                raw,
168                                                upper_bound,
169                                                next_state,
170                                                key,
171                                            )
172                                        } else {
173                                            None
174                                        }
175                                    })
176                            },
177                        )
178                    } else {
179                        None
180                    }
181                }
182                Ordering::Less => {
183                    // We're already LESS, so just take the greatest key we can find.
184                    let t = state.node.transition(state.node.len() - 1);
185                    key[state.byte_i] = t.inp;
186                    let next_state = state.next_with_ordering(raw, t, Ordering::Less);
187                    self.last_le_recursive(raw, upper_bound, next_state, key)
188                }
189            }
190        } else {
191            None
192        };
193        le_found.or_else(|| state.node.is_final().then(|| state.offset_sum))
194    }
195}
196
// Traversal state for the recursive `last_le` search over the raw FST.
struct LastLeSearch<'a> {
    // Ordering of the most recently taken transition byte relative to the
    // corresponding `upper_bound` byte; once a path diverges, `Less` is
    // propagated explicitly by the caller.
    parent_ordering: Ordering,
    // Index of the next key byte to decide (== current depth in the FST).
    byte_i: usize,
    // Sum of the transition output values along the path taken so far.
    offset_sum: u64,
    // The FST node reached by the path taken so far.
    node: Node<'a>,
}
203
204impl<'a> LastLeSearch<'a> {
205    fn initial<B>(raw: &'a fst::raw::Fst<B>) -> Self
206    where
207        B: AsRef<[u8]>,
208    {
209        Self {
210            parent_ordering: Ordering::Equal,
211            byte_i: 0,
212            offset_sum: 0,
213            node: raw.root(),
214        }
215    }
216
217    fn next<B>(&self, raw: &'a fst::raw::Fst<B>, upper_bound: &[u8], t: Transition) -> Self
218    where
219        B: AsRef<[u8]>,
220    {
221        self.next_with_ordering(raw, t, t.inp.cmp(&upper_bound[self.byte_i]))
222    }
223
224    fn next_with_ordering<B>(
225        &self,
226        raw: &'a fst::raw::Fst<B>,
227        t: Transition,
228        ordering: Ordering,
229    ) -> Self
230    where
231        B: AsRef<[u8]>,
232    {
233        Self {
234            parent_ordering: ordering,
235            byte_i: self.byte_i + 1,
236            node: raw.node(t.addr),
237            offset_sum: self.offset_sum + t.out.value(),
238        }
239    }
240}
241
242/// If there are any transitions from `node` whose input byte is LE `upper_bound`, then one of them will be returned. If there
243/// are multiple such transitions, the one with the greatest input byte is returned.
244fn find_last_le_transition(node: Node, upper_bound: u8) -> Option<(usize, Transition)> {
245    // Binary search over the transitions.
246    let mut lower = 0;
247    let mut upper = node.len();
248    while lower != upper {
249        let mid = (lower + upper) / 2;
250
251        let t = node.transition(mid);
252        if t.inp <= upper_bound {
253            if mid == node.len() - 1 {
254                // Transition byte is LE our upper_bound, and we're at the last transition, so this is the *last* LE transition.
255                return Some((mid, t));
256            }
257
258            let next_t = node.transition(mid + 1);
259            if next_t.inp > upper_bound {
260                // Transition byte is LE our upper_bound, and the next transition byte is *not*, so this is the *last* LE
261                // transition.
262                return Some((mid, t));
263            }
264
265            // Not the last LE transition, so we need to search higher than mid.
266            lower = mid + 1;
267        } else {
268            // Transition too large, search lower than mid.
269            upper = mid;
270        }
271    }
272    None
273}
274
275pub type MmapCache = Cache<Mmap, Mmap>;
276
277impl MmapCache {
278    /// Maps the files at `index_path` and `value_path` to read-only virtual memory ranges.
279    ///
280    /// # Safety
281    ///
282    /// See [`Mmap`].
283    pub unsafe fn map_paths(
284        index_path: impl AsRef<Path>,
285        value_path: impl AsRef<Path>,
286    ) -> Result<Self, Error> {
287        let index_file = fs::File::open(index_path)?;
288        let value_file = fs::File::open(value_path)?;
289        Self::map_files(&index_file, &value_file)
290    }
291
292    /// Maps`index_file` and `value_file` to read-only virtual memory ranges.
293    ///
294    /// # Safety
295    ///
296    /// See [`Mmap`].
297    pub unsafe fn map_files(index_file: &fs::File, value_file: &fs::File) -> Result<Self, Error> {
298        let index_mmap = Mmap::map(index_file)?;
299        let value_mmap = Mmap::map(value_file)?;
300        Self::new(index_mmap, value_mmap)
301    }
302}