spacetimedb_table/table_index/
unique_direct_index.rs

1use crate::indexes::{PageIndex, PageOffset, RowPointer, SquashedOffset};
2use crate::MemoryUsage;
3use core::mem;
4use core::ops::{Bound, RangeBounds};
5use core::option::IntoIter;
6
/// A direct index for relating unsigned integer keys [`u8`..`u64`] to [`RowPointer`].
///
/// This index is efficient when given keys that are used in non-random insert patterns
/// where keys are dense and not far apart as well as starting near zero.
/// Conversely, it performs worse than a btree index in the case of highly random inserts
/// and with sparse keys and where the first key inserted is large.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct UniqueDirectIndex {
    /// The outer index.
    /// Slot `key / KEYS_PER_INNER` holds the inner index for that chunk of keys,
    /// or `None` if no key in the chunk has been inserted yet.
    outer: Vec<Option<InnerIndex>>,
    /// The number of keys indexed.
    len: usize,
}
20
21impl MemoryUsage for UniqueDirectIndex {
22    fn heap_usage(&self) -> usize {
23        let Self { outer, len } = self;
24        outer.heap_usage() + len.heap_usage()
25    }
26}
27
/// The standard page size on linux x64.
const PAGE_SIZE: usize = 4_096;
/// Number of keys per inner index.
/// As a `RowPointer` is a `u64` (8 bytes), this is 512,
/// so each inner index spans exactly one OS page.
const KEYS_PER_INNER: usize = PAGE_SIZE / size_of::<RowPointer>();
/// The inner index array, which will be heap allocated.
type InnerIndexArray = [RowPointer; KEYS_PER_INNER];
34
/// An inner index. Either it is empty, or it has `KEYS_PER_INNER` elements.
///
/// Slots holding `NONE_PTR` are considered vacant.
#[derive(Debug, Clone, PartialEq, Eq)]
struct InnerIndex {
    // The boxed slot array; always exactly `KEYS_PER_INNER` long.
    inner: Box<InnerIndexArray>,
}
40
41impl MemoryUsage for InnerIndex {
42    fn heap_usage(&self) -> usize {
43        self.inner.heap_usage()
44    }
45}
46
/// The sentinel used to represent an empty slot in the index.
/// The reserved bit set to `false` is used to indicate absence.
const NONE_PTR: RowPointer = RowPointer::new(false, PageIndex(0), PageOffset(0), SquashedOffset::TX_STATE);

/// A key into an [`InnerIndex`].
/// By construction in `split_key`, the wrapped value is `< KEYS_PER_INNER`.
struct InnerIndexKey(usize);
52
53/// Splits the `key` into an outer and inner key.
54fn split_key(key: usize) -> (usize, InnerIndexKey) {
55    (key / KEYS_PER_INNER, InnerIndexKey(key % KEYS_PER_INNER))
56}
57
impl InnerIndex {
    /// Allocates a new inner index with every slot zeroed, i.e., set to `NONE_PTR`.
    fn new() -> Self {
        use std::alloc::{alloc_zeroed, handle_alloc_error, Layout};

        let layout = Layout::new::<InnerIndexArray>();

        // Allocate with `alloc_zeroed` so that the bytes are initially 0, rather than uninit.
        // This is a sound implementation as `0`-init elements == `NONE_PTR`.
        // TODO: use Box::new_zeroed() once stabilized.
        // SAFETY: The layout's size is non-zero.
        let raw: *mut InnerIndexArray = unsafe { alloc_zeroed(layout) }.cast();

        if raw.is_null() {
            handle_alloc_error(layout);
        }

        // SAFETY: We used the global allocator with a layout for `InnerIndexArray`.
        //         and the elements are 0-init by `alloc_zeroed`,
        //         which makes each element a valid `RowPointer` (`u64`).
        let inner = unsafe { Box::from_raw(raw) };

        Self { inner }
    }

    /// Returns the pointer at `key`; yields `NONE_PTR` for a vacant slot.
    fn get(&self, key: InnerIndexKey) -> RowPointer {
        // SAFETY: `self.inner.len() = KEYS_PER_INNER` and `key.0 < KEYS_PER_INNER`.
        *unsafe { self.inner.get_unchecked(key.0) }
    }

    /// Returns the mutable slot at `key`.
    fn get_mut(&mut self, key: InnerIndexKey) -> &mut RowPointer {
        // SAFETY: `self.inner.len() = KEYS_PER_INNER` and `key.0 < KEYS_PER_INNER`.
        unsafe { self.inner.get_unchecked_mut(key.0) }
    }
}
94
95impl UniqueDirectIndex {
96    /// Inserts the relation `key -> val` to this index.
97    ///
98    /// If `key` was already present in the index, does not add an association with `val`.
99    /// Returns the existing associated value instead.
100    pub fn insert(&mut self, key: usize, val: RowPointer) -> Result<(), RowPointer> {
101        let (key_outer, key_inner) = split_key(key);
102
103        // Fetch the outer index and ensure it can house `key_outer`.
104        let outer = &mut self.outer;
105        outer.resize(outer.len().max(key_outer + 1), None);
106
107        // Fetch the inner index.
108        // SAFETY: ensured in `.resize(_)` that `key_outer < outer.len()`, making indexing to `key_outer` valid.
109        let inner = unsafe { outer.get_unchecked_mut(key_outer) };
110        let inner = inner.get_or_insert_with(InnerIndex::new);
111
112        // Fetch the slot.
113        let slot = inner.get_mut(key_inner);
114        let in_slot = *slot;
115        if in_slot == NONE_PTR {
116            // We have `NONE_PTR`, so not set yet.
117            *slot = val.with_reserved_bit(true);
118            self.len += 1;
119            Ok(())
120        } else {
121            Err(in_slot.with_reserved_bit(false))
122        }
123    }
124
125    /// Deletes `key` from this map.
126    ///
127    /// Returns whether `key` was present.
128    pub fn delete(&mut self, key: usize) -> bool {
129        let (key_outer, key_inner) = split_key(key);
130        let outer = &mut self.outer;
131        if let Some(Some(inner)) = outer.get_mut(key_outer) {
132            let slot = inner.get_mut(key_inner);
133            let old_val = mem::replace(slot, NONE_PTR);
134            let deleted = old_val != NONE_PTR;
135            self.len -= deleted as usize;
136            return deleted;
137        }
138        false
139    }
140
141    /// Returns an iterator yielding the potential [`RowPointer`] for `key`.
142    pub fn seek_point(&self, key: usize) -> UniqueDirectIndexPointIter {
143        let (outer_key, inner_key) = split_key(key);
144        let iter = self
145            .outer
146            .get(outer_key)
147            .and_then(|x| x.as_ref())
148            .map(|inner| inner.get(inner_key))
149            .filter(|slot| *slot != NONE_PTR)
150            .map(|ptr| ptr.with_reserved_bit(false))
151            .into_iter();
152        UniqueDirectIndexPointIter { iter }
153    }
154
155    /// Returns an iterator yielding all the [`RowPointer`] that correspond to the provided `range`.
156    pub fn seek_range(&self, range: &impl RangeBounds<usize>) -> UniqueDirectIndexRangeIter {
157        // Translate `range` to `start..end`.
158        let start = match range.start_bound() {
159            Bound::Included(&s) => s,
160            Bound::Excluded(&s) => s + 1, // If this wraps, we will clamp to `max_key` later.
161            Bound::Unbounded => 0,
162        };
163        let end = match range.end_bound() {
164            Bound::Included(&e) => e + 1, // If this wraps, we will clamp to `max_key` later.
165            Bound::Excluded(&e) => e,
166            Bound::Unbounded => self.len,
167        };
168
169        // The upper bound of possible key.
170        // This isn't necessarily the real max key actually present in the index,
171        // due to possible deletions.
172        let max_key = self.outer.len() * KEYS_PER_INNER;
173
174        // Clamp `end` to max possible key in index.
175        let end = end.min(max_key);
176
177        // Normalize `start` so that `start <= end`.
178        let start = start.min(end);
179
180        UniqueDirectIndexRangeIter {
181            outer: &self.outer,
182            start,
183            end,
184        }
185    }
186
187    /// Returns the number of unique keys in the index.
188    pub fn num_keys(&self) -> usize {
189        self.len
190    }
191
192    /// Returns the total number of entries in the index.
193    pub fn len(&self) -> usize {
194        self.len
195    }
196
197    /// Returns whether there are any entries in the index.
198    #[allow(unused)] // No use for this currently.
199    pub fn is_empty(&self) -> bool {
200        self.len() == 0
201    }
202
203    /// Deletes all entries from the index, leaving it empty.
204    /// This will not deallocate the outer index.
205    pub fn clear(&mut self) {
206        self.outer.clear();
207        self.len = 0;
208    }
209}
210
/// An iterator over the potential value in a [`UniqueDirectIndex`] for a given key.
///
/// Yields at most one [`RowPointer`].
pub struct UniqueDirectIndexPointIter {
    // Iterator over the zero-or-one matching pointer.
    iter: IntoIter<RowPointer>,
}
215
216impl Iterator for UniqueDirectIndexPointIter {
217    type Item = RowPointer;
218    fn next(&mut self) -> Option<Self::Item> {
219        self.iter.next()
220    }
221}
222
/// An iterator over a range of keys in a [`UniqueDirectIndex`].
///
/// Constructed by [`UniqueDirectIndex::seek_range`], which establishes the
/// invariants `start <= end` and `end <= outer.len() * KEYS_PER_INNER`
/// that the unsafe indexing in `next` relies on.
#[derive(Debug)]
pub struct UniqueDirectIndexRangeIter<'a> {
    // The outer index being iterated over.
    outer: &'a [Option<InnerIndex>],
    // The next key to consider.
    start: usize,
    // The exclusive upper bound on keys to yield.
    end: usize,
}
230
231impl Iterator for UniqueDirectIndexRangeIter<'_> {
232    type Item = RowPointer;
233    fn next(&mut self) -> Option<Self::Item> {
234        loop {
235            if self.start >= self.end {
236                // We're at or beyond the end, so we're done.
237                return None;
238            }
239
240            let (outer_key, inner_key) = split_key(self.start);
241            // SAFETY:
242            // - `self.start <= self.end <= max_key`
243            // - the early exit above ensures that `self.start < max_key`.
244            // - `split_key(max_key).0 = self.outer.len()`.
245            // - this entails that `outer_key < self.outer.len()`.
246            let inner = unsafe { self.outer.get_unchecked(outer_key) };
247            let Some(inner) = inner else {
248                // Inner index has not been initialized,
249                // so the entire inner index is empty.
250                // Let's jump to the next inner index.
251                self.start += KEYS_PER_INNER;
252                continue;
253            };
254            let ptr = inner.get(inner_key);
255
256            // Advance to next key.
257            self.start += 1;
258
259            if ptr != NONE_PTR {
260                // The row actually exists, so we've found something to return.
261                return Some(ptr.with_reserved_bit(false));
262            }
263        }
264    }
265}
266
#[cfg(test)]
mod test {
    use core::iter::repeat_with;

    use super::*;
    use crate::indexes::Size;

    /// The fixed row size used when generating row pointers below.
    const FIXED_ROW_SIZE: Size = Size(4 * 4);

    /// Yields an endless supply of row pointers, walking through page offsets
    /// and rolling over to a fresh page once the current one is consumed.
    fn gen_row_pointers() -> impl Iterator<Item = RowPointer> {
        let mut page = PageIndex(0);
        let mut offset = PageOffset(0);
        repeat_with(move || {
            let next_end = offset.0 as usize + FIXED_ROW_SIZE.0 as usize;
            if next_end >= PageOffset::PAGE_END.0 as usize {
                // Consumed the page, let's use a new page.
                page.0 += 1;
                offset = PageOffset(0);
            } else {
                offset += FIXED_ROW_SIZE;
            }

            RowPointer::new(false, page, offset, SquashedOffset::COMMITTED_STATE)
        })
    }

    /// Builds an index containing `key -> ptr` for every pair,
    /// asserting that each insertion succeeds.
    fn index_of(keys: &[usize], ptrs: &[RowPointer]) -> UniqueDirectIndex {
        let mut index = UniqueDirectIndex::default();
        for (&key, &ptr) in keys.iter().zip(ptrs) {
            index.insert(key, ptr).unwrap();
        }
        index
    }

    #[test]
    fn seek_range_gives_back_inserted() {
        // Straddle an inner-index boundary to exercise the outer index.
        let range = (KEYS_PER_INNER - 2)..(KEYS_PER_INNER + 2);
        let (keys, ptrs): (Vec<_>, Vec<_>) = range.clone().zip(gen_row_pointers()).unzip();

        let index = index_of(&keys, &ptrs);
        assert_eq!(index.len(), 4);

        let found = index.seek_range(&range).collect::<Vec<_>>();
        assert_eq!(found, ptrs);
    }

    #[test]
    fn inserting_again_errors() {
        let range = (KEYS_PER_INNER - 2)..(KEYS_PER_INNER + 2);
        let (keys, ptrs): (Vec<_>, Vec<_>) = range.zip(gen_row_pointers()).unzip();

        let mut index = index_of(&keys, &ptrs);

        // Re-inserting any existing key must fail, returning the stored pointer.
        for (&key, &ptr) in keys.iter().zip(&ptrs) {
            assert_eq!(index.insert(key, ptr).unwrap_err(), ptr)
        }
    }

    #[test]
    fn deleting_allows_reinsertion() {
        let range = (KEYS_PER_INNER - 2)..(KEYS_PER_INNER + 2);
        let (keys, ptrs): (Vec<_>, Vec<_>) = range.zip(gen_row_pointers()).unzip();

        let mut index = index_of(&keys, &ptrs);
        assert_eq!(index.len(), 4);

        // Delete one key; a second delete of the same key is a no-op.
        let key = KEYS_PER_INNER + 1;
        let ptr = index.seek_point(key).next().unwrap();
        assert!(index.delete(key));
        assert!(!index.delete(key));
        assert_eq!(index.len(), 3);

        // The freed slot accepts the pointer again.
        index.insert(key, ptr).unwrap();
        assert_eq!(index.len(), 4);
    }
}
342}