spacetimedb_table/
indexes.rs

1//! Provides primitive types and definitions around
2//! bytes, row hashes, (page) sizes, offsets, and indices.
3
4use super::util::range_move;
5use crate::{static_assert_size, MemoryUsage};
6use ahash::RandomState;
7use core::fmt;
8use core::ops::{AddAssign, Div, Mul, Range, SubAssign};
9use derive_more::{Add, Sub};
10use spacetimedb_data_structures::map::ValidAsIdentityHash;
11use spacetimedb_sats::{impl_deserialize, impl_serialize};
12
13/// A byte is a `u8`.
14///
15/// Previous implementations used `MaybeUninit<u8>` here,
16/// but it became necessary to serialize pages to enable snapshotting,
17/// so we require that all bytes in a page be valid `u8`s, never uninit.
18pub type Byte = u8;
19
20/// A slice of [`Byte`]s.
21pub type Bytes = [Byte];
22
23/// Total size of a page, incl. header.
24///
25/// Defined as 64 KiB.
26pub(crate) const PAGE_SIZE: usize = u16::MAX as usize + 1;
27
28/// Total size of a page header.
29///
30/// 64 as the header is aligned to 64 bytes.
31pub(crate) const PAGE_HEADER_SIZE: usize = 64;
32
33/// The size of the data segment of a [`Page`](super::page::Page).
34///
35/// Currently 64KiB - 64 bytes.
36/// The 64 bytes are used for the header of the `Page`.
37// pub for benchmarks
38pub const PAGE_DATA_SIZE: usize = PAGE_SIZE - PAGE_HEADER_SIZE;
39
40/// The content hash of a row.
41///
42/// Notes:
43/// - The hash is not cryptographically secure.
44///
45/// - The hash is valid only for the lifetime of a `Table`.
46///   This entails that it should not be persisted to disk
47///   or used as a stable identifier over the network.
48///   For example, the hashing algorithm could be different
49///   on different machines based on availability of hardware instructions.
50///   Moreover, due to random seeds, when restarting from disk,
51///   the hashes may be different for the same rows.
52#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
53#[cfg_attr(any(test, feature = "proptest"), derive(proptest_derive::Arbitrary))]
54pub struct RowHash(pub u64);
55
56impl MemoryUsage for RowHash {}
57
58static_assert_size!(RowHash, 8);
59
60/// `RowHash` is already a hash, so no need to hash again.
61impl ValidAsIdentityHash for RowHash {}
62
63impl RowHash {
64    /// Returns a `Hasher` builder that yields the type of hashes that `RowHash` stores.
65    pub fn hasher_builder() -> RandomState {
66        // For equal `row`s, all calls within the same process will yield the same hash.
67        RandomState::with_seed(0x42)
68    }
69}
70
71/// The size of something in page storage in bytes.
72#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Add, Sub)]
73pub struct Size(pub u16);
74
75impl MemoryUsage for Size {}
76
77// We need to be able to serialize and deserialize `Size` because they appear in the `PageHeader`.
78impl_serialize!([] Size, (self, ser) => self.0.serialize(ser));
79impl_deserialize!([] Size, de => u16::deserialize(de).map(Size));
80
81impl Size {
82    /// Returns the size for use in `usize` computations.
83    #[inline]
84    #[allow(clippy::len_without_is_empty)]
85    pub const fn len(self) -> usize {
86        self.0 as usize
87    }
88}
89
90impl Mul<usize> for Size {
91    type Output = Size;
92
93    #[inline]
94    fn mul(self, rhs: usize) -> Self::Output {
95        Size((self.len() * rhs) as u16)
96    }
97}
98
99/// An offset into a [`Page`].
100#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Add, Sub, bytemuck::NoUninit)]
101#[repr(transparent)]
102#[cfg_attr(any(test, feature = "proptest"), derive(proptest_derive::Arbitrary))]
103pub struct PageOffset(
104    #[cfg_attr(any(test, feature = "proptest"), proptest(strategy = "0..PageOffset::PAGE_END.0"))] pub u16,
105);
106
107impl MemoryUsage for PageOffset {}
108
109static_assert_size!(PageOffset, 2);
110
111// We need to ser/de `PageOffset`s because they appear within the `PageHeader`.
112impl_serialize!([] PageOffset, (self, ser) => self.0.serialize(ser));
113impl_deserialize!([] PageOffset, de => u16::deserialize(de).map(PageOffset));
114
115impl PageOffset {
116    /// Returns the offset as a `usize` index.
117    #[inline]
118    pub const fn idx(self) -> usize {
119        self.0 as usize
120    }
121
122    /// Is this offset the null offset, which refers to an empty var-len object?
123    ///
124    /// The null `PageOffset` is used as a sentinel in `VarLenRef`s and `VarLenGranule`s
125    /// as the empty list.
126    /// `VAR_LEN_NULL` can never refer to an allocated `VarLenGranule`,
127    /// because the existence of a `VarLenGranule` implies the existence of at least one fixed-len row,
128    /// which means the fixed-len high water mark is strictly greater than zero.
129    ///
130    /// Note that the null `PageOffset` refers to a valid fixed-len row slot.
131    /// It may only be used as a sentinel value when referring to var-len allocations.
132    #[inline]
133    pub const fn is_var_len_null(self) -> bool {
134        self.0 == Self::VAR_LEN_NULL.0
135    }
136
137    /// The null offset, pointing to the beginning of a page.
138    ///
139    /// The null `PageOffset` is used as a sentinel in `VarLenRef`s and `VarLenGranule`s
140    /// as the empty list.
141    /// `VAR_LEN_NULL` can never refer to an allocated `VarLenGranule`,
142    /// because the existence of a `VarLenGranule` implies the existence of at least one fixed-len row,
143    /// which means the fixed-len high water mark is strictly greater than zero.
144    ///
145    /// Note that the null `PageOffset` refers to a valid fixed-len row slot.
146    /// It may only be used as a sentinel value when referring to var-len allocations.
147    pub const VAR_LEN_NULL: Self = Self(0);
148
149    /// Is this offset at the [`PageOffset::PAGE_END`]?
150    #[inline]
151    pub const fn is_at_end(self) -> bool {
152        self.0 == Self::PAGE_END.0
153    }
154
155    /// The offset one past the end of the page.
156    /// That is, for `row_data: [Byte; PAGE_DATA_SIZE]`, this is `row_data.len()`.
157    ///
158    /// This also means that `PAGE_END` is **not** a page offset
159    /// that can be used for indexing, but it can be used as a sentinel.
160    pub const PAGE_END: Self = Self(PAGE_DATA_SIZE as u16);
161
162    /// Returns a range from this offset lasting `size` bytes.
163    #[inline]
164    pub const fn range(self, size: Size) -> Range<usize> {
165        range_move(0..size.len(), self.idx())
166    }
167}
168
169impl PartialEq<Size> for PageOffset {
170    #[inline]
171    fn eq(&self, other: &Size) -> bool {
172        self.0 == other.0
173    }
174}
175
176impl AddAssign<Size> for PageOffset {
177    #[inline]
178    fn add_assign(&mut self, rhs: Size) {
179        self.0 += rhs.0;
180    }
181}
182
183impl SubAssign<Size> for PageOffset {
184    #[inline]
185    fn sub_assign(&mut self, rhs: Size) {
186        self.0 -= rhs.0;
187    }
188}
189
190impl Div<Size> for PageOffset {
191    type Output = usize;
192
193    #[inline]
194    fn div(self, size: Size) -> Self::Output {
195        self.idx() / size.len()
196    }
197}
198
199impl fmt::LowerHex for PageOffset {
200    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
201        fmt::LowerHex::fmt(&self.0, f)
202    }
203}
204
205/// Returns the maximum number of rows with `fixed_row_size` that a page can hold.
206#[inline]
207pub fn max_rows_in_page(fixed_row_size: Size) -> usize {
208    PageOffset::PAGE_END.idx().div_ceil(fixed_row_size.len())
209}
210
211/// The index of a [`Page`] within a [`Pages`].
212#[cfg_attr(any(test, feature = "proptest"), derive(proptest_derive::Arbitrary))]
213#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
214pub struct PageIndex(#[cfg_attr(any(test, feature = "proptest"), proptest(strategy = "0..MASK_PI"))] pub u64);
215
216impl MemoryUsage for PageIndex {}
217
218static_assert_size!(PageIndex, 8);
219
220impl PageIndex {
221    /// The maximum page index, currently `(1 << 39) - 1`.
222    ///
223    /// Limiting `PageIndex` to 39 bits allows it to be packed into `RowPointer`'s 64 bits
224    /// alongside a `PageOffset`, a `SquashedOffset` and a reserved bit.
225    pub const MAX: Self = Self(MASK_PI);
226
227    /// Returns this index as a `usize`.
228    #[inline]
229    pub const fn idx(self) -> usize {
230        self.0 as usize
231    }
232}
233
234/// Indicates which version of a `Table` is referred to by a `RowPointer`.
235///
236/// Currently, `SquashedOffset` has two meaningful values,
237/// [`SquashedOffset::TX_STATE`] and [`SquashedOffset::COMMITTED_STATE`],
238/// which refer to the TX scratchpad and the committed state respectively.
239///
240/// In the future, `SquashedOffset` will be extended to capture
241/// which savepoint within a transaction the pointer refers to,
242/// or which committed-unsquashed preceding transaction.
243#[derive(Clone, Copy, PartialEq, Eq, Debug)]
244#[cfg_attr(any(test, feature = "proptest"), derive(proptest_derive::Arbitrary))]
245pub struct SquashedOffset(pub u8);
246
247impl MemoryUsage for SquashedOffset {}
248
249static_assert_size!(SquashedOffset, 1);
250
251impl SquashedOffset {
252    /// Does this `SquahsedOffset` refer to the TX scratchpad?
253    #[inline]
254    pub const fn is_tx_state(self) -> bool {
255        self.0 == Self::TX_STATE.0
256    }
257
258    /// The `SquashedOffset` for the TX scratchpad.
259    pub const TX_STATE: Self = Self(0);
260
261    /// Does this `SquashedOffset` refer to the committed (squashed) state?
262    #[inline]
263    pub const fn is_committed_state(self) -> bool {
264        self.0 == Self::COMMITTED_STATE.0
265    }
266
267    /// The `SquashedOffset` for the committed (squashed) state.
268    pub const COMMITTED_STATE: Self = Self(1);
269}
270
271/// Offset to a buffer inside `Pages` referring
272/// to the index of a specific page
273/// and the offset within the page.
274#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
275#[repr(transparent)]
276pub struct RowPointer(pub u64);
277
278impl MemoryUsage for RowPointer {}
279
280static_assert_size!(RowPointer, 8);
281
282// Offsets and bits for the various components of `RowPointer`.
283const OFFSET_RB: u64 = 0;
284const BITS_RB: u64 = 1;
285// ^-- Bit 1 is reserved so it can be used in `OffsetOrCollider` ("reserved bit").
286const OFFSET_PI: u64 = OFFSET_RB + BITS_RB;
287const BITS_PI: u64 = 39;
288const OFFSET_PO: u64 = OFFSET_PI + BITS_PI;
289const BITS_PO: u64 = 16;
290const OFFSET_SQ: u64 = OFFSET_PO + BITS_PO;
291const BITS_SQ: u64 = 8;
292
293// Extracting masks for the various components of `RowPointer`.
294const MASK_RB: u64 = (1 << BITS_RB) - 1;
295const MASK_PI: u64 = (1 << BITS_PI) - 1;
296const MASK_PO: u64 = (1 << BITS_PO) - 1;
297const MASK_SQ: u64 = (1 << BITS_SQ) - 1;
298
299// Zeroing masks for the various components of `RowPointer`.
300const MASK_ZERO_RB: u64 = !(MASK_RB << OFFSET_RB);
301const MASK_ZERO_PI: u64 = !(MASK_PI << OFFSET_PI);
302const MASK_ZERO_PO: u64 = !(MASK_PO << OFFSET_PO);
303const MASK_ZERO_SQ: u64 = !(MASK_SQ << OFFSET_SQ);
304
305impl RowPointer {
306    /// Returns a row pointer that is at the given `page_offset`,
307    /// in the page with `page_index`,
308    /// and with the `squashed_offset` (savepoint offset).
309    #[inline(always)]
310    pub const fn new(
311        reserved_bit: bool,
312        page_index: PageIndex,
313        page_offset: PageOffset,
314        squashed_offset: SquashedOffset,
315    ) -> Self {
316        Self(0)
317            .with_reserved_bit(reserved_bit)
318            .with_squashed_offset(squashed_offset)
319            .with_page_index(page_index)
320            .with_page_offset(page_offset)
321    }
322
323    /// Returns the reserved bit.
324    #[inline(always)]
325    pub const fn reserved_bit(self) -> bool {
326        ((self.0 >> OFFSET_RB) & MASK_RB) != 0
327    }
328
329    /// Returns the index of the page.
330    #[inline(always)]
331    pub const fn page_index(self) -> PageIndex {
332        PageIndex((self.0 >> OFFSET_PI) & MASK_PI)
333    }
334
335    /// Returns the offset within the page.
336    #[inline(always)]
337    pub const fn page_offset(self) -> PageOffset {
338        PageOffset(((self.0 >> OFFSET_PO) & MASK_PO) as u16)
339    }
340
341    /// Returns the squashed offset, i.e., the savepoint offset.
342    #[inline(always)]
343    pub const fn squashed_offset(self) -> SquashedOffset {
344        SquashedOffset(((self.0 >> OFFSET_SQ) & MASK_SQ) as u8)
345    }
346
347    /// Returns a new row pointer
348    /// with its reserved bit changed to `reserved_bit`.
349    #[inline(always)]
350    pub const fn with_reserved_bit(self, reserved_bit: bool) -> Self {
351        Self::with(self, reserved_bit as u64, MASK_RB, OFFSET_RB, MASK_ZERO_RB)
352    }
353
354    /// Returns a new row pointer
355    /// with its `PageIndex` changed to `page_index`.
356    #[inline(always)]
357    pub const fn with_page_index(self, page_index: PageIndex) -> Self {
358        Self::with(self, page_index.0, MASK_PI, OFFSET_PI, MASK_ZERO_PI)
359    }
360
361    /// Returns a new row pointer
362    /// with its `PageOffset` changed to `page_offset`.
363    #[inline(always)]
364    pub const fn with_page_offset(self, page_offset: PageOffset) -> Self {
365        Self::with(self, page_offset.0 as u64, MASK_PO, OFFSET_PO, MASK_ZERO_PO)
366    }
367
368    /// Returns a new row pointer
369    /// with its `SquashedOffset` changed to `squashed_offset`.
370    #[inline(always)]
371    pub const fn with_squashed_offset(self, squashed_offset: SquashedOffset) -> Self {
372        Self::with(self, squashed_offset.0 as u64, MASK_SQ, OFFSET_SQ, MASK_ZERO_SQ)
373    }
374
375    #[inline(always)]
376    const fn with(self, v: u64, mask: u64, offset: u64, zero: u64) -> Self {
377        let vmoved = (v & mask) << offset;
378        Self((self.0 & zero) | vmoved)
379    }
380}
381
382impl fmt::Debug for RowPointer {
383    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
384        write!(
385            f,
386            "RowPointer(r: {:?}, pi: {:?}, po: {:?}, so: {:?})",
387            self.reserved_bit() as u8,
388            self.page_index().idx(),
389            self.page_offset().idx(),
390            self.squashed_offset().0,
391        )
392    }
393}
394
395#[cfg(test)]
396mod test {
397    use super::*;
398    use proptest::prelude::*;
399
400    proptest! {
401        #[test]
402        fn row_pointer_ops_work(
403            ((rb1, pi1, po1, so1), (rb2, pi2, po2, so2)) in (
404                (any::<bool>(), any::<PageIndex>(), any::<PageOffset>(), any::<SquashedOffset>()),
405                (any::<bool>(), any::<PageIndex>(), any::<PageOffset>(), any::<SquashedOffset>()),
406        )) {
407            let check = |rb, pi, po, so, ptr: RowPointer| {
408                prop_assert_eq!(rb, ptr.reserved_bit());
409                prop_assert_eq!(pi, ptr.page_index());
410                prop_assert_eq!(po, ptr.page_offset());
411                prop_assert_eq!(so, ptr.squashed_offset());
412                Ok(())
413            };
414            let ptr = RowPointer::new(rb1, pi1, po1, so1);
415            check(rb1, pi1, po1, so1, ptr)?;
416            check(rb2, pi1, po1, so1, ptr.with_reserved_bit(rb2))?;
417            check(rb1, pi2, po1, so1, ptr.with_page_index(pi2))?;
418            check(rb1, pi1, po2, so1, ptr.with_page_offset(po2))?;
419            check(rb1, pi1, po1, so2, ptr.with_squashed_offset(so2))?;
420        }
421    }
422}