nomt_core/
page_id.rs

1//! This module contains all the relevant methods to work with PageIds.
2//!
3//! A PageId is an unique identifier for a Page in a tree of pages with branching factor 2^6 and
4//! a maximum depth of 42, with the root page counted as depth 0.
5//!
6//! Each PageId consists of a list of numbers between 0 and 2^6 - 1, which encodes a path through
7//! the tree. The list may have between 0 and 42 (inclusive) items.
8//!
9//! Page IDs also have a disambiguated 256-bit representation which is given by starting with a
10//! blank bit pattern, and then repeatedly shifting it to the left by 6 bits, then adding the next
11//! child index, then adding 1. This disambiguated representation uniquely encodes all the page IDs
12//! in a fixed-width bit pattern as, essentially, a base-64 integer.
13
14use crate::{page::DEPTH, trie::KeyPath};
15use arrayvec::ArrayVec;
16use bitvec::prelude::*;
17use ruint::Uint;
18
19// The encoded representation of the highest valid page ID: the highest one at layer 42.
20const HIGHEST_ENCODED_42: Uint<256, 4> = Uint::from_be_bytes([
21    16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65,
22    4, 16, 65, 4, 16, 64,
23]);
24
25pub const MAX_PAGE_DEPTH: usize = 42;
26
27/// A unique ID for a page.
28///
29/// # Ordering
30///
31/// Page IDs are ordered "depth-first" such that:
32///  - An ID is always less than its child IDs.
33///  - An ID's child IDs are ordered ascending by child index.
34///  - An ID's child IDs are always less than any sibling IDs to the right of the ID.
35///
36/// This property lets us refer to sub-trees cleanly with simple ordering statements.
37#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
38pub struct PageId {
39    path: ArrayVec<u8, MAX_PAGE_DEPTH>,
40}
41
42/// The root page is the one containing the sub-trie directly descending from the root node.
43pub const ROOT_PAGE_ID: PageId = PageId {
44    path: ArrayVec::new_const(),
45};
46
47pub const MAX_CHILD_INDEX: u8 = (1 << DEPTH) - 1;
48
49/// The number of children each Page ID has.
50pub const NUM_CHILDREN: usize = MAX_CHILD_INDEX as usize + 1;
51
52/// The index of a children of a page.
53///
54/// Each page can be thought of a root-less binary tree. The leaves of that tree are roots of
55/// subtrees stored in subsequent pages. There are 64 (2^[`DEPTH`]) children in each page.
56#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
57pub struct ChildPageIndex(u8);
58
59impl ChildPageIndex {
60    pub fn new(index: u8) -> Option<Self> {
61        if index > MAX_CHILD_INDEX {
62            return None;
63        }
64        Some(Self(index))
65    }
66
67    pub fn to_u8(self) -> u8 {
68        self.0
69    }
70}
71
72impl Clone for PageId {
73    fn clone(&self) -> Self {
74        let mut new_path = ArrayVec::new();
75        unsafe {
76            // SAFETY: values are POD and lengths match.
77            new_path.set_len(self.path.len());
78        }
79
80        new_path.copy_from_slice(&self.path);
81        PageId { path: new_path }
82    }
83}
84
85impl PageId {
86    /// Decode a page ID from its disambiguated representation.
87    ///
88    /// This can fall out of bounds.
89    pub fn decode(bytes: [u8; 32]) -> Result<Self, InvalidPageIdBytes> {
90        let mut uint = Uint::from_be_bytes(bytes);
91
92        if uint > HIGHEST_ENCODED_42 {
93            return Err(InvalidPageIdBytes);
94        }
95
96        let leading_zeros = uint.leading_zeros();
97        let bit_count = 256 - leading_zeros;
98        let sextets = (bit_count + 5) / 6;
99
100        if bit_count == 0 {
101            return Ok(ROOT_PAGE_ID);
102        }
103
104        // we iterate the sextets from least significant to most significant, subtracting out
105        // 1 from each sextet. if the last sextet is zero after this operation, we skip it.
106        let mut path = ArrayVec::new();
107        for _ in 0..sextets - 1 {
108            uint -= Uint::<256, 4>::from(1);
109            let x = uint & Uint::from(0b111111);
110            path.push(x.to::<u8>());
111            uint >>= DEPTH;
112        }
113        if uint.byte(0) != 0 {
114            uint -= Uint::<256, 4>::from(1);
115            path.push(uint.byte(0));
116        }
117        path.reverse();
118
119        Ok(PageId { path })
120    }
121
122    /// Get the child index of the page at the given depth.
123    ///
124    /// This panics if the depth of the page is not at least `depth + 1`.
125    pub fn child_index_at_level(&self, depth: usize) -> ChildPageIndex {
126        ChildPageIndex(self.path[depth])
127    }
128
129    /// Encode this page ID to its disambiguated (fixed-width) representation.
130    pub fn encode(&self) -> [u8; 32] {
131        if self.path.len() < 10 {
132            // 7 + 6*9 = 61 - max bit-width of a page with depth 9.
133            let mut word: u64 = 0;
134            for limb in &self.path {
135                word += (limb + 1) as u64;
136                word <<= 6;
137            }
138
139            let mut buf = [0u8; 32];
140            let word_bytes = word.to_be_bytes();
141            buf[24..32].copy_from_slice(&word_bytes);
142            buf
143        } else {
144            let mut uint = Uint::<256, 4>::from(0);
145            for limb in &self.path {
146                uint += Uint::from(limb + 1);
147                uint <<= 6;
148            }
149
150            uint.to_be_bytes::<32>()
151        }
152    }
153
154    /// Get a length-dependent representation of the page id.
155    pub fn length_dependent_encoding(&self) -> &[u8] {
156        &self.path[..]
157    }
158
159    /// Get the depth of the page ID. The depth of the [`ROOT_PAGE_ID`] is 0.
160    pub fn depth(&self) -> usize {
161        self.path.len()
162    }
163
164    /// Construct the Child PageId given the previous PageId and the child index.
165    ///
166    /// Child index must be a 6 bit integer, two most significant bits must be zero.
167    /// Passed PageId must be a valid PageId and be located in a layer below 42 otherwise
168    /// `PageIdOverflow` will be returned.
169    pub fn child_page_id(&self, child_index: ChildPageIndex) -> Result<Self, ChildPageIdError> {
170        if self.path.len() >= MAX_PAGE_DEPTH {
171            return Err(ChildPageIdError::PageIdOverflow);
172        }
173
174        let mut path = self.path.clone();
175        path.push(child_index.0);
176        Ok(PageId { path })
177    }
178
179    /// Extract the Parent PageId given a PageId.
180    ///
181    /// If the provided PageId is the one pointing to the root,
182    /// then itself is returned.
183    pub fn parent_page_id(&self) -> Self {
184        if *self == ROOT_PAGE_ID {
185            return ROOT_PAGE_ID;
186        }
187
188        let mut path = self.path.clone();
189        let _ = path.pop();
190        PageId { path }
191    }
192
193    /// Whether this page is a descendant of the other.
194    pub fn is_descendant_of(&self, other: &PageId) -> bool {
195        self.path.starts_with(&other.path)
196    }
197
198    /// Get the maximum descendant of this page.
199    pub fn max_descendant(&self) -> PageId {
200        let mut page_id = self.clone();
201        while page_id.path.len() < MAX_PAGE_DEPTH {
202            page_id.path.push(MAX_CHILD_INDEX);
203        }
204
205        page_id
206    }
207
208    /// Get the minimum key-path which could land in this page.
209    pub fn min_key_path(&self) -> KeyPath {
210        let mut path = KeyPath::default();
211        for (i, child_index) in self.path.iter().enumerate() {
212            let bit_start = i * 6;
213            let bit_end = bit_start + 6;
214            let child_bits = &child_index.view_bits::<Msb0>()[2..8];
215            path.view_bits_mut::<Msb0>()[bit_start..bit_end].copy_from_bitslice(child_bits);
216        }
217
218        for i in (6 * self.path.len())..256 {
219            path.view_bits_mut::<Msb0>().set(i, false);
220        }
221
222        path
223    }
224
225    /// Get the maximum key-path which could land in this page.
226    pub fn max_key_path(&self) -> KeyPath {
227        let mut path = KeyPath::default();
228        for (i, child_index) in self.path.iter().enumerate() {
229            let bit_start = i * 6;
230            let bit_end = bit_start + 6;
231            let child_bits = &child_index.view_bits::<Msb0>()[2..8];
232            path.view_bits_mut::<Msb0>()[bit_start..bit_end].copy_from_bitslice(child_bits);
233        }
234
235        for i in (6 * self.path.len())..256 {
236            path.view_bits_mut::<Msb0>().set(i, true);
237        }
238
239        path
240    }
241}
242
243/// The bytes cannot form a valid PageId because they define
244/// a PageId bigger than the biggest valid one, the rightmost Page in the last layer.
245#[derive(Debug, PartialEq)]
246pub struct InvalidPageIdBytes;
247
248/// Errors related to the construction of a Child PageId
249#[derive(Debug, PartialEq)]
250pub enum ChildPageIdError {
251    /// PageId was at the last layer of the page tree
252    /// or it was too big to represent a valid page
253    PageIdOverflow,
254}
255
256/// Iterator of PageIds over a KeyPath,
257/// PageIds will be lazily constructed as needed
258pub struct PageIdsIterator {
259    key_path: Uint<256, 4>,
260    page_id: Option<PageId>,
261}
262
263impl PageIdsIterator {
264    /// Create a PageIds Iterator over a KeyPath
265    pub fn new(key_path: KeyPath) -> Self {
266        Self {
267            key_path: Uint::from_be_bytes(key_path),
268            page_id: Some(ROOT_PAGE_ID),
269        }
270    }
271}
272
273impl Iterator for PageIdsIterator {
274    type Item = PageId;
275
276    fn next(&mut self) -> Option<Self::Item> {
277        let prev = self.page_id.take()?;
278
279        // unwrap: `new` can't return an error because the key_path is shifted.
280        let child_index = ChildPageIndex::new(self.key_path.byte(31) >> 2).unwrap();
281        self.key_path <<= 6;
282        self.page_id = prev.child_page_id(child_index).ok();
283        Some(prev)
284    }
285}
286
287#[cfg(test)]
288mod tests {
289    use super::{
290        ChildPageIdError, ChildPageIndex, InvalidPageIdBytes, Msb0, PageId, PageIdsIterator, Uint,
291        HIGHEST_ENCODED_42, MAX_CHILD_INDEX, ROOT_PAGE_ID,
292    };
293    use bitvec::prelude::*;
294
295    const LOWEST_ENCODED_42: Uint<256, 4> = Uint::from_be_bytes([
296        0, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16, 65, 4, 16,
297        65, 4, 16, 65, 4, 16, 65,
298    ]);
299
300    fn child_page_id(page_id: &PageId, child_index: u8) -> Result<PageId, ChildPageIdError> {
301        page_id.child_page_id(ChildPageIndex::new(child_index).unwrap())
302    }
303
304    #[test]
305    fn test_child_and_parent_page_id() {
306        let mut page_id_1 = [0u8; 32]; // child index 6
307        page_id_1[31] = 0b00000111;
308        let page_id_1 = PageId::decode(page_id_1).unwrap();
309
310        assert_eq!(Ok(page_id_1.clone()), child_page_id(&ROOT_PAGE_ID, 6));
311        assert_eq!(ROOT_PAGE_ID, page_id_1.parent_page_id());
312
313        let mut page_id_2 = [0u8; 32]; // child index 4
314        page_id_2[31] = 0b11000101;
315        page_id_2[30] = 0b00000001;
316        let page_id_2 = PageId::decode(page_id_2).unwrap();
317
318        assert_eq!(Ok(page_id_2.clone()), child_page_id(&page_id_1, 4));
319        assert_eq!(page_id_1, page_id_2.parent_page_id());
320
321        let mut page_id_3 = [0u8; 32]; // child index 63
322        page_id_3[31] = 0b10000000;
323        page_id_3[30] = 0b01110001;
324        let page_id_3 = PageId::decode(page_id_3).unwrap();
325
326        assert_eq!(
327            Ok(page_id_3.clone()),
328            child_page_id(&page_id_2, MAX_CHILD_INDEX),
329        );
330        assert_eq!(page_id_2, page_id_3.parent_page_id());
331    }
332
333    #[test]
334    fn test_page_ids_iterator() {
335        // key_path = 0b000001|000010|0...
336        let mut key_path = [0u8; 32];
337        key_path[0] = 0b00000100;
338        key_path[1] = 0b00100000;
339
340        let mut page_id_1 = [0u8; 32];
341        page_id_1[31] = 0b00000010; // 0b000001 + 1
342        let page_id_1 = PageId::decode(page_id_1).unwrap();
343        let mut page_id_2 = [0u8; 32];
344        page_id_2[31] = 0b10000011; // (0b000001 + 1 << 6) + 0b000010 + 1
345        let page_id_2 = PageId::decode(page_id_2).unwrap();
346
347        let mut page_ids = PageIdsIterator::new(key_path);
348        assert_eq!(page_ids.next(), Some(ROOT_PAGE_ID));
349        assert_eq!(page_ids.next(), Some(page_id_1));
350        assert_eq!(page_ids.next(), Some(page_id_2));
351
352        // key_path = 0b000010|111111|0...
353        let mut key_path = [0u8; 32];
354        key_path[0] = 0b00001011;
355        key_path[1] = 0b11110000;
356
357        let mut page_id_1 = [0u8; 32];
358        page_id_1[31] = 0b00000011; // 0b000010 + 1
359        let page_id_1 = PageId::decode(page_id_1).unwrap();
360        let mut page_id_2 = [0u8; 32];
361        page_id_2[31] = 0b0000000;
362        page_id_2[30] = 0b0000001; // (0b00000011 << 6) + 0b111111 + 1 = (0b00000011 + 1) << 6
363        let page_id_2 = PageId::decode(page_id_2).unwrap();
364
365        let mut page_ids = PageIdsIterator::new(key_path);
366        assert_eq!(page_ids.next(), Some(ROOT_PAGE_ID));
367        assert_eq!(page_ids.next(), Some(page_id_1));
368        assert_eq!(page_ids.next(), Some(page_id_2));
369    }
370
371    #[test]
372    fn invalid_child_index() {
373        assert_eq!(None, ChildPageIndex::new(0b01010000));
374        assert_eq!(None, ChildPageIndex::new(0b10000100));
375        assert_eq!(None, ChildPageIndex::new(0b11000101));
376    }
377
378    #[test]
379    fn test_invalid_page_id() {
380        // position 255
381        let mut page_id = [0u8; 32];
382        page_id[0] = 128;
383        assert_eq!(Err(InvalidPageIdBytes), PageId::decode(page_id));
384
385        // position 252
386        let mut page_id = [0u8; 32];
387        page_id[0] = 128;
388        assert_eq!(Err(InvalidPageIdBytes), PageId::decode(page_id));
389    }
390
391    #[test]
392    fn test_page_id_overflow() {
393        let first_page_last_layer = PageIdsIterator::new([0u8; 32]).last().unwrap();
394        let last_page_last_layer = PageIdsIterator::new([255; 32]).last().unwrap();
395        assert_eq!(
396            Err(ChildPageIdError::PageIdOverflow),
397            child_page_id(&first_page_last_layer, 0),
398        );
399        assert_eq!(
400            Err(ChildPageIdError::PageIdOverflow),
401            child_page_id(&last_page_last_layer, 0),
402        );
403
404        // position 255
405        let page_id = PageId::decode(HIGHEST_ENCODED_42.to_be_bytes()).unwrap();
406        assert_eq!(
407            Err(ChildPageIdError::PageIdOverflow),
408            child_page_id(&page_id, 0),
409        );
410
411        // any PageId bigger than LOWEST_42 must overflow
412        let mut page_id = LOWEST_ENCODED_42.to_be_bytes();
413        page_id[31] = 255;
414        let page_id = PageId::decode(page_id).unwrap();
415        assert_eq!(
416            Err(ChildPageIdError::PageIdOverflow),
417            child_page_id(&page_id, 0),
418        );
419
420        // position 245
421        let mut page_id = [0u8; 32];
422        page_id[1] = 32;
423        let page_id = PageId::decode(page_id).unwrap();
424        assert!(child_page_id(&page_id, 0).is_ok());
425
426        // neither of those two have to panic if called at most 41 times
427        let mut low = ROOT_PAGE_ID;
428        let mut high = ROOT_PAGE_ID;
429        for _ in 0..42 {
430            low = child_page_id(&low, 0).unwrap();
431            high = child_page_id(&high, MAX_CHILD_INDEX).unwrap();
432        }
433    }
434
435    #[test]
436    fn page_id_sibling_order() {
437        let root_page = ROOT_PAGE_ID;
438        let mut last_child = None;
439        for i in 0..=MAX_CHILD_INDEX {
440            let child = root_page.child_page_id(ChildPageIndex(i)).unwrap();
441            assert!(child > root_page);
442
443            if let Some(last) = last_child.take() {
444                assert!(child > last);
445            }
446            last_child = Some(child);
447        }
448    }
449
450    #[test]
451    fn page_max_descendants_all_less_than_right_sibling() {
452        let sibling_left = ROOT_PAGE_ID.child_page_id(ChildPageIndex(0)).unwrap();
453        let sibling_right = ROOT_PAGE_ID.child_page_id(ChildPageIndex(1)).unwrap();
454
455        let mut left_descendant = sibling_left.clone();
456        loop {
457            left_descendant = match left_descendant.child_page_id(ChildPageIndex(MAX_CHILD_INDEX)) {
458                Err(_) => break,
459                Ok(d) => d,
460            };
461
462            assert!(left_descendant < sibling_right);
463        }
464    }
465
466    #[test]
467    fn page_min_descendants_all_greater_than_left_sibling() {
468        let sibling_left = ROOT_PAGE_ID.child_page_id(ChildPageIndex(0)).unwrap();
469        let sibling_right = ROOT_PAGE_ID.child_page_id(ChildPageIndex(1)).unwrap();
470
471        let mut right_descendant = sibling_right.clone();
472        loop {
473            right_descendant = match right_descendant.child_page_id(ChildPageIndex(0)) {
474                Err(_) => break,
475                Ok(d) => d,
476            };
477
478            assert!(right_descendant > sibling_left);
479        }
480    }
481
482    #[test]
483    fn root_min_key_path() {
484        assert_eq!(ROOT_PAGE_ID.min_key_path(), [0; 32]);
485    }
486
487    #[test]
488    fn root_max_key_path() {
489        assert_eq!(ROOT_PAGE_ID.max_key_path(), [255; 32]);
490    }
491
492    #[test]
493    fn page_min_key_path() {
494        let min_page = ROOT_PAGE_ID.child_page_id(ChildPageIndex(0)).unwrap();
495        let max_page = ROOT_PAGE_ID
496            .child_page_id(ChildPageIndex(MAX_CHILD_INDEX))
497            .unwrap();
498
499        assert_eq!(min_page.min_key_path(), [0; 32]);
500        let mut key_path = [0; 32];
501        for i in 0..6 {
502            key_path.view_bits_mut::<Msb0>().set(i, true);
503        }
504        assert_eq!(max_page.min_key_path(), key_path);
505    }
506
507    #[test]
508    fn page_max_key_path() {
509        let min_page = ROOT_PAGE_ID.child_page_id(ChildPageIndex(0)).unwrap();
510        let max_page = ROOT_PAGE_ID
511            .child_page_id(ChildPageIndex(MAX_CHILD_INDEX))
512            .unwrap();
513
514        assert_eq!(max_page.max_key_path(), [255; 32]);
515        let mut key_path = [255; 32];
516        for i in 0..6 {
517            key_path.view_bits_mut::<Msb0>().set(i, false);
518        }
519        assert_eq!(min_page.max_key_path(), key_path);
520    }
521}