anondb_kv/lexicographic/
key.rs

1/// A vector of bytes representing a lexicographically sortable set of keys. Each key is separated
2/// by a byte 0x00 to allow partial index searches.
3///
4/// If I have an index (id: u8, created_at: u8, name: String ) and i want to filter by
5/// { id = 0, created_at = gt(1) && lt(99) }
6///
7/// I need to sort by 00000000100000000..0000000063000000. But i need to include all keys that are
8/// longer than the provided slice. e.g. 0000000050000000a3eb398e should be included.
9///
10/// To achieve this we need a separator that is a fixed value that we can use for comparison. If we
11/// choose this byte as 0x00, then we can suffix the upper bound of our sort queries with
12/// 0x01 to include all longer keys.
13///
14/// To visualize this better consider the following hex example:
15///
16/// We want values between 0001 and 00ff. We also want all longer hex values. We compute our
17/// bounds:
18///
19/// lower: 0001
20/// upper: 00ff01
21///
22/// Now the value 00ee00aabbcc sorts within this range
23///
24/// This strategy adds ~1 byte of overhead per field (0 bytes for indices with 1 field).
25#[derive(Default, Clone)]
26pub struct LexicographicKey {
27    bytes: Vec<u8>,
28}
29
30impl LexicographicKey {
31    /// Append a slice representing a lexicographically sortable key.
32    pub fn append_key_slice(&mut self, slice: &[u8]) {
33        if !self.bytes.is_empty() {
34            self.append_separator();
35        }
36        self.bytes.extend_from_slice(slice);
37    }
38
39    /// Append a 0x01 byte that will sort all longer keys before this key.
40    pub fn append_upper_inclusive_byte(&mut self) {
41        self.bytes.push(0x01);
42    }
43
44    pub fn append_separator(&mut self) {
45        self.bytes.push(0x00);
46    }
47
48    pub fn is_empty(&self) -> bool {
49        self.bytes.is_empty()
50    }
51
52    pub fn take(&mut self) -> Vec<u8> {
53        std::mem::take(&mut self.bytes)
54    }
55
56    pub fn as_slice(&self) -> &[u8] {
57        &self.bytes
58    }
59
60    pub fn to_vec(&self) -> Vec<u8> {
61        self.bytes.clone()
62    }
63}
64
65#[cfg(test)]
66mod test {
67    use super::*;
68
69    #[test]
70    fn key_sort_longer_vec_within() {
71        let mut start_key = LexicographicKey::default();
72        start_key.append_key_slice(&[0u8; 32]);
73        let mut end_key = LexicographicKey::default();
74        end_key.append_key_slice(&[u8::MAX; 32]);
75        end_key.append_upper_inclusive_byte();
76
77        let mut expected_within = LexicographicKey::default();
78        expected_within.append_key_slice(&[5u8; 32]);
79        expected_within.append_key_slice(&[99u8; 32]);
80
81        let range = start_key.as_slice()..end_key.as_slice();
82        assert!(range.contains(&expected_within.as_slice()));
83    }
84}