Skip to main content

reddb_server/storage/schema/
canonical_key.rs

1use std::cmp::Ordering;
2use std::net::IpAddr;
3use std::sync::Arc;
4
5use super::Value;
6
/// Type family tag carried by every key of an ordered secondary index.
///
/// Families are kept deliberately fine-grained: range pushdown is treated as
/// safe only while every indexed value of a column belongs to one family.
///
/// `PartialOrd`/`Ord` are derived, so families sort in declaration order.
/// Moving a variant therefore changes how keys of different families sort
/// against each other.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CanonicalKeyFamily {
    Null,
    Boolean,
    Integer,
    BigInt,
    UnsignedInteger,
    Float,
    Text,
    Blob,
    Timestamp,
    Duration,
    IpAddr,
    MacAddr,
    Json,
    Uuid,
    NodeRef,
    EdgeRef,
    VectorRef,
    RowRef,
    Color,
    Email,
    Url,
    Phone,
    Semver,
    Cidr,
    Date,
    Time,
    Decimal,
    EnumValue,
    TimestampMs,
    Ipv4,
    Ipv6,
    Subnet,
    Port,
    Latitude,
    Longitude,
    GeoPoint,
    Country2,
    Country3,
    Lang2,
    Lang5,
    Currency,
    ColorAlpha,
    KeyRef,
    DocRef,
    TableRef,
    PageRef,
    Password,
}
61
62/// Canonical multi-type key used by ordered in-memory indexes.
63///
64/// The ordering is stable and type-aware. Different families never compare
65/// equal and range pushdown is only enabled when a column stays within one
66/// family. Exact point lookups remain safe even when a column has mixed
67/// families because BTree point seeks are still exact on the encoded key.
68#[derive(Debug, Clone, PartialEq, Eq, Hash)]
69pub enum CanonicalKey {
70    Null,
71    Boolean(bool),
72    Signed(CanonicalKeyFamily, i64),
73    Unsigned(CanonicalKeyFamily, u64),
74    Float(u64),
75    /// Text-kind values. `Arc<str>` instead of `String` so
76    /// `Value::Text(Arc<str>)` roundtrips free (Arc bump) rather than
77    /// allocating a new String per encode. Text-like variants built
78    /// from `String` (NodeRef, EdgeRef, Email, Url, TableRef,
79    /// Password) pay one Arc::from allocation at encode time — same
80    /// cost as the previous String clone. Net: GROUP BY over a
81    /// `TEXT` column stops paying N allocations per scan.
82    Text(CanonicalKeyFamily, Arc<str>),
83    Bytes(CanonicalKeyFamily, Vec<u8>),
84    PairTextU64(CanonicalKeyFamily, String, u64),
85    PairTextText(CanonicalKeyFamily, String, String),
86    PairU32U8(CanonicalKeyFamily, u32, u8),
87    PairU32U32(CanonicalKeyFamily, u32, u32),
88    PairI32I32(CanonicalKeyFamily, i32, i32),
89}
90
91impl CanonicalKey {
92    pub fn family(&self) -> CanonicalKeyFamily {
93        match self {
94            Self::Null => CanonicalKeyFamily::Null,
95            Self::Boolean(_) => CanonicalKeyFamily::Boolean,
96            Self::Signed(family, _) => *family,
97            Self::Unsigned(family, _) => *family,
98            Self::Float(_) => CanonicalKeyFamily::Float,
99            Self::Text(family, _) => *family,
100            Self::Bytes(family, _) => *family,
101            Self::PairTextU64(family, _, _) => *family,
102            Self::PairTextText(family, _, _) => *family,
103            Self::PairU32U8(family, _, _) => *family,
104            Self::PairU32U32(family, _, _) => *family,
105            Self::PairI32I32(family, _, _) => *family,
106        }
107    }
108
109    pub fn into_value(self) -> Value {
110        match self {
111            Self::Null => Value::Null,
112            Self::Boolean(v) => Value::Boolean(v),
113            Self::Signed(CanonicalKeyFamily::Integer, v) => Value::Integer(v),
114            Self::Signed(CanonicalKeyFamily::BigInt, v) => Value::BigInt(v),
115            Self::Signed(CanonicalKeyFamily::Timestamp, v) => Value::Timestamp(v),
116            Self::Signed(CanonicalKeyFamily::Duration, v) => Value::Duration(v),
117            Self::Signed(CanonicalKeyFamily::Date, v) => Value::Date(v as i32),
118            Self::Signed(CanonicalKeyFamily::Decimal, v) => Value::Decimal(v),
119            Self::Signed(CanonicalKeyFamily::TimestampMs, v) => Value::TimestampMs(v),
120            Self::Signed(CanonicalKeyFamily::Latitude, v) => Value::Latitude(v as i32),
121            Self::Signed(CanonicalKeyFamily::Longitude, v) => Value::Longitude(v as i32),
122            Self::Unsigned(CanonicalKeyFamily::UnsignedInteger, v) => Value::UnsignedInteger(v),
123            Self::Unsigned(CanonicalKeyFamily::Phone, v) => Value::Phone(v),
124            Self::Unsigned(CanonicalKeyFamily::Semver, v) => Value::Semver(v as u32),
125            Self::Unsigned(CanonicalKeyFamily::Time, v) => Value::Time(v as u32),
126            Self::Unsigned(CanonicalKeyFamily::EnumValue, v) => Value::EnumValue(v as u8),
127            Self::Unsigned(CanonicalKeyFamily::Port, v) => Value::Port(v as u16),
128            Self::Unsigned(CanonicalKeyFamily::PageRef, v) => Value::PageRef(v as u32),
129            Self::Float(bits) => Value::Float(f64::from_bits(bits)),
130            Self::Text(CanonicalKeyFamily::Text, v) => Value::text(v),
131            Self::Text(CanonicalKeyFamily::NodeRef, v) => Value::NodeRef(v.to_string()),
132            Self::Text(CanonicalKeyFamily::EdgeRef, v) => Value::EdgeRef(v.to_string()),
133            Self::Text(CanonicalKeyFamily::Email, v) => Value::Email(v.to_string()),
134            Self::Text(CanonicalKeyFamily::Url, v) => Value::Url(v.to_string()),
135            Self::Text(CanonicalKeyFamily::TableRef, v) => Value::TableRef(v.to_string()),
136            Self::Text(CanonicalKeyFamily::Password, v) => Value::Password(v.to_string()),
137            Self::Bytes(CanonicalKeyFamily::Blob, v) => Value::Blob(v),
138            Self::Bytes(CanonicalKeyFamily::MacAddr, v) => {
139                let mut out = [0u8; 6];
140                out.copy_from_slice(&v[..6]);
141                Value::MacAddr(out)
142            }
143            Self::Bytes(CanonicalKeyFamily::Json, v) => Value::Json(v),
144            Self::Bytes(CanonicalKeyFamily::Uuid, v) => {
145                let mut out = [0u8; 16];
146                out.copy_from_slice(&v[..16]);
147                Value::Uuid(out)
148            }
149            Self::Bytes(CanonicalKeyFamily::IpAddr, v) => {
150                let mut out = [0u8; 16];
151                out.copy_from_slice(&v[..16]);
152                Value::IpAddr(IpAddr::from(out))
153            }
154            Self::Bytes(CanonicalKeyFamily::Ipv4, v) => {
155                let mut out = [0u8; 4];
156                out.copy_from_slice(&v[..4]);
157                Value::Ipv4(u32::from_be_bytes(out))
158            }
159            Self::Bytes(CanonicalKeyFamily::Ipv6, v) => {
160                let mut out = [0u8; 16];
161                out.copy_from_slice(&v[..16]);
162                Value::Ipv6(out)
163            }
164            Self::Bytes(CanonicalKeyFamily::Color, v) => {
165                let mut out = [0u8; 3];
166                out.copy_from_slice(&v[..3]);
167                Value::Color(out)
168            }
169            Self::Bytes(CanonicalKeyFamily::Country2, v) => {
170                let mut out = [0u8; 2];
171                out.copy_from_slice(&v[..2]);
172                Value::Country2(out)
173            }
174            Self::Bytes(CanonicalKeyFamily::Country3, v) => {
175                let mut out = [0u8; 3];
176                out.copy_from_slice(&v[..3]);
177                Value::Country3(out)
178            }
179            Self::Bytes(CanonicalKeyFamily::Lang2, v) => {
180                let mut out = [0u8; 2];
181                out.copy_from_slice(&v[..2]);
182                Value::Lang2(out)
183            }
184            Self::Bytes(CanonicalKeyFamily::Lang5, v) => {
185                let mut out = [0u8; 5];
186                out.copy_from_slice(&v[..5]);
187                Value::Lang5(out)
188            }
189            Self::Bytes(CanonicalKeyFamily::Currency, v) => {
190                let mut out = [0u8; 3];
191                out.copy_from_slice(&v[..3]);
192                Value::Currency(out)
193            }
194            Self::Bytes(CanonicalKeyFamily::ColorAlpha, v) => {
195                let mut out = [0u8; 4];
196                out.copy_from_slice(&v[..4]);
197                Value::ColorAlpha(out)
198            }
199            Self::PairTextU64(CanonicalKeyFamily::VectorRef, collection, id) => {
200                Value::VectorRef(collection, id)
201            }
202            Self::PairTextU64(CanonicalKeyFamily::RowRef, collection, id) => {
203                Value::RowRef(collection, id)
204            }
205            Self::PairTextU64(CanonicalKeyFamily::DocRef, collection, id) => {
206                Value::DocRef(collection, id)
207            }
208            Self::PairTextText(CanonicalKeyFamily::KeyRef, collection, key) => {
209                Value::KeyRef(collection, key)
210            }
211            Self::PairU32U8(CanonicalKeyFamily::Cidr, ip, prefix) => Value::Cidr(ip, prefix),
212            Self::PairU32U32(CanonicalKeyFamily::Subnet, ip, mask) => Value::Subnet(ip, mask),
213            Self::PairI32I32(CanonicalKeyFamily::GeoPoint, lat, lon) => Value::GeoPoint(lat, lon),
214            _ => unreachable!("canonical key family/value mismatch"),
215        }
216    }
217}
218
219impl Ord for CanonicalKey {
220    fn cmp(&self, other: &Self) -> Ordering {
221        let family_cmp = self.family().cmp(&other.family());
222        if family_cmp != Ordering::Equal {
223            return family_cmp;
224        }
225        match (self, other) {
226            (Self::Null, Self::Null) => Ordering::Equal,
227            (Self::Boolean(left), Self::Boolean(right)) => left.cmp(right),
228            (Self::Signed(_, left), Self::Signed(_, right)) => left.cmp(right),
229            (Self::Unsigned(_, left), Self::Unsigned(_, right)) => left.cmp(right),
230            (Self::Float(left), Self::Float(right)) => {
231                f64::from_bits(*left).total_cmp(&f64::from_bits(*right))
232            }
233            (Self::Text(_, left), Self::Text(_, right)) => left.cmp(right),
234            (Self::Bytes(_, left), Self::Bytes(_, right)) => left.cmp(right),
235            (Self::PairTextU64(_, l_text, l_num), Self::PairTextU64(_, r_text, r_num)) => {
236                l_text.cmp(r_text).then_with(|| l_num.cmp(r_num))
237            }
238            (Self::PairTextText(_, l1, l2), Self::PairTextText(_, r1, r2)) => {
239                l1.cmp(r1).then_with(|| l2.cmp(r2))
240            }
241            (Self::PairU32U8(_, l1, l2), Self::PairU32U8(_, r1, r2)) => {
242                l1.cmp(r1).then_with(|| l2.cmp(r2))
243            }
244            (Self::PairU32U32(_, l1, l2), Self::PairU32U32(_, r1, r2)) => {
245                l1.cmp(r1).then_with(|| l2.cmp(r2))
246            }
247            (Self::PairI32I32(_, l1, l2), Self::PairI32I32(_, r1, r2)) => {
248                l1.cmp(r1).then_with(|| l2.cmp(r2))
249            }
250            _ => Ordering::Equal,
251        }
252    }
253}
254
255impl PartialOrd for CanonicalKey {
256    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
257        Some(self.cmp(other))
258    }
259}
260
261pub fn value_to_canonical_key(value: &Value) -> Option<CanonicalKey> {
262    match value {
263        Value::Null => Some(CanonicalKey::Null),
264        Value::Integer(v) => Some(CanonicalKey::Signed(CanonicalKeyFamily::Integer, *v)),
265        Value::UnsignedInteger(v) => Some(CanonicalKey::Unsigned(
266            CanonicalKeyFamily::UnsignedInteger,
267            *v,
268        )),
269        Value::Float(v) if v.is_finite() => Some(CanonicalKey::Float(v.to_bits())),
270        Value::Float(_) => None,
271        Value::Text(v) => Some(CanonicalKey::Text(CanonicalKeyFamily::Text, v.clone())),
272        Value::Blob(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::Blob, v.clone())),
273        Value::Boolean(v) => Some(CanonicalKey::Boolean(*v)),
274        Value::Timestamp(v) => Some(CanonicalKey::Signed(CanonicalKeyFamily::Timestamp, *v)),
275        Value::Duration(v) => Some(CanonicalKey::Signed(CanonicalKeyFamily::Duration, *v)),
276        Value::IpAddr(v) => Some(CanonicalKey::Bytes(
277            CanonicalKeyFamily::IpAddr,
278            ipaddr_to_bytes(*v).to_vec(),
279        )),
280        Value::MacAddr(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::MacAddr, v.to_vec())),
281        Value::Vector(_) => None,
282        Value::Json(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::Json, v.clone())),
283        Value::Uuid(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::Uuid, v.to_vec())),
284        Value::NodeRef(v) => Some(CanonicalKey::Text(
285            CanonicalKeyFamily::NodeRef,
286            Arc::from(v.as_str()),
287        )),
288        Value::EdgeRef(v) => Some(CanonicalKey::Text(
289            CanonicalKeyFamily::EdgeRef,
290            Arc::from(v.as_str()),
291        )),
292        Value::VectorRef(collection, id) => Some(CanonicalKey::PairTextU64(
293            CanonicalKeyFamily::VectorRef,
294            collection.clone(),
295            *id,
296        )),
297        Value::RowRef(collection, id) => Some(CanonicalKey::PairTextU64(
298            CanonicalKeyFamily::RowRef,
299            collection.clone(),
300            *id,
301        )),
302        Value::Color(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::Color, v.to_vec())),
303        Value::Email(v) => Some(CanonicalKey::Text(
304            CanonicalKeyFamily::Email,
305            Arc::from(v.as_str()),
306        )),
307        Value::Url(v) => Some(CanonicalKey::Text(
308            CanonicalKeyFamily::Url,
309            Arc::from(v.as_str()),
310        )),
311        Value::Phone(v) => Some(CanonicalKey::Unsigned(CanonicalKeyFamily::Phone, *v)),
312        Value::Semver(v) => Some(CanonicalKey::Unsigned(
313            CanonicalKeyFamily::Semver,
314            *v as u64,
315        )),
316        Value::Cidr(ip, prefix) => Some(CanonicalKey::PairU32U8(
317            CanonicalKeyFamily::Cidr,
318            *ip,
319            *prefix,
320        )),
321        Value::Date(v) => Some(CanonicalKey::Signed(
322            CanonicalKeyFamily::Date,
323            i64::from(*v),
324        )),
325        Value::Time(v) => Some(CanonicalKey::Unsigned(CanonicalKeyFamily::Time, *v as u64)),
326        Value::Decimal(v) => Some(CanonicalKey::Signed(CanonicalKeyFamily::Decimal, *v)),
327        Value::EnumValue(v) => Some(CanonicalKey::Unsigned(
328            CanonicalKeyFamily::EnumValue,
329            *v as u64,
330        )),
331        Value::Array(_) => None,
332        Value::TimestampMs(v) => Some(CanonicalKey::Signed(CanonicalKeyFamily::TimestampMs, *v)),
333        Value::Ipv4(v) => Some(CanonicalKey::Bytes(
334            CanonicalKeyFamily::Ipv4,
335            v.to_be_bytes().to_vec(),
336        )),
337        Value::Ipv6(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::Ipv6, v.to_vec())),
338        Value::Subnet(ip, mask) => Some(CanonicalKey::PairU32U32(
339            CanonicalKeyFamily::Subnet,
340            *ip,
341            *mask,
342        )),
343        Value::Port(v) => Some(CanonicalKey::Unsigned(CanonicalKeyFamily::Port, *v as u64)),
344        Value::Latitude(v) => Some(CanonicalKey::Signed(
345            CanonicalKeyFamily::Latitude,
346            i64::from(*v),
347        )),
348        Value::Longitude(v) => Some(CanonicalKey::Signed(
349            CanonicalKeyFamily::Longitude,
350            i64::from(*v),
351        )),
352        Value::GeoPoint(lat, lon) => Some(CanonicalKey::PairI32I32(
353            CanonicalKeyFamily::GeoPoint,
354            *lat,
355            *lon,
356        )),
357        Value::Country2(v) => Some(CanonicalKey::Bytes(
358            CanonicalKeyFamily::Country2,
359            v.to_vec(),
360        )),
361        Value::Country3(v) => Some(CanonicalKey::Bytes(
362            CanonicalKeyFamily::Country3,
363            v.to_vec(),
364        )),
365        Value::Lang2(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::Lang2, v.to_vec())),
366        Value::Lang5(v) => Some(CanonicalKey::Bytes(CanonicalKeyFamily::Lang5, v.to_vec())),
367        Value::Currency(v) => Some(CanonicalKey::Bytes(
368            CanonicalKeyFamily::Currency,
369            v.to_vec(),
370        )),
371        Value::AssetCode(v) => Some(CanonicalKey::Text(
372            CanonicalKeyFamily::Text,
373            Arc::from(v.as_str()),
374        )),
375        Value::Money { .. } => None,
376        Value::ColorAlpha(v) => Some(CanonicalKey::Bytes(
377            CanonicalKeyFamily::ColorAlpha,
378            v.to_vec(),
379        )),
380        Value::BigInt(v) => Some(CanonicalKey::Signed(CanonicalKeyFamily::BigInt, *v)),
381        Value::KeyRef(collection, key) => Some(CanonicalKey::PairTextText(
382            CanonicalKeyFamily::KeyRef,
383            collection.clone(),
384            key.clone(),
385        )),
386        Value::DocRef(collection, id) => Some(CanonicalKey::PairTextU64(
387            CanonicalKeyFamily::DocRef,
388            collection.clone(),
389            *id,
390        )),
391        Value::TableRef(v) => Some(CanonicalKey::Text(
392            CanonicalKeyFamily::TableRef,
393            Arc::from(v.as_str()),
394        )),
395        Value::PageRef(v) => Some(CanonicalKey::Unsigned(
396            CanonicalKeyFamily::PageRef,
397            *v as u64,
398        )),
399        Value::Secret(_) => None,
400        Value::Password(v) => Some(CanonicalKey::Text(
401            CanonicalKeyFamily::Password,
402            Arc::from(v.as_str()),
403        )),
404    }
405}
406
/// Returns the 16-byte form of `value`.
///
/// An IPv4 address is first converted to its IPv4-mapped IPv6 address; an
/// IPv6 address is used as is. The result is that address's octets.
fn ipaddr_to_bytes(value: IpAddr) -> [u8; 16] {
    let as_v6 = match value {
        IpAddr::V4(v4) => v4.to_ipv6_mapped(),
        IpAddr::V6(v6) => v6,
    };
    as_v6.octets()
}
413
#[cfg(test)]
mod tests {
    use super::*;

    /// Two integer keys sort by their numeric payload.
    #[test]
    fn canonical_keys_are_ordered_inside_their_family() {
        let smaller = value_to_canonical_key(&Value::Integer(-5)).unwrap();
        let larger = value_to_canonical_key(&Value::Integer(20)).unwrap();
        assert!(smaller < larger);
    }

    /// A NaN float gets no canonical key.
    #[test]
    fn float_keys_reject_nan() {
        let key = value_to_canonical_key(&Value::Float(f64::NAN));
        assert!(key.is_none());
    }

    /// Plain text and e-mail values are tagged with distinct families.
    #[test]
    fn text_and_email_use_different_families() {
        let text_key = value_to_canonical_key(&Value::text("alice".to_string())).unwrap();
        let email_key =
            value_to_canonical_key(&Value::Email("alice@example.com".to_string())).unwrap();
        assert_ne!(text_key.family(), email_key.family());
    }
}