Skip to main content

icydb_core/db/
identity.rs

1#![expect(clippy::cast_possible_truncation)]
2//! Identity invariants and construction.
3//!
4//! Invariants:
5//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
6//! - All construction paths validate invariants.
7//! - Stored byte representation is canonical and order-preserving.
8
9use crate::MAX_INDEX_FIELDS;
10use std::{
11    cmp::Ordering,
12    fmt::{self, Display},
13};
14use thiserror::Error as ThisError;
15
16///
17/// Constants
18///
19
20pub(super) const MAX_ENTITY_NAME_LEN: usize = 64;
21pub(super) const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
22pub(super) const MAX_INDEX_NAME_LEN: usize =
23    MAX_ENTITY_NAME_LEN + (MAX_INDEX_FIELDS * (MAX_INDEX_FIELD_NAME_LEN + 1));
24
25///
26/// Decode errors (storage / corruption boundary)
27///
28
29#[derive(Debug, ThisError)]
30pub enum IdentityDecodeError {
31    #[error("invalid size")]
32    InvalidSize,
33
34    #[error("invalid length")]
35    InvalidLength,
36
37    #[error("non-ascii encoding")]
38    NonAscii,
39
40    #[error("non-zero padding")]
41    NonZeroPadding,
42}
43
44///
45/// EntityNameError
46///
47
48#[derive(Debug, ThisError)]
49pub enum EntityNameError {
50    #[error("entity name is empty")]
51    Empty,
52
53    #[error("entity name length {len} exceeds max {max}")]
54    TooLong { len: usize, max: usize },
55
56    #[error("entity name must be ASCII")]
57    NonAscii,
58}
59
60///
61/// IndexNameError
62///
63
64#[derive(Debug, ThisError)]
65pub enum IndexNameError {
66    #[error("index has {len} fields (max {max})")]
67    TooManyFields { len: usize, max: usize },
68
69    #[error("index field name '{field}' exceeds max length {max}")]
70    FieldTooLong { field: String, max: usize },
71
72    #[error("index field name '{field}' must be ASCII")]
73    FieldNonAscii { field: String },
74
75    #[error("index name length {len} exceeds max {max}")]
76    TooLong { len: usize, max: usize },
77}
78
79///
80/// EntityName
81///
82
83#[derive(Clone, Copy, Eq, Hash, PartialEq)]
84pub struct EntityName {
85    len: u8,
86    bytes: [u8; MAX_ENTITY_NAME_LEN],
87}
88
89impl EntityName {
90    /// Fixed on-disk size in bytes (stable, protocol-level)
91    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
92
93    /// Fixed in-memory size (for buffers and arrays)
94    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
95
96    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
97        let bytes = name.as_bytes();
98        let len = bytes.len();
99
100        if len == 0 {
101            return Err(EntityNameError::Empty);
102        }
103        if len > MAX_ENTITY_NAME_LEN {
104            return Err(EntityNameError::TooLong {
105                len,
106                max: MAX_ENTITY_NAME_LEN,
107            });
108        }
109        if !bytes.is_ascii() {
110            return Err(EntityNameError::NonAscii);
111        }
112
113        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
114        out[..len].copy_from_slice(bytes);
115
116        Ok(Self {
117            len: len as u8,
118            bytes: out,
119        })
120    }
121
122    #[must_use]
123    pub const fn len(&self) -> usize {
124        self.len as usize
125    }
126
127    #[must_use]
128    pub const fn is_empty(&self) -> bool {
129        self.len() == 0
130    }
131
132    #[must_use]
133    pub fn as_bytes(&self) -> &[u8] {
134        &self.bytes[..self.len()]
135    }
136
137    #[must_use]
138    pub fn as_str(&self) -> &str {
139        // Invariant: construction and decoding enforce ASCII-only storage,
140        // so UTF-8 decoding cannot fail.
141        std::str::from_utf8(self.as_bytes()).expect("EntityName invariant: ASCII-only storage")
142    }
143
144    #[must_use]
145    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
146        let mut out = [0u8; Self::STORED_SIZE_USIZE];
147        out[0] = self.len;
148        out[1..].copy_from_slice(&self.bytes);
149        out
150    }
151
152    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
153        if bytes.len() != Self::STORED_SIZE_USIZE {
154            return Err(IdentityDecodeError::InvalidSize);
155        }
156
157        let len = bytes[0] as usize;
158        if len == 0 || len > MAX_ENTITY_NAME_LEN {
159            return Err(IdentityDecodeError::InvalidLength);
160        }
161        if !bytes[1..=len].is_ascii() {
162            return Err(IdentityDecodeError::NonAscii);
163        }
164        if bytes[1 + len..].iter().any(|&b| b != 0) {
165            return Err(IdentityDecodeError::NonZeroPadding);
166        }
167
168        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
169        name.copy_from_slice(&bytes[1..]);
170
171        Ok(Self {
172            len: len as u8,
173            bytes: name,
174        })
175    }
176
177    #[must_use]
178    pub const fn max_storable() -> Self {
179        Self {
180            len: MAX_ENTITY_NAME_LEN as u8,
181            bytes: [b'z'; MAX_ENTITY_NAME_LEN],
182        }
183    }
184}
185
186impl Ord for EntityName {
187    fn cmp(&self, other: &Self) -> Ordering {
188        self.len.cmp(&other.len).then(self.bytes.cmp(&other.bytes))
189    }
190}
191
192impl PartialOrd for EntityName {
193    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
194        Some(self.cmp(other))
195    }
196}
197
198impl Display for EntityName {
199    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
200        f.write_str(self.as_str())
201    }
202}
203
204impl fmt::Debug for EntityName {
205    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
206        write!(f, "EntityName({})", self.as_str())
207    }
208}
209
210///
211/// IndexName
212///
213
214#[derive(Clone, Copy, Eq, Hash, PartialEq)]
215pub struct IndexName {
216    len: u16,
217    bytes: [u8; MAX_INDEX_NAME_LEN],
218}
219
220impl IndexName {
221    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
222    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
223
224    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
225        if fields.len() > MAX_INDEX_FIELDS {
226            return Err(IndexNameError::TooManyFields {
227                len: fields.len(),
228                max: MAX_INDEX_FIELDS,
229            });
230        }
231
232        let mut total_len = entity.len();
233        for field in fields {
234            let field_len = field.len();
235            if field_len > MAX_INDEX_FIELD_NAME_LEN {
236                return Err(IndexNameError::FieldTooLong {
237                    field: (*field).to_string(),
238                    max: MAX_INDEX_FIELD_NAME_LEN,
239                });
240            }
241            if !field.is_ascii() {
242                return Err(IndexNameError::FieldNonAscii {
243                    field: (*field).to_string(),
244                });
245            }
246            total_len = total_len.saturating_add(1 + field_len);
247        }
248
249        if total_len > MAX_INDEX_NAME_LEN {
250            return Err(IndexNameError::TooLong {
251                len: total_len,
252                max: MAX_INDEX_NAME_LEN,
253            });
254        }
255
256        let mut out = [0u8; MAX_INDEX_NAME_LEN];
257        let mut len = 0usize;
258
259        Self::push_bytes(&mut out, &mut len, entity.as_bytes());
260        for field in fields {
261            Self::push_bytes(&mut out, &mut len, b"|");
262            Self::push_bytes(&mut out, &mut len, field.as_bytes());
263        }
264
265        Ok(Self {
266            len: len as u16,
267            bytes: out,
268        })
269    }
270
271    #[must_use]
272    pub fn as_bytes(&self) -> &[u8] {
273        &self.bytes[..self.len as usize]
274    }
275
276    #[must_use]
277    pub fn as_str(&self) -> &str {
278        // SAFETY:
279        // Preconditions:
280        // - `try_from_parts` validates all segments are ASCII.
281        // - `from_bytes` rejects non-ASCII payloads and malformed lengths.
282        // - `as_bytes` returns only initialized bytes within `len`.
283        //
284        // Aliasing:
285        // - We expose a shared `&str` over immutable storage; no mutable alias
286        //   is created while the reference is live.
287        //
288        // What would break this:
289        // - Accepting non-ASCII bytes in any construction/decoding path.
290        // - Mutating the underlying `bytes` without re-validating invariants.
291        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
292    }
293
294    #[must_use]
295    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
296        let mut out = [0u8; Self::STORED_SIZE_USIZE];
297        out[..2].copy_from_slice(&self.len.to_be_bytes());
298        out[2..].copy_from_slice(&self.bytes);
299        out
300    }
301
302    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
303        if bytes.len() != Self::STORED_SIZE_USIZE {
304            return Err(IdentityDecodeError::InvalidSize);
305        }
306
307        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
308        if len == 0 || len > MAX_INDEX_NAME_LEN {
309            return Err(IdentityDecodeError::InvalidLength);
310        }
311        if !bytes[2..2 + len].is_ascii() {
312            return Err(IdentityDecodeError::NonAscii);
313        }
314        if bytes[2 + len..].iter().any(|&b| b != 0) {
315            return Err(IdentityDecodeError::NonZeroPadding);
316        }
317
318        let mut name = [0u8; MAX_INDEX_NAME_LEN];
319        name.copy_from_slice(&bytes[2..]);
320
321        Ok(Self {
322            len: len as u16,
323            bytes: name,
324        })
325    }
326
327    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
328        let end = *len + bytes.len();
329        out[*len..end].copy_from_slice(bytes);
330        *len = end;
331    }
332
333    #[must_use]
334    pub const fn max_storable() -> Self {
335        Self {
336            len: MAX_INDEX_NAME_LEN as u16,
337            bytes: [b'z'; MAX_INDEX_NAME_LEN],
338        }
339    }
340}
341
342impl Ord for IndexName {
343    fn cmp(&self, other: &Self) -> Ordering {
344        self.to_bytes().cmp(&other.to_bytes())
345    }
346}
347
348impl PartialOrd for IndexName {
349    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
350        Some(self.cmp(other))
351    }
352}
353
354impl fmt::Debug for IndexName {
355    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
356        write!(f, "IndexName({})", self.as_str())
357    }
358}
359
360impl Display for IndexName {
361    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
362        f.write_str(self.as_str())
363    }
364}
365
366///
367/// TESTS
368///
369
#[cfg(test)]
mod tests {
    use super::*;

    // Fixtures that are exactly at the entity / field length limits.
    const ENTITY_64: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
    const ENTITY_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const FIELD_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_C: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";
    const FIELD_64_D: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd";

    #[test]
    fn index_name_max_len_matches_limits() {
        let entity = EntityName::try_from_str(ENTITY_64).unwrap();
        let fields = [FIELD_64_A, FIELD_64_B, FIELD_64_C, FIELD_64_D];

        assert_eq!(entity.as_str().len(), MAX_ENTITY_NAME_LEN);
        for field in &fields {
            assert_eq!(field.len(), MAX_INDEX_FIELD_NAME_LEN);
        }
        assert_eq!(fields.len(), MAX_INDEX_FIELDS);

        let name = IndexName::try_from_parts(&entity, &fields).unwrap();
        assert_eq!(name.as_bytes().len(), MAX_INDEX_NAME_LEN);
    }

    #[test]
    fn index_name_max_size_roundtrip_and_ordering() {
        let entity_a = EntityName::try_from_str(ENTITY_64).unwrap();
        let entity_b = EntityName::try_from_str(ENTITY_64_B).unwrap();

        let fields_a = [FIELD_64_A, FIELD_64_A, FIELD_64_A, FIELD_64_A];
        let fields_b = [FIELD_64_B, FIELD_64_B, FIELD_64_B, FIELD_64_B];

        let idx_a = IndexName::try_from_parts(&entity_a, &fields_a).unwrap();
        let idx_b = IndexName::try_from_parts(&entity_b, &fields_b).unwrap();

        let decoded = IndexName::from_bytes(&idx_a.to_bytes()).unwrap();
        assert_eq!(idx_a, decoded);

        assert_eq!(idx_a.cmp(&idx_b), idx_a.to_bytes().cmp(&idx_b.to_bytes()));
    }

    #[test]
    fn rejects_too_many_index_fields() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let fields = ["a", "b", "c", "d", "e"];

        let err = IndexName::try_from_parts(&entity, &fields).unwrap_err();
        assert!(matches!(err, IndexNameError::TooManyFields { .. }));
    }

    #[test]
    fn rejects_index_field_over_len() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let long_field = "a".repeat(MAX_INDEX_FIELD_NAME_LEN + 1);

        let err = IndexName::try_from_parts(&entity, &[long_field.as_str()]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldTooLong { .. }));
    }

    #[test]
    fn rejects_non_ascii_index_field() {
        let entity = EntityName::try_from_str("entity").unwrap();

        let err = IndexName::try_from_parts(&entity, &["é"]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldNonAscii { .. }));
    }

    #[test]
    fn entity_try_from_str_roundtrip() {
        let e = EntityName::try_from_str("user").unwrap();
        assert_eq!(e.len(), 4);
        assert_eq!(e.as_str(), "user");
    }

    #[test]
    fn entity_rejects_empty() {
        let err = EntityName::try_from_str("").unwrap_err();
        assert!(matches!(err, EntityNameError::Empty));
    }

    #[test]
    fn entity_rejects_len_over_max() {
        let s = "a".repeat(MAX_ENTITY_NAME_LEN + 1);
        let err = EntityName::try_from_str(&s).unwrap_err();
        assert!(matches!(err, EntityNameError::TooLong { .. }));
    }

    #[test]
    fn entity_rejects_non_ascii() {
        let err = EntityName::try_from_str("usér").unwrap_err();
        assert!(matches!(err, EntityNameError::NonAscii));
    }

    #[test]
    fn entity_storage_roundtrip() {
        let e = EntityName::try_from_str("entity_name").unwrap();
        let bytes = e.to_bytes();
        let decoded = EntityName::from_bytes(&bytes).unwrap();
        assert_eq!(e, decoded);
    }

    #[test]
    fn entity_max_storable_is_ascii_utf8() {
        let max = EntityName::max_storable();
        assert_eq!(max.len(), MAX_ENTITY_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn entity_rejects_invalid_size() {
        let buf = vec![0u8; EntityName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn entity_rejects_zero_len_from_bytes() {
        // An all-zero buffer carries a zero length prefix.
        let buf = [0u8; EntityName::STORED_SIZE_USIZE];
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn entity_rejects_len_over_max_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = (MAX_ENTITY_NAME_LEN as u8).saturating_add(1);
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn entity_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = 1;
        buf[1] = 0xFF;
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn entity_rejects_non_zero_padding() {
        let e = EntityName::try_from_str("user").unwrap();
        let mut bytes = e.to_bytes();
        bytes[1 + e.len()] = b'x';

        assert!(matches!(
            EntityName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    #[test]
    fn entity_ordering_matches_bytes() {
        let a = EntityName::try_from_str("abc").unwrap();
        let b = EntityName::try_from_str("abd").unwrap();
        let c = EntityName::try_from_str("abcx").unwrap();

        assert_eq!(a.cmp(&b), a.to_bytes().cmp(&b.to_bytes()));
        assert_eq!(a.cmp(&c), a.to_bytes().cmp(&c.to_bytes()));
    }

    #[test]
    fn index_single_field_format() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["email"]).unwrap();

        assert_eq!(idx.as_str(), "user|email");
    }

    #[test]
    fn index_field_order_is_preserved() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b", "c"]).unwrap();

        assert_eq!(idx.as_str(), "user|a|b|c");
    }

    #[test]
    fn index_storage_roundtrip() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b"]).unwrap();

        let bytes = idx.to_bytes();
        let decoded = IndexName::from_bytes(&bytes).unwrap();
        assert_eq!(idx, decoded);
    }

    #[test]
    fn index_max_storable_is_ascii_utf8() {
        let max = IndexName::max_storable();
        assert_eq!(max.as_bytes().len(), MAX_INDEX_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn index_rejects_invalid_size() {
        let buf = vec![0u8; IndexName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn index_rejects_zero_len_from_bytes() {
        // An all-zero buffer carries a zero length prefix.
        let buf = [0u8; IndexName::STORED_SIZE_USIZE];
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn index_rejects_len_over_max_from_bytes() {
        let over_max = u16::try_from(MAX_INDEX_NAME_LEN + 1).unwrap();
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        buf[..2].copy_from_slice(&over_max.to_be_bytes());

        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn index_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        buf[..2].copy_from_slice(&1u16.to_be_bytes());
        buf[2] = 0xFF;

        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn index_rejects_non_zero_padding() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a"]).unwrap();
        let mut bytes = idx.to_bytes();
        // First byte after the two-byte prefix and the name itself.
        bytes[2 + idx.as_bytes().len()] = b'x';

        assert!(matches!(
            IndexName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    // ------------------------------------------------------------------
    // FUZZING (deterministic)
    // ------------------------------------------------------------------

    /// Deterministic lowercase-ASCII string whose length is in `1..=max_len`.
    fn gen_ascii(seed: u64, max_len: usize) -> String {
        // `% max_len + 1` (rather than `% max_len` clamped to 1) keeps the
        // length non-zero while making the `max_len` boundary reachable.
        let len = (seed as usize % max_len) + 1;
        let mut out = String::with_capacity(len);

        let mut x = seed;
        for _ in 0..len {
            x = x.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let c = b'a' + (x % 26) as u8;
            out.push(c as char);
        }

        out
    }

    #[test]
    fn fuzz_entity_name_roundtrip_and_ordering() {
        let mut prev: Option<EntityName> = None;

        for i in 1..=1_000u64 {
            let s = gen_ascii(i, MAX_ENTITY_NAME_LEN);
            let e = EntityName::try_from_str(&s).unwrap();

            let bytes = e.to_bytes();
            let decoded = EntityName::from_bytes(&bytes).unwrap();
            assert_eq!(e, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&e), p.to_bytes().cmp(&e.to_bytes()));
            }

            prev = Some(e);
        }
    }

    #[test]
    fn fuzz_index_name_roundtrip_and_ordering() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let mut prev: Option<IndexName> = None;

        for i in 1..=1_000u64 {
            // 1..=MAX_INDEX_FIELDS, so the maximum field count is exercised too.
            let field_count = (i as usize % MAX_INDEX_FIELDS) + 1;

            let mut fields = Vec::with_capacity(field_count);
            for f in 0..field_count {
                let s = gen_ascii(i * 31 + f as u64, MAX_INDEX_FIELD_NAME_LEN);
                fields.push(s);
            }

            let field_refs: Vec<&str> = fields.iter().map(String::as_str).collect();
            let idx = IndexName::try_from_parts(&entity, &field_refs).unwrap();

            let bytes = idx.to_bytes();
            let decoded = IndexName::from_bytes(&bytes).unwrap();
            assert_eq!(idx, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&idx), p.to_bytes().cmp(&idx.to_bytes()));
            }

            prev = Some(idx);
        }
    }
}