Skip to main content

icydb_core/db/
identity.rs

1#![expect(clippy::cast_possible_truncation)]
//! Identity invariants and construction.
//!
//! Invariants:
//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
//! - All construction paths validate invariants.
//! - Stored byte representation is canonical and order-preserving.
8
9use crate::MAX_INDEX_FIELDS;
10use std::{
11    cmp::Ordering,
12    fmt::{self, Display},
13};
14use thiserror::Error as ThisError;
15
16///
17/// Constants
18///
19
20pub(super) const MAX_ENTITY_NAME_LEN: usize = 64;
21pub(super) const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
22pub(super) const MAX_INDEX_NAME_LEN: usize =
23    MAX_ENTITY_NAME_LEN + (MAX_INDEX_FIELDS * (MAX_INDEX_FIELD_NAME_LEN + 1));
24
25///
26/// Decode errors (storage / corruption boundary)
27///
28
29#[derive(Debug, ThisError)]
30pub enum IdentityDecodeError {
31    #[error("invalid size")]
32    InvalidSize,
33    #[error("invalid length")]
34    InvalidLength,
35    #[error("non-ascii encoding")]
36    NonAscii,
37    #[error("non-zero padding")]
38    NonZeroPadding,
39}
40
41///
42/// EntityNameError
43///
44
45#[derive(Debug, ThisError)]
46pub enum EntityNameError {
47    #[error("entity name is empty")]
48    Empty,
49
50    #[error("entity name length {len} exceeds max {max}")]
51    TooLong { len: usize, max: usize },
52
53    #[error("entity name must be ASCII")]
54    NonAscii,
55}
56
57///
58/// IndexNameError
59///
60
61#[derive(Debug, ThisError)]
62pub enum IndexNameError {
63    #[error("index has {len} fields (max {max})")]
64    TooManyFields { len: usize, max: usize },
65
66    #[error("index field name '{field}' exceeds max length {max}")]
67    FieldTooLong { field: String, max: usize },
68
69    #[error("index field name '{field}' must be ASCII")]
70    FieldNonAscii { field: String },
71
72    #[error("index name length {len} exceeds max {max}")]
73    TooLong { len: usize, max: usize },
74}
75
76///
77/// EntityName
78///
79
80#[derive(Clone, Copy, Eq, Hash, PartialEq)]
81pub struct EntityName {
82    len: u8,
83    bytes: [u8; MAX_ENTITY_NAME_LEN],
84}
85
86impl EntityName {
87    /// Fixed on-disk size in bytes (stable, protocol-level)
88    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
89
90    /// Fixed in-memory size (for buffers and arrays)
91    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
92
93    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
94        let bytes = name.as_bytes();
95        let len = bytes.len();
96
97        if len == 0 {
98            return Err(EntityNameError::Empty);
99        }
100        if len > MAX_ENTITY_NAME_LEN {
101            return Err(EntityNameError::TooLong {
102                len,
103                max: MAX_ENTITY_NAME_LEN,
104            });
105        }
106        if !bytes.is_ascii() {
107            return Err(EntityNameError::NonAscii);
108        }
109
110        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
111        out[..len].copy_from_slice(bytes);
112
113        Ok(Self {
114            len: len as u8,
115            bytes: out,
116        })
117    }
118
119    #[must_use]
120    pub const fn len(&self) -> usize {
121        self.len as usize
122    }
123
124    #[must_use]
125    pub const fn is_empty(&self) -> bool {
126        self.len() == 0
127    }
128
129    #[must_use]
130    pub fn as_bytes(&self) -> &[u8] {
131        &self.bytes[..self.len()]
132    }
133
134    #[must_use]
135    pub fn as_str(&self) -> &str {
136        // SAFETY:
137        // Preconditions:
138        // - Constructors (`try_from_str`) and decoders (`from_bytes`) reject
139        //   non-ASCII inputs.
140        // - Stored slices returned by `as_bytes` are within initialized bounds.
141        //
142        // Aliasing:
143        // - This creates an immutable `&str` view over immutable bytes already
144        //   owned by `self`; no mutable aliasing is introduced.
145        //
146        // What would break this:
147        // - Any future constructor/decoder path that permits non-ASCII bytes.
148        // - Any mutation of `bytes[..len]` bypassing validation guarantees.
149        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
150    }
151
152    #[must_use]
153    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
154        let mut out = [0u8; Self::STORED_SIZE_USIZE];
155        out[0] = self.len;
156        out[1..].copy_from_slice(&self.bytes);
157        out
158    }
159
160    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
161        if bytes.len() != Self::STORED_SIZE_USIZE {
162            return Err(IdentityDecodeError::InvalidSize);
163        }
164
165        let len = bytes[0] as usize;
166        if len == 0 || len > MAX_ENTITY_NAME_LEN {
167            return Err(IdentityDecodeError::InvalidLength);
168        }
169        if !bytes[1..=len].is_ascii() {
170            return Err(IdentityDecodeError::NonAscii);
171        }
172        if bytes[1 + len..].iter().any(|&b| b != 0) {
173            return Err(IdentityDecodeError::NonZeroPadding);
174        }
175
176        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
177        name.copy_from_slice(&bytes[1..]);
178
179        Ok(Self {
180            len: len as u8,
181            bytes: name,
182        })
183    }
184
185    #[must_use]
186    pub const fn max_storable() -> Self {
187        Self {
188            len: MAX_ENTITY_NAME_LEN as u8,
189            bytes: [b'z'; MAX_ENTITY_NAME_LEN],
190        }
191    }
192}
193
194impl Ord for EntityName {
195    fn cmp(&self, other: &Self) -> Ordering {
196        self.to_bytes().cmp(&other.to_bytes())
197    }
198}
199
200impl PartialOrd for EntityName {
201    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
202        Some(self.cmp(other))
203    }
204}
205
206impl Display for EntityName {
207    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
208        f.write_str(self.as_str())
209    }
210}
211
212impl fmt::Debug for EntityName {
213    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214        write!(f, "EntityName({})", self.as_str())
215    }
216}
217
218///
219/// IndexName
220///
221
222#[derive(Clone, Copy, Eq, Hash, PartialEq)]
223pub struct IndexName {
224    len: u16,
225    bytes: [u8; MAX_INDEX_NAME_LEN],
226}
227
228impl IndexName {
229    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
230    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
231
232    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
233        if fields.len() > MAX_INDEX_FIELDS {
234            return Err(IndexNameError::TooManyFields {
235                len: fields.len(),
236                max: MAX_INDEX_FIELDS,
237            });
238        }
239
240        let mut total_len = entity.len();
241        for field in fields {
242            let field_len = field.len();
243            if field_len > MAX_INDEX_FIELD_NAME_LEN {
244                return Err(IndexNameError::FieldTooLong {
245                    field: (*field).to_string(),
246                    max: MAX_INDEX_FIELD_NAME_LEN,
247                });
248            }
249            if !field.is_ascii() {
250                return Err(IndexNameError::FieldNonAscii {
251                    field: (*field).to_string(),
252                });
253            }
254            total_len = total_len.saturating_add(1 + field_len);
255        }
256
257        if total_len > MAX_INDEX_NAME_LEN {
258            return Err(IndexNameError::TooLong {
259                len: total_len,
260                max: MAX_INDEX_NAME_LEN,
261            });
262        }
263
264        let mut out = [0u8; MAX_INDEX_NAME_LEN];
265        let mut len = 0usize;
266
267        Self::push_bytes(&mut out, &mut len, entity.as_bytes());
268        for field in fields {
269            Self::push_bytes(&mut out, &mut len, b"|");
270            Self::push_bytes(&mut out, &mut len, field.as_bytes());
271        }
272
273        Ok(Self {
274            len: len as u16,
275            bytes: out,
276        })
277    }
278
279    #[must_use]
280    pub fn as_bytes(&self) -> &[u8] {
281        &self.bytes[..self.len as usize]
282    }
283
284    #[must_use]
285    pub fn as_str(&self) -> &str {
286        // SAFETY:
287        // Preconditions:
288        // - `try_from_parts` validates all segments are ASCII.
289        // - `from_bytes` rejects non-ASCII payloads and malformed lengths.
290        // - `as_bytes` returns only initialized bytes within `len`.
291        //
292        // Aliasing:
293        // - We expose a shared `&str` over immutable storage; no mutable alias
294        //   is created while the reference is live.
295        //
296        // What would break this:
297        // - Accepting non-ASCII bytes in any construction/decoding path.
298        // - Mutating the underlying `bytes` without re-validating invariants.
299        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
300    }
301
302    #[must_use]
303    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
304        let mut out = [0u8; Self::STORED_SIZE_USIZE];
305        out[..2].copy_from_slice(&self.len.to_be_bytes());
306        out[2..].copy_from_slice(&self.bytes);
307        out
308    }
309
310    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
311        if bytes.len() != Self::STORED_SIZE_USIZE {
312            return Err(IdentityDecodeError::InvalidSize);
313        }
314
315        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
316        if len == 0 || len > MAX_INDEX_NAME_LEN {
317            return Err(IdentityDecodeError::InvalidLength);
318        }
319        if !bytes[2..2 + len].is_ascii() {
320            return Err(IdentityDecodeError::NonAscii);
321        }
322        if bytes[2 + len..].iter().any(|&b| b != 0) {
323            return Err(IdentityDecodeError::NonZeroPadding);
324        }
325
326        let mut name = [0u8; MAX_INDEX_NAME_LEN];
327        name.copy_from_slice(&bytes[2..]);
328
329        Ok(Self {
330            len: len as u16,
331            bytes: name,
332        })
333    }
334
335    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
336        let end = *len + bytes.len();
337        out[*len..end].copy_from_slice(bytes);
338        *len = end;
339    }
340
341    #[must_use]
342    pub const fn max_storable() -> Self {
343        Self {
344            len: MAX_INDEX_NAME_LEN as u16,
345            bytes: [b'z'; MAX_INDEX_NAME_LEN],
346        }
347    }
348}
349
350impl Ord for IndexName {
351    fn cmp(&self, other: &Self) -> Ordering {
352        self.to_bytes().cmp(&other.to_bytes())
353    }
354}
355
356impl PartialOrd for IndexName {
357    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
358        Some(self.cmp(other))
359    }
360}
361
362impl fmt::Debug for IndexName {
363    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
364        write!(f, "IndexName({})", self.as_str())
365    }
366}
367
368impl Display for IndexName {
369    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
370        f.write_str(self.as_str())
371    }
372}
373
///
/// TESTS
///

#[cfg(test)]
mod tests {
    use super::*;

    // Fixtures: each literal is exactly 64 bytes, the maximum for an entity
    // or field name (asserted in `index_name_max_len_matches_limits`).
    const ENTITY_64: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
    const ENTITY_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const FIELD_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_C: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";
    const FIELD_64_D: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd";

    // A maximum-length entity with the maximum number of maximum-length
    // fields must fill the index buffer exactly.
    #[test]
    fn index_name_max_len_matches_limits() {
        let entity = EntityName::try_from_str(ENTITY_64).unwrap();
        let fields = [FIELD_64_A, FIELD_64_B, FIELD_64_C, FIELD_64_D];

        assert_eq!(entity.as_str().len(), MAX_ENTITY_NAME_LEN);
        for field in &fields {
            assert_eq!(field.len(), MAX_INDEX_FIELD_NAME_LEN);
        }
        assert_eq!(fields.len(), MAX_INDEX_FIELDS);

        let name = IndexName::try_from_parts(&entity, &fields).unwrap();
        assert_eq!(name.as_bytes().len(), MAX_INDEX_NAME_LEN);
    }

    #[test]
    fn index_name_max_size_roundtrip_and_ordering() {
        let entity_a = EntityName::try_from_str(ENTITY_64).unwrap();
        let entity_b = EntityName::try_from_str(ENTITY_64_B).unwrap();

        let fields_a = [FIELD_64_A, FIELD_64_A, FIELD_64_A, FIELD_64_A];
        let fields_b = [FIELD_64_B, FIELD_64_B, FIELD_64_B, FIELD_64_B];

        let idx_a = IndexName::try_from_parts(&entity_a, &fields_a).unwrap();
        let idx_b = IndexName::try_from_parts(&entity_b, &fields_b).unwrap();

        let decoded = IndexName::from_bytes(&idx_a.to_bytes()).unwrap();
        assert_eq!(idx_a, decoded);

        assert_eq!(idx_a.cmp(&idx_b), idx_a.to_bytes().cmp(&idx_b.to_bytes()));
    }

    #[test]
    fn rejects_too_many_index_fields() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let fields = ["a", "b", "c", "d", "e"];

        let err = IndexName::try_from_parts(&entity, &fields).unwrap_err();
        assert!(matches!(err, IndexNameError::TooManyFields { .. }));
    }

    #[test]
    fn rejects_index_field_over_len() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let long_field = "a".repeat(MAX_INDEX_FIELD_NAME_LEN + 1);

        let err = IndexName::try_from_parts(&entity, &[long_field.as_str()]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldTooLong { .. }));
    }

    #[test]
    fn entity_try_from_str_roundtrip() {
        let e = EntityName::try_from_str("user").unwrap();
        assert_eq!(e.len(), 4);
        assert_eq!(e.as_str(), "user");
    }

    #[test]
    fn entity_rejects_empty() {
        let err = EntityName::try_from_str("").unwrap_err();
        assert!(matches!(err, EntityNameError::Empty));
    }

    #[test]
    fn entity_rejects_len_over_max() {
        let s = "a".repeat(MAX_ENTITY_NAME_LEN + 1);
        let err = EntityName::try_from_str(&s).unwrap_err();
        assert!(matches!(err, EntityNameError::TooLong { .. }));
    }

    #[test]
    fn entity_rejects_non_ascii() {
        let err = EntityName::try_from_str("usér").unwrap_err();
        assert!(matches!(err, EntityNameError::NonAscii));
    }

    #[test]
    fn entity_storage_roundtrip() {
        let e = EntityName::try_from_str("entity_name").unwrap();
        let bytes = e.to_bytes();
        let decoded = EntityName::from_bytes(&bytes).unwrap();
        assert_eq!(e, decoded);
    }

    #[test]
    fn entity_max_storable_is_ascii_utf8() {
        let max = EntityName::max_storable();
        assert_eq!(max.len(), MAX_ENTITY_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn entity_rejects_invalid_size() {
        let buf = vec![0u8; EntityName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn entity_rejects_len_over_max_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = (MAX_ENTITY_NAME_LEN as u8).saturating_add(1);
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn entity_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = 1;
        buf[1] = 0xFF;
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn entity_rejects_non_zero_padding() {
        let e = EntityName::try_from_str("user").unwrap();
        let mut bytes = e.to_bytes();
        // First padding byte: one length byte plus the name itself.
        bytes[1 + e.len()] = b'x';

        assert!(matches!(
            EntityName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    #[test]
    fn entity_ordering_matches_bytes() {
        let a = EntityName::try_from_str("abc").unwrap();
        let b = EntityName::try_from_str("abd").unwrap();
        let c = EntityName::try_from_str("abcx").unwrap();

        assert_eq!(a.cmp(&b), a.to_bytes().cmp(&b.to_bytes()));
        assert_eq!(a.cmp(&c), a.to_bytes().cmp(&c.to_bytes()));

        // Pin the concrete order as well, so the test does not merely compare
        // `cmp` with itself: equal lengths fall back to the name bytes, and a
        // shorter name sorts before a longer one (the length prefix is
        // compared first).
        assert_eq!(a.cmp(&b), Ordering::Less);
        assert_eq!(a.cmp(&c), Ordering::Less);
        assert_eq!(b.cmp(&c), Ordering::Less);
    }

    #[test]
    fn index_single_field_format() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["email"]).unwrap();

        assert_eq!(idx.as_str(), "user|email");
    }

    #[test]
    fn index_field_order_is_preserved() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b", "c"]).unwrap();

        assert_eq!(idx.as_str(), "user|a|b|c");
    }

    #[test]
    fn index_storage_roundtrip() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b"]).unwrap();

        let bytes = idx.to_bytes();
        let decoded = IndexName::from_bytes(&bytes).unwrap();
        assert_eq!(idx, decoded);
    }

    #[test]
    fn index_max_storable_is_ascii_utf8() {
        let max = IndexName::max_storable();
        assert_eq!(max.as_bytes().len(), MAX_INDEX_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn index_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        buf[..2].copy_from_slice(&1u16.to_be_bytes());
        buf[2] = 0xFF;

        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    // ------------------------------------------------------------------
    // FUZZING (deterministic)
    // ------------------------------------------------------------------

    /// Deterministically generates a lowercase ASCII string whose length is
    /// in `1..=max_len`, derived from `seed`. `max_len` must be non-zero.
    fn gen_ascii(seed: u64, max_len: usize) -> String {
        // `1 + x % max_len` covers the whole range including `max_len`; the
        // previous `(x % max_len).max(1)` could never produce a maximum-length
        // string.
        let len = 1 + (seed as usize % max_len);
        let mut out = String::with_capacity(len);

        // Simple multiplicative generator; only determinism matters here.
        let mut x = seed;
        for _ in 0..len {
            x = x.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let c = b'a' + (x % 26) as u8;
            out.push(c as char);
        }

        out
    }

    #[test]
    fn fuzz_entity_name_roundtrip_and_ordering() {
        let mut prev: Option<EntityName> = None;

        for i in 1..=1_000u64 {
            let s = gen_ascii(i, MAX_ENTITY_NAME_LEN);
            let e = EntityName::try_from_str(&s).unwrap();

            let bytes = e.to_bytes();
            let decoded = EntityName::from_bytes(&bytes).unwrap();
            assert_eq!(e, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&e), p.to_bytes().cmp(&e.to_bytes()));
            }

            prev = Some(e);
        }
    }

    #[test]
    fn fuzz_index_name_roundtrip_and_ordering() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let mut prev: Option<IndexName> = None;

        for i in 1..=1_000u64 {
            // 1..=MAX_INDEX_FIELDS, so the maximum field count is exercised too.
            let field_count = 1 + (i as usize % MAX_INDEX_FIELDS);

            let mut fields = Vec::with_capacity(field_count);
            for f in 0..field_count {
                let s = gen_ascii(i * 31 + f as u64, MAX_INDEX_FIELD_NAME_LEN);
                fields.push(s);
            }

            let field_refs: Vec<&str> = fields.iter().map(String::as_str).collect();
            let idx = IndexName::try_from_parts(&entity, &field_refs).unwrap();

            let bytes = idx.to_bytes();
            let decoded = IndexName::from_bytes(&bytes).unwrap();
            assert_eq!(idx, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&idx), p.to_bytes().cmp(&idx.to_bytes()));
            }

            prev = Some(idx);
        }
    }
}