Skip to main content

icydb_core/db/
identity.rs

1#![expect(clippy::cast_possible_truncation)]
2//! Identity invariants and construction.
3//!
4//! Invariants:
5//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
6//! - All construction paths validate invariants.
7//! - Stored byte representation is canonical and order-preserving.
8//! - Ordering semantics follow the length-prefixed stored-byte layout, not
9//!   lexicographic string ordering.
10
11use crate::MAX_INDEX_FIELDS;
12use std::{
13    cmp::Ordering,
14    fmt::{self, Display},
15};
16use thiserror::Error as ThisError;
17
18///
19/// Constants
20///
21
22pub(super) const MAX_ENTITY_NAME_LEN: usize = 64;
23pub(super) const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
24pub(super) const MAX_INDEX_NAME_LEN: usize =
25    MAX_ENTITY_NAME_LEN + (MAX_INDEX_FIELDS * (MAX_INDEX_FIELD_NAME_LEN + 1));
26
27///
28/// IdentityDecodeError
29/// Decode errors (storage / corruption boundary)
30///
31
32#[derive(Debug, ThisError)]
33pub enum IdentityDecodeError {
34    #[error("invalid size")]
35    InvalidSize,
36
37    #[error("invalid length")]
38    InvalidLength,
39
40    #[error("non-ascii encoding")]
41    NonAscii,
42
43    #[error("non-zero padding")]
44    NonZeroPadding,
45}
46
47///
48/// EntityNameError
49///
50
51#[derive(Debug, ThisError)]
52pub enum EntityNameError {
53    #[error("entity name is empty")]
54    Empty,
55
56    #[error("entity name length {len} exceeds max {max}")]
57    TooLong { len: usize, max: usize },
58
59    #[error("entity name must be ASCII")]
60    NonAscii,
61}
62
63///
64/// IndexNameError
65///
66
67#[derive(Debug, ThisError)]
68pub enum IndexNameError {
69    #[error("index has {len} fields (max {max})")]
70    TooManyFields { len: usize, max: usize },
71
72    #[error("index field name '{field}' exceeds max length {max}")]
73    FieldTooLong { field: String, max: usize },
74
75    #[error("index field name '{field}' must be ASCII")]
76    FieldNonAscii { field: String },
77
78    #[error("index name length {len} exceeds max {max}")]
79    TooLong { len: usize, max: usize },
80}
81
82///
83/// EntityName
84///
85
86#[derive(Clone, Copy, Eq, Hash, PartialEq)]
87pub struct EntityName {
88    len: u8,
89    bytes: [u8; MAX_ENTITY_NAME_LEN],
90}
91
92impl EntityName {
93    /// Fixed on-disk size in bytes (stable, protocol-level)
94    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
95
96    /// Fixed in-memory size (for buffers and arrays)
97    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
98
99    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
100        let bytes = name.as_bytes();
101        let len = bytes.len();
102
103        if len == 0 {
104            return Err(EntityNameError::Empty);
105        }
106        if len > MAX_ENTITY_NAME_LEN {
107            return Err(EntityNameError::TooLong {
108                len,
109                max: MAX_ENTITY_NAME_LEN,
110            });
111        }
112        if !bytes.is_ascii() {
113            return Err(EntityNameError::NonAscii);
114        }
115
116        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
117        out[..len].copy_from_slice(bytes);
118
119        Ok(Self {
120            len: len as u8,
121            bytes: out,
122        })
123    }
124
125    #[must_use]
126    pub const fn len(&self) -> usize {
127        self.len as usize
128    }
129
130    #[must_use]
131    pub const fn is_empty(&self) -> bool {
132        self.len() == 0
133    }
134
135    #[must_use]
136    pub fn as_bytes(&self) -> &[u8] {
137        &self.bytes[..self.len()]
138    }
139
140    #[must_use]
141    pub fn as_str(&self) -> &str {
142        // Invariant: construction and decoding enforce ASCII-only storage,
143        // so UTF-8 decoding cannot fail.
144        std::str::from_utf8(self.as_bytes()).expect("EntityName invariant: ASCII-only storage")
145    }
146
147    #[must_use]
148    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
149        let mut out = [0u8; Self::STORED_SIZE_USIZE];
150        out[0] = self.len;
151        out[1..].copy_from_slice(&self.bytes);
152        out
153    }
154
155    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
156        if bytes.len() != Self::STORED_SIZE_USIZE {
157            return Err(IdentityDecodeError::InvalidSize);
158        }
159
160        let len = bytes[0] as usize;
161        if len == 0 || len > MAX_ENTITY_NAME_LEN {
162            return Err(IdentityDecodeError::InvalidLength);
163        }
164        if !bytes[1..=len].is_ascii() {
165            return Err(IdentityDecodeError::NonAscii);
166        }
167        if bytes[1 + len..].iter().any(|&b| b != 0) {
168            return Err(IdentityDecodeError::NonZeroPadding);
169        }
170
171        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
172        name.copy_from_slice(&bytes[1..]);
173
174        Ok(Self {
175            len: len as u8,
176            bytes: name,
177        })
178    }
179
180    #[must_use]
181    pub const fn max_storable() -> Self {
182        Self {
183            len: MAX_ENTITY_NAME_LEN as u8,
184            bytes: [b'z'; MAX_ENTITY_NAME_LEN],
185        }
186    }
187}
188
189impl Ord for EntityName {
190    fn cmp(&self, other: &Self) -> Ordering {
191        // Keep ordering consistent with `to_bytes()` (length prefix first).
192        // This is deterministic protocol/storage ordering, not lexical string order.
193        self.len.cmp(&other.len).then(self.bytes.cmp(&other.bytes))
194    }
195}
196
197impl PartialOrd for EntityName {
198    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
199        Some(self.cmp(other))
200    }
201}
202
203impl Display for EntityName {
204    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
205        f.write_str(self.as_str())
206    }
207}
208
209impl fmt::Debug for EntityName {
210    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
211        write!(f, "EntityName({})", self.as_str())
212    }
213}
214
215///
216/// IndexName
217///
218
219#[derive(Clone, Copy, Eq, Hash, PartialEq)]
220pub struct IndexName {
221    len: u16,
222    bytes: [u8; MAX_INDEX_NAME_LEN],
223}
224
225impl IndexName {
226    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
227    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
228
229    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
230        if fields.len() > MAX_INDEX_FIELDS {
231            return Err(IndexNameError::TooManyFields {
232                len: fields.len(),
233                max: MAX_INDEX_FIELDS,
234            });
235        }
236
237        let mut total_len = entity.len();
238        for field in fields {
239            let field_len = field.len();
240            if field_len > MAX_INDEX_FIELD_NAME_LEN {
241                return Err(IndexNameError::FieldTooLong {
242                    field: (*field).to_string(),
243                    max: MAX_INDEX_FIELD_NAME_LEN,
244                });
245            }
246            if !field.is_ascii() {
247                return Err(IndexNameError::FieldNonAscii {
248                    field: (*field).to_string(),
249                });
250            }
251            total_len = total_len.saturating_add(1 + field_len);
252        }
253
254        if total_len > MAX_INDEX_NAME_LEN {
255            return Err(IndexNameError::TooLong {
256                len: total_len,
257                max: MAX_INDEX_NAME_LEN,
258            });
259        }
260
261        let mut out = [0u8; MAX_INDEX_NAME_LEN];
262        let mut len = 0usize;
263
264        Self::push_bytes(&mut out, &mut len, entity.as_bytes());
265        for field in fields {
266            Self::push_bytes(&mut out, &mut len, b"|");
267            Self::push_bytes(&mut out, &mut len, field.as_bytes());
268        }
269
270        Ok(Self {
271            len: len as u16,
272            bytes: out,
273        })
274    }
275
276    #[must_use]
277    pub fn as_bytes(&self) -> &[u8] {
278        &self.bytes[..self.len as usize]
279    }
280
281    #[must_use]
282    pub fn as_str(&self) -> &str {
283        // Invariant: construction and decoding enforce ASCII-only storage,
284        // so UTF-8 decoding cannot fail.
285        std::str::from_utf8(self.as_bytes()).expect("IndexName invariant: ASCII-only storage")
286    }
287
288    #[must_use]
289    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
290        let mut out = [0u8; Self::STORED_SIZE_USIZE];
291        out[..2].copy_from_slice(&self.len.to_be_bytes());
292        out[2..].copy_from_slice(&self.bytes);
293        out
294    }
295
296    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
297        if bytes.len() != Self::STORED_SIZE_USIZE {
298            return Err(IdentityDecodeError::InvalidSize);
299        }
300
301        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
302        if len == 0 || len > MAX_INDEX_NAME_LEN {
303            return Err(IdentityDecodeError::InvalidLength);
304        }
305        if !bytes[2..2 + len].is_ascii() {
306            return Err(IdentityDecodeError::NonAscii);
307        }
308        if bytes[2 + len..].iter().any(|&b| b != 0) {
309            return Err(IdentityDecodeError::NonZeroPadding);
310        }
311
312        let mut name = [0u8; MAX_INDEX_NAME_LEN];
313        name.copy_from_slice(&bytes[2..]);
314
315        Ok(Self {
316            len: len as u16,
317            bytes: name,
318        })
319    }
320
321    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
322        let end = *len + bytes.len();
323        out[*len..end].copy_from_slice(bytes);
324        *len = end;
325    }
326
327    #[must_use]
328    pub const fn max_storable() -> Self {
329        Self {
330            len: MAX_INDEX_NAME_LEN as u16,
331            bytes: [b'z'; MAX_INDEX_NAME_LEN],
332        }
333    }
334}
335
336impl Ord for IndexName {
337    fn cmp(&self, other: &Self) -> Ordering {
338        self.to_bytes().cmp(&other.to_bytes())
339    }
340}
341
342impl PartialOrd for IndexName {
343    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
344        Some(self.cmp(other))
345    }
346}
347
348impl fmt::Debug for IndexName {
349    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350        write!(f, "IndexName({})", self.as_str())
351    }
352}
353
354impl Display for IndexName {
355    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
356        f.write_str(self.as_str())
357    }
358}
359
360///
361/// TESTS
362///
363
#[cfg(test)]
mod tests {
    use super::*;

    const ENTITY_64: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
    const ENTITY_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const FIELD_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_C: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";
    const FIELD_64_D: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd";

    // ------------------------------------------------------------------
    // Limits
    // ------------------------------------------------------------------

    #[test]
    fn index_name_max_len_matches_limits() {
        let entity = EntityName::try_from_str(ENTITY_64).unwrap();
        let fields = [FIELD_64_A, FIELD_64_B, FIELD_64_C, FIELD_64_D];

        assert_eq!(entity.as_str().len(), MAX_ENTITY_NAME_LEN);
        for field in &fields {
            assert_eq!(field.len(), MAX_INDEX_FIELD_NAME_LEN);
        }
        assert_eq!(fields.len(), MAX_INDEX_FIELDS);

        let name = IndexName::try_from_parts(&entity, &fields).unwrap();
        assert_eq!(name.as_bytes().len(), MAX_INDEX_NAME_LEN);
    }

    #[test]
    fn index_name_max_size_roundtrip_and_ordering() {
        let entity_a = EntityName::try_from_str(ENTITY_64).unwrap();
        let entity_b = EntityName::try_from_str(ENTITY_64_B).unwrap();

        let fields_a = [FIELD_64_A, FIELD_64_A, FIELD_64_A, FIELD_64_A];
        let fields_b = [FIELD_64_B, FIELD_64_B, FIELD_64_B, FIELD_64_B];

        let idx_a = IndexName::try_from_parts(&entity_a, &fields_a).unwrap();
        let idx_b = IndexName::try_from_parts(&entity_b, &fields_b).unwrap();

        let decoded = IndexName::from_bytes(&idx_a.to_bytes()).unwrap();
        assert_eq!(idx_a, decoded);

        assert_eq!(idx_a.cmp(&idx_b), idx_a.to_bytes().cmp(&idx_b.to_bytes()));
    }

    #[test]
    fn rejects_too_many_index_fields() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let fields = ["a", "b", "c", "d", "e"];

        let err = IndexName::try_from_parts(&entity, &fields).unwrap_err();
        assert!(matches!(err, IndexNameError::TooManyFields { .. }));
    }

    #[test]
    fn rejects_index_field_over_len() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let long_field = "a".repeat(MAX_INDEX_FIELD_NAME_LEN + 1);

        let err = IndexName::try_from_parts(&entity, &[long_field.as_str()]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldTooLong { .. }));
    }

    // ------------------------------------------------------------------
    // EntityName
    // ------------------------------------------------------------------

    #[test]
    fn entity_try_from_str_roundtrip() {
        let e = EntityName::try_from_str("user").unwrap();
        assert_eq!(e.len(), 4);
        assert_eq!(e.as_str(), "user");
    }

    #[test]
    fn entity_rejects_empty() {
        let err = EntityName::try_from_str("").unwrap_err();
        assert!(matches!(err, EntityNameError::Empty));
    }

    #[test]
    fn entity_rejects_len_over_max() {
        let s = "a".repeat(MAX_ENTITY_NAME_LEN + 1);
        let err = EntityName::try_from_str(&s).unwrap_err();
        assert!(matches!(err, EntityNameError::TooLong { .. }));
    }

    #[test]
    fn entity_rejects_non_ascii() {
        let err = EntityName::try_from_str("usér").unwrap_err();
        assert!(matches!(err, EntityNameError::NonAscii));
    }

    #[test]
    fn entity_storage_roundtrip() {
        let e = EntityName::try_from_str("entity_name").unwrap();
        let bytes = e.to_bytes();
        let decoded = EntityName::from_bytes(&bytes).unwrap();
        assert_eq!(e, decoded);
    }

    #[test]
    fn entity_max_storable_is_ascii_utf8() {
        let max = EntityName::max_storable();
        assert_eq!(max.len(), MAX_ENTITY_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn entity_rejects_invalid_size() {
        let buf = vec![0u8; EntityName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn entity_rejects_len_over_max_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = (MAX_ENTITY_NAME_LEN as u8).saturating_add(1);
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn entity_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = 1;
        buf[1] = 0xFF;
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn entity_rejects_non_zero_padding() {
        let e = EntityName::try_from_str("user").unwrap();
        let mut bytes = e.to_bytes();
        bytes[1 + e.len()] = b'x';

        assert!(matches!(
            EntityName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    #[test]
    fn entity_ordering_matches_bytes() {
        let a = EntityName::try_from_str("abc").unwrap();
        let b = EntityName::try_from_str("abd").unwrap();
        let c = EntityName::try_from_str("abcx").unwrap();

        assert_eq!(a.cmp(&b), a.to_bytes().cmp(&b.to_bytes()));
        assert_eq!(a.cmp(&c), a.to_bytes().cmp(&c.to_bytes()));
    }

    #[test]
    fn entity_ordering_is_not_lexicographic() {
        let z = EntityName::try_from_str("z").unwrap();
        let aa = EntityName::try_from_str("aa").unwrap();

        assert_eq!(z.cmp(&aa), Ordering::Less);
        assert_eq!(z.to_bytes().cmp(&aa.to_bytes()), Ordering::Less);
        assert_eq!(z.as_str().cmp(aa.as_str()), Ordering::Greater);
    }

    // ------------------------------------------------------------------
    // IndexName
    // ------------------------------------------------------------------

    #[test]
    fn index_single_field_format() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["email"]).unwrap();

        assert_eq!(idx.as_str(), "user|email");
    }

    #[test]
    fn index_field_order_is_preserved() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b", "c"]).unwrap();

        assert_eq!(idx.as_str(), "user|a|b|c");
    }

    #[test]
    fn index_storage_roundtrip() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b"]).unwrap();

        let bytes = idx.to_bytes();
        let decoded = IndexName::from_bytes(&bytes).unwrap();
        assert_eq!(idx, decoded);
    }

    #[test]
    fn index_max_storable_is_ascii_utf8() {
        let max = IndexName::max_storable();
        assert_eq!(max.as_bytes().len(), MAX_INDEX_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn index_rejects_invalid_size() {
        let buf = vec![0u8; IndexName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn index_rejects_invalid_length_from_bytes() {
        // A zero length prefix is rejected...
        let zero = [0u8; IndexName::STORED_SIZE_USIZE];
        assert!(matches!(
            IndexName::from_bytes(&zero),
            Err(IdentityDecodeError::InvalidLength)
        ));

        // ...and so is one past the maximum.
        let mut over = [0u8; IndexName::STORED_SIZE_USIZE];
        over[..2].copy_from_slice(&((MAX_INDEX_NAME_LEN + 1) as u16).to_be_bytes());
        assert!(matches!(
            IndexName::from_bytes(&over),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn index_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        buf[..2].copy_from_slice(&1u16.to_be_bytes());
        buf[2] = 0xFF;

        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn index_rejects_non_zero_padding() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a"]).unwrap();
        let mut bytes = idx.to_bytes();
        bytes[2 + idx.as_bytes().len()] = b'x';

        assert!(matches!(
            IndexName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    #[test]
    fn index_ordering_is_not_lexicographic() {
        let entity = EntityName::try_from_str("user").unwrap();
        let short = IndexName::try_from_parts(&entity, &["z"]).unwrap();
        let long = IndexName::try_from_parts(&entity, &["aa"]).unwrap();

        assert_eq!(short.cmp(&long), Ordering::Less);
        assert_eq!(short.to_bytes().cmp(&long.to_bytes()), Ordering::Less);
        assert_eq!(short.as_str().cmp(long.as_str()), Ordering::Greater);
    }

    // ------------------------------------------------------------------
    // FUZZING (deterministic)
    // ------------------------------------------------------------------

    /// Deterministically generates a lowercase ASCII string of 1 to `max_len`
    /// bytes (inclusive) from `seed`.
    ///
    /// The length is `seed % max_len + 1`, so the maximum length is reachable
    /// and the boundary is exercised by the fuzz loops.
    fn gen_ascii(seed: u64, max_len: usize) -> String {
        assert!(max_len > 0, "gen_ascii requires a non-zero max_len");

        let len = (seed as usize % max_len) + 1;
        let mut out = String::with_capacity(len);

        let mut x = seed;
        for _ in 0..len {
            x = x.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let c = b'a' + (x % 26) as u8;
            out.push(c as char);
        }

        out
    }

    #[test]
    fn gen_ascii_covers_full_length_range() {
        let max = MAX_ENTITY_NAME_LEN;
        assert_eq!(gen_ascii(0, max).len(), 1);
        assert_eq!(gen_ascii(max as u64 - 1, max).len(), max);
        assert!(gen_ascii(12_345, max).is_ascii());
    }

    #[test]
    fn fuzz_entity_name_roundtrip_and_ordering() {
        let mut prev: Option<EntityName> = None;

        for i in 1..=1_000u64 {
            let s = gen_ascii(i, MAX_ENTITY_NAME_LEN);
            let e = EntityName::try_from_str(&s).unwrap();

            let bytes = e.to_bytes();
            let decoded = EntityName::from_bytes(&bytes).unwrap();
            assert_eq!(e, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&e), p.to_bytes().cmp(&e.to_bytes()));
            }

            prev = Some(e);
        }
    }

    #[test]
    fn fuzz_index_name_roundtrip_and_ordering() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let mut prev: Option<IndexName> = None;

        for i in 1..=1_000u64 {
            // 1..=MAX_INDEX_FIELDS, so the maximum field count is exercised too.
            let field_count = (i as usize % MAX_INDEX_FIELDS) + 1;

            let fields: Vec<String> = (0..field_count)
                .map(|f| gen_ascii(i * 31 + f as u64, MAX_INDEX_FIELD_NAME_LEN))
                .collect();

            let field_refs: Vec<&str> = fields.iter().map(String::as_str).collect();
            let idx = IndexName::try_from_parts(&entity, &field_refs).unwrap();

            let bytes = idx.to_bytes();
            let decoded = IndexName::from_bytes(&bytes).unwrap();
            assert_eq!(idx, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&idx), p.to_bytes().cmp(&idx.to_bytes()));
            }

            prev = Some(idx);
        }
    }
}