// icydb_core/db/identity.rs

1#![expect(clippy::cast_possible_truncation)]
2//! Identity invariants and construction.
3//!
4//! Invariants:
5//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
6//! - All construction paths validate invariants.
7//! - Stored byte representation is canonical and order-preserving.
8
9use crate::MAX_INDEX_FIELDS;
10use std::{
11    cmp::Ordering,
12    fmt::{self, Display},
13};
14use thiserror::Error as ThisError;
15
16///
17/// Constants
18///
19
20pub const MAX_ENTITY_NAME_LEN: usize = 64;
21pub const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
22pub const MAX_INDEX_NAME_LEN: usize =
23    MAX_ENTITY_NAME_LEN + (MAX_INDEX_FIELDS * (MAX_INDEX_FIELD_NAME_LEN + 1));
24
25///
26/// Decode errors (storage / corruption boundary)
27///
28
29#[derive(Debug, ThisError)]
30pub enum IdentityDecodeError {
31    #[error("invalid size")]
32    InvalidSize,
33    #[error("invalid length")]
34    InvalidLength,
35    #[error("non-ascii encoding")]
36    NonAscii,
37    #[error("non-zero padding")]
38    NonZeroPadding,
39}
40
41///
42/// EntityNameError
43///
44
45#[derive(Debug, ThisError)]
46pub enum EntityNameError {
47    #[error("entity name is empty")]
48    Empty,
49
50    #[error("entity name length {len} exceeds max {max}")]
51    TooLong { len: usize, max: usize },
52
53    #[error("entity name must be ASCII")]
54    NonAscii,
55}
56
57///
58/// IndexNameError
59///
60
61#[derive(Debug, ThisError)]
62pub enum IndexNameError {
63    #[error("index has {len} fields (max {max})")]
64    TooManyFields { len: usize, max: usize },
65
66    #[error("index field name '{field}' exceeds max length {max}")]
67    FieldTooLong { field: String, max: usize },
68
69    #[error("index field name '{field}' must be ASCII")]
70    FieldNonAscii { field: String },
71
72    #[error("index name length {len} exceeds max {max}")]
73    TooLong { len: usize, max: usize },
74}
75
76///
77/// EntityName
78///
79
80#[derive(Clone, Copy, Eq, Hash, PartialEq)]
81pub struct EntityName {
82    len: u8,
83    bytes: [u8; MAX_ENTITY_NAME_LEN],
84}
85
86impl EntityName {
87    /// Fixed on-disk size in bytes (stable, protocol-level)
88    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
89
90    /// Fixed in-memory size (for buffers and arrays)
91    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
92
93    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
94        let bytes = name.as_bytes();
95        let len = bytes.len();
96
97        if len == 0 {
98            return Err(EntityNameError::Empty);
99        }
100        if len > MAX_ENTITY_NAME_LEN {
101            return Err(EntityNameError::TooLong {
102                len,
103                max: MAX_ENTITY_NAME_LEN,
104            });
105        }
106        if !bytes.is_ascii() {
107            return Err(EntityNameError::NonAscii);
108        }
109
110        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
111        out[..len].copy_from_slice(bytes);
112
113        Ok(Self {
114            len: len as u8,
115            bytes: out,
116        })
117    }
118
119    #[must_use]
120    #[expect(clippy::len_without_is_empty)]
121    pub const fn len(&self) -> usize {
122        self.len as usize
123    }
124
125    #[must_use]
126    pub fn as_bytes(&self) -> &[u8] {
127        &self.bytes[..self.len()]
128    }
129
130    #[must_use]
131    pub fn as_str(&self) -> &str {
132        // SAFETY:
133        // Preconditions:
134        // - Constructors (`try_from_str`) and decoders (`from_bytes`) reject
135        //   non-ASCII inputs.
136        // - Stored slices returned by `as_bytes` are within initialized bounds.
137        //
138        // Aliasing:
139        // - This creates an immutable `&str` view over immutable bytes already
140        //   owned by `self`; no mutable aliasing is introduced.
141        //
142        // What would break this:
143        // - Any future constructor/decoder path that permits non-ASCII bytes.
144        // - Any mutation of `bytes[..len]` bypassing validation guarantees.
145        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
146    }
147
148    #[must_use]
149    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
150        let mut out = [0u8; Self::STORED_SIZE_USIZE];
151        out[0] = self.len;
152        out[1..].copy_from_slice(&self.bytes);
153        out
154    }
155
156    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
157        if bytes.len() != Self::STORED_SIZE_USIZE {
158            return Err(IdentityDecodeError::InvalidSize);
159        }
160
161        let len = bytes[0] as usize;
162        if len == 0 || len > MAX_ENTITY_NAME_LEN {
163            return Err(IdentityDecodeError::InvalidLength);
164        }
165        if !bytes[1..=len].is_ascii() {
166            return Err(IdentityDecodeError::NonAscii);
167        }
168        if bytes[1 + len..].iter().any(|&b| b != 0) {
169            return Err(IdentityDecodeError::NonZeroPadding);
170        }
171
172        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
173        name.copy_from_slice(&bytes[1..]);
174
175        Ok(Self {
176            len: len as u8,
177            bytes: name,
178        })
179    }
180
181    #[must_use]
182    pub const fn max_storable() -> Self {
183        Self {
184            len: MAX_ENTITY_NAME_LEN as u8,
185            bytes: [b'z'; MAX_ENTITY_NAME_LEN],
186        }
187    }
188}
189
190impl Ord for EntityName {
191    fn cmp(&self, other: &Self) -> Ordering {
192        self.to_bytes().cmp(&other.to_bytes())
193    }
194}
195
196impl PartialOrd for EntityName {
197    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
198        Some(self.cmp(other))
199    }
200}
201
202impl Display for EntityName {
203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204        f.write_str(self.as_str())
205    }
206}
207
208impl fmt::Debug for EntityName {
209    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
210        write!(f, "EntityName({})", self.as_str())
211    }
212}
213
214///
215/// IndexName
216///
217
218#[derive(Clone, Copy, Eq, Hash, PartialEq)]
219pub struct IndexName {
220    len: u16,
221    bytes: [u8; MAX_INDEX_NAME_LEN],
222}
223
224impl IndexName {
225    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
226    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
227
228    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
229        if fields.len() > MAX_INDEX_FIELDS {
230            return Err(IndexNameError::TooManyFields {
231                len: fields.len(),
232                max: MAX_INDEX_FIELDS,
233            });
234        }
235
236        let mut total_len = entity.len();
237        for field in fields {
238            let field_len = field.len();
239            if field_len > MAX_INDEX_FIELD_NAME_LEN {
240                return Err(IndexNameError::FieldTooLong {
241                    field: (*field).to_string(),
242                    max: MAX_INDEX_FIELD_NAME_LEN,
243                });
244            }
245            if !field.is_ascii() {
246                return Err(IndexNameError::FieldNonAscii {
247                    field: (*field).to_string(),
248                });
249            }
250            total_len = total_len.saturating_add(1 + field_len);
251        }
252
253        if total_len > MAX_INDEX_NAME_LEN {
254            return Err(IndexNameError::TooLong {
255                len: total_len,
256                max: MAX_INDEX_NAME_LEN,
257            });
258        }
259
260        let mut out = [0u8; MAX_INDEX_NAME_LEN];
261        let mut len = 0usize;
262
263        Self::push_bytes(&mut out, &mut len, entity.as_bytes());
264        for field in fields {
265            Self::push_bytes(&mut out, &mut len, b"|");
266            Self::push_bytes(&mut out, &mut len, field.as_bytes());
267        }
268
269        Ok(Self {
270            len: len as u16,
271            bytes: out,
272        })
273    }
274
275    #[must_use]
276    pub fn as_bytes(&self) -> &[u8] {
277        &self.bytes[..self.len as usize]
278    }
279
280    #[must_use]
281    pub fn as_str(&self) -> &str {
282        // SAFETY:
283        // Preconditions:
284        // - `try_from_parts` validates all segments are ASCII.
285        // - `from_bytes` rejects non-ASCII payloads and malformed lengths.
286        // - `as_bytes` returns only initialized bytes within `len`.
287        //
288        // Aliasing:
289        // - We expose a shared `&str` over immutable storage; no mutable alias
290        //   is created while the reference is live.
291        //
292        // What would break this:
293        // - Accepting non-ASCII bytes in any construction/decoding path.
294        // - Mutating the underlying `bytes` without re-validating invariants.
295        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
296    }
297
298    #[must_use]
299    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
300        let mut out = [0u8; Self::STORED_SIZE_USIZE];
301        out[..2].copy_from_slice(&self.len.to_be_bytes());
302        out[2..].copy_from_slice(&self.bytes);
303        out
304    }
305
306    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
307        if bytes.len() != Self::STORED_SIZE_USIZE {
308            return Err(IdentityDecodeError::InvalidSize);
309        }
310
311        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
312        if len == 0 || len > MAX_INDEX_NAME_LEN {
313            return Err(IdentityDecodeError::InvalidLength);
314        }
315        if !bytes[2..2 + len].is_ascii() {
316            return Err(IdentityDecodeError::NonAscii);
317        }
318        if bytes[2 + len..].iter().any(|&b| b != 0) {
319            return Err(IdentityDecodeError::NonZeroPadding);
320        }
321
322        let mut name = [0u8; MAX_INDEX_NAME_LEN];
323        name.copy_from_slice(&bytes[2..]);
324
325        Ok(Self {
326            len: len as u16,
327            bytes: name,
328        })
329    }
330
331    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
332        let end = *len + bytes.len();
333        out[*len..end].copy_from_slice(bytes);
334        *len = end;
335    }
336
337    #[must_use]
338    pub const fn max_storable() -> Self {
339        Self {
340            len: MAX_INDEX_NAME_LEN as u16,
341            bytes: [b'z'; MAX_INDEX_NAME_LEN],
342        }
343    }
344}
345
346impl Ord for IndexName {
347    fn cmp(&self, other: &Self) -> Ordering {
348        self.to_bytes().cmp(&other.to_bytes())
349    }
350}
351
352impl PartialOrd for IndexName {
353    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
354        Some(self.cmp(other))
355    }
356}
357
358impl fmt::Debug for IndexName {
359    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
360        write!(f, "IndexName({})", self.as_str())
361    }
362}
363
364impl Display for IndexName {
365    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
366        f.write_str(self.as_str())
367    }
368}
369
///
/// TESTS
///

#[cfg(test)]
mod tests {
    use super::*;

    // Fixtures that are exactly at the length limits.
    const ENTITY_64: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
    const ENTITY_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const FIELD_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_C: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";
    const FIELD_64_D: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd";

    #[test]
    fn index_name_max_len_matches_limits() {
        let entity = EntityName::try_from_str(ENTITY_64).unwrap();
        let fields = [FIELD_64_A, FIELD_64_B, FIELD_64_C, FIELD_64_D];

        assert_eq!(entity.as_str().len(), MAX_ENTITY_NAME_LEN);
        for field in &fields {
            assert_eq!(field.len(), MAX_INDEX_FIELD_NAME_LEN);
        }
        assert_eq!(fields.len(), MAX_INDEX_FIELDS);

        let name = IndexName::try_from_parts(&entity, &fields).unwrap();
        assert_eq!(name.as_bytes().len(), MAX_INDEX_NAME_LEN);
    }

    #[test]
    fn index_name_max_size_roundtrip_and_ordering() {
        let entity_a = EntityName::try_from_str(ENTITY_64).unwrap();
        let entity_b = EntityName::try_from_str(ENTITY_64_B).unwrap();

        let fields_a = [FIELD_64_A, FIELD_64_A, FIELD_64_A, FIELD_64_A];
        let fields_b = [FIELD_64_B, FIELD_64_B, FIELD_64_B, FIELD_64_B];

        let idx_a = IndexName::try_from_parts(&entity_a, &fields_a).unwrap();
        let idx_b = IndexName::try_from_parts(&entity_b, &fields_b).unwrap();

        let decoded = IndexName::from_bytes(&idx_a.to_bytes()).unwrap();
        assert_eq!(idx_a, decoded);

        assert_eq!(idx_a.cmp(&idx_b), idx_a.to_bytes().cmp(&idx_b.to_bytes()));
    }

    #[test]
    fn rejects_too_many_index_fields() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let fields = ["a", "b", "c", "d", "e"];

        let err = IndexName::try_from_parts(&entity, &fields).unwrap_err();
        assert!(matches!(err, IndexNameError::TooManyFields { .. }));
    }

    #[test]
    fn rejects_index_field_over_len() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let long_field = "a".repeat(MAX_INDEX_FIELD_NAME_LEN + 1);

        let err = IndexName::try_from_parts(&entity, &[long_field.as_str()]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldTooLong { .. }));
    }

    #[test]
    fn rejects_non_ascii_index_field() {
        let entity = EntityName::try_from_str("entity").unwrap();

        let err = IndexName::try_from_parts(&entity, &["naïve"]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldNonAscii { .. }));
    }

    #[test]
    fn entity_try_from_str_roundtrip() {
        let e = EntityName::try_from_str("user").unwrap();
        assert_eq!(e.len(), 4);
        assert_eq!(e.as_str(), "user");
    }

    #[test]
    fn entity_rejects_empty() {
        let err = EntityName::try_from_str("").unwrap_err();
        assert!(matches!(err, EntityNameError::Empty));
    }

    #[test]
    fn entity_rejects_len_over_max() {
        let s = "a".repeat(MAX_ENTITY_NAME_LEN + 1);
        let err = EntityName::try_from_str(&s).unwrap_err();
        assert!(matches!(err, EntityNameError::TooLong { .. }));
    }

    #[test]
    fn entity_rejects_non_ascii() {
        let err = EntityName::try_from_str("usér").unwrap_err();
        assert!(matches!(err, EntityNameError::NonAscii));
    }

    #[test]
    fn entity_storage_roundtrip() {
        let e = EntityName::try_from_str("entity_name").unwrap();
        let bytes = e.to_bytes();
        let decoded = EntityName::from_bytes(&bytes).unwrap();
        assert_eq!(e, decoded);
    }

    #[test]
    fn entity_max_storable_is_ascii_utf8() {
        let max = EntityName::max_storable();
        assert_eq!(max.len(), MAX_ENTITY_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn entity_rejects_invalid_size() {
        let buf = vec![0u8; EntityName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn entity_rejects_zero_len_from_bytes() {
        // An all-zero buffer has a zero length prefix.
        let buf = [0u8; EntityName::STORED_SIZE_USIZE];
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn entity_rejects_len_over_max_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = (MAX_ENTITY_NAME_LEN as u8).saturating_add(1);
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn entity_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = 1;
        buf[1] = 0xFF;
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn entity_rejects_non_zero_padding() {
        let e = EntityName::try_from_str("user").unwrap();
        let mut bytes = e.to_bytes();
        bytes[1 + e.len()] = b'x';

        assert!(matches!(
            EntityName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    #[test]
    fn entity_ordering_matches_bytes() {
        let a = EntityName::try_from_str("abc").unwrap();
        let b = EntityName::try_from_str("abd").unwrap();
        let c = EntityName::try_from_str("abcx").unwrap();

        assert_eq!(a.cmp(&b), a.to_bytes().cmp(&b.to_bytes()));
        assert_eq!(a.cmp(&c), a.to_bytes().cmp(&c.to_bytes()));
    }

    #[test]
    fn index_single_field_format() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["email"]).unwrap();

        assert_eq!(idx.as_str(), "user|email");
    }

    #[test]
    fn index_field_order_is_preserved() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b", "c"]).unwrap();

        assert_eq!(idx.as_str(), "user|a|b|c");
    }

    #[test]
    fn index_without_fields_is_entity_name() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &[]).unwrap();

        assert_eq!(idx.as_str(), "user");
    }

    #[test]
    fn index_storage_roundtrip() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b"]).unwrap();

        let bytes = idx.to_bytes();
        let decoded = IndexName::from_bytes(&bytes).unwrap();
        assert_eq!(idx, decoded);
    }

    #[test]
    fn index_max_storable_is_ascii_utf8() {
        let max = IndexName::max_storable();
        assert_eq!(max.as_bytes().len(), MAX_INDEX_NAME_LEN);
        assert!(max.as_str().is_ascii());
    }

    #[test]
    fn index_rejects_invalid_size() {
        let buf = vec![0u8; IndexName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn index_rejects_invalid_len_from_bytes() {
        // Zero length prefix.
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));

        // Length prefix one past the maximum.
        let over = (MAX_INDEX_NAME_LEN as u16).saturating_add(1);
        buf[..2].copy_from_slice(&over.to_be_bytes());
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn index_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        buf[..2].copy_from_slice(&1u16.to_be_bytes());
        buf[2] = 0xFF;

        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn index_rejects_non_zero_padding() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["email"]).unwrap();

        let mut bytes = idx.to_bytes();
        // First byte after the two-byte prefix and the used region.
        bytes[2 + idx.as_bytes().len()] = b'x';

        assert!(matches!(
            IndexName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    #[test]
    fn display_and_debug_formats() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["email"]).unwrap();

        assert_eq!(entity.to_string(), "user");
        assert_eq!(format!("{entity:?}"), "EntityName(user)");
        assert_eq!(idx.to_string(), "user|email");
        assert_eq!(format!("{idx:?}"), "IndexName(user|email)");
    }

    // ------------------------------------------------------------------
    // FUZZING (deterministic)
    // ------------------------------------------------------------------

    /// Deterministically generates a lowercase ASCII string whose length is
    /// in `1..=max_len`, so the maximum length is actually exercised.
    fn gen_ascii(seed: u64, max_len: usize) -> String {
        let len = (seed as usize % max_len) + 1;
        let mut out = String::with_capacity(len);

        let mut x = seed;
        for _ in 0..len {
            x = x.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let c = b'a' + (x % 26) as u8;
            out.push(c as char);
        }

        out
    }

    #[test]
    fn fuzz_entity_name_roundtrip_and_ordering() {
        let mut prev: Option<EntityName> = None;

        for i in 1..=1_000u64 {
            let s = gen_ascii(i, MAX_ENTITY_NAME_LEN);
            let e = EntityName::try_from_str(&s).unwrap();

            let bytes = e.to_bytes();
            let decoded = EntityName::from_bytes(&bytes).unwrap();
            assert_eq!(e, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&e), p.to_bytes().cmp(&e.to_bytes()));
            }

            prev = Some(e);
        }
    }

    #[test]
    fn fuzz_index_name_roundtrip_and_ordering() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let mut prev: Option<IndexName> = None;

        for i in 1..=1_000u64 {
            // 1..=MAX_INDEX_FIELDS, so the maximum field count is covered too.
            let field_count = (i as usize % MAX_INDEX_FIELDS) + 1;

            let mut fields = Vec::with_capacity(field_count);
            for f in 0..field_count {
                let s = gen_ascii(i * 31 + f as u64, MAX_INDEX_FIELD_NAME_LEN);
                fields.push(s);
            }

            let field_refs: Vec<&str> = fields.iter().map(String::as_str).collect();
            let idx = IndexName::try_from_parts(&entity, &field_refs).unwrap();

            let bytes = idx.to_bytes();
            let decoded = IndexName::from_bytes(&bytes).unwrap();
            assert_eq!(idx, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&idx), p.to_bytes().cmp(&idx.to_bytes()));
            }

            prev = Some(idx);
        }
    }
}