Skip to main content

icydb_core/db/
identity.rs

1#![expect(clippy::cast_possible_truncation)]
2//! Identity invariants and construction.
3//!
4//! Invariants:
5//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
6//! - All construction paths validate invariants.
//! - Stored byte representation is canonical, and `Ord` follows the stored bytes
//!   (length prefix first, then the name bytes).
8
9use crate::MAX_INDEX_FIELDS;
10use std::{
11    cmp::Ordering,
12    fmt::{self, Display},
13};
14use thiserror::Error as ThisError;
15
16///
17/// Constants
18///
19
20pub const MAX_ENTITY_NAME_LEN: usize = 64;
21pub const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
22pub const MAX_INDEX_NAME_LEN: usize =
23    MAX_ENTITY_NAME_LEN + (MAX_INDEX_FIELDS * (MAX_INDEX_FIELD_NAME_LEN + 1));
24
25///
26/// Decode errors (storage / corruption boundary)
27///
28
29#[derive(Debug, ThisError)]
30pub enum IdentityDecodeError {
31    #[error("invalid size")]
32    InvalidSize,
33    #[error("invalid length")]
34    InvalidLength,
35    #[error("non-ascii encoding")]
36    NonAscii,
37    #[error("non-zero padding")]
38    NonZeroPadding,
39}
40
41///
42/// EntityNameError
43///
44
45#[derive(Debug, ThisError)]
46pub enum EntityNameError {
47    #[error("entity name is empty")]
48    Empty,
49
50    #[error("entity name length {len} exceeds max {max}")]
51    TooLong { len: usize, max: usize },
52
53    #[error("entity name must be ASCII")]
54    NonAscii,
55}
56
57///
58/// IndexNameError
59///
60
61#[derive(Debug, ThisError)]
62pub enum IndexNameError {
63    #[error("index has {len} fields (max {max})")]
64    TooManyFields { len: usize, max: usize },
65
66    #[error("index field name '{field}' exceeds max length {max}")]
67    FieldTooLong { field: String, max: usize },
68
69    #[error("index field name '{field}' must be ASCII")]
70    FieldNonAscii { field: String },
71
72    #[error("index name length {len} exceeds max {max}")]
73    TooLong { len: usize, max: usize },
74}
75
76///
77/// EntityName
78///
79
80#[derive(Clone, Copy, Eq, Hash, PartialEq)]
81pub struct EntityName {
82    len: u8,
83    bytes: [u8; MAX_ENTITY_NAME_LEN],
84}
85
86impl EntityName {
87    /// Fixed on-disk size in bytes (stable, protocol-level)
88    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
89
90    /// Fixed in-memory size (for buffers and arrays)
91    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
92
93    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
94        let bytes = name.as_bytes();
95        let len = bytes.len();
96
97        if len == 0 {
98            return Err(EntityNameError::Empty);
99        }
100        if len > MAX_ENTITY_NAME_LEN {
101            return Err(EntityNameError::TooLong {
102                len,
103                max: MAX_ENTITY_NAME_LEN,
104            });
105        }
106        if !bytes.is_ascii() {
107            return Err(EntityNameError::NonAscii);
108        }
109
110        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
111        out[..len].copy_from_slice(bytes);
112
113        Ok(Self {
114            len: len as u8,
115            bytes: out,
116        })
117    }
118
119    #[must_use]
120    #[expect(clippy::len_without_is_empty)]
121    pub const fn len(&self) -> usize {
122        self.len as usize
123    }
124
125    #[must_use]
126    pub fn as_bytes(&self) -> &[u8] {
127        &self.bytes[..self.len()]
128    }
129
130    #[must_use]
131    pub fn as_str(&self) -> &str {
132        // Safe: ASCII enforced at construction
133        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
134    }
135
136    #[must_use]
137    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
138        let mut out = [0u8; Self::STORED_SIZE_USIZE];
139        out[0] = self.len;
140        out[1..].copy_from_slice(&self.bytes);
141        out
142    }
143
144    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
145        if bytes.len() != Self::STORED_SIZE_USIZE {
146            return Err(IdentityDecodeError::InvalidSize);
147        }
148
149        let len = bytes[0] as usize;
150        if len == 0 || len > MAX_ENTITY_NAME_LEN {
151            return Err(IdentityDecodeError::InvalidLength);
152        }
153        if !bytes[1..=len].is_ascii() {
154            return Err(IdentityDecodeError::NonAscii);
155        }
156        if bytes[1 + len..].iter().any(|&b| b != 0) {
157            return Err(IdentityDecodeError::NonZeroPadding);
158        }
159
160        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
161        name.copy_from_slice(&bytes[1..]);
162
163        Ok(Self {
164            len: len as u8,
165            bytes: name,
166        })
167    }
168
169    #[must_use]
170    pub const fn max_storable() -> Self {
171        Self {
172            len: MAX_ENTITY_NAME_LEN as u8,
173            bytes: [b'z'; MAX_ENTITY_NAME_LEN],
174        }
175    }
176}
177
178impl Ord for EntityName {
179    fn cmp(&self, other: &Self) -> Ordering {
180        self.to_bytes().cmp(&other.to_bytes())
181    }
182}
183
184impl PartialOrd for EntityName {
185    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
186        Some(self.cmp(other))
187    }
188}
189
190impl Display for EntityName {
191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192        f.write_str(self.as_str())
193    }
194}
195
196impl fmt::Debug for EntityName {
197    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
198        write!(f, "EntityName({})", self.as_str())
199    }
200}
201
202///
203/// IndexName
204///
205
206#[derive(Clone, Copy, Eq, Hash, PartialEq)]
207pub struct IndexName {
208    len: u16,
209    bytes: [u8; MAX_INDEX_NAME_LEN],
210}
211
212impl IndexName {
213    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
214    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
215
216    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
217        if fields.len() > MAX_INDEX_FIELDS {
218            return Err(IndexNameError::TooManyFields {
219                len: fields.len(),
220                max: MAX_INDEX_FIELDS,
221            });
222        }
223
224        let mut total_len = entity.len();
225        for field in fields {
226            let field_len = field.len();
227            if field_len > MAX_INDEX_FIELD_NAME_LEN {
228                return Err(IndexNameError::FieldTooLong {
229                    field: (*field).to_string(),
230                    max: MAX_INDEX_FIELD_NAME_LEN,
231                });
232            }
233            if !field.is_ascii() {
234                return Err(IndexNameError::FieldNonAscii {
235                    field: (*field).to_string(),
236                });
237            }
238            total_len = total_len.saturating_add(1 + field_len);
239        }
240
241        if total_len > MAX_INDEX_NAME_LEN {
242            return Err(IndexNameError::TooLong {
243                len: total_len,
244                max: MAX_INDEX_NAME_LEN,
245            });
246        }
247
248        let mut out = [0u8; MAX_INDEX_NAME_LEN];
249        let mut len = 0usize;
250
251        Self::push_bytes(&mut out, &mut len, entity.as_bytes());
252        for field in fields {
253            Self::push_bytes(&mut out, &mut len, b"|");
254            Self::push_bytes(&mut out, &mut len, field.as_bytes());
255        }
256
257        Ok(Self {
258            len: len as u16,
259            bytes: out,
260        })
261    }
262
263    #[must_use]
264    pub fn as_bytes(&self) -> &[u8] {
265        &self.bytes[..self.len as usize]
266    }
267
268    #[must_use]
269    pub fn as_str(&self) -> &str {
270        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
271    }
272
273    #[must_use]
274    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
275        let mut out = [0u8; Self::STORED_SIZE_USIZE];
276        out[..2].copy_from_slice(&self.len.to_be_bytes());
277        out[2..].copy_from_slice(&self.bytes);
278        out
279    }
280
281    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
282        if bytes.len() != Self::STORED_SIZE_USIZE {
283            return Err(IdentityDecodeError::InvalidSize);
284        }
285
286        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
287        if len == 0 || len > MAX_INDEX_NAME_LEN {
288            return Err(IdentityDecodeError::InvalidLength);
289        }
290        if !bytes[2..2 + len].is_ascii() {
291            return Err(IdentityDecodeError::NonAscii);
292        }
293        if bytes[2 + len..].iter().any(|&b| b != 0) {
294            return Err(IdentityDecodeError::NonZeroPadding);
295        }
296
297        let mut name = [0u8; MAX_INDEX_NAME_LEN];
298        name.copy_from_slice(&bytes[2..]);
299
300        Ok(Self {
301            len: len as u16,
302            bytes: name,
303        })
304    }
305
306    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
307        let end = *len + bytes.len();
308        out[*len..end].copy_from_slice(bytes);
309        *len = end;
310    }
311
312    #[must_use]
313    pub const fn max_storable() -> Self {
314        Self {
315            len: MAX_INDEX_NAME_LEN as u16,
316            bytes: [b'z'; MAX_INDEX_NAME_LEN],
317        }
318    }
319}
320
321impl Ord for IndexName {
322    fn cmp(&self, other: &Self) -> Ordering {
323        self.to_bytes().cmp(&other.to_bytes())
324    }
325}
326
327impl PartialOrd for IndexName {
328    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
329        Some(self.cmp(other))
330    }
331}
332
333impl fmt::Debug for IndexName {
334    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
335        write!(f, "IndexName({})", self.as_str())
336    }
337}
338
339impl Display for IndexName {
340    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341        f.write_str(self.as_str())
342    }
343}
344
345///
346/// TESTS
347///
348
#[cfg(test)]
mod tests {
    //! Unit tests for identity construction, stored-form round trips, decode
    //! rejections and ordering, plus deterministic pseudo-random sweeps.

    use super::*;

    const ENTITY_64: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
    const ENTITY_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const FIELD_64_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const FIELD_64_C: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";
    const FIELD_64_D: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd";

    #[test]
    fn index_name_max_len_matches_limits() {
        let entity = EntityName::try_from_str(ENTITY_64).unwrap();
        let fields = [FIELD_64_A, FIELD_64_B, FIELD_64_C, FIELD_64_D];

        assert_eq!(entity.as_str().len(), MAX_ENTITY_NAME_LEN);
        for field in &fields {
            assert_eq!(field.len(), MAX_INDEX_FIELD_NAME_LEN);
        }
        assert_eq!(fields.len(), MAX_INDEX_FIELDS);

        let name = IndexName::try_from_parts(&entity, &fields).unwrap();
        assert_eq!(name.as_bytes().len(), MAX_INDEX_NAME_LEN);
    }

    #[test]
    fn index_name_max_size_roundtrip_and_ordering() {
        let entity_a = EntityName::try_from_str(ENTITY_64).unwrap();
        let entity_b = EntityName::try_from_str(ENTITY_64_B).unwrap();

        let fields_a = [FIELD_64_A, FIELD_64_A, FIELD_64_A, FIELD_64_A];
        let fields_b = [FIELD_64_B, FIELD_64_B, FIELD_64_B, FIELD_64_B];

        let idx_a = IndexName::try_from_parts(&entity_a, &fields_a).unwrap();
        let idx_b = IndexName::try_from_parts(&entity_b, &fields_b).unwrap();

        let decoded = IndexName::from_bytes(&idx_a.to_bytes()).unwrap();
        assert_eq!(idx_a, decoded);

        assert_eq!(idx_a.cmp(&idx_b), idx_a.to_bytes().cmp(&idx_b.to_bytes()));
    }

    #[test]
    fn rejects_too_many_index_fields() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let fields = ["a", "b", "c", "d", "e"];

        let err = IndexName::try_from_parts(&entity, &fields).unwrap_err();
        assert!(matches!(err, IndexNameError::TooManyFields { .. }));
    }

    #[test]
    fn rejects_index_field_over_len() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let long_field = "a".repeat(MAX_INDEX_FIELD_NAME_LEN + 1);

        let err = IndexName::try_from_parts(&entity, &[long_field.as_str()]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldTooLong { .. }));
    }

    #[test]
    fn rejects_index_field_non_ascii() {
        let entity = EntityName::try_from_str("entity").unwrap();

        let err = IndexName::try_from_parts(&entity, &["usér"]).unwrap_err();
        assert!(matches!(err, IndexNameError::FieldNonAscii { .. }));
    }

    #[test]
    fn entity_try_from_str_roundtrip() {
        let e = EntityName::try_from_str("user").unwrap();
        assert_eq!(e.len(), 4);
        assert_eq!(e.as_str(), "user");
    }

    #[test]
    fn entity_rejects_empty() {
        let err = EntityName::try_from_str("").unwrap_err();
        assert!(matches!(err, EntityNameError::Empty));
    }

    #[test]
    fn entity_rejects_len_over_max() {
        let s = "a".repeat(MAX_ENTITY_NAME_LEN + 1);
        let err = EntityName::try_from_str(&s).unwrap_err();
        assert!(matches!(err, EntityNameError::TooLong { .. }));
    }

    #[test]
    fn entity_rejects_non_ascii() {
        let err = EntityName::try_from_str("usér").unwrap_err();
        assert!(matches!(err, EntityNameError::NonAscii));
    }

    #[test]
    fn entity_storage_roundtrip() {
        let e = EntityName::try_from_str("entity_name").unwrap();
        let bytes = e.to_bytes();
        let decoded = EntityName::from_bytes(&bytes).unwrap();
        assert_eq!(e, decoded);
    }

    #[test]
    fn entity_rejects_invalid_size() {
        let buf = vec![0u8; EntityName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn entity_rejects_len_over_max_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = (MAX_ENTITY_NAME_LEN as u8).saturating_add(1);
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn entity_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; EntityName::STORED_SIZE_USIZE];
        buf[0] = 1;
        buf[1] = 0xFF;
        assert!(matches!(
            EntityName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn entity_rejects_non_zero_padding() {
        let e = EntityName::try_from_str("user").unwrap();
        let mut bytes = e.to_bytes();
        bytes[1 + e.len()] = b'x';

        assert!(matches!(
            EntityName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    #[test]
    fn entity_ordering_matches_bytes() {
        let a = EntityName::try_from_str("abc").unwrap();
        let b = EntityName::try_from_str("abd").unwrap();
        let c = EntityName::try_from_str("abcx").unwrap();

        assert_eq!(a.cmp(&b), a.to_bytes().cmp(&b.to_bytes()));
        assert_eq!(a.cmp(&c), a.to_bytes().cmp(&c.to_bytes()));
    }

    #[test]
    fn index_single_field_format() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["email"]).unwrap();

        assert_eq!(idx.as_str(), "user|email");
    }

    #[test]
    fn index_field_order_is_preserved() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b", "c"]).unwrap();

        assert_eq!(idx.as_str(), "user|a|b|c");
    }

    #[test]
    fn index_storage_roundtrip() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a", "b"]).unwrap();

        let bytes = idx.to_bytes();
        let decoded = IndexName::from_bytes(&bytes).unwrap();
        assert_eq!(idx, decoded);
    }

    #[test]
    fn index_rejects_invalid_size() {
        let buf = vec![0u8; IndexName::STORED_SIZE_USIZE - 1];
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidSize)
        ));
    }

    #[test]
    fn index_rejects_zero_len_from_bytes() {
        let buf = [0u8; IndexName::STORED_SIZE_USIZE];
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn index_rejects_len_over_max_from_bytes() {
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        let over = (MAX_INDEX_NAME_LEN + 1) as u16;
        buf[..2].copy_from_slice(&over.to_be_bytes());
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::InvalidLength)
        ));
    }

    #[test]
    fn index_rejects_non_ascii_from_bytes() {
        let mut buf = [0u8; IndexName::STORED_SIZE_USIZE];
        buf[1] = 1;
        buf[2] = 0xFF;
        assert!(matches!(
            IndexName::from_bytes(&buf),
            Err(IdentityDecodeError::NonAscii)
        ));
    }

    #[test]
    fn index_rejects_non_zero_padding() {
        let entity = EntityName::try_from_str("user").unwrap();
        let idx = IndexName::try_from_parts(&entity, &["a"]).unwrap();
        let mut bytes = idx.to_bytes();
        bytes[2 + idx.as_bytes().len()] = b'x';

        assert!(matches!(
            IndexName::from_bytes(&bytes),
            Err(IdentityDecodeError::NonZeroPadding)
        ));
    }

    // ------------------------------------------------------------------
    // FUZZING (deterministic)
    // ------------------------------------------------------------------

    /// Deterministically derives a lowercase ASCII string of `1..=max_len`
    /// bytes from `seed`.
    fn gen_ascii(seed: u64, max_len: usize) -> String {
        assert!(max_len > 0, "max_len must be non-zero");

        // `% max_len + 1` spans 1..=max_len, so the maximum length is covered
        // too (clamping `% max_len` to at least 1 would never reach it).
        let len = (seed as usize % max_len) + 1;
        let mut out = String::with_capacity(len);

        let mut x = seed;
        for _ in 0..len {
            x = x.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let c = b'a' + (x % 26) as u8;
            out.push(c as char);
        }

        out
    }

    #[test]
    fn gen_ascii_covers_full_length_range() {
        assert_eq!(gen_ascii(63, 64).len(), 64);
        assert_eq!(gen_ascii(64, 64).len(), 1);
        assert!(gen_ascii(7, 64).bytes().all(|b| b.is_ascii_lowercase()));
    }

    #[test]
    fn fuzz_entity_name_roundtrip_and_ordering() {
        let mut prev: Option<EntityName> = None;

        for i in 1..=1_000u64 {
            let s = gen_ascii(i, MAX_ENTITY_NAME_LEN);
            let e = EntityName::try_from_str(&s).unwrap();

            let bytes = e.to_bytes();
            let decoded = EntityName::from_bytes(&bytes).unwrap();
            assert_eq!(e, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&e), p.to_bytes().cmp(&e.to_bytes()));
            }

            prev = Some(e);
        }
    }

    #[test]
    fn fuzz_index_name_roundtrip_and_ordering() {
        let entity = EntityName::try_from_str("entity").unwrap();
        let mut prev: Option<IndexName> = None;

        for i in 1..=1_000u64 {
            // 1..=MAX_INDEX_FIELDS, so the maximum field count is exercised too.
            let field_count = (i as usize % MAX_INDEX_FIELDS) + 1;

            let mut fields = Vec::with_capacity(field_count);
            for f in 0..field_count {
                let s = gen_ascii(i * 31 + f as u64, MAX_INDEX_FIELD_NAME_LEN);
                fields.push(s);
            }

            let field_refs: Vec<&str> = fields.iter().map(String::as_str).collect();
            let idx = IndexName::try_from_parts(&entity, &field_refs).unwrap();

            let bytes = idx.to_bytes();
            let decoded = IndexName::from_bytes(&bytes).unwrap();
            assert_eq!(idx, decoded);

            if let Some(p) = prev {
                assert_eq!(p.cmp(&idx), p.to_bytes().cmp(&idx.to_bytes()));
            }

            prev = Some(idx);
        }
    }
}