Skip to main content

icydb_core/db/identity/
mod.rs

1//! Module: identity
2//! Responsibility: validated entity/index naming and stable byte ordering contracts.
3//! Does not own: schema metadata, relation policy, or storage-layer persistence.
4//! Boundary: all identity construction/decoding for db data/index key domains.
5//!
6//! Invariants:
7//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
8//! - All construction paths validate invariants.
9//! - Stored byte representation is canonical and order-preserving.
10//! - Ordering semantics follow the length-prefixed stored-byte layout, not
11//!   lexicographic string ordering.
12
13#![expect(clippy::cast_possible_truncation)]
14
15#[cfg(test)]
16mod tests;
17
18use crate::MAX_INDEX_FIELDS;
19use icydb_utils::to_snake_case;
20use std::{
21    cmp::Ordering,
22    fmt::{self, Display},
23};
24use thiserror::Error as ThisError;
25
26///
27/// Constants
28///
29
30pub(super) const MAX_ENTITY_NAME_LEN: usize = 64;
31pub(super) const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
32const MAX_INDEX_NAME_PREFIX_LEN: usize = 5;
33const MAX_ENTITY_NAME_SLUG_LEN: usize = (MAX_ENTITY_NAME_LEN * 3) / 2;
34const MAX_INDEX_FIELD_NAME_SLUG_LEN: usize = (MAX_INDEX_FIELD_NAME_LEN * 3) / 2;
35pub(super) const MAX_INDEX_NAME_LEN: usize = MAX_INDEX_NAME_PREFIX_LEN
36    + MAX_ENTITY_NAME_SLUG_LEN
37    + 2
38    + (MAX_INDEX_FIELDS * MAX_INDEX_FIELD_NAME_SLUG_LEN)
39    + (MAX_INDEX_FIELDS - 1);
40const INDEX_NAME_SEGMENT_DELIMITER: u8 = b'|';
41///
42/// IdentityDecodeError
43/// Decode errors (storage / corruption boundary)
44///
45
46#[derive(Debug, ThisError)]
47pub enum IdentityDecodeError {
48    #[error("invalid size")]
49    InvalidSize,
50
51    #[error("invalid length")]
52    InvalidLength,
53
54    #[error("non-ascii encoding")]
55    NonAscii,
56
57    #[error("non-zero padding")]
58    NonZeroPadding,
59
60    #[error("reserved identity delimiter")]
61    Delimiter,
62}
63
64///
65/// EntityNameError
66///
67
68#[derive(Debug, ThisError)]
69pub enum EntityNameError {
70    #[error("entity name is empty")]
71    Empty,
72
73    #[error("entity name length {len} exceeds max {max}")]
74    TooLong { len: usize, max: usize },
75
76    #[error("entity name must be ASCII")]
77    NonAscii,
78
79    #[error("entity name must not contain '|'")]
80    Delimiter,
81}
82
83///
84/// IndexNameError
85///
86
87#[derive(Debug, ThisError)]
88pub enum IndexNameError {
89    #[error("index has {len} fields (max {max})")]
90    TooManyFields { len: usize, max: usize },
91
92    #[error("index must reference at least one field")]
93    NoFields,
94
95    #[error("index field name is empty")]
96    FieldEmpty,
97
98    #[error("index field name '{field}' exceeds max length {max}")]
99    FieldTooLong { field: String, max: usize },
100
101    #[error("index field name '{field}' must be ASCII")]
102    FieldNonAscii { field: String },
103
104    #[error("index field name '{field}' must not contain '|'")]
105    FieldDelimiter { field: String },
106
107    #[error("index name length {len} exceeds max {max}")]
108    TooLong { len: usize, max: usize },
109}
110
111///
112/// EntityName
113///
114
115#[derive(Clone, Copy, Eq, Hash, PartialEq)]
116pub struct EntityName {
117    len: u8,
118    bytes: [u8; MAX_ENTITY_NAME_LEN],
119}
120
121impl EntityName {
122    /// Fixed on-disk size in bytes (stable, protocol-level)
123    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
124
125    /// Fixed in-memory size (for buffers and arrays)
126    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
127
128    /// Validate and construct an entity name from one ASCII string.
129    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
130        // Phase 1: validate user-visible identity constraints.
131        let bytes = name.as_bytes();
132        let len = bytes.len();
133
134        if len == 0 {
135            return Err(EntityNameError::Empty);
136        }
137        if len > MAX_ENTITY_NAME_LEN {
138            return Err(EntityNameError::TooLong {
139                len,
140                max: MAX_ENTITY_NAME_LEN,
141            });
142        }
143        if !bytes.is_ascii() {
144            return Err(EntityNameError::NonAscii);
145        }
146        if bytes.contains(&INDEX_NAME_SEGMENT_DELIMITER) {
147            return Err(EntityNameError::Delimiter);
148        }
149
150        // Phase 2: write into fixed-size canonical storage.
151        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
152        out[..len].copy_from_slice(bytes);
153
154        Ok(Self {
155            len: len as u8,
156            bytes: out,
157        })
158    }
159
160    /// Return the stored entity-name length.
161    #[must_use]
162    pub const fn len(&self) -> usize {
163        self.len as usize
164    }
165
166    /// Return whether the stored entity-name length is zero.
167    #[must_use]
168    pub const fn is_empty(&self) -> bool {
169        self.len() == 0
170    }
171
172    /// Borrow raw identity bytes excluding trailing fixed-buffer padding.
173    #[must_use]
174    pub fn as_bytes(&self) -> &[u8] {
175        &self.bytes[..self.len()]
176    }
177
178    /// Borrow the entity name as UTF-8 text.
179    #[must_use]
180    pub fn as_str(&self) -> &str {
181        // Invariant: construction and decoding enforce ASCII-only storage,
182        // so UTF-8 decoding cannot fail.
183        std::str::from_utf8(self.as_bytes()).expect("EntityName invariant: ASCII-only storage")
184    }
185
186    /// Encode this identity into its fixed-size persisted representation.
187    #[must_use]
188    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
189        let mut out = [0u8; Self::STORED_SIZE_USIZE];
190        out[0] = self.len;
191        out[1..].copy_from_slice(&self.bytes);
192        out
193    }
194
195    /// Decode one fixed-size persisted entity identity payload.
196    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
197        // Phase 1: validate layout and payload bounds.
198        if bytes.len() != Self::STORED_SIZE_USIZE {
199            return Err(IdentityDecodeError::InvalidSize);
200        }
201
202        let len = bytes[0] as usize;
203        if len == 0 || len > MAX_ENTITY_NAME_LEN {
204            return Err(IdentityDecodeError::InvalidLength);
205        }
206        if !bytes[1..=len].is_ascii() {
207            return Err(IdentityDecodeError::NonAscii);
208        }
209        if bytes[1..=len].contains(&INDEX_NAME_SEGMENT_DELIMITER) {
210            return Err(IdentityDecodeError::Delimiter);
211        }
212        if bytes[1 + len..].iter().any(|&b| b != 0) {
213            return Err(IdentityDecodeError::NonZeroPadding);
214        }
215
216        // Phase 2: materialize canonical fixed-buffer identity storage.
217        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
218        name.copy_from_slice(&bytes[1..]);
219
220        Ok(Self {
221            len: len as u8,
222            bytes: name,
223        })
224    }
225}
226
227impl Ord for EntityName {
228    fn cmp(&self, other: &Self) -> Ordering {
229        // Keep ordering consistent with `to_bytes()` (length prefix first).
230        // This is deterministic protocol/storage ordering, not lexical string order.
231        self.len.cmp(&other.len).then(self.bytes.cmp(&other.bytes))
232    }
233}
234
235impl PartialOrd for EntityName {
236    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
237        Some(self.cmp(other))
238    }
239}
240
241impl Display for EntityName {
242    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
243        f.write_str(self.as_str())
244    }
245}
246
247impl fmt::Debug for EntityName {
248    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
249        write!(f, "EntityName({})", self.as_str())
250    }
251}
252
253///
254/// IndexName
255///
256
257#[derive(Clone, Copy, Eq, Hash, PartialEq)]
258pub struct IndexName {
259    len: u16,
260    bytes: [u8; MAX_INDEX_NAME_LEN],
261}
262
263impl IndexName {
264    /// Fixed on-disk size in bytes (stable, protocol-level).
265    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
266    /// Fixed in-memory size (for buffers and arrays).
267    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
268
269    /// Validate and construct one non-unique index identity from an entity +
270    /// field list.
271    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
272        Self::try_from_parts_with_prefix("idx", entity, fields)
273    }
274
275    /// Validate and construct one unique index identity from an entity + field
276    /// list.
277    pub fn try_unique_from_parts(
278        entity: &EntityName,
279        fields: &[&str],
280    ) -> Result<Self, IndexNameError> {
281        Self::try_from_parts_with_prefix("uniq", entity, fields)
282    }
283
284    fn try_from_parts_with_prefix(
285        prefix: &str,
286        entity: &EntityName,
287        fields: &[&str],
288    ) -> Result<Self, IndexNameError> {
289        // Phase 1: validate index-field count and per-field identity constraints.
290        if fields.is_empty() {
291            return Err(IndexNameError::NoFields);
292        }
293        if fields.len() > MAX_INDEX_FIELDS {
294            return Err(IndexNameError::TooManyFields {
295                len: fields.len(),
296                max: MAX_INDEX_FIELDS,
297            });
298        }
299
300        let mut field_slugs = Vec::with_capacity(fields.len());
301        for field in fields {
302            let field_len = field.len();
303            if field_len == 0 {
304                return Err(IndexNameError::FieldEmpty);
305            }
306            if field_len > MAX_INDEX_FIELD_NAME_LEN {
307                return Err(IndexNameError::FieldTooLong {
308                    field: (*field).to_string(),
309                    max: MAX_INDEX_FIELD_NAME_LEN,
310                });
311            }
312            if !field.is_ascii() {
313                return Err(IndexNameError::FieldNonAscii {
314                    field: (*field).to_string(),
315                });
316            }
317            if field.as_bytes().contains(&INDEX_NAME_SEGMENT_DELIMITER) {
318                return Err(IndexNameError::FieldDelimiter {
319                    field: (*field).to_string(),
320                });
321            }
322            let slug = index_name_slug(field);
323            if slug.is_empty() {
324                return Err(IndexNameError::FieldEmpty);
325            }
326            field_slugs.push(slug);
327        }
328
329        let entity_slug = index_name_slug(entity.as_str());
330        let total_len = prefix
331            .len()
332            .saturating_add(1)
333            .saturating_add(entity_slug.len())
334            .saturating_add(2)
335            .saturating_add(field_slugs.iter().map(String::len).sum::<usize>())
336            .saturating_add(field_slugs.len().saturating_sub(1));
337        if total_len > MAX_INDEX_NAME_LEN {
338            return Err(IndexNameError::TooLong {
339                len: total_len,
340                max: MAX_INDEX_NAME_LEN,
341            });
342        }
343
344        // Phase 2: encode canonical `idx_entity__field...` bytes into fixed storage.
345        let mut out = [0u8; MAX_INDEX_NAME_LEN];
346        let mut len = 0usize;
347
348        Self::push_bytes(&mut out, &mut len, prefix.as_bytes());
349        Self::push_bytes(&mut out, &mut len, b"_");
350        Self::push_bytes(&mut out, &mut len, entity_slug.as_bytes());
351        Self::push_bytes(&mut out, &mut len, b"__");
352        for (index, field_slug) in field_slugs.iter().enumerate() {
353            if index > 0 {
354                Self::push_bytes(&mut out, &mut len, b"_");
355            }
356            Self::push_bytes(&mut out, &mut len, field_slug.as_bytes());
357        }
358
359        Ok(Self {
360            len: len as u16,
361            bytes: out,
362        })
363    }
364
365    /// Borrow raw index-identity bytes excluding trailing fixed-buffer padding.
366    #[must_use]
367    pub fn as_bytes(&self) -> &[u8] {
368        &self.bytes[..self.len as usize]
369    }
370
371    /// Borrow the index identity as UTF-8 text.
372    #[must_use]
373    pub fn as_str(&self) -> &str {
374        // Invariant: construction and decoding enforce ASCII-only storage,
375        // so UTF-8 decoding cannot fail.
376        std::str::from_utf8(self.as_bytes()).expect("IndexName invariant: ASCII-only storage")
377    }
378
379    /// Encode this identity into its fixed-size persisted representation.
380    #[must_use]
381    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
382        let mut out = [0u8; Self::STORED_SIZE_USIZE];
383        out[..2].copy_from_slice(&self.len.to_be_bytes());
384        out[2..].copy_from_slice(&self.bytes);
385        out
386    }
387
388    /// Decode one fixed-size persisted index identity payload.
389    ///
390    /// This validates the canonical fixed-width byte envelope only. It does not
391    /// reconstruct field segments or prove the bytes were produced by
392    /// `try_from_parts`; callers must ensure persisted bytes originate from a
393    /// previously validated `IndexName`.
394    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
395        // Phase 1: validate layout and payload bounds.
396        if bytes.len() != Self::STORED_SIZE_USIZE {
397            return Err(IdentityDecodeError::InvalidSize);
398        }
399
400        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
401        if len == 0 || len > MAX_INDEX_NAME_LEN {
402            return Err(IdentityDecodeError::InvalidLength);
403        }
404        if !bytes[2..2 + len].is_ascii() {
405            return Err(IdentityDecodeError::NonAscii);
406        }
407        if bytes[2 + len..].iter().any(|&b| b != 0) {
408            return Err(IdentityDecodeError::NonZeroPadding);
409        }
410
411        // Phase 2: materialize canonical fixed-buffer identity storage.
412        let mut name = [0u8; MAX_INDEX_NAME_LEN];
413        name.copy_from_slice(&bytes[2..]);
414
415        Ok(Self {
416            len: len as u16,
417            bytes: name,
418        })
419    }
420
421    // Append bytes into the fixed-size identity buffer while tracking write offset.
422    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
423        let end = *len + bytes.len();
424        out[*len..end].copy_from_slice(bytes);
425        *len = end;
426    }
427}
428
429fn index_name_slug(value: &str) -> String {
430    let separated = value
431        .chars()
432        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
433        .collect::<String>();
434
435    to_snake_case(separated.as_str())
436}
437
438impl Ord for IndexName {
439    fn cmp(&self, other: &Self) -> Ordering {
440        self.to_bytes().cmp(&other.to_bytes())
441    }
442}
443
444impl PartialOrd for IndexName {
445    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
446        Some(self.cmp(other))
447    }
448}
449
450impl fmt::Debug for IndexName {
451    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
452        write!(f, "IndexName({})", self.as_str())
453    }
454}
455
456impl Display for IndexName {
457    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
458        f.write_str(self.as_str())
459    }
460}