Skip to main content

icydb_core/db/identity/
mod.rs

1//! Module: db::identity
2//! Responsibility: validated entity/index naming and stable byte ordering contracts.
3//! Does not own: schema metadata, relation policy, or storage-layer persistence.
4//! Boundary: all identity construction/decoding for db data/index key domains.
5//!
6//! Invariants:
7//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
8//! - All construction paths validate invariants.
9//! - Stored byte representation is canonical and order-preserving.
10//! - Ordering semantics follow the length-prefixed stored-byte layout, not
11//!   lexicographic string ordering.
12
13#![expect(clippy::cast_possible_truncation)]
14
15#[cfg(test)]
16mod tests;
17
18use crate::MAX_INDEX_FIELDS;
19use icydb_utils::to_snake_case;
20use std::{
21    cmp::Ordering,
22    fmt::{self, Display},
23};
24
25///
26/// Constants
27///
28
29const MAX_ENTITY_NAME_LEN: usize = 64;
30const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
31const MAX_INDEX_NAME_PREFIX_LEN: usize = 5;
32const MAX_ENTITY_NAME_SLUG_LEN: usize = (MAX_ENTITY_NAME_LEN * 3) / 2;
33const MAX_INDEX_FIELD_NAME_SLUG_LEN: usize = (MAX_INDEX_FIELD_NAME_LEN * 3) / 2;
34const MAX_INDEX_NAME_LEN: usize = MAX_INDEX_NAME_PREFIX_LEN
35    + MAX_ENTITY_NAME_SLUG_LEN
36    + 2
37    + (MAX_INDEX_FIELDS * MAX_INDEX_FIELD_NAME_SLUG_LEN)
38    + (MAX_INDEX_FIELDS - 1);
39const INDEX_NAME_SEGMENT_DELIMITER: u8 = b'|';
40
/// Decode error for persisted identity bytes.
///
/// Owned by the identity layer at storage/corruption boundaries.
///
/// Implements [`Display`] and [`std::error::Error`] so it can be reported to
/// users and propagated through generic error plumbing.

#[derive(Clone, Debug, Eq, PartialEq)]
pub enum IdentityDecodeError {
    /// Persisted byte slice has the wrong fixed envelope size.
    InvalidSize,

    /// Persisted length prefix is empty or exceeds the identity limit.
    InvalidLength,

    /// Identity payload contains non-ASCII bytes.
    NonAscii,

    /// Fixed-width padding contains non-zero bytes.
    NonZeroPadding,

    /// Entity identity payload contains the reserved index segment delimiter.
    Delimiter,
}

impl Display for IdentityDecodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every variant is field-less, so each maps to one static message.
        let message = match self {
            Self::InvalidSize => "identity bytes have the wrong fixed envelope size",
            Self::InvalidLength => "identity length prefix is empty or exceeds the limit",
            Self::NonAscii => "identity payload contains non-ASCII bytes",
            Self::NonZeroPadding => "identity padding contains non-zero bytes",
            Self::Delimiter => "entity identity contains the reserved index segment delimiter",
        };

        f.write_str(message)
    }
}

impl std::error::Error for IdentityDecodeError {}
62
/// Admission error for generated entity names.
///
/// Owned by the identity layer before entity names enter stable key formats.
///
/// Implements [`Display`] and [`std::error::Error`] so it can be reported to
/// users and propagated through generic error plumbing.

#[derive(Clone, Debug, Eq, PartialEq)]
pub enum EntityNameError {
    /// Entity name is empty.
    Empty,

    /// Entity name exceeds the stable identity byte limit.
    TooLong { len: usize, max: usize },

    /// Entity name contains non-ASCII bytes.
    NonAscii,

    /// Entity name contains the reserved index segment delimiter.
    Delimiter,
}

impl Display for EntityNameError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Empty => f.write_str("entity name is empty"),
            Self::TooLong { len, max } => {
                write!(f, "entity name length {len} exceeds the maximum of {max} bytes")
            }
            Self::NonAscii => f.write_str("entity name contains non-ASCII bytes"),
            Self::Delimiter => {
                f.write_str("entity name contains the reserved index segment delimiter")
            }
        }
    }
}

impl std::error::Error for EntityNameError {}
81
/// Admission error for generated index names.
///
/// Owned by the identity layer while deriving index names from an entity and
/// field list.
///
/// Implements [`Display`] and [`std::error::Error`] so it can be reported to
/// users and propagated through generic error plumbing.

#[derive(Clone, Debug, Eq, PartialEq)]
pub enum IndexNameError {
    /// Index field list exceeds the supported key-width limit.
    TooManyFields { len: usize, max: usize },

    /// Index field list is empty.
    NoFields,

    /// One index field segment is empty.
    FieldEmpty,

    /// One index field segment exceeds the stable identity byte limit.
    FieldTooLong { field: String, max: usize },

    /// One index field segment contains non-ASCII bytes.
    FieldNonAscii { field: String },

    /// One index field segment contains the reserved segment delimiter.
    FieldDelimiter { field: String },

    /// Derived index name exceeds the stable identity byte limit.
    TooLong { len: usize, max: usize },
}

impl Display for IndexNameError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::TooManyFields { len, max } => {
                write!(f, "index has {len} fields, exceeding the maximum of {max}")
            }
            Self::NoFields => f.write_str("index has no fields"),
            Self::FieldEmpty => f.write_str("index field name is empty"),
            Self::FieldTooLong { field, max } => {
                write!(f, "index field name '{field}' exceeds the maximum of {max} bytes")
            }
            Self::FieldNonAscii { field } => {
                write!(f, "index field name '{field}' contains non-ASCII bytes")
            }
            Self::FieldDelimiter { field } => {
                write!(
                    f,
                    "index field name '{field}' contains the reserved segment delimiter"
                )
            }
            Self::TooLong { len, max } => {
                write!(f, "index name length {len} exceeds the maximum of {max} bytes")
            }
        }
    }
}

impl std::error::Error for IndexNameError {}
110
111/// Validated entity identity with fixed-size persisted representation.
112///
113/// Used by data-key and index-key domains as canonical entity-name bytes.
114
115#[derive(Clone, Copy, Eq, Hash, PartialEq)]
116pub struct EntityName {
117    len: u8,
118    bytes: [u8; MAX_ENTITY_NAME_LEN],
119}
120
121impl EntityName {
122    /// Fixed on-disk size in bytes (stable, protocol-level)
123    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
124
125    /// Fixed in-memory size (for buffers and arrays)
126    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
127
128    /// Validate and construct an entity name from one ASCII string.
129    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
130        // Phase 1: validate user-visible identity constraints.
131        let bytes = name.as_bytes();
132        let len = bytes.len();
133
134        if len == 0 {
135            return Err(EntityNameError::Empty);
136        }
137        if len > MAX_ENTITY_NAME_LEN {
138            return Err(EntityNameError::TooLong {
139                len,
140                max: MAX_ENTITY_NAME_LEN,
141            });
142        }
143        if !bytes.is_ascii() {
144            return Err(EntityNameError::NonAscii);
145        }
146        if bytes.contains(&INDEX_NAME_SEGMENT_DELIMITER) {
147            return Err(EntityNameError::Delimiter);
148        }
149
150        // Phase 2: write into fixed-size canonical storage.
151        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
152        out[..len].copy_from_slice(bytes);
153
154        Ok(Self {
155            len: len as u8,
156            bytes: out,
157        })
158    }
159
160    /// Return the stored entity-name length.
161    #[must_use]
162    pub const fn len(&self) -> usize {
163        self.len as usize
164    }
165
166    /// Return whether the stored entity-name length is zero.
167    #[must_use]
168    pub const fn is_empty(&self) -> bool {
169        self.len() == 0
170    }
171
172    /// Borrow raw identity bytes excluding trailing fixed-buffer padding.
173    #[must_use]
174    pub fn as_bytes(&self) -> &[u8] {
175        &self.bytes[..self.len()]
176    }
177
178    /// Borrow the entity name as UTF-8 text.
179    ///
180    /// # Panics
181    ///
182    /// Panics if the stored entity-name bytes violate the ASCII-only identity
183    /// invariant. Construction and decoding are expected to prevent this.
184    #[must_use]
185    pub fn as_str(&self) -> &str {
186        // Invariant: construction and decoding enforce ASCII-only storage,
187        // so UTF-8 decoding cannot fail.
188        std::str::from_utf8(self.as_bytes()).expect("EntityName invariant: ASCII-only storage")
189    }
190
191    /// Encode this identity into its fixed-size persisted representation.
192    #[must_use]
193    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
194        let mut out = [0u8; Self::STORED_SIZE_USIZE];
195        out[0] = self.len;
196        out[1..].copy_from_slice(&self.bytes);
197        out
198    }
199
200    /// Decode one fixed-size persisted entity identity payload.
201    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
202        // Phase 1: validate layout and payload bounds.
203        if bytes.len() != Self::STORED_SIZE_USIZE {
204            return Err(IdentityDecodeError::InvalidSize);
205        }
206
207        let len = bytes[0] as usize;
208        if len == 0 || len > MAX_ENTITY_NAME_LEN {
209            return Err(IdentityDecodeError::InvalidLength);
210        }
211        if !bytes[1..=len].is_ascii() {
212            return Err(IdentityDecodeError::NonAscii);
213        }
214        if bytes[1..=len].contains(&INDEX_NAME_SEGMENT_DELIMITER) {
215            return Err(IdentityDecodeError::Delimiter);
216        }
217        if bytes[1 + len..].iter().any(|&b| b != 0) {
218            return Err(IdentityDecodeError::NonZeroPadding);
219        }
220
221        // Phase 2: materialize canonical fixed-buffer identity storage.
222        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
223        name.copy_from_slice(&bytes[1..]);
224
225        Ok(Self {
226            len: len as u8,
227            bytes: name,
228        })
229    }
230}
231
232impl Ord for EntityName {
233    fn cmp(&self, other: &Self) -> Ordering {
234        // Keep ordering consistent with `to_bytes()` (length prefix first).
235        // This is deterministic protocol/storage ordering, not lexical string order.
236        self.len.cmp(&other.len).then(self.bytes.cmp(&other.bytes))
237    }
238}
239
240impl PartialOrd for EntityName {
241    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
242        Some(self.cmp(other))
243    }
244}
245
246impl Display for EntityName {
247    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248        f.write_str(self.as_str())
249    }
250}
251
252impl fmt::Debug for EntityName {
253    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
254        write!(f, "EntityName({})", self.as_str())
255    }
256}
257
258/// Validated index identity with fixed-size persisted representation.
259///
260/// Used by index-store key domains as canonical index-name bytes.
261
262#[derive(Clone, Copy, Eq, Hash, PartialEq)]
263pub struct IndexName {
264    len: u16,
265    bytes: [u8; MAX_INDEX_NAME_LEN],
266}
267
268impl IndexName {
269    /// Fixed on-disk size in bytes (stable, protocol-level).
270    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
271    /// Fixed in-memory size (for buffers and arrays).
272    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
273
274    /// Validate and construct one non-unique index identity from an entity +
275    /// field list.
276    pub fn try_from_entity_fields(
277        entity: &EntityName,
278        fields: &[&str],
279    ) -> Result<Self, IndexNameError> {
280        Self::try_from_entity_fields_with_prefix("idx", entity, fields)
281    }
282
283    /// Validate and construct one unique index identity from an entity + field
284    /// list.
285    pub fn try_unique_from_entity_fields(
286        entity: &EntityName,
287        fields: &[&str],
288    ) -> Result<Self, IndexNameError> {
289        Self::try_from_entity_fields_with_prefix("uniq", entity, fields)
290    }
291
292    fn try_from_entity_fields_with_prefix(
293        prefix: &str,
294        entity: &EntityName,
295        fields: &[&str],
296    ) -> Result<Self, IndexNameError> {
297        // Phase 1: validate index-field count and per-field identity constraints.
298        if fields.is_empty() {
299            return Err(IndexNameError::NoFields);
300        }
301        if fields.len() > MAX_INDEX_FIELDS {
302            return Err(IndexNameError::TooManyFields {
303                len: fields.len(),
304                max: MAX_INDEX_FIELDS,
305            });
306        }
307
308        let mut field_slugs = Vec::with_capacity(fields.len());
309        for field in fields {
310            let field_len = field.len();
311            if field_len == 0 {
312                return Err(IndexNameError::FieldEmpty);
313            }
314            if field_len > MAX_INDEX_FIELD_NAME_LEN {
315                return Err(IndexNameError::FieldTooLong {
316                    field: (*field).to_string(),
317                    max: MAX_INDEX_FIELD_NAME_LEN,
318                });
319            }
320            if !field.is_ascii() {
321                return Err(IndexNameError::FieldNonAscii {
322                    field: (*field).to_string(),
323                });
324            }
325            if field.as_bytes().contains(&INDEX_NAME_SEGMENT_DELIMITER) {
326                return Err(IndexNameError::FieldDelimiter {
327                    field: (*field).to_string(),
328                });
329            }
330            let slug = index_name_slug(field);
331            if slug.is_empty() {
332                return Err(IndexNameError::FieldEmpty);
333            }
334            field_slugs.push(slug);
335        }
336
337        let entity_slug = index_name_slug(entity.as_str());
338        let total_len = prefix
339            .len()
340            .saturating_add(1)
341            .saturating_add(entity_slug.len())
342            .saturating_add(2)
343            .saturating_add(field_slugs.iter().map(String::len).sum::<usize>())
344            .saturating_add(field_slugs.len().saturating_sub(1));
345        if total_len > MAX_INDEX_NAME_LEN {
346            return Err(IndexNameError::TooLong {
347                len: total_len,
348                max: MAX_INDEX_NAME_LEN,
349            });
350        }
351
352        // Phase 2: encode canonical `idx_entity__field...` bytes into fixed storage.
353        let mut out = [0u8; MAX_INDEX_NAME_LEN];
354        let mut len = 0usize;
355
356        Self::push_bytes(&mut out, &mut len, prefix.as_bytes());
357        Self::push_bytes(&mut out, &mut len, b"_");
358        Self::push_bytes(&mut out, &mut len, entity_slug.as_bytes());
359        Self::push_bytes(&mut out, &mut len, b"__");
360        for (index, field_slug) in field_slugs.iter().enumerate() {
361            if index > 0 {
362                Self::push_bytes(&mut out, &mut len, b"_");
363            }
364            Self::push_bytes(&mut out, &mut len, field_slug.as_bytes());
365        }
366
367        Ok(Self {
368            len: len as u16,
369            bytes: out,
370        })
371    }
372
373    /// Borrow raw index-identity bytes excluding trailing fixed-buffer padding.
374    #[must_use]
375    pub fn as_bytes(&self) -> &[u8] {
376        &self.bytes[..self.len as usize]
377    }
378
379    /// Borrow the index identity as UTF-8 text.
380    ///
381    /// # Panics
382    ///
383    /// Panics if the stored index-name bytes violate the ASCII-only identity
384    /// invariant. Construction and decoding are expected to prevent this.
385    #[must_use]
386    pub fn as_str(&self) -> &str {
387        // Invariant: construction and decoding enforce ASCII-only storage,
388        // so UTF-8 decoding cannot fail.
389        std::str::from_utf8(self.as_bytes()).expect("IndexName invariant: ASCII-only storage")
390    }
391
392    /// Encode this identity into its fixed-size persisted representation.
393    #[must_use]
394    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
395        let mut out = [0u8; Self::STORED_SIZE_USIZE];
396        out[..2].copy_from_slice(&self.len.to_be_bytes());
397        out[2..].copy_from_slice(&self.bytes);
398        out
399    }
400
401    /// Decode one fixed-size persisted index identity payload.
402    ///
403    /// This validates the canonical fixed-width byte envelope only. It does not
404    /// reconstruct field segments or prove the bytes were produced by
405    /// `try_from_entity_fields`; callers must ensure persisted bytes originate from a
406    /// previously validated `IndexName`.
407    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
408        // Phase 1: validate layout and payload bounds.
409        if bytes.len() != Self::STORED_SIZE_USIZE {
410            return Err(IdentityDecodeError::InvalidSize);
411        }
412
413        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
414        if len == 0 || len > MAX_INDEX_NAME_LEN {
415            return Err(IdentityDecodeError::InvalidLength);
416        }
417        if !bytes[2..2 + len].is_ascii() {
418            return Err(IdentityDecodeError::NonAscii);
419        }
420        if bytes[2 + len..].iter().any(|&b| b != 0) {
421            return Err(IdentityDecodeError::NonZeroPadding);
422        }
423
424        // Phase 2: materialize canonical fixed-buffer identity storage.
425        let mut name = [0u8; MAX_INDEX_NAME_LEN];
426        name.copy_from_slice(&bytes[2..]);
427
428        Ok(Self {
429            len: len as u16,
430            bytes: name,
431        })
432    }
433
434    // Append bytes into the fixed-size identity buffer while tracking write offset.
435    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
436        let end = *len + bytes.len();
437        out[*len..end].copy_from_slice(bytes);
438        *len = end;
439    }
440}
441
442fn index_name_slug(value: &str) -> String {
443    let separated = value
444        .chars()
445        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
446        .collect::<String>();
447
448    to_snake_case(separated.as_str())
449}
450
451impl Ord for IndexName {
452    fn cmp(&self, other: &Self) -> Ordering {
453        self.to_bytes().cmp(&other.to_bytes())
454    }
455}
456
457impl PartialOrd for IndexName {
458    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
459        Some(self.cmp(other))
460    }
461}
462
463impl fmt::Debug for IndexName {
464    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
465        write!(f, "IndexName({})", self.as_str())
466    }
467}
468
469impl Display for IndexName {
470    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
471        f.write_str(self.as_str())
472    }
473}