Skip to main content

icydb_core/db/identity/
mod.rs

1//! Module: identity
2//! Responsibility: validated entity/index naming and stable byte ordering contracts.
3//! Does not own: schema metadata, relation policy, or storage-layer persistence.
4//! Boundary: all identity construction/decoding for db data/index key domains.
5//!
6//! Invariants:
7//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
8//! - All construction paths validate invariants.
9//! - Stored byte representation is canonical and order-preserving.
10//! - Ordering semantics follow the length-prefixed stored-byte layout, not
11//!   lexicographic string ordering.
12
13#![expect(clippy::cast_possible_truncation)]
14
15#[cfg(test)]
16mod tests;
17
18use crate::MAX_INDEX_FIELDS;
19use icydb_utils::to_snake_case;
20use std::{
21    cmp::Ordering,
22    fmt::{self, Display},
23};
24use thiserror::Error as ThisError;
25
26///
27/// Constants
28///
29
/// Maximum byte length accepted for an entity name.
pub(super) const MAX_ENTITY_NAME_LEN: usize = 64;
/// Maximum byte length accepted for one index field name (before slugging).
pub(super) const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
/// Longest index-name prefix including its trailing `_`; sized for `uniq_`.
const MAX_INDEX_NAME_PREFIX_LEN: usize = 5;
/// Byte budget for an entity-name slug: 1.5x the raw entity-name limit.
// NOTE(review): presumably the headroom covers underscores inserted by
// `to_snake_case` — confirm against that helper's worst case.
const MAX_ENTITY_NAME_SLUG_LEN: usize = (MAX_ENTITY_NAME_LEN * 3) / 2;
/// Byte budget for one field-name slug: 1.5x the raw field-name limit.
const MAX_INDEX_FIELD_NAME_SLUG_LEN: usize = (MAX_INDEX_FIELD_NAME_LEN * 3) / 2;
/// Capacity of the index-name buffer: prefix, entity slug, the two-byte `__`
/// separator, every field slug at its budget, and one `_` joiner between
/// each pair of adjacent fields.
pub(super) const MAX_INDEX_NAME_LEN: usize = MAX_INDEX_NAME_PREFIX_LEN
    + MAX_ENTITY_NAME_SLUG_LEN
    + 2
    + (MAX_INDEX_FIELDS * MAX_INDEX_FIELD_NAME_SLUG_LEN)
    + (MAX_INDEX_FIELDS - 1);
/// Reserved byte that entity names and index field names must not contain.
const INDEX_NAME_SEGMENT_DELIMITER: u8 = b'|';
41///
42/// IdentityDecodeError
43/// Decode errors (storage / corruption boundary)
44///
45
/// Failures reported by the `from_bytes` decoders in this module.
#[derive(Debug, ThisError)]
pub enum IdentityDecodeError {
    /// The input slice is not exactly the fixed stored size of the identity.
    #[error("invalid size")]
    InvalidSize,

    /// The length prefix is zero or larger than the identity's maximum length.
    #[error("invalid length")]
    InvalidLength,

    /// The name portion (as delimited by the length prefix) is not pure ASCII.
    #[error("non-ascii encoding")]
    NonAscii,

    /// A byte after the name portion is not zero.
    #[error("non-zero padding")]
    NonZeroPadding,

    /// The name portion contains the reserved `|` byte.
    #[error("reserved identity delimiter")]
    Delimiter,
}
63
64///
65/// EntityNameError
66///
67
/// Reasons a string is rejected by `EntityName::try_from_str`.
#[derive(Debug, ThisError)]
pub enum EntityNameError {
    /// The supplied name has zero bytes.
    #[error("entity name is empty")]
    Empty,

    /// The supplied name is `len` bytes long, above the `max` allowed.
    #[error("entity name length {len} exceeds max {max}")]
    TooLong { len: usize, max: usize },

    /// The supplied name contains at least one non-ASCII byte.
    #[error("entity name must be ASCII")]
    NonAscii,

    /// The supplied name contains the reserved `|` byte.
    #[error("entity name must not contain '|'")]
    Delimiter,
}
82
83///
84/// IndexNameError
85///
86
/// Reasons an index name cannot be built from an entity and its field list.
#[derive(Debug, ThisError)]
pub enum IndexNameError {
    /// `len` fields were supplied, above the `max` allowed per index.
    #[error("index has {len} fields (max {max})")]
    TooManyFields { len: usize, max: usize },

    /// The field list is empty.
    #[error("index must reference at least one field")]
    NoFields,

    /// A field name is empty, or its slug came out empty.
    #[error("index field name is empty")]
    FieldEmpty,

    /// The named field is longer than `max` bytes.
    #[error("index field name '{field}' exceeds max length {max}")]
    FieldTooLong { field: String, max: usize },

    /// The named field contains at least one non-ASCII byte.
    #[error("index field name '{field}' must be ASCII")]
    FieldNonAscii { field: String },

    /// The named field contains the reserved `|` byte.
    #[error("index field name '{field}' must not contain '|'")]
    FieldDelimiter { field: String },

    /// The composed index name would be `len` bytes, above the `max` capacity.
    #[error("index name length {len} exceeds max {max}")]
    TooLong { len: usize, max: usize },
}
110
111///
112/// EntityName
113///
114
/// Validated entity identity held in a fixed-capacity, zero-padded buffer.
///
/// The derived equality and hash cover the whole buffer, so they rely on the
/// padding after the name always being zero.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct EntityName {
    /// Number of leading bytes of `bytes` that make up the name.
    len: u8,
    /// Name bytes followed by zero padding up to `MAX_ENTITY_NAME_LEN`.
    bytes: [u8; MAX_ENTITY_NAME_LEN],
}
120
121impl EntityName {
122    /// Fixed on-disk size in bytes (stable, protocol-level)
123    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
124
125    /// Fixed in-memory size (for buffers and arrays)
126    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
127
128    /// Validate and construct an entity name from one ASCII string.
129    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
130        // Phase 1: validate user-visible identity constraints.
131        let bytes = name.as_bytes();
132        let len = bytes.len();
133
134        if len == 0 {
135            return Err(EntityNameError::Empty);
136        }
137        if len > MAX_ENTITY_NAME_LEN {
138            return Err(EntityNameError::TooLong {
139                len,
140                max: MAX_ENTITY_NAME_LEN,
141            });
142        }
143        if !bytes.is_ascii() {
144            return Err(EntityNameError::NonAscii);
145        }
146        if bytes.contains(&INDEX_NAME_SEGMENT_DELIMITER) {
147            return Err(EntityNameError::Delimiter);
148        }
149
150        // Phase 2: write into fixed-size canonical storage.
151        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
152        out[..len].copy_from_slice(bytes);
153
154        Ok(Self {
155            len: len as u8,
156            bytes: out,
157        })
158    }
159
160    /// Return the stored entity-name length.
161    #[must_use]
162    pub const fn len(&self) -> usize {
163        self.len as usize
164    }
165
166    /// Return whether the stored entity-name length is zero.
167    #[must_use]
168    pub const fn is_empty(&self) -> bool {
169        self.len() == 0
170    }
171
172    /// Borrow raw identity bytes excluding trailing fixed-buffer padding.
173    #[must_use]
174    pub fn as_bytes(&self) -> &[u8] {
175        &self.bytes[..self.len()]
176    }
177
178    /// Borrow the entity name as UTF-8 text.
179    #[must_use]
180    pub fn as_str(&self) -> &str {
181        // Invariant: construction and decoding enforce ASCII-only storage,
182        // so UTF-8 decoding cannot fail.
183        std::str::from_utf8(self.as_bytes()).expect("EntityName invariant: ASCII-only storage")
184    }
185
186    /// Encode this identity into its fixed-size persisted representation.
187    #[must_use]
188    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
189        let mut out = [0u8; Self::STORED_SIZE_USIZE];
190        out[0] = self.len;
191        out[1..].copy_from_slice(&self.bytes);
192        out
193    }
194
195    /// Decode one fixed-size persisted entity identity payload.
196    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
197        // Phase 1: validate layout and payload bounds.
198        if bytes.len() != Self::STORED_SIZE_USIZE {
199            return Err(IdentityDecodeError::InvalidSize);
200        }
201
202        let len = bytes[0] as usize;
203        if len == 0 || len > MAX_ENTITY_NAME_LEN {
204            return Err(IdentityDecodeError::InvalidLength);
205        }
206        if !bytes[1..=len].is_ascii() {
207            return Err(IdentityDecodeError::NonAscii);
208        }
209        if bytes[1..=len].contains(&INDEX_NAME_SEGMENT_DELIMITER) {
210            return Err(IdentityDecodeError::Delimiter);
211        }
212        if bytes[1 + len..].iter().any(|&b| b != 0) {
213            return Err(IdentityDecodeError::NonZeroPadding);
214        }
215
216        // Phase 2: materialize canonical fixed-buffer identity storage.
217        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
218        name.copy_from_slice(&bytes[1..]);
219
220        Ok(Self {
221            len: len as u8,
222            bytes: name,
223        })
224    }
225}
226
227impl Ord for EntityName {
228    fn cmp(&self, other: &Self) -> Ordering {
229        // Keep ordering consistent with `to_bytes()` (length prefix first).
230        // This is deterministic protocol/storage ordering, not lexical string order.
231        self.len.cmp(&other.len).then(self.bytes.cmp(&other.bytes))
232    }
233}
234
impl PartialOrd for EntityName {
    /// Always `Some`: delegates to `Ord::cmp` so both orderings agree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
240
241impl Display for EntityName {
242    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
243        f.write_str(self.as_str())
244    }
245}
246
247impl fmt::Debug for EntityName {
248    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
249        write!(f, "EntityName({})", self.as_str())
250    }
251}
252
253///
254/// IndexName
255///
256
/// Index identity held in a fixed-capacity, zero-padded buffer.
///
/// The derived equality and hash cover the whole buffer, so they rely on the
/// padding after the name always being zero.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct IndexName {
    /// Number of leading bytes of `bytes` that make up the name.
    len: u16,
    /// Name bytes followed by zero padding up to `MAX_INDEX_NAME_LEN`.
    bytes: [u8; MAX_INDEX_NAME_LEN],
}
262
263impl IndexName {
264    /// Fixed on-disk size in bytes (stable, protocol-level).
265    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
266    /// Fixed in-memory size (for buffers and arrays).
267    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
268
269    /// Validate and construct one non-unique index identity from an entity +
270    /// field list.
271    pub fn try_from_entity_fields(
272        entity: &EntityName,
273        fields: &[&str],
274    ) -> Result<Self, IndexNameError> {
275        Self::try_from_entity_fields_with_prefix("idx", entity, fields)
276    }
277
278    /// Validate and construct one unique index identity from an entity + field
279    /// list.
280    pub fn try_unique_from_entity_fields(
281        entity: &EntityName,
282        fields: &[&str],
283    ) -> Result<Self, IndexNameError> {
284        Self::try_from_entity_fields_with_prefix("uniq", entity, fields)
285    }
286
287    fn try_from_entity_fields_with_prefix(
288        prefix: &str,
289        entity: &EntityName,
290        fields: &[&str],
291    ) -> Result<Self, IndexNameError> {
292        // Phase 1: validate index-field count and per-field identity constraints.
293        if fields.is_empty() {
294            return Err(IndexNameError::NoFields);
295        }
296        if fields.len() > MAX_INDEX_FIELDS {
297            return Err(IndexNameError::TooManyFields {
298                len: fields.len(),
299                max: MAX_INDEX_FIELDS,
300            });
301        }
302
303        let mut field_slugs = Vec::with_capacity(fields.len());
304        for field in fields {
305            let field_len = field.len();
306            if field_len == 0 {
307                return Err(IndexNameError::FieldEmpty);
308            }
309            if field_len > MAX_INDEX_FIELD_NAME_LEN {
310                return Err(IndexNameError::FieldTooLong {
311                    field: (*field).to_string(),
312                    max: MAX_INDEX_FIELD_NAME_LEN,
313                });
314            }
315            if !field.is_ascii() {
316                return Err(IndexNameError::FieldNonAscii {
317                    field: (*field).to_string(),
318                });
319            }
320            if field.as_bytes().contains(&INDEX_NAME_SEGMENT_DELIMITER) {
321                return Err(IndexNameError::FieldDelimiter {
322                    field: (*field).to_string(),
323                });
324            }
325            let slug = index_name_slug(field);
326            if slug.is_empty() {
327                return Err(IndexNameError::FieldEmpty);
328            }
329            field_slugs.push(slug);
330        }
331
332        let entity_slug = index_name_slug(entity.as_str());
333        let total_len = prefix
334            .len()
335            .saturating_add(1)
336            .saturating_add(entity_slug.len())
337            .saturating_add(2)
338            .saturating_add(field_slugs.iter().map(String::len).sum::<usize>())
339            .saturating_add(field_slugs.len().saturating_sub(1));
340        if total_len > MAX_INDEX_NAME_LEN {
341            return Err(IndexNameError::TooLong {
342                len: total_len,
343                max: MAX_INDEX_NAME_LEN,
344            });
345        }
346
347        // Phase 2: encode canonical `idx_entity__field...` bytes into fixed storage.
348        let mut out = [0u8; MAX_INDEX_NAME_LEN];
349        let mut len = 0usize;
350
351        Self::push_bytes(&mut out, &mut len, prefix.as_bytes());
352        Self::push_bytes(&mut out, &mut len, b"_");
353        Self::push_bytes(&mut out, &mut len, entity_slug.as_bytes());
354        Self::push_bytes(&mut out, &mut len, b"__");
355        for (index, field_slug) in field_slugs.iter().enumerate() {
356            if index > 0 {
357                Self::push_bytes(&mut out, &mut len, b"_");
358            }
359            Self::push_bytes(&mut out, &mut len, field_slug.as_bytes());
360        }
361
362        Ok(Self {
363            len: len as u16,
364            bytes: out,
365        })
366    }
367
368    /// Borrow raw index-identity bytes excluding trailing fixed-buffer padding.
369    #[must_use]
370    pub fn as_bytes(&self) -> &[u8] {
371        &self.bytes[..self.len as usize]
372    }
373
374    /// Borrow the index identity as UTF-8 text.
375    #[must_use]
376    pub fn as_str(&self) -> &str {
377        // Invariant: construction and decoding enforce ASCII-only storage,
378        // so UTF-8 decoding cannot fail.
379        std::str::from_utf8(self.as_bytes()).expect("IndexName invariant: ASCII-only storage")
380    }
381
382    /// Encode this identity into its fixed-size persisted representation.
383    #[must_use]
384    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
385        let mut out = [0u8; Self::STORED_SIZE_USIZE];
386        out[..2].copy_from_slice(&self.len.to_be_bytes());
387        out[2..].copy_from_slice(&self.bytes);
388        out
389    }
390
391    /// Decode one fixed-size persisted index identity payload.
392    ///
393    /// This validates the canonical fixed-width byte envelope only. It does not
394    /// reconstruct field segments or prove the bytes were produced by
395    /// `try_from_entity_fields`; callers must ensure persisted bytes originate from a
396    /// previously validated `IndexName`.
397    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
398        // Phase 1: validate layout and payload bounds.
399        if bytes.len() != Self::STORED_SIZE_USIZE {
400            return Err(IdentityDecodeError::InvalidSize);
401        }
402
403        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
404        if len == 0 || len > MAX_INDEX_NAME_LEN {
405            return Err(IdentityDecodeError::InvalidLength);
406        }
407        if !bytes[2..2 + len].is_ascii() {
408            return Err(IdentityDecodeError::NonAscii);
409        }
410        if bytes[2 + len..].iter().any(|&b| b != 0) {
411            return Err(IdentityDecodeError::NonZeroPadding);
412        }
413
414        // Phase 2: materialize canonical fixed-buffer identity storage.
415        let mut name = [0u8; MAX_INDEX_NAME_LEN];
416        name.copy_from_slice(&bytes[2..]);
417
418        Ok(Self {
419            len: len as u16,
420            bytes: name,
421        })
422    }
423
424    // Append bytes into the fixed-size identity buffer while tracking write offset.
425    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
426        let end = *len + bytes.len();
427        out[*len..end].copy_from_slice(bytes);
428        *len = end;
429    }
430}
431
432fn index_name_slug(value: &str) -> String {
433    let separated = value
434        .chars()
435        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
436        .collect::<String>();
437
438    to_snake_case(separated.as_str())
439}
440
441impl Ord for IndexName {
442    fn cmp(&self, other: &Self) -> Ordering {
443        self.to_bytes().cmp(&other.to_bytes())
444    }
445}
446
impl PartialOrd for IndexName {
    /// Always `Some`: delegates to `Ord::cmp` so both orderings agree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
452
453impl fmt::Debug for IndexName {
454    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
455        write!(f, "IndexName({})", self.as_str())
456    }
457}
458
459impl Display for IndexName {
460    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
461        f.write_str(self.as_str())
462    }
463}