Skip to main content

icydb_core/db/identity/
mod.rs

1//! Module: identity
2//! Responsibility: validated entity/index naming and stable byte ordering contracts.
3//! Does not own: schema metadata, relation policy, or storage-layer persistence.
4//! Boundary: all identity construction/decoding for db data/index key domains.
5//!
6//! Invariants:
7//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
8//! - All construction paths validate invariants.
9//! - Stored byte representation is canonical and order-preserving.
10//! - Ordering semantics follow the length-prefixed stored-byte layout, not
11//!   lexicographic string ordering.
12
13#![expect(clippy::cast_possible_truncation)]
14
15#[cfg(test)]
16mod tests;
17
18use crate::MAX_INDEX_FIELDS;
19use icydb_utils::to_snake_case;
20use std::{
21    cmp::Ordering,
22    fmt::{self, Display},
23};
24use thiserror::Error as ThisError;
25
///
/// Constants
///

/// Maximum byte length of an entity name; also the capacity of the fixed
/// buffer inside `EntityName`.
pub(super) const MAX_ENTITY_NAME_LEN: usize = 64;
/// Maximum byte length of one index field name as supplied by the caller
/// (checked before the field is slugged).
pub(super) const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
// Budget for the index-kind prefix plus the single `_` that follows it.
// The longest prefix used in this module is `uniq`, so `uniq_` is 5 bytes.
const MAX_INDEX_NAME_PREFIX_LEN: usize = 5;
// Budget for a slugged entity name: 1.5x the raw limit.
// NOTE(review): presumably headroom for separators added by snake-casing;
// confirm against `to_snake_case`.
const MAX_ENTITY_NAME_SLUG_LEN: usize = (MAX_ENTITY_NAME_LEN * 3) / 2;
// Budget for one slugged field name: 1.5x the raw limit (same reasoning as
// the entity slug budget).
const MAX_INDEX_FIELD_NAME_SLUG_LEN: usize = (MAX_INDEX_FIELD_NAME_LEN * 3) / 2;
/// Capacity of the fixed `IndexName` buffer: the worst-case length of
/// `<prefix>_<entity>__<field>[_<field>...]` under the budgets above.
pub(super) const MAX_INDEX_NAME_LEN: usize = MAX_INDEX_NAME_PREFIX_LEN
    + MAX_ENTITY_NAME_SLUG_LEN
    + 2 // the `__` between the entity slug and the first field slug
    + (MAX_INDEX_FIELDS * MAX_INDEX_FIELD_NAME_SLUG_LEN)
    + (MAX_INDEX_FIELDS - 1); // one `_` between adjacent field slugs
// Byte that entity names and index field names are not allowed to contain.
const INDEX_NAME_SEGMENT_DELIMITER: u8 = b'|';
// Highest ASCII byte value; used as the fill byte of the `max_storable`
// sentinels.
const MAX_ASCII_BYTE: u8 = 0x7F;
42
///
/// IdentityDecodeError
/// Decode errors (storage / corruption boundary)
///
/// Returned by the `from_bytes` decoders of `EntityName` and `IndexName`.
///

#[derive(Debug, ThisError)]
pub enum IdentityDecodeError {
    /// The input slice is not exactly the fixed stored size of the identity
    /// type being decoded.
    #[error("invalid size")]
    InvalidSize,

    /// The length prefix is zero or larger than the identity's maximum
    /// length.
    #[error("invalid length")]
    InvalidLength,

    /// A byte inside the declared length is not ASCII.
    #[error("non-ascii encoding")]
    NonAscii,

    /// A byte after the declared length is not zero.
    #[error("non-zero padding")]
    NonZeroPadding,

    /// The payload contains the reserved `|` byte. In this module only the
    /// entity-name decoder performs this check.
    #[error("reserved identity delimiter")]
    Delimiter,
}
65
///
/// EntityNameError
///
/// Validation failures reported by `EntityName::try_from_str`.
///

#[derive(Debug, ThisError)]
pub enum EntityNameError {
    /// The supplied name has zero bytes.
    #[error("entity name is empty")]
    Empty,

    /// The supplied name is `len` bytes long, which is more than `max`
    /// (`MAX_ENTITY_NAME_LEN`).
    #[error("entity name length {len} exceeds max {max}")]
    TooLong { len: usize, max: usize },

    /// The supplied name contains at least one non-ASCII byte.
    #[error("entity name must be ASCII")]
    NonAscii,

    /// The supplied name contains the reserved `|` byte.
    #[error("entity name must not contain '|'")]
    Delimiter,
}
84
///
/// IndexNameError
///
/// Validation failures reported while building an `IndexName` from an entity
/// and a field list.
///

#[derive(Debug, ThisError)]
pub enum IndexNameError {
    /// The field list has `len` entries, which is more than `max`
    /// (`MAX_INDEX_FIELDS`).
    #[error("index has {len} fields (max {max})")]
    TooManyFields { len: usize, max: usize },

    /// The field list is empty.
    #[error("index must reference at least one field")]
    NoFields,

    /// A field name is the empty string, or its slug came out empty.
    #[error("index field name is empty")]
    FieldEmpty,

    /// The field name `field` is longer than `max`
    /// (`MAX_INDEX_FIELD_NAME_LEN`) bytes.
    #[error("index field name '{field}' exceeds max length {max}")]
    FieldTooLong { field: String, max: usize },

    /// The field name `field` contains a non-ASCII character.
    #[error("index field name '{field}' must be ASCII")]
    FieldNonAscii { field: String },

    /// The field name `field` contains the reserved `|` byte.
    #[error("index field name '{field}' must not contain '|'")]
    FieldDelimiter { field: String },

    /// The assembled index name would be `len` bytes long, which is more than
    /// `max` (`MAX_INDEX_NAME_LEN`).
    #[error("index name length {len} exceeds max {max}")]
    TooLong { len: usize, max: usize },
}
112
113///
114/// EntityName
115///
116
117#[derive(Clone, Copy, Eq, Hash, PartialEq)]
118pub struct EntityName {
119    len: u8,
120    bytes: [u8; MAX_ENTITY_NAME_LEN],
121}
122
123impl EntityName {
124    /// Fixed on-disk size in bytes (stable, protocol-level)
125    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);
126
127    /// Fixed in-memory size (for buffers and arrays)
128    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
129
130    /// Validate and construct an entity name from one ASCII string.
131    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
132        // Phase 1: validate user-visible identity constraints.
133        let bytes = name.as_bytes();
134        let len = bytes.len();
135
136        if len == 0 {
137            return Err(EntityNameError::Empty);
138        }
139        if len > MAX_ENTITY_NAME_LEN {
140            return Err(EntityNameError::TooLong {
141                len,
142                max: MAX_ENTITY_NAME_LEN,
143            });
144        }
145        if !bytes.is_ascii() {
146            return Err(EntityNameError::NonAscii);
147        }
148        if bytes.contains(&INDEX_NAME_SEGMENT_DELIMITER) {
149            return Err(EntityNameError::Delimiter);
150        }
151
152        // Phase 2: write into fixed-size canonical storage.
153        let mut out = [0u8; MAX_ENTITY_NAME_LEN];
154        out[..len].copy_from_slice(bytes);
155
156        Ok(Self {
157            len: len as u8,
158            bytes: out,
159        })
160    }
161
162    /// Return the stored entity-name length.
163    #[must_use]
164    pub const fn len(&self) -> usize {
165        self.len as usize
166    }
167
168    /// Return whether the stored entity-name length is zero.
169    #[must_use]
170    pub const fn is_empty(&self) -> bool {
171        self.len() == 0
172    }
173
174    /// Borrow raw identity bytes excluding trailing fixed-buffer padding.
175    #[must_use]
176    pub fn as_bytes(&self) -> &[u8] {
177        &self.bytes[..self.len()]
178    }
179
180    /// Borrow the entity name as UTF-8 text.
181    #[must_use]
182    pub fn as_str(&self) -> &str {
183        // Invariant: construction and decoding enforce ASCII-only storage,
184        // so UTF-8 decoding cannot fail.
185        std::str::from_utf8(self.as_bytes()).expect("EntityName invariant: ASCII-only storage")
186    }
187
188    /// Encode this identity into its fixed-size persisted representation.
189    #[must_use]
190    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
191        let mut out = [0u8; Self::STORED_SIZE_USIZE];
192        out[0] = self.len;
193        out[1..].copy_from_slice(&self.bytes);
194        out
195    }
196
197    /// Decode one fixed-size persisted entity identity payload.
198    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
199        // Phase 1: validate layout and payload bounds.
200        if bytes.len() != Self::STORED_SIZE_USIZE {
201            return Err(IdentityDecodeError::InvalidSize);
202        }
203
204        let len = bytes[0] as usize;
205        if len == 0 || len > MAX_ENTITY_NAME_LEN {
206            return Err(IdentityDecodeError::InvalidLength);
207        }
208        if !bytes[1..=len].is_ascii() {
209            return Err(IdentityDecodeError::NonAscii);
210        }
211        if bytes[1..=len].contains(&INDEX_NAME_SEGMENT_DELIMITER) {
212            return Err(IdentityDecodeError::Delimiter);
213        }
214        if bytes[1 + len..].iter().any(|&b| b != 0) {
215            return Err(IdentityDecodeError::NonZeroPadding);
216        }
217
218        // Phase 2: materialize canonical fixed-buffer identity storage.
219        let mut name = [0u8; MAX_ENTITY_NAME_LEN];
220        name.copy_from_slice(&bytes[1..]);
221
222        Ok(Self {
223            len: len as u8,
224            bytes: name,
225        })
226    }
227
228    /// Return a maximal sortable entity identity sentinel value.
229    #[must_use]
230    pub const fn max_storable() -> Self {
231        Self {
232            len: MAX_ENTITY_NAME_LEN as u8,
233            bytes: [MAX_ASCII_BYTE; MAX_ENTITY_NAME_LEN],
234        }
235    }
236}
237
238impl Ord for EntityName {
239    fn cmp(&self, other: &Self) -> Ordering {
240        // Keep ordering consistent with `to_bytes()` (length prefix first).
241        // This is deterministic protocol/storage ordering, not lexical string order.
242        self.len.cmp(&other.len).then(self.bytes.cmp(&other.bytes))
243    }
244}
245
246impl PartialOrd for EntityName {
247    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
248        Some(self.cmp(other))
249    }
250}
251
252impl Display for EntityName {
253    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
254        f.write_str(self.as_str())
255    }
256}
257
258impl fmt::Debug for EntityName {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        write!(f, "EntityName({})", self.as_str())
261    }
262}
263
264///
265/// IndexName
266///
267
268#[derive(Clone, Copy, Eq, Hash, PartialEq)]
269pub struct IndexName {
270    len: u16,
271    bytes: [u8; MAX_INDEX_NAME_LEN],
272}
273
274impl IndexName {
275    /// Fixed on-disk size in bytes (stable, protocol-level).
276    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
277    /// Fixed in-memory size (for buffers and arrays).
278    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;
279
280    /// Validate and construct one non-unique index identity from an entity +
281    /// field list.
282    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
283        Self::try_from_parts_with_prefix("idx", entity, fields)
284    }
285
286    /// Validate and construct one unique index identity from an entity + field
287    /// list.
288    pub fn try_unique_from_parts(
289        entity: &EntityName,
290        fields: &[&str],
291    ) -> Result<Self, IndexNameError> {
292        Self::try_from_parts_with_prefix("uniq", entity, fields)
293    }
294
295    fn try_from_parts_with_prefix(
296        prefix: &str,
297        entity: &EntityName,
298        fields: &[&str],
299    ) -> Result<Self, IndexNameError> {
300        // Phase 1: validate index-field count and per-field identity constraints.
301        if fields.is_empty() {
302            return Err(IndexNameError::NoFields);
303        }
304        if fields.len() > MAX_INDEX_FIELDS {
305            return Err(IndexNameError::TooManyFields {
306                len: fields.len(),
307                max: MAX_INDEX_FIELDS,
308            });
309        }
310
311        let mut field_slugs = Vec::with_capacity(fields.len());
312        for field in fields {
313            let field_len = field.len();
314            if field_len == 0 {
315                return Err(IndexNameError::FieldEmpty);
316            }
317            if field_len > MAX_INDEX_FIELD_NAME_LEN {
318                return Err(IndexNameError::FieldTooLong {
319                    field: (*field).to_string(),
320                    max: MAX_INDEX_FIELD_NAME_LEN,
321                });
322            }
323            if !field.is_ascii() {
324                return Err(IndexNameError::FieldNonAscii {
325                    field: (*field).to_string(),
326                });
327            }
328            if field.as_bytes().contains(&INDEX_NAME_SEGMENT_DELIMITER) {
329                return Err(IndexNameError::FieldDelimiter {
330                    field: (*field).to_string(),
331                });
332            }
333            let slug = index_name_slug(field);
334            if slug.is_empty() {
335                return Err(IndexNameError::FieldEmpty);
336            }
337            field_slugs.push(slug);
338        }
339
340        let entity_slug = index_name_slug(entity.as_str());
341        let total_len = prefix
342            .len()
343            .saturating_add(1)
344            .saturating_add(entity_slug.len())
345            .saturating_add(2)
346            .saturating_add(field_slugs.iter().map(String::len).sum::<usize>())
347            .saturating_add(field_slugs.len().saturating_sub(1));
348        if total_len > MAX_INDEX_NAME_LEN {
349            return Err(IndexNameError::TooLong {
350                len: total_len,
351                max: MAX_INDEX_NAME_LEN,
352            });
353        }
354
355        // Phase 2: encode canonical `idx_entity__field...` bytes into fixed storage.
356        let mut out = [0u8; MAX_INDEX_NAME_LEN];
357        let mut len = 0usize;
358
359        Self::push_bytes(&mut out, &mut len, prefix.as_bytes());
360        Self::push_bytes(&mut out, &mut len, b"_");
361        Self::push_bytes(&mut out, &mut len, entity_slug.as_bytes());
362        Self::push_bytes(&mut out, &mut len, b"__");
363        for (index, field_slug) in field_slugs.iter().enumerate() {
364            if index > 0 {
365                Self::push_bytes(&mut out, &mut len, b"_");
366            }
367            Self::push_bytes(&mut out, &mut len, field_slug.as_bytes());
368        }
369
370        Ok(Self {
371            len: len as u16,
372            bytes: out,
373        })
374    }
375
376    /// Borrow raw index-identity bytes excluding trailing fixed-buffer padding.
377    #[must_use]
378    pub fn as_bytes(&self) -> &[u8] {
379        &self.bytes[..self.len as usize]
380    }
381
382    /// Borrow the index identity as UTF-8 text.
383    #[must_use]
384    pub fn as_str(&self) -> &str {
385        // Invariant: construction and decoding enforce ASCII-only storage,
386        // so UTF-8 decoding cannot fail.
387        std::str::from_utf8(self.as_bytes()).expect("IndexName invariant: ASCII-only storage")
388    }
389
390    /// Encode this identity into its fixed-size persisted representation.
391    #[must_use]
392    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
393        let mut out = [0u8; Self::STORED_SIZE_USIZE];
394        out[..2].copy_from_slice(&self.len.to_be_bytes());
395        out[2..].copy_from_slice(&self.bytes);
396        out
397    }
398
399    /// Decode one fixed-size persisted index identity payload.
400    ///
401    /// This validates the canonical fixed-width byte envelope only. It does not
402    /// reconstruct field segments or prove the bytes were produced by
403    /// `try_from_parts`; callers must ensure persisted bytes originate from a
404    /// previously validated `IndexName`.
405    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
406        // Phase 1: validate layout and payload bounds.
407        if bytes.len() != Self::STORED_SIZE_USIZE {
408            return Err(IdentityDecodeError::InvalidSize);
409        }
410
411        let len = u16::from_be_bytes([bytes[0], bytes[1]]) as usize;
412        if len == 0 || len > MAX_INDEX_NAME_LEN {
413            return Err(IdentityDecodeError::InvalidLength);
414        }
415        if !bytes[2..2 + len].is_ascii() {
416            return Err(IdentityDecodeError::NonAscii);
417        }
418        if bytes[2 + len..].iter().any(|&b| b != 0) {
419            return Err(IdentityDecodeError::NonZeroPadding);
420        }
421
422        // Phase 2: materialize canonical fixed-buffer identity storage.
423        let mut name = [0u8; MAX_INDEX_NAME_LEN];
424        name.copy_from_slice(&bytes[2..]);
425
426        Ok(Self {
427            len: len as u16,
428            bytes: name,
429        })
430    }
431
432    // Append bytes into the fixed-size identity buffer while tracking write offset.
433    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
434        let end = *len + bytes.len();
435        out[*len..end].copy_from_slice(bytes);
436        *len = end;
437    }
438
439    /// Return a maximal sortable index identity sentinel value.
440    #[must_use]
441    pub const fn max_storable() -> Self {
442        Self {
443            len: MAX_INDEX_NAME_LEN as u16,
444            bytes: [MAX_ASCII_BYTE; MAX_INDEX_NAME_LEN],
445        }
446    }
447}
448
449fn index_name_slug(value: &str) -> String {
450    let separated = value
451        .chars()
452        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
453        .collect::<String>();
454
455    to_snake_case(separated.as_str())
456}
457
458impl Ord for IndexName {
459    fn cmp(&self, other: &Self) -> Ordering {
460        self.to_bytes().cmp(&other.to_bytes())
461    }
462}
463
464impl PartialOrd for IndexName {
465    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
466        Some(self.cmp(other))
467    }
468}
469
470impl fmt::Debug for IndexName {
471    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
472        write!(f, "IndexName({})", self.as_str())
473    }
474}
475
476impl Display for IndexName {
477    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
478        f.write_str(self.as_str())
479    }
480}