icydb-core 0.144.7

IcyDB — A schema-first typed query engine and persistence runtime for Internet Computer canisters
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
//! Module: identity
//! Responsibility: validated entity/index naming and stable byte ordering contracts.
//! Does not own: schema metadata, relation policy, or storage-layer persistence.
//! Boundary: all identity construction/decoding for db data/index key domains.
//!
//! Invariants:
//! - Identities are ASCII, non-empty, and bounded by MAX_* limits.
//! - All construction paths validate invariants.
//! - Stored byte representation is canonical and order-preserving.
//! - Ordering semantics follow the length-prefixed stored-byte layout, not
//!   lexicographic string ordering.

#![expect(clippy::cast_possible_truncation)]

#[cfg(test)]
mod tests;

use crate::MAX_INDEX_FIELDS;
use std::{
    cmp::Ordering,
    fmt::{self, Display},
};
use thiserror::Error as ThisError;

///
/// Constants
///

/// Maximum number of bytes in an entity name.
pub(super) const MAX_ENTITY_NAME_LEN: usize = 64;
/// Maximum number of bytes in a single index field name.
pub(super) const MAX_INDEX_FIELD_NAME_LEN: usize = 64;
/// Maximum number of bytes in an index name: the entity name plus, for each
/// permitted field, one delimiter byte and the field name.
pub(super) const MAX_INDEX_NAME_LEN: usize =
    MAX_ENTITY_NAME_LEN + (MAX_INDEX_FIELDS * (MAX_INDEX_FIELD_NAME_LEN + 1));
/// Byte that separates the entity segment and each field segment of an index name.
const INDEX_NAME_SEGMENT_DELIMITER: u8 = b'|';
/// Largest ASCII byte value; used to fill the `max_storable` sentinels.
const MAX_ASCII_BYTE: u8 = 0x7F;

// Compile-time guards for the narrowing casts in this module. Entity lengths
// are stored in a `u8` and index lengths in a `u16`, and the truncation lint
// is suppressed module-wide, so a limit that outgrows its length prefix must
// fail the build instead of silently wrapping.
const _: () = assert!(MAX_ENTITY_NAME_LEN <= u8::MAX as usize);
const _: () = assert!(MAX_INDEX_NAME_LEN <= u16::MAX as usize);

///
/// IdentityDecodeError
/// Decode errors (storage / corruption boundary)
///

#[derive(Debug, ThisError)]
pub enum IdentityDecodeError {
    /// The input slice is not exactly the identity's fixed stored size.
    #[error("invalid size")]
    InvalidSize,

    /// The decoded length prefix is zero or larger than the maximum name length.
    #[error("invalid length")]
    InvalidLength,

    /// The name payload contains a byte outside the ASCII range.
    #[error("non-ascii encoding")]
    NonAscii,

    /// A byte after the name payload is not zero.
    #[error("non-zero padding")]
    NonZeroPadding,

    /// The name payload contains the reserved `|` delimiter. In this module
    /// only entity-name decoding performs this check.
    #[error("reserved identity delimiter")]
    Delimiter,
}

///
/// EntityNameError
///

#[derive(Debug, ThisError)]
pub enum EntityNameError {
    /// The supplied name has zero bytes.
    #[error("entity name is empty")]
    Empty,

    /// The supplied name is `len` bytes long, which is more than the `max` allowed.
    #[error("entity name length {len} exceeds max {max}")]
    TooLong { len: usize, max: usize },

    /// The supplied name contains a non-ASCII byte.
    #[error("entity name must be ASCII")]
    NonAscii,

    /// The supplied name contains `|`, which is reserved as the index-name
    /// segment delimiter.
    #[error("entity name must not contain '|'")]
    Delimiter,
}

///
/// IndexNameError
///

#[derive(Debug, ThisError)]
pub enum IndexNameError {
    /// The field list has `len` entries, more than the `max` allowed.
    #[error("index has {len} fields (max {max})")]
    TooManyFields { len: usize, max: usize },

    /// The field list is empty.
    #[error("index must reference at least one field")]
    NoFields,

    /// One of the field names has zero bytes.
    #[error("index field name is empty")]
    FieldEmpty,

    /// The named `field` is longer than `max` bytes.
    #[error("index field name '{field}' exceeds max length {max}")]
    FieldTooLong { field: String, max: usize },

    /// The named `field` contains a non-ASCII byte.
    #[error("index field name '{field}' must be ASCII")]
    FieldNonAscii { field: String },

    /// The named `field` contains `|`, which is reserved as the segment delimiter.
    #[error("index field name '{field}' must not contain '|'")]
    FieldDelimiter { field: String },

    /// The combined `entity|field...` length `len` exceeds the `max` index-name length.
    #[error("index name length {len} exceeds max {max}")]
    TooLong { len: usize, max: usize },
}

///
/// EntityName
///

#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct EntityName {
    // Number of meaningful bytes at the start of `bytes`.
    len: u8,
    // Fixed-width buffer holding the name. `try_from_str` zero-fills it before
    // copying and `from_bytes` rejects non-zero trailing bytes, so the derived
    // `Eq`/`Hash` see a canonical value.
    bytes: [u8; MAX_ENTITY_NAME_LEN],
}

impl EntityName {
    /// Byte size of the persisted form: one length byte followed by the
    /// fixed-width name buffer.
    pub const STORED_SIZE_BYTES: u64 = 1 + (MAX_ENTITY_NAME_LEN as u64);

    /// `STORED_SIZE_BYTES` as a `usize`, for sizing buffers and arrays.
    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;

    /// Build an entity name from `name`.
    ///
    /// # Errors
    ///
    /// The checks run in this order and the first failure is returned:
    /// `Empty`, `TooLong`, `NonAscii`, `Delimiter`.
    pub fn try_from_str(name: &str) -> Result<Self, EntityNameError> {
        let raw = name.as_bytes();

        // Length checks come first so the reported error is deterministic
        // even when several constraints are violated at once.
        match raw.len() {
            0 => return Err(EntityNameError::Empty),
            len if len > MAX_ENTITY_NAME_LEN => {
                return Err(EntityNameError::TooLong {
                    len,
                    max: MAX_ENTITY_NAME_LEN,
                });
            }
            _ => {}
        }
        if !name.is_ascii() {
            return Err(EntityNameError::NonAscii);
        }
        if raw.contains(&INDEX_NAME_SEGMENT_DELIMITER) {
            return Err(EntityNameError::Delimiter);
        }

        // Start from an all-zero buffer so every byte past the name stays zero.
        let mut bytes = [0u8; MAX_ENTITY_NAME_LEN];
        bytes[..raw.len()].copy_from_slice(raw);

        Ok(Self {
            len: raw.len() as u8,
            bytes,
        })
    }

    /// Number of bytes in the entity name.
    #[must_use]
    pub const fn len(&self) -> usize {
        self.len as usize
    }

    /// Whether the stored length is zero.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// The name bytes, without the buffer's trailing padding.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        let (name, _padding) = self.bytes.split_at(self.len());
        name
    }

    /// The name as text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        // Both constructors and the sentinel only ever store ASCII, which is
        // always valid UTF-8, so this conversion is an invariant check.
        let name = self.as_bytes();
        std::str::from_utf8(name).expect("EntityName invariant: ASCII-only storage")
    }

    /// Encode as `[len, name bytes..., padding...]` in a fixed-size array.
    #[must_use]
    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
        let mut encoded = [0u8; Self::STORED_SIZE_USIZE];
        let (prefix, payload) = encoded.split_at_mut(1);
        prefix[0] = self.len;
        payload.copy_from_slice(&self.bytes);
        encoded
    }

    /// Decode a payload produced by `to_bytes`.
    ///
    /// # Errors
    ///
    /// The checks run in this order and the first failure is returned:
    /// `InvalidSize`, `InvalidLength`, `NonAscii`, `Delimiter`, `NonZeroPadding`.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
        if bytes.len() != Self::STORED_SIZE_USIZE {
            return Err(IdentityDecodeError::InvalidSize);
        }

        // Layout: one length byte, then the fixed-width name buffer.
        let (prefix, buffer) = bytes.split_at(1);
        let stored_len = prefix[0];
        let len = usize::from(stored_len);
        if !(1..=MAX_ENTITY_NAME_LEN).contains(&len) {
            return Err(IdentityDecodeError::InvalidLength);
        }

        let (name, padding) = buffer.split_at(len);
        if !name.is_ascii() {
            return Err(IdentityDecodeError::NonAscii);
        }
        if name.contains(&INDEX_NAME_SEGMENT_DELIMITER) {
            return Err(IdentityDecodeError::Delimiter);
        }
        if !padding.iter().all(|&byte| byte == 0) {
            return Err(IdentityDecodeError::NonZeroPadding);
        }

        // The buffer is already validated, so it can be copied over verbatim.
        let mut stored = [0u8; MAX_ENTITY_NAME_LEN];
        stored.copy_from_slice(buffer);

        Ok(Self {
            len: stored_len,
            bytes: stored,
        })
    }

    /// Sentinel with the maximum length and every byte set to the largest
    /// ASCII value.
    #[must_use]
    pub const fn max_storable() -> Self {
        let bytes = [MAX_ASCII_BYTE; MAX_ENTITY_NAME_LEN];
        Self {
            len: MAX_ENTITY_NAME_LEN as u8,
            bytes,
        }
    }
}

impl Ord for EntityName {
    /// Order by stored length first, then by the fixed-width name buffer.
    fn cmp(&self, other: &Self) -> Ordering {
        // Same order as comparing the `to_bytes()` encodings (length prefix,
        // then buffer). This is storage order, not lexical string order: a
        // shorter name always sorts before a longer one.
        match self.len.cmp(&other.len) {
            Ordering::Equal => self.bytes.cmp(&other.bytes),
            by_length => by_length,
        }
    }
}

impl PartialOrd for EntityName {
    /// Always `Some`: delegates to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Display for EntityName {
    /// Write the entity name as text, honouring the caller's width, fill,
    /// alignment, and precision flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `pad` applies the formatter's flags the same way `str` does; a bare
        // `write_str` would ignore them, so `{:<20}` would not pad the name.
        // With a plain `{}` the output is unchanged.
        f.pad(self.as_str())
    }
}

impl fmt::Debug for EntityName {
    /// Render as `EntityName(<name>)` instead of dumping the raw byte buffer.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = self.as_str();
        f.write_fmt(format_args!("EntityName({})", name))
    }
}

///
/// IndexName
///

#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct IndexName {
    // Number of meaningful bytes at the start of `bytes`.
    len: u16,
    // Fixed-width buffer holding `entity|field...`. `try_from_parts` zero-fills
    // it before writing and `from_bytes` rejects non-zero trailing bytes, so the
    // derived `Eq`/`Hash` see a canonical value.
    bytes: [u8; MAX_INDEX_NAME_LEN],
}

impl IndexName {
    /// Byte size of the persisted form: a two-byte length prefix followed by
    /// the fixed-width name buffer.
    pub const STORED_SIZE_BYTES: u64 = 2 + (MAX_INDEX_NAME_LEN as u64);
    /// `STORED_SIZE_BYTES` as a `usize`, for sizing buffers and arrays.
    pub const STORED_SIZE_USIZE: usize = Self::STORED_SIZE_BYTES as usize;

    /// Build the index identity `entity|field|field...` from its parts.
    ///
    /// # Errors
    ///
    /// The field count is checked first (`NoFields`, `TooManyFields`). Each
    /// field is then checked in list order (`FieldEmpty`, `FieldTooLong`,
    /// `FieldNonAscii`, `FieldDelimiter`), and finally the combined length
    /// (`TooLong`).
    pub fn try_from_parts(entity: &EntityName, fields: &[&str]) -> Result<Self, IndexNameError> {
        let field_count = fields.len();
        if field_count == 0 {
            return Err(IndexNameError::NoFields);
        }
        if field_count > MAX_INDEX_FIELDS {
            return Err(IndexNameError::TooManyFields {
                len: field_count,
                max: MAX_INDEX_FIELDS,
            });
        }

        // Validate every field while accumulating the encoded length: each
        // field contributes one delimiter byte plus its own bytes.
        let mut required = entity.len();
        for &field in fields {
            if field.is_empty() {
                return Err(IndexNameError::FieldEmpty);
            }
            if field.len() > MAX_INDEX_FIELD_NAME_LEN {
                return Err(IndexNameError::FieldTooLong {
                    field: field.to_owned(),
                    max: MAX_INDEX_FIELD_NAME_LEN,
                });
            }
            if !field.is_ascii() {
                return Err(IndexNameError::FieldNonAscii {
                    field: field.to_owned(),
                });
            }
            if field.as_bytes().contains(&INDEX_NAME_SEGMENT_DELIMITER) {
                return Err(IndexNameError::FieldDelimiter {
                    field: field.to_owned(),
                });
            }
            required = required.saturating_add(field.len() + 1);
        }

        if required > MAX_INDEX_NAME_LEN {
            return Err(IndexNameError::TooLong {
                len: required,
                max: MAX_INDEX_NAME_LEN,
            });
        }

        // Write the segments into a zero-filled buffer so trailing bytes stay zero.
        let delimiter = [INDEX_NAME_SEGMENT_DELIMITER];
        let mut buffer = [0u8; MAX_INDEX_NAME_LEN];
        let mut written = 0usize;

        Self::push_bytes(&mut buffer, &mut written, entity.as_bytes());
        for field in fields {
            Self::push_bytes(&mut buffer, &mut written, &delimiter);
            Self::push_bytes(&mut buffer, &mut written, field.as_bytes());
        }

        Ok(Self {
            len: written as u16,
            bytes: buffer,
        })
    }

    /// The identity bytes, without the buffer's trailing padding.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        let (name, _padding) = self.bytes.split_at(usize::from(self.len));
        name
    }

    /// The identity as text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        // Both constructors and the sentinel only ever store ASCII, which is
        // always valid UTF-8, so this conversion is an invariant check.
        let name = self.as_bytes();
        std::str::from_utf8(name).expect("IndexName invariant: ASCII-only storage")
    }

    /// Encode as `[len (big-endian u16), name bytes..., padding...]` in a
    /// fixed-size array.
    #[must_use]
    pub fn to_bytes(self) -> [u8; Self::STORED_SIZE_USIZE] {
        let mut encoded = [0u8; Self::STORED_SIZE_USIZE];
        let (prefix, payload) = encoded.split_at_mut(2);
        prefix.copy_from_slice(&self.len.to_be_bytes());
        payload.copy_from_slice(&self.bytes);
        encoded
    }

    /// Decode a payload produced by `to_bytes`.
    ///
    /// Only the fixed-width envelope is validated (size, length prefix, ASCII
    /// payload, zero padding). Field segments are not reconstructed, and
    /// nothing proves the bytes came from `try_from_parts`; callers must
    /// ensure persisted bytes originate from a previously validated
    /// `IndexName`.
    ///
    /// # Errors
    ///
    /// The checks run in this order and the first failure is returned:
    /// `InvalidSize`, `InvalidLength`, `NonAscii`, `NonZeroPadding`.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentityDecodeError> {
        if bytes.len() != Self::STORED_SIZE_USIZE {
            return Err(IdentityDecodeError::InvalidSize);
        }

        // Layout: two big-endian length bytes, then the fixed-width name buffer.
        let (prefix, buffer) = bytes.split_at(2);
        let stored_len = u16::from_be_bytes([prefix[0], prefix[1]]);
        let len = usize::from(stored_len);
        if !(1..=MAX_INDEX_NAME_LEN).contains(&len) {
            return Err(IdentityDecodeError::InvalidLength);
        }

        let (name, padding) = buffer.split_at(len);
        if !name.is_ascii() {
            return Err(IdentityDecodeError::NonAscii);
        }
        if !padding.iter().all(|&byte| byte == 0) {
            return Err(IdentityDecodeError::NonZeroPadding);
        }

        // The buffer is already validated, so it can be copied over verbatim.
        let mut stored = [0u8; MAX_INDEX_NAME_LEN];
        stored.copy_from_slice(buffer);

        Ok(Self {
            len: stored_len,
            bytes: stored,
        })
    }

    // Copy `bytes` into `out` at offset `*len` and advance the offset past them.
    fn push_bytes(out: &mut [u8; MAX_INDEX_NAME_LEN], len: &mut usize, bytes: &[u8]) {
        let start = *len;
        let end = start + bytes.len();
        out[start..end].copy_from_slice(bytes);
        *len = end;
    }

    /// Sentinel with the maximum length and every byte set to the largest
    /// ASCII value.
    #[must_use]
    pub const fn max_storable() -> Self {
        let bytes = [MAX_ASCII_BYTE; MAX_INDEX_NAME_LEN];
        Self {
            len: MAX_INDEX_NAME_LEN as u16,
            bytes,
        }
    }
}

impl Ord for IndexName {
    /// Order by stored length first, then by the fixed-width name buffer.
    fn cmp(&self, other: &Self) -> Ordering {
        // Same order as comparing the `to_bytes()` encodings: the length
        // prefix is big-endian, so comparing the `u16` values is identical to
        // comparing the two prefix bytes. Comparing the fields directly avoids
        // building two full stored-size arrays on every comparison.
        // This is storage order, not lexical string order.
        self.len
            .cmp(&other.len)
            .then_with(|| self.bytes.cmp(&other.bytes))
    }
}

impl PartialOrd for IndexName {
    /// Always `Some`: delegates to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl fmt::Debug for IndexName {
    /// Render as `IndexName(<name>)` instead of dumping the raw byte buffer.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = self.as_str();
        f.write_fmt(format_args!("IndexName({})", name))
    }
}

impl Display for IndexName {
    /// Write the index identity as text, honouring the caller's width, fill,
    /// alignment, and precision flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `pad` applies the formatter's flags the same way `str` does; a bare
        // `write_str` would ignore them, so `{:<40}` would not pad the name.
        // With a plain `{}` the output is unchanged.
        f.pad(self.as_str())
    }
}