icydb-core 0.76.10

IcyDB — A type-safe, embedded ORM and schema system for the Internet Computer
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
//! Module: data::structural_row
//! Responsibility: canonical structural persisted-row decode helpers.
//! Does not own: typed entity reconstruction, slot layout planning, or query semantics.
//! Boundary: runtime paths use this module when they need persisted-row structure without `E`.

use crate::{
    db::{codec::ROW_FORMAT_VERSION_CURRENT, data::RawRow},
    error::InternalError,
    model::{entity::EntityModel, field::FieldModel},
};
use serde_cbor::Value as CborValue;
use std::borrow::Cow;
use thiserror::Error as ThisError;

type SlotSpan = Option<(usize, usize)>;
type SlotSpans = Vec<SlotSpan>;
type RowFieldSpans<'a> = (Cow<'a, [u8]>, SlotSpans);

///
/// StructuralRowContract
///
/// StructuralRowContract is the compact static row-shape authority used by
/// structural row readers that do not need the full semantic `EntityModel`.
/// It keeps only the entity path, field table, and primary-key slot required
/// to open canonical persisted rows through the data-layer decode boundary.
///
/// The contract is `Copy`: it holds two `'static` borrows and one index, so
/// its accessors take `self` by value.
///

#[derive(Clone, Copy, Debug)]
pub(in crate::db) struct StructuralRowContract {
    /// Path of the owning entity, copied from `EntityModel::path()`.
    entity_path: &'static str,
    /// Static field table, copied from `EntityModel::fields()`; its length is
    /// the slot count that persisted rows are validated against.
    fields: &'static [FieldModel],
    /// Primary-key slot index, copied from `EntityModel::primary_key_slot()`.
    primary_key_slot: usize,
}

impl StructuralRowContract {
    /// Capture the structural row shape of one generated entity model.
    ///
    /// Only the model's path, field table, and primary-key slot are copied.
    #[must_use]
    pub(in crate::db) const fn from_model(model: &'static EntityModel) -> Self {
        let entity_path = model.path();
        let fields = model.fields();
        let primary_key_slot = model.primary_key_slot();

        Self {
            entity_path,
            fields,
            primary_key_slot,
        }
    }

    /// Return the path of the owning entity, intended for diagnostics.
    #[must_use]
    pub(in crate::db) const fn entity_path(self) -> &'static str {
        let Self { entity_path, .. } = self;

        entity_path
    }

    /// Return the static field table used for slot-indexed decode.
    #[must_use]
    pub(in crate::db) const fn fields(self) -> &'static [FieldModel] {
        let Self { fields, .. } = self;

        fields
    }

    /// Return how many fields (and therefore slots) the contract declares.
    #[must_use]
    pub(in crate::db) const fn field_count(self) -> usize {
        self.fields().len()
    }

    /// Return the slot index that holds the primary key.
    #[must_use]
    pub(in crate::db) const fn primary_key_slot(self) -> usize {
        let Self {
            primary_key_slot, ..
        } = self;

        primary_key_slot
    }
}

///
/// StructuralRowFieldBytes
///
/// StructuralRowFieldBytes is the top-level persisted-row field scanner for
/// slot-driven proof paths.
/// It keeps the original encoded field payload bytes and records one byte span
/// per model slot so callers can decode only the fields they actually need.
///
/// The payload is borrowed from the input row when the envelope stores it as a
/// byte string, and owned when it had to be rebuilt from an integer array.
///

#[derive(Clone, Debug)]
pub(in crate::db) struct StructuralRowFieldBytes<'a> {
    /// Slot data section only: the bytes that follow the slot-count header and
    /// slot table inside the row payload.
    payload: Cow<'a, [u8]>,
    /// One `(start, end)` byte range per contract slot, relative to `payload`.
    /// A successful decode fills every slot with `Some`.
    spans: SlotSpans,
}

impl<'a> StructuralRowFieldBytes<'a> {
    /// Scan raw row-envelope bytes into slot-aligned field spans for `contract`.
    ///
    /// # Errors
    ///
    /// Fails when the row envelope or the slot table does not pass structural
    /// validation.
    pub(in crate::db) fn from_row_bytes_with_contract(
        row_bytes: &'a [u8],
        contract: StructuralRowContract,
    ) -> Result<Self, StructuralRowDecodeError> {
        decode_structural_row_payload_bytes(row_bytes)
            .and_then(|envelope_payload| decode_row_field_spans(envelope_payload, contract))
            .map(|(payload, spans)| Self { payload, spans })
    }

    /// Scan one raw row using the row shape derived from `model`.
    ///
    /// # Errors
    ///
    /// Same failures as [`Self::from_row_bytes_with_contract`].
    pub(in crate::db) fn from_raw_row(
        raw_row: &'a RawRow,
        model: &'static EntityModel,
    ) -> Result<Self, StructuralRowDecodeError> {
        let contract = StructuralRowContract::from_model(model);

        Self::from_raw_row_with_contract(raw_row, contract)
    }

    /// Scan one raw row using an already-built structural contract.
    ///
    /// # Errors
    ///
    /// Same failures as [`Self::from_row_bytes_with_contract`].
    pub(in crate::db) fn from_raw_row_with_contract(
        raw_row: &'a RawRow,
        contract: StructuralRowContract,
    ) -> Result<Self, StructuralRowDecodeError> {
        let row_bytes = raw_row.as_bytes();

        Self::from_row_bytes_with_contract(row_bytes, contract)
    }

    /// Borrow the encoded bytes of one persisted field by slot index.
    ///
    /// Returns `None` when `slot` is outside the span table or has no span.
    #[must_use]
    pub(in crate::db) fn field(&self, slot: usize) -> Option<&[u8]> {
        match self.spans.get(slot) {
            Some(&Some((start, end))) => Some(&self.payload[start..end]),
            _ => None,
        }
    }
}

///
/// StructuralRowDecodeError
///
/// StructuralRowDecodeError is the local error wrapper for the manual
/// persisted-row decode path in this module (row envelope, format version,
/// and slot table validation).
/// Every failure is carried as an `InternalError`; the wrapper displays
/// transparently and collapses back through `into_internal_error`.
///

#[derive(Debug, ThisError)]
pub(in crate::db) enum StructuralRowDecodeError {
    /// Wrapped internal error; `#[from]` lets `?` lift an `InternalError` directly.
    #[error(transparent)]
    Deserialize(#[from] InternalError),
}

impl StructuralRowDecodeError {
    // Unwrap the local decode wrapper into the crate-wide internal error type.
    pub(in crate::db) fn into_internal_error(self) -> InternalError {
        // Irrefutable today; adding a variant turns this into a compile error.
        let Self::Deserialize(inner) = self;

        inner
    }

    // Wrap `message` as a serialize-corruption error raised by the manual row decoder.
    fn corruption(message: impl Into<String>) -> Self {
        let text: String = message.into();

        Self::Deserialize(InternalError::serialize_corruption(text))
    }

    // Wrap `message` as an incompatible-persisted-format error raised by the manual row decoder.
    fn incompatible_persisted_format(message: impl Into<String>) -> Self {
        let text: String = message.into();

        Self::Deserialize(InternalError::serialize_incompatible_persisted_format(text))
    }
}

/// Validate one persisted row envelope and return its slot payload as CBOR bytes.
///
/// The slot-framed payload envelope is the only accepted row shape, so a
/// successful result is always `CborValue::Bytes` holding an owned copy of the
/// validated slot payload.
///
/// # Errors
///
/// Returns the underlying `InternalError` when envelope validation fails.
pub(in crate::db) fn decode_structural_row_cbor(
    raw_row: &RawRow,
) -> Result<CborValue, InternalError> {
    match decode_structural_row_payload_bytes(raw_row.as_bytes()) {
        Ok(slot_payload) => Ok(CborValue::Bytes(slot_payload.into_owned())),
        Err(decode_err) => Err(decode_err.into_internal_error()),
    }
}

// Decode one persisted row envelope into the enclosed slot payload bytes.
//
// The envelope must be a definite-length CBOR `array(2)` of `[format_version, payload]`:
// - `format_version` is an unsigned integer that fits in `u8` and passes
//   `validate_structural_row_format_version`.
// - `payload` is either a byte string (returned borrowed from `bytes`) or an array of
//   unsigned integers that each fit in `u8` (returned as an owned buffer).
//
// In both payload forms the payload must end exactly at the end of `bytes`.
// Every violation is reported as a corruption error, except an unsupported version,
// which is reported as an incompatible-persisted-format error.
fn decode_structural_row_payload_bytes(
    bytes: &[u8],
) -> Result<Cow<'_, [u8]>, StructuralRowDecodeError> {
    let Some((major, argument, mut cursor)) = parse_cbor_head(bytes, 0)? else {
        return Err(StructuralRowDecodeError::corruption(
            "row decode: empty row envelope",
        ));
    };
    // Major type 4 is a CBOR array; the envelope always has exactly two elements.
    if major != 4 || argument != 2 {
        return Err(StructuralRowDecodeError::corruption(
            "row decode: expected row envelope array[2]",
        ));
    }

    let Some((version_major, version_argument, version_end)) = parse_cbor_head(bytes, cursor)?
    else {
        return Err(StructuralRowDecodeError::corruption(
            "row decode: missing row format version",
        ));
    };
    // Major type 0 is an unsigned integer.
    if version_major != 0 {
        return Err(StructuralRowDecodeError::corruption(
            "row decode: row format version is not an unsigned integer",
        ));
    }
    let version = u8::try_from(version_argument).map_err(|_| {
        StructuralRowDecodeError::corruption("row decode: row format version out of range")
    })?;
    validate_structural_row_format_version(version)?;
    cursor = version_end;

    let Some((payload_major, payload_argument, payload_start)) = parse_cbor_head(bytes, cursor)?
    else {
        return Err(StructuralRowDecodeError::corruption(
            "row decode: missing row payload",
        ));
    };
    let payload = match payload_major {
        // Byte string: the payload is the remaining input and can be borrowed as-is.
        2 => {
            let payload_len = usize::try_from(payload_argument).map_err(|_| {
                StructuralRowDecodeError::corruption("row decode: payload length out of range")
            })?;
            let payload_end = payload_start.checked_add(payload_len).ok_or_else(|| {
                StructuralRowDecodeError::corruption("row decode: payload length overflow")
            })?;
            // Rejects both trailing bytes and a declared length that runs past the input,
            // which also makes the slice below in-bounds.
            if payload_end != bytes.len() {
                return Err(StructuralRowDecodeError::corruption(
                    "row decode: trailing bytes after payload",
                ));
            }

            Cow::Borrowed(&bytes[payload_start..payload_end])
        }
        // Array of small unsigned integers: rebuild the payload one byte per element.
        4 => {
            let payload_len = usize::try_from(payload_argument).map_err(|_| {
                StructuralRowDecodeError::corruption(
                    "row decode: payload array length out of range",
                )
            })?;
            // The declared element count comes from persisted bytes and is not trusted.
            // Each element needs at least one encoded byte, so the remaining input length
            // bounds the real count. Capping the reservation stops a corrupted header from
            // requesting an enormous allocation or panicking on capacity overflow before
            // any element has been validated.
            let remaining_len = bytes.len().saturating_sub(payload_start);
            let mut payload = Vec::with_capacity(payload_len.min(remaining_len));
            let mut payload_cursor = payload_start;

            for _ in 0..payload_len {
                let Some((byte_major, byte_argument, next_cursor)) =
                    parse_cbor_head(bytes, payload_cursor)?
                else {
                    return Err(StructuralRowDecodeError::corruption(
                        "row decode: truncated payload byte array",
                    ));
                };
                if byte_major != 0 {
                    return Err(StructuralRowDecodeError::corruption(
                        "row decode: payload byte array contains non-integer element",
                    ));
                }
                let byte = u8::try_from(byte_argument).map_err(|_| {
                    StructuralRowDecodeError::corruption(
                        "row decode: payload byte array element out of range",
                    )
                })?;
                payload.push(byte);
                payload_cursor = next_cursor;
            }

            if payload_cursor != bytes.len() {
                return Err(StructuralRowDecodeError::corruption(
                    "row decode: trailing bytes after payload byte array",
                ));
            }

            Cow::Owned(payload)
        }
        _ => {
            return Err(StructuralRowDecodeError::corruption(
                "row decode: payload is not a byte string",
            ));
        }
    };

    Ok(payload)
}

// Decode the canonical slot-container header into slot-aligned payload spans.
fn decode_row_field_spans(
    payload: Cow<'_, [u8]>,
    contract: StructuralRowContract,
) -> Result<RowFieldSpans<'_>, StructuralRowDecodeError> {
    let bytes = payload.as_ref();
    let field_count_bytes = bytes
        .get(..2)
        .ok_or_else(|| StructuralRowDecodeError::corruption("row decode: truncated slot header"))?;
    let field_count = usize::from(u16::from_be_bytes([
        field_count_bytes[0],
        field_count_bytes[1],
    ]));
    if field_count != contract.field_count() {
        return Err(StructuralRowDecodeError::corruption(format!(
            "row decode: slot count mismatch: expected {}, found {}",
            contract.field_count(),
            field_count,
        )));
    }
    let table_len = field_count
        .checked_mul(8)
        .ok_or_else(|| StructuralRowDecodeError::corruption("row decode: slot table overflow"))?;
    let data_start = 2usize.checked_add(table_len).ok_or_else(|| {
        StructuralRowDecodeError::corruption("row decode: slot payload header overflow")
    })?;
    let table = bytes
        .get(2..data_start)
        .ok_or_else(|| StructuralRowDecodeError::corruption("row decode: truncated slot table"))?;
    let data_section = bytes
        .get(data_start..)
        .ok_or_else(|| StructuralRowDecodeError::corruption("row decode: missing slot payloads"))?;
    let mut spans: SlotSpans = vec![None; contract.field_count()];

    for (slot, span) in spans.iter_mut().enumerate() {
        let entry_start = slot.checked_mul(8).ok_or_else(|| {
            StructuralRowDecodeError::corruption("row decode: slot index overflow")
        })?;
        let entry = table.get(entry_start..entry_start + 8).ok_or_else(|| {
            StructuralRowDecodeError::corruption("row decode: truncated slot table entry")
        })?;
        let start = usize::try_from(u32::from_be_bytes([entry[0], entry[1], entry[2], entry[3]]))
            .map_err(|_| {
            StructuralRowDecodeError::corruption("row decode: slot start out of range")
        })?;
        let len = usize::try_from(u32::from_be_bytes([entry[4], entry[5], entry[6], entry[7]]))
            .map_err(|_| {
                StructuralRowDecodeError::corruption("row decode: slot length out of range")
            })?;
        if len == 0 {
            return Err(StructuralRowDecodeError::corruption(format!(
                "row decode: missing slot payload: slot={slot}",
            )));
        }
        let end = start.checked_add(len).ok_or_else(|| {
            StructuralRowDecodeError::corruption("row decode: slot span overflow")
        })?;
        if end > data_section.len() {
            return Err(StructuralRowDecodeError::corruption(
                "row decode: slot span exceeds payload length",
            ));
        }
        *span = Some((start, end));
    }

    let payload = match payload {
        Cow::Borrowed(bytes) => Cow::Borrowed(&bytes[data_start..]),
        Cow::Owned(bytes) => Cow::Owned(bytes[data_start..].to_vec()),
    };

    Ok((payload, spans))
}

// Parse one CBOR head at `cursor` into `(major, argument, payload_cursor)`.
//
// Returns `Ok(None)` when `cursor` is at or past the end of `bytes`.
// The major type is the top three bits of the initial byte; the low five bits either
// hold the argument directly (0..=23) or select a 1/2/4/8-byte big-endian argument
// that follows (24..=27). Indefinite-length heads (31) and every other additional-info
// value are rejected as corruption, as is a head whose argument bytes are cut short.
fn parse_cbor_head(
    bytes: &[u8],
    cursor: usize,
) -> Result<Option<(u8, u64, usize)>, StructuralRowDecodeError> {
    let Some(&initial) = bytes.get(cursor) else {
        return Ok(None);
    };
    let major = initial >> 5;
    let info = initial & 0x1f;
    let argument_start = cursor + 1;

    // Number of argument bytes that follow the initial byte.
    let width: usize = match info {
        // The argument is stored inline; nothing follows the initial byte.
        0..=23 => return Ok(Some((major, u64::from(info), argument_start))),
        24 => 1,
        25 => 2,
        26 => 4,
        27 => 8,
        31 => {
            return Err(StructuralRowDecodeError::corruption(
                "row decode: indefinite-length CBOR is unsupported in persisted rows",
            ));
        }
        _ => {
            return Err(StructuralRowDecodeError::corruption(
                "row decode: invalid CBOR additional info",
            ));
        }
    };

    let argument_end = argument_start + width;
    let Some(argument_bytes) = bytes.get(argument_start..argument_end) else {
        return Err(StructuralRowDecodeError::corruption(
            "row decode: truncated CBOR head",
        ));
    };
    // Big-endian accumulate; at most eight bytes, so the value always fits in `u64`.
    let argument = argument_bytes
        .iter()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));

    Ok(Some((major, argument, argument_end)))
}

// Accept only the current persisted row format version.
//
// Any other version yields an incompatible-persisted-format error that names both
// the version found in the row and the version this runtime supports.
fn validate_structural_row_format_version(
    format_version: u8,
) -> Result<(), StructuralRowDecodeError> {
    if format_version != ROW_FORMAT_VERSION_CURRENT {
        let message = format!(
            "row format version {format_version} is unsupported by runtime version {ROW_FORMAT_VERSION_CURRENT}",
        );

        return Err(StructuralRowDecodeError::incompatible_persisted_format(
            message,
        ));
    }

    Ok(())
}