Skip to main content

mimir_core/
canonical.rs

//! Canonical bytecode encoder and decoder.
//!
//! Implements the on-disk format specified in
//! `docs/concepts/ir-canonical-form.md`. Every record is framed as
//! `[1 byte opcode][varint body length][body]`. Unknown opcodes are
//! errors, not silently skipped — this differs from the spec's
//! forward-compatibility contract, which is reserved for the extension
//! opcode `0xFF` (not implemented in this milestone).
9
10use std::fmt;
11
12use thiserror::Error;
13
14use crate::clock::ClockTime;
15use crate::confidence::Confidence;
16use crate::symbol::{SymbolId, SymbolKind};
17use crate::value::Value;
18
19// -------------------------------------------------------------------
20// Opcodes
21// -------------------------------------------------------------------
22
/// Record opcode, numbered per `ir-canonical-form.md` § 4.
///
/// `#[repr(u8)]` pins every variant's discriminant to its one-byte
/// opcode value.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(u8)]
pub enum Opcode {
    /// Memory record: Semantic.
    Sem = 0x01,
    /// Memory record: Episodic.
    Epi = 0x02,
    /// Memory record: Procedural.
    Pro = 0x03,
    /// Memory record: Inferential.
    Inf = 0x04,
    /// Edge: supersession.
    Supersedes = 0x10,
    /// Edge: Episodic correction.
    Corrects = 0x11,
    /// Edge: Inferential stale-parent.
    StaleParent = 0x12,
    /// Edge: Inferential reconfirmation.
    Reconfirms = 0x13,
    /// Marker for an episode boundary.
    Checkpoint = 0x20,
    /// Episode metadata (label / parent / retracts). The store writes
    /// it immediately before the `Checkpoint` of any batch containing
    /// an `(episode :start ...)` form. See `episode-semantics.md`
    /// § 4.2.
    EpisodeMeta = 0x21,
    /// Symbol event: new allocation.
    SymbolAlloc = 0x30,
    /// Symbol event: rename edge.
    SymbolRename = 0x31,
    /// Symbol event: alias edge.
    SymbolAlias = 0x32,
    /// Symbol event: retirement flag set.
    SymbolRetire = 0x33,
    /// Symbol event: retirement flag cleared.
    SymbolUnretire = 0x34,
    /// Pin flag set (suspends decay).
    Pin = 0x35,
    /// Pin flag cleared.
    Unpin = 0x36,
    /// Operator-authoritative flag set.
    AuthoritativeSet = 0x37,
    /// Operator-authoritative flag cleared.
    AuthoritativeClear = 0x38,
}
69
70impl Opcode {
71    fn from_byte(byte: u8) -> Option<Self> {
72        Some(match byte {
73            0x01 => Self::Sem,
74            0x02 => Self::Epi,
75            0x03 => Self::Pro,
76            0x04 => Self::Inf,
77            0x10 => Self::Supersedes,
78            0x11 => Self::Corrects,
79            0x12 => Self::StaleParent,
80            0x13 => Self::Reconfirms,
81            0x20 => Self::Checkpoint,
82            0x21 => Self::EpisodeMeta,
83            0x30 => Self::SymbolAlloc,
84            0x31 => Self::SymbolRename,
85            0x32 => Self::SymbolAlias,
86            0x33 => Self::SymbolRetire,
87            0x34 => Self::SymbolUnretire,
88            0x35 => Self::Pin,
89            0x36 => Self::Unpin,
90            0x37 => Self::AuthoritativeSet,
91            0x38 => Self::AuthoritativeClear,
92            _ => return None,
93        })
94    }
95}
96
97// -------------------------------------------------------------------
98// Clocks and flags
99// -------------------------------------------------------------------
100
/// Wire value standing in for an absent `invalid_at`.
///
/// `ir-canonical-form.md` § 3.1 reserves `u64::MAX` for this purpose:
/// it is written as the 8-byte little-endian pattern
/// `0xFFFF_FFFF_FFFF_FFFF` and read back as `None`.
const NONE_SENTINEL: u64 = u64::MAX;
105
/// Flags carried by a Semantic memory record.
///
/// Per `ir-canonical-form.md` § 5.1 only `projected` exists. The wire
/// form is one byte: bit 0 is `projected`, bit 1 is reserved and must
/// be 0.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct SemFlags {
    /// Set when the memory's `valid_at` is a future projection.
    pub projected: bool,
}
114
115impl SemFlags {
116    fn to_u8(self) -> u8 {
117        u8::from(self.projected)
118    }
119
120    fn try_from_u8(b: u8, offset: usize) -> Result<Self, DecodeError> {
121        const ALLOWED_MASK: u8 = 0b0000_0001;
122        if b & !ALLOWED_MASK != 0 {
123            return Err(DecodeError::InvalidFlagBits {
124                byte: b,
125                allowed_mask: ALLOWED_MASK,
126                offset,
127            });
128        }
129
130        Ok(Self {
131            projected: b & (1 << 0) != 0,
132        })
133    }
134}
135
/// Flags carried by an Inferential memory record: `projected` plus
/// `stale` (the latter is Inferential-only per `temporal-model.md`
/// § 5.4).
///
/// The wire form is one byte: bit 0 is `projected`, bit 1 is `stale`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct InfFlags {
    /// Set when the memory's `valid_at` is a future projection.
    pub projected: bool,
    /// Set when, at write time, the Inferential was derived from a
    /// parent that was already superseded (spec § 5.4). Runtime
    /// staleness (any incoming `StaleParent` edge) is a read-time
    /// overlay and is not recorded in this flag.
    pub stale: bool,
}
149
150impl InfFlags {
151    fn to_u8(self) -> u8 {
152        let mut b = 0_u8;
153        if self.projected {
154            b |= 1 << 0;
155        }
156        if self.stale {
157            b |= 1 << 1;
158        }
159        b
160    }
161
162    fn try_from_u8(b: u8, offset: usize) -> Result<Self, DecodeError> {
163        const ALLOWED_MASK: u8 = 0b0000_0011;
164        if b & !ALLOWED_MASK != 0 {
165            return Err(DecodeError::InvalidFlagBits {
166                byte: b,
167                allowed_mask: ALLOWED_MASK,
168                offset,
169            });
170        }
171
172        Ok(Self {
173            projected: b & (1 << 0) != 0,
174            stale: b & (1 << 1) != 0,
175        })
176    }
177}
178
179/// Four clocks plus the projection/stale flags, shared across the four
180/// memory record shapes. Episodic adds `at_time` separately.
181#[derive(Copy, Clone, Debug, PartialEq, Eq)]
182pub struct Clocks {
183    /// When the fact becomes true in the world.
184    pub valid_at: ClockTime,
185    /// When the librarian observed the memory. For non-Episodic
186    /// memories this equals `committed_at`.
187    pub observed_at: ClockTime,
188    /// When the librarian durably committed the record.
189    pub committed_at: ClockTime,
190    /// When the fact stopped being true. `None` while current.
191    pub invalid_at: Option<ClockTime>,
192}
193
194// -------------------------------------------------------------------
195// Record variants
196// -------------------------------------------------------------------
197
198/// Semantic memory record.
199#[derive(Clone, Debug, PartialEq)]
200pub struct SemRecord {
201    /// Memory ID.
202    pub memory_id: SymbolId,
203    /// Subject.
204    pub s: SymbolId,
205    /// Predicate.
206    pub p: SymbolId,
207    /// Object value.
208    pub o: Value,
209    /// Source.
210    pub source: SymbolId,
211    /// Stored confidence.
212    pub confidence: Confidence,
213    /// Four clocks.
214    pub clocks: Clocks,
215    /// Flags — projected only for Semantic.
216    pub flags: SemFlags,
217}
218
219/// Episodic memory record. No flags — Episodic neither projects nor
220/// stales (the `projected` bit is Semantic / Inferential only, and
221/// `stale` is Inferential only). Dropped from the wire as part of
222/// the schema bump.
223#[derive(Clone, Debug, PartialEq)]
224pub struct EpiRecord {
225    /// Memory ID.
226    pub memory_id: SymbolId,
227    /// Event ID.
228    pub event_id: SymbolId,
229    /// Event-type symbol.
230    pub kind: SymbolId,
231    /// Participants.
232    pub participants: Vec<SymbolId>,
233    /// Location.
234    pub location: SymbolId,
235    /// Event time.
236    pub at_time: ClockTime,
237    /// Observation time.
238    pub observed_at: ClockTime,
239    /// Source.
240    pub source: SymbolId,
241    /// Stored confidence.
242    pub confidence: Confidence,
243    /// Librarian-assigned commit time.
244    pub committed_at: ClockTime,
245    /// Supersession time (sentinel when current — Episodic rarely
246    /// sets this; the librarian may still record it for consistency).
247    pub invalid_at: Option<ClockTime>,
248}
249
250/// Procedural memory record. No flags — Procedural doesn't project
251/// or stale. Dropped from the wire.
252#[derive(Clone, Debug, PartialEq)]
253pub struct ProRecord {
254    /// Memory ID.
255    pub memory_id: SymbolId,
256    /// Rule ID.
257    pub rule_id: SymbolId,
258    /// Trigger.
259    pub trigger: Value,
260    /// Action.
261    pub action: Value,
262    /// Optional precondition.
263    pub precondition: Option<Value>,
264    /// Scope.
265    pub scope: SymbolId,
266    /// Source.
267    pub source: SymbolId,
268    /// Stored confidence.
269    pub confidence: Confidence,
270    /// Four clocks.
271    pub clocks: Clocks,
272}
273
274/// Inferential memory record.
275#[derive(Clone, Debug, PartialEq)]
276pub struct InfRecord {
277    /// Memory ID.
278    pub memory_id: SymbolId,
279    /// Subject.
280    pub s: SymbolId,
281    /// Predicate.
282    pub p: SymbolId,
283    /// Object.
284    pub o: Value,
285    /// Parent memories.
286    pub derived_from: Vec<SymbolId>,
287    /// Inference method.
288    pub method: SymbolId,
289    /// Stored confidence.
290    pub confidence: Confidence,
291    /// Four clocks.
292    pub clocks: Clocks,
293    /// Flags — projection + stale. Inferential is the only record
294    /// that carries both.
295    pub flags: InfFlags,
296}
297
298/// Supersession-family edge record. Shared shape for `SUPERSEDES` /
299/// `CORRECTS` / `STALE_PARENT` / `RECONFIRMS`.
300#[derive(Clone, Copy, Debug, PartialEq, Eq)]
301pub struct EdgeRecord {
302    /// Source memory ID.
303    pub from: SymbolId,
304    /// Target memory ID.
305    pub to: SymbolId,
306    /// Timestamp the edge was applied.
307    pub at: ClockTime,
308}
309
310/// Episode boundary marker.
311#[derive(Clone, Copy, Debug, PartialEq, Eq)]
312pub struct CheckpointRecord {
313    /// Episode ID.
314    pub episode_id: SymbolId,
315    /// Commit time of the episode.
316    pub at: ClockTime,
317    /// Number of memory records that are members of this episode.
318    pub memory_count: u64,
319}
320
321/// Episode metadata record — extends the mechanical `Checkpoint` with
322/// the agent-visible fields from `episode-semantics.md` § 4.2 that
323/// the bare `Checkpoint` doesn't carry: optional label, optional
324/// parent Episode link, and a (possibly empty) list of retracted
325/// Episodes.
326///
327/// Emitted by the store immediately before the batch's
328/// `Checkpoint`. Batches that don't carry an `(episode :start ...)`
329/// form have no `EpisodeMeta`.
330#[derive(Clone, Debug, PartialEq, Eq)]
331pub struct EpisodeMetaRecord {
332    /// Episode ID this metadata describes (same as the following
333    /// `Checkpoint`'s `episode_id`).
334    pub episode_id: SymbolId,
335    /// Commit time — same as the `Checkpoint`'s `at`.
336    pub at: ClockTime,
337    /// Optional human-readable label. Capped at 256 bytes per
338    /// `episode-semantics.md` § 4.3.
339    pub label: Option<String>,
340    /// Optional parent Episode.
341    pub parent_episode_id: Option<SymbolId>,
342    /// Episodes this Episode retracts.
343    pub retracts: Vec<SymbolId>,
344}
345
346/// Symbol-table event record. Shared shape across `SYMBOL_*` opcodes.
347#[derive(Clone, Debug, PartialEq, Eq)]
348pub struct SymbolEventRecord {
349    /// Symbol ID being affected.
350    pub symbol_id: SymbolId,
351    /// Canonical or alias name attached by this event (may be empty
352    /// for retire/unretire where the spec says the field is ignored).
353    pub name: String,
354    /// Locked kind for this symbol.
355    pub symbol_kind: SymbolKind,
356    /// Timestamp of the event.
357    pub at: ClockTime,
358}
359
360/// Pin / authoritative event record. Shared shape across the four
361/// opcodes `PIN`/`UNPIN`/`AUTHORITATIVE_SET`/`AUTHORITATIVE_CLEAR`.
362#[derive(Clone, Copy, Debug, PartialEq, Eq)]
363pub struct FlagEventRecord {
364    /// Target memory.
365    pub memory_id: SymbolId,
366    /// Timestamp of the event.
367    pub at: ClockTime,
368    /// Agent or user who set/cleared the flag.
369    pub actor_symbol: SymbolId,
370}
371
372/// A canonical-form record — the sum of every encoded record shape.
373#[derive(Clone, Debug, PartialEq)]
374pub enum CanonicalRecord {
375    /// Semantic memory.
376    Sem(SemRecord),
377    /// Episodic memory.
378    Epi(EpiRecord),
379    /// Procedural memory.
380    Pro(ProRecord),
381    /// Inferential memory.
382    Inf(InfRecord),
383    /// Supersession edge.
384    Supersedes(EdgeRecord),
385    /// Episodic correction edge.
386    Corrects(EdgeRecord),
387    /// Inferential stale-parent edge.
388    StaleParent(EdgeRecord),
389    /// Inferential reconfirmation edge.
390    Reconfirms(EdgeRecord),
391    /// Episode boundary marker.
392    Checkpoint(CheckpointRecord),
393    /// Episode metadata (label / parent / retracts).
394    EpisodeMeta(EpisodeMetaRecord),
395    /// New symbol allocation.
396    SymbolAlloc(SymbolEventRecord),
397    /// Rename.
398    SymbolRename(SymbolEventRecord),
399    /// Alias.
400    SymbolAlias(SymbolEventRecord),
401    /// Retirement flag set.
402    SymbolRetire(SymbolEventRecord),
403    /// Retirement flag cleared.
404    SymbolUnretire(SymbolEventRecord),
405    /// Pin flag set.
406    Pin(FlagEventRecord),
407    /// Pin flag cleared.
408    Unpin(FlagEventRecord),
409    /// Operator-authoritative flag set.
410    AuthoritativeSet(FlagEventRecord),
411    /// Operator-authoritative flag cleared.
412    AuthoritativeClear(FlagEventRecord),
413}
414
415impl CanonicalRecord {
416    /// The opcode of this record.
417    #[must_use]
418    pub fn opcode(&self) -> Opcode {
419        match self {
420            Self::Sem(_) => Opcode::Sem,
421            Self::Epi(_) => Opcode::Epi,
422            Self::Pro(_) => Opcode::Pro,
423            Self::Inf(_) => Opcode::Inf,
424            Self::Supersedes(_) => Opcode::Supersedes,
425            Self::Corrects(_) => Opcode::Corrects,
426            Self::StaleParent(_) => Opcode::StaleParent,
427            Self::Reconfirms(_) => Opcode::Reconfirms,
428            Self::Checkpoint(_) => Opcode::Checkpoint,
429            Self::EpisodeMeta(_) => Opcode::EpisodeMeta,
430            Self::SymbolAlloc(_) => Opcode::SymbolAlloc,
431            Self::SymbolRename(_) => Opcode::SymbolRename,
432            Self::SymbolAlias(_) => Opcode::SymbolAlias,
433            Self::SymbolRetire(_) => Opcode::SymbolRetire,
434            Self::SymbolUnretire(_) => Opcode::SymbolUnretire,
435            Self::Pin(_) => Opcode::Pin,
436            Self::Unpin(_) => Opcode::Unpin,
437            Self::AuthoritativeSet(_) => Opcode::AuthoritativeSet,
438            Self::AuthoritativeClear(_) => Opcode::AuthoritativeClear,
439        }
440    }
441
442    /// The librarian-assigned commit time for this record.
443    ///
444    /// Every canonical record carries a commit time — memory records in
445    /// `clocks.committed_at`, edge / symbol / checkpoint / flag records
446    /// in the `at` field. This accessor smooths over the field-name
447    /// difference so replay and monotonicity checks can read the commit
448    /// clock uniformly.
449    #[must_use]
450    pub fn committed_at(&self) -> ClockTime {
451        match self {
452            Self::Sem(r) => r.clocks.committed_at,
453            Self::Epi(r) => r.committed_at,
454            Self::Pro(r) => r.clocks.committed_at,
455            Self::Inf(r) => r.clocks.committed_at,
456            Self::Supersedes(r)
457            | Self::Corrects(r)
458            | Self::StaleParent(r)
459            | Self::Reconfirms(r) => r.at,
460            Self::Checkpoint(r) => r.at,
461            Self::EpisodeMeta(r) => r.at,
462            Self::SymbolAlloc(r)
463            | Self::SymbolRename(r)
464            | Self::SymbolAlias(r)
465            | Self::SymbolRetire(r)
466            | Self::SymbolUnretire(r) => r.at,
467            Self::Pin(r)
468            | Self::Unpin(r)
469            | Self::AuthoritativeSet(r)
470            | Self::AuthoritativeClear(r) => r.at,
471        }
472    }
473}
474
475// -------------------------------------------------------------------
476// Errors
477// -------------------------------------------------------------------
478
479/// Errors produced by [`decode_record`].
480#[derive(Clone, Debug, Error, PartialEq, Eq)]
481pub enum DecodeError {
482    /// The input ended before the record was fully decoded.
483    #[error("truncated record at offset {offset}")]
484    Truncated {
485        /// Byte offset where truncation was detected.
486        offset: usize,
487    },
488
489    /// The length-prefix said the body extended past the input.
490    #[error(
491        "length mismatch at offset {offset}: body expects {expected}, only {available} available"
492    )]
493    LengthMismatch {
494        /// Byte offset of the record.
495        offset: usize,
496        /// Declared body length.
497        expected: usize,
498        /// Bytes available after the length prefix.
499        available: usize,
500    },
501
502    /// Opcode byte is not in the registered set.
503    #[error("unknown opcode {byte:#04x} at offset {offset}")]
504    UnknownOpcode {
505        /// The offending byte.
506        byte: u8,
507        /// Offset of the byte.
508        offset: usize,
509    },
510
511    /// Value-tag byte is not in `0x01..=0x06`.
512    #[error("unknown value tag {tag:#04x} at offset {offset}")]
513    UnknownValueTag {
514        /// The offending tag.
515        tag: u8,
516        /// Offset of the tag.
517        offset: usize,
518    },
519
520    /// A string value was not valid UTF-8.
521    #[error("invalid UTF-8 in string payload")]
522    InvalidString,
523
524    /// A `ClockTime` field on the wire carried the `u64::MAX` reserved
525    /// sentinel, which [`ClockTime`] refuses to construct. Only the
526    /// `invalid_at` slot is permitted to be `None`; every other clock
527    /// field must be a concrete millisecond value.
528    #[error("reserved ClockTime sentinel (u64::MAX) at offset {offset}")]
529    ReservedClockSentinel {
530        /// Offset of the first sentinel byte.
531        offset: usize,
532    },
533
534    /// A symbol-kind ordinal byte did not correspond to any variant.
535    #[error("unknown symbol-kind ordinal {ordinal} at offset {offset}")]
536    UnknownSymbolKind {
537        /// The offending byte.
538        ordinal: u8,
539        /// Offset of the byte.
540        offset: usize,
541    },
542
543    /// Record body declared more bytes than the body contained.
544    #[error("body underflow for opcode {opcode:?} at offset {offset}: consumed {consumed} of {declared}")]
545    BodyUnderflow {
546        /// The opcode being decoded.
547        opcode: Opcode,
548        /// Body offset inside the frame.
549        offset: usize,
550        /// Bytes consumed so far.
551        consumed: usize,
552        /// Declared body length.
553        declared: usize,
554    },
555
556    /// Varint decoding overflowed the target type (more than 10 bytes
557    /// for `u64`).
558    #[error("varint overflow at offset {offset}")]
559    VarintOverflow {
560        /// Offset of the varint.
561        offset: usize,
562    },
563
564    /// Varint was well-formed but not encoded with the shortest byte sequence.
565    #[error("non-canonical varint at offset {offset}")]
566    NonCanonicalVarint {
567        /// Offset of the varint.
568        offset: usize,
569    },
570
571    /// Reserved flag bits were set in a record flag byte.
572    #[error("invalid flag byte {byte:#04x} at offset {offset}; allowed mask {allowed_mask:#04x}")]
573    InvalidFlagBits {
574        /// The offending flag byte.
575        byte: u8,
576        /// Mask of allowed flag bits for this record kind.
577        allowed_mask: u8,
578        /// Offset of the flag byte.
579        offset: usize,
580    },
581
582    /// A field discriminant carried a value outside its canonical set.
583    #[error("invalid {field} discriminant {tag:#04x} at offset {offset}")]
584    InvalidDiscriminant {
585        /// Name of the field being decoded.
586        field: &'static str,
587        /// The offending discriminant byte.
588        tag: u8,
589        /// Offset of the discriminant byte.
590        offset: usize,
591    },
592}
593
594impl fmt::Display for Opcode {
595    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
596        write!(f, "{self:?}")
597    }
598}
599
600// -------------------------------------------------------------------
601// Varint + fixed-LE helpers
602// -------------------------------------------------------------------
603
/// Appends `value` to `out` as a little-endian base-128 varint:
/// seven payload bits per byte, least-significant group first, with
/// the high bit set on every byte except the last.
#[allow(clippy::cast_possible_truncation)]
fn encode_varint(mut value: u64, out: &mut Vec<u8>) {
    loop {
        // The mask keeps the cast lossless.
        let low7 = (value & 0x7F) as u8;
        value >>= 7;
        if value == 0 {
            out.push(low7);
            return;
        }
        out.push(low7 | 0x80);
    }
}
612
613fn decode_varint(bytes: &[u8], offset: &mut usize) -> Result<u64, DecodeError> {
614    let start_offset = *offset;
615    let mut result: u64 = 0;
616    let mut shift: u32 = 0;
617    for i in 0..10 {
618        if *offset >= bytes.len() {
619            return Err(DecodeError::Truncated { offset: *offset });
620        }
621        let b = bytes[*offset];
622        *offset += 1;
623        let part = u64::from(b & 0x7F);
624        if i == 9 && part > 1 {
625            return Err(DecodeError::VarintOverflow {
626                offset: start_offset,
627            });
628        }
629        result |= part.checked_shl(shift).ok_or(DecodeError::VarintOverflow {
630            offset: start_offset,
631        })?;
632        if b & 0x80 == 0 {
633            let consumed = *offset - start_offset;
634            let mut canonical = Vec::new();
635            encode_varint(result, &mut canonical);
636            if consumed != canonical.len() {
637                return Err(DecodeError::NonCanonicalVarint {
638                    offset: start_offset,
639                });
640            }
641            return Ok(result);
642        }
643        shift += 7;
644        // Last allowed byte: 10th for u64.
645        if i == 9 && (b & 0x80) != 0 {
646            return Err(DecodeError::VarintOverflow {
647                offset: start_offset,
648            });
649        }
650    }
651    Err(DecodeError::VarintOverflow {
652        offset: start_offset,
653    })
654}
655
/// Zigzag-maps a signed integer onto an unsigned one so that values
/// of small magnitude, positive or negative, become small numbers
/// (0 → 0, -1 → 1, 1 → 2, -2 → 3, ...).
#[allow(clippy::cast_sign_loss)]
fn zigzag_encode(n: i64) -> u64 {
    let doubled = n << 1;
    // Arithmetic shift: all ones for negative `n`, zero otherwise.
    let sign_mask = n >> 63;
    (doubled ^ sign_mask) as u64
}
660
/// Inverse of the zigzag mapping: even inputs decode to
/// non-negative values, odd inputs to negative ones.
#[allow(clippy::cast_possible_wrap)]
fn zigzag_decode(u: u64) -> i64 {
    let magnitude = u >> 1;
    // All ones when the low (sign) bit is set, zero otherwise.
    let sign_mask = (u & 1).wrapping_neg();
    (magnitude ^ sign_mask) as i64
}
667
/// Appends `value` to `out` as 8 little-endian bytes.
fn encode_u64_le(value: u64, out: &mut Vec<u8>) {
    let raw: [u8; 8] = value.to_le_bytes();
    out.extend_from_slice(&raw[..]);
}
671
672fn decode_u64_le(bytes: &[u8], offset: &mut usize) -> Result<u64, DecodeError> {
673    if *offset + 8 > bytes.len() {
674        return Err(DecodeError::Truncated { offset: *offset });
675    }
676    let mut buf = [0_u8; 8];
677    buf.copy_from_slice(&bytes[*offset..*offset + 8]);
678    *offset += 8;
679    Ok(u64::from_le_bytes(buf))
680}
681
/// Appends `value` to `out` as 2 little-endian bytes.
fn encode_u16_le(value: u16, out: &mut Vec<u8>) {
    let raw: [u8; 2] = value.to_le_bytes();
    out.extend_from_slice(&raw[..]);
}
685
686fn decode_u16_le(bytes: &[u8], offset: &mut usize) -> Result<u16, DecodeError> {
687    if *offset + 2 > bytes.len() {
688        return Err(DecodeError::Truncated { offset: *offset });
689    }
690    let mut buf = [0_u8; 2];
691    buf.copy_from_slice(&bytes[*offset..*offset + 2]);
692    *offset += 2;
693    Ok(u16::from_le_bytes(buf))
694}
695
696fn encode_symbol(id: SymbolId, out: &mut Vec<u8>) {
697    encode_varint(id.as_u64(), out);
698}
699
700fn decode_symbol(bytes: &[u8], offset: &mut usize) -> Result<SymbolId, DecodeError> {
701    Ok(SymbolId::new(decode_varint(bytes, offset)?))
702}
703
704fn encode_clocktime(ct: ClockTime, out: &mut Vec<u8>) {
705    encode_u64_le(ct.as_millis(), out);
706}
707
708fn decode_clocktime(bytes: &[u8], offset: &mut usize) -> Result<ClockTime, DecodeError> {
709    // Callers that accept the reserved sentinel (`None`) use
710    // decode_optional_clocktime instead; a sentinel in a non-Option slot
711    // is genuine corruption.
712    let sentinel_offset = *offset;
713    let raw = decode_u64_le(bytes, offset)?;
714    ClockTime::try_from_millis(raw).map_err(|_| DecodeError::ReservedClockSentinel {
715        offset: sentinel_offset,
716    })
717}
718
719fn encode_optional_clocktime(ct: Option<ClockTime>, out: &mut Vec<u8>) {
720    match ct {
721        Some(t) => encode_u64_le(t.as_millis(), out),
722        None => encode_u64_le(NONE_SENTINEL, out),
723    }
724}
725
726fn decode_optional_clocktime(
727    bytes: &[u8],
728    offset: &mut usize,
729) -> Result<Option<ClockTime>, DecodeError> {
730    let sentinel_offset = *offset;
731    let raw = decode_u64_le(bytes, offset)?;
732    if raw == NONE_SENTINEL {
733        Ok(None)
734    } else {
735        // Sentinel is the `None` case above; any `try_from_millis`
736        // failure at a non-sentinel value would be a `ClockTime`
737        // invariant change — not reachable in the current codebase,
738        // but routed to the correct variant for future-proofing.
739        ClockTime::try_from_millis(raw)
740            .map(Some)
741            .map_err(|_| DecodeError::ReservedClockSentinel {
742                offset: sentinel_offset,
743            })
744    }
745}
746
747fn encode_confidence(c: Confidence, out: &mut Vec<u8>) {
748    encode_u16_le(c.as_u16(), out);
749}
750
751fn decode_confidence(bytes: &[u8], offset: &mut usize) -> Result<Confidence, DecodeError> {
752    Ok(Confidence::from_u16(decode_u16_le(bytes, offset)?))
753}
754
755// -------------------------------------------------------------------
756// Value encoding (tag + body)
757// -------------------------------------------------------------------
758
759pub(crate) fn encode_value(value: &Value, out: &mut Vec<u8>) {
760    match value {
761        Value::Symbol(id) => {
762            out.push(0x01);
763            encode_varint(id.as_u64(), out);
764        }
765        Value::Integer(i) => {
766            out.push(0x02);
767            encode_varint(zigzag_encode(*i), out);
768        }
769        Value::Float(f) => {
770            out.push(0x03);
771            out.extend_from_slice(&f.to_le_bytes());
772        }
773        Value::Boolean(b) => {
774            out.push(0x04);
775            out.push(u8::from(*b));
776        }
777        Value::String(s) => {
778            out.push(0x05);
779            let bytes = s.as_bytes();
780            #[allow(clippy::cast_possible_truncation)]
781            let len = bytes.len() as u64;
782            encode_varint(len, out);
783            out.extend_from_slice(bytes);
784        }
785        Value::Timestamp(ct) => {
786            out.push(0x06);
787            encode_u64_le(ct.as_millis(), out);
788        }
789    }
790}
791
792fn decode_value(bytes: &[u8], offset: &mut usize) -> Result<Value, DecodeError> {
793    if *offset >= bytes.len() {
794        return Err(DecodeError::Truncated { offset: *offset });
795    }
796    let tag = bytes[*offset];
797    let tag_offset = *offset;
798    *offset += 1;
799    let value = match tag {
800        0x01 => Value::Symbol(decode_symbol(bytes, offset)?),
801        0x02 => Value::Integer(zigzag_decode(decode_varint(bytes, offset)?)),
802        0x03 => {
803            if *offset + 8 > bytes.len() {
804                return Err(DecodeError::Truncated { offset: *offset });
805            }
806            let mut buf = [0_u8; 8];
807            buf.copy_from_slice(&bytes[*offset..*offset + 8]);
808            *offset += 8;
809            Value::Float(f64::from_le_bytes(buf))
810        }
811        0x04 => {
812            if *offset >= bytes.len() {
813                return Err(DecodeError::Truncated { offset: *offset });
814            }
815            let b = bytes[*offset] != 0;
816            *offset += 1;
817            Value::Boolean(b)
818        }
819        0x05 => {
820            let len = usize::try_from(decode_varint(bytes, offset)?)
821                .map_err(|_| DecodeError::VarintOverflow { offset: tag_offset })?;
822            if *offset + len > bytes.len() {
823                return Err(DecodeError::Truncated { offset: *offset });
824            }
825            let s = std::str::from_utf8(&bytes[*offset..*offset + len])
826                .map_err(|_| DecodeError::InvalidString)?
827                .to_string();
828            *offset += len;
829            Value::String(s)
830        }
831        0x06 => {
832            let sentinel_offset = *offset;
833            let raw = decode_u64_le(bytes, offset)?;
834            Value::Timestamp(ClockTime::try_from_millis(raw).map_err(|_| {
835                DecodeError::ReservedClockSentinel {
836                    offset: sentinel_offset,
837                }
838            })?)
839        }
840        other => {
841            return Err(DecodeError::UnknownValueTag {
842                tag: other,
843                offset: tag_offset,
844            });
845        }
846    };
847    Ok(value)
848}
849
850// -------------------------------------------------------------------
851// SymbolKind ordinal
852// -------------------------------------------------------------------
853
854fn symbol_kind_to_u8(kind: SymbolKind) -> u8 {
855    match kind {
856        SymbolKind::Agent => 0,
857        SymbolKind::Document => 1,
858        SymbolKind::Registry => 2,
859        SymbolKind::Service => 3,
860        SymbolKind::Policy => 4,
861        SymbolKind::Memory => 5,
862        SymbolKind::InferenceMethod => 6,
863        SymbolKind::Scope => 7,
864        SymbolKind::Predicate => 8,
865        SymbolKind::EventType => 9,
866        SymbolKind::Workspace => 10,
867        SymbolKind::Literal => 11,
868    }
869}
870
871fn symbol_kind_from_u8(b: u8, offset: usize) -> Result<SymbolKind, DecodeError> {
872    Ok(match b {
873        0 => SymbolKind::Agent,
874        1 => SymbolKind::Document,
875        2 => SymbolKind::Registry,
876        3 => SymbolKind::Service,
877        4 => SymbolKind::Policy,
878        5 => SymbolKind::Memory,
879        6 => SymbolKind::InferenceMethod,
880        7 => SymbolKind::Scope,
881        8 => SymbolKind::Predicate,
882        9 => SymbolKind::EventType,
883        10 => SymbolKind::Workspace,
884        11 => SymbolKind::Literal,
885        other => {
886            return Err(DecodeError::UnknownSymbolKind {
887                ordinal: other,
888                offset,
889            });
890        }
891    })
892}
893
894// -------------------------------------------------------------------
895// Record body encode / decode
896// -------------------------------------------------------------------
897
898fn encode_clocks(clocks: &Clocks, out: &mut Vec<u8>) {
899    encode_clocktime(clocks.valid_at, out);
900    encode_clocktime(clocks.observed_at, out);
901    encode_clocktime(clocks.committed_at, out);
902    encode_optional_clocktime(clocks.invalid_at, out);
903}
904
905fn decode_clocks(bytes: &[u8], offset: &mut usize) -> Result<Clocks, DecodeError> {
906    let valid_at = decode_clocktime(bytes, offset)?;
907    let observed_at = decode_clocktime(bytes, offset)?;
908    let committed_at = decode_clocktime(bytes, offset)?;
909    let invalid_at = decode_optional_clocktime(bytes, offset)?;
910    Ok(Clocks {
911        valid_at,
912        observed_at,
913        committed_at,
914        invalid_at,
915    })
916}
917
918fn encode_body(record: &CanonicalRecord, out: &mut Vec<u8>) {
919    match record {
920        CanonicalRecord::Sem(r) => encode_sem_body(r, out),
921        CanonicalRecord::Epi(r) => encode_epi_body(r, out),
922        CanonicalRecord::Pro(r) => encode_pro_body(r, out),
923        CanonicalRecord::Inf(r) => encode_inf_body(r, out),
924        CanonicalRecord::Supersedes(r)
925        | CanonicalRecord::Corrects(r)
926        | CanonicalRecord::StaleParent(r)
927        | CanonicalRecord::Reconfirms(r) => encode_edge_body(r, out),
928        CanonicalRecord::Checkpoint(r) => encode_checkpoint_body(r, out),
929        CanonicalRecord::EpisodeMeta(r) => encode_episode_meta_body(r, out),
930        CanonicalRecord::SymbolAlloc(r)
931        | CanonicalRecord::SymbolRename(r)
932        | CanonicalRecord::SymbolAlias(r)
933        | CanonicalRecord::SymbolRetire(r)
934        | CanonicalRecord::SymbolUnretire(r) => encode_symbol_event_body(r, out),
935        CanonicalRecord::Pin(r)
936        | CanonicalRecord::Unpin(r)
937        | CanonicalRecord::AuthoritativeSet(r)
938        | CanonicalRecord::AuthoritativeClear(r) => encode_flag_event_body(r, out),
939    }
940}
941
942fn encode_sem_body(r: &SemRecord, out: &mut Vec<u8>) {
943    encode_symbol(r.memory_id, out);
944    encode_symbol(r.s, out);
945    encode_symbol(r.p, out);
946    encode_value(&r.o, out);
947    encode_symbol(r.source, out);
948    encode_confidence(r.confidence, out);
949    encode_clocks(&r.clocks, out);
950    out.push(r.flags.to_u8());
951}
952
953fn decode_sem_body(bytes: &[u8], offset: &mut usize) -> Result<SemRecord, DecodeError> {
954    let memory_id = decode_symbol(bytes, offset)?;
955    let s = decode_symbol(bytes, offset)?;
956    let p = decode_symbol(bytes, offset)?;
957    let o = decode_value(bytes, offset)?;
958    let source = decode_symbol(bytes, offset)?;
959    let confidence = decode_confidence(bytes, offset)?;
960    let clocks = decode_clocks(bytes, offset)?;
961    let flag_offset = *offset;
962    let flags = SemFlags::try_from_u8(decode_flag_byte(bytes, offset)?, flag_offset)?;
963
964    Ok(SemRecord {
965        memory_id,
966        s,
967        p,
968        o,
969        source,
970        confidence,
971        clocks,
972        flags,
973    })
974}
975
976fn encode_epi_body(r: &EpiRecord, out: &mut Vec<u8>) {
977    encode_symbol(r.memory_id, out);
978    encode_symbol(r.event_id, out);
979    encode_symbol(r.kind, out);
980    #[allow(clippy::cast_possible_truncation)]
981    encode_varint(r.participants.len() as u64, out);
982    for p in &r.participants {
983        encode_symbol(*p, out);
984    }
985    encode_symbol(r.location, out);
986    encode_clocktime(r.at_time, out);
987    encode_clocktime(r.observed_at, out);
988    encode_symbol(r.source, out);
989    encode_confidence(r.confidence, out);
990    encode_clocktime(r.committed_at, out);
991    encode_optional_clocktime(r.invalid_at, out);
992    // No flags byte for Episodic — Epi doesn't project or stale.
993}
994
995fn decode_epi_body(bytes: &[u8], offset: &mut usize) -> Result<EpiRecord, DecodeError> {
996    let memory_id = decode_symbol(bytes, offset)?;
997    let event_id = decode_symbol(bytes, offset)?;
998    let kind = decode_symbol(bytes, offset)?;
999    let count = usize::try_from(decode_varint(bytes, offset)?)
1000        .map_err(|_| DecodeError::VarintOverflow { offset: *offset })?;
1001    // Cap allocation by remaining body bytes — each `decode_symbol`
1002    // consumes at least one byte, so `bytes.len() - *offset` is a sound
1003    // upper bound on honest counts. Without this cap, an attacker who
1004    // sets `count` near `usize::MAX` (encoded as a 10-byte varint) would
1005    // trigger a multi-exabyte `Vec::with_capacity`, aborting the
1006    // process before the decode loop returns `Truncated`. Closes
1007    // Security F2 (P2) from the v1.1 fresh assessment.
1008    let cap = count.min(bytes.len().saturating_sub(*offset));
1009    let mut participants = Vec::with_capacity(cap);
1010    for _ in 0..count {
1011        participants.push(decode_symbol(bytes, offset)?);
1012    }
1013    Ok(EpiRecord {
1014        memory_id,
1015        event_id,
1016        kind,
1017        participants,
1018        location: decode_symbol(bytes, offset)?,
1019        at_time: decode_clocktime(bytes, offset)?,
1020        observed_at: decode_clocktime(bytes, offset)?,
1021        source: decode_symbol(bytes, offset)?,
1022        confidence: decode_confidence(bytes, offset)?,
1023        committed_at: decode_clocktime(bytes, offset)?,
1024        invalid_at: decode_optional_clocktime(bytes, offset)?,
1025    })
1026}
1027
1028fn encode_pro_body(r: &ProRecord, out: &mut Vec<u8>) {
1029    encode_symbol(r.memory_id, out);
1030    encode_symbol(r.rule_id, out);
1031    encode_value(&r.trigger, out);
1032    encode_value(&r.action, out);
1033    match &r.precondition {
1034        Some(pre) => {
1035            out.push(0x01);
1036            encode_value(pre, out);
1037        }
1038        None => out.push(0x00),
1039    }
1040    encode_symbol(r.scope, out);
1041    encode_symbol(r.source, out);
1042    encode_confidence(r.confidence, out);
1043    encode_clocks(&r.clocks, out);
1044    // No flags byte for Procedural — Pro doesn't project or stale.
1045}
1046
1047fn decode_pro_body(bytes: &[u8], offset: &mut usize) -> Result<ProRecord, DecodeError> {
1048    let memory_id = decode_symbol(bytes, offset)?;
1049    let rule_id = decode_symbol(bytes, offset)?;
1050    let trigger = decode_value(bytes, offset)?;
1051    let action = decode_value(bytes, offset)?;
1052    if *offset >= bytes.len() {
1053        return Err(DecodeError::Truncated { offset: *offset });
1054    }
1055    let precondition_offset = *offset;
1056    let has_pre = bytes[*offset];
1057    *offset += 1;
1058    let precondition = match has_pre {
1059        0 => None,
1060        1 => Some(decode_value(bytes, offset)?),
1061        tag => {
1062            return Err(DecodeError::InvalidDiscriminant {
1063                field: "procedural precondition",
1064                tag,
1065                offset: precondition_offset,
1066            });
1067        }
1068    };
1069    Ok(ProRecord {
1070        memory_id,
1071        rule_id,
1072        trigger,
1073        action,
1074        precondition,
1075        scope: decode_symbol(bytes, offset)?,
1076        source: decode_symbol(bytes, offset)?,
1077        confidence: decode_confidence(bytes, offset)?,
1078        clocks: decode_clocks(bytes, offset)?,
1079    })
1080}
1081
1082fn encode_inf_body(r: &InfRecord, out: &mut Vec<u8>) {
1083    encode_symbol(r.memory_id, out);
1084    encode_symbol(r.s, out);
1085    encode_symbol(r.p, out);
1086    encode_value(&r.o, out);
1087    #[allow(clippy::cast_possible_truncation)]
1088    encode_varint(r.derived_from.len() as u64, out);
1089    for parent in &r.derived_from {
1090        encode_symbol(*parent, out);
1091    }
1092    encode_symbol(r.method, out);
1093    encode_confidence(r.confidence, out);
1094    encode_clocks(&r.clocks, out);
1095    out.push(r.flags.to_u8());
1096}
1097
1098fn decode_inf_body(bytes: &[u8], offset: &mut usize) -> Result<InfRecord, DecodeError> {
1099    let memory_id = decode_symbol(bytes, offset)?;
1100    let s = decode_symbol(bytes, offset)?;
1101    let p = decode_symbol(bytes, offset)?;
1102    let o = decode_value(bytes, offset)?;
1103    let count = usize::try_from(decode_varint(bytes, offset)?)
1104        .map_err(|_| DecodeError::VarintOverflow { offset: *offset })?;
1105    // See `decode_epi_body` — same OOM-on-malicious-varint mitigation.
1106    let cap = count.min(bytes.len().saturating_sub(*offset));
1107    let mut derived_from = Vec::with_capacity(cap);
1108    for _ in 0..count {
1109        derived_from.push(decode_symbol(bytes, offset)?);
1110    }
1111    let method = decode_symbol(bytes, offset)?;
1112    let confidence = decode_confidence(bytes, offset)?;
1113    let clocks = decode_clocks(bytes, offset)?;
1114    let flag_offset = *offset;
1115    let flags = InfFlags::try_from_u8(decode_flag_byte(bytes, offset)?, flag_offset)?;
1116
1117    Ok(InfRecord {
1118        memory_id,
1119        s,
1120        p,
1121        o,
1122        derived_from,
1123        method,
1124        confidence,
1125        clocks,
1126        flags,
1127    })
1128}
1129
1130fn encode_edge_body(r: &EdgeRecord, out: &mut Vec<u8>) {
1131    encode_symbol(r.from, out);
1132    encode_symbol(r.to, out);
1133    encode_clocktime(r.at, out);
1134}
1135
1136fn decode_edge_body(bytes: &[u8], offset: &mut usize) -> Result<EdgeRecord, DecodeError> {
1137    Ok(EdgeRecord {
1138        from: decode_symbol(bytes, offset)?,
1139        to: decode_symbol(bytes, offset)?,
1140        at: decode_clocktime(bytes, offset)?,
1141    })
1142}
1143
1144fn encode_checkpoint_body(r: &CheckpointRecord, out: &mut Vec<u8>) {
1145    encode_symbol(r.episode_id, out);
1146    encode_clocktime(r.at, out);
1147    encode_varint(r.memory_count, out);
1148}
1149
1150fn decode_checkpoint_body(
1151    bytes: &[u8],
1152    offset: &mut usize,
1153) -> Result<CheckpointRecord, DecodeError> {
1154    Ok(CheckpointRecord {
1155        episode_id: decode_symbol(bytes, offset)?,
1156        at: decode_clocktime(bytes, offset)?,
1157        memory_count: decode_varint(bytes, offset)?,
1158    })
1159}
1160
1161fn encode_episode_meta_body(r: &EpisodeMetaRecord, out: &mut Vec<u8>) {
1162    encode_symbol(r.episode_id, out);
1163    encode_clocktime(r.at, out);
1164    // Label: length-prefixed UTF-8, `0` encodes `None`. A `Some("")`
1165    // collapses to `None` because episode-semantics.md § 4.3 treats
1166    // the empty string as meaningless.
1167    let label_bytes: &[u8] = match r.label.as_deref() {
1168        Some(s) if !s.is_empty() => s.as_bytes(),
1169        _ => &[],
1170    };
1171    #[allow(clippy::cast_possible_truncation)]
1172    encode_varint(label_bytes.len() as u64, out);
1173    out.extend_from_slice(label_bytes);
1174    // parent_episode: 0 = None, 1 = Some followed by the symbol.
1175    if let Some(id) = r.parent_episode_id {
1176        out.push(0x01);
1177        encode_symbol(id, out);
1178    } else {
1179        out.push(0x00);
1180    }
1181    // retracts: length-prefixed list of symbol ids.
1182    #[allow(clippy::cast_possible_truncation)]
1183    encode_varint(r.retracts.len() as u64, out);
1184    for id in &r.retracts {
1185        encode_symbol(*id, out);
1186    }
1187}
1188
1189fn decode_episode_meta_body(
1190    bytes: &[u8],
1191    offset: &mut usize,
1192) -> Result<EpisodeMetaRecord, DecodeError> {
1193    let episode_id = decode_symbol(bytes, offset)?;
1194    let at = decode_clocktime(bytes, offset)?;
1195    let label_len = usize::try_from(decode_varint(bytes, offset)?)
1196        .map_err(|_| DecodeError::VarintOverflow { offset: *offset })?;
1197    let label = if label_len == 0 {
1198        None
1199    } else {
1200        if *offset + label_len > bytes.len() {
1201            return Err(DecodeError::Truncated { offset: *offset });
1202        }
1203        let s = std::str::from_utf8(&bytes[*offset..*offset + label_len])
1204            .map_err(|_| DecodeError::InvalidString)?
1205            .to_string();
1206        *offset += label_len;
1207        Some(s)
1208    };
1209    if *offset >= bytes.len() {
1210        return Err(DecodeError::Truncated { offset: *offset });
1211    }
1212    let parent_tag = bytes[*offset];
1213    *offset += 1;
1214    let parent_episode_id = match parent_tag {
1215        0x00 => None,
1216        0x01 => Some(decode_symbol(bytes, offset)?),
1217        _ => return Err(DecodeError::InvalidString),
1218    };
1219    let retracts_len = usize::try_from(decode_varint(bytes, offset)?)
1220        .map_err(|_| DecodeError::VarintOverflow { offset: *offset })?;
1221    // See `decode_epi_body` — same OOM-on-malicious-varint mitigation.
1222    let retracts_cap = retracts_len.min(bytes.len().saturating_sub(*offset));
1223    let mut retracts = Vec::with_capacity(retracts_cap);
1224    for _ in 0..retracts_len {
1225        retracts.push(decode_symbol(bytes, offset)?);
1226    }
1227    Ok(EpisodeMetaRecord {
1228        episode_id,
1229        at,
1230        label,
1231        parent_episode_id,
1232        retracts,
1233    })
1234}
1235
1236fn encode_symbol_event_body(r: &SymbolEventRecord, out: &mut Vec<u8>) {
1237    encode_symbol(r.symbol_id, out);
1238    let name_bytes = r.name.as_bytes();
1239    #[allow(clippy::cast_possible_truncation)]
1240    encode_varint(name_bytes.len() as u64, out);
1241    out.extend_from_slice(name_bytes);
1242    out.push(symbol_kind_to_u8(r.symbol_kind));
1243    encode_clocktime(r.at, out);
1244}
1245
1246fn decode_symbol_event_body(
1247    bytes: &[u8],
1248    offset: &mut usize,
1249) -> Result<SymbolEventRecord, DecodeError> {
1250    let symbol_id = decode_symbol(bytes, offset)?;
1251    let name_len = usize::try_from(decode_varint(bytes, offset)?)
1252        .map_err(|_| DecodeError::VarintOverflow { offset: *offset })?;
1253    if *offset + name_len > bytes.len() {
1254        return Err(DecodeError::Truncated { offset: *offset });
1255    }
1256    let name = std::str::from_utf8(&bytes[*offset..*offset + name_len])
1257        .map_err(|_| DecodeError::InvalidString)?
1258        .to_string();
1259    *offset += name_len;
1260    if *offset >= bytes.len() {
1261        return Err(DecodeError::Truncated { offset: *offset });
1262    }
1263    let kind_byte = bytes[*offset];
1264    let kind_offset = *offset;
1265    *offset += 1;
1266    let symbol_kind = symbol_kind_from_u8(kind_byte, kind_offset)?;
1267    let at = decode_clocktime(bytes, offset)?;
1268    Ok(SymbolEventRecord {
1269        symbol_id,
1270        name,
1271        symbol_kind,
1272        at,
1273    })
1274}
1275
1276fn encode_flag_event_body(r: &FlagEventRecord, out: &mut Vec<u8>) {
1277    encode_symbol(r.memory_id, out);
1278    encode_clocktime(r.at, out);
1279    encode_symbol(r.actor_symbol, out);
1280}
1281
1282fn decode_flag_event_body(
1283    bytes: &[u8],
1284    offset: &mut usize,
1285) -> Result<FlagEventRecord, DecodeError> {
1286    Ok(FlagEventRecord {
1287        memory_id: decode_symbol(bytes, offset)?,
1288        at: decode_clocktime(bytes, offset)?,
1289        actor_symbol: decode_symbol(bytes, offset)?,
1290    })
1291}
1292
1293fn decode_flag_byte(bytes: &[u8], offset: &mut usize) -> Result<u8, DecodeError> {
1294    if *offset >= bytes.len() {
1295        return Err(DecodeError::Truncated { offset: *offset });
1296    }
1297    let b = bytes[*offset];
1298    *offset += 1;
1299    Ok(b)
1300}
1301
1302// -------------------------------------------------------------------
1303// Public framing API
1304// -------------------------------------------------------------------
1305
1306/// Encode a [`CanonicalRecord`] with its framing into the output buffer.
1307///
1308/// Framing: `[1 byte opcode][varint body length][body]`.
1309///
1310/// # Examples
1311///
1312/// ```
1313/// # #![allow(clippy::unwrap_used)]
1314/// use mimir_core::canonical::{
1315///     encode_record, decode_record, CanonicalRecord, CheckpointRecord,
1316/// };
1317/// use mimir_core::{ClockTime, SymbolId};
1318///
1319/// let record = CanonicalRecord::Checkpoint(CheckpointRecord {
1320///     episode_id: SymbolId::new(42),
1321///     at: ClockTime::try_from_millis(1_700_000_000_000).expect("non-sentinel"),
1322///     memory_count: 3,
1323/// });
1324/// let mut bytes = Vec::new();
1325/// encode_record(&record, &mut bytes);
1326/// let (decoded, _used) = decode_record(&bytes).unwrap();
1327/// assert_eq!(decoded, record);
1328/// ```
1329pub fn encode_record(record: &CanonicalRecord, out: &mut Vec<u8>) {
1330    out.push(record.opcode() as u8);
1331    // Encode the body into a temporary buffer so we can prefix its length.
1332    let mut body = Vec::new();
1333    encode_body(record, &mut body);
1334    #[allow(clippy::cast_possible_truncation)]
1335    encode_varint(body.len() as u64, out);
1336    out.extend_from_slice(&body);
1337}
1338
1339/// Decode one [`CanonicalRecord`] from the front of `bytes`.
1340///
1341/// Returns the decoded record and the total number of bytes consumed
1342/// (opcode + length varint + body).
1343///
1344/// # Errors
1345///
1346/// - [`DecodeError::Truncated`] if the input ends inside the record.
1347/// - [`DecodeError::UnknownOpcode`] if the first byte isn't a known opcode.
1348/// - [`DecodeError::LengthMismatch`] if the declared body length overruns
1349///   the input.
1350/// - [`DecodeError::BodyUnderflow`] if the body encoded less than the
1351///   declared length.
1352pub fn decode_record(bytes: &[u8]) -> Result<(CanonicalRecord, usize), DecodeError> {
1353    if bytes.is_empty() {
1354        return Err(DecodeError::Truncated { offset: 0 });
1355    }
1356    let opcode_byte = bytes[0];
1357    let opcode = Opcode::from_byte(opcode_byte).ok_or(DecodeError::UnknownOpcode {
1358        byte: opcode_byte,
1359        offset: 0,
1360    })?;
1361    let mut offset = 1;
1362    let body_len = usize::try_from(decode_varint(bytes, &mut offset)?)
1363        .map_err(|_| DecodeError::VarintOverflow { offset: 1 })?;
1364    let body_start = offset;
1365    if body_start + body_len > bytes.len() {
1366        return Err(DecodeError::LengthMismatch {
1367            offset: 0,
1368            expected: body_len,
1369            available: bytes.len() - body_start,
1370        });
1371    }
1372    let body = &bytes[body_start..body_start + body_len];
1373    let mut body_offset = 0;
1374    let record = match opcode {
1375        Opcode::Sem => CanonicalRecord::Sem(decode_sem_body(body, &mut body_offset)?),
1376        Opcode::Epi => CanonicalRecord::Epi(decode_epi_body(body, &mut body_offset)?),
1377        Opcode::Pro => CanonicalRecord::Pro(decode_pro_body(body, &mut body_offset)?),
1378        Opcode::Inf => CanonicalRecord::Inf(decode_inf_body(body, &mut body_offset)?),
1379        Opcode::Supersedes => {
1380            CanonicalRecord::Supersedes(decode_edge_body(body, &mut body_offset)?)
1381        }
1382        Opcode::Corrects => CanonicalRecord::Corrects(decode_edge_body(body, &mut body_offset)?),
1383        Opcode::StaleParent => {
1384            CanonicalRecord::StaleParent(decode_edge_body(body, &mut body_offset)?)
1385        }
1386        Opcode::Reconfirms => {
1387            CanonicalRecord::Reconfirms(decode_edge_body(body, &mut body_offset)?)
1388        }
1389        Opcode::Checkpoint => {
1390            CanonicalRecord::Checkpoint(decode_checkpoint_body(body, &mut body_offset)?)
1391        }
1392        Opcode::EpisodeMeta => {
1393            CanonicalRecord::EpisodeMeta(decode_episode_meta_body(body, &mut body_offset)?)
1394        }
1395        Opcode::SymbolAlloc => {
1396            CanonicalRecord::SymbolAlloc(decode_symbol_event_body(body, &mut body_offset)?)
1397        }
1398        Opcode::SymbolRename => {
1399            CanonicalRecord::SymbolRename(decode_symbol_event_body(body, &mut body_offset)?)
1400        }
1401        Opcode::SymbolAlias => {
1402            CanonicalRecord::SymbolAlias(decode_symbol_event_body(body, &mut body_offset)?)
1403        }
1404        Opcode::SymbolRetire => {
1405            CanonicalRecord::SymbolRetire(decode_symbol_event_body(body, &mut body_offset)?)
1406        }
1407        Opcode::SymbolUnretire => {
1408            CanonicalRecord::SymbolUnretire(decode_symbol_event_body(body, &mut body_offset)?)
1409        }
1410        Opcode::Pin => CanonicalRecord::Pin(decode_flag_event_body(body, &mut body_offset)?),
1411        Opcode::Unpin => CanonicalRecord::Unpin(decode_flag_event_body(body, &mut body_offset)?),
1412        Opcode::AuthoritativeSet => {
1413            CanonicalRecord::AuthoritativeSet(decode_flag_event_body(body, &mut body_offset)?)
1414        }
1415        Opcode::AuthoritativeClear => {
1416            CanonicalRecord::AuthoritativeClear(decode_flag_event_body(body, &mut body_offset)?)
1417        }
1418    };
1419    if body_offset != body.len() {
1420        return Err(DecodeError::BodyUnderflow {
1421            opcode,
1422            offset: body_start,
1423            consumed: body_offset,
1424            declared: body_len,
1425        });
1426    }
1427    Ok((record, body_start + body_len))
1428}
1429
1430/// Decode all records from a byte slice until the slice is exhausted.
1431///
1432/// # Errors
1433///
1434/// Propagates any [`DecodeError`] from the underlying stream.
1435pub fn decode_all(bytes: &[u8]) -> Result<Vec<CanonicalRecord>, DecodeError> {
1436    let mut out = Vec::new();
1437    let mut offset = 0;
1438    while offset < bytes.len() {
1439        let (record, used) = decode_record(&bytes[offset..])?;
1440        out.push(record);
1441        offset += used;
1442    }
1443    Ok(out)
1444}
1445
1446#[cfg(test)]
1447mod tests {
1448    use super::*;
1449
1450    fn ct(ms: u64) -> ClockTime {
1451        ClockTime::try_from_millis(ms).expect("non-sentinel")
1452    }
1453
1454    fn clocks() -> Clocks {
1455        Clocks {
1456            valid_at: ct(1_700_000_000_000),
1457            observed_at: ct(1_700_000_001_000),
1458            committed_at: ct(1_700_000_002_000),
1459            invalid_at: None,
1460        }
1461    }
1462
1463    fn roundtrip(record: &CanonicalRecord) {
1464        let mut bytes = Vec::new();
1465        encode_record(record, &mut bytes);
1466        let (decoded, used) = decode_record(&bytes).unwrap();
1467        assert_eq!(&decoded, record);
1468        assert_eq!(used, bytes.len());
1469    }
1470
1471    #[test]
1472    fn varint_roundtrip_small() {
1473        for v in [0_u64, 1, 127, 128, 16_383, 16_384, u64::MAX] {
1474            let mut out = Vec::new();
1475            encode_varint(v, &mut out);
1476            let mut offset = 0;
1477            let decoded = decode_varint(&out, &mut offset).unwrap();
1478            assert_eq!(decoded, v);
1479            assert_eq!(offset, out.len());
1480        }
1481    }
1482
1483    #[test]
1484    fn overlong_varint_encoding_is_rejected() {
1485        let mut offset = 0;
1486        let err = decode_varint(&[0x80, 0x00], &mut offset).unwrap_err();
1487        assert!(matches!(err, DecodeError::NonCanonicalVarint { offset: 0 }));
1488    }
1489
1490    #[test]
1491    fn zigzag_roundtrip() {
1492        for i in [0_i64, 1, -1, 42, -42, i64::MIN, i64::MAX] {
1493            assert_eq!(zigzag_decode(zigzag_encode(i)), i);
1494        }
1495    }
1496
1497    #[test]
1498    fn value_roundtrip_all_tags() {
1499        let values = [
1500            Value::Symbol(SymbolId::new(7)),
1501            Value::Integer(-42),
1502            Value::Float(1.25),
1503            Value::Boolean(true),
1504            Value::String("hello".into()),
1505            Value::Timestamp(ct(12_345)),
1506        ];
1507        for v in values {
1508            let mut bytes = Vec::new();
1509            encode_value(&v, &mut bytes);
1510            let mut offset = 0;
1511            let decoded = decode_value(&bytes, &mut offset).unwrap();
1512            assert_eq!(decoded, v);
1513            assert_eq!(offset, bytes.len());
1514        }
1515    }
1516
1517    #[test]
1518    fn sem_roundtrip() {
1519        roundtrip(&CanonicalRecord::Sem(SemRecord {
1520            memory_id: SymbolId::new(1),
1521            s: SymbolId::new(2),
1522            p: SymbolId::new(3),
1523            o: Value::String("x".into()),
1524            source: SymbolId::new(4),
1525            confidence: Confidence::from_u16(62_258),
1526            clocks: clocks(),
1527            flags: SemFlags::default(),
1528        }));
1529    }
1530
1531    #[test]
1532    fn sem_reserved_flag_bits_are_rejected() {
1533        let mut bytes = Vec::new();
1534        encode_record(
1535            &CanonicalRecord::Sem(SemRecord {
1536                memory_id: SymbolId::new(1),
1537                s: SymbolId::new(2),
1538                p: SymbolId::new(3),
1539                o: Value::String("x".into()),
1540                source: SymbolId::new(4),
1541                confidence: Confidence::from_u16(62_258),
1542                clocks: clocks(),
1543                flags: SemFlags::default(),
1544            }),
1545            &mut bytes,
1546        );
1547        *bytes.last_mut().expect("flag byte") = 0b0000_0010;
1548        let err = decode_record(&bytes).unwrap_err();
1549        assert!(matches!(
1550            err,
1551            DecodeError::InvalidFlagBits {
1552                byte: 0b0000_0010,
1553                allowed_mask: 0b0000_0001,
1554                ..
1555            }
1556        ));
1557    }
1558
1559    #[test]
1560    fn epi_roundtrip_with_participants() {
1561        roundtrip(&CanonicalRecord::Epi(EpiRecord {
1562            memory_id: SymbolId::new(10),
1563            event_id: SymbolId::new(11),
1564            kind: SymbolId::new(12),
1565            participants: vec![SymbolId::new(13), SymbolId::new(14)],
1566            location: SymbolId::new(15),
1567            at_time: ct(1_700_000_000_000),
1568            observed_at: ct(1_700_000_000_000),
1569            source: SymbolId::new(16),
1570            confidence: Confidence::ONE,
1571            committed_at: ct(1_700_000_005_000),
1572            invalid_at: None,
1573        }));
1574    }
1575
1576    #[test]
1577    fn pro_roundtrip_with_precondition() {
1578        roundtrip(&CanonicalRecord::Pro(ProRecord {
1579            memory_id: SymbolId::new(20),
1580            rule_id: SymbolId::new(21),
1581            trigger: Value::String("agent writing".into()),
1582            action: Value::String("route via librarian".into()),
1583            precondition: Some(Value::String("critical".into())),
1584            scope: SymbolId::new(22),
1585            source: SymbolId::new(23),
1586            confidence: Confidence::ONE,
1587            clocks: clocks(),
1588        }));
1589    }
1590
1591    #[test]
1592    fn pro_precondition_tag_must_be_zero_or_one() {
1593        let mut bytes = Vec::new();
1594        encode_record(
1595            &CanonicalRecord::Pro(ProRecord {
1596                memory_id: SymbolId::new(20),
1597                rule_id: SymbolId::new(21),
1598                trigger: Value::String("agent writing".into()),
1599                action: Value::String("route via librarian".into()),
1600                precondition: Some(Value::String("critical".into())),
1601                scope: SymbolId::new(22),
1602                source: SymbolId::new(23),
1603                confidence: Confidence::ONE,
1604                clocks: clocks(),
1605            }),
1606            &mut bytes,
1607        );
1608
1609        // Framing is one-byte opcode plus one-byte body length for
1610        // this fixture. Body layout starts with memory_id, rule_id,
1611        // trigger string, action string, then the precondition tag.
1612        let precondition_tag_offset =
1613            2 + 1 + 1 + 1 + 1 + "agent writing".len() + 1 + 1 + "route via librarian".len();
1614        assert_eq!(bytes[precondition_tag_offset], 0x01);
1615        bytes[precondition_tag_offset] = 0x02;
1616        let err = decode_record(&bytes).unwrap_err();
1617        assert!(matches!(
1618            err,
1619            DecodeError::InvalidDiscriminant {
1620                field: "procedural precondition",
1621                tag: 0x02,
1622                ..
1623            }
1624        ));
1625    }
1626
1627    #[test]
1628    fn pro_roundtrip_without_precondition() {
1629        roundtrip(&CanonicalRecord::Pro(ProRecord {
1630            memory_id: SymbolId::new(30),
1631            rule_id: SymbolId::new(31),
1632            trigger: Value::String("x".into()),
1633            action: Value::String("y".into()),
1634            precondition: None,
1635            scope: SymbolId::new(32),
1636            source: SymbolId::new(33),
1637            confidence: Confidence::from_u16(40_000),
1638            clocks: clocks(),
1639        }));
1640    }
1641
1642    #[test]
1643    fn inf_roundtrip_with_stale_flag() {
1644        roundtrip(&CanonicalRecord::Inf(InfRecord {
1645            memory_id: SymbolId::new(40),
1646            s: SymbolId::new(41),
1647            p: SymbolId::new(42),
1648            o: Value::Boolean(true),
1649            derived_from: vec![SymbolId::new(43), SymbolId::new(44), SymbolId::new(45)],
1650            method: SymbolId::new(46),
1651            confidence: Confidence::from_u16(50_000),
1652            clocks: clocks(),
1653            flags: InfFlags {
1654                projected: true,
1655                stale: true,
1656            },
1657        }));
1658    }
1659
1660    #[test]
1661    fn inf_reserved_flag_bits_are_rejected() {
1662        let mut bytes = Vec::new();
1663        encode_record(
1664            &CanonicalRecord::Inf(InfRecord {
1665                memory_id: SymbolId::new(40),
1666                s: SymbolId::new(41),
1667                p: SymbolId::new(42),
1668                o: Value::Boolean(true),
1669                derived_from: vec![SymbolId::new(43)],
1670                method: SymbolId::new(46),
1671                confidence: Confidence::from_u16(50_000),
1672                clocks: clocks(),
1673                flags: InfFlags {
1674                    projected: true,
1675                    stale: false,
1676                },
1677            }),
1678            &mut bytes,
1679        );
1680        *bytes.last_mut().expect("flag byte") = 0b0000_0100;
1681        let err = decode_record(&bytes).unwrap_err();
1682        assert!(matches!(
1683            err,
1684            DecodeError::InvalidFlagBits {
1685                byte: 0b0000_0100,
1686                allowed_mask: 0b0000_0011,
1687                ..
1688            }
1689        ));
1690    }
1691
1692    #[test]
1693    fn edge_records_roundtrip() {
1694        let edge = EdgeRecord {
1695            from: SymbolId::new(50),
1696            to: SymbolId::new(51),
1697            at: ct(1_700_000_010_000),
1698        };
1699        roundtrip(&CanonicalRecord::Supersedes(edge));
1700        roundtrip(&CanonicalRecord::Corrects(edge));
1701        roundtrip(&CanonicalRecord::StaleParent(edge));
1702        roundtrip(&CanonicalRecord::Reconfirms(edge));
1703    }
1704
1705    #[test]
1706    fn checkpoint_roundtrip() {
1707        roundtrip(&CanonicalRecord::Checkpoint(CheckpointRecord {
1708            episode_id: SymbolId::new(100),
1709            at: ct(1_700_000_020_000),
1710            memory_count: 7,
1711        }));
1712    }
1713
1714    #[test]
1715    fn episode_meta_roundtrip_minimal() {
1716        roundtrip(&CanonicalRecord::EpisodeMeta(EpisodeMetaRecord {
1717            episode_id: SymbolId::new(101),
1718            at: ct(1_700_000_020_000),
1719            label: None,
1720            parent_episode_id: None,
1721            retracts: Vec::new(),
1722        }));
1723    }
1724
1725    #[test]
1726    fn episode_meta_roundtrip_full() {
1727        roundtrip(&CanonicalRecord::EpisodeMeta(EpisodeMetaRecord {
1728            episode_id: SymbolId::new(102),
1729            at: ct(1_700_000_021_000),
1730            label: Some("tokenizer-bakeoff".into()),
1731            parent_episode_id: Some(SymbolId::new(101)),
1732            retracts: vec![SymbolId::new(50), SymbolId::new(51)],
1733        }));
1734    }
1735
1736    #[test]
1737    fn episode_meta_empty_label_decodes_to_none() {
1738        let mut buf = Vec::new();
1739        encode_record(
1740            &CanonicalRecord::EpisodeMeta(EpisodeMetaRecord {
1741                episode_id: SymbolId::new(103),
1742                at: ct(1_700_000_022_000),
1743                label: Some(String::new()),
1744                parent_episode_id: None,
1745                retracts: Vec::new(),
1746            }),
1747            &mut buf,
1748        );
1749        let (decoded, _) = decode_record(&buf).expect("decode");
1750        let CanonicalRecord::EpisodeMeta(meta) = decoded else {
1751            panic!("expected EpisodeMeta");
1752        };
1753        // Empty string collapses to None on the wire — encoder
1754        // intentionally normalizes so readers don't see "".
1755        assert_eq!(meta.label, None);
1756    }
1757
1758    #[test]
1759    fn symbol_event_roundtrip() {
1760        let rec = SymbolEventRecord {
1761            symbol_id: SymbolId::new(200),
1762            name: "alice".into(),
1763            symbol_kind: SymbolKind::Agent,
1764            at: ct(1_700_000_030_000),
1765        };
1766        roundtrip(&CanonicalRecord::SymbolAlloc(rec.clone()));
1767        roundtrip(&CanonicalRecord::SymbolRename(rec.clone()));
1768        roundtrip(&CanonicalRecord::SymbolAlias(rec.clone()));
1769        roundtrip(&CanonicalRecord::SymbolRetire(rec.clone()));
1770        roundtrip(&CanonicalRecord::SymbolUnretire(rec));
1771    }
1772
1773    #[test]
1774    fn flag_event_roundtrip() {
1775        let rec = FlagEventRecord {
1776            memory_id: SymbolId::new(300),
1777            at: ct(1_700_000_040_000),
1778            actor_symbol: SymbolId::new(301),
1779        };
1780        roundtrip(&CanonicalRecord::Pin(rec));
1781        roundtrip(&CanonicalRecord::Unpin(rec));
1782        roundtrip(&CanonicalRecord::AuthoritativeSet(rec));
1783        roundtrip(&CanonicalRecord::AuthoritativeClear(rec));
1784    }
1785
1786    #[test]
1787    fn decode_all_multiple_records() {
1788        let records = vec![
1789            CanonicalRecord::Checkpoint(CheckpointRecord {
1790                episode_id: SymbolId::new(1),
1791                at: ct(1_000),
1792                memory_count: 0,
1793            }),
1794            CanonicalRecord::Supersedes(EdgeRecord {
1795                from: SymbolId::new(2),
1796                to: SymbolId::new(3),
1797                at: ct(2_000),
1798            }),
1799            CanonicalRecord::Pin(FlagEventRecord {
1800                memory_id: SymbolId::new(4),
1801                at: ct(3_000),
1802                actor_symbol: SymbolId::new(5),
1803            }),
1804        ];
1805        let mut bytes = Vec::new();
1806        for r in &records {
1807            encode_record(r, &mut bytes);
1808        }
1809        let decoded = decode_all(&bytes).unwrap();
1810        assert_eq!(decoded, records);
1811    }
1812
1813    #[test]
1814    fn unknown_opcode_errors() {
1815        let err = decode_record(&[0x77, 0x00]).unwrap_err();
1816        assert!(matches!(err, DecodeError::UnknownOpcode { byte: 0x77, .. }));
1817    }
1818
1819    #[test]
1820    fn truncated_input_errors() {
1821        let err = decode_record(&[]).unwrap_err();
1822        assert!(matches!(err, DecodeError::Truncated { .. }));
1823    }
1824
1825    #[test]
1826    fn length_mismatch_errors() {
1827        // Opcode CHECKPOINT then declared body length 50, but only 2 bytes follow.
1828        let err = decode_record(&[0x20, 50, 0, 0]).unwrap_err();
1829        assert!(matches!(err, DecodeError::LengthMismatch { .. }));
1830    }
1831
1832    #[test]
1833    fn unknown_value_tag_errors() {
1834        // Manually craft a SEM body with bad value tag.
1835        let mut body = Vec::new();
1836        encode_symbol(SymbolId::new(1), &mut body);
1837        encode_symbol(SymbolId::new(2), &mut body);
1838        encode_symbol(SymbolId::new(3), &mut body);
1839        body.push(0x99); // bogus tag
1840        let mut framed = Vec::new();
1841        framed.push(0x01); // SEM opcode
1842        #[allow(clippy::cast_possible_truncation)]
1843        encode_varint(body.len() as u64, &mut framed);
1844        framed.extend_from_slice(&body);
1845        let err = decode_record(&framed).unwrap_err();
1846        assert!(matches!(
1847            err,
1848            DecodeError::UnknownValueTag { tag: 0x99, .. }
1849        ));
1850    }
1851
1852    #[test]
1853    fn confidence_fixed_width_two_bytes() {
1854        let record = CanonicalRecord::Sem(SemRecord {
1855            memory_id: SymbolId::new(1),
1856            s: SymbolId::new(2),
1857            p: SymbolId::new(3),
1858            o: Value::Integer(0),
1859            source: SymbolId::new(4),
1860            confidence: Confidence::from_u16(42),
1861            clocks: clocks(),
1862            flags: SemFlags::default(),
1863        });
1864        let mut bytes = Vec::new();
1865        encode_record(&record, &mut bytes);
1866        let (decoded, _) = decode_record(&bytes).unwrap();
1867        assert_eq!(decoded, record);
1868    }
1869
1870    #[test]
1871    fn invalid_at_sentinel_is_none() {
1872        let record = CanonicalRecord::Sem(SemRecord {
1873            memory_id: SymbolId::new(1),
1874            s: SymbolId::new(2),
1875            p: SymbolId::new(3),
1876            o: Value::Integer(0),
1877            source: SymbolId::new(4),
1878            confidence: Confidence::ONE,
1879            clocks: Clocks {
1880                valid_at: ct(100),
1881                observed_at: ct(101),
1882                committed_at: ct(102),
1883                invalid_at: None,
1884            },
1885            flags: SemFlags::default(),
1886        });
1887        roundtrip(&record);
1888    }
1889
1890    #[test]
1891    fn invalid_at_set_roundtrips() {
1892        let record = CanonicalRecord::Sem(SemRecord {
1893            memory_id: SymbolId::new(1),
1894            s: SymbolId::new(2),
1895            p: SymbolId::new(3),
1896            o: Value::Integer(0),
1897            source: SymbolId::new(4),
1898            confidence: Confidence::ONE,
1899            clocks: Clocks {
1900                valid_at: ct(100),
1901                observed_at: ct(101),
1902                committed_at: ct(102),
1903                invalid_at: Some(ct(200)),
1904            },
1905            flags: SemFlags::default(),
1906        });
1907        roundtrip(&record);
1908    }
1909
1910    /// Pre-schema-break Epi/Pro logs carried a trailing `flags` byte
1911    /// whose `body_len` varint covered that byte. Feeding such a log
1912    /// to the post-split decoder must fail with `BodyUnderflow`, not
1913    /// silently drop the extra byte.
1914    #[test]
1915    fn legacy_epi_with_trailing_flags_byte_rejected() {
1916        let new_record = CanonicalRecord::Epi(EpiRecord {
1917            memory_id: SymbolId::new(1),
1918            event_id: SymbolId::new(2),
1919            kind: SymbolId::new(3),
1920            participants: vec![],
1921            location: SymbolId::new(4),
1922            at_time: ct(100),
1923            observed_at: ct(100),
1924            source: SymbolId::new(5),
1925            confidence: Confidence::ONE,
1926            committed_at: ct(100),
1927            invalid_at: None,
1928        });
1929        let mut new_bytes = Vec::new();
1930        encode_record(&new_record, &mut new_bytes);
1931
1932        // Simulate an old-format frame: same body plus a trailing flags
1933        // byte, with body_len bumped by 1.
1934        let opcode = new_bytes[0];
1935        let mut cursor = 1;
1936        let body_len = decode_varint(&new_bytes, &mut cursor).unwrap();
1937        let body = &new_bytes[cursor..cursor + usize::try_from(body_len).unwrap()];
1938        let mut legacy = Vec::new();
1939        legacy.push(opcode);
1940        encode_varint(body_len + 1, &mut legacy);
1941        legacy.extend_from_slice(body);
1942        legacy.push(0x00); // trailing flags byte
1943
1944        let err = decode_record(&legacy).unwrap_err();
1945        assert!(
1946            matches!(err, DecodeError::BodyUnderflow { .. }),
1947            "old-format trailing flags byte must be rejected, got {err:?}"
1948        );
1949    }
1950
1951    #[test]
1952    fn legacy_pro_with_trailing_flags_byte_rejected() {
1953        let new_record = CanonicalRecord::Pro(ProRecord {
1954            memory_id: SymbolId::new(20),
1955            rule_id: SymbolId::new(21),
1956            trigger: Value::String("x".into()),
1957            action: Value::String("y".into()),
1958            precondition: None,
1959            scope: SymbolId::new(22),
1960            source: SymbolId::new(23),
1961            confidence: Confidence::ONE,
1962            clocks: clocks(),
1963        });
1964        let mut new_bytes = Vec::new();
1965        encode_record(&new_record, &mut new_bytes);
1966
1967        let opcode = new_bytes[0];
1968        let mut cursor = 1;
1969        let body_len = decode_varint(&new_bytes, &mut cursor).unwrap();
1970        let body = &new_bytes[cursor..cursor + usize::try_from(body_len).unwrap()];
1971        let mut legacy = Vec::new();
1972        legacy.push(opcode);
1973        encode_varint(body_len + 1, &mut legacy);
1974        legacy.extend_from_slice(body);
1975        legacy.push(0x00);
1976
1977        let err = decode_record(&legacy).unwrap_err();
1978        assert!(
1979            matches!(err, DecodeError::BodyUnderflow { .. }),
1980            "old-format trailing flags byte must be rejected, got {err:?}"
1981        );
1982    }
1983
1984    // ---- Security F2 (P2) regression: decoder must not OOM on a
1985    // crafted record with an attacker-controlled count varint. The
1986    // pre-fix decoder did `Vec::with_capacity(count)` *before* the
1987    // decode loop ran, so an attacker who set `count` near
1988    // `usize::MAX` (10-byte varint) triggered a multi-exabyte
1989    // allocation that aborts the process. Post-fix: cap is bounded
1990    // by remaining body bytes, so the loop returns `Truncated`
1991    // instead.
1992    //
    // These tests assert the post-fix behaviour: the function returns
    // a typed `Err` (`Truncated`, `VarintOverflow`, or `BodyUnderflow`
    // are all accepted by the assertions below) and crucially **does
    // not panic or abort**. Test execution itself proves the OOM is
    // gone: a pre-fix run would have aborted the test process.
1998
1999    /// Helper: build a frame with a custom body (after `[opcode][varint
2000    /// body_len]`) for the given opcode. Bypasses [`encode_record`] so
2001    /// we can craft adversarial bodies the encoder would never produce.
2002    fn frame(opcode: Opcode, body: &[u8]) -> Vec<u8> {
2003        let mut out = Vec::with_capacity(body.len() + 11);
2004        out.push(opcode as u8);
2005        encode_varint(body.len() as u64, &mut out);
2006        out.extend_from_slice(body);
2007        out
2008    }
2009
2010    #[test]
2011    fn decode_epi_does_not_oom_on_huge_participant_count() {
2012        // EpiRecord body shape: memory_id (varint) + event_id (varint) +
2013        // kind (varint) + participants_count (varint) + participants...
2014        // We craft a body where the count varint encodes a huge
2015        // value but no participants follow.
2016        let mut body = Vec::new();
2017        encode_varint(1, &mut body); // memory_id
2018        encode_varint(2, &mut body); // event_id
2019        encode_varint(3, &mut body); // kind
2020        encode_varint(u64::MAX, &mut body); // participants_count = u64::MAX
2021                                            // No participants follow — `decode_symbol` must hit truncation
2022                                            // immediately.
2023        let frame = frame(Opcode::Epi, &body);
2024        let err = decode_record(&frame).expect_err("must reject huge count");
2025        // The exact variant depends on which check trips first; the
2026        // load-bearing assertion is that the decoder returns an error
2027        // instead of OOMing. VarintOverflow on usize::try_from is the
2028        // most likely path on 64-bit (u64::MAX > usize::MAX is false on
2029        // 64-bit so we get past that and hit Truncated on the first
2030        // missing participant); on 32-bit, VarintOverflow on the
2031        // try_from. Both are acceptable — we just must not panic.
2032        assert!(
2033            matches!(
2034                err,
2035                DecodeError::Truncated { .. }
2036                    | DecodeError::VarintOverflow { .. }
2037                    | DecodeError::BodyUnderflow { .. }
2038            ),
2039            "expected typed error, got {err:?}"
2040        );
2041    }
2042
2043    #[test]
2044    fn decode_inf_does_not_oom_on_huge_derived_from_count() {
2045        // InfRecord body: memory_id + s + p + o (value) + count + parents...
2046        let mut body = Vec::new();
2047        encode_varint(1, &mut body); // memory_id
2048        encode_varint(2, &mut body); // s
2049        encode_varint(3, &mut body); // p
2050                                     // Encode a Symbol value (tag 0x01 + symbol varint) for `o`.
2051        body.push(0x01);
2052        encode_varint(4, &mut body);
2053        encode_varint(u64::MAX, &mut body); // derived_from count
2054        let frame = frame(Opcode::Inf, &body);
2055        let err = decode_record(&frame).expect_err("must reject huge count");
2056        assert!(
2057            matches!(
2058                err,
2059                DecodeError::Truncated { .. }
2060                    | DecodeError::VarintOverflow { .. }
2061                    | DecodeError::BodyUnderflow { .. }
2062            ),
2063            "expected typed error, got {err:?}"
2064        );
2065    }
2066
2067    #[test]
2068    fn decode_episode_meta_does_not_oom_on_huge_retracts_count() {
2069        // EpisodeMeta body: episode_id + at(clocktime) + label_len +
2070        // label_bytes + parent_tag + [parent?] + retracts_len + retracts...
2071        let mut body = Vec::new();
2072        encode_varint(1, &mut body); // episode_id
2073                                     // ClockTime: fixed-LE u64 (8 bytes).
2074        body.extend_from_slice(&1_700_000_000_000_u64.to_le_bytes());
2075        encode_varint(0, &mut body); // label_len = 0 (empty label)
2076        body.push(0x00); // parent_tag = None
2077        encode_varint(u64::MAX, &mut body); // retracts_len = u64::MAX
2078        let frame = frame(Opcode::EpisodeMeta, &body);
2079        let err = decode_record(&frame).expect_err("must reject huge count");
2080        assert!(
2081            matches!(
2082                err,
2083                DecodeError::Truncated { .. }
2084                    | DecodeError::VarintOverflow { .. }
2085                    | DecodeError::BodyUnderflow { .. }
2086            ),
2087            "expected typed error, got {err:?}"
2088        );
2089    }
2090}