Skip to main content

soma_som_core/
codec.rs

1// SPDX-License-Identifier: LGPL-3.0-only
2#![allow(missing_docs)]
3
4//! Shared persistence codec — one implementation, used by all organs.
5//!
6//! Eliminates per-organ duplication of bincode serialize/deserialize wrappers.
7//! Each organ converts `CodecError` into its own error type via `From<CodecError>`.
8//!
9//! # Wire format — magic-prefix discriminator
10//!
11//! ```text
12//! encode:  [0xFF 0xFF 0xFF 0xFF 0x02][bincode-2 payload]
13//! decode:  read first 4 bytes;
14//!          if [0xFF 0xFF 0xFF 0xFF] → read 5th byte (version);
15//!                                     dispatch (2 ⇒ bincode-2 path on bytes[5..]);
16//!          else → legacy bincode-1 path on full bytes (including byte 0).
17//! ```
18//!
19//! The 5-byte magic prefix `[0xFF×4 + version_byte]` is structurally impossible for
20//! bincode-1 length-prefix u64 of redb-bounded String/Vec values (would require length
21//! ≥ 2³² = 4GB; redb max value 4GB; typical persisted values KB-MB) and vanishingly
22//! rare (~1/2³²) for fixint-LE u64/u32 leading scalar fields.
23//!
24//! Future codec evolutions reuse the same lever: `[0xFF×4 + version_byte]` with
25//! `version_byte = 3` for postcard, etc.
26
27use serde::{Serialize, de::DeserializeOwned};
28
29/// Magic prefix that disambiguates v=2+ payloads from legacy bincode-1 bytes.
30const CODEC_MAGIC: [u8; 4] = [0xFF, 0xFF, 0xFF, 0xFF];
31
32/// Current codec version (bincode 2 with serde adapter, default config).
33const CODEC_VERSION_V2: u8 = 2;
34
/// Failure raised by [`persist_encode`] or [`persist_decode`].
///
/// The single public field carries a human-readable description of what went
/// wrong (which codec path failed and the underlying library message).
#[derive(Debug)]
pub struct CodecError(pub String);

impl std::fmt::Display for CodecError {
    /// Renders the error as `codec error: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(detail) = self;
        write!(f, "codec error: {detail}")
    }
}

// No underlying `source()`: the originating library error is already folded
// into the message string.
impl std::error::Error for CodecError {}
46
47/// Encode a value to bytes using the workspace persistence codec.
48///
49/// Output format: `[0xFF 0xFF 0xFF 0xFF 0x02][bincode-2 payload]`.
50pub fn persist_encode<T: Serialize>(value: &T) -> Result<Vec<u8>, CodecError> {
51    let payload = bincode::serde::encode_to_vec(value, bincode::config::standard())
52        .map_err(|e| CodecError(format!("bincode-2 encode: {e}")))?;
53    let mut out = Vec::with_capacity(5 + payload.len());
54    out.extend_from_slice(&CODEC_MAGIC);
55    out.push(CODEC_VERSION_V2);
56    out.extend_from_slice(&payload);
57    Ok(out)
58}
59
60/// Decode bytes into a value using the workspace persistence codec.
61///
62/// Dispatch: 4-byte magic check + 5th-byte version dispatch (v=2 ⇒ bincode-2 on
63/// bytes[5..]); else legacy bincode-1 on full bytes (v=0 implicit).
64pub fn persist_decode<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, CodecError> {
65    if bytes.len() >= 5 && bytes.get(..4).is_some_and(|s| s == CODEC_MAGIC) {
66        let version = bytes.get(4).copied().unwrap_or(0);
67        match version {
68            CODEC_VERSION_V2 => {
69                let (value, _consumed) = bincode::serde::decode_from_slice::<T, _>(
70                    bytes.get(5..).unwrap_or_default(),
71                    bincode::config::standard(),
72                )
73                .map_err(|e| CodecError(format!("bincode-2 decode: {e}")))?;
74                Ok(value)
75            }
76            other => Err(CodecError(format!(
77                "unknown codec version byte: 0x{other:02x} (expected 0x{CODEC_VERSION_V2:02x})"
78            ))),
79        }
80    } else {
81        // Legacy v=0 path: bincode-1 deserialize on full bytes (no prefix).
82        bincode_legacy::deserialize(bytes)
83            .map_err(|e| CodecError(format!("bincode-1 legacy decode: {e}")))
84    }
85}
86
// inline: exercises module-private items via super::*
#[cfg(test)]
mod tests {
    use super::*;
    use serde::{Deserialize, Serialize};

    /// Baseline persisted shape used by most tests.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
    struct Sample {
        cycle_index: u64,
        actor: String,
    }

    /// `Sample` plus one trailing defaulted field, used to probe schema evolution.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
    struct SampleExtended {
        cycle_index: u64,
        actor: String,
        #[serde(default)]
        new_field: Option<String>,
    }

    /// AC3.test1 — encode emits magic prefix + version byte 2.
    #[test]
    fn encode_emits_magic_prefix_and_version() {
        let s = Sample {
            cycle_index: 42,
            actor: "alice".into(),
        };
        let bytes = persist_encode(&s).unwrap();
        assert_eq!(&bytes[..4], &CODEC_MAGIC, "first 4 bytes must be magic");
        assert_eq!(bytes[4], CODEC_VERSION_V2, "5th byte must be version 2");
        assert!(bytes.len() > 5, "must carry payload after header");
    }

    /// AC3.test2 — legacy bincode-1 bytes decode via legacy path.
    #[test]
    fn legacy_bincode_1_bytes_decode_via_legacy_path() {
        let s = Sample {
            cycle_index: 1,
            actor: "bob".into(),
        };
        let legacy = bincode_legacy::serialize(&s).unwrap();
        // Precondition: legacy bytes carry no magic prefix.
        assert!(
            !legacy.starts_with(&CODEC_MAGIC),
            "legacy bytes must not begin with the codec magic"
        );
        let decoded: Sample = persist_decode(&legacy).expect("legacy path must decode");
        assert_eq!(s, decoded);
    }

    /// AC3.test3 — collision-class legacy bytes (first byte = 0x02) decode via legacy path.
    /// This is the v1.1-defect-fix proof: under v1.1's 1-byte first-byte dispatch, a struct
    /// with leading `cycle_index: u64 = 2` would emit bincode-1 bytes starting with 0x02
    /// and mis-dispatch to the v=2 bincode-2 path. v1.2's magic-prefix dispatch reads the
    /// FULL 4-byte magic before considering v=2; the leading 0x02 byte alone (without
    /// `[0xFF×4]` magic) routes correctly to legacy.
    #[test]
    fn collision_class_legacy_bytes_first_byte_0x02_decode_via_legacy_path() {
        let s = Sample {
            cycle_index: 2, // bincode-1 fixint LE u64=2 emits [02 00 00 00 00 00 00 00]
            actor: "collision".into(),
        };
        let legacy = bincode_legacy::serialize(&s).unwrap();
        assert_eq!(legacy[0], 0x02, "v1.1-defect collision class: first byte must be 0x02");
        // v1.2 fix: persist_decode must recognize legacy (no [0xFF×4] magic) and dispatch
        // to bincode-1, NOT mis-dispatch to v=2 path.
        let decoded: Sample = persist_decode(&legacy)
            .expect("collision-class legacy bytes must decode via legacy path under v1.2");
        assert_eq!(s, decoded);
    }

    /// AC3.test4 — round-trip via wrapper (encode + decode = identity).
    #[test]
    fn round_trip_via_wrapper() {
        let s = Sample {
            cycle_index: 12345,
            actor: "round-trip".into(),
        };
        let bytes = persist_encode(&s).unwrap();
        let decoded: Sample = persist_decode(&bytes).unwrap();
        assert_eq!(s, decoded);
    }

    /// Trailing-additive extension is NOT supported by the bincode-2 serde adapter.
    ///
    /// Both bincode-1 (positional) and bincode-2 serde adapter (also positional) fail with
    /// `UnexpectedEnd` when decoding bytes lacking a trailing field, even if that field is
    /// `#[serde(default)]`. The serde default applies in the Visitor lifecycle, AFTER the
    /// decoder has already hit EOF on the missing bytes.
    ///
    /// This test is the regression guard for the discipline: trailing-additive
    /// evolution requires explicit per-table migration, not automatic serde defaults.
    #[test]
    fn trailing_additive_via_v2_not_supported() {
        let s = Sample {
            cycle_index: 100,
            actor: "trailing".into(),
        };
        let bytes = persist_encode(&s).unwrap();
        let result: Result<SampleExtended, _> = persist_decode(&bytes);
        assert!(
            result.is_err(),
            "bincode-2 serde adapter does NOT support trailing-additive: \
             #[serde(default)] is not applied before UnexpectedEnd; \
             per-table migration required"
        );
    }

    /// Bonus: empty bytes are an error (not a silent success).
    #[test]
    fn empty_bytes_error() {
        let result: Result<Sample, _> = persist_decode(&[]);
        assert!(result.is_err());
    }

    /// Bonus: bytes starting with magic but unknown version → error.
    ///
    /// The error message is checked, not just `is_err()`: these same 8 bytes would
    /// also fail on the legacy bincode-1 path (truncated input), so a bare
    /// `is_err()` could not tell whether the version dispatch actually ran.
    #[test]
    fn unknown_version_byte_error() {
        let bytes = [0xFF, 0xFF, 0xFF, 0xFF, 0x99, 0x00, 0x00, 0x00];
        let err = persist_decode::<Sample>(&bytes)
            .expect_err("magic followed by an unknown version byte must be rejected");
        assert!(
            err.0.contains("unknown codec version byte: 0x99"),
            "error must come from version dispatch, got: {err}"
        );
    }
}