// soma-som-core 0.1.0
//
// Universal soma(som) structural primitives — Quad / Tree / Ring / Genesis / Fingerprint / TemporalLedger / CrossingRecord
// Documentation
// SPDX-License-Identifier: LGPL-3.0-only
#![allow(missing_docs)]

//! Shared persistence codec — one implementation, used by all organs.
//!
//! Eliminates per-organ duplication of bincode serialize/deserialize wrappers.
//! Each organ converts `CodecError` into its own error type via `From<CodecError>`.
//!
//! # Wire format — magic-prefix discriminator
//!
//! ```text
//! encode:  [0xFF 0xFF 0xFF 0xFF 0x02][bincode-2 payload]
//! decode:  read first 4 bytes;
//!          if [0xFF 0xFF 0xFF 0xFF] → read 5th byte (version);
//!                                     dispatch (2 ⇒ bincode-2 path on bytes[5..]);
//!          else → legacy bincode-1 path on full bytes (including byte 0).
//! ```
//!
//! The 5-byte magic prefix `[0xFF×4 + version_byte]` is structurally impossible for
//! bincode-1 length-prefix u64 of redb-bounded String/Vec values (would require length
//! ≥ 2³² = 4GB; redb max value 4GB; typical persisted values KB-MB) and vanishingly
//! rare (~1/2³²) for fixint-LE u64/u32 leading scalar fields.
//!
//! Future codec evolutions reuse the same lever: `[0xFF×4 + version_byte]` with
//! `version_byte = 3` for postcard, etc.

use serde::{Serialize, de::DeserializeOwned};

/// Four `0xFF` bytes that open every versioned (v=2+) payload.
///
/// `persist_decode` treats input that does not start with these bytes as legacy
/// bincode-1 data.
const CODEC_MAGIC: [u8; 4] = [0xFF; 4];

/// Version byte written directly after the magic prefix by the current encoder
/// (bincode 2 through its serde adapter, standard configuration).
const CODEC_VERSION_V2: u8 = 0x02;

/// Error from encode/decode operations.
///
/// Wraps a human-readable message describing which codec path failed and why.
/// The message is public so callers can convert it into their own error type.
///
/// `Clone`, `PartialEq` and `Eq` are derived so the error can be embedded in
/// caller error types that derive those traits and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodecError(pub String);

impl std::fmt::Display for CodecError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "codec error: {}", self.0)
    }
}

// Marker impl so `CodecError` can be used wherever `std::error::Error` is expected
// (e.g. `Box<dyn Error>`). The body is empty, so the trait's default methods apply
// and no underlying source error is exposed.
impl std::error::Error for CodecError {}

/// Serialize `value` with the workspace persistence codec.
///
/// The returned buffer is framed as `[0xFF 0xFF 0xFF 0xFF 0x02][bincode-2 payload]`:
/// the 4-byte magic, the version byte, then the bincode-2 (serde adapter, standard
/// configuration) encoding of `value`.
///
/// # Errors
///
/// Returns a [`CodecError`] prefixed with `bincode-2 encode:` when bincode cannot
/// serialize `value`.
pub fn persist_encode<T: Serialize>(value: &T) -> Result<Vec<u8>, CodecError> {
    let body = match bincode::serde::encode_to_vec(value, bincode::config::standard()) {
        Ok(encoded) => encoded,
        Err(e) => return Err(CodecError(format!("bincode-2 encode: {e}"))),
    };

    // Header (magic + version) followed by the payload, joined into one exact-size buffer.
    let frame: [&[u8]; 3] = [&CODEC_MAGIC, &[CODEC_VERSION_V2], &body];
    Ok(frame.concat())
}

/// Deserialize `bytes` with the workspace persistence codec.
///
/// Dispatch rules:
///
/// * Input that starts with the 4-byte magic **and** carries at least one more byte
///   is treated as versioned: the 5th byte selects the decoder. Version 2 decodes
///   `bytes[5..]` with bincode-2 (serde adapter, standard configuration).
/// * Any other input — including input shorter than 5 bytes — is handed in full to
///   the legacy bincode-1 decoder (implicit v=0).
///
/// The number of bytes consumed by the bincode-2 decoder is discarded, so this
/// function does not itself reject trailing bytes after a v=2 payload.
///
/// # Errors
///
/// Returns a [`CodecError`] when the version byte is not recognised, or when the
/// selected decoder fails; the message prefix names the path that failed.
pub fn persist_decode<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, CodecError> {
    // `Some((version, payload))` only when the magic is present and followed by a version byte.
    let header = bytes
        .strip_prefix(&CODEC_MAGIC)
        .and_then(|rest| rest.split_first());

    let Some((version, payload)) = header else {
        // Legacy v=0 path: no prefix, so the whole input is the bincode-1 encoding.
        return bincode_legacy::deserialize(bytes)
            .map_err(|e| CodecError(format!("bincode-1 legacy decode: {e}")));
    };

    match *version {
        CODEC_VERSION_V2 => {
            bincode::serde::decode_from_slice::<T, _>(payload, bincode::config::standard())
                .map(|(value, _consumed)| value)
                .map_err(|e| CodecError(format!("bincode-2 decode: {e}")))
        }
        other => Err(CodecError(format!(
            "unknown codec version byte: 0x{other:02x} (expected 0x{CODEC_VERSION_V2:02x})"
        ))),
    }
}

// inline: exercises module-private items via super::*
#[cfg(test)]
mod tests {
    use super::*;
    use serde::{Deserialize, Serialize};

    /// Minimal persisted record: a leading fixed-width scalar followed by a
    /// length-prefixed string, used by every test below.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
    struct Sample {
        cycle_index: u64,
        actor: String,
    }

    /// `Sample` plus one trailing defaulted field, used to probe additive evolution.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
    struct SampleExtended {
        cycle_index: u64,
        actor: String,
        #[serde(default)]
        new_field: Option<String>,
    }

    /// AC3.test1 — encode emits magic prefix + version byte 2.
    #[test]
    fn encode_emits_magic_prefix_and_version() {
        let s = Sample {
            cycle_index: 42,
            actor: "alice".into(),
        };
        let bytes = persist_encode(&s).unwrap();
        assert_eq!(&bytes[..4], &CODEC_MAGIC, "first 4 bytes must be magic");
        assert_eq!(bytes[4], CODEC_VERSION_V2, "5th byte must be version 2");
        assert!(bytes.len() > 5, "must carry payload after header");
    }

    /// AC3.test2 — legacy bincode-1 bytes decode via legacy path.
    #[test]
    fn legacy_bincode_1_bytes_decode_via_legacy_path() {
        let s = Sample {
            cycle_index: 1,
            actor: "bob".into(),
        };
        let legacy = bincode_legacy::serialize(&s).unwrap();
        // legacy bytes carry no magic prefix
        assert_ne!(&legacy[..4.min(legacy.len())], &CODEC_MAGIC);
        let decoded: Sample = persist_decode(&legacy).expect("legacy path must decode");
        assert_eq!(s, decoded);
    }

    /// AC3.test3 — collision-class legacy bytes (first byte = 0x02) decode via legacy path.
    /// This is the v1.1-defect-fix proof: under v1.1's 1-byte first-byte dispatch, a struct
    /// with leading `cycle_index: u64 = 2` would emit bincode-1 bytes starting with 0x02
    /// and mis-dispatch to the v=2 bincode-2 path. v1.2's magic-prefix dispatch reads the
    /// FULL 4-byte magic before considering v=2; the leading 0x02 byte alone (without
    /// `[0xFF×4]` magic) routes correctly to legacy.
    #[test]
    fn collision_class_legacy_bytes_first_byte_0x02_decode_via_legacy_path() {
        let s = Sample {
            cycle_index: 2, // bincode-1 fixint LE u64=2 emits [02 00 00 00 00 00 00 00]
            actor: "collision".into(),
        };
        let legacy = bincode_legacy::serialize(&s).unwrap();
        assert_eq!(legacy[0], 0x02, "v1.1-defect collision class: first byte must be 0x02");
        // v1.2 fix: persist_decode must recognize legacy (no [0xFF×4] magic) and dispatch
        // to bincode-1, NOT mis-dispatch to v=2 path.
        let decoded: Sample = persist_decode(&legacy)
            .expect("collision-class legacy bytes must decode via legacy path under v1.2");
        assert_eq!(s, decoded);
    }

    /// AC3.test4 — round-trip via wrapper (encode + decode = identity).
    #[test]
    fn round_trip_via_wrapper() {
        let s = Sample {
            cycle_index: 12345,
            actor: "round-trip".into(),
        };
        let bytes = persist_encode(&s).unwrap();
        let decoded: Sample = persist_decode(&bytes).unwrap();
        assert_eq!(s, decoded);
    }

    /// Trailing-additive extension is NOT supported by the bincode-2 serde adapter.
    ///
    /// Both bincode-1 (positional) and bincode-2 serde adapter (also positional) fail with
    /// `UnexpectedEnd` when decoding bytes lacking a trailing field, even if that field is
    /// `#[serde(default)]`. The serde default applies in the Visitor lifecycle, AFTER the
    /// decoder has already hit EOF on the missing bytes.
    ///
    /// This test is the regression guard for the discipline: trailing-additive
    /// evolution requires explicit per-table migration, not automatic serde defaults.
    #[test]
    fn trailing_additive_via_v2_not_supported() {
        let s = Sample {
            cycle_index: 100,
            actor: "trailing".into(),
        };
        let bytes = persist_encode(&s).unwrap();
        let result: Result<SampleExtended, _> = persist_decode(&bytes);
        assert!(
            result.is_err(),
            "bincode-2 serde adapter does NOT support trailing-additive: \
             #[serde(default)] is not applied before UnexpectedEnd; \
             per-table migration required"
        );
    }

    /// Bonus: empty bytes are an error (not a silent success).
    #[test]
    fn empty_bytes_error() {
        let result: Result<Sample, _> = persist_decode(&[]);
        assert!(result.is_err());
    }

    /// Bonus: bytes starting with magic but unknown version → error.
    ///
    /// The assertion checks the error message, not just `is_err()`: a bare `is_err()`
    /// would not distinguish rejection by the version dispatch from a failure inside
    /// one of the decoders, so it could not catch a routing regression.
    #[test]
    fn unknown_version_byte_error() {
        let bytes = [0xFF, 0xFF, 0xFF, 0xFF, 0x99, 0x00, 0x00, 0x00];
        let err = persist_decode::<Sample>(&bytes)
            .expect_err("magic prefix with unknown version byte must be rejected");
        assert!(
            err.0.starts_with("unknown codec version byte: 0x99"),
            "expected version-dispatch error, got: {err}"
        );
    }
}