// loom_snapshot 0.1.2
//
// Deterministic, cross-language world-state snapshot + replay anchor for the Loom Engine.
// Documentation
//! loom_snapshot - deterministic, cross-language world-state snapshot hash (Rust core).
//!
//! The native sibling of the TS `world-state-snapshot.ts`. v3.0 Phase 1 of the
//! Living Persistent World: a world state reduces to a `state_hash` that is
//! BYTE-IDENTICAL across TypeScript, Python, and Rust, so a snapshot at a known
//! event index can be persisted compactly, verified against the HMAC event chain
//! on resume, and compared across languages to prove no surface diverged.
//!
//! REUSE, not re-implementation. The hash rides `loom_events`' audited,
//! golden-vector-pinned primitives:
//!   state_hash = hmac_sha256_hex(key, field(SNAPSHOT_DOMAIN) + field(canonical_json(state)))
//! so it inherits cross-language byte-parity for free and adds no new canonical
//! or crypto surface. Parity is proven by the shared golden vector
//! (test_vectors/v3_0_snapshot_canonical.json), which is generated by the real
//! TS implementation and asserted here byte-for-byte.
//!
//! DESIGN (reconciled with the Pantheon, matches the TS module):
//!   - The state_hash is a PURE content hash; the event index is version metadata
//!     stored alongside in `WorldStateSnapshot`, never folded into the hash.
//!   - ONE sort rule everywhere: UTF-16 code units. Object keys are sorted by
//!     `canonical_json` (UTF-16); `normalize_tags` sorts tags by the SAME
//!     comparator (`encode_utf16`), NOT Rust's native `str` Ord (UTF-8 bytes).
//!   - Fail-closed: `canonical_json` returns `Err(CanonError)` on any
//!     non-canonical value (non-integer / -0 / unsafe-int / `__proto__` /
//!     over-depth) before a hash is produced.
//!
//! Domain separation: `SNAPSHOT_DOMAIN` is distinct from the event-chain record
//! ("loom.chain.rec/1") and seal ("loom.chain.seal/1") domains, so a snapshot
//! hash can never be reinterpreted as a chain-record signature.

use loom_events::{canonical_json, field, hmac_sha256_hex, CanonError};
use serde_json::Value;
use std::collections::HashSet;

/// Namespace tag for snapshot HMACs.
///
/// This string is framed with `field()` and placed in front of the canonical
/// state before hashing, so a snapshot hash lives in its own domain. The
/// trailing `/1` is a format version. MUST match the TS `SNAPSHOT_DOMAIN`
/// verbatim: changing a single byte changes every snapshot hash.
pub const SNAPSHOT_DOMAIN: &str = "loom.snapshot/1";

/// A snapshot commitment: WHICH event index, and the pure content hash of the
/// state at that index.
///
/// The two fields are independent: `state_hash` is computed from the state
/// alone, and `event_index` is carried next to it as version metadata. Two
/// snapshots compare equal (`PartialEq`) only when both fields match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WorldStateSnapshot {
    /// The chain event index this snapshot commits to (version metadata).
    /// Not an input to `state_hash`.
    pub event_index: u64,
    /// HMAC-SHA-256 hex of the domain-framed canonical world state.
    pub state_hash: String,
}

/// Builds the exact string that is fed to HMAC for a snapshot.
///
/// The message is two `field()`-framed parts concatenated in order: the
/// [`SNAPSHOT_DOMAIN`] tag, then the canonical JSON of `state`. This mirrors
/// the TS `snapshotMessage`. The event index is deliberately absent, so the
/// resulting hash depends on the state content only.
///
/// # Errors
///
/// Propagates the `CanonError` from `canonical_json` when `state` is not
/// canonicalisable; no message is produced in that case.
fn snapshot_message(state: &Value) -> Result<String, CanonError> {
    // Domain tag first, then the state payload - the order is part of the format.
    let domain_part = field(SNAPSHOT_DOMAIN);
    // The `0` is passed through unchanged from the original call
    // (presumably the starting nesting depth - confirm against `loom_events`).
    let canonical = canonical_json(state, 0)?;
    let state_part = field(&canonical);

    let mut framed = String::with_capacity(domain_part.len() + state_part.len());
    framed.push_str(&domain_part);
    framed.push_str(&state_part);
    Ok(framed)
}

/// Encodes a world state as canonical (deterministic, injective) JSON.
///
/// A thin public wrapper over `loom_events::canonical_json`; the output is
/// meant to be byte-identical to the TS `canonicalWorldState`.
///
/// # Errors
///
/// Returns the `CanonError` reported by `canonical_json` when `state`
/// contains a non-canonical value.
pub fn canonical_world_state(state: &Value) -> Result<String, CanonError> {
    let encoded = canonical_json(state, 0)?;
    Ok(encoded)
}

/// Computes the pure content hash of a world state.
///
/// The hash is `hmac_sha256_hex(key, snapshot_message(state))`, i.e. an HMAC
/// over the domain-framed canonical encoding of `state`. It is intended to be
/// byte-identical to the TS `worldStateHash` for the same key and state.
///
/// # Errors
///
/// Fails closed: if `state` cannot be canonicalised the `CanonError` is
/// returned and no hash is produced.
pub fn world_state_hash(key: &[u8], state: &Value) -> Result<String, CanonError> {
    let message = snapshot_message(state)?;
    Ok(hmac_sha256_hex(key, &message))
}

/// Computes the per-region leaf hashes `{ regionId: regionHash }`.
///
/// `regions` is expected to be a JSON object whose values are world-state-shaped
/// partitions; each value is hashed with [`world_state_hash`] and stored under
/// the same key as a JSON string. Mirrors the TS `regionLeaves`. The order in
/// which leaves are produced does not matter, because the map is key-sorted by
/// `canonical_json` when it is later hashed.
///
/// If `regions` is not a JSON object, the result is an empty object rather
/// than an error.
///
/// # Errors
///
/// Returns the first `CanonError` hit while hashing a region; no partial map
/// is returned.
pub fn region_leaves(key: &[u8], regions: &Value) -> Result<Value, CanonError> {
    let leaves = match regions.as_object() {
        Some(partitions) => partitions
            .iter()
            .map(|(region_id, region_state)| {
                world_state_hash(key, region_state)
                    .map(|leaf| (region_id.clone(), Value::String(leaf)))
            })
            // Collecting into `Result` stops at the first failing region.
            .collect::<Result<serde_json::Map<String, Value>, CanonError>>()?,
        None => serde_json::Map::new(),
    };
    Ok(Value::Object(leaves))
}

/// Computes the GLOBAL region hash: the root of the 2-level region Merkle tree.
///
/// First derives the leaf map with [`region_leaves`], then hashes that map
/// with [`world_state_hash`] (same key, same snapshot domain). Mirrors the TS
/// `globalRegionHash`. For interest management, a partial-sync client can
/// verify its own region leaf plus this root without holding the full world.
///
/// # Errors
///
/// Returns the `CanonError` from hashing any region or from hashing the leaf
/// map itself.
pub fn global_region_hash(key: &[u8], regions: &Value) -> Result<String, CanonError> {
    region_leaves(key, regions).and_then(|leaf_map| world_state_hash(key, &leaf_map))
}

/// The max event index that survives a JSON round-trip into JS
/// (`Number.MAX_SAFE_INTEGER`, 2^53 - 1).
///
/// The TS `snapshotWorldState` rejects an unsafe event index at creation; Rust
/// matches it (audit P1). Without this bound, the index of a Rust-made snapshot
/// could silently round on the JS side and select the wrong replay tail, even
/// though `state_hash` itself is content-only.
pub const MAX_SAFE_EVENT_INDEX: u64 = 9_007_199_254_740_991;

/// Takes a snapshot: the `(event_index, state_hash)` commitment for `state`.
///
/// `event_index` is stored as-is in the returned [`WorldStateSnapshot`]; it is
/// not mixed into the hash, which comes from [`world_state_hash`].
///
/// # Errors
///
/// - `CanonError::UnsafeInteger` when `event_index` is greater than
///   [`MAX_SAFE_EVENT_INDEX`]. This is checked before any hashing, so the
///   metadata stays portable across the TS / Python / Rust surfaces.
/// - Any `CanonError` from hashing a non-canonical `state`.
pub fn snapshot_world_state(
    key: &[u8],
    state: &Value,
    event_index: u64,
) -> Result<WorldStateSnapshot, CanonError> {
    if event_index <= MAX_SAFE_EVENT_INDEX {
        world_state_hash(key, state).map(|state_hash| WorldStateSnapshot {
            event_index,
            state_hash,
        })
    } else {
        // Fail closed: the index would not survive a round-trip through a JS number.
        Err(CanonError::UnsafeInteger)
    }
}

/// Checks that `state` hashes to `expected_hash` under `key`.
///
/// The hash is recomputed with [`world_state_hash`] and compared to
/// `expected_hash` byte-for-byte via `ct_eq`, which does not exit early on the
/// first differing byte. That makes it usable as an integrity gate on an
/// untrusted resumed snapshot: `Ok(false)` means do NOT trust the state.
/// The comparison is over the raw hex text, so it is case-sensitive.
///
/// # Errors
///
/// Returns the `CanonError` from hashing when `state` is not canonicalisable;
/// in that case no comparison is made.
pub fn verify_world_snapshot(
    key: &[u8],
    state: &Value,
    expected_hash: &str,
) -> Result<bool, CanonError> {
    world_state_hash(key, state)
        .map(|recomputed| ct_eq(recomputed.as_bytes(), expected_hash.as_bytes()))
}

/// Constant-time byte compare (no early exit on content). Used for the
/// snapshot-hash gate.
///
/// Returns `true` only when `a` and `b` have the same length and every byte
/// matches. A length mismatch returns `false` immediately; only the length,
/// never the content, influences that early return. For equal lengths every
/// byte pair is visited and the XOR differences are OR-accumulated, so the
/// loop does not stop at the first mismatch.
fn ct_eq(a: &[u8], b: &[u8]) -> bool {
    if a.len() != b.len() {
        return false;
    }
    // `fold` never short-circuits, unlike `all`/`any`/`==`, and zipping avoids
    // the per-index bounds checks of a manual `0..len` loop.
    let diff = a.iter().zip(b).fold(0u8, |acc, (x, y)| acc | (x ^ y));
    diff == 0
}

/// Returns the distinct tags from `tags`, sorted by UTF-16 code unit.
///
/// This is the SAME ordering `canonical_json` uses for object keys (ONE sort
/// rule). Do NOT replace the comparator with Rust's native `String` ordering:
/// that is UTF-8 byte order, which places an astral character (encoded as a
/// surrogate pair starting at 0xD800..=0xDBFF in UTF-16) AFTER a high BMP
/// character such as U+F8FF, whereas UTF-16 order - and therefore the TS
/// `normalizeTags` and JS key sort - places it BEFORE.
///
/// The input slice is left untouched; a new `Vec` is returned.
pub fn normalize_tags(tags: &[String]) -> Vec<String> {
    // Keep the first occurrence of every tag; `insert` is false for repeats.
    let mut already_seen: HashSet<&str> = HashSet::with_capacity(tags.len());
    let mut unique: Vec<String> = tags
        .iter()
        .filter(|&tag| already_seen.insert(tag.as_str()))
        .cloned()
        .collect();

    // Lexicographic comparison over UTF-16 code units, not UTF-8 bytes.
    let by_utf16 = |lhs: &String, rhs: &String| lhs.encode_utf16().cmp(rhs.encode_utf16());
    unique.sort_by(by_utf16);
    unique
}

/// Unit tests for the snapshot surface: tag ordering, hash verification,
/// fail-closed canonicalisation, the event-index bound, and key-order
/// independence of the state hash.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Tags are de-duplicated and ordered by UTF-16 code unit, which differs
    /// from UTF-8 byte order for an astral char vs. a high BMP char.
    #[test]
    fn normalize_tags_utf16_dedupe() {
        let astral = "\u{1F40D}"; // UTF-16 lead 0xD83D
        let bmp = "\u{F8FF}"; // BMP 0xF8FF
        // UTF-16: 'a' (0x61) < astral (0xD83D) < bmp (0xF8FF).
        let got = normalize_tags(&[
            bmp.to_string(),
            astral.to_string(),
            "a".to_string(),
            "a".to_string(),
        ]);
        assert_eq!(got, vec!["a".to_string(), astral.to_string(), bmp.to_string()]);
    }

    /// Verification accepts the matching state + key and rejects both a
    /// tampered state and a different key.
    #[test]
    fn verify_matches_and_rejects() {
        let key = b"runtime-secret";
        let state = json!({"epoch": 1, "worldSeed": 2, "entities": {}});
        let h = world_state_hash(key, &state).unwrap();
        assert!(verify_world_snapshot(key, &state, &h).unwrap());
        // Same shape, one value changed: the hash must no longer match.
        let tampered = json!({"epoch": 1, "worldSeed": 3, "entities": {}});
        assert!(!verify_world_snapshot(key, &tampered, &h).unwrap());
        // Same state under a different HMAC key must not verify either.
        assert!(!verify_world_snapshot(b"other", &state, &h).unwrap());
    }

    /// A non-integer number is non-canonical, so hashing returns `Err`
    /// instead of producing a hash.
    #[test]
    fn fail_closed_on_non_integer() {
        let key = b"k";
        assert!(world_state_hash(key, &json!({"x": 1.5})).is_err());
    }

    /// The event index bound is inclusive: exactly `MAX_SAFE_EVENT_INDEX` is
    /// accepted, one past it is rejected.
    #[test]
    fn rejects_unsafe_event_index() {
        // Audit P1: a Rust event index past 2^53-1 would round on a JS round-trip.
        let key = b"k";
        let state = json!({"entities": {}});
        assert!(snapshot_world_state(key, &state, MAX_SAFE_EVENT_INDEX).is_ok());
        assert!(snapshot_world_state(key, &state, MAX_SAFE_EVENT_INDEX + 1).is_err());
    }

    /// Two states that differ only in object key insertion order hash to the
    /// same value.
    #[test]
    fn insertion_order_irrelevant() {
        let key = b"k";
        let a = json!({"entities": {"x": {"properties": {"b": 2, "a": 1}}}});
        let b = json!({"entities": {"x": {"properties": {"a": 1, "b": 2}}}});
        assert_eq!(
            world_state_hash(key, &a).unwrap(),
            world_state_hash(key, &b).unwrap()
        );
    }
}