Skip to main content

graphrefly_storage/
wal.rs

1//! WAL frame substrate (Phase 14.6 — DS-14-storage Q1+Q3+Q5 locks, M4.A
2//! 2026-05-10).
3//!
4//! On-disk frame format consumed by `Graph::restore_snapshot({ mode:"diff" })`
5//! (M4.E). Each frame decomposes a single graph diff into one DS-14
6//! [`BaseChange<T>`] envelope per structural-or-value change, scoped by
7//! [`Lifecycle`] so callers can narrow rewinds.
8//!
9//! The TS reference impl lives at
10//! `packages/pure-ts/src/extra/storage/wal.ts`. Field names + checksum
11//! algorithm are parity-locked — both impls produce byte-identical hex SHA-256
12//! over the same canonical-JSON encoding of a frame body.
13//!
14//! # Canonical-JSON parity
15//!
16//! TS's `stableJsonString` (`packages/pure-ts/src/extra/storage/core.ts:22-39`)
17//! is "recursively sort object keys, then `JSON.stringify(_, undefined, 0)`".
18//! The Rust port mirrors this by routing the frame body through
19//! [`serde_json::to_value`] (lands in `serde_json::Map` which is `BTreeMap` by
20//! default — sorted iteration) then [`serde_json::to_string`]. Output is
21//! byte-identical to TS for the WAL frame schema: ASCII keys, integer
22//! numerics, no floats.
23//!
24//! **Parity caveats** (lift when a real consumer surfaces):
//! - Object KEYS containing non-BMP code points (≥ U+10000, i.e. surrogate
//!   pairs in UTF-16): JS sorts keys by UTF-16 code-unit order; Rust
//!   `BTreeMap` sorts by UTF-8 byte order. For ASCII keys these agree; once
//!   non-BMP keys are involved they can disagree. The frame schema's own keys
//!   are ASCII, so this can only bite through object keys inside a
//!   user-supplied `change.change` payload — string *values* such as `path`
//!   or `change.structure` never take part in key ordering. The encoder
//!   rejects non-ASCII keys outright (see `canonical_json`).
//! - Float-typed user payloads: JS `JSON.stringify` and Rust's `serde_json`
//!   (via `ryu`) both print a shortest round-trip decimal, but the surface
//!   syntax is not guaranteed to match — e.g. `serde_json` prints `1.0` where
//!   `JSON.stringify` prints `1`, and the two may also diverge on subnormals.
//!   WAL frames typically carry integer-only data; if a user puts a float in
//!   `change.change`, document the constraint.
36//!
37//! # Checksum
38//!
//! SHA-256 over canonical-JSON of the frame body (`t`, `lifecycle`, `path`,
//! `change`, `frame_seq`, `frame_t_ns` — the `checksum` field itself and
//! `format_version` are not hashed), encoded as a 64-char lowercase hex string.
41//! Spec-locked at `GRAPHREFLY-SPEC.md:1201-1206` — original BLAKE3 lock was
42//! revised to SHA-256 so the TS impl could stay zero-dep (no BLAKE3 in
43//! `WebCrypto`). Rust matches via `sha2` + `hex`.
44
45use serde::{Deserialize, Serialize};
46use sha2::{Digest, Sha256};
47
48use graphrefly_structures::{BaseChange, Lifecycle};
49
50// ── WAL frame envelope ─────────────────────────────────────────────────────
51
/// On-disk WAL frame (DS-14-storage Q1 lock).
///
/// Two seq fields and two timestamp fields are intentional:
/// - [`Self::frame_seq`] ≠ `change.seq`: latter is the bundle's `mutations`
///   cursor (DS-14 T1); former is the WAL tier's own cursor (this record's
///   position in the WAL stream). Replay uses `frame_seq` for ordering;
///   `change.seq` is only relevant for bundle-level cursor restoration.
/// - [`Self::frame_t_ns`] ≠ `change.t_ns`: latter is wall-clock at mutation
///   entry; former is wall-clock at WAL-write time. Under debounced tiers
///   they differ by `debounce_ms`.
///
/// The bridge wire format (DS-14 PART 5 worker bridge) is the schema-narrowed
/// subset `{ t, lifecycle, path, change }` — this struct is the
/// persistence-tier superset (DS-14-storage L3 lock).
///
/// Only `t`, `lifecycle`, `path`, `change`, `frame_seq` and `frame_t_ns` feed
/// the checksum (see `ChecksumBody`); `checksum` and `format_version` do not.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct WALFrame<T> {
    /// Bridge tag — discriminator shared with the DS-14 worker-bridge wire
    /// format. Modelled as the unit struct [`WalTag`], which always
    /// serializes as the string `"c"` and refuses anything else on
    /// deserialize (TS uses a literal `"c"` value); no `String` is stored.
    pub t: WalTag,
    /// Lifecycle scope (DS-14 PART 4). Determines replay phase ordering.
    pub lifecycle: Lifecycle,
    /// Target node / bundle path (per-graph qualified path).
    pub path: String,
    /// DS-14 universal [`BaseChange<T>`] envelope — structure-tagged delta.
    pub change: BaseChange<T>,
    /// WAL-tier monotonic cursor (uniquely owned by the WAL tier writer).
    pub frame_seq: u64,
    /// Wall-clock at WAL-write time (matches `wall_clock_ns()`).
    pub frame_t_ns: u64,
    /// SHA-256 over the canonical-JSON of the frame body sans `checksum`,
    /// encoded as a 64-char lowercase hex string. Hex (vs raw bytes) keeps
    /// the wire format JSON-codec-friendly. M4.A parity-fixture asserts
    /// byte-equivalence against the TS impl.
    ///
    /// `#[serde(default)]`: a frame whose JSON lacks the `checksum` key
    /// deserializes with an empty string here.
    #[serde(default)]
    pub checksum: String,
    /// Codec version tag. All M4.A frames are implicitly version 1
    /// (JSON codec). Defaults to `1` for backward-compatible deserialization
    /// of frames written before this field was added. Not part of the
    /// checksummed body.
    #[serde(default = "default_format_version")]
    pub format_version: u32,
}
94
/// Serde fallback used when a serialized frame has no `format_version` key:
/// such frames are treated as codec version 1.
fn default_format_version() -> u32 {
    const JSON_CODEC_V1: u32 = 1;
    JSON_CODEC_V1
}
98
/// Singleton-string discriminator for the bridge wire-format tag. Always
/// serializes / deserializes as `"c"`; rejects any other value at parse time.
///
/// A unit struct: the tag carries no data of its own, the string only
/// exists on the wire.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct WalTag;

impl WalTag {
    /// The one wire value this tag is written as and accepted from.
    pub const VALUE: &'static str = "c";
}
107
108impl Serialize for WalTag {
109    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
110        serializer.serialize_str(Self::VALUE)
111    }
112}
113
114impl<'de> Deserialize<'de> for WalTag {
115    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
116        let s = String::deserialize(deserializer)?;
117        if s == Self::VALUE {
118            Ok(WalTag)
119        } else {
120            Err(serde::de::Error::custom(format!(
121                "WALFrame.t must be {:?}, got {:?}",
122                Self::VALUE,
123                s
124            )))
125        }
126    }
127}
128
129// ── Key format (Q5) ────────────────────────────────────────────────────────
130
/// Default WAL prefix segment relative to a `graph.name`. Frames land at
/// `${graph.name}/${WAL_KEY_SEGMENT}/${frame_seq:020}`.
///
/// [`graph_wal_prefix`] builds the `${graph.name}/${WAL_KEY_SEGMENT}` part;
/// [`wal_frame_key`] appends the zero-padded `frame_seq`.
pub const WAL_KEY_SEGMENT: &str = "wal";
134
/// Pad width for `frame_seq` in WAL keys. 20 digits keeps lex-ASC string sort
/// = numeric ASC for every `u64`: `u64::MAX` itself is exactly 20 decimal
/// digits, so no value can overflow the pad.
pub const WAL_FRAME_SEQ_PAD: usize = 20;

/// Build the canonical WAL frame key. `prefix` is the WAL-prefix portion (e.g.
/// `"my-graph/wal"`); `frame_seq` is the per-frame cursor. Zero-padded to
/// [`WAL_FRAME_SEQ_PAD`] digits so lex-ASC string sort equals numeric ASC
/// sort.
///
/// The pad width is taken from the constant rather than repeated as a
/// literal, so the key format and the exported width cannot drift apart.
#[must_use]
pub fn wal_frame_key(prefix: &str, frame_seq: u64) -> String {
    format!("{prefix}/{frame_seq:0width$}", width = WAL_FRAME_SEQ_PAD)
}
146
147/// Default WAL key prefix for a graph by its `name`.
148#[must_use]
149pub fn graph_wal_prefix(graph_name: &str) -> String {
150    format!("{graph_name}/{WAL_KEY_SEGMENT}")
151}
152
153// ── Replay order (Q2) ──────────────────────────────────────────────────────
154
/// Cross-scope replay order (DS-14 PART 4 lock — `Spec → Data → Ownership`).
/// Exported so the replay implementation and parity tests share one source of
/// truth.
///
/// The array lists the three lifecycle scopes in that order; presumably
/// index 0 is replayed first — confirm against the replay implementation.
pub const REPLAY_ORDER: [Lifecycle; 3] = [Lifecycle::Spec, Lifecycle::Data, Lifecycle::Ownership];
159
160// ── Checksum ───────────────────────────────────────────────────────────────
161
/// Errors surfaced by checksum compute / verify.
#[derive(Debug, thiserror::Error)]
pub enum ChecksumError {
    /// `serde_json` rejected the frame body — typically a non-serializable
    /// payload (e.g. a `Map` with non-string keys).
    ///
    /// NOTE(review): non-finite floats (`NaN`, `±Infinity`) do not appear to
    /// land here — serde_json's `Value` conversion turns them into `null`
    /// instead of erroring. Confirm against the serde_json version in use.
    #[error("canonical JSON encoding failed: {0}")]
    CanonicalJsonFailed(#[from] serde_json::Error),
    /// Frame body contains content that cannot round-trip cross-impl through
    /// canonical JSON. Raised by `validate_canonical` for:
    /// - non-ASCII object keys (JS sorts by UTF-16 code-unit order; Rust
    ///   `BTreeMap` sorts by UTF-8 byte order — these diverge for keys
    ///   containing surrogate-pair code points);
    /// - `-0.0` and subnormal f64 values (JS `JSON.stringify` and Rust
    ///   `serde_json` via `ryu` may format these differently);
    /// - value trees nested deeper than `VALIDATE_CANONICAL_MAX_DEPTH`.
    ///
    /// Banned at the WAL encode boundary (B1 — option a) rather than
    /// allowed-with-silent-divergence.
    #[error("non-canonical content rejected by WAL encoder: {reason}")]
    NonCanonicalContent { reason: String },
}
179
/// Body fields contributing to the checksum, in the shape TS computes over
/// (TS's `canonicalFrameBody` at `wal.ts:141`). The `checksum` field of the
/// outer [`WALFrame`] is deliberately excluded; `format_version` is not part
/// of this body either, so changing it does not change the checksum.
///
/// Borrows from the frame instead of cloning; declaration order is
/// irrelevant because the encoder emits keys in sorted order.
#[derive(Serialize)]
struct ChecksumBody<'a, T: Serialize> {
    // Always `WalTag::VALUE` — the tag has no per-frame state.
    t: &'static str,
    lifecycle: &'a Lifecycle,
    path: &'a str,
    change: &'a BaseChange<T>,
    frame_seq: u64,
    frame_t_ns: u64,
}
192
193/// Encode a typed value to canonical JSON (sorted keys, no whitespace).
194/// Routes through [`serde_json::Value`] so the resulting `serde_json::Map<
195/// String, Value>` (BTreeMap-backed by default) iterates in sorted-key order
196/// — byte-identical to TS `stableJsonString` on the WAL schema.
197///
198/// **Cross-impl encode guard (B1 — option a).** After conversion to
199/// [`serde_json::Value`], the tree is walked once and rejected via
200/// [`ChecksumError::NonCanonicalContent`] if it contains any input that
201/// cannot round-trip byte-identically through TS canonical JSON:
202///
203/// - **Non-ASCII object keys** — JS sorts by UTF-16 code-unit order; Rust
204///   `BTreeMap` sorts by UTF-8 byte order. For keys containing surrogate-pair
205///   code points (≥ U+10000) these diverge. WAL frame schema keys are ASCII
206///   by spec; user-supplied identifiers in `path` or `change.structure`
207///   are NOT — this guard surfaces the divergence at write time rather than
208///   letting checksums silently mismatch on cross-impl replay.
209/// - **Subnormal f64** — JS `JSON.stringify` and Rust `serde_json` (via `ryu`)
210///   may format denormalized floats differently; subnormals are vanishingly
211///   rare in graphrefly payloads (integer counters dominate) but the
212///   divergence is theoretical, so the strict guard refuses them.
213/// - **NaN / ±Infinity** — already rejected at `serde_json::Number::from_f64`
214///   conversion (returns `None`); the resulting `serde_json::Error` flows
215///   through the existing `CanonicalJsonFailed` variant.
216fn canonical_json<T: Serialize>(value: &T) -> Result<String, ChecksumError> {
217    let v = serde_json::to_value(value)?;
218    validate_canonical(&v, 0)?;
219    serde_json::to_string(&v).map_err(ChecksumError::from)
220}
221
222/// /qa G2.6 (2026-05-22): depth cap. graphrefly payloads are bounded
223/// (`BaseChange<T>` envelopes), but `serde_json::Value` is the escape
224/// hatch — a misbehaving binding could synthesize a deeply-nested tree
225/// and overflow the recursion stack inside this validator. 128 levels
226/// matches `serde_json::de::Deserializer`'s default recursion limit; a
227/// value tree exceeding it cannot have been parsed from JSON and is
228/// rejected at the canonical-encode boundary instead of crashing the
229/// process.
230const VALIDATE_CANONICAL_MAX_DEPTH: u32 = 128;
231
232/// Recursively validate a `Value` tree against the canonical-encode guard.
233/// See [`canonical_json`] for the rejection criteria.
234fn validate_canonical(v: &serde_json::Value, depth: u32) -> Result<(), ChecksumError> {
235    if depth > VALIDATE_CANONICAL_MAX_DEPTH {
236        return Err(ChecksumError::NonCanonicalContent {
237            reason: format!(
238                "JSON nesting depth exceeds {VALIDATE_CANONICAL_MAX_DEPTH} \
239                 (matches serde_json's default deserialization recursion limit; \
240                 deeper trees cannot round-trip through standard JSON parsers)"
241            ),
242        });
243    }
244    match v {
245        serde_json::Value::Object(map) => {
246            for (k, child) in map {
247                if !k.is_ascii() {
248                    return Err(ChecksumError::NonCanonicalContent {
249                        reason: format!(
250                            "non-ASCII object key {k:?} \
251                             (JS sorts UTF-16 code units, Rust sorts UTF-8 bytes — \
252                             divergent for code points ≥ U+10000)"
253                        ),
254                    });
255                }
256                validate_canonical(child, depth + 1)?;
257            }
258            Ok(())
259        }
260        serde_json::Value::Array(arr) => arr
261            .iter()
262            .try_for_each(|child| validate_canonical(child, depth + 1)),
263        serde_json::Value::Number(n) => {
264            if let Some(f) = n.as_f64() {
265                // `serde_json::Number::from_f64` already rejects NaN/Inf, so
266                // any f64 reaching this point is finite. Subnormals slip
267                // through finite checks but format-differ between TS and Rust.
268                //
269                // /qa G2.5 (2026-05-22): the prior guard `f != 0.0 && !f.is_normal()`
270                // accepted `-0.0` because `-0.0 == 0.0` is true — but
271                // `JSON.stringify(-0.0)` is `"0"` while `serde_json` emits
272                // `"-0.0"`. That is EXACTLY the divergence the guard exists
273                // to catch. Discriminate via the raw bit pattern: positive
274                // zero is `0x0000_0000_0000_0000`; any other bit pattern
275                // (including `0x8000_0000_0000_0000` for `-0.0` and all
276                // subnormals) is rejected.
277                let bits = f.to_bits();
278                if bits != 0 && !f.is_normal() {
279                    return Err(ChecksumError::NonCanonicalContent {
280                        reason: format!(
281                            "non-canonical f64 {f:e} (bits={bits:#018x}) \
282                             (rejects -0.0 + subnormals — JS `JSON.stringify` \
283                             and Rust `serde_json` may format these differently)"
284                        ),
285                    });
286                }
287            }
288            Ok(())
289        }
290        // String values can carry arbitrary UTF-8 — the canonical-JSON rule
291        // only constrains *keys* (which control sort order); string values
292        // serialize identically across impls via `\uXXXX` for non-ASCII.
293        serde_json::Value::String(_) | serde_json::Value::Bool(_) | serde_json::Value::Null => {
294            Ok(())
295        }
296    }
297}
298
299/// Compute the SHA-256 checksum over a frame's body (sans `checksum`),
300/// returning a 64-char lowercase hex string. Parity-locked with TS
301/// `walFrameChecksum`.
302pub fn wal_frame_checksum<T: Serialize>(frame: &WALFrame<T>) -> Result<String, ChecksumError> {
303    let body = ChecksumBody {
304        t: WalTag::VALUE,
305        lifecycle: &frame.lifecycle,
306        path: frame.path.as_str(),
307        change: &frame.change,
308        frame_seq: frame.frame_seq,
309        frame_t_ns: frame.frame_t_ns,
310    };
311    let canonical = canonical_json(&body)?;
312    let digest = Sha256::digest(canonical.as_bytes());
313    Ok(hex::encode(digest))
314}
315
316/// Verify a frame's `checksum` field matches its body. Replay invokes this at
317/// the WAL tail (drop on mismatch by default) and mid-stream (abort on
318/// mismatch by default) per Q3.
319pub fn verify_wal_frame_checksum<T: Serialize>(frame: &WALFrame<T>) -> Result<bool, ChecksumError> {
320    let expected = wal_frame_checksum(frame)?;
321    Ok(frame.checksum == expected)
322}
323
324// ── Tests ──────────────────────────────────────────────────────────────────
325
326#[cfg(test)]
327mod tests {
328    use super::*;
329    use graphrefly_structures::Version;
330
331    fn sample_frame() -> WALFrame<u64> {
332        WALFrame {
333            t: WalTag,
334            lifecycle: Lifecycle::Data,
335            path: "root/state".into(),
336            change: BaseChange {
337                structure: "graphValue".into(),
338                version: Version::Counter(1),
339                t_ns: 1_700_000_000_000,
340                seq: Some(0),
341                lifecycle: Lifecycle::Data,
342                change: 42,
343            },
344            frame_seq: 17,
345            frame_t_ns: 1_700_000_001_000,
346            checksum: String::new(),
347            format_version: 1,
348        }
349    }
350
351    #[test]
352    fn wal_frame_key_zero_pads_to_20_digits() {
353        assert_eq!(wal_frame_key("g/wal", 0), "g/wal/00000000000000000000",);
354        assert_eq!(wal_frame_key("g/wal", 17), "g/wal/00000000000000000017",);
355        assert_eq!(
356            wal_frame_key("g/wal", u64::MAX),
357            format!("g/wal/{:020}", u64::MAX),
358        );
359    }
360
361    #[test]
362    fn wal_frame_key_lex_sort_equals_numeric_sort() {
363        // Build keys for 0, 1, 10, 100, u64::MAX. Sort lex; assert numeric
364        // order is preserved (the core invariant `frame_seq` ASC = lex ASC).
365        let seqs = [0u64, 1, 10, 100, 1_000_000, u64::MAX];
366        let mut keys: Vec<String> = seqs.iter().map(|s| wal_frame_key("g/wal", *s)).collect();
367        keys.sort();
368        for (k, expected) in keys.iter().zip(seqs.iter()) {
369            assert!(
370                k.ends_with(&format!("{expected:020}")),
371                "lex-sort key {k} did not match numeric order for {expected}",
372            );
373        }
374    }
375
376    #[test]
377    fn graph_wal_prefix_joins_with_segment() {
378        assert_eq!(graph_wal_prefix("my-graph"), "my-graph/wal");
379    }
380
381    #[test]
382    fn checksum_roundtrip_verifies() {
383        let mut frame = sample_frame();
384        frame.checksum = wal_frame_checksum(&frame).unwrap();
385        assert!(verify_wal_frame_checksum(&frame).unwrap());
386    }
387
388    #[test]
389    fn checksum_tamper_change_payload_fails_verify() {
390        let mut frame = sample_frame();
391        frame.checksum = wal_frame_checksum(&frame).unwrap();
392        frame.change.change = 43; // tamper the user payload
393        assert!(!verify_wal_frame_checksum(&frame).unwrap());
394    }
395
396    #[test]
397    fn checksum_tamper_path_fails_verify() {
398        let mut frame = sample_frame();
399        frame.checksum = wal_frame_checksum(&frame).unwrap();
400        frame.path = "different/path".into();
401        assert!(!verify_wal_frame_checksum(&frame).unwrap());
402    }
403
404    #[test]
405    fn checksum_tamper_frame_seq_fails_verify() {
406        let mut frame = sample_frame();
407        frame.checksum = wal_frame_checksum(&frame).unwrap();
408        frame.frame_seq = 18;
409        assert!(!verify_wal_frame_checksum(&frame).unwrap());
410    }
411
412    #[test]
413    fn checksum_excludes_checksum_field_itself() {
414        let mut frame = sample_frame();
415        frame.checksum = "deadbeef".repeat(8);
416        let first = wal_frame_checksum(&frame).unwrap();
417        frame.checksum = "00".repeat(32);
418        let second = wal_frame_checksum(&frame).unwrap();
419        assert_eq!(
420            first, second,
421            "wal_frame_checksum must not depend on the existing checksum field",
422        );
423    }
424
425    #[test]
426    fn checksum_is_64_char_lowercase_hex() {
427        let mut frame = sample_frame();
428        frame.checksum = wal_frame_checksum(&frame).unwrap();
429        assert_eq!(frame.checksum.len(), 64);
430        assert!(
431            frame
432                .checksum
433                .chars()
434                .all(|c| matches!(c, '0'..='9' | 'a'..='f')),
435            "checksum must be lowercase hex: {}",
436            frame.checksum,
437        );
438    }
439
440    #[test]
441    fn wal_tag_serializes_as_string_c() {
442        let s = serde_json::to_string(&WalTag).unwrap();
443        assert_eq!(s, "\"c\"");
444    }
445
446    #[test]
447    fn wal_tag_rejects_other_values() {
448        let r: Result<WalTag, _> = serde_json::from_str("\"x\"");
449        assert!(r.is_err(), "WalTag must reject non-c discriminators");
450    }
451
452    #[test]
453    fn canonical_json_sorts_keys() {
454        // Canonical-JSON sanity check on a struct with declaration order
455        // OPPOSITE to alphabetical: `zebra, monkey, apple`. The emitted JSON
456        // must list keys in alphabetical order regardless of declaration
457        // order (mirrors TS `stableJsonString` recursive key sort). Single
458        // level only so `find` matches unambiguously.
459        #[derive(Serialize)]
460        struct Flat {
461            zebra: u32,
462            monkey: u32,
463            apple: u32,
464        }
465        let json = canonical_json(&Flat {
466            zebra: 1,
467            monkey: 2,
468            apple: 3,
469        })
470        .unwrap();
471        assert_eq!(json, "{\"apple\":3,\"monkey\":2,\"zebra\":1}");
472    }
473
    /// Cross-impl parity fixture.
    ///
    /// This is the parity-or-bust check: it pins both the canonical-JSON
    /// bytes and their SHA-256 for a minimal frame, the values being those
    /// TS's `walFrameChecksum` is expected to produce for the same input. If
    /// the Rust impl drifts from byte-identical TS output, this test fails
    /// loudly.
    ///
    /// Fixture inputs are deliberately minimal (single `u64` change payload)
    /// so the expected canonical bytes are auditable by hand.
    #[test]
    fn checksum_parity_fixture_minimal_frame() {
        // Frame body:
        //   { t:"c", lifecycle:"data", path:"p",
        //     change:{ change:0, lifecycle:"data", structure:"s", t_ns:0, version:0 },
        //     frame_seq:0, frame_t_ns:0 }
        //
        // Canonical (sorted-key, no whitespace) form:
        //   {"change":{"change":0,"lifecycle":"data","structure":"s","t_ns":0,"version":0},"frame_seq":0,"frame_t_ns":0,"lifecycle":"data","path":"p","t":"c"}
        //
        // `seq` is `None` here and does not appear in the canonical form.
        // The SHA-256 over those bytes is checked below; to regenerate it,
        // hash exactly the canonical line above (no trailing newline), e.g.
        //   printf '%s' '<canonical>' | shasum -a 256
        let frame: WALFrame<u64> = WALFrame {
            t: WalTag,
            lifecycle: Lifecycle::Data,
            path: "p".into(),
            change: BaseChange {
                structure: "s".into(),
                version: Version::Counter(0),
                t_ns: 0,
                seq: None,
                lifecycle: Lifecycle::Data,
                change: 0,
            },
            frame_seq: 0,
            frame_t_ns: 0,
            checksum: String::new(),
            format_version: 1,
        };
        let computed = wal_frame_checksum(&frame).unwrap();

        // Sanity: confirm the canonical body the Rust impl is hashing, so a
        // hash mismatch can be told apart from an encoding mismatch.
        let body = ChecksumBody {
            t: WalTag::VALUE,
            lifecycle: &frame.lifecycle,
            path: frame.path.as_str(),
            change: &frame.change,
            frame_seq: frame.frame_seq,
            frame_t_ns: frame.frame_t_ns,
        };
        let canonical = canonical_json(&body).unwrap();
        let expected_canonical = "{\"change\":{\"change\":0,\"lifecycle\":\"data\",\"structure\":\"s\",\"t_ns\":0,\"version\":0},\"frame_seq\":0,\"frame_t_ns\":0,\"lifecycle\":\"data\",\"path\":\"p\",\"t\":\"c\"}";
        assert_eq!(
            canonical, expected_canonical,
            "canonical JSON drifted from TS-side stableJsonString shape",
        );

        // SHA-256 hex of the canonical bytes above.
        let expected_sha = "d00054d7886e1d73c07a0086e5cbccddf62de3c0cadae31e75d78215b3293ece";
        assert_eq!(
            computed, expected_sha,
            "SHA-256 hex drifted; canonical bytes were:\n  {canonical}",
        );
    }
538
539    /// /qa A5 (2026-05-10): parity-fixture for `Lifecycle::Spec` — locks
540    /// the canonical-JSON byte shape and SHA-256 for the `"spec"` discriminant.
541    #[test]
542    fn checksum_parity_fixture_lifecycle_spec() {
543        let frame: WALFrame<u64> = WALFrame {
544            t: WalTag,
545            lifecycle: Lifecycle::Spec,
546            path: "p".into(),
547            change: BaseChange {
548                structure: "s".into(),
549                version: Version::Counter(0),
550                t_ns: 0,
551                seq: None,
552                lifecycle: Lifecycle::Spec,
553                change: 0,
554            },
555            frame_seq: 0,
556            frame_t_ns: 0,
557            checksum: String::new(),
558            format_version: 1,
559        };
560        let expected_sha = "7e857f0862bd429d7d144980a2580da732e0d4b420a03d73d63462368f896c3b";
561        assert_eq!(wal_frame_checksum(&frame).unwrap(), expected_sha);
562    }
563
564    /// /qa A5 (2026-05-10): parity-fixture for `Lifecycle::Ownership`.
565    #[test]
566    fn checksum_parity_fixture_lifecycle_ownership() {
567        let frame: WALFrame<u64> = WALFrame {
568            t: WalTag,
569            lifecycle: Lifecycle::Ownership,
570            path: "p".into(),
571            change: BaseChange {
572                structure: "s".into(),
573                version: Version::Counter(0),
574                t_ns: 0,
575                seq: None,
576                lifecycle: Lifecycle::Ownership,
577                change: 0,
578            },
579            frame_seq: 0,
580            frame_t_ns: 0,
581            checksum: String::new(),
582            format_version: 1,
583        };
584        let expected_sha = "901d3d70d38d954864243bdee5a88cb6d204e5e9823598606d38c10e604c3af4";
585        assert_eq!(wal_frame_checksum(&frame).unwrap(), expected_sha);
586    }
587
588    /// /qa A6 (2026-05-10): parity-fixture for `seq: Some(0)`. The
589    /// `skip_serializing_if` attribute means `None` omits the field; `Some(0)`
590    /// emits `"seq":0`. Both round-trip cleanly. Distinct SHA from the
591    /// `seq: None` fixture above proves the canonical body differs.
592    #[test]
593    fn checksum_parity_fixture_seq_some_zero() {
594        let frame: WALFrame<u64> = WALFrame {
595            t: WalTag,
596            lifecycle: Lifecycle::Data,
597            path: "p".into(),
598            change: BaseChange {
599                structure: "s".into(),
600                version: Version::Counter(0),
601                t_ns: 0,
602                seq: Some(0),
603                lifecycle: Lifecycle::Data,
604                change: 0,
605            },
606            frame_seq: 0,
607            frame_t_ns: 0,
608            checksum: String::new(),
609            format_version: 1,
610        };
611        let expected_sha = "da42bdfa3eff9dbb7ffc60b04c7478cbe7cbb7015ba48963b4ea4661f678c387";
612        assert_eq!(wal_frame_checksum(&frame).unwrap(), expected_sha);
613    }
614
615    /// /qa A7 (2026-05-10): `WalTag` deserialization rejects non-string JSON
616    /// tokens (null, number, array, object) with a clear error — not just
617    /// other string values.
618    #[test]
619    fn wal_tag_rejects_non_string_tokens() {
620        for bad in ["null", "42", "[]", "{}", "true"] {
621            let r: Result<WalTag, _> = serde_json::from_str(bad);
622            assert!(r.is_err(), "WalTag must reject {bad}");
623        }
624    }
625
626    /// /qa A13 (2026-05-10): sanity-check the `WALFrame<T>` shape for two
627    /// non-trivial payload types — unit `()` and `serde_json::Value` (the
628    /// "any JSON" escape hatch). Both must round-trip with stable checksums.
629    #[test]
630    fn wal_frame_unit_payload_round_trips() {
631        let frame: WALFrame<()> = WALFrame {
632            t: WalTag,
633            lifecycle: Lifecycle::Data,
634            path: "p".into(),
635            change: BaseChange {
636                structure: "unit".into(),
637                version: Version::Counter(0),
638                t_ns: 0,
639                seq: None,
640                lifecycle: Lifecycle::Data,
641                change: (),
642            },
643            frame_seq: 0,
644            frame_t_ns: 0,
645            checksum: String::new(),
646            format_version: 1,
647        };
648        let mut f = frame.clone();
649        f.checksum = wal_frame_checksum(&frame).unwrap();
650        assert!(verify_wal_frame_checksum(&f).unwrap());
651    }
652
653    #[test]
654    fn wal_frame_value_payload_round_trips() {
655        use serde_json::json;
656        let payload = json!({"kind": "set", "key": "k1", "value": [1, 2, 3]});
657        let frame: WALFrame<serde_json::Value> = WALFrame {
658            t: WalTag,
659            lifecycle: Lifecycle::Data,
660            path: "node/state".into(),
661            change: BaseChange {
662                structure: "graphValue".into(),
663                version: Version::Counter(1),
664                t_ns: 100,
665                seq: Some(7),
666                lifecycle: Lifecycle::Data,
667                change: payload,
668            },
669            frame_seq: 17,
670            frame_t_ns: 200,
671            checksum: String::new(),
672            format_version: 1,
673        };
674        let mut f = frame.clone();
675        f.checksum = wal_frame_checksum(&frame).unwrap();
676        assert!(verify_wal_frame_checksum(&f).unwrap());
677    }
678
679    /// /qa F5 (2026-05-12): backward-compatible deserialization of
680    /// pre-`format_version` frames. Old frames serialized WITHOUT the
681    /// `format_version` field must deserialize successfully with
682    /// `format_version` defaulting to `1`.
683    #[test]
684    fn format_version_defaults_on_old_frame_json() {
685        // JSON from a pre-format_version frame (no `format_version` key).
686        let old_json = r#"{
687            "t": "c",
688            "lifecycle": "data",
689            "path": "p",
690            "change": {
691                "structure": "s",
692                "version": 0,
693                "t_ns": 0,
694                "lifecycle": "data",
695                "change": 0
696            },
697            "frame_seq": 0,
698            "frame_t_ns": 0,
699            "checksum": ""
700        }"#;
701        let frame: WALFrame<u64> = serde_json::from_str(old_json).unwrap();
702        assert_eq!(
703            frame.format_version, 1,
704            "missing format_version must default to 1"
705        );
706    }
707
708    /// /qa F5 (2026-05-12): new frames with explicit `format_version`
709    /// round-trip correctly.
710    #[test]
711    fn format_version_round_trips() {
712        let frame = WALFrame {
713            t: WalTag,
714            lifecycle: Lifecycle::Data,
715            path: "p".into(),
716            change: BaseChange {
717                structure: "s".into(),
718                version: Version::Counter(0),
719                t_ns: 0,
720                seq: None,
721                lifecycle: Lifecycle::Data,
722                change: 0u64,
723            },
724            frame_seq: 0,
725            frame_t_ns: 0,
726            checksum: String::new(),
727            format_version: 2,
728        };
729        let json = serde_json::to_string(&frame).unwrap();
730        let deser: WALFrame<u64> = serde_json::from_str(&json).unwrap();
731        assert_eq!(deser.format_version, 2);
732    }
733
/// B1 (2026-05-22, /porting-to-rs storage-honest-error batch): checksum
/// encoding refuses object keys that contain non-ASCII characters and
/// reports `ChecksumError::NonCanonicalContent`, rather than silently
/// producing JSON whose key order could diverge from TS `stableJsonString`
/// (UTF-16 code-unit order vs UTF-8 byte order). The offending key is
/// injected through the user-supplied `change.change` payload as a nested
/// JSON object, and the diagnostic is expected to name that key.
#[test]
fn canonical_json_rejects_non_ascii_object_keys() {
    use serde_json::json;
    // The KEY (not the value) carries the non-ASCII character 'é'.
    let payload = json!({ "café": 1 });
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: payload,
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };
    // The checksum computation must fail instead of returning a hash.
    let outcome = wal_frame_checksum(&frame);
    let err = outcome.expect_err("B1 guard must reject");
    let msg = err.to_string();
    let is_non_canonical = matches!(err, ChecksumError::NonCanonicalContent { .. });
    assert!(
        is_non_canonical,
        "expected NonCanonicalContent, got: {err:?}"
    );
    let names_key = msg.contains("café");
    assert!(
        names_key,
        "diagnostic must name the offending key, got: {msg}"
    );
}
777
/// B1 companion: subnormal f64 values are rejected. `serde_json::Number`
/// rejects NaN/Inf at construction (those flow through the
/// `CanonicalJsonFailed` variant), but subnormals (denormalized floats)
/// pass a plain finiteness check while their textual form may still
/// diverge between JS `JSON.stringify` and Rust `serde_json` (`ryu`) — so
/// the guard is expected to reject them at encode time with
/// `ChecksumError::NonCanonicalContent` and a message mentioning
/// "subnormal".
#[test]
fn canonical_json_rejects_subnormal_f64() {
    use serde_json::json;
    // `f64::MIN_POSITIVE` is the smallest positive *normal* f64; halving it
    // yields a non-zero value in the subnormal range.
    let subnormal: f64 = f64::MIN_POSITIVE / 2.0;
    // Pin the precondition precisely: `!is_normal()` alone would also hold
    // for 0.0, NaN and Inf, which are not what this test is about.
    assert!(subnormal.is_subnormal());
    // A finiteness check alone would not catch this value.
    assert!(subnormal.is_finite());
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change: BaseChange {
            structure: "s".into(),
            version: Version::Counter(0),
            t_ns: 0,
            seq: None,
            lifecycle: Lifecycle::Data,
            // The subnormal rides in as a VALUE under an ASCII key, so the
            // key guard cannot be the reason for the rejection.
            change: json!({ "tiny": subnormal }),
        },
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };
    let err = wal_frame_checksum(&frame).expect_err("B1 subnormal guard must reject");
    assert!(
        matches!(err, ChecksumError::NonCanonicalContent { .. }),
        "expected NonCanonicalContent, got: {err:?}"
    );
    assert!(
        err.to_string().contains("subnormal"),
        "diagnostic must mention subnormal, got: {err}"
    );
}
820
/// B1 negative control: a payload with ASCII-only keys, an integer, a
/// normal float and positive zero must pass the canonical-JSON guard, so
/// the rejections added for B1 do not regress the happy path.
#[test]
fn canonical_json_guard_passes_ascii_and_normal_floats() {
    use serde_json::json;
    let payload = json!({ "ascii_key": 42, "float": 1.5, "zero": 0.0 });
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: payload,
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };
    // Only success matters here; the concrete hash value is not asserted.
    let outcome = wal_frame_checksum(&frame);
    outcome.expect("ASCII keys + normal floats must pass the guard");
}
846
/// /qa G2.5 (2026-05-22): `-0.0` must be rejected because
/// `JSON.stringify(-0.0) === "0"` while Rust `serde_json` emits `-0.0` —
/// the kind of cross-implementation divergence the canonical-JSON guard
/// exists to catch. The earlier guard `f != 0.0 && !f.is_normal()` let
/// `-0.0` through since `-0.0 == 0.0`; the bit-pattern discriminator added
/// in that /qa pass closes the gap. Positive zero must remain accepted.
#[test]
fn canonical_json_rejects_negative_zero() {
    use serde_json::json;
    // Both frames in this test differ only in their payload, so build them
    // through one local helper.
    let frame_with = |payload: serde_json::Value| -> WALFrame<serde_json::Value> {
        WALFrame {
            t: WalTag,
            lifecycle: Lifecycle::Data,
            path: "p".into(),
            change: BaseChange {
                structure: "s".into(),
                version: Version::Counter(0),
                t_ns: 0,
                seq: None,
                lifecycle: Lifecycle::Data,
                change: payload,
            },
            frame_seq: 0,
            frame_t_ns: 0,
            checksum: String::new(),
            format_version: 1,
        }
    };

    // Construct the number through the typed `Number::from_f64` entry point
    // (as the original author did) instead of a `json!` literal, and confirm
    // the sign bit is the only bit set.
    let neg_zero = -0.0_f64;
    assert_eq!(neg_zero.to_bits(), 1_u64 << 63);
    let neg_zero_number = serde_json::Number::from_f64(neg_zero).expect("finite");
    let mut object = serde_json::Map::new();
    object.insert(
        "neg_zero".to_owned(),
        serde_json::Value::Number(neg_zero_number),
    );
    let frame = frame_with(serde_json::Value::Object(object));
    let err = wal_frame_checksum(&frame).expect_err("B1 / G2.5 guard must reject -0.0");
    assert!(
        matches!(err, ChecksumError::NonCanonicalContent { .. }),
        "expected NonCanonicalContent, got: {err:?}"
    );

    // Sanity check: the discriminator must not have widened to reject the
    // legitimate positive-zero case.
    let frame_pos_zero = frame_with(json!({ "pos_zero": 0.0 }));
    wal_frame_checksum(&frame_pos_zero).expect("positive +0.0 must continue to pass the guard");
}
909
/// /qa G2.6 (2026-05-22): the validator's recursion is depth-capped (128
/// per the G2.6 note) so an adversarial, deeply nested user payload cannot
/// exhaust the stack at WAL write time. A tree deeper than the cap must be
/// reported as `NonCanonicalContent` — the same channel as the other
/// divergence-class rejections — with a message mentioning "depth".
#[test]
fn canonical_json_rejects_excessive_nesting_depth() {
    // Wrap a numeric leaf in 200 single-key objects. The tree is assembled
    // programmatically because, per the original note, parsing a literal of
    // this depth would trip serde's own recursion guard first.
    let leaf = serde_json::Value::Number(serde_json::Number::from(0_u64));
    let deep = (0..200).fold(leaf, |inner, _| {
        let mut wrapper = serde_json::Map::new();
        wrapper.insert("n".to_owned(), inner);
        serde_json::Value::Object(wrapper)
    });
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: deep,
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };
    let err = wal_frame_checksum(&frame).expect_err("G2.6 depth cap must reject");
    let is_non_canonical = matches!(err, ChecksumError::NonCanonicalContent { .. });
    assert!(
        is_non_canonical,
        "expected NonCanonicalContent, got: {err:?}"
    );
    let mentions_depth = err.to_string().contains("depth");
    assert!(
        mentions_depth,
        "diagnostic must mention depth, got: {err}"
    );
}
953
/// /qa A10 (2026-05-10): canary for `serde_json/preserve_order` being
/// switched on through Cargo feature unification. Canonical-JSON parity
/// relies on `serde_json::Map<String, Value>` iterating in sorted key
/// order (the `BTreeMap` backing). With `preserve_order` enabled the map
/// becomes insertion-ordered (`IndexMap`), and this test fails loudly.
#[test]
fn preserve_order_feature_is_not_enabled() {
    // Insert keys in reverse-alphabetical order: a sorted map serializes
    // them as a, z; an insertion-ordered map would serialize z, a.
    let mut object = serde_json::Map::new();
    for (key, value) in [("z", serde_json::json!(1)), ("a", serde_json::json!(2))] {
        object.insert(key.into(), value);
    }
    let serialized = serde_json::to_string(&serde_json::Value::Object(object)).unwrap();
    let sorted_form = r#"{"a":2,"z":1}"#;
    assert_eq!(
        serialized, sorted_form,
        "serde_json `preserve_order` feature appears to be enabled \
         workspace-wide via Cargo feature unification — this BREAKS the \
         WAL checksum canonical-JSON parity invariant. Find the offending \
         dep with `cargo tree -e features | grep preserve_order` and \
         either disable it or pin to a non-preserve-order codec route.",
    );
}
978}