graphrefly_storage/wal.rs
1//! WAL frame substrate (Phase 14.6 — DS-14-storage Q1+Q3+Q5 locks, M4.A
2//! 2026-05-10).
3//!
4//! On-disk frame format consumed by `Graph::restore_snapshot({ mode:"diff" })`
5//! (M4.E). Each frame decomposes a single graph diff into one DS-14
6//! [`BaseChange<T>`] envelope per structural-or-value change, scoped by
7//! [`Lifecycle`] so callers can narrow rewinds.
8//!
9//! The TS reference impl lives at
10//! `packages/pure-ts/src/extra/storage/wal.ts`. Field names + checksum
11//! algorithm are parity-locked — both impls produce byte-identical hex SHA-256
12//! over the same canonical-JSON encoding of a frame body.
13//!
14//! # Canonical-JSON parity
15//!
16//! TS's `stableJsonString` (`packages/pure-ts/src/extra/storage/core.ts:22-39`)
17//! is "recursively sort object keys, then `JSON.stringify(_, undefined, 0)`".
18//! The Rust port mirrors this by routing the frame body through
19//! [`serde_json::to_value`] (lands in `serde_json::Map` which is `BTreeMap` by
20//! default — sorted iteration) then [`serde_json::to_string`]. Output is
21//! byte-identical to TS for the WAL frame schema: ASCII keys, integer
22//! numerics, no floats.
23//!
24//! **Parity caveats** (lift when a real consumer surfaces):
//! - Object KEYS containing non-ASCII code points: JS sorts keys by UTF-16
//!   code-unit order; Rust `BTreeMap` sorts by UTF-8 byte order. For ASCII
//!   keys these agree; for keys containing code points ≥ U+10000 they
//!   don't. The frame schema's own keys are ASCII, so this can only bite
//!   through object keys inside a user payload (`change.change`); the
//!   encoder rejects such keys with `ChecksumError::NonCanonicalContent`.
//!   String VALUES such as `path` or `change.structure` take no part in
//!   key sorting, and non-BMP code points are not expected in graph
//!   identifiers anyway.
//! - Float-typed user payloads: JS `JSON.stringify` and Rust's `serde_json`
//!   (via `ryu`) both print shortest-round-trip decimals, but they do not
//!   agree on every finite f64. Subnormals and `-0.0` are rejected by the
//!   encoder for that reason. Integral-valued floats are NOT rejected and
//!   also differ (`serde_json` prints `1.0`, `JSON.stringify` prints `1`).
//!   WAL frames typically carry integer-only data; if a user puts a float
//!   in `change.change`, document the constraint.
36//!
37//! # Checksum
38//!
39//! SHA-256 over canonical-JSON of the frame body (everything except the
40//! `checksum` field itself), encoded as a 64-char lowercase hex string.
41//! Spec-locked at `GRAPHREFLY-SPEC.md:1201-1206` — original BLAKE3 lock was
42//! revised to SHA-256 so the TS impl could stay zero-dep (no BLAKE3 in
43//! `WebCrypto`). Rust matches via `sha2` + `hex`.
44
45use serde::{Deserialize, Serialize};
46use sha2::{Digest, Sha256};
47
48use graphrefly_structures::{BaseChange, Lifecycle};
49
50// ── WAL frame envelope ─────────────────────────────────────────────────────
51
/// On-disk WAL frame (DS-14-storage Q1 lock).
///
/// Two seq fields and two timestamp fields are intentional:
/// - [`Self::frame_seq`] ≠ `change.seq`: latter is the bundle's `mutations`
///   cursor (DS-14 T1); former is the WAL tier's own cursor (this record's
///   position in the WAL stream). Replay uses `frame_seq` for ordering;
///   `change.seq` is only relevant for bundle-level cursor restoration.
/// - [`Self::frame_t_ns`] ≠ `change.t_ns`: latter is wall-clock at mutation
///   entry; former is wall-clock at WAL-write time. Under debounced tiers
///   they differ by `debounce_ms`.
///
/// The bridge wire format (DS-14 PART 5 worker bridge) is the schema-narrowed
/// subset `{ t, lifecycle, path, change }` — this struct is the
/// persistence-tier superset (DS-14-storage L3 lock).
///
/// Field declaration order is part of the plain (non-canonical) serde output;
/// only the checksum body is re-sorted into canonical key order.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct WALFrame<T> {
    /// Bridge tag — discriminator shared with the DS-14 worker-bridge wire
    /// format. Always serializes as the string `"c"` (TS uses a literal
    /// `"c"` value). Modeled as the zero-sized [`WalTag`] marker rather than
    /// a `String`, so any other value is rejected when a frame is parsed.
    pub t: WalTag,
    /// Lifecycle scope (DS-14 PART 4). Determines replay phase ordering.
    pub lifecycle: Lifecycle,
    /// Target node / bundle path (per-graph qualified path).
    pub path: String,
    /// DS-14 universal [`BaseChange<T>`] envelope — structure-tagged delta.
    pub change: BaseChange<T>,
    /// WAL-tier monotonic cursor (uniquely owned by the WAL tier writer).
    pub frame_seq: u64,
    /// Wall-clock at WAL-write time (matches `wall_clock_ns()`).
    pub frame_t_ns: u64,
    /// SHA-256 over the canonical-JSON of the frame body sans `checksum`,
    /// encoded as a 64-char lowercase hex string. Hex (vs raw bytes) keeps
    /// the wire format JSON-codec-friendly. M4.A parity-fixture asserts
    /// byte-equivalence against the TS impl.
    ///
    /// `#[serde(default)]`: a frame parsed without this key gets an empty
    /// string, which can never equal a 64-char digest and so fails
    /// verification rather than failing to parse.
    #[serde(default)]
    pub checksum: String,
    /// Codec version tag. All M4.A frames are implicitly version 1
    /// (JSON codec). Defaults to `1` for backward-compatible deserialization
    /// of frames written before this field was added.
    ///
    /// Not part of the checksum body (see `ChecksumBody`), so changing it
    /// does not change the frame's checksum.
    #[serde(default = "default_format_version")]
    pub format_version: u32,
}
94
/// Serde fallback for `WALFrame::format_version`: frames that lack the field
/// are treated as version 1.
fn default_format_version() -> u32 {
    const IMPLICIT_FORMAT_VERSION: u32 = 1;
    IMPLICIT_FORMAT_VERSION
}
98
/// Singleton-string discriminator for the bridge wire-format tag. Always
/// serializes / deserializes as `"c"`; rejects any other value at parse time.
///
/// A unit struct: it carries no data, so the only information it encodes is
/// "this field was present and equal to [`WalTag::VALUE`]".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct WalTag;

impl WalTag {
    /// The one wire value this tag is written as and accepts on read.
    pub const VALUE: &'static str = "c";
}
107
108impl Serialize for WalTag {
109 fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
110 serializer.serialize_str(Self::VALUE)
111 }
112}
113
114impl<'de> Deserialize<'de> for WalTag {
115 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
116 let s = String::deserialize(deserializer)?;
117 if s == Self::VALUE {
118 Ok(WalTag)
119 } else {
120 Err(serde::de::Error::custom(format!(
121 "WALFrame.t must be {:?}, got {:?}",
122 Self::VALUE,
123 s
124 )))
125 }
126 }
127}
128
129// ── Key format (Q5) ────────────────────────────────────────────────────────
130
/// Default WAL prefix segment relative to a `graph.name`. Frames land at
/// `${graph.name}/${WAL_KEY_SEGMENT}/${frame_seq:020}`.
///
/// `graph_wal_prefix` builds the first two components; `wal_frame_key`
/// appends the zero-padded `frame_seq`.
pub const WAL_KEY_SEGMENT: &str = "wal";
134
/// Pad width for `frame_seq` in WAL keys. `u64::MAX`
/// (18446744073709551615) has exactly 20 decimal digits, so padding to 20
/// keeps lex-ASC string sort equal to numeric ASC sort for every
/// representable `frame_seq`.
pub const WAL_FRAME_SEQ_PAD: usize = 20;

/// Build the canonical WAL frame key. `prefix` is the WAL-prefix portion (e.g.
/// `"my-graph/wal"`); `frame_seq` is the per-frame cursor. Zero-padded to
/// [`WAL_FRAME_SEQ_PAD`] digits so lex-ASC string sort equals numeric ASC
/// sort.
///
/// `prefix` is used verbatim: no separator normalization is performed, and a
/// single `/` is always inserted before the padded sequence number.
#[must_use]
pub fn wal_frame_key(prefix: &str, frame_seq: u64) -> String {
    // The width comes from the shared constant so the documented pad width
    // and the emitted key can never drift apart.
    format!("{prefix}/{frame_seq:0width$}", width = WAL_FRAME_SEQ_PAD)
}
146
147/// Default WAL key prefix for a graph by its `name`.
148#[must_use]
149pub fn graph_wal_prefix(graph_name: &str) -> String {
150 format!("{graph_name}/{WAL_KEY_SEGMENT}")
151}
152
153// ── Replay order (Q2) ──────────────────────────────────────────────────────
154
/// Cross-scope replay order (DS-14 PART 4 lock — `Spec → Data → Ownership`).
/// Exported so the replay implementation and parity tests share one source of
/// truth.
///
/// Index 0 is replayed first. The array lists each of the three lifecycle
/// scopes named here exactly once.
pub const REPLAY_ORDER: [Lifecycle; 3] = [Lifecycle::Spec, Lifecycle::Data, Lifecycle::Ownership];
159
160// ── Checksum ───────────────────────────────────────────────────────────────
161
/// Errors surfaced by checksum compute / verify.
#[derive(Debug, thiserror::Error)]
pub enum ChecksumError {
    /// `serde_json` rejected the frame body — typically a payload it cannot
    /// represent as JSON (e.g. a map whose keys cannot be rendered as JSON
    /// strings).
    ///
    /// Non-finite floats do NOT land here: `serde_json::to_value` encodes
    /// `NaN` / `±Infinity` as `null` rather than returning an error.
    #[error("canonical JSON encoding failed: {0}")]
    CanonicalJsonFailed(#[from] serde_json::Error),
    /// Frame body contains content that cannot round-trip cross-impl through
    /// canonical JSON. Specifically: non-ASCII object keys (JS sorts by UTF-16
    /// code-unit order; Rust `BTreeMap` sorts by UTF-8 byte order — these
    /// diverge for keys containing surrogate-pair code points), subnormal
    /// or negative-zero f64 values (JS `JSON.stringify` and Rust
    /// `serde_json` via `ryu` may diverge on these), or nesting deeper than
    /// `VALIDATE_CANONICAL_MAX_DEPTH`. Banned at the WAL encode boundary
    /// (B1 — option a) rather than allowed-with-silent-divergence.
    ///
    /// `reason` is a human-readable description of the first offending
    /// item found.
    #[error("non-canonical content rejected by WAL encoder: {reason}")]
    NonCanonicalContent { reason: String },
}
179
/// Body fields contributing to the checksum, in the shape TS computes over
/// (TS's `canonicalFrameBody` at `wal.ts:141`). The `checksum` field of the
/// outer [`WALFrame`] is deliberately excluded.
///
/// `format_version` is not part of this body either, so it does not
/// influence the digest.
/// NOTE(review): presumably intentional for TS parity — confirm against
/// `canonicalFrameBody`.
///
/// Declaration order here is irrelevant to the hash: the body is re-encoded
/// with sorted keys by `canonical_json` before hashing.
#[derive(Serialize)]
struct ChecksumBody<'a, T: Serialize> {
    // Always `WalTag::VALUE`; stored as a plain string slice so the body
    // serializes without going through the `WalTag` marker type.
    t: &'static str,
    lifecycle: &'a Lifecycle,
    path: &'a str,
    change: &'a BaseChange<T>,
    frame_seq: u64,
    frame_t_ns: u64,
}
192
193/// Encode a typed value to canonical JSON (sorted keys, no whitespace).
194/// Routes through [`serde_json::Value`] so the resulting `serde_json::Map<
195/// String, Value>` (BTreeMap-backed by default) iterates in sorted-key order
196/// — byte-identical to TS `stableJsonString` on the WAL schema.
197///
198/// **Cross-impl encode guard (B1 — option a).** After conversion to
199/// [`serde_json::Value`], the tree is walked once and rejected via
200/// [`ChecksumError::NonCanonicalContent`] if it contains any input that
201/// cannot round-trip byte-identically through TS canonical JSON:
202///
203/// - **Non-ASCII object keys** — JS sorts by UTF-16 code-unit order; Rust
204/// `BTreeMap` sorts by UTF-8 byte order. For keys containing surrogate-pair
205/// code points (≥ U+10000) these diverge. WAL frame schema keys are ASCII
206/// by spec; user-supplied identifiers in `path` or `change.structure`
207/// are NOT — this guard surfaces the divergence at write time rather than
208/// letting checksums silently mismatch on cross-impl replay.
209/// - **Subnormal f64** — JS `JSON.stringify` and Rust `serde_json` (via `ryu`)
210/// may format denormalized floats differently; subnormals are vanishingly
211/// rare in graphrefly payloads (integer counters dominate) but the
212/// divergence is theoretical, so the strict guard refuses them.
213/// - **NaN / ±Infinity** — already rejected at `serde_json::Number::from_f64`
214/// conversion (returns `None`); the resulting `serde_json::Error` flows
215/// through the existing `CanonicalJsonFailed` variant.
216fn canonical_json<T: Serialize>(value: &T) -> Result<String, ChecksumError> {
217 let v = serde_json::to_value(value)?;
218 validate_canonical(&v, 0)?;
219 serde_json::to_string(&v).map_err(ChecksumError::from)
220}
221
/// /qa G2.6 (2026-05-22): depth cap. graphrefly payloads are bounded
/// (`BaseChange<T>` envelopes), but `serde_json::Value` is the escape
/// hatch — a misbehaving binding could synthesize a deeply-nested tree
/// and overflow the recursion stack inside this validator. 128 levels
/// matches `serde_json::de::Deserializer`'s default recursion limit; a
/// value tree exceeding it cannot have been parsed from JSON and is
/// rejected at the canonical-encode boundary instead of crashing the
/// process.
///
/// `validate_canonical` starts at depth 0 for the root and rejects when
/// `depth > VALIDATE_CANONICAL_MAX_DEPTH`, so values at depths `0..=128`
/// are accepted and the first rejected depth is 129.
/// NOTE(review): that is slightly more permissive than "128 levels" —
/// confirm whether the bound is meant to be inclusive.
const VALIDATE_CANONICAL_MAX_DEPTH: u32 = 128;
231
232/// Recursively validate a `Value` tree against the canonical-encode guard.
233/// See [`canonical_json`] for the rejection criteria.
234fn validate_canonical(v: &serde_json::Value, depth: u32) -> Result<(), ChecksumError> {
235 if depth > VALIDATE_CANONICAL_MAX_DEPTH {
236 return Err(ChecksumError::NonCanonicalContent {
237 reason: format!(
238 "JSON nesting depth exceeds {VALIDATE_CANONICAL_MAX_DEPTH} \
239 (matches serde_json's default deserialization recursion limit; \
240 deeper trees cannot round-trip through standard JSON parsers)"
241 ),
242 });
243 }
244 match v {
245 serde_json::Value::Object(map) => {
246 for (k, child) in map {
247 if !k.is_ascii() {
248 return Err(ChecksumError::NonCanonicalContent {
249 reason: format!(
250 "non-ASCII object key {k:?} \
251 (JS sorts UTF-16 code units, Rust sorts UTF-8 bytes — \
252 divergent for code points ≥ U+10000)"
253 ),
254 });
255 }
256 validate_canonical(child, depth + 1)?;
257 }
258 Ok(())
259 }
260 serde_json::Value::Array(arr) => arr
261 .iter()
262 .try_for_each(|child| validate_canonical(child, depth + 1)),
263 serde_json::Value::Number(n) => {
264 if let Some(f) = n.as_f64() {
265 // `serde_json::Number::from_f64` already rejects NaN/Inf, so
266 // any f64 reaching this point is finite. Subnormals slip
267 // through finite checks but format-differ between TS and Rust.
268 //
269 // /qa G2.5 (2026-05-22): the prior guard `f != 0.0 && !f.is_normal()`
270 // accepted `-0.0` because `-0.0 == 0.0` is true — but
271 // `JSON.stringify(-0.0)` is `"0"` while `serde_json` emits
272 // `"-0.0"`. That is EXACTLY the divergence the guard exists
273 // to catch. Discriminate via the raw bit pattern: positive
274 // zero is `0x0000_0000_0000_0000`; any other bit pattern
275 // (including `0x8000_0000_0000_0000` for `-0.0` and all
276 // subnormals) is rejected.
277 let bits = f.to_bits();
278 if bits != 0 && !f.is_normal() {
279 return Err(ChecksumError::NonCanonicalContent {
280 reason: format!(
281 "non-canonical f64 {f:e} (bits={bits:#018x}) \
282 (rejects -0.0 + subnormals — JS `JSON.stringify` \
283 and Rust `serde_json` may format these differently)"
284 ),
285 });
286 }
287 }
288 Ok(())
289 }
290 // String values can carry arbitrary UTF-8 — the canonical-JSON rule
291 // only constrains *keys* (which control sort order); string values
292 // serialize identically across impls via `\uXXXX` for non-ASCII.
293 serde_json::Value::String(_) | serde_json::Value::Bool(_) | serde_json::Value::Null => {
294 Ok(())
295 }
296 }
297}
298
299/// Compute the SHA-256 checksum over a frame's body (sans `checksum`),
300/// returning a 64-char lowercase hex string. Parity-locked with TS
301/// `walFrameChecksum`.
302pub fn wal_frame_checksum<T: Serialize>(frame: &WALFrame<T>) -> Result<String, ChecksumError> {
303 let body = ChecksumBody {
304 t: WalTag::VALUE,
305 lifecycle: &frame.lifecycle,
306 path: frame.path.as_str(),
307 change: &frame.change,
308 frame_seq: frame.frame_seq,
309 frame_t_ns: frame.frame_t_ns,
310 };
311 let canonical = canonical_json(&body)?;
312 let digest = Sha256::digest(canonical.as_bytes());
313 Ok(hex::encode(digest))
314}
315
316/// Verify a frame's `checksum` field matches its body. Replay invokes this at
317/// the WAL tail (drop on mismatch by default) and mid-stream (abort on
318/// mismatch by default) per Q3.
319pub fn verify_wal_frame_checksum<T: Serialize>(frame: &WALFrame<T>) -> Result<bool, ChecksumError> {
320 let expected = wal_frame_checksum(frame)?;
321 Ok(frame.checksum == expected)
322}
323
324// ── Tests ──────────────────────────────────────────────────────────────────
325
326#[cfg(test)]
327mod tests {
328 use super::*;
329 use graphrefly_structures::Version;
330
331 fn sample_frame() -> WALFrame<u64> {
332 WALFrame {
333 t: WalTag,
334 lifecycle: Lifecycle::Data,
335 path: "root/state".into(),
336 change: BaseChange {
337 structure: "graphValue".into(),
338 version: Version::Counter(1),
339 t_ns: 1_700_000_000_000,
340 seq: Some(0),
341 lifecycle: Lifecycle::Data,
342 change: 42,
343 },
344 frame_seq: 17,
345 frame_t_ns: 1_700_000_001_000,
346 checksum: String::new(),
347 format_version: 1,
348 }
349 }
350
/// Keys are zero-padded to 20 digits at both ends of the `u64` range.
#[test]
fn wal_frame_key_zero_pads_to_20_digits() {
    assert_eq!(wal_frame_key("g/wal", 0), "g/wal/00000000000000000000");
    assert_eq!(wal_frame_key("g/wal", 17), "g/wal/00000000000000000017");
    // Pinned as a literal: comparing against `format!("{:020}", u64::MAX)`
    // would re-use the very formatting under test and could not fail.
    assert_eq!(
        wal_frame_key("g/wal", u64::MAX),
        "g/wal/18446744073709551615",
    );
}
360
/// Core key invariant: sorting keys as strings orders them by `frame_seq`.
#[test]
fn wal_frame_key_lex_sort_equals_numeric_sort() {
    // Deliberately unsorted, and including 2 / 9 / 10 / 100: without zero
    // padding those sort lexically as "10" < "100" < "2" < "9", so this
    // input fails if padding is ever lost. (An already-ascending list of
    // powers of ten would sort correctly even unpadded.)
    let seqs = [100u64, 9, u64::MAX, 0, 10, 1_000_000, 2, 1];

    let mut keys: Vec<String> = seqs.iter().map(|s| wal_frame_key("g/wal", *s)).collect();
    keys.sort();

    let mut ascending = seqs;
    ascending.sort_unstable();

    assert_eq!(keys.len(), ascending.len());
    for (k, expected) in keys.iter().zip(ascending.iter()) {
        assert!(
            k.ends_with(&format!("{expected:020}")),
            "lex-sort key {k} did not match numeric order for {expected}",
        );
    }
}
375
/// The default prefix is `<graph name>/wal`.
#[test]
fn graph_wal_prefix_joins_with_segment() {
    let prefix = graph_wal_prefix("my-graph");
    assert_eq!(prefix, "my-graph/wal");
}
380
/// A frame stamped with its own freshly computed checksum verifies.
#[test]
fn checksum_roundtrip_verifies() {
    let unsigned = sample_frame();
    let checksum = wal_frame_checksum(&unsigned).unwrap();
    let signed = WALFrame { checksum, ..unsigned };
    assert!(verify_wal_frame_checksum(&signed).unwrap());
}
387
/// Changing the user payload after signing invalidates the checksum.
#[test]
fn checksum_tamper_change_payload_fails_verify() {
    let pristine = sample_frame();
    let checksum = wal_frame_checksum(&pristine).unwrap();
    let mut tampered = WALFrame { checksum, ..pristine };
    tampered.change.change = 43; // fixture payload is 42
    let verified = verify_wal_frame_checksum(&tampered).unwrap();
    assert!(!verified);
}
395
/// Changing `path` after signing invalidates the checksum.
#[test]
fn checksum_tamper_path_fails_verify() {
    let pristine = sample_frame();
    let checksum = wal_frame_checksum(&pristine).unwrap();
    let tampered = WALFrame {
        checksum,
        path: "different/path".into(),
        ..pristine
    };
    let verified = verify_wal_frame_checksum(&tampered).unwrap();
    assert!(!verified);
}
403
/// Changing `frame_seq` after signing invalidates the checksum.
#[test]
fn checksum_tamper_frame_seq_fails_verify() {
    let pristine = sample_frame();
    let checksum = wal_frame_checksum(&pristine).unwrap();
    let tampered = WALFrame {
        checksum,
        frame_seq: 18, // fixture value is 17
        ..pristine
    };
    let verified = verify_wal_frame_checksum(&tampered).unwrap();
    assert!(!verified);
}
411
/// Whatever is already stored in `checksum` must not feed into the digest.
#[test]
fn checksum_excludes_checksum_field_itself() {
    let digest_with_placeholder = |placeholder: String| {
        let mut frame = sample_frame();
        frame.checksum = placeholder;
        wal_frame_checksum(&frame).unwrap()
    };

    let first = digest_with_placeholder("deadbeef".repeat(8));
    let second = digest_with_placeholder("00".repeat(32));
    assert_eq!(
        first, second,
        "wal_frame_checksum must not depend on the existing checksum field",
    );
}
424
/// The digest is exactly 64 characters drawn from `0-9a-f`.
#[test]
fn checksum_is_64_char_lowercase_hex() {
    let digest = wal_frame_checksum(&sample_frame()).unwrap();
    assert_eq!(digest.len(), 64);

    let is_lower_hex = |c: char| c.is_ascii_digit() || ('a'..='f').contains(&c);
    assert!(
        digest.chars().all(is_lower_hex),
        "checksum must be lowercase hex: {}",
        digest,
    );
}
439
/// `WalTag` is written as the JSON string `"c"`.
#[test]
fn wal_tag_serializes_as_string_c() {
    let encoded = serde_json::to_string(&WalTag).unwrap();
    assert_eq!(encoded, r#""c""#);
}
445
/// Any string other than `"c"` fails to parse as a `WalTag`.
#[test]
fn wal_tag_rejects_other_values() {
    let parsed = serde_json::from_str::<WalTag>("\"x\"");
    assert!(parsed.is_err(), "WalTag must reject non-c discriminators");
}
451
/// Canonical JSON lists keys alphabetically regardless of declaration
/// order (mirrors TS `stableJsonString` key sort).
#[test]
fn canonical_json_sorts_keys() {
    // Fields are declared in reverse-alphabetical order on purpose. A
    // single level keeps the expected output trivially auditable.
    #[derive(Serialize)]
    struct Flat {
        zebra: u32,
        monkey: u32,
        apple: u32,
    }

    let input = Flat {
        zebra: 1,
        monkey: 2,
        apple: 3,
    };
    let encoded = canonical_json(&input).unwrap();
    assert_eq!(encoded, r#"{"apple":3,"monkey":2,"zebra":1}"#);
}
473
/// Cross-impl parity fixture.
///
/// Pins both the canonical-JSON bytes and their SHA-256 for a minimal frame
/// so that any drift from the TS `walFrameChecksum` output fails loudly.
/// Inputs are deliberately tiny (single `u64` payload, all numerics zero,
/// `seq` absent) so the expected bytes can be audited by hand.
#[test]
fn checksum_parity_fixture_minimal_frame() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    // Step 1: pin the exact bytes that get hashed (sorted keys, no
    // whitespace, no `seq` key because it is `None`).
    let canonical = canonical_json(&ChecksumBody {
        t: WalTag::VALUE,
        lifecycle: &frame.lifecycle,
        path: frame.path.as_str(),
        change: &frame.change,
        frame_seq: frame.frame_seq,
        frame_t_ns: frame.frame_t_ns,
    })
    .unwrap();
    let expected_canonical = r#"{"change":{"change":0,"lifecycle":"data","structure":"s","t_ns":0,"version":0},"frame_seq":0,"frame_t_ns":0,"lifecycle":"data","path":"p","t":"c"}"#;
    assert_eq!(
        canonical, expected_canonical,
        "canonical JSON drifted from TS-side stableJsonString shape",
    );

    // Step 2: pin the SHA-256 hex of those bytes. Regenerate with
    // `printf '<canonical>' | shasum -a 256`.
    let expected_sha = "d00054d7886e1d73c07a0086e5cbccddf62de3c0cadae31e75d78215b3293ece";
    let computed = wal_frame_checksum(&frame).unwrap();
    assert_eq!(
        computed, expected_sha,
        "SHA-256 hex drifted; canonical bytes were:\n {canonical}",
    );
}
538
/// /qa A5 (2026-05-10): parity fixture for `Lifecycle::Spec` — pins the
/// SHA-256 of the minimal frame when both lifecycle fields are `Spec`.
#[test]
fn checksum_parity_fixture_lifecycle_spec() {
    let scope = Lifecycle::Spec;
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Spec,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: scope,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let computed = wal_frame_checksum(&frame).unwrap();
    let expected_sha = "7e857f0862bd429d7d144980a2580da732e0d4b420a03d73d63462368f896c3b";
    assert_eq!(computed, expected_sha);
}
563
/// /qa A5 (2026-05-10): parity fixture for `Lifecycle::Ownership` — pins the
/// SHA-256 of the minimal frame when both lifecycle fields are `Ownership`.
#[test]
fn checksum_parity_fixture_lifecycle_ownership() {
    let scope = Lifecycle::Ownership;
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Ownership,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: scope,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let computed = wal_frame_checksum(&frame).unwrap();
    let expected_sha = "901d3d70d38d954864243bdee5a88cb6d204e5e9823598606d38c10e604c3af4";
    assert_eq!(computed, expected_sha);
}
587
/// /qa A6 (2026-05-10): parity fixture for `seq: Some(0)`. Per the
/// `skip_serializing_if` attribute on `seq`, `None` omits the key while
/// `Some(0)` emits `"seq":0`; the pinned SHA here therefore differs from the
/// `seq: None` minimal-frame fixture.
#[test]
fn checksum_parity_fixture_seq_some_zero() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: Some(0),
        lifecycle: Lifecycle::Data,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let computed = wal_frame_checksum(&frame).unwrap();
    let expected_sha = "da42bdfa3eff9dbb7ffc60b04c7478cbe7cbb7015ba48963b4ea4661f678c387";
    assert_eq!(computed, expected_sha);
}
614
/// /qa A7 (2026-05-10): `WalTag` parsing fails for JSON tokens that are not
/// strings at all (null, number, array, object, boolean), not only for
/// strings with the wrong content.
#[test]
fn wal_tag_rejects_non_string_tokens() {
    let non_strings = ["null", "42", "[]", "{}", "true"];
    non_strings.iter().for_each(|bad| {
        let parsed = serde_json::from_str::<WalTag>(bad);
        assert!(parsed.is_err(), "WalTag must reject {bad}");
    });
}
625
/// /qa A13 (2026-05-10): `WALFrame<T>` shape check for the unit payload
/// `()` — a frame signed with its own checksum must verify.
#[test]
fn wal_frame_unit_payload_round_trips() {
    let change = BaseChange {
        structure: "unit".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: (),
    };
    let unsigned: WALFrame<()> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let mut signed = unsigned.clone();
    signed.checksum = wal_frame_checksum(&unsigned).unwrap();
    assert!(verify_wal_frame_checksum(&signed).unwrap());
}
652
/// `WALFrame<T>` shape check for a `serde_json::Value` payload (the "any
/// JSON" escape hatch) — a frame signed with its own checksum must verify.
#[test]
fn wal_frame_value_payload_round_trips() {
    use serde_json::json;

    let change = BaseChange {
        structure: "graphValue".into(),
        version: Version::Counter(1),
        t_ns: 100,
        seq: Some(7),
        lifecycle: Lifecycle::Data,
        change: json!({"kind": "set", "key": "k1", "value": [1, 2, 3]}),
    };
    let unsigned: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "node/state".into(),
        change,
        frame_seq: 17,
        frame_t_ns: 200,
        checksum: String::new(),
        format_version: 1,
    };

    let mut signed = unsigned.clone();
    signed.checksum = wal_frame_checksum(&unsigned).unwrap();
    assert!(verify_wal_frame_checksum(&signed).unwrap());
}
678
/// /qa F5 (2026-05-12): frames serialized before `format_version` existed
/// (no such key in the JSON) must still parse, with the field defaulting
/// to `1`.
#[test]
fn format_version_defaults_on_old_frame_json() {
    // Note: no `format_version` key anywhere in this document.
    let old_json = r#"{
 "t": "c",
 "lifecycle": "data",
 "path": "p",
 "change": {
 "structure": "s",
 "version": 0,
 "t_ns": 0,
 "lifecycle": "data",
 "change": 0
 },
 "frame_seq": 0,
 "frame_t_ns": 0,
 "checksum": ""
 }"#;
    let parsed = serde_json::from_str::<WALFrame<u64>>(old_json).unwrap();
    assert_eq!(
        parsed.format_version, 1,
        "missing format_version must default to 1"
    );
}
707
/// /qa F5 (2026-05-12): an explicit, non-default `format_version` survives
/// a serialize → deserialize round trip.
#[test]
fn format_version_round_trips() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: 0u64,
    };
    let original = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 2,
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<WALFrame<u64>>(&encoded).unwrap();
    assert_eq!(decoded.format_version, 2);
}
733
/// B1 (2026-05-22, /porting-to-rs storage-honest-error batch): the WAL
/// canonical-JSON encode guard rejects non-ASCII object keys with
/// `ChecksumError::NonCanonicalContent` instead of silently producing JSON
/// whose key order may diverge from TS `stableJsonString` (UTF-16 vs UTF-8
/// sort order). The offending key is injected through the user payload
/// (`change.change`), which is the only place arbitrary keys can appear.
#[test]
fn canonical_json_rejects_non_ascii_object_keys() {
    use serde_json::json;

    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        // The KEY (not the value) holds the non-ASCII 'é'.
        change: json!({ "café": 1 }),
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    // Rejection happens while encoding, before anything is hashed.
    let err = wal_frame_checksum(&frame).expect_err("B1 guard must reject");
    let msg = err.to_string();
    assert!(
        matches!(err, ChecksumError::NonCanonicalContent { .. }),
        "expected NonCanonicalContent, got: {err:?}"
    );
    assert!(
        msg.contains("café"),
        "diagnostic must name the offending key, got: {msg}"
    );
}
777
/// B1 companion: subnormal f64 values are rejected at encode time. They are
/// finite, so they fit in a `serde_json::Number`, but the guard refuses them
/// because JS `JSON.stringify` and Rust `serde_json` (via `ryu`) may format
/// them differently.
#[test]
fn canonical_json_rejects_subnormal_f64() {
    use serde_json::json;

    // Half of the smallest positive normal f64 lies in the subnormal range:
    // finite, but not `is_normal()`.
    let subnormal: f64 = f64::MIN_POSITIVE / 2.0;
    assert!(!subnormal.is_normal());
    assert!(subnormal.is_finite());

    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: json!({ "tiny": subnormal }),
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let err = wal_frame_checksum(&frame).expect_err("B1 subnormal guard must reject");
    assert!(
        matches!(err, ChecksumError::NonCanonicalContent { .. }),
        "expected NonCanonicalContent, got: {err:?}"
    );
    assert!(
        err.to_string().contains("subnormal"),
        "diagnostic must mention subnormal, got: {err}"
    );
}
820
/// B1 negative control: a payload with ASCII-only keys, an integer, a normal
/// float and positive zero passes the guard (no regression on the happy
/// path).
#[test]
fn canonical_json_guard_passes_ascii_and_normal_floats() {
    use serde_json::json;

    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: json!({ "ascii_key": 42, "float": 1.5, "zero": 0.0 }),
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    // Only success matters here; the concrete digest is not pinned.
    wal_frame_checksum(&frame).expect("ASCII keys + normal floats must pass the guard");
}
846
/// /qa G2.5 (2026-05-22): `-0.0` is rejected because
/// `JSON.stringify(-0.0) === "0"` while Rust `serde_json` emits `-0.0` —
/// the kind of divergence the canonical-JSON guard exists to catch. An
/// earlier guard of the form `f != 0.0 && !f.is_normal()` let it through
/// (`-0.0 == 0.0` is true); the guard now inspects the bit pattern. Positive
/// zero must keep passing.
#[test]
fn canonical_json_rejects_negative_zero() {
    use serde_json::json;

    // Same minimal frame for both halves; only the payload differs.
    let frame_with = |payload: serde_json::Value| -> WALFrame<serde_json::Value> {
        WALFrame {
            t: WalTag,
            lifecycle: Lifecycle::Data,
            path: "p".into(),
            change: BaseChange {
                structure: "s".into(),
                version: Version::Counter(0),
                t_ns: 0,
                seq: None,
                lifecycle: Lifecycle::Data,
                change: payload,
            },
            frame_seq: 0,
            frame_t_ns: 0,
            checksum: String::new(),
            format_version: 1,
        }
    };

    // Build the number through `Number::from_f64` so the sign bit is
    // certain to be carried into the `Value`.
    let neg_zero = -0.0_f64;
    assert_eq!(neg_zero.to_bits(), 0x8000_0000_0000_0000);
    let number = serde_json::Number::from_f64(neg_zero).expect("finite");
    let mut payload = serde_json::Map::new();
    payload.insert("neg_zero".to_owned(), serde_json::Value::Number(number));

    let rejected = frame_with(serde_json::Value::Object(payload));
    let err = wal_frame_checksum(&rejected).expect_err("B1 / G2.5 guard must reject -0.0");
    assert!(
        matches!(err, ChecksumError::NonCanonicalContent { .. }),
        "expected NonCanonicalContent, got: {err:?}"
    );

    // Sanity check that the bit-pattern test did not over-broaden into the
    // legitimate zero case.
    let frame_pos_zero = frame_with(json!({ "pos_zero": 0.0 }));
    wal_frame_checksum(&frame_pos_zero).expect("positive +0.0 must continue to pass the guard");
}
909
/// /qa G2.6 (2026-05-22): the validator's recursion carries a depth cap
/// (documented as 128) so that hostile, deeply-nested user payloads
/// cannot exhaust the stack at WAL write time. A tree past the cap must
/// surface as `NonCanonicalContent` — the same channel used for the other
/// divergence-class rejections — instead of crashing the process.
#[test]
fn canonical_json_rejects_excessive_nesting_depth() {
    // Assemble the nesting programmatically: wrap a numeric leaf in 200
    // single-key objects. A JSON literal of this depth would trip serde's
    // own recursion guard before reaching ours, which is the whole point
    // of having the cap.
    let leaf = serde_json::Value::Number(serde_json::Number::from(0_u64));
    let nested = (0..200).fold(leaf, |inner, _| {
        let mut shell = serde_json::Map::new();
        shell.insert("n".to_owned(), inner);
        serde_json::Value::Object(shell)
    });

    let envelope = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: nested,
    };
    let too_deep: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change: envelope,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let err = wal_frame_checksum(&too_deep).expect_err("G2.6 depth cap must reject");
    // First the error class, then the human-readable diagnostic.
    assert!(
        matches!(err, ChecksumError::NonCanonicalContent { .. }),
        "expected NonCanonicalContent, got: {err:?}"
    );
    let rendered = err.to_string();
    assert!(
        rendered.contains("depth"),
        "diagnostic must mention depth, got: {err}"
    );
}
953
/// /qa A10 (2026-05-10): canary for `serde_json/preserve_order` leaking in
/// through Cargo feature unification. Canonical-JSON parity depends on
/// `serde_json::Map<String, Value>` being `BTreeMap`-backed (sorted
/// iteration). Should any workspace member switch `preserve_order` on,
/// `Map` becomes an insertion-ordered `IndexMap` and this test fails
/// loudly with a diff.
#[test]
fn preserve_order_feature_is_not_enabled() {
    // Insert keys in reverse-alphabetical order. A sorted map serializes
    // them alphabetically; an insertion-ordered map would echo "z" first.
    let mut reversed = serde_json::Map::new();
    for (key, value) in [("z", 1), ("a", 2)] {
        reversed.insert(key.into(), serde_json::json!(value));
    }

    let document = serde_json::Value::Object(reversed);
    let serialized = serde_json::to_string(&document).unwrap();
    let sorted_form = r#"{"a":2,"z":1}"#;
    assert_eq!(
        serialized, sorted_form,
        "serde_json `preserve_order` feature appears to be enabled \
         workspace-wide via Cargo feature unification — this BREAKS the \
         WAL checksum canonical-JSON parity invariant. Find the offending \
         dep with `cargo tree -e features | grep preserve_order` and \
         either disable it or pin to a non-preserve-order codec route.",
    );
}
978}