use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use graphrefly_structures::{BaseChange, Lifecycle};
/// One write-ahead-log frame: a single `BaseChange<T>` together with the
/// lifecycle, path, sequencing metadata and integrity checksum that travel
/// with it.
///
/// The serialized keys are the field names below (serde derive), so renaming
/// a field changes the wire format.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct WALFrame<T> {
/// Frame discriminator; always serializes as the string `"c"` and any other
/// value is rejected on deserialize (see `WalTag`).
pub t: WalTag,
/// Lifecycle this frame belongs to. Covered by the checksum.
pub lifecycle: Lifecycle,
/// Path string associated with the change. Covered by the checksum.
pub path: String,
/// The wrapped change record. Covered by the checksum.
pub change: BaseChange<T>,
/// Sequence number of the frame; `wal_frame_key` zero-pads a sequence number
/// like this one into a storage key. Covered by the checksum.
pub frame_seq: u64,
/// Frame timestamp — presumably nanoseconds, judging by the `_ns` suffix;
/// confirm against the writer. Covered by the checksum.
pub frame_t_ns: u64,
/// Hex digest as produced by `wal_frame_checksum`. Deserializes to an empty
/// string when the key is absent. Not part of the checksummed body.
#[serde(default)]
pub checksum: String,
/// Format version of the frame; deserializes to `1` when the key is absent.
/// Not part of the checksummed body.
#[serde(default = "default_format_version")]
pub format_version: u32,
}
/// Serde default for `WALFrame::format_version`: frames serialized without
/// the field are treated as version 1.
fn default_format_version() -> u32 {
    const IMPLICIT_FORMAT_VERSION: u32 = 1;
    IMPLICIT_FORMAT_VERSION
}
/// Zero-sized marker for the `t` discriminator of a `WALFrame`.
///
/// It carries no data: the hand-written serde impls in this file always
/// serialize it as `WalTag::VALUE` and accept only that string back.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct WalTag;
impl WalTag {
/// The single wire value of the tag.
pub const VALUE: &'static str = "c";
}
impl Serialize for WalTag {
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
serializer.serialize_str(Self::VALUE)
}
}
impl<'de> Deserialize<'de> for WalTag {
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let s = String::deserialize(deserializer)?;
if s == Self::VALUE {
Ok(WalTag)
} else {
Err(serde::de::Error::custom(format!(
"WALFrame.t must be {:?}, got {:?}",
Self::VALUE,
s
)))
}
}
}
/// Path segment placed between a graph's name and its WAL frame keys.
pub const WAL_KEY_SEGMENT: &str = "wal";

/// Number of decimal digits frame sequence numbers are zero-padded to in
/// keys. `u64::MAX` has exactly 20 digits, so with this width every `u64`
/// renders at the same length and keys sort lexicographically in numeric
/// order.
pub const WAL_FRAME_SEQ_PAD: usize = 20;

/// Builds the key for one frame: `{prefix}/{frame_seq}` with the sequence
/// number zero-padded to [`WAL_FRAME_SEQ_PAD`] digits.
///
/// The pad width is read from the constant instead of being repeated as a
/// literal in the format string, so the documented width and the emitted
/// width cannot drift apart.
#[must_use]
pub fn wal_frame_key(prefix: &str, frame_seq: u64) -> String {
    format!("{prefix}/{frame_seq:0width$}", width = WAL_FRAME_SEQ_PAD)
}

/// Returns the key prefix for a graph's WAL frames:
/// `{graph_name}/{WAL_KEY_SEGMENT}`.
#[must_use]
pub fn graph_wal_prefix(graph_name: &str) -> String {
    format!("{graph_name}/{WAL_KEY_SEGMENT}")
}
/// The three lifecycles in the fixed order `Spec`, `Data`, `Ownership`.
///
/// NOTE(review): the name suggests consumers replay frames lifecycle by
/// lifecycle in this order; no consumer is visible in this file — confirm.
pub const REPLAY_ORDER: [Lifecycle; 3] = [Lifecycle::Spec, Lifecycle::Data, Lifecycle::Ownership];
/// Failures that can occur while computing a WAL frame checksum.
#[derive(Debug, thiserror::Error)]
pub enum ChecksumError {
/// `serde_json` failed while converting or encoding the checksum body.
/// Built automatically from `serde_json::Error` via `?` / `From`.
#[error("canonical JSON encoding failed: {0}")]
CanonicalJsonFailed(#[from] serde_json::Error),
/// The body serialized fine but contains content the canonical encoder
/// refuses (see `validate_canonical`); `reason` is the human-readable
/// explanation.
#[error("non-canonical content rejected by WAL encoder: {reason}")]
NonCanonicalContent { reason: String },
}
/// Borrowed view of exactly the frame fields that are hashed.
///
/// It mirrors `WALFrame` minus `checksum` and `format_version`, so neither of
/// those influences the digest. The field names become JSON keys in the
/// canonical encoding.
#[derive(Serialize)]
struct ChecksumBody<'a, T: Serialize> {
// Always filled with `WalTag::VALUE` by `wal_frame_checksum`.
t: &'static str,
lifecycle: &'a Lifecycle,
path: &'a str,
change: &'a BaseChange<T>,
frame_seq: u64,
frame_t_ns: u64,
}
/// Encodes `value` as the JSON string that gets hashed.
///
/// The value is first converted to a `serde_json::Value` tree, the tree is
/// vetted by `validate_canonical`, and only then rendered to text. The test
/// `preserve_order_feature_is_not_enabled` pins that `serde_json` emits object
/// keys in sorted order, which is what makes the output canonical.
///
/// # Errors
/// `ChecksumError::CanonicalJsonFailed` if `serde_json` fails, or
/// `ChecksumError::NonCanonicalContent` if validation rejects the tree.
fn canonical_json<T: Serialize>(value: &T) -> Result<String, ChecksumError> {
    let tree = serde_json::to_value(value)?;
    validate_canonical(&tree, 0)?;
    let encoded = serde_json::to_string(&tree)?;
    Ok(encoded)
}
const VALIDATE_CANONICAL_MAX_DEPTH: u32 = 128;
fn validate_canonical(v: &serde_json::Value, depth: u32) -> Result<(), ChecksumError> {
if depth > VALIDATE_CANONICAL_MAX_DEPTH {
return Err(ChecksumError::NonCanonicalContent {
reason: format!(
"JSON nesting depth exceeds {VALIDATE_CANONICAL_MAX_DEPTH} \
(matches serde_json's default deserialization recursion limit; \
deeper trees cannot round-trip through standard JSON parsers)"
),
});
}
match v {
serde_json::Value::Object(map) => {
for (k, child) in map {
if !k.is_ascii() {
return Err(ChecksumError::NonCanonicalContent {
reason: format!(
"non-ASCII object key {k:?} \
(JS sorts UTF-16 code units, Rust sorts UTF-8 bytes — \
divergent for code points ≥ U+10000)"
),
});
}
validate_canonical(child, depth + 1)?;
}
Ok(())
}
serde_json::Value::Array(arr) => arr
.iter()
.try_for_each(|child| validate_canonical(child, depth + 1)),
serde_json::Value::Number(n) => {
if let Some(f) = n.as_f64() {
let bits = f.to_bits();
if bits != 0 && !f.is_normal() {
return Err(ChecksumError::NonCanonicalContent {
reason: format!(
"non-canonical f64 {f:e} (bits={bits:#018x}) \
(rejects -0.0 + subnormals — JS `JSON.stringify` \
and Rust `serde_json` may format these differently)"
),
});
}
}
Ok(())
}
serde_json::Value::String(_) | serde_json::Value::Bool(_) | serde_json::Value::Null => {
Ok(())
}
}
}
/// Computes the hex-encoded SHA-256 digest of a frame's canonical JSON body.
///
/// Only `t`, `lifecycle`, `path`, `change`, `frame_seq` and `frame_t_ns` are
/// hashed; the frame's existing `checksum` and its `format_version` do not
/// affect the result.
///
/// # Errors
/// Propagates any `ChecksumError` from `canonical_json`.
pub fn wal_frame_checksum<T: Serialize>(frame: &WALFrame<T>) -> Result<String, ChecksumError> {
    let canonical = canonical_json(&ChecksumBody {
        t: WalTag::VALUE,
        lifecycle: &frame.lifecycle,
        path: frame.path.as_str(),
        change: &frame.change,
        frame_seq: frame.frame_seq,
        frame_t_ns: frame.frame_t_ns,
    })?;

    let mut hasher = Sha256::new();
    hasher.update(canonical.as_bytes());
    Ok(hex::encode(hasher.finalize()))
}
/// Recomputes the frame's checksum and reports whether it equals the stored
/// `frame.checksum` (exact string comparison).
///
/// # Errors
/// Propagates any `ChecksumError` raised while recomputing the digest.
pub fn verify_wal_frame_checksum<T: Serialize>(frame: &WALFrame<T>) -> Result<bool, ChecksumError> {
    wal_frame_checksum(frame).map(|recomputed| recomputed == frame.checksum)
}
#[cfg(test)]
mod tests {
use super::*;
use graphrefly_structures::Version;
/// Baseline `WALFrame<u64>` fixture with an empty checksum; individual tests
/// tweak a copy instead of repeating the literal.
fn sample_frame() -> WALFrame<u64> {
    let change = BaseChange {
        structure: "graphValue".into(),
        version: Version::Counter(1),
        t_ns: 1_700_000_000_000,
        seq: Some(0),
        lifecycle: Lifecycle::Data,
        change: 42,
    };

    WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "root/state".into(),
        change,
        frame_seq: 17,
        frame_t_ns: 1_700_000_001_000,
        checksum: String::new(),
        format_version: 1,
    }
}
/// Keys carry the sequence number zero-padded to 20 digits, for the smallest,
/// a typical, and the largest `u64`.
#[test]
fn wal_frame_key_zero_pads_to_20_digits() {
    let cases = [
        (0_u64, String::from("g/wal/00000000000000000000")),
        (17, String::from("g/wal/00000000000000000017")),
        (u64::MAX, format!("g/wal/{:020}", u64::MAX)),
    ];

    for (seq, want) in &cases {
        assert_eq!(wal_frame_key("g/wal", *seq), *want);
    }
}
/// Sorting keys as strings must yield the same order as sorting the sequence
/// numbers numerically.
///
/// The inputs are deliberately fed out of numeric order and include
/// neighbours such as 2/9/10/99 whose *unpadded* decimal forms sort
/// differently ("10" < "2"). An already-sorted input of powers of ten would
/// pass the ordering check even if the sort did nothing or the padding were
/// missing.
#[test]
fn wal_frame_key_lex_sort_equals_numeric_sort() {
    let seqs = [u64::MAX, 100, 9, 1_000_000, 0, 10, 2, 99, 1];

    let mut keys: Vec<String> = seqs.iter().map(|s| wal_frame_key("g/wal", *s)).collect();
    keys.sort();

    let mut numeric = seqs;
    numeric.sort_unstable();

    assert_eq!(keys.len(), numeric.len());
    for (k, expected) in keys.iter().zip(numeric.iter()) {
        // Decode the numeric suffix instead of re-formatting the expectation,
        // so the comparison does not depend on the pad width under test.
        let decoded = k
            .rsplit('/')
            .next()
            .and_then(|digits| digits.parse::<u64>().ok());
        assert!(
            decoded == Some(*expected),
            "lex-sort key {k} did not match numeric order for {expected}",
        );
    }
}
/// The graph prefix is the graph name followed by `/wal`.
#[test]
fn graph_wal_prefix_joins_with_segment() {
    let prefix = graph_wal_prefix("my-graph");
    assert_eq!(prefix, "my-graph/wal");
}
/// A frame stamped with its own freshly computed checksum verifies.
#[test]
fn checksum_roundtrip_verifies() {
    let unsigned = sample_frame();
    let checksum = wal_frame_checksum(&unsigned).unwrap();
    let signed = WALFrame { checksum, ..unsigned };

    let verified = verify_wal_frame_checksum(&signed).unwrap();
    assert!(verified);
}
/// Changing the inner payload after stamping invalidates the checksum.
#[test]
fn checksum_tamper_change_payload_fails_verify() {
    let pristine = sample_frame();
    let checksum = wal_frame_checksum(&pristine).unwrap();

    let mut tampered = WALFrame { checksum, ..pristine };
    tampered.change.change = 43;

    let still_valid = verify_wal_frame_checksum(&tampered).unwrap();
    assert!(!still_valid);
}
/// Changing `path` after stamping invalidates the checksum.
#[test]
fn checksum_tamper_path_fails_verify() {
    let pristine = sample_frame();
    let checksum = wal_frame_checksum(&pristine).unwrap();

    let tampered = WALFrame {
        checksum,
        path: "different/path".into(),
        ..pristine
    };

    let still_valid = verify_wal_frame_checksum(&tampered).unwrap();
    assert!(!still_valid);
}
/// Changing `frame_seq` after stamping invalidates the checksum.
#[test]
fn checksum_tamper_frame_seq_fails_verify() {
    let pristine = sample_frame();
    let checksum = wal_frame_checksum(&pristine).unwrap();

    let tampered = WALFrame {
        checksum,
        frame_seq: 18,
        ..pristine
    };

    let still_valid = verify_wal_frame_checksum(&tampered).unwrap();
    assert!(!still_valid);
}
/// Whatever is already stored in `checksum` must not feed into the digest.
#[test]
fn checksum_excludes_checksum_field_itself() {
    // Digest of the sample frame carrying an arbitrary pre-existing checksum.
    let digest_with = |stale: String| {
        let frame = WALFrame {
            checksum: stale,
            ..sample_frame()
        };
        wal_frame_checksum(&frame).unwrap()
    };

    let first = digest_with("deadbeef".repeat(8));
    let second = digest_with("00".repeat(32));

    assert_eq!(
        first, second,
        "wal_frame_checksum must not depend on the existing checksum field",
    );
}
/// The digest is rendered as exactly 64 lowercase hexadecimal characters.
#[test]
fn checksum_is_64_char_lowercase_hex() {
    let unsigned = sample_frame();
    let checksum = wal_frame_checksum(&unsigned).unwrap();
    let frame = WALFrame { checksum, ..unsigned };

    assert_eq!(frame.checksum.len(), 64);

    let is_lower_hex = |c: char| c.is_ascii_digit() || ('a'..='f').contains(&c);
    assert!(
        frame.checksum.chars().all(is_lower_hex),
        "checksum must be lowercase hex: {}",
        frame.checksum,
    );
}
/// `WalTag` renders as the JSON string `"c"`.
#[test]
fn wal_tag_serializes_as_string_c() {
    let rendered = serde_json::to_string(&WalTag).unwrap();
    assert_eq!(rendered, "\"c\"");
}
/// A string other than `"c"` is not a valid tag.
#[test]
fn wal_tag_rejects_other_values() {
    let parsed = serde_json::from_str::<WalTag>("\"x\"");
    assert!(parsed.is_err(), "WalTag must reject non-c discriminators");
}
/// Struct fields declared out of alphabetical order come out key-sorted.
#[test]
fn canonical_json_sorts_keys() {
    #[derive(Serialize)]
    struct Flat {
        zebra: u32,
        monkey: u32,
        apple: u32,
    }

    let unsorted = Flat {
        zebra: 1,
        monkey: 2,
        apple: 3,
    };
    let rendered = canonical_json(&unsorted).unwrap();

    assert_eq!(rendered, "{\"apple\":3,\"monkey\":2,\"zebra\":1}");
}
/// Pins both the canonical JSON bytes and the SHA-256 hex of a minimal frame
/// against fixed fixtures (the failure message refers to a TS-side
/// `stableJsonString` counterpart).
#[test]
fn checksum_parity_fixture_minimal_frame() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let computed = wal_frame_checksum(&frame).unwrap();

    // Rebuild the hashed body by hand so the exact bytes can be asserted.
    let canonical = canonical_json(&ChecksumBody {
        t: WalTag::VALUE,
        lifecycle: &frame.lifecycle,
        path: frame.path.as_str(),
        change: &frame.change,
        frame_seq: frame.frame_seq,
        frame_t_ns: frame.frame_t_ns,
    })
    .unwrap();

    let expected_canonical = "{\"change\":{\"change\":0,\"lifecycle\":\"data\",\"structure\":\"s\",\"t_ns\":0,\"version\":0},\"frame_seq\":0,\"frame_t_ns\":0,\"lifecycle\":\"data\",\"path\":\"p\",\"t\":\"c\"}";
    assert_eq!(
        canonical, expected_canonical,
        "canonical JSON drifted from TS-side stableJsonString shape",
    );

    let expected_sha = "d00054d7886e1d73c07a0086e5cbccddf62de3c0cadae31e75d78215b3293ece";
    assert_eq!(
        computed, expected_sha,
        "SHA-256 hex drifted; canonical bytes were:\n {canonical}",
    );
}
/// Fixed digest fixture for a minimal frame whose lifecycle is `Spec`.
#[test]
fn checksum_parity_fixture_lifecycle_spec() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Spec,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Spec,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let expected_sha = "7e857f0862bd429d7d144980a2580da732e0d4b420a03d73d63462368f896c3b";
    let actual = wal_frame_checksum(&frame).unwrap();
    assert_eq!(actual, expected_sha);
}
/// Fixed digest fixture for a minimal frame whose lifecycle is `Ownership`.
#[test]
fn checksum_parity_fixture_lifecycle_ownership() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Ownership,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Ownership,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let expected_sha = "901d3d70d38d954864243bdee5a88cb6d204e5e9823598606d38c10e604c3af4";
    let actual = wal_frame_checksum(&frame).unwrap();
    assert_eq!(actual, expected_sha);
}
/// Fixed digest fixture for a minimal frame whose inner `seq` is `Some(0)`
/// rather than `None`.
#[test]
fn checksum_parity_fixture_seq_some_zero() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: Some(0),
        lifecycle: Lifecycle::Data,
        change: 0,
    };
    let frame: WALFrame<u64> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let expected_sha = "da42bdfa3eff9dbb7ffc60b04c7478cbe7cbb7015ba48963b4ea4661f678c387";
    let actual = wal_frame_checksum(&frame).unwrap();
    assert_eq!(actual, expected_sha);
}
/// JSON values that are not strings at all are never a valid tag.
#[test]
fn wal_tag_rejects_non_string_tokens() {
    let bad_tokens = ["null", "42", "[]", "{}", "true"];

    for bad in bad_tokens {
        let parsed = serde_json::from_str::<WalTag>(bad);
        assert!(parsed.is_err(), "WalTag must reject {bad}");
    }
}
/// A frame whose payload type is `()` can be checksummed and verified.
#[test]
fn wal_frame_unit_payload_round_trips() {
    let change = BaseChange {
        structure: "unit".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: (),
    };
    let unsigned: WALFrame<()> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let checksum = wal_frame_checksum(&unsigned).unwrap();
    let signed = WALFrame { checksum, ..unsigned };

    assert!(verify_wal_frame_checksum(&signed).unwrap());
}
/// A frame carrying an arbitrary `serde_json::Value` object payload can be
/// checksummed and verified.
#[test]
fn wal_frame_value_payload_round_trips() {
    use serde_json::json;

    let change = BaseChange {
        structure: "graphValue".into(),
        version: Version::Counter(1),
        t_ns: 100,
        seq: Some(7),
        lifecycle: Lifecycle::Data,
        change: json!({"kind": "set", "key": "k1", "value": [1, 2, 3]}),
    };
    let unsigned: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "node/state".into(),
        change,
        frame_seq: 17,
        frame_t_ns: 200,
        checksum: String::new(),
        format_version: 1,
    };

    let checksum = wal_frame_checksum(&unsigned).unwrap();
    let signed = WALFrame { checksum, ..unsigned };

    assert!(verify_wal_frame_checksum(&signed).unwrap());
}
// A frame document that has no `format_version` key must still deserialize,
// with the field filled in as 1 by the serde default.
#[test]
fn format_version_defaults_on_old_frame_json() {
// Fixture deliberately omits the `format_version` key.
let old_json = r#"{
"t": "c",
"lifecycle": "data",
"path": "p",
"change": {
"structure": "s",
"version": 0,
"t_ns": 0,
"lifecycle": "data",
"change": 0
},
"frame_seq": 0,
"frame_t_ns": 0,
"checksum": ""
}"#;
let frame: WALFrame<u64> = serde_json::from_str(old_json).unwrap();
assert_eq!(
frame.format_version, 1,
"missing format_version must default to 1"
);
}
/// An explicit, non-default `format_version` survives serialize → deserialize.
#[test]
fn format_version_round_trips() {
    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: 0u64,
    };
    let original = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 2,
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: WALFrame<u64> = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.format_version, 2);
}
/// A payload object with a non-ASCII key is refused, and the error text
/// names the offending key.
#[test]
fn canonical_json_rejects_non_ascii_object_keys() {
    use serde_json::json;

    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: json!({ "café": 1 }),
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let err = wal_frame_checksum(&frame).expect_err("B1 guard must reject");
    let msg = err.to_string();

    let is_non_canonical = matches!(err, ChecksumError::NonCanonicalContent { .. });
    assert!(
        is_non_canonical,
        "expected NonCanonicalContent, got: {err:?}"
    );
    assert!(
        msg.contains("café"),
        "diagnostic must name the offending key, got: {msg}"
    );
}
/// A finite but subnormal float in the payload is refused, and the error
/// text mentions subnormals.
#[test]
fn canonical_json_rejects_subnormal_f64() {
    use serde_json::json;

    // Half of the smallest normal positive f64 is subnormal but still finite.
    let subnormal: f64 = f64::MIN_POSITIVE / 2.0;
    assert!(!subnormal.is_normal());
    assert!(subnormal.is_finite());

    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: json!({ "tiny": subnormal }),
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let err = wal_frame_checksum(&frame).expect_err("B1 subnormal guard must reject");

    let is_non_canonical = matches!(err, ChecksumError::NonCanonicalContent { .. });
    assert!(
        is_non_canonical,
        "expected NonCanonicalContent, got: {err:?}"
    );
    assert!(
        err.to_string().contains("subnormal"),
        "diagnostic must mention subnormal, got: {err}"
    );
}
/// ASCII keys with an integer, a normal float and positive zero all pass
/// validation.
#[test]
fn canonical_json_guard_passes_ascii_and_normal_floats() {
    use serde_json::json;

    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: json!({ "ascii_key": 42, "float": 1.5, "zero": 0.0 }),
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let outcome = wal_frame_checksum(&frame);
    outcome.expect("ASCII keys + normal floats must pass the guard");
}
/// `-0.0` in the payload is refused while `+0.0` keeps passing.
#[test]
fn canonical_json_rejects_negative_zero() {
    use serde_json::json;

    let neg_zero = -0.0_f64;
    assert_eq!(neg_zero.to_bits(), 0x8000_0000_0000_0000);

    // Build the number through `Number::from_f64` and insert it by hand so
    // the payload holds exactly this f64 value.
    let neg_zero_number = serde_json::Number::from_f64(neg_zero).expect("finite");
    let mut map = serde_json::Map::new();
    map.insert(
        "neg_zero".to_owned(),
        serde_json::Value::Number(neg_zero_number),
    );

    let negative_change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: serde_json::Value::Object(map),
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change: negative_change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let err = wal_frame_checksum(&frame).expect_err("B1 / G2.5 guard must reject -0.0");
    let is_non_canonical = matches!(err, ChecksumError::NonCanonicalContent { .. });
    assert!(
        is_non_canonical,
        "expected NonCanonicalContent, got: {err:?}"
    );

    // Control case: the same frame shape with +0.0 is accepted.
    let positive_change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: json!({ "pos_zero": 0.0 }),
    };
    let frame_pos_zero: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change: positive_change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let outcome = wal_frame_checksum(&frame_pos_zero);
    outcome.expect("positive +0.0 must continue to pass the guard");
}
/// A payload nested 200 objects deep is refused with a depth diagnostic.
#[test]
fn canonical_json_rejects_excessive_nesting_depth() {
    // Wrap a leaf `0` in 200 single-key objects: {"n": {"n": ... 0 ...}}.
    let leaf = serde_json::Value::Number(serde_json::Number::from(0_u64));
    let deep = (0..200).fold(leaf, |inner, _| {
        let mut wrapper = serde_json::Map::new();
        wrapper.insert("n".to_owned(), inner);
        serde_json::Value::Object(wrapper)
    });

    let change = BaseChange {
        structure: "s".into(),
        version: Version::Counter(0),
        t_ns: 0,
        seq: None,
        lifecycle: Lifecycle::Data,
        change: deep,
    };
    let frame: WALFrame<serde_json::Value> = WALFrame {
        t: WalTag,
        lifecycle: Lifecycle::Data,
        path: "p".into(),
        change,
        frame_seq: 0,
        frame_t_ns: 0,
        checksum: String::new(),
        format_version: 1,
    };

    let err = wal_frame_checksum(&frame).expect_err("G2.6 depth cap must reject");

    let is_non_canonical = matches!(err, ChecksumError::NonCanonicalContent { .. });
    assert!(
        is_non_canonical,
        "expected NonCanonicalContent, got: {err:?}"
    );
    assert!(
        err.to_string().contains("depth"),
        "diagnostic must mention depth, got: {err}"
    );
}
/// Tripwire: `serde_json` must emit object keys sorted, not in insertion
/// order. Keys are inserted as `z` then `a` and must come back as `a`, `z`.
#[test]
fn preserve_order_feature_is_not_enabled() {
    let mut entries = serde_json::Map::new();
    for (key, value) in [("z", 1), ("a", 2)] {
        entries.insert(key.into(), serde_json::json!(value));
    }

    let object = serde_json::Value::Object(entries);
    let serialized = serde_json::to_string(&object).unwrap();

    assert_eq!(
        serialized, r#"{"a":2,"z":1}"#,
        "serde_json `preserve_order` feature appears to be enabled \
         workspace-wide via Cargo feature unification — this BREAKS the \
         WAL checksum canonical-JSON parity invariant. Find the offending \
         dep with `cargo tree -e features | grep preserve_order` and \
         either disable it or pin to a non-preserve-order codec route.",
    );
}
}