Skip to main content

shadow_core/agentlog/
hash.rs

1//! SHA-256 content addressing per SPEC §6.
2//!
3//! The content id for a record is `sha256(canonical_json(payload))`, lower-case
4//! hex, prefixed with the string `"sha256:"`. Because the hash is over the
5//! canonical form (§5), two payloads that are semantically equivalent
6//! (e.g. the same keys in a different order, or a string with an
7//! alternative Unicode normalization form) map to the same id.
8
9use serde_json::Value;
10use sha2::{Digest, Sha256};
11
12use crate::agentlog::canonical;
13
/// Algorithm tag that starts every content id.
///
/// Parsers rely on this tag to tell hash algorithms apart, should another
/// one (e.g. `"blake3:"`) ever be introduced.
pub const ID_PREFIX: &str = "sha256:";

/// Number of hex characters in the digest portion of an id.
///
/// A SHA-256 digest is 32 bytes and each byte is rendered as two hex
/// characters, giving 64.
pub const HEX_LEN: usize = 32 * 2;
20
21/// Compute the content id for a payload.
22///
23/// The input is the `payload` field of a `.agentlog` envelope — NOT the
24/// whole envelope. See SPEC §6.1 for why the envelope is excluded.
25pub fn content_id(payload: &Value) -> String {
26    let bytes = canonical::to_bytes(payload);
27    let digest = Sha256::digest(&bytes);
28    let mut out = String::with_capacity(ID_PREFIX.len() + HEX_LEN);
29    out.push_str(ID_PREFIX);
30    for byte in digest {
31        out.push(nibble(byte >> 4));
32        out.push(nibble(byte & 0xF));
33    }
34    out
35}
36
37/// True if `s` is a syntactically valid content id (`sha256:` + 64 lowercase hex).
38pub fn is_valid(s: &str) -> bool {
39    if !s.starts_with(ID_PREFIX) {
40        return false;
41    }
42    let hex = &s[ID_PREFIX.len()..];
43    hex.len() == HEX_LEN && hex.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
44}
45
/// Map a 4-bit value to its lower-case hexadecimal character.
///
/// Callers must pass `n < 16`; the precondition is only checked in debug
/// builds.
fn nibble(n: u8) -> char {
    debug_assert!(n < 16);
    if n < 10 {
        char::from(b'0' + n)
    } else {
        char::from(b'a' + (n - 10))
    }
}
53
/// Unit tests for content-id computation and syntactic validation.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Known-good id used by several validation tests (the SPEC §5.6 vector).
    const KNOWN_ID: &str =
        "sha256:93a23971a914e5eacbf0a8d25154cda309c3c1c72fbb9914d47c60f3cb681588";

    #[test]
    fn spec_5_6_known_vector() {
        // The single normative cross-implementation vector — SPEC §5.6.
        let payload = json!({"hello": "world"});
        assert_eq!(content_id(&payload), KNOWN_ID);
    }

    #[test]
    fn id_is_prefixed_and_64_hex_chars() {
        let id = content_id(&json!(null));
        assert!(id.starts_with("sha256:"));
        let hex = &id[ID_PREFIX.len()..];
        assert_eq!(hex.len(), HEX_LEN);
        assert!(hex
            .chars()
            .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()));
    }

    #[test]
    fn content_id_output_passes_is_valid() {
        // Whatever `content_id` produces must be accepted by the validator.
        for payload in [json!(null), json!({"a": 1}), json!(["x", 2, false])] {
            assert!(is_valid(&content_id(&payload)));
        }
    }

    #[test]
    fn determinism_across_calls() {
        let p = json!({"model": "claude-opus-4-7", "temperature": 0.2});
        assert_eq!(content_id(&p), content_id(&p));
    }

    #[test]
    fn equivalent_payloads_hash_equal() {
        // Same content, different key order. Canonicalization should sort
        // them, so the hashes match.
        let a = json!({"a": 1, "b": 2});
        let b = json!({"b": 2, "a": 1});
        assert_eq!(content_id(&a), content_id(&b));
    }

    #[test]
    fn nfc_equivalence_produces_equal_id() {
        // "e" + combining acute accent vs. the precomposed "é".
        let decomposed = json!({"key": "e\u{0301}clair"});
        let precomposed = json!({"key": "\u{00e9}clair"});
        assert_eq!(content_id(&decomposed), content_id(&precomposed));
    }

    #[test]
    fn distinct_payloads_hash_different() {
        let a = json!({"a": 1});
        let b = json!({"a": 2});
        assert_ne!(content_id(&a), content_id(&b));
    }

    #[test]
    fn is_valid_accepts_well_formed_id() {
        assert!(is_valid(KNOWN_ID));
    }

    #[test]
    fn is_valid_rejects_wrong_prefix() {
        assert!(!is_valid(
            "md5:93a23971a914e5eacbf0a8d25154cda309c3c1c72fbb9914d47c60f3cb681588"
        ));
        assert!(!is_valid(
            "93a23971a914e5eacbf0a8d25154cda309c3c1c72fbb9914d47c60f3cb681588"
        ));
        // The prefix is case-sensitive.
        assert!(!is_valid(
            "SHA256:93a23971a914e5eacbf0a8d25154cda309c3c1c72fbb9914d47c60f3cb681588"
        ));
    }

    #[test]
    fn is_valid_rejects_empty_and_bare_prefix() {
        assert!(!is_valid(""));
        assert!(!is_valid("sha256:"));
    }

    #[test]
    fn is_valid_rejects_wrong_length() {
        assert!(!is_valid("sha256:abcd"));
        assert!(!is_valid(&format!("sha256:{}", "a".repeat(63))));
        assert!(!is_valid(&format!("sha256:{}", "a".repeat(65))));
        // Trailing whitespace counts toward the length and is not trimmed.
        assert!(!is_valid(&format!("{KNOWN_ID}\n")));
    }

    #[test]
    fn is_valid_rejects_uppercase_hex() {
        assert!(!is_valid(
            "sha256:93A23971A914E5EACBF0A8D25154CDA309C3C1C72FBB9914D47C60F3CB681588"
        ));
    }

    #[test]
    fn is_valid_rejects_non_hex_characters() {
        // Correct length, but outside the 0-9a-f alphabet.
        assert!(!is_valid(&format!("sha256:{}", "g".repeat(64))));
        assert!(!is_valid(&format!("sha256:{}z", "a".repeat(63))));
        assert!(!is_valid(&format!("sha256:{}-", "a".repeat(63))));
        assert!(!is_valid(&format!("sha256:{} ", "a".repeat(63))));
    }

    #[test]
    fn is_valid_rejects_non_ascii_of_matching_byte_length() {
        // 32 two-byte characters are 64 bytes long but are not hex digits.
        assert!(!is_valid(&format!("sha256:{}", "\u{00e9}".repeat(32))));
    }

    #[test]
    fn nibble_covers_all_sixteen_values() {
        let rendered: String = (0u8..16).map(nibble).collect();
        assert_eq!(rendered, "0123456789abcdef");
    }
}
139}