Skip to main content

oris_evolution/gep/
content_hash.rs

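//! Content-addressable identity using SHA-256.
//!
//! Every GEP asset has a deterministic `asset_id` computed from its canonical
//! JSON content, enabling deduplication and tamper detection. Callers of
//! `compute_asset_id` choose which fields to leave out of the hash via its
//! `exclude_fields` argument.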
5
6use serde::{Serialize, Serializer};
7use sha2::{Digest, Sha256};
8use thiserror::Error;
9
10#[derive(Error, Debug)]
11pub enum AssetIdError {
12    #[error("Serialization error: {0}")]
13    Serialization(String),
14    #[error("Invalid asset ID format")]
15    InvalidFormat,
16}
17
18/// Compute the SHA-256 asset ID for a serializable object
19/// The asset_id field is excluded from the hash computation
20pub fn compute_asset_id<T: Serialize>(
21    obj: &T,
22    exclude_fields: &[&str],
23) -> Result<String, AssetIdError> {
24    let json = canonicalize_json(obj, exclude_fields)?;
25    let hash = compute_sha256(&json);
26    Ok(format!("sha256:{}", hash))
27}
28
29/// Compute SHA-256 hash of a string
30pub fn compute_sha256(input: &str) -> String {
31    let mut hasher = Sha256::new();
32    hasher.update(input.as_bytes());
33    let result = hasher.finalize();
34    hex_encode(&result)
35}
36
/// Encode `bytes` as a lowercase hexadecimal string, two digits per byte.
///
/// An empty slice yields an empty string.
fn hex_encode(bytes: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // Single pre-sized buffer instead of one temporary `String` per byte.
    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble; both indices are always < 16.
        encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}
41
42/// Canonicalize JSON by sorting keys and preserving array order
43/// This ensures deterministic hashing
44fn canonicalize_json<T: Serialize>(
45    obj: T,
46    exclude_fields: &[&str],
47) -> Result<String, AssetIdError> {
48    let value =
49        serde_json::to_value(obj).map_err(|e| AssetIdError::Serialization(e.to_string()))?;
50
51    let canonical = canonicalize_value(&value, exclude_fields);
52
53    serde_json::to_string(&canonical).map_err(|e| AssetIdError::Serialization(e.to_string()))
54}
55
56/// Recursively canonicalize a JSON value
57fn canonicalize_value(value: &serde_json::Value, exclude_fields: &[&str]) -> serde_json::Value {
58    match value {
59        serde_json::Value::Object(map) => {
60            let mut sorted: Vec<_> = map.iter().collect();
61            sorted.sort_by(|a, b| a.0.cmp(b.0));
62
63            let mut result = serde_json::Map::new();
64            for (key, val) in sorted {
65                if exclude_fields.contains(&key.as_str()) {
66                    continue;
67                }
68                result.insert(key.clone(), canonicalize_value(val, exclude_fields));
69            }
70            serde_json::Value::Object(result)
71        }
72        serde_json::Value::Array(arr) => serde_json::Value::Array(
73            arr.iter()
74                .map(|v| canonicalize_value(v, exclude_fields))
75                .collect(),
76        ),
77        // Primitives: convert non-finite numbers to null
78        serde_json::Value::Number(n) => {
79            // Check if number is finite by attempting to convert to f64
80            if n.as_f64().map(|f| f.is_finite()).unwrap_or(false) {
81                value.clone()
82            } else {
83                serde_json::Value::Null
84            }
85        }
86        _ => value.clone(),
87    }
88}
89
90/// Verify that a claimed asset_id matches the computed hash
91pub fn verify_asset_id<T: Serialize>(
92    obj: &T,
93    claimed_id: &str,
94    exclude_fields: &[&str],
95) -> Result<bool, AssetIdError> {
96    if !claimed_id.starts_with("sha256:") {
97        return Err(AssetIdError::InvalidFormat);
98    }
99
100    let computed = compute_asset_id(obj, exclude_fields)?;
101    Ok(claimed_id == computed)
102}
103
104/// Parse asset_id and return the hash portion
105pub fn parse_asset_id(asset_id: &str) -> Result<String, AssetIdError> {
106    if let Some(hash) = asset_id.strip_prefix("sha256:") {
107        if hash.len() == 64 {
108            Ok(hash.to_string())
109        } else {
110            Err(AssetIdError::InvalidFormat)
111        }
112    } else {
113        Err(AssetIdError::InvalidFormat)
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;

    /// Minimal serializable fixture; `optional` is omitted from the JSON when
    /// it is `None`.
    #[derive(Serialize)]
    struct TestAsset {
        id: String,
        name: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        optional: Option<String>,
    }

    /// Build a fixture with no optional value.
    fn asset(id: &str, name: &str) -> TestAsset {
        TestAsset {
            id: id.to_string(),
            name: name.to_string(),
            optional: None,
        }
    }

    #[test]
    fn test_compute_asset_id() {
        let asset_id = compute_asset_id(&asset("test-1", "Test Asset"), &["asset_id"]).unwrap();

        assert!(asset_id.starts_with("sha256:"));
        assert_eq!(asset_id.len(), 7 + 64);
        // The digest part is lowercase hexadecimal.
        assert!(asset_id["sha256:".len()..]
            .chars()
            .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
    }

    #[test]
    fn test_compute_sha256_known_vectors() {
        // Standard SHA-256 test vectors.
        assert_eq!(
            compute_sha256(""),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
        assert_eq!(
            compute_sha256("abc"),
            "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
        );
    }

    #[test]
    fn test_deterministic() {
        let subject = asset("test-2", "Deterministic");

        let id1 = compute_asset_id(&subject, &["asset_id"]).unwrap();
        let id2 = compute_asset_id(&subject, &["asset_id"]).unwrap();

        assert_eq!(id1, id2);
    }

    #[test]
    fn test_different_content_different_hash() {
        let id1 = compute_asset_id(&asset("test-3", "Name A"), &["asset_id"]).unwrap();
        let id2 = compute_asset_id(&asset("test-4", "Name B"), &["asset_id"]).unwrap();

        assert_ne!(id1, id2);
    }

    #[test]
    fn test_optional_field_changes_hash() {
        let without = asset("test-opt", "Optional");
        let with = TestAsset {
            optional: Some("extra".to_string()),
            ..asset("test-opt", "Optional")
        };

        let id_without = compute_asset_id(&without, &["asset_id"]).unwrap();
        let id_with = compute_asset_id(&with, &["asset_id"]).unwrap();

        assert_ne!(id_without, id_with);
    }

    #[test]
    fn test_excluded_fields_do_not_affect_hash() {
        let first = asset("id-one", "Same Name");
        let second = asset("id-two", "Same Name");

        // Differ only in `id`: equal once `id` is excluded, different otherwise.
        assert_eq!(
            compute_asset_id(&first, &["id"]).unwrap(),
            compute_asset_id(&second, &["id"]).unwrap()
        );
        assert_ne!(
            compute_asset_id(&first, &[]).unwrap(),
            compute_asset_id(&second, &[]).unwrap()
        );
    }

    #[test]
    fn test_verify_asset_id() {
        let subject = asset("test-5", "Verify Me");

        let claimed = compute_asset_id(&subject, &["asset_id"]).unwrap();
        assert!(verify_asset_id(&subject, &claimed, &["asset_id"]).unwrap());

        // A well-formed but wrong ID is a mismatch, not an error.
        let wrong = format!("sha256:{}", "0".repeat(64));
        assert!(!verify_asset_id(&subject, &wrong, &["asset_id"]).unwrap());

        // A missing prefix is rejected as malformed.
        assert!(matches!(
            verify_asset_id(&subject, "md5:deadbeef", &["asset_id"]),
            Err(AssetIdError::InvalidFormat)
        ));
    }

    #[test]
    fn test_canonicalize_object() {
        #[derive(Serialize)]
        struct Unordered {
            z: String,
            a: String,
            m: Vec<u32>,
        }

        let obj = Unordered {
            z: "z first".to_string(),
            a: "a second".to_string(),
            m: vec![3, 1, 2],
        };

        let json = canonicalize_json(&obj, &[]).unwrap();

        // Keys are sorted (a, m, z) while the array keeps its original order.
        assert_eq!(json, r#"{"a":"a second","m":[3,1,2],"z":"z first"}"#);

        // Excluded keys disappear from the canonical form.
        let trimmed = canonicalize_json(&obj, &["m"]).unwrap();
        assert_eq!(trimmed, r#"{"a":"a second","z":"z first"}"#);
    }

    #[test]
    fn test_parse_asset_id() {
        // Create a valid 64-char hex string
        let hash = "a".repeat(64);
        let valid = format!("sha256:{}", hash);
        assert_eq!(parse_asset_id(&valid).unwrap(), hash);

        assert!(parse_asset_id("invalid").is_err());
        assert!(parse_asset_id("sha256:short").is_err());
        assert!(parse_asset_id("sha256:").is_err());
    }
}