Skip to main content

git_internal/internal/object/
integrity.rs

1//! Hash implementation for AI process objects.
2//!
3//! This module defines `IntegrityHash`, which is used for integrity verification
4//! and deduplication of AI objects (Artifacts, Headers, etc.).
5//!
6//! # Why not `ObjectHash`?
7//!
8//! We avoid using `git_internal::hash::ObjectHash` here because:
9//! 1. `ObjectHash` implies Git content addressing (SHA-1 or SHA-256 depending on repo config).
10//! 2. `IntegrityHash` always uses SHA-256 for consistent integrity checks regardless of the
11//!    underlying Git repository format.
12//! 3. This separation prevents accidental usage of integrity checksums as Git object IDs.
13
14use std::{fmt, str::FromStr};
15
16use serde::{Deserialize, Serialize};
17use serde_json::Value;
18use sha2::{Digest, Sha256};
19
/// 32-byte SHA-256 digest used for integrity verification and deduplication.
///
/// Equality, ordering and hashing are all derived, so they operate on the raw
/// digest bytes.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct IntegrityHash([u8; 32]);
23
24impl IntegrityHash {
25    /// Create a new hash from raw bytes.
26    pub fn new(bytes: [u8; 32]) -> Self {
27        Self(bytes)
28    }
29
30    /// Compute hash from content bytes.
31    pub fn compute(content: &[u8]) -> Self {
32        let mut hasher = Sha256::new();
33        hasher.update(content);
34        let result = hasher.finalize();
35        Self(result.into())
36    }
37
38    /// Return the hex string representation.
39    pub fn to_hex(&self) -> String {
40        hex::encode(self.0)
41    }
42
43    /// Return the raw bytes.
44    pub fn as_bytes(&self) -> &[u8; 32] {
45        &self.0
46    }
47}
48
49impl fmt::Debug for IntegrityHash {
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        write!(f, "IntegrityHash({})", self.to_hex())
52    }
53}
54
55impl fmt::Display for IntegrityHash {
56    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57        write!(f, "{}", self.to_hex())
58    }
59}
60
61impl FromStr for IntegrityHash {
62    type Err = String;
63
64    fn from_str(s: &str) -> Result<Self, Self::Err> {
65        if s.len() != 64 {
66            return Err(format!("Invalid hash length: expected 64, got {}", s.len()));
67        }
68        let mut bytes = [0u8; 32];
69        hex::decode_to_slice(s, &mut bytes).map_err(|e| e.to_string())?;
70        Ok(Self(bytes))
71    }
72}
73
74impl Serialize for IntegrityHash {
75    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
76    where
77        S: serde::Serializer,
78    {
79        serializer.serialize_str(&self.to_hex())
80    }
81}
82
83impl<'de> Deserialize<'de> for IntegrityHash {
84    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
85    where
86        D: serde::Deserializer<'de>,
87    {
88        let s = String::deserialize(deserializer)?;
89        Self::from_str(&s).map_err(serde::de::Error::custom)
90    }
91}
92
93/// Compute canonical JSON hash.
94pub fn compute_integrity_hash<T: Serialize>(
95    object: &T,
96) -> Result<IntegrityHash, serde_json::Error> {
97    let mut value = serde_json::to_value(object)?;
98    canonicalize_json(&mut value);
99    let content = serde_json::to_vec(&value)?;
100    Ok(IntegrityHash::compute(&content))
101}
102
103fn canonicalize_json(value: &mut Value) {
104    match value {
105        Value::Array(items) => {
106            for item in items.iter_mut() {
107                canonicalize_json(item);
108            }
109        }
110        Value::Object(map) => {
111            let mut entries: Vec<(String, Value)> = std::mem::take(map).into_iter().collect();
112            entries.sort_by(|(a, _), (b, _)| a.cmp(b));
113            let mut sorted = serde_json::Map::with_capacity(entries.len());
114            for (key, mut value) in entries {
115                canonicalize_json(&mut value);
116                sorted.insert(key, value);
117            }
118            *map = sorted;
119        }
120        _ => {}
121    }
122}
123
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;

    /// SHA-256 digest of the ASCII bytes `abc` (standard published test vector).
    const ABC_SHA256_HEX: &str =
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    #[derive(Serialize)]
    struct MapWrapper {
        map: HashMap<String, String>,
    }

    /// Maps built in different insertion orders must hash identically.
    #[test]
    fn test_integrity_hash_deterministic() {
        let mut map_a = HashMap::new();
        map_a.insert("b".to_string(), "2".to_string());
        map_a.insert("a".to_string(), "1".to_string());

        let mut map_b = HashMap::new();
        map_b.insert("a".to_string(), "1".to_string());
        map_b.insert("b".to_string(), "2".to_string());

        let hash_a = compute_integrity_hash(&MapWrapper { map: map_a }).expect("checksum");
        let hash_b = compute_integrity_hash(&MapWrapper { map: map_b }).expect("checksum");

        assert_eq!(hash_a, hash_b);
        assert_eq!(hash_a.to_hex().len(), 64);
    }

    /// Key order must not matter for nested objects or objects inside arrays,
    /// while array element order must still be significant.
    #[test]
    fn test_integrity_hash_nested_key_order() {
        let first = serde_json::json!({
            "outer": {"b": 2, "a": 1},
            "list": [{"y": 1, "x": 2}]
        });
        let second = serde_json::json!({
            "list": [{"x": 2, "y": 1}],
            "outer": {"a": 1, "b": 2}
        });
        assert_eq!(
            compute_integrity_hash(&first).expect("checksum"),
            compute_integrity_hash(&second).expect("checksum")
        );

        let ascending = serde_json::json!({"list": [1, 2]});
        let descending = serde_json::json!({"list": [2, 1]});
        assert_ne!(
            compute_integrity_hash(&ascending).expect("checksum"),
            compute_integrity_hash(&descending).expect("checksum")
        );
    }

    /// `compute` must produce real SHA-256, and the formatting impls must
    /// agree with `to_hex`.
    #[test]
    fn test_compute_known_answer() {
        let hash = IntegrityHash::compute(b"abc");
        assert_eq!(hash.to_hex(), ABC_SHA256_HEX);
        assert_eq!(hash.to_string(), ABC_SHA256_HEX);
        assert_eq!(
            format!("{:?}", hash),
            format!("IntegrityHash({})", ABC_SHA256_HEX)
        );
    }

    /// Parsing the hex form yields the same hash, and `new`/`as_bytes` round-trip.
    #[test]
    fn test_from_str_round_trip() {
        let parsed: IntegrityHash = ABC_SHA256_HEX.parse().expect("valid hex digest");
        assert_eq!(parsed, IntegrityHash::compute(b"abc"));
        assert_eq!(IntegrityHash::new(*parsed.as_bytes()), parsed);
    }

    /// Wrong length and non-hex input are both rejected.
    #[test]
    fn test_from_str_rejects_bad_input() {
        assert_eq!(
            "abc".parse::<IntegrityHash>(),
            Err("Invalid hash length: expected 64, got 3".to_string())
        );
        // Correct length, but not hexadecimal.
        assert!("z".repeat(64).parse::<IntegrityHash>().is_err());
    }

    /// The serde representation is the hex string, and it round-trips.
    #[test]
    fn test_serde_round_trip() {
        let hash = IntegrityHash::compute(b"abc");
        let json = serde_json::to_string(&hash).expect("serialize");
        assert_eq!(json, format!("\"{}\"", ABC_SHA256_HEX));

        let back: IntegrityHash = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(back, hash);

        assert!(serde_json::from_str::<IntegrityHash>("\"abc\"").is_err());
    }
}