// clayers_xml/hash.rs

1use sha2::{Digest, Sha256};
2use std::fmt;
3use std::str::FromStr;
4
5use crate::Error;
6
/// Content identifier: a SHA-256 digest kept as its 32 raw bytes.
///
/// The single tuple field is public, so the underlying byte array can be read
/// (or constructed) directly.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct ContentHash(pub [u8; 32]);
10
11impl ContentHash {
12    /// Create from raw bytes.
13    #[must_use]
14    pub fn from_bytes(bytes: [u8; 32]) -> Self {
15        Self(bytes)
16    }
17
18    /// Compute a hash from canonical XML bytes.
19    #[must_use]
20    pub fn from_canonical(canonical_bytes: &[u8]) -> Self {
21        let hash = Sha256::digest(canonical_bytes);
22        let mut arr = [0u8; 32];
23        arr.copy_from_slice(&hash);
24        Self(arr)
25    }
26
27    /// Hex representation (64 lowercase hex chars).
28    #[must_use]
29    pub fn to_hex(&self) -> String {
30        use std::fmt::Write;
31        self.0.iter().fold(String::with_capacity(64), |mut acc, b| {
32            let _ = write!(acc, "{b:02x}");
33            acc
34        })
35    }
36
37    /// Prefixed form: `sha256:<hex>`.
38    #[must_use]
39    pub fn to_prefixed(&self) -> String {
40        format!("sha256:{}", self.to_hex())
41    }
42}
43
44impl fmt::Display for ContentHash {
45    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46        write!(f, "sha256:{}", self.to_hex())
47    }
48}
49
50impl fmt::Debug for ContentHash {
51    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52        write!(f, "ContentHash({})", self.to_hex())
53    }
54}
55
56impl FromStr for ContentHash {
57    type Err = Error;
58
59    fn from_str(s: &str) -> Result<Self, Self::Err> {
60        let hex = s.strip_prefix("sha256:").ok_or_else(|| {
61            Error::InvalidHashFormat(format!("expected sha256: prefix, got: {s}"))
62        })?;
63
64        if hex.len() != 64 {
65            return Err(Error::InvalidHashFormat(format!(
66                "expected 64 hex chars, got {}",
67                hex.len()
68            )));
69        }
70
71        let mut bytes = [0u8; 32];
72        for i in 0..32 {
73            bytes[i] = u8::from_str_radix(&hex[i * 2..i * 2 + 2], 16).map_err(|_| {
74                Error::InvalidHashFormat(format!("invalid hex at position {}", i * 2))
75            })?;
76        }
77
78        Ok(Self(bytes))
79    }
80}
81
#[cfg(feature = "serde")]
impl serde::Serialize for ContentHash {
    /// Serializes the hash as a single string: its `Display` output.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.to_string();
        serializer.serialize_str(&text)
    }
}
88
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ContentHash {
    /// Deserializes an owned string and parses it with the `FromStr` impl;
    /// a parse failure is reported as a custom deserializer error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let text = <String as serde::Deserialize>::deserialize(deserializer)?;
        match text.parse::<Self>() {
            Ok(hash) => Ok(hash),
            Err(err) => Err(<D::Error as serde::de::Error>::custom(err)),
        }
    }
}
96
// Public API surface (used by ast-grep for structural verification).
// `cfg(any())` has no predicates and so never holds: this module is stripped
// before compilation. The body-less signatures below are declarations only and
// mirror the inherent methods of `ContentHash` defined earlier in this file.
#[cfg(any())]
mod _api {
    use super::*;
    pub fn from_bytes(bytes: [u8; 32]) -> ContentHash;
    pub fn from_canonical(canonical_bytes: &[u8]) -> ContentHash;
    pub fn to_hex(&self) -> String;
    pub fn to_prefixed(&self) -> String;
}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes given to `from_bytes` are stored unchanged and render as 64 hex chars.
    #[test]
    fn from_bytes_to_hex_roundtrip() {
        let raw = [0xab; 32];
        let wrapped = ContentHash::from_bytes(raw);
        let hex = wrapped.to_hex();
        assert_eq!(hex.len(), 64);
        assert_eq!(wrapped.0, raw);
    }

    /// The prefixed form is the `sha256:` tag followed by 64 characters.
    #[test]
    fn to_prefixed_format() {
        let text = ContentHash::from_bytes([0; 32]).to_prefixed();
        assert!(text.starts_with("sha256:"));
        assert_eq!(text.len(), "sha256:".len() + 64);
    }

    /// Hashing the same input twice yields equal hashes.
    #[test]
    fn from_canonical_deterministic() {
        let input = b"<root>hello</root>";
        let first = ContentHash::from_canonical(input);
        let second = ContentHash::from_canonical(input);
        assert_eq!(first, second);
    }

    /// Two different inputs yield different hashes.
    #[test]
    fn from_canonical_different_input_different_hash() {
        let of_a = ContentHash::from_canonical(b"<a/>");
        let of_b = ContentHash::from_canonical(b"<b/>");
        assert_ne!(of_a, of_b);
    }

    /// `Display` output parses back to the same hash.
    #[test]
    fn display_fromstr_roundtrip() {
        let original = ContentHash::from_canonical(b"test data");
        let reparsed = original
            .to_string()
            .parse::<ContentHash>()
            .expect("parse failed");
        assert_eq!(original, reparsed);
    }

    /// Input without the `sha256:` prefix is an error.
    #[test]
    fn fromstr_rejects_missing_prefix() {
        assert!("abcd".parse::<ContentHash>().is_err());
    }

    /// A prefixed payload that is too short is an error.
    #[test]
    fn fromstr_rejects_wrong_length() {
        assert!("sha256:abcd".parse::<ContentHash>().is_err());
    }

    /// JSON round-trip preserves the hash and encodes it as a quoted prefixed string.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_json_roundtrip() {
        let original = ContentHash::from_canonical(b"serde test");
        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded = serde_json::from_str::<ContentHash>(&encoded).expect("deserialize");
        assert_eq!(original, decoded);
        // Should be a quoted prefixed hex string
        assert!(encoded.starts_with('"'));
        assert!(encoded.contains("sha256:"));
    }
}