Skip to main content

void_core/support/
hash.rs

1//! Typed content hash for file integrity.
2//!
3//! `ContentHash` wraps `[u8; 32]` (SHA-256 digest) with type safety,
4//! preventing accidental confusion with other 32-byte values (keys, nonces).
5
6use std::fmt;
7
8/// SHA-256 hash of file content.
9///
10/// Used in shard entries, manifest entries, index entries, and diffs
11/// to track file identity and detect modifications.
12#[derive(
13    Clone, Copy, PartialEq, Eq, Hash,
14    serde::Serialize, serde::Deserialize,
15)]
16pub struct ContentHash(pub [u8; 32]);
17
18impl ContentHash {
19    /// All-zero hash, used as a sentinel/default.
20    pub const ZERO: Self = Self([0u8; 32]);
21
22    /// Create from a raw 32-byte digest.
23    pub fn from_bytes(bytes: [u8; 32]) -> Self {
24        Self(bytes)
25    }
26
27    /// Access the underlying bytes.
28    pub fn as_bytes(&self) -> &[u8; 32] {
29        &self.0
30    }
31
32    /// Convert to hex string.
33    pub fn to_hex(&self) -> String {
34        hex::encode(self.0)
35    }
36
37    /// Compute SHA-256 hash of content.
38    pub fn digest(data: &[u8]) -> Self {
39        use sha2::{Digest, Sha256};
40        Self(Sha256::digest(data).into())
41    }
42
43    /// Compute SHA-256 hash incrementally from a reader.
44    ///
45    /// Streams data in 64KB chunks — never loads the full content into memory.
46    /// Produces the same hash as `digest()` for identical content.
47    pub fn digest_reader(reader: &mut impl std::io::Read) -> std::io::Result<Self> {
48        use sha2::{Digest, Sha256};
49        let mut hasher = Sha256::new();
50        let mut buf = [0u8; 64 * 1024];
51        loop {
52            let n = reader.read(&mut buf)?;
53            if n == 0 {
54                break;
55            }
56            hasher.update(&buf[..n]);
57        }
58        Ok(Self(hasher.finalize().into()))
59    }
60}
61
62impl fmt::Debug for ContentHash {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        write!(f, "ContentHash({})", &hex::encode(&self.0[..8]))
65    }
66}
67
68impl fmt::Display for ContentHash {
69    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70        write!(f, "{}", hex::encode(self.0))
71    }
72}
73
74impl AsRef<[u8]> for ContentHash {
75    fn as_ref(&self) -> &[u8] {
76        &self.0
77    }
78}
79
80impl From<[u8; 32]> for ContentHash {
81    fn from(bytes: [u8; 32]) -> Self {
82        Self(bytes)
83    }
84}
85
86impl From<ContentHash> for [u8; 32] {
87    fn from(hash: ContentHash) -> [u8; 32] {
88        hash.0
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same bytes twice yields equal hashes.
    #[test]
    fn digest_deterministic() {
        let h1 = ContentHash::digest(b"hello");
        let h2 = ContentHash::digest(b"hello");
        assert_eq!(h1, h2);
    }

    /// Different inputs yield different hashes.
    #[test]
    fn digest_different_content() {
        let h1 = ContentHash::digest(b"hello");
        let h2 = ContentHash::digest(b"world");
        assert_ne!(h1, h2);
    }

    /// Pins the digest to the published SHA-256 value for empty input, so a
    /// change of algorithm or encoding cannot slip through unnoticed.
    #[test]
    fn digest_known_vector() {
        assert_eq!(
            ContentHash::digest(b"").to_hex(),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    /// The hex form decodes back to the original bytes and matches `Display`.
    #[test]
    fn hex_roundtrip() {
        let h = ContentHash::digest(b"test");
        let hex_str = h.to_hex();
        assert_eq!(hex_str.len(), 64);

        // Decode the string again: a length check alone would not catch a
        // wrong encoding.
        let decoded = hex::decode(&hex_str).unwrap();
        assert_eq!(decoded.as_slice(), h.as_bytes().as_slice());

        assert_eq!(h.to_string(), hex_str);
    }

    /// `Debug` shows only the first 8 bytes (16 hex characters).
    #[test]
    fn debug_is_truncated() {
        let h = ContentHash::digest(b"test");
        let expected = format!("ContentHash({})", &h.to_hex()[..16]);
        assert_eq!(format!("{:?}", h), expected);
    }

    /// `from_bytes` followed by `as_bytes` returns the same array.
    #[test]
    fn from_bytes_roundtrip() {
        let bytes = [0x42u8; 32];
        let h = ContentHash::from_bytes(bytes);
        assert_eq!(*h.as_bytes(), bytes);
    }

    /// Streaming and one-shot hashing agree on a small input.
    #[test]
    fn digest_reader_matches_digest() {
        let data = b"hello world, this is a streaming hash test";
        let h1 = ContentHash::digest(data);
        let h2 = ContentHash::digest_reader(&mut &data[..]).unwrap();
        assert_eq!(h1, h2);
    }

    /// Streaming an empty reader equals hashing an empty slice.
    #[test]
    fn digest_reader_empty_input() {
        let h = ContentHash::digest_reader(&mut std::io::empty()).unwrap();
        assert_eq!(h, ContentHash::digest(b""));
    }

    /// Streaming and one-shot hashing agree across multiple buffer fills.
    #[test]
    fn digest_reader_large_data() {
        // Data larger than the 64KB buffer to test multi-chunk reading
        let data: Vec<u8> = (0..200_000).map(|i| (i % 256) as u8).collect();
        let h1 = ContentHash::digest(&data);
        let h2 = ContentHash::digest_reader(&mut &data[..]).unwrap();
        assert_eq!(h1, h2);
    }

    /// A hash survives CBOR serialization and deserialization unchanged.
    #[test]
    fn cbor_roundtrip() {
        let h = ContentHash::digest(b"cbor test");
        let mut encoded = Vec::new();
        ciborium::into_writer(&h, &mut encoded).unwrap();
        let deserialized: ContentHash = ciborium::from_reader(&encoded[..]).unwrap();
        assert_eq!(h, deserialized);
    }
}