Skip to main content

hd_cas/
hash.rs

1use serde::{Deserialize, Serialize};
2use std::fmt;
3
4/// A BLAKE3 content hash. 32 bytes (256 bits).
5#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
6pub struct ContentHash([u8; 32]);
7
8impl ContentHash {
9    /// Hash arbitrary bytes with BLAKE3.
10    pub fn from_bytes(data: &[u8]) -> Self {
11        let hash = blake3::hash(data);
12        ContentHash(*hash.as_bytes())
13    }
14
15    /// Wrap raw hash bytes (no rehashing). Use when you already have
16    /// a finalized BLAKE3 hash (e.g., from a streaming Hasher).
17    pub fn from_raw(bytes: [u8; 32]) -> Self {
18        ContentHash(bytes)
19    }
20
21    /// Return the hash as a 64-character hex string.
22    pub fn to_hex(&self) -> String {
23        hex_encode(&self.0)
24    }
25
26    /// Parse a 64-character hex string into a ContentHash.
27    pub fn from_hex(hex: &str) -> Result<Self, HashError> {
28        if hex.len() != 64 {
29            return Err(HashError::InvalidHexLength(hex.len()));
30        }
31        let mut bytes = [0u8; 32];
32        for i in 0..32 {
33            bytes[i] = u8::from_str_radix(&hex[i * 2..i * 2 + 2], 16)
34                .map_err(|_| HashError::InvalidHexChar)?;
35        }
36        Ok(ContentHash(bytes))
37    }
38
39    /// First two hex characters, used for directory sharding.
40    pub fn shard_prefix(&self) -> String {
41        format!("{:02x}", self.0[0])
42    }
43
44    /// Access the raw 32-byte hash.
45    pub fn as_bytes(&self) -> &[u8; 32] {
46        &self.0
47    }
48}
49
50impl fmt::Debug for ContentHash {
51    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52        write!(f, "ContentHash({})", &self.to_hex()[..12])
53    }
54}
55
56impl fmt::Display for ContentHash {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        write!(f, "{}", self.to_hex())
59    }
60}
61
/// Encode `bytes` as lowercase hexadecimal, two characters per input byte.
///
/// Returns an empty string for empty input. The output length is always
/// `bytes.len() * 2`.
fn hex_encode(bytes: &[u8]) -> String {
    // Table lookup avoids building a temporary `String` via `format!` for
    // every single byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut s = String::with_capacity(bytes.len() * 2);
    for &b in bytes {
        s.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
        s.push(char::from(HEX_DIGITS[usize::from(b & 0x0f)]));
    }
    s
}
69
70#[derive(Debug, thiserror::Error)]
71pub enum HashError {
72    #[error("invalid hex length: expected 64, got {0}")]
73    InvalidHexLength(usize),
74    #[error("invalid hex character")]
75    InvalidHexChar,
76}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same input twice yields equal hashes.
    #[test]
    fn hash_bytes_deterministic() {
        let input = b"hello world";
        let first = ContentHash::from_bytes(input);
        let second = ContentHash::from_bytes(input);
        assert_eq!(first, second);
    }

    /// Two distinct inputs hash to distinct values.
    #[test]
    fn hash_bytes_different_input_different_hash() {
        let hello = ContentHash::from_bytes(b"hello");
        let world = ContentHash::from_bytes(b"world");
        assert_ne!(hello, world);
    }

    /// `to_hex` followed by `from_hex` reproduces the original hash.
    #[test]
    fn hash_hex_roundtrip() {
        let original = ContentHash::from_bytes(b"test data");
        let encoded = original.to_hex();
        let decoded = ContentHash::from_hex(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// The shard prefix is the first two characters of the hex encoding.
    #[test]
    fn hash_shard_prefix() {
        let hash = ContentHash::from_bytes(b"test");
        let shard = hash.shard_prefix();
        let full_hex = hash.to_hex();
        assert_eq!(shard.len(), 2);
        assert_eq!(shard, &full_hex[..2]);
    }

    /// `from_raw` stores the bytes verbatim, whereas `from_bytes` hashes them.
    #[test]
    fn from_raw_does_not_rehash() {
        let digest = blake3::hash(b"test");
        let wrapped = ContentHash::from_raw(*digest.as_bytes());
        assert_eq!(wrapped.as_bytes(), digest.as_bytes());
        let rehashed = ContentHash::from_bytes(digest.as_bytes());
        assert_ne!(wrapped, rehashed);
    }
}