Skip to main content

haz_cache/
hasher.rs

1//! Hash-function dispatch.
2//!
3//! The cache-key derivation feeds canonicalised bytes into a single
4//! hash function per workspace invocation (`CACHE-002`). The set of
5//! supported functions is closed and append-only: BLAKE3 (default)
6//! and SHA-256.
7//!
8//! [`Hasher`] is an enum because the registry is closed by design;
9//! see also `docs/spec/09-caching.md` `CACHE-002`. Adding a new
10//! variant requires bumping `hash_function_id` per the registry's
11//! append-only rule.
12
13use haz_domain::settings::cache::HashAlgo;
14use sha2::Digest as _;
15
16/// Streaming hash-function dispatcher.
17///
18/// Construct with [`Hasher::new`], feed canonical bytes via
19/// [`Hasher::update`], finalise with [`Hasher::finalize`]. The
20/// finalised digest is a fixed-width 32-byte array: both
21/// specification-recognised functions (`CACHE-002`) emit 32 bytes
22/// (BLAKE3's 256-bit output, SHA-256's natural width). The cache
23/// key is the finalised digest unchanged (`CACHE-001`,
24/// `CACHE-009`).
25///
26/// `Hasher` is intentionally not [`Clone`]: cache-key derivation
27/// consumes a single hasher across all components in order. Cloning
28/// the in-progress state would invite subtle errors where two keys
29/// share a prefix and diverge.
30///
31/// [`Hasher::Blake3`]'s state is large (about 2 KiB of internal
32/// buffers); it is boxed to keep the enum's stack footprint
33/// modest, since SHA-256's state is roughly 112 bytes.
34pub enum Hasher {
35    /// BLAKE3-256.
36    Blake3(Box<blake3::Hasher>),
37    /// SHA-256.
38    Sha256(sha2::Sha256),
39}
40
41impl Hasher {
42    /// Construct a fresh hasher for `algo`.
43    #[must_use]
44    pub fn new(algo: HashAlgo) -> Self {
45        match algo {
46            HashAlgo::Blake3 => Self::Blake3(Box::new(blake3::Hasher::new())),
47            HashAlgo::Sha256 => Self::Sha256(sha2::Sha256::new()),
48        }
49    }
50
51    /// Feed `bytes` into the hash. May be called any number of
52    /// times; the final digest depends on the concatenation of all
53    /// supplied byte slices in call order.
54    pub fn update(&mut self, bytes: &[u8]) {
55        match self {
56            Self::Blake3(h) => {
57                h.update(bytes);
58            }
59            Self::Sha256(h) => {
60                sha2::Digest::update(h, bytes);
61            }
62        }
63    }
64
65    /// Consume the hasher and return the 32-byte digest.
66    #[must_use]
67    pub fn finalize(self) -> [u8; 32] {
68        match self {
69            Self::Blake3(h) => *h.finalize().as_bytes(),
70            Self::Sha256(h) => {
71                let out = sha2::Digest::finalize(h);
72                out.into()
73            }
74        }
75    }
76}
77
#[cfg(test)]
mod tests {
    use haz_domain::settings::cache::HashAlgo;

    use crate::hasher::Hasher;

    /// One-shot convenience: hash `bytes` with a fresh `algo` hasher.
    fn hash_bytes(algo: HashAlgo, bytes: &[u8]) -> [u8; 32] {
        let mut hasher = Hasher::new(algo);
        hasher.update(bytes);
        hasher.finalize()
    }

    #[test]
    fn cache_002_blake3_empty_input_matches_reference_vector() {
        // Known-answer test: BLAKE3 digest of the empty input.
        let expected = [
            0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc,
            0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca,
            0xe4, 0x1f, 0x32, 0x62,
        ];
        let actual = hash_bytes(HashAlgo::Blake3, b"");
        assert_eq!(actual, expected);
    }

    #[test]
    fn cache_002_sha256_empty_input_matches_reference_vector() {
        // Known-answer test: SHA-256 digest of the empty input.
        let expected = [
            0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f,
            0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b,
            0x78, 0x52, 0xb8, 0x55,
        ];
        let actual = hash_bytes(HashAlgo::Sha256, b"");
        assert_eq!(actual, expected);
    }

    #[test]
    fn cache_002_sha256_abc_matches_reference_vector() {
        // Known-answer test: the FIPS 180-2 SHA-256 digest of "abc".
        let expected = [
            0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
            0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
            0xf2, 0x00, 0x15, 0xad,
        ];
        let actual = hash_bytes(HashAlgo::Sha256, b"abc");
        assert_eq!(actual, expected);
    }

    #[test]
    fn cache_002_blake3_and_sha256_diverge_on_same_input() {
        // Same bytes, different function: the digests must differ.
        let input: &[u8] = b"identical";
        let blake = hash_bytes(HashAlgo::Blake3, input);
        let sha = hash_bytes(HashAlgo::Sha256, input);
        assert_ne!(
            blake, sha,
            "the two hash functions must never coincide on a single input"
        );
    }

    #[test]
    fn update_chunking_is_irrelevant_to_digest() {
        // For every supported function, feeding the pieces one at a
        // time must agree with feeding their concatenation in one go.
        let pieces: [&[u8]; 2] = [b"hello, ", b"world"];
        let joined: Vec<u8> = pieces.concat();

        for algo in [HashAlgo::Blake3, HashAlgo::Sha256] {
            let mut piecewise = Hasher::new(algo);
            for piece in pieces {
                piecewise.update(piece);
            }
            let chunked = piecewise.finalize();

            let mut single = Hasher::new(algo);
            single.update(&joined);
            let whole = single.finalize();

            assert_eq!(chunked, whole, "{algo:?} digest depends on chunking");
        }
    }

    #[test]
    fn fresh_hasher_each_call() {
        // Two separately constructed hashers of the same function
        // share no state: hashing identical bytes through each one
        // yields identical digests.
        for algo in [HashAlgo::Blake3, HashAlgo::Sha256] {
            let first = hash_bytes(algo, b"twice");
            let second = hash_bytes(algo, b"twice");
            assert_eq!(first, second);
        }
    }
}