Skip to main content

mkit_core/
hash.rs

1//! BLAKE3 hashing helpers.
2//!
3//! A [`Hash`](tyalias@Hash) is a fixed 32-byte digest. The canonical hex form is 64
4//! lowercase characters. Object-store paths split the digest into a
5//! first-byte directory and 62-char file-name (see `SPEC-OBJECTS.md`
6//! §10).
7
8use core::fmt;
9
/// Number of bytes in the BLAKE3 digests mkit works with.
pub const HASH_LEN: usize = 32;
/// Number of characters in the lowercase-hex form of a
/// [`Hash`](tyalias@Hash): two hex digits per digest byte.
pub const HEX_LEN: usize = HASH_LEN * 2;

/// A BLAKE3 digest stored as a plain fixed-size byte array, so it is
/// `Copy` and cheap to pass by value.
pub type Hash = [u8; HASH_LEN];

/// Digest whose bytes are all zero. Serves as the "absent" sentinel for
/// optional annotation fields on commit objects (`message_hash`,
/// `content_digest`).
pub const ZERO: Hash = [0; HASH_LEN];
21
22/// Hash arbitrary bytes in one shot.
23#[must_use]
24pub fn hash(data: &[u8]) -> Hash {
25    let h = blake3::hash(data);
26    *h.as_bytes()
27}
28
29/// Incremental BLAKE3 hasher for streaming data.
30#[derive(Debug, Default, Clone)]
31pub struct Hasher {
32    inner: blake3::Hasher,
33}
34
35impl Hasher {
36    /// Create a fresh hasher.
37    #[must_use]
38    pub fn new() -> Self {
39        Self {
40            inner: blake3::Hasher::new(),
41        }
42    }
43
44    /// Absorb a chunk of input.
45    pub fn update(&mut self, data: &[u8]) -> &mut Self {
46        self.inner.update(data);
47        self
48    }
49
50    /// Finalise into a 32-byte digest.
51    #[must_use]
52    pub fn finalize(&self) -> Hash {
53        *self.inner.finalize().as_bytes()
54    }
55}
56
57/// Errors returned by [`from_hex`].
58#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
59pub enum FromHexError {
60    /// The input was not exactly [`HEX_LEN`] bytes.
61    #[error("hex digest must be {} chars, got {actual}", HEX_LEN)]
62    InvalidLength { actual: usize },
63    /// The input contained a non-hex character.
64    #[error("hex digest contained a non-hex byte")]
65    InvalidChar,
66}
67
/// Encode an arbitrary byte slice as lowercase hex, two characters per
/// input byte.
///
/// This hand-rolled table lookup is the workspace's canonical hex
/// encoder; it fills one pre-sized `String` rather than going through a
/// per-byte `format!("{:02x}")`, which allocates for every byte. Use it
/// wherever a byte slice needs hex rendering.
#[must_use]
pub fn to_hex_bytes(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        let hi = DIGITS[usize::from(byte >> 4)];
        let lo = DIGITS[usize::from(byte & 0x0f)];
        encoded.push(char::from(hi));
        encoded.push(char::from(lo));
    }
    encoded
}
81
82/// Render a [`Hash`](tyalias@Hash) as lowercase hex.
83#[must_use]
84pub fn to_hex(h: &Hash) -> String {
85    to_hex_bytes(h)
86}
87
88/// Domain-separated BLAKE3 digest.
89///
90/// Computes `BLAKE3(len_le16(domain) || domain || body)` — the
91/// canonical mkit recipe for binding a hash output to a domain string.
92/// The 2-byte little-endian length prefix is what stops the
93/// `(domain, body)` pair from being ambiguous; without it,
94/// `("ab", "cX")` and `("abc", "X")` would hash to the same input.
95///
96/// Domain strings are short ASCII constants in this codebase (e.g.
97/// `b"mkit-commit-v1"`); the `u16` cap is comfortable.
98///
99/// Used by `sign` (commit / remix signatures), `sparse` (tree hash
100/// binding the manifest to its source tree), and any future module
101/// that needs a domain-separated hash.
102///
103/// # Panics
104///
105/// Panics if `domain.len()` exceeds `u16::MAX`. Domain strings are
106/// fixed constants in this crate; callers MUST verify the length at
107/// construction time. The check is `debug_assert!` plus a `try_from`
108/// because exceeding 65 535 bytes would be a programmer error.
109#[must_use]
110pub fn domain_digest(domain: &[u8], body: &[u8]) -> Hash {
111    let mut h = blake3::Hasher::new();
112    let domain_len = u16::try_from(domain.len()).expect("domain <= u16::MAX");
113    h.update(&domain_len.to_le_bytes());
114    h.update(domain);
115    h.update(body);
116    *h.finalize().as_bytes()
117}
118
119/// Parse a lowercase-or-uppercase 64-char hex string into a [`Hash`](tyalias@Hash).
120/// Rejects any non-hex byte.
121pub fn from_hex(s: &str) -> Result<Hash, FromHexError> {
122    let bytes = s.as_bytes();
123    if bytes.len() != HEX_LEN {
124        return Err(FromHexError::InvalidLength {
125            actual: bytes.len(),
126        });
127    }
128    let mut out = [0u8; HASH_LEN];
129    for i in 0..HASH_LEN {
130        let hi = hex_nibble(bytes[i * 2])?;
131        let lo = hex_nibble(bytes[i * 2 + 1])?;
132        out[i] = (hi << 4) | lo;
133    }
134    Ok(out)
135}
136
137fn hex_nibble(b: u8) -> Result<u8, FromHexError> {
138    match b {
139        b'0'..=b'9' => Ok(b - b'0'),
140        b'a'..=b'f' => Ok(10 + (b - b'a')),
141        b'A'..=b'F' => Ok(10 + (b - b'A')),
142        _ => Err(FromHexError::InvalidChar),
143    }
144}
145
/// A digest's location in the object store, rendered as
/// `<first-byte-hex>/<remaining-62-hex>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ObjectPath {
    /// Directory component: two ASCII lowercase-hex characters.
    pub dir: [u8; 2],
    /// File-name component: 62 ASCII lowercase-hex characters.
    pub file: [u8; 62],
}

impl fmt::Display for ObjectPath {
    /// Writes `dir`, a `/`, then `file`.
    ///
    /// Panics if either field is not valid UTF-8; values produced by
    /// `object_path` are ASCII hex, so this only fires on a hand-built
    /// value that breaks that invariant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Validate both halves up front so nothing is written if either
        // one violates the ASCII-hex invariant.
        let dir = core::str::from_utf8(&self.dir).expect("ascii hex");
        let file = core::str::from_utf8(&self.file).expect("ascii hex");
        f.write_str(dir)?;
        f.write_str("/")?;
        f.write_str(file)
    }
}
166
167/// Split a [`Hash`](tyalias@Hash) into its object-store path components.
168#[must_use]
169pub fn object_path(h: &Hash) -> ObjectPath {
170    let hex = to_hex(h);
171    let bytes = hex.as_bytes();
172    let mut dir = [0u8; 2];
173    let mut file = [0u8; 62];
174    dir.copy_from_slice(&bytes[..2]);
175    file.copy_from_slice(&bytes[2..]);
176    ObjectPath { dir, file }
177}
178
/// Unit tests for the hashing helpers: known BLAKE3 vectors, hex
/// encode/decode round-trips and rejection cases, the domain-separated
/// digest recipe, and the object-store path split.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn known_vector_hello() {
        let h = hash(b"hello");
        assert_eq!(
            to_hex(&h),
            "ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f"
        );
    }

    #[test]
    fn to_hex_bytes_matches_to_hex_for_32_byte_slice() {
        let h = hash(b"any");
        assert_eq!(to_hex_bytes(h.as_slice()), to_hex(&h));
    }

    #[test]
    fn to_hex_bytes_handles_arbitrary_length() {
        assert_eq!(to_hex_bytes(b""), "");
        assert_eq!(to_hex_bytes(&[0x00, 0xff]), "00ff");
        assert_eq!(to_hex_bytes(&[0xde, 0xad, 0xbe, 0xef]), "deadbeef");
    }

    #[test]
    fn incremental_matches_oneshot() {
        let oneshot = hash(b"hello world");
        let mut h = Hasher::new();
        h.update(b"hello ").update(b"world");
        assert_eq!(oneshot, h.finalize());
    }

    #[test]
    fn from_hex_roundtrip() {
        let h = hash(b"test");
        let hex = to_hex(&h);
        let parsed = from_hex(&hex).unwrap();
        assert_eq!(h, parsed);
    }

    #[test]
    fn from_hex_accepts_mixed_case() {
        let lower = "ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f";
        let upper = lower.to_ascii_uppercase();
        assert_eq!(from_hex(lower).unwrap(), from_hex(&upper).unwrap());
    }

    #[test]
    fn from_hex_rejects_too_short() {
        assert!(matches!(
            from_hex("abcdef"),
            Err(FromHexError::InvalidLength { .. })
        ));
    }

    #[test]
    fn from_hex_rejects_too_long() {
        let long = "0".repeat(HEX_LEN + 1);
        assert_eq!(
            from_hex(&long),
            Err(FromHexError::InvalidLength {
                actual: HEX_LEN + 1
            })
        );
    }

    #[test]
    fn from_hex_rejects_bad_char() {
        let bad: String = "gg".chars().chain("00".repeat(31).chars()).collect();
        assert_eq!(from_hex(&bad), Err(FromHexError::InvalidChar));
    }

    #[test]
    fn from_hex_rejects_multibyte_input_without_panicking() {
        // 32 two-byte UTF-8 characters are exactly HEX_LEN bytes, so the
        // input passes the length gate and must fail on the digit check.
        let sneaky = "é".repeat(HASH_LEN);
        assert_eq!(sneaky.len(), HEX_LEN);
        assert_eq!(from_hex(&sneaky), Err(FromHexError::InvalidChar));
    }

    #[test]
    fn to_hex_of_zero_is_all_zeros() {
        assert_eq!(to_hex(&ZERO), "0".repeat(HEX_LEN));
    }

    #[test]
    fn domain_digest_matches_documented_recipe() {
        // BLAKE3(len_le16(domain) || domain || body), built by hand.
        let domain = b"mkit-commit-v1";
        let body = b"payload";
        let len = u16::try_from(domain.len()).unwrap();
        let mut input = Vec::new();
        input.extend_from_slice(&len.to_le_bytes());
        input.extend_from_slice(domain);
        input.extend_from_slice(body);
        assert_eq!(domain_digest(domain, body), hash(&input));
    }

    #[test]
    fn domain_digest_length_prefix_disambiguates() {
        // Same concatenated bytes, different split: the length prefix
        // must make the digests differ.
        assert_ne!(domain_digest(b"ab", b"cX"), domain_digest(b"abc", b"X"));
    }

    #[test]
    fn object_path_splits_correctly() {
        let h = hash(b"test");
        let path = object_path(&h);
        let hex = to_hex(&h);
        assert_eq!(&path.dir, &hex.as_bytes()[..2]);
        assert_eq!(&path.file[..], &hex.as_bytes()[2..]);
    }

    #[test]
    fn object_path_display_joins_with_slash() {
        let rendered = object_path(&ZERO).to_string();
        assert_eq!(rendered, format!("00/{}", "0".repeat(HEX_LEN - 2)));
    }
}