Skip to main content

paramodel_elements/
fingerprint.rs

1// Copyright (c) Jonathan Shook
2// SPDX-License-Identifier: Apache-2.0
3
4//! BLAKE3 content fingerprints.
5//!
6//! Per SRD-0003 D7, content-addressed identifiers use BLAKE3 with a
7//! lowercase-hex full representation and a `b3:` short form for UI. The
8//! `Fingerprint` type wraps the 32-byte digest; `FingerprintBuilder` wraps
9//! a `blake3::Hasher` so callers can stream canonical bytes into a hash
10//! without materialising them.
11
12use std::fmt;
13
14use serde::{Deserialize, Serialize, de};
15
/// BLAKE3 content fingerprint, stored as the raw 32-byte digest.
///
/// * `Display` prints the whole digest as 64 lowercase hex characters.
/// * [`Self::display_short`] yields the compact `b3:<12 hex>` form meant
///   for UI and logs; `Debug` wraps that same short form as
///   `Fingerprint(b3:<12 hex>)`.
/// * Serialisation emits the full lowercase hex string, which keeps
///   fingerprints human-inspectable inside JSON documents.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Fingerprint([u8; 32]);
24
25impl Fingerprint {
26    /// Construct from an existing 32-byte digest.
27    #[must_use]
28    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
29        Self(bytes)
30    }
31
32    /// Hash the given bytes as a one-shot.
33    #[must_use]
34    pub fn of(bytes: &[u8]) -> Self {
35        Self(*blake3::hash(bytes).as_bytes())
36    }
37
38    /// Borrow the raw digest.
39    #[must_use]
40    pub const fn as_bytes(&self) -> &[u8; 32] {
41        &self.0
42    }
43
44    /// Render the short `b3:<12 hex>` form for UI and logs.
45    #[must_use]
46    pub fn display_short(&self) -> String {
47        let mut out = String::with_capacity(3 + 12);
48        out.push_str("b3:");
49        for byte in &self.0[..6] {
50            let _ = std::fmt::Write::write_fmt(&mut out, format_args!("{byte:02x}"));
51        }
52        out
53    }
54}
55
56impl fmt::Debug for Fingerprint {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        // Short form keeps log lines readable.
59        write!(f, "Fingerprint({})", self.display_short())
60    }
61}
62
63impl fmt::Display for Fingerprint {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        for byte in &self.0 {
66            write!(f, "{byte:02x}")?;
67        }
68        Ok(())
69    }
70}
71
72impl Serialize for Fingerprint {
73    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
74        serializer.collect_str(self)
75    }
76}
77
78impl<'de> Deserialize<'de> for Fingerprint {
79    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
80        let s = <&str>::deserialize(deserializer)?;
81        parse_hex(s).map(Self).map_err(de::Error::custom)
82    }
83}
84
85fn parse_hex(s: &str) -> Result<[u8; 32], &'static str> {
86    if s.len() != 64 {
87        return Err("expected a 64-character hex string");
88    }
89    let mut out = [0u8; 32];
90    let bytes = s.as_bytes();
91    for i in 0..32 {
92        let hi = hex_digit(bytes[i * 2])?;
93        let lo = hex_digit(bytes[i * 2 + 1])?;
94        out[i] = (hi << 4) | lo;
95    }
96    Ok(out)
97}
98
/// Convert one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
///
/// # Errors
///
/// Returns a static message for any byte that is not a hex digit.
const fn hex_digit(b: u8) -> Result<u8, &'static str> {
    if b.is_ascii_digit() {
        return Ok(b - b'0');
    }
    // Fold case so `A..=F` and `a..=f` share a single branch; bytes that are
    // not ASCII uppercase letters pass through unchanged.
    let folded = b.to_ascii_lowercase();
    if matches!(folded, b'a'..=b'f') {
        Ok(folded - b'a' + 10)
    } else {
        Err("non-hex character in fingerprint")
    }
}
107
108/// Streaming fingerprint builder.
109///
110/// Wraps a `blake3::Hasher` so callers can write canonical bytes into a
111/// hash incrementally without materialising them first.
112#[derive(Debug, Default)]
113pub struct FingerprintBuilder {
114    hasher: blake3::Hasher,
115}
116
117impl FingerprintBuilder {
118    /// Start a new builder.
119    #[must_use]
120    pub fn new() -> Self {
121        Self::default()
122    }
123
124    /// Feed bytes into the hash.
125    #[must_use]
126    pub fn update(mut self, data: &[u8]) -> Self {
127        self.hasher.update(data);
128        self
129    }
130
131    /// Feed a single byte into the hash. Convenient for tag bytes and
132    /// discriminators.
133    #[must_use]
134    pub fn byte(self, byte: u8) -> Self {
135        self.update(&[byte])
136    }
137
138    /// Feed a UTF-8 string preceded by a 4-byte little-endian length prefix.
139    #[must_use]
140    pub fn length_prefixed_str(self, s: &str) -> Self {
141        let len = u32::try_from(s.len()).expect("string length fits in u32");
142        self.u32_le(len).update(s.as_bytes())
143    }
144
145    /// Feed a `u32` as four little-endian bytes.
146    #[must_use]
147    pub fn u32_le(self, v: u32) -> Self {
148        self.update(&v.to_le_bytes())
149    }
150
151    /// Feed a `u64` as eight little-endian bytes.
152    #[must_use]
153    pub fn u64_le(self, v: u64) -> Self {
154        self.update(&v.to_le_bytes())
155    }
156
157    /// Feed an `i64` as eight little-endian bytes.
158    #[must_use]
159    pub fn i64_le(self, v: i64) -> Self {
160        self.update(&v.to_le_bytes())
161    }
162
163    /// Finalise the hash.
164    #[must_use]
165    pub fn finish(self) -> Fingerprint {
166        Fingerprint(*self.hasher.finalize().as_bytes())
167    }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn of_is_deterministic() {
        let a = Fingerprint::of(b"hello");
        let b = Fingerprint::of(b"hello");
        assert_eq!(a, b);
    }

    #[test]
    fn of_distinguishes_inputs() {
        let a = Fingerprint::of(b"hello");
        let b = Fingerprint::of(b"hellp");
        assert_ne!(a, b);
    }

    #[test]
    fn display_is_64_lowercase_hex() {
        let fp = Fingerprint::of(b"");
        let s = format!("{fp}");
        assert_eq!(s.len(), 64);
        assert!(s.chars().all(|c| c.is_ascii_hexdigit() && !c.is_uppercase()));
    }

    #[test]
    fn display_short_has_b3_prefix_and_12_hex_chars() {
        let fp = Fingerprint::of(b"hello");
        let s = fp.display_short();
        assert!(s.starts_with("b3:"));
        assert_eq!(s.len(), 3 + 12);
        assert!(s[3..].chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn builder_matches_oneshot() {
        let one_shot = Fingerprint::of(b"hello world");
        let streamed = FingerprintBuilder::new()
            .update(b"hello")
            .update(b" ")
            .update(b"world")
            .finish();
        assert_eq!(one_shot, streamed);
    }

    #[test]
    fn serde_roundtrip_via_hex() {
        let fp = Fingerprint::of(b"serde-me");
        let json = serde_json::to_string(&fp).unwrap();
        let back: Fingerprint = serde_json::from_str(&json).unwrap();
        assert_eq!(fp, back);
        // Serialised form is the full 64-char lowercase hex string.
        let expected = format!("\"{fp}\"");
        assert_eq!(json, expected);
    }

    #[test]
    fn deserialising_non_hex_fails() {
        // The input must be exactly 64 characters long; otherwise the length
        // check rejects it first and the hex-digit validation is never
        // exercised. Start from a valid fingerprint and corrupt one digit.
        let mut text = Fingerprint::of(b"non-hex").to_string();
        text.replace_range(63.., "g");
        assert_eq!(text.len(), 64);

        let json = format!("\"{text}\"");
        let parsed: Result<Fingerprint, _> = serde_json::from_str(&json);
        assert!(parsed.is_err());
    }

    #[test]
    fn deserialising_wrong_length_fails() {
        let full = Fingerprint::of(b"wrong-length").to_string();
        let too_short = format!("\"{}\"", &full[..63]);
        let too_long = format!("\"{full}0\"");
        assert!(serde_json::from_str::<Fingerprint>(&too_short).is_err());
        assert!(serde_json::from_str::<Fingerprint>(&too_long).is_err());
    }

    #[test]
    fn deserialising_uppercase_hex_is_accepted() {
        // The parser is lenient about case even though output is lowercase.
        let fp = Fingerprint::of(b"upper");
        let json = format!("\"{}\"", fp.to_string().to_uppercase());
        let back: Fingerprint = serde_json::from_str(&json).unwrap();
        assert_eq!(fp, back);
    }

    #[test]
    fn builder_tag_and_length_prefixed_str() {
        let direct = FingerprintBuilder::new()
            .byte(0x42)
            .length_prefixed_str("abc")
            .finish();
        let manual = FingerprintBuilder::new()
            .update(&[0x42])
            .update(&3u32.to_le_bytes())
            .update(b"abc")
            .finish();
        assert_eq!(direct, manual);
    }
}