Skip to main content

triblespace_core/inline/encodings/
hash.rs

1use crate::blob::BlobEncoding;
2use crate::id::ExclusiveId;
3use crate::id::Id;
4use crate::id_hex;
5use crate::macros::entity;
6use crate::metadata;
7use crate::metadata::MetaDescribe;
8use crate::trible::Fragment;
9use crate::inline::RawInline;
10use crate::inline::TryFromInline;
11use crate::inline::TryToInline;
12use crate::inline::Inline;
13use crate::inline::InlineEncoding;
14use std::convert::Infallible;
15
16use anybytes::Bytes;
17use hex::FromHex;
18use hex::FromHexError;
19use std::marker::PhantomData;
20
21/// A 32-byte content-addressed hash function.
22///
23/// triblespace's *storage* layer (handles, blob stores, piles) is
24/// fixed to [`Blake3`] — that's the content-addressing hash that
25/// produces every [`Handle<T>`]. This trait stays generic so
26/// [`Hash<H>`] can carry digests produced by *other* hash functions
27/// (e.g. an external system's SHA-256 fingerprints) alongside
28/// Blake3 in the same store, distinguished by their schema type at
29/// the value layer. Only the storage-side parameter went away; the
30/// value-side distinction between digest families is still useful.
31pub trait HashProtocol: Sized + 'static + MetaDescribe {
32    /// Short lowercase name used in serialised representations (e.g. `"blake3"`).
33    const NAME: &'static str;
34
35    /// One-shot convenience: hash `bytes` and return the digest.
36    fn digest(bytes: &[u8]) -> RawInline;
37}
38
39/// Blake3 hash protocol — the canonical content-addressing hash
40/// for triblespace blob storage. The [`MemoryBlobStore`], [`Pile`],
41/// and [`Handle`] types are all implicitly Blake3-backed.
42///
43/// Implements [`HashProtocol`] so [`Hash<Blake3>`] is also a valid
44/// "blake3 digest" inline encoding, parallel to hypothetical
45/// `Hash<Sha256>` etc. for foreign-hash fingerprints.
46pub struct Blake3 {
47    hasher: blake3::Hasher,
48}
49
50impl Clone for Blake3 {
51    fn clone(&self) -> Self {
52        Self {
53            hasher: self.hasher.clone(),
54        }
55    }
56}
57
58impl Default for Blake3 {
59    fn default() -> Self {
60        Self::new()
61    }
62}
63
64impl Blake3 {
65    /// Short lowercase name used in serialised representations.
66    pub const NAME: &'static str = <Self as HashProtocol>::NAME;
67
68    /// Create a fresh hasher ready to accept input.
69    pub fn new() -> Self {
70        Self {
71            hasher: blake3::Hasher::new(),
72        }
73    }
74
75    /// Feed `bytes` into the streaming state.
76    pub fn update(&mut self, bytes: &[u8]) {
77        self.hasher.update(bytes);
78    }
79
80    /// Return the 32-byte digest of the bytes fed so far.
81    pub fn finalize(&self) -> RawInline {
82        *self.hasher.finalize().as_bytes()
83    }
84
85    /// One-shot convenience: hash `bytes` with Blake3 and return
86    /// the 32-byte digest. Mirrors [`HashProtocol::digest`] as an
87    /// inherent method so call sites don't need to import the trait.
88    pub fn digest(bytes: &[u8]) -> RawInline {
89        <Self as HashProtocol>::digest(bytes)
90    }
91}
92
93impl HashProtocol for Blake3 {
94    const NAME: &'static str = "blake3";
95
96    fn digest(bytes: &[u8]) -> RawInline {
97        *blake3::hash(bytes).as_bytes()
98    }
99}
100
/// An inline encoding for a 32-byte hash digest.
///
/// The parameter `H` names the hash function: `Hash<Blake3>` for digests
/// produced by blake3, or a hypothetical `Hash<Sha256>` for foreign 256-bit
/// fingerprints stored alongside them. Keeping the type parametric lets a
/// store hold both kinds of digest while distinguishing them at the type
/// level; only the storage-side wiring ([`Handle`], [`MemoryBlobStore`],
/// piles) is pinned to Blake3.
///
/// See the [crate::id] module documentation for a discussion on the
/// length of the digest and its role as an intrinsic identifier.
pub struct Hash<H> {
    // Pure type-level marker: no `H` value is ever stored.
    _hasher: PhantomData<fn(H)>,
}
115
116impl<H> MetaDescribe for Hash<H>
117where
118    H: HashProtocol,
119{
120    fn describe() -> Fragment {
121        H::describe()
122    }
123}
124
125impl<H> InlineEncoding for Hash<H>
126where
127    H: HashProtocol,
128{
129    type ValidationError = Infallible;
130    type Encoding = Self;
131}
132
133impl<H> Hash<H>
134where
135    H: HashProtocol,
136{
137    /// Computes the hash of `blob` and returns it as a value.
138    pub fn digest(blob: &Bytes) -> Inline<Self> {
139        Inline::new(H::digest(blob))
140    }
141
142    /// Parses a hex-encoded digest string into a hash value.
143    pub fn from_hex(hex: &str) -> Result<Inline<Self>, FromHexError> {
144        let digest = RawInline::from_hex(hex)?;
145        Ok(Inline::new(digest))
146    }
147
148    /// Returns the digest as an uppercase hex string.
149    pub fn to_hex(value: &Inline<Self>) -> String {
150        hex::encode_upper(value.raw)
151    }
152}
153
154impl<H: HashProtocol> TryFromInline<'_, Hash<H>> for String {
155    type Error = std::convert::Infallible;
156    fn try_from_inline(v: &Inline<Hash<H>>) -> Result<Self, std::convert::Infallible> {
157        let mut out = String::new();
158        out.push_str(H::NAME);
159        out.push(':');
160        out.push_str(&hex::encode(v.raw));
161        Ok(out)
162    }
163}
164
165/// An error that can occur when converting a hash value from a string.
166/// The error can be caused by a bad protocol or a bad hex encoding.
167#[derive(Debug, Clone, Copy, PartialEq)]
168pub enum HashError {
169    /// The string does not start with the expected protocol prefix
170    /// (e.g. `"blake3:"`).
171    BadProtocol,
172    /// The hex portion could not be decoded.
173    BadHex(FromHexError),
174}
175
176impl From<FromHexError> for HashError {
177    fn from(value: FromHexError) -> Self {
178        HashError::BadHex(value)
179    }
180}
181
182impl<H: HashProtocol> TryToInline<Hash<H>> for &str {
183    type Error = HashError;
184
185    fn try_to_inline(self) -> Result<Inline<Hash<H>>, Self::Error> {
186        let protocol = H::NAME;
187        if !(self.starts_with(protocol) && &self[protocol.len()..=protocol.len()] == ":") {
188            return Err(HashError::BadProtocol);
189        }
190        let digest = RawInline::from_hex(&self[protocol.len() + 1..])?;
191
192        Ok(Inline::new(digest))
193    }
194}
195
196impl<H: HashProtocol> TryToInline<Hash<H>> for String {
197    type Error = HashError;
198
199    fn try_to_inline(self) -> Result<Inline<Hash<H>>, Self::Error> {
200        (&self[..]).try_to_inline()
201    }
202}
203
204fn describe_hash<H: HashProtocol>(id: Id) -> Fragment {
205    let name = H::NAME;
206    #[allow(unused_mut)]
207    let mut tribles = entity! { ExclusiveId::force_ref(&id) @
208        metadata::name: name,
209        metadata::description: format!(
210            "{name} 256-bit hash digest of raw bytes. The value stores the digest bytes and is stable across systems.\n\nUse for content-addressed identifiers, deduplication, or integrity checks. Use Handle when you need a typed blob reference with schema metadata.\n\nHashes do not carry type information; the meaning comes from the schema that uses them. If you need provenance or typed payloads, combine with handles or additional metadata."
211        ),
212        metadata::tag: metadata::KIND_INLINE_ENCODING,
213    };
214    #[cfg(feature = "wasm")]
215    {
216        tribles += entity! { ExclusiveId::force_ref(&id) @
217            metadata::value_formatter: wasm_formatter::HASH_HEX_WASM,
218        };
219    }
220    tribles
221}
222
// Value formatter for 32-byte hash values; only compiled when the `wasm`
// feature is enabled.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes `raw` as the literal prefix `hash:` followed by 64 uppercase
    // hexadecimal characters (two per byte, high nibble first). Any error
    // reported by `out` is mapped to the error code `1`.
    //
    // NOTE(review): the prefix is always `hash:`, independent of the hash
    // protocol, whereas the `String` conversion in this file uses
    // `H::NAME` and lowercase hex — confirm this difference is intended.
    //
    // NOTE(review): other code in this file refers to
    // `wasm_formatter::HASH_HEX_WASM`, which is not spelled out here;
    // presumably `#[value_formatter]` generates it from this function —
    // confirm against the macro.
    #[value_formatter]
    pub(crate) fn hash_hex(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        out.write_str("hash:").map_err(|_| 1u32)?;
        // Lookup table mapping a nibble (0..=15) to its uppercase hex digit.
        const TABLE: &[u8; 16] = b"0123456789ABCDEF";
        for &byte in raw {
            let hi = (byte >> 4) as usize;
            let lo = (byte & 0x0F) as usize;
            out.write_char(TABLE[hi] as char).map_err(|_| 1u32)?;
            out.write_char(TABLE[lo] as char).map_err(|_| 1u32)?;
        }
        Ok(())
    }
}
242
/// The **lightweight reference form** of a content-addressed blob.
///
/// A `Handle<T>` is a 32-byte Blake3 hash plus a phantom marker for
/// the referenced blob's schema. It's the small, trible-storable,
/// network-sendable counterpart to a [`Blob<T>`][b] — the same
/// content/reference duality as `&[u8]`/`Vec<u8>`, except the
/// reference is hash-based rather than pointer-based and survives
/// crossing process and storage boundaries.
///
/// You store handles in tribles. You store blobs in
/// [`MemoryBlobStore`][m] / [`Pile`][p] / any other [`BlobStore`][bs]
/// backend. Pairing them — `(handle in trible) ↔ (blob in store)` —
/// is the canonical pattern for keeping the entity graph compact
/// while leaving heavy payloads (text, binary data, archived
/// subgraphs) addressable by content rather than by location.
///
/// Handles are produced *by* blobs: [`Blob::new`][bn] hashes the
/// bytes and stores the handle in the blob; [`Blob::get_handle`][bg]
/// returns it. A `&Blob<T>` also `AsRef`s to its handle, so passing
/// "the lightweight reference" through APIs that accept
/// `&Inline<Handle<T>>` is allocation-free.
///
/// This struct is used as the schema parameter of `Inline<Handle<T>>`;
/// both of its fields are markers and hold no digest bytes themselves.
///
/// [b]: crate::blob::Blob
/// [bn]: crate::blob::Blob::new
/// [bg]: crate::blob::Blob::get_handle
/// [m]: crate::blob::MemoryBlobStore
/// [p]: crate::repo::pile::Pile
/// [bs]: crate::repo::BlobStore
#[repr(transparent)]
pub struct Handle<T: BlobEncoding> {
    // Marks the digest family as Blake3. `Hash<H>` contains only a
    // `PhantomData`, so no bytes are stored here.
    digest: Hash<Blake3>,
    // Marks the blob encoding `T` of the referenced blob; no `T` value is
    // stored.
    _type: PhantomData<T>,
}
276
277impl<T: BlobEncoding> Handle<T> {
278    /// Wraps a Blake3 hash value as a typed handle.
279    pub fn from_hash(hash: Inline<Hash<Blake3>>) -> Inline<Self> {
280        hash.transmute()
281    }
282
283    /// Extracts the underlying Blake3 hash, discarding the blob encoding type.
284    pub fn to_hash(handle: Inline<Self>) -> Inline<Hash<Blake3>> {
285        handle.transmute()
286    }
287}
288
289impl<T: BlobEncoding> From<Inline<Hash<Blake3>>> for Inline<Handle<T>> {
290    fn from(value: Inline<Hash<Blake3>>) -> Self {
291        value.transmute()
292    }
293}
294
295impl<T: BlobEncoding> From<Inline<Handle<T>>> for Inline<Hash<Blake3>> {
296    fn from(value: Inline<Handle<T>>) -> Self {
297        value.transmute()
298    }
299}
300
301impl<T> MetaDescribe for Handle<T>
302where
303    T: BlobEncoding + MetaDescribe,
304{
305    fn describe() -> Fragment {
306        // Entity core via `*:` spread. `T::describe()` runs once: its
307        // root becomes the value of `metadata::blob_encoding` and its
308        // facts + blobs fold in automatically. With the hash protocol
309        // fixed to Blake3, only the blob encoding parameter distinguishes
310        // one `Handle<T>` monomorphization from another. Annotations
311        // share the derived root, so merging them with `+=` re-unions
312        // the same id into exports (idempotent) and folds their facts +
313        // auto-put blobs into the core.
314        let mut core = entity! {
315            metadata::blob_encoding*: T::describe(),
316            metadata::hash_schema*: Blake3::describe(),
317            metadata::tag: metadata::KIND_INLINE_ENCODING,
318        };
319        let name = Blake3::NAME;
320        let id = core.root().expect("rooted");
321        let id_ref = ExclusiveId::force_ref(&id);
322        core += entity! { id_ref @
323            metadata::name: "handle",
324            metadata::description: format!(
325                "Typed handle for blobs hashed with {name}; the value stores the digest and metadata points at the referenced blob encoding. The schema id is derived from the hash and blob encoding.\n\nUse when referencing blobs from tribles without embedding data; the blob store holds the payload. For untyped content hashes, use the hash schema directly.\n\nHandles assume the blob store is available and consistent with the digest. If the blob is missing, the handle still validates but dereferencing will fail."
326            ),
327        };
328        #[cfg(feature = "wasm")]
329        {
330            core += entity! { id_ref @
331                metadata::value_formatter: wasm_formatter::HASH_HEX_WASM,
332            };
333        }
334        core
335    }
336}
337
338impl<T: BlobEncoding + MetaDescribe> InlineEncoding for Handle<T> {
339    type ValidationError = Infallible;
340    type Encoding = T;
341}
342
343impl MetaDescribe for Blake3 {
344    fn describe() -> Fragment {
345        describe_hash::<Self>(id_hex!("4160218D6C8F620652ECFBD7FDC7BDB3"))
346    }
347}
348
#[cfg(test)]
mod tests {
    use crate::inline::encodings::hash::HashError;
    use crate::prelude::*;
    use rand;

    use super::{Blake3, Hash};

    /// A random digest rendered to a string must parse back to the same bytes.
    #[test]
    fn value_roundtrip() {
        let raw: [u8; 32] = rand::random();
        let v: Inline<Hash<Blake3>> = Inline::new(raw);
        let s: String = v.from_inline();
        let back: Inline<Hash<Blake3>> = s.try_to_inline().expect("roundtrip should succeed");
        assert_eq!(back.raw, raw);
    }

    /// A well-formed constant parses, and its hex rendering matches the input digits.
    #[test]
    fn value_from_known() {
        let s: &str = "blake3:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let v: Inline<Hash<Blake3>> = s
            .try_to_inline()
            .expect("packing valid constant should succeed");
        assert_eq!(Hash::<Blake3>::to_hex(&v), &s["blake3:".len()..]);
    }

    /// A wrong protocol prefix is reported as `BadProtocol`.
    #[test]
    fn to_value_fail_protocol() {
        let s: &str = "bad:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let err: HashError = <&str as TryToInline<Hash<Blake3>>>::try_to_inline(s)
            .expect_err("packing invalid protocol should fail");
        assert_eq!(err, HashError::BadProtocol);
    }

    /// A correct prefix followed by undecodable hex is reported as `BadHex`.
    #[test]
    fn to_value_fail_hex() {
        let s: &str = "blake3:BAD!";
        let err: HashError = <&str as TryToInline<Hash<Blake3>>>::try_to_inline(s)
            .expect_err("packing invalid hex should fail");
        assert!(matches!(err, HashError::BadHex(..)));
    }
}