Skip to main content

triblespace_core/value/schemas/
hash.rs

1use crate::blob::BlobSchema;
2use crate::id::ExclusiveId;
3use crate::id::Id;
4use crate::id_hex;
5use crate::macros::entity;
6use crate::metadata;
7use crate::metadata::{ConstDescribe, ConstId};
8use crate::repo::BlobStore;
9use crate::trible::Fragment;
10use crate::trible::TribleSet;
11use crate::value::TryFromValue;
12use crate::value::RawValue;
13use crate::value::TryToValue;
14use crate::value::Value;
15use crate::value::ValueSchema;
16use std::convert::Infallible;
17
18use anybytes::Bytes;
19use digest::typenum::U32;
20use digest::Digest;
21use hex::FromHex;
22use hex::FromHexError;
23use std::marker::PhantomData;
24
/// A 256-bit hash function that can parameterise a value schema.
///
/// Implementors can be used in a value schema, for example via a
/// [struct@Hash] or a [`Handle`]. The supertrait bounds require a
/// [`Digest`] with a 32-byte (`U32`) output that is also `Clone`, `Send`,
/// `'static` and [`ConstDescribe`].
pub trait HashProtocol: Digest<OutputSize = U32> + Clone + Send + 'static + ConstDescribe {
    /// Short lowercase name used in serialised representations (e.g. `"blake3"`).
    const NAME: &'static str;
}
32
/// A value schema for a hash.
/// A hash is a fixed-size 256-bit digest of a byte sequence.
///
/// See the [crate::id] module documentation for a discussion on the length
/// of the digest and its role as an intrinsic identifier.
///
/// The type parameter `H` selects the hash function; it only appears in a
/// zero-sized marker, so the schema type itself stores no hasher state.
pub struct Hash<H> {
    // `fn(H) -> ()` ties the schema to `H` without owning a value of type `H`.
    _hasher: PhantomData<fn(H) -> ()>,
}
41
42impl<H> ConstId for Hash<H>
43where
44    H: HashProtocol,
45{
46    const ID: Id = H::ID;
47}
48
49impl<H> ConstDescribe for Hash<H>
50where
51    H: HashProtocol,
52{
53    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
54    where
55        B: BlobStore<Blake3>,
56    {
57        H::describe(blobs)
58    }
59}
60
61impl<H> ValueSchema for Hash<H>
62where
63    H: HashProtocol,
64{
65    type ValidationError = Infallible;
66}
67
68impl<H> Hash<H>
69where
70    H: HashProtocol,
71{
72    /// Computes the hash of `blob` and returns it as a value.
73    pub fn digest(blob: &Bytes) -> Value<Self> {
74        Value::new(H::digest(blob).into())
75    }
76
77    /// Parses a hex-encoded digest string into a hash value.
78    pub fn from_hex(hex: &str) -> Result<Value<Self>, FromHexError> {
79        let digest = RawValue::from_hex(hex)?;
80        Ok(Value::new(digest))
81    }
82
83    /// Returns the digest as an uppercase hex string.
84    pub fn to_hex(value: &Value<Self>) -> String {
85        hex::encode_upper(value.raw)
86    }
87}
88
89impl<H> TryFromValue<'_, Hash<H>> for String
90where
91    H: HashProtocol,
92{
93    type Error = std::convert::Infallible;
94    fn try_from_value(v: &Value<Hash<H>>) -> Result<Self, std::convert::Infallible> {
95        let mut out = String::new();
96        out.push_str(<H as HashProtocol>::NAME);
97        out.push(':');
98        out.push_str(&hex::encode(v.raw));
99        Ok(out)
100    }
101}
102
103/// An error that can occur when converting a hash value from a string.
104/// The error can be caused by a bad protocol or a bad hex encoding.
105#[derive(Debug, Clone, Copy, PartialEq)]
106pub enum HashError {
107    /// The string does not start with the expected protocol prefix
108    /// (e.g. `"blake3:"`).
109    BadProtocol,
110    /// The hex portion could not be decoded.
111    BadHex(FromHexError),
112}
113
114impl From<FromHexError> for HashError {
115    fn from(value: FromHexError) -> Self {
116        HashError::BadHex(value)
117    }
118}
119
120impl<H> TryToValue<Hash<H>> for &str
121where
122    H: HashProtocol,
123{
124    type Error = HashError;
125
126    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
127        let protocol = <H as HashProtocol>::NAME;
128        if !(self.starts_with(protocol) && &self[protocol.len()..=protocol.len()] == ":") {
129            return Err(HashError::BadProtocol);
130        }
131        let digest = RawValue::from_hex(&self[protocol.len() + 1..])?;
132
133        Ok(Value::new(digest))
134    }
135}
136
137impl<H> TryToValue<Hash<H>> for String
138where
139    H: HashProtocol,
140{
141    type Error = HashError;
142
143    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
144        (&self[..]).try_to_value()
145    }
146}
147
148fn describe_hash<H, B>(blobs: &mut B) -> Result<Fragment, B::PutError>
149where
150    H: HashProtocol,
151    B: BlobStore<Blake3>,
152{
153    let id = H::ID;
154    let name = H::NAME;
155    let description = blobs.put(format!(
156        "{name} 256-bit hash digest of raw bytes. The value stores the digest bytes and is stable across systems.\n\nUse for content-addressed identifiers, deduplication, or integrity checks. Use Handle when you need a typed blob reference with schema metadata.\n\nHashes do not carry type information; the meaning comes from the schema that uses them. If you need provenance or typed payloads, combine with handles or additional metadata."
157    ))?;
158    let name_handle = blobs.put(name)?;
159    let tribles = entity! { ExclusiveId::force_ref(&id) @
160        metadata::name: name_handle,
161        metadata::description: description,
162        metadata::tag: metadata::KIND_VALUE_SCHEMA,
163    };
164
165    #[cfg(feature = "wasm")]
166    let tribles = {
167        let mut tribles = tribles;
168        tribles += entity! { ExclusiveId::force_ref(&id) @
169            metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
170        };
171        tribles
172    };
173
174    Ok(tribles)
175}
176
// Value formatter for hash-like values; only built with the `wasm` feature.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes `hash:` followed by the 32 raw bytes as 64 uppercase hex digits.
    // Any write failure on `out` is reported as error code `1`.
    //
    // NOTE(review): `HASH_HEX_WASM`, referenced by the schema descriptions in
    // this file, is presumably generated from this function by
    // `#[value_formatter]` — confirm against the macro.
    #[value_formatter]
    pub(crate) fn hash_hex(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        out.write_str("hash:").map_err(|_| 1u32)?;
        // Lookup table mapping a nibble (0..=15) to its uppercase hex digit.
        const TABLE: &[u8; 16] = b"0123456789ABCDEF";
        for &byte in raw {
            // High nibble first, then low nibble.
            let hi = (byte >> 4) as usize;
            let lo = (byte & 0x0F) as usize;
            out.write_char(TABLE[hi] as char).map_err(|_| 1u32)?;
            out.write_char(TABLE[lo] as char).map_err(|_| 1u32)?;
        }
        Ok(())
    }
}
196
197use blake2::Blake2b as Blake2bUnsized;
/// Blake2b instantiated with a 32-byte (`U32`) output size, usable as a [`HashProtocol`].
pub type Blake2b = Blake2bUnsized<U32>;
200
201/// Blake3 hasher, usable as a [`HashProtocol`]. This is the default
202/// hash function for content-addressed blob storage.
203pub use blake3::Hasher as Blake3;
204
/// Registers Blake2b as a hash protocol; string forms of its hashes use the
/// `blake2:` prefix.
impl HashProtocol for Blake2b {
    const NAME: &'static str = "blake2";
}
208
/// Registers Blake3 as a hash protocol; string forms of its hashes use the
/// `blake3:` prefix.
impl HashProtocol for Blake3 {
    const NAME: &'static str = "blake3";
}
212
/// Fixed identifier of the Blake2b hash protocol; also used as the id of the
/// `Hash<Blake2b>` value schema.
impl ConstId for Blake2b {
    const ID: Id = id_hex!("91F880222412A49F012BE999942E6199");
}
216
217impl ConstDescribe for Blake2b {
218    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
219    where
220        B: BlobStore<Blake3>,
221    {
222        describe_hash::<Self, B>(blobs)
223    }
224}
225
/// Fixed identifier of the Blake3 hash protocol; also used as the id of the
/// `Hash<Blake3>` value schema.
impl ConstId for Blake3 {
    const ID: Id = id_hex!("4160218D6C8F620652ECFBD7FDC7BDB3");
}
229
230impl ConstDescribe for Blake3 {
231    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
232    where
233        B: BlobStore<Blake3>,
234    {
235        describe_hash::<Self, B>(blobs)
236    }
237}
238
/// A value schema for a typed handle to a blob.
///
/// A handle is composed of the hash of a blob plus type-level information about the blob's schema.
///
/// The handle can be stored in a Trible, while the blob can be stored in a BlobSet, allowing for a
/// separation of the blob data from the means of identifying and accessing it.
///
/// The handle is generated when a blob is inserted into a BlobSet, and the handle
/// can be used to retrieve the blob from the BlobSet later.
#[repr(transparent)]
pub struct Handle<H: HashProtocol, T: BlobSchema> {
    // Marker for the underlying hash schema; `Hash<H>` itself only holds a `PhantomData`.
    digest: Hash<H>,
    // Marker for the blob schema `T` this handle refers to.
    _type: PhantomData<T>,
}
252
253impl<H: HashProtocol, T: BlobSchema> Handle<H, T> {
254    /// Wraps a hash value as a typed handle.
255    pub fn from_hash(hash: Value<Hash<H>>) -> Value<Self> {
256        hash.transmute()
257    }
258
259    /// Extracts the underlying hash, discarding the blob schema type.
260    pub fn to_hash(handle: Value<Self>) -> Value<Hash<H>> {
261        handle.transmute()
262    }
263}
264
265impl<H: HashProtocol, T: BlobSchema> From<Value<Hash<H>>> for Value<Handle<H, T>> {
266    fn from(value: Value<Hash<H>>) -> Self {
267        value.transmute()
268    }
269}
270
271impl<H: HashProtocol, T: BlobSchema> From<Value<Handle<H, T>>> for Value<Hash<H>> {
272    fn from(value: Value<Handle<H, T>>) -> Self {
273        value.transmute()
274    }
275}
276
277impl<H: HashProtocol, T: BlobSchema> ConstId for Handle<H, T> {
278    const ID: Id = {
279        let mut hasher = const_blake3::Hasher::new();
280        hasher.update(&Hash::<H>::ID.raw());
281        hasher.update(&T::ID.raw());
282        let mut digest = [0u8; 32];
283        hasher.finalize(&mut digest);
284        let mut raw = [0u8; 16];
285        let mut i = 0;
286        while i < raw.len() {
287            raw[i] = digest[16 + i];
288            i += 1;
289        }
290        match Id::new(raw) {
291            Some(id) => id,
292            None => panic!("derived handle schema id must be non-nil"),
293        }
294    };
295}
296
297impl<H, T> ConstDescribe for Handle<H, T>
298where
299    H: HashProtocol,
300    T: BlobSchema + ConstDescribe,
301{
302    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
303    where
304        B: BlobStore<Blake3>,
305    {
306        let id = Self::ID;
307        let name = H::NAME;
308        let schema_id = T::ID;
309        let description = blobs.put(format!(
310            "Typed handle for blobs hashed with {name}; the value stores the digest and metadata points at blob schema {schema_id:X}. The schema id is derived from the hash and blob schema.\n\nUse when referencing blobs from tribles without embedding data; the blob store holds the payload. For untyped content hashes, use the hash schema directly.\n\nHandles assume the blob store is available and consistent with the digest. If the blob is missing, the handle still validates but dereferencing will fail."
311        ))?;
312        let name_handle = blobs.put("handle")?;
313        let mut tribles = TribleSet::new();
314        tribles += H::describe(blobs)?;
315        tribles += T::describe(blobs)?;
316
317        tribles += entity! { ExclusiveId::force_ref(&id) @
318            metadata::name: name_handle,
319            metadata::description: description,
320            metadata::blob_schema: schema_id,
321            metadata::hash_schema: H::ID,
322            metadata::tag: metadata::KIND_VALUE_SCHEMA,
323        };
324
325        #[cfg(feature = "wasm")]
326        {
327            tribles += entity! { ExclusiveId::force_ref(&id) @
328                metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
329            };
330        }
331        Ok(Fragment::rooted(id, tribles))
332    }
333}
334
335impl<H: HashProtocol, T: BlobSchema> ValueSchema for Handle<H, T> {
336    type ValidationError = Infallible;
337}
338
#[cfg(test)]
mod tests {
    use super::Blake3;
    use super::Hash;
    use crate::prelude::*;
    use crate::value::schemas::hash::HashError;

    /// A random hash survives the trip through its string form unchanged.
    #[test]
    fn value_roundtrip() {
        let v: Value<Hash<Blake3>> = Value::new(rand::random());
        let original = v.raw;
        let s: String = v.from_value();
        assert!(s.starts_with("blake3:"));
        let back: Value<Hash<Blake3>> = s.try_to_value().expect("roundtrip should succeed");
        // Compare the raw bytes so the test actually verifies the roundtrip.
        assert_eq!(back.raw, original);
    }

    /// A well-formed `blake3:` string with uppercase hex parses successfully.
    #[test]
    fn value_from_known() {
        let s: &str = "blake3:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let _: Value<Hash<Blake3>> = s
            .try_to_value()
            .expect("packing valid constant should succeed");
    }

    /// A string with the wrong protocol prefix is rejected as `BadProtocol`.
    #[test]
    fn to_value_fail_protocol() {
        let s: &str = "bad:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid protocol should fail");
        assert_eq!(err, HashError::BadProtocol);
    }

    /// A correct prefix followed by undecodable hex is rejected as `BadHex`.
    #[test]
    fn to_value_fail_hex() {
        let s: &str = "blake3:BAD!";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid hex should fail");
        assert!(matches!(err, HashError::BadHex(..)));
    }
}