Skip to main content

triblespace_core/value/schemas/
hash.rs

1use crate::blob::BlobSchema;
2use crate::id::ExclusiveId;
3use crate::id::Id;
4use crate::id_hex;
5use crate::macros::entity;
6use crate::metadata;
7use crate::metadata::{ConstDescribe, ConstId};
8use crate::repo::BlobStore;
9use crate::trible::Fragment;
10use crate::trible::TribleSet;
11use crate::value::TryFromValue;
12use crate::value::RawValue;
13use crate::value::TryToValue;
14use crate::value::Value;
15use crate::value::ValueSchema;
16use std::convert::Infallible;
17
18use anybytes::Bytes;
19use digest::typenum::U32;
20use digest::Digest;
21use hex::FromHex;
22use hex::FromHexError;
23use std::marker::PhantomData;
24
25/// A trait for hash functions.
26/// This trait is implemented by hash functions that can be in a value schema
27/// for example via a [struct@Hash] or a [Handle].
28pub trait HashProtocol: Digest<OutputSize = U32> + Clone + Send + 'static + ConstDescribe {
29    const NAME: &'static str;
30}
31
/// Value schema for a fixed-size, 256-bit digest of a byte sequence.
///
/// The type parameter `H` only selects the hash function; no hasher state is
/// stored. `PhantomData<fn(H)>` keeps the marker zero-sized and makes it
/// `Send + Sync` regardless of `H`.
///
/// See the [crate::id] module documentation for a discussion on the length
/// of the digest and its role as an intrinsic identifier.
pub struct Hash<H> {
    _hasher: PhantomData<fn(H)>,
}
40
41impl<H> ConstId for Hash<H>
42where
43    H: HashProtocol,
44{
45    const ID: Id = H::ID;
46}
47
48impl<H> ConstDescribe for Hash<H>
49where
50    H: HashProtocol,
51{
52    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
53    where
54        B: BlobStore<Blake3>,
55    {
56        H::describe(blobs)
57    }
58}
59
60impl<H> ValueSchema for Hash<H>
61where
62    H: HashProtocol,
63{
64    type ValidationError = Infallible;
65}
66
67impl<H> Hash<H>
68where
69    H: HashProtocol,
70{
71    pub fn digest(blob: &Bytes) -> Value<Self> {
72        Value::new(H::digest(blob).into())
73    }
74
75    pub fn from_hex(hex: &str) -> Result<Value<Self>, FromHexError> {
76        let digest = RawValue::from_hex(hex)?;
77        Ok(Value::new(digest))
78    }
79
80    pub fn to_hex(value: &Value<Self>) -> String {
81        hex::encode_upper(value.raw)
82    }
83}
84
85impl<H> TryFromValue<'_, Hash<H>> for String
86where
87    H: HashProtocol,
88{
89    type Error = std::convert::Infallible;
90    fn try_from_value(v: &Value<Hash<H>>) -> Result<Self, std::convert::Infallible> {
91        let mut out = String::new();
92        out.push_str(<H as HashProtocol>::NAME);
93        out.push(':');
94        out.push_str(&hex::encode(v.raw));
95        Ok(out)
96    }
97}
98
99/// An error that can occur when converting a hash value from a string.
100/// The error can be caused by a bad protocol or a bad hex encoding.
101#[derive(Debug, Clone, Copy, PartialEq)]
102pub enum HashError {
103    BadProtocol,
104    BadHex(FromHexError),
105}
106
107impl From<FromHexError> for HashError {
108    fn from(value: FromHexError) -> Self {
109        HashError::BadHex(value)
110    }
111}
112
113impl<H> TryToValue<Hash<H>> for &str
114where
115    H: HashProtocol,
116{
117    type Error = HashError;
118
119    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
120        let protocol = <H as HashProtocol>::NAME;
121        if !(self.starts_with(protocol) && &self[protocol.len()..=protocol.len()] == ":") {
122            return Err(HashError::BadProtocol);
123        }
124        let digest = RawValue::from_hex(&self[protocol.len() + 1..])?;
125
126        Ok(Value::new(digest))
127    }
128}
129
130impl<H> TryToValue<Hash<H>> for String
131where
132    H: HashProtocol,
133{
134    type Error = HashError;
135
136    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
137        (&self[..]).try_to_value()
138    }
139}
140
141fn describe_hash<H, B>(blobs: &mut B) -> Result<Fragment, B::PutError>
142where
143    H: HashProtocol,
144    B: BlobStore<Blake3>,
145{
146    let id = H::ID;
147    let name = H::NAME;
148    let description = blobs.put(format!(
149        "{name} 256-bit hash digest of raw bytes. The value stores the digest bytes and is stable across systems.\n\nUse for content-addressed identifiers, deduplication, or integrity checks. Use Handle when you need a typed blob reference with schema metadata.\n\nHashes do not carry type information; the meaning comes from the schema that uses them. If you need provenance or typed payloads, combine with handles or additional metadata."
150    ))?;
151    let name_handle = blobs.put(name)?;
152    let tribles = entity! { ExclusiveId::force_ref(&id) @
153        metadata::name: name_handle,
154        metadata::description: description,
155        metadata::tag: metadata::KIND_VALUE_SCHEMA,
156    };
157
158    #[cfg(feature = "wasm")]
159    let tribles = {
160        let mut tribles = tribles;
161        tribles += entity! { ExclusiveId::force_ref(&id) @
162            metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
163        };
164        tribles
165    };
166
167    Ok(tribles)
168}
169
// Value formatter shared by the hash and handle schemas; only built with the
// `wasm` feature.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes `hash:` followed by the 32 raw bytes as upper-case hex.
    // Any write failure is reported as error code 1.
    // NOTE(review): `HASH_HEX_WASM`, used by the describe functions, is
    // presumably generated from this function by `#[value_formatter]` - confirm.
    #[value_formatter]
    pub(crate) fn hash_hex(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

        out.write_str("hash:").map_err(|_| 1u32)?;
        for &byte in raw.iter() {
            // High nibble first, then low nibble.
            let high = HEX_UPPER[usize::from(byte >> 4)] as char;
            let low = HEX_UPPER[usize::from(byte & 0x0F)] as char;
            out.write_char(high).map_err(|_| 1u32)?;
            out.write_char(low).map_err(|_| 1u32)?;
        }
        Ok(())
    }
}
189
// The generic Blake2b type; the output size is fixed by the alias below.
use blake2::Blake2b as Blake2bUnsized;
/// Blake2b parameterised with `U32`, matching the `OutputSize = U32` bound
/// required by [`HashProtocol`].
pub type Blake2b = Blake2bUnsized<U32>;

/// The `blake3` hasher, re-exported under the name used throughout this module.
pub use blake3::Hasher as Blake3;
194
/// Registers [`Blake2b`] as a hash protocol.
impl HashProtocol for Blake2b {
    /// Prefix used in the `blake2:<hex>` string form and name stored in the
    /// schema metadata.
    const NAME: &'static str = "blake2";
}
198
/// Registers [`Blake3`] as a hash protocol.
impl HashProtocol for Blake3 {
    /// Prefix used in the `blake3:<hex>` string form and name stored in the
    /// schema metadata.
    const NAME: &'static str = "blake3";
}
202
/// Fixed identifier of the Blake2b protocol; `Hash<Blake2b>` reuses it as its
/// schema id.
impl ConstId for Blake2b {
    const ID: Id = id_hex!("91F880222412A49F012BE999942E6199");
}
206
207impl ConstDescribe for Blake2b {
208    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
209    where
210        B: BlobStore<Blake3>,
211    {
212        describe_hash::<Self, B>(blobs)
213    }
214}
215
/// Fixed identifier of the Blake3 protocol; `Hash<Blake3>` reuses it as its
/// schema id.
impl ConstId for Blake3 {
    const ID: Id = id_hex!("4160218D6C8F620652ECFBD7FDC7BDB3");
}
219
220impl ConstDescribe for Blake3 {
221    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
222    where
223        B: BlobStore<Blake3>,
224    {
225        describe_hash::<Self, B>(blobs)
226    }
227}
228
229/// This is a value schema for a handle.
230/// A handle to a blob is comprised of a hash of a blob and type level information about the blobs schema.
231///
232/// The handle can be stored in a Trible, while the blob can be stored in a BlobSet, allowing for a
233/// separation of the blob data from the means of identifying and accessing it.
234///
235/// The handle is generated when a blob is inserted into a BlobSet, and the handle
236/// can be used to retrieve the blob from the BlobSet later.
237#[repr(transparent)]
238pub struct Handle<H: HashProtocol, T: BlobSchema> {
239    digest: Hash<H>,
240    _type: PhantomData<T>,
241}
242
243impl<H: HashProtocol, T: BlobSchema> Handle<H, T> {
244    pub fn from_hash(hash: Value<Hash<H>>) -> Value<Self> {
245        hash.transmute()
246    }
247
248    pub fn to_hash(handle: Value<Self>) -> Value<Hash<H>> {
249        handle.transmute()
250    }
251}
252
253impl<H: HashProtocol, T: BlobSchema> From<Value<Hash<H>>> for Value<Handle<H, T>> {
254    fn from(value: Value<Hash<H>>) -> Self {
255        value.transmute()
256    }
257}
258
259impl<H: HashProtocol, T: BlobSchema> From<Value<Handle<H, T>>> for Value<Hash<H>> {
260    fn from(value: Value<Handle<H, T>>) -> Self {
261        value.transmute()
262    }
263}
264
265impl<H: HashProtocol, T: BlobSchema> ConstId for Handle<H, T> {
266    const ID: Id = {
267        let mut hasher = const_blake3::Hasher::new();
268        hasher.update(&Hash::<H>::ID.raw());
269        hasher.update(&T::ID.raw());
270        let mut digest = [0u8; 32];
271        hasher.finalize(&mut digest);
272        let mut raw = [0u8; 16];
273        let mut i = 0;
274        while i < raw.len() {
275            raw[i] = digest[16 + i];
276            i += 1;
277        }
278        match Id::new(raw) {
279            Some(id) => id,
280            None => panic!("derived handle schema id must be non-nil"),
281        }
282    };
283}
284
285impl<H, T> ConstDescribe for Handle<H, T>
286where
287    H: HashProtocol,
288    T: BlobSchema + ConstDescribe,
289{
290    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
291    where
292        B: BlobStore<Blake3>,
293    {
294        let id = Self::ID;
295        let name = H::NAME;
296        let schema_id = T::ID;
297        let description = blobs.put(format!(
298            "Typed handle for blobs hashed with {name}; the value stores the digest and metadata points at blob schema {schema_id:X}. The schema id is derived from the hash and blob schema.\n\nUse when referencing blobs from tribles without embedding data; the blob store holds the payload. For untyped content hashes, use the hash schema directly.\n\nHandles assume the blob store is available and consistent with the digest. If the blob is missing, the handle still validates but dereferencing will fail."
299        ))?;
300        let name_handle = blobs.put("handle")?;
301        let mut tribles = TribleSet::new();
302        tribles += H::describe(blobs)?;
303        tribles += T::describe(blobs)?;
304
305        tribles += entity! { ExclusiveId::force_ref(&id) @
306            metadata::name: name_handle,
307            metadata::description: description,
308            metadata::blob_schema: schema_id,
309            metadata::hash_schema: H::ID,
310            metadata::tag: metadata::KIND_VALUE_SCHEMA,
311        };
312
313        #[cfg(feature = "wasm")]
314        {
315            tribles += entity! { ExclusiveId::force_ref(&id) @
316                metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
317            };
318        }
319        Ok(Fragment::rooted(id, tribles))
320    }
321}
322
323impl<H: HashProtocol, T: BlobSchema> ValueSchema for Handle<H, T> {
324    type ValidationError = Infallible;
325}
326
#[cfg(test)]
mod tests {
    use super::Blake3;
    use crate::prelude::*;
    use crate::value::schemas::hash::HashError;
    use rand;

    use super::Hash;

    /// A random hash value survives conversion to its string form and back.
    #[test]
    fn value_roundtrip() {
        let v: Value<Hash<Blake3>> = Value::new(rand::random());
        // Copy the bytes out first so the check does not depend on how
        // `from_value` borrows or consumes `v`.
        let expected = v.raw;
        let s: String = v.from_value();
        let back: Value<Hash<Blake3>> = s.try_to_value().expect("roundtrip should succeed");
        assert_eq!(
            back.raw, expected,
            "roundtrip must preserve the digest bytes"
        );
    }

    /// A well-formed `blake3:<hex>` constant parses successfully.
    #[test]
    fn value_from_known() {
        let s: &str = "blake3:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let _: Value<Hash<Blake3>> = s
            .try_to_value()
            .expect("packing valid constant should succeed");
    }

    /// A wrong protocol prefix is rejected with `BadProtocol`.
    #[test]
    fn to_value_fail_protocol() {
        let s: &str = "bad:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid protocol should fail");
        assert_eq!(err, HashError::BadProtocol);
    }

    /// A correct prefix followed by invalid hex is rejected with `BadHex`.
    #[test]
    fn to_value_fail_hex() {
        let s: &str = "blake3:BAD!";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid hex should fail");
        assert!(std::matches!(err, HashError::BadHex(..)));
    }
}
366}