Skip to main content

triblespace_core/value/schemas/
hash.rs

1use crate::blob::BlobSchema;
2use crate::id::ExclusiveId;
3use crate::id::Id;
4use crate::id_hex;
5use crate::macros::entity;
6use crate::metadata;
7use crate::metadata::ConstMetadata;
8use crate::repo::BlobStore;
9use crate::trible::TribleSet;
10use crate::value::FromValue;
11use crate::value::RawValue;
12use crate::value::TryToValue;
13use crate::value::Value;
14use crate::value::ValueSchema;
15use std::convert::Infallible;
16
17use anybytes::Bytes;
18use digest::typenum::U32;
19use digest::Digest;
20use hex::FromHex;
21use hex::FromHexError;
22use std::marker::PhantomData;
23
24#[cfg(feature = "wasm")]
25use crate::blob::schemas::wasmcode::WasmCode;
26/// A trait for hash functions.
27/// This trait is implemented by hash functions that can be in a value schema
28/// for example via a [struct@Hash] or a [Handle].
29pub trait HashProtocol: Digest<OutputSize = U32> + Clone + Send + 'static + ConstMetadata {
30    const NAME: &'static str;
31}
32
/// Value schema for a hash digest.
/// A hash is a fixed-size, 256-bit digest of a byte sequence.
///
/// The type parameter `H` only selects the hash function; no hasher state is
/// stored in the schema type itself.
///
/// See the [crate::id] module documentation for a discussion on the length
/// of the digest and its role as an intrinsic identifier.
pub struct Hash<H> {
    // `fn(H)` ties the schema to `H` without owning a value of type `H`.
    _hasher: PhantomData<fn(H)>,
}
41
42impl<H> ConstMetadata for Hash<H>
43where
44    H: HashProtocol,
45{
46    fn id() -> Id {
47        <H as ConstMetadata>::id()
48    }
49
50    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
51    where
52        B: BlobStore<Blake3>,
53    {
54        H::describe(blobs)
55    }
56}
57
58impl<H> ValueSchema for Hash<H>
59where
60    H: HashProtocol,
61{
62    type ValidationError = Infallible;
63}
64
65impl<H> Hash<H>
66where
67    H: HashProtocol,
68{
69    pub fn digest(blob: &Bytes) -> Value<Self> {
70        Value::new(H::digest(blob).into())
71    }
72
73    pub fn from_hex(hex: &str) -> Result<Value<Self>, FromHexError> {
74        let digest = RawValue::from_hex(hex)?;
75        Ok(Value::new(digest))
76    }
77
78    pub fn to_hex(value: &Value<Self>) -> String {
79        hex::encode_upper(value.raw)
80    }
81}
82
83impl<H> FromValue<'_, Hash<H>> for String
84where
85    H: HashProtocol,
86{
87    fn from_value(v: &Value<Hash<H>>) -> Self {
88        let mut out = String::new();
89        out.push_str(<H as HashProtocol>::NAME);
90        out.push(':');
91        out.push_str(&hex::encode(v.raw));
92        out
93    }
94}
95
96/// An error that can occur when converting a hash value from a string.
97/// The error can be caused by a bad protocol or a bad hex encoding.
98#[derive(Debug, Clone, Copy, PartialEq)]
99pub enum HashError {
100    BadProtocol,
101    BadHex(FromHexError),
102}
103
104impl From<FromHexError> for HashError {
105    fn from(value: FromHexError) -> Self {
106        HashError::BadHex(value)
107    }
108}
109
110impl<H> TryToValue<Hash<H>> for &str
111where
112    H: HashProtocol,
113{
114    type Error = HashError;
115
116    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
117        let protocol = <H as HashProtocol>::NAME;
118        if !(self.starts_with(protocol) && &self[protocol.len()..=protocol.len()] == ":") {
119            return Err(HashError::BadProtocol);
120        }
121        let digest = RawValue::from_hex(&self[protocol.len() + 1..])?;
122
123        Ok(Value::new(digest))
124    }
125}
126
127impl<H> TryToValue<Hash<H>> for String
128where
129    H: HashProtocol,
130{
131    type Error = HashError;
132
133    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
134        (&self[..]).try_to_value()
135    }
136}
137
138fn describe_hash<H, B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
139where
140    H: HashProtocol,
141    B: BlobStore<Blake3>,
142{
143    let id = H::id();
144    let name = H::NAME;
145    let description = blobs.put(format!(
146        "{name} 256-bit hash digest of raw bytes. The value stores the digest bytes and is stable across systems.\n\nUse for content-addressed identifiers, deduplication, or integrity checks. Use Handle when you need a typed blob reference with schema metadata.\n\nHashes do not carry type information; the meaning comes from the schema that uses them. If you need provenance or typed payloads, combine with handles or additional metadata."
147    ))?;
148    let name_handle = blobs.put(name.to_string())?;
149    let mut tribles = TribleSet::new();
150
151    tribles += entity! { ExclusiveId::force_ref(&id) @
152        metadata::name: name_handle,
153        metadata::description: description,
154        metadata::tag: metadata::KIND_VALUE_SCHEMA,
155    };
156
157    #[cfg(feature = "wasm")]
158    {
159        tribles += entity! { ExclusiveId::force_ref(&id) @
160            metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
161        };
162    }
163
164    Ok(tribles)
165}
166
// Value formatter used by the hash and handle schema descriptions in this
// file. Only compiled when the `wasm` feature is enabled.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes the literal prefix `hash:` followed by every byte of `raw` as two
    // upper-case hexadecimal digits. Any failure of the underlying writer is
    // reported as error code `1`.
    // NOTE(review): `HASH_HEX_WASM`, which the describe functions reference,
    // is presumably generated from this function by `#[value_formatter]` —
    // confirm against the macro before changing the body.
    #[value_formatter]
    pub(crate) fn hash_hex(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        out.write_str("hash:").map_err(|_| 1u32)?;
        // Lookup table mapping a nibble (0..=15) to its upper-case hex digit.
        const TABLE: &[u8; 16] = b"0123456789ABCDEF";
        for &byte in raw {
            // High nibble first, then low nibble.
            let hi = (byte >> 4) as usize;
            let lo = (byte & 0x0F) as usize;
            out.write_char(TABLE[hi] as char).map_err(|_| 1u32)?;
            out.write_char(TABLE[lo] as char).map_err(|_| 1u32)?;
        }
        Ok(())
    }
}
186
use blake2::Blake2b as Blake2bUnsized;
/// The Blake2b hasher with its output size fixed to `U32`, matching the
/// output size required by [`HashProtocol`].
pub type Blake2b = Blake2bUnsized<U32>;

/// The Blake3 hasher, re-exported from the `blake3` crate.
pub use blake3::Hasher as Blake3;
191
192impl HashProtocol for Blake2b {
193    const NAME: &'static str = "blake2";
194}
195
196impl HashProtocol for Blake3 {
197    const NAME: &'static str = "blake3";
198}
199
200impl ConstMetadata for Blake2b {
201    fn id() -> Id {
202        id_hex!("91F880222412A49F012BE999942E6199")
203    }
204
205    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
206    where
207        B: BlobStore<Blake3>,
208    {
209        describe_hash::<Self, B>(blobs)
210    }
211}
212
213impl ConstMetadata for Blake3 {
214    fn id() -> Id {
215        id_hex!("4160218D6C8F620652ECFBD7FDC7BDB3")
216    }
217
218    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
219    where
220        B: BlobStore<Blake3>,
221    {
222        describe_hash::<Self, B>(blobs)
223    }
224}
225
226/// This is a value schema for a handle.
227/// A handle to a blob is comprised of a hash of a blob and type level information about the blobs schema.
228///
229/// The handle can be stored in a Trible, while the blob can be stored in a BlobSet, allowing for a
230/// separation of the blob data from the means of identifying and accessing it.
231///
232/// The handle is generated when a blob is inserted into a BlobSet, and the handle
233/// can be used to retrieve the blob from the BlobSet later.
234#[repr(transparent)]
235pub struct Handle<H: HashProtocol, T: BlobSchema> {
236    digest: Hash<H>,
237    _type: PhantomData<T>,
238}
239
240impl<H: HashProtocol, T: BlobSchema> Handle<H, T> {
241    pub fn from_hash(hash: Value<Hash<H>>) -> Value<Self> {
242        hash.transmute()
243    }
244
245    pub fn to_hash(handle: Value<Self>) -> Value<Hash<H>> {
246        handle.transmute()
247    }
248}
249
250impl<H: HashProtocol, T: BlobSchema> From<Value<Hash<H>>> for Value<Handle<H, T>> {
251    fn from(value: Value<Hash<H>>) -> Self {
252        value.transmute()
253    }
254}
255
256impl<H: HashProtocol, T: BlobSchema> From<Value<Handle<H, T>>> for Value<Hash<H>> {
257    fn from(value: Value<Handle<H, T>>) -> Self {
258        value.transmute()
259    }
260}
261
262impl<H: HashProtocol, T: BlobSchema> ConstMetadata for Handle<H, T> {
263    // NOTE: This can't be a `const fn` while we rely on the runtime `blake3`
264    // hasher to derive the identifier. Once a const-friendly hashing API is
265    // available we can revisit this.
266    fn id() -> Id {
267        let mut hasher = blake3::Hasher::new();
268        hasher.update(Hash::<H>::id().as_ref());
269        hasher.update(T::id().as_ref());
270        let digest = hasher.finalize();
271        let mut raw = [0u8; 16];
272        let bytes: &[u8] = digest.as_ref();
273        let lower_half = &bytes[bytes.len() - raw.len()..];
274        raw.copy_from_slice(lower_half);
275        Id::new(raw).expect("derived handle schema id must be non-nil")
276    }
277
278    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
279    where
280        B: BlobStore<Blake3>,
281    {
282        let id = Self::id();
283        let name = H::NAME;
284        let schema_id = T::id();
285        let description = blobs.put(format!(
286            "Typed handle for blobs hashed with {name}; the value stores the digest and metadata points at blob schema {schema_id:X}. The schema id is derived from the hash and blob schema.\n\nUse when referencing blobs from tribles without embedding data; the blob store holds the payload. For untyped content hashes, use the hash schema directly.\n\nHandles assume the blob store is available and consistent with the digest. If the blob is missing, the handle still validates but dereferencing will fail."
287        ))?;
288        let name_handle = blobs.put("handle".to_string())?;
289        let mut tribles = TribleSet::new();
290        tribles += H::describe(blobs)?;
291        tribles += T::describe(blobs)?;
292
293        tribles += entity! { ExclusiveId::force_ref(&id) @
294            metadata::name: name_handle,
295            metadata::description: description,
296            metadata::blob_schema: schema_id,
297            metadata::hash_schema: H::id(),
298            metadata::tag: metadata::KIND_VALUE_SCHEMA,
299        };
300
301        #[cfg(feature = "wasm")]
302        {
303            tribles += entity! { ExclusiveId::force_ref(&id) @
304                metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
305            };
306        }
307        Ok(tribles)
308    }
309}
310
311impl<H: HashProtocol, T: BlobSchema> ValueSchema for Handle<H, T> {
312    type ValidationError = Infallible;
313}
314
#[cfg(test)]
mod tests {
    use super::Blake3;
    use crate::prelude::*;
    use crate::value::schemas::hash::HashError;
    use rand;

    use super::Hash;

    /// Formatting a random hash as a string and parsing it back must yield
    /// the same raw bytes.
    #[test]
    fn value_roundtrip() {
        let v: Value<Hash<Blake3>> = Value::new(rand::random());
        let s: String = v.from_value();
        let parsed: Value<Hash<Blake3>> = s.try_to_value().expect("roundtrip should succeed");
        assert_eq!(v.raw, parsed.raw);
    }

    /// A well-formed `blake3:<hex>` constant must parse.
    #[test]
    fn value_from_known() {
        let s: &str = "blake3:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let _: Value<Hash<Blake3>> = s
            .try_to_value()
            .expect("packing valid constant should succeed");
    }

    /// A string with the wrong protocol prefix must be rejected as
    /// `BadProtocol`.
    #[test]
    fn to_value_fail_protocol() {
        let s: &str = "bad:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid protocol should fail");
        assert_eq!(err, HashError::BadProtocol);
    }

    /// A string with the right prefix but an invalid hex payload must be
    /// rejected as `BadHex`.
    #[test]
    fn to_value_fail_hex() {
        let s: &str = "blake3:BAD!";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid hex should fail");
        assert!(std::matches!(err, HashError::BadHex(..)));
    }
}
354}