Skip to main content

triblespace_core/value/schemas/
hash.rs

1use crate::blob::BlobSchema;
2use crate::id::ExclusiveId;
3use crate::id::Id;
4use crate::id_hex;
5use crate::macros::entity;
6use crate::metadata;
7use crate::metadata::{ConstDescribe, ConstId};
8use crate::repo::BlobStore;
9use crate::trible::Fragment;
10use crate::trible::TribleSet;
11use crate::value::FromValue;
12use crate::value::RawValue;
13use crate::value::TryToValue;
14use crate::value::Value;
15use crate::value::ValueSchema;
16use std::convert::Infallible;
17
18use anybytes::Bytes;
19use digest::typenum::U32;
20use digest::Digest;
21use hex::FromHex;
22use hex::FromHexError;
23use std::marker::PhantomData;
24
/// A trait for hash functions.
///
/// This trait is implemented by hash functions that can be used in a value
/// schema, for example via a [struct@Hash] or a [Handle].
///
/// Implementors must produce 32-byte digests (`OutputSize = U32`) and provide
/// their own schema description through [`ConstDescribe`].
pub trait HashProtocol: Digest<OutputSize = U32> + Clone + Send + 'static + ConstDescribe {
    /// Short protocol name. In this module it is the `name:` prefix of the
    /// textual hash form and the name stored in the schema description.
    const NAME: &'static str;
}
31
/// Value schema for a hash digest.
///
/// A hash is a fixed-size 256-bit digest of a byte sequence. The type
/// parameter `H` only selects the hash function at the type level; no hasher
/// state is stored.
///
/// See the [crate::id] module documentation for a discussion on the length
/// of the digest and its role as an intrinsic identifier.
pub struct Hash<H> {
    // `fn(H)` keeps the marker independent of `H`'s auto traits and ownership.
    _hasher: PhantomData<fn(H)>,
}
40
41impl<H> ConstId for Hash<H>
42where
43    H: HashProtocol,
44{
45    const ID: Id = H::ID;
46}
47
48impl<H> ConstDescribe for Hash<H>
49where
50    H: HashProtocol,
51{
52    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
53    where
54        B: BlobStore<Blake3>,
55    {
56        H::describe(blobs)
57    }
58}
59
60impl<H> ValueSchema for Hash<H>
61where
62    H: HashProtocol,
63{
64    type ValidationError = Infallible;
65}
66
67impl<H> Hash<H>
68where
69    H: HashProtocol,
70{
71    pub fn digest(blob: &Bytes) -> Value<Self> {
72        Value::new(H::digest(blob).into())
73    }
74
75    pub fn from_hex(hex: &str) -> Result<Value<Self>, FromHexError> {
76        let digest = RawValue::from_hex(hex)?;
77        Ok(Value::new(digest))
78    }
79
80    pub fn to_hex(value: &Value<Self>) -> String {
81        hex::encode_upper(value.raw)
82    }
83}
84
85impl<H> FromValue<'_, Hash<H>> for String
86where
87    H: HashProtocol,
88{
89    fn from_value(v: &Value<Hash<H>>) -> Self {
90        let mut out = String::new();
91        out.push_str(<H as HashProtocol>::NAME);
92        out.push(':');
93        out.push_str(&hex::encode(v.raw));
94        out
95    }
96}
97
98/// An error that can occur when converting a hash value from a string.
99/// The error can be caused by a bad protocol or a bad hex encoding.
100#[derive(Debug, Clone, Copy, PartialEq)]
101pub enum HashError {
102    BadProtocol,
103    BadHex(FromHexError),
104}
105
106impl From<FromHexError> for HashError {
107    fn from(value: FromHexError) -> Self {
108        HashError::BadHex(value)
109    }
110}
111
112impl<H> TryToValue<Hash<H>> for &str
113where
114    H: HashProtocol,
115{
116    type Error = HashError;
117
118    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
119        let protocol = <H as HashProtocol>::NAME;
120        if !(self.starts_with(protocol) && &self[protocol.len()..=protocol.len()] == ":") {
121            return Err(HashError::BadProtocol);
122        }
123        let digest = RawValue::from_hex(&self[protocol.len() + 1..])?;
124
125        Ok(Value::new(digest))
126    }
127}
128
129impl<H> TryToValue<Hash<H>> for String
130where
131    H: HashProtocol,
132{
133    type Error = HashError;
134
135    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
136        (&self[..]).try_to_value()
137    }
138}
139
140fn describe_hash<H, B>(blobs: &mut B) -> Result<Fragment, B::PutError>
141where
142    H: HashProtocol,
143    B: BlobStore<Blake3>,
144{
145    let id = H::ID;
146    let name = H::NAME;
147    let description = blobs.put(format!(
148        "{name} 256-bit hash digest of raw bytes. The value stores the digest bytes and is stable across systems.\n\nUse for content-addressed identifiers, deduplication, or integrity checks. Use Handle when you need a typed blob reference with schema metadata.\n\nHashes do not carry type information; the meaning comes from the schema that uses them. If you need provenance or typed payloads, combine with handles or additional metadata."
149    ))?;
150    let name_handle = blobs.put(name)?;
151    let tribles = entity! { ExclusiveId::force_ref(&id) @
152        metadata::name: name_handle,
153        metadata::description: description,
154        metadata::tag: metadata::KIND_VALUE_SCHEMA,
155    };
156
157    #[cfg(feature = "wasm")]
158    let tribles = {
159        let mut tribles = tribles;
160        tribles += entity! { ExclusiveId::force_ref(&id) @
161            metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
162        };
163        tribles
164    };
165
166    Ok(tribles)
167}
168
// Value formatter used by the hash and handle schema descriptions in this
// file; compiled only when the `wasm` feature is enabled.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    /// Writes `hash:` followed by the 32 raw bytes as 64 upper-case hex digits.
    ///
    /// Any failure of the underlying writer is reported as error code `1`.
    // NOTE(review): `HASH_HEX_WASM`, referenced by the describe functions, is
    // presumably generated from this function by `#[value_formatter]` —
    // confirm in the macro crate before changing the body.
    #[value_formatter]
    pub(crate) fn hash_hex(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        // The prefix is the fixed text "hash:", independent of the protocol.
        out.write_str("hash:").map_err(|_| 1u32)?;
        const TABLE: &[u8; 16] = b"0123456789ABCDEF";
        for &byte in raw {
            // High nibble first, then low nibble.
            let hi = (byte >> 4) as usize;
            let lo = (byte & 0x0F) as usize;
            out.write_char(TABLE[hi] as char).map_err(|_| 1u32)?;
            out.write_char(TABLE[lo] as char).map_err(|_| 1u32)?;
        }
        Ok(())
    }
}
188
use blake2::Blake2b as Blake2bUnsized;
/// Blake2b fixed to a `U32` output size, matching the digest size required by
/// [`HashProtocol`].
pub type Blake2b = Blake2bUnsized<U32>;

/// The `blake3` crate's hasher, re-exported under the name used in this module.
pub use blake3::Hasher as Blake3;
193
// `NAME` is the prefix of the textual form (`blake2:<hex>`) and the name
// stored in the schema description.
impl HashProtocol for Blake2b {
    const NAME: &'static str = "blake2";
}
197
// `NAME` is the prefix of the textual form (`blake3:<hex>`) and the name
// stored in the schema description.
impl HashProtocol for Blake3 {
    const NAME: &'static str = "blake3";
}
201
// Id of the entity that `describe_hash` emits for the Blake2b protocol; it is
// also the schema id of `Hash<Blake2b>`.
impl ConstId for Blake2b {
    const ID: Id = id_hex!("91F880222412A49F012BE999942E6199");
}
205
206impl ConstDescribe for Blake2b {
207    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
208    where
209        B: BlobStore<Blake3>,
210    {
211        describe_hash::<Self, B>(blobs)
212    }
213}
214
// Id of the entity that `describe_hash` emits for the Blake3 protocol; it is
// also the schema id of `Hash<Blake3>`.
impl ConstId for Blake3 {
    const ID: Id = id_hex!("4160218D6C8F620652ECFBD7FDC7BDB3");
}
218
219impl ConstDescribe for Blake3 {
220    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
221    where
222        B: BlobStore<Blake3>,
223    {
224        describe_hash::<Self, B>(blobs)
225    }
226}
227
/// This is a value schema for a handle.
/// A handle to a blob consists of a hash of the blob and type-level information about the blob's schema.
///
/// The handle can be stored in a Trible, while the blob can be stored in a BlobSet, allowing for a
/// separation of the blob data from the means of identifying and accessing it.
///
/// The handle is generated when a blob is inserted into a BlobSet, and the handle
/// can be used to retrieve the blob from the BlobSet later.
#[repr(transparent)]
pub struct Handle<H: HashProtocol, T: BlobSchema> {
    // Hash schema marker selecting the hash function `H`.
    digest: Hash<H>,
    // Type-level tag for the blob schema `T`; carries no data.
    _type: PhantomData<T>,
}
241
242impl<H: HashProtocol, T: BlobSchema> Handle<H, T> {
243    pub fn from_hash(hash: Value<Hash<H>>) -> Value<Self> {
244        hash.transmute()
245    }
246
247    pub fn to_hash(handle: Value<Self>) -> Value<Hash<H>> {
248        handle.transmute()
249    }
250}
251
252impl<H: HashProtocol, T: BlobSchema> From<Value<Hash<H>>> for Value<Handle<H, T>> {
253    fn from(value: Value<Hash<H>>) -> Self {
254        value.transmute()
255    }
256}
257
258impl<H: HashProtocol, T: BlobSchema> From<Value<Handle<H, T>>> for Value<Hash<H>> {
259    fn from(value: Value<Handle<H, T>>) -> Self {
260        value.transmute()
261    }
262}
263
264impl<H: HashProtocol, T: BlobSchema> ConstId for Handle<H, T> {
265    const ID: Id = {
266        let mut hasher = const_blake3::Hasher::new();
267        hasher.update(&Hash::<H>::ID.raw());
268        hasher.update(&T::ID.raw());
269        let mut digest = [0u8; 32];
270        hasher.finalize(&mut digest);
271        let mut raw = [0u8; 16];
272        let mut i = 0;
273        while i < raw.len() {
274            raw[i] = digest[16 + i];
275            i += 1;
276        }
277        match Id::new(raw) {
278            Some(id) => id,
279            None => panic!("derived handle schema id must be non-nil"),
280        }
281    };
282}
283
284impl<H, T> ConstDescribe for Handle<H, T>
285where
286    H: HashProtocol,
287    T: BlobSchema + ConstDescribe,
288{
289    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
290    where
291        B: BlobStore<Blake3>,
292    {
293        let id = Self::ID;
294        let name = H::NAME;
295        let schema_id = T::ID;
296        let description = blobs.put(format!(
297            "Typed handle for blobs hashed with {name}; the value stores the digest and metadata points at blob schema {schema_id:X}. The schema id is derived from the hash and blob schema.\n\nUse when referencing blobs from tribles without embedding data; the blob store holds the payload. For untyped content hashes, use the hash schema directly.\n\nHandles assume the blob store is available and consistent with the digest. If the blob is missing, the handle still validates but dereferencing will fail."
298        ))?;
299        let name_handle = blobs.put("handle")?;
300        let mut tribles = TribleSet::new();
301        tribles += H::describe(blobs)?;
302        tribles += T::describe(blobs)?;
303
304        tribles += entity! { ExclusiveId::force_ref(&id) @
305            metadata::name: name_handle,
306            metadata::description: description,
307            metadata::blob_schema: schema_id,
308            metadata::hash_schema: H::ID,
309            metadata::tag: metadata::KIND_VALUE_SCHEMA,
310        };
311
312        #[cfg(feature = "wasm")]
313        {
314            tribles += entity! { ExclusiveId::force_ref(&id) @
315                metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
316            };
317        }
318        Ok(Fragment::rooted(id, tribles))
319    }
320}
321
322impl<H: HashProtocol, T: BlobSchema> ValueSchema for Handle<H, T> {
323    type ValidationError = Infallible;
324}
325
#[cfg(test)]
mod tests {
    use super::Blake3;
    use super::Hash;
    use crate::prelude::*;
    use crate::value::schemas::hash::HashError;

    /// Formatting a random hash and parsing it back must yield the same bytes.
    #[test]
    fn value_roundtrip() {
        let v: Value<Hash<Blake3>> = Value::new(rand::random());
        let s: String = v.from_value();
        let back: Value<Hash<Blake3>> = s.try_to_value().expect("roundtrip should succeed");
        assert_eq!(v.raw, back.raw, "roundtrip must preserve the digest bytes");
    }

    /// A well-formed `blake3:<64 hex digits>` string parses successfully.
    #[test]
    fn value_from_known() {
        let s: &str = "blake3:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let _: Value<Hash<Blake3>> = s
            .try_to_value()
            .expect("packing valid constant should succeed");
    }

    /// A wrong protocol prefix is rejected with `BadProtocol`.
    #[test]
    fn to_value_fail_protocol() {
        let s: &str = "bad:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid protocol should fail");
        assert_eq!(err, HashError::BadProtocol);
    }

    /// A correct prefix followed by invalid hex is rejected with `BadHex`.
    #[test]
    fn to_value_fail_hex() {
        let s: &str = "blake3:BAD!";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid hex should fail");
        assert!(std::matches!(err, HashError::BadHex(..)));
    }
}