Skip to main content

triblespace_core/value/schemas/
hash.rs

1use crate::blob::BlobSchema;
2use crate::id::ExclusiveId;
3use crate::id::Id;
4use crate::id_hex;
5use crate::macros::entity;
6use crate::metadata;
7use crate::metadata::{ConstDescribe, ConstId};
8use crate::repo::BlobStore;
9use crate::trible::TribleSet;
10use crate::value::FromValue;
11use crate::value::RawValue;
12use crate::value::TryToValue;
13use crate::value::Value;
14use crate::value::ValueSchema;
15use std::convert::Infallible;
16
17use anybytes::Bytes;
18use digest::typenum::U32;
19use digest::Digest;
20use hex::FromHex;
21use hex::FromHexError;
22use std::marker::PhantomData;
23
24/// A trait for hash functions.
25/// This trait is implemented by hash functions that can be in a value schema
26/// for example via a [struct@Hash] or a [Handle].
27pub trait HashProtocol: Digest<OutputSize = U32> + Clone + Send + 'static + ConstDescribe {
28    const NAME: &'static str;
29}
30
/// Value schema for a hash: a fixed-size 256-bit digest of a byte sequence.
///
/// See the [crate::id] module documentation for a discussion on the length
/// of the digest and its role as an intrinsic identifier.
pub struct Hash<H> {
    // Only records the hash protocol at the type level; no `H` is ever stored.
    _hasher: PhantomData<fn(H)>,
}
39
40impl<H> ConstId for Hash<H>
41where
42    H: HashProtocol,
43{
44    const ID: Id = H::ID;
45}
46
47impl<H> ConstDescribe for Hash<H>
48where
49    H: HashProtocol,
50{
51    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
52    where
53        B: BlobStore<Blake3>,
54    {
55        H::describe(blobs)
56    }
57}
58
59impl<H> ValueSchema for Hash<H>
60where
61    H: HashProtocol,
62{
63    type ValidationError = Infallible;
64}
65
66impl<H> Hash<H>
67where
68    H: HashProtocol,
69{
70    pub fn digest(blob: &Bytes) -> Value<Self> {
71        Value::new(H::digest(blob).into())
72    }
73
74    pub fn from_hex(hex: &str) -> Result<Value<Self>, FromHexError> {
75        let digest = RawValue::from_hex(hex)?;
76        Ok(Value::new(digest))
77    }
78
79    pub fn to_hex(value: &Value<Self>) -> String {
80        hex::encode_upper(value.raw)
81    }
82}
83
84impl<H> FromValue<'_, Hash<H>> for String
85where
86    H: HashProtocol,
87{
88    fn from_value(v: &Value<Hash<H>>) -> Self {
89        let mut out = String::new();
90        out.push_str(<H as HashProtocol>::NAME);
91        out.push(':');
92        out.push_str(&hex::encode(v.raw));
93        out
94    }
95}
96
97/// An error that can occur when converting a hash value from a string.
98/// The error can be caused by a bad protocol or a bad hex encoding.
99#[derive(Debug, Clone, Copy, PartialEq)]
100pub enum HashError {
101    BadProtocol,
102    BadHex(FromHexError),
103}
104
105impl From<FromHexError> for HashError {
106    fn from(value: FromHexError) -> Self {
107        HashError::BadHex(value)
108    }
109}
110
111impl<H> TryToValue<Hash<H>> for &str
112where
113    H: HashProtocol,
114{
115    type Error = HashError;
116
117    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
118        let protocol = <H as HashProtocol>::NAME;
119        if !(self.starts_with(protocol) && &self[protocol.len()..=protocol.len()] == ":") {
120            return Err(HashError::BadProtocol);
121        }
122        let digest = RawValue::from_hex(&self[protocol.len() + 1..])?;
123
124        Ok(Value::new(digest))
125    }
126}
127
128impl<H> TryToValue<Hash<H>> for String
129where
130    H: HashProtocol,
131{
132    type Error = HashError;
133
134    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
135        (&self[..]).try_to_value()
136    }
137}
138
139fn describe_hash<H, B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
140where
141    H: HashProtocol,
142    B: BlobStore<Blake3>,
143{
144    let id = H::ID;
145    let name = H::NAME;
146    let description = blobs.put(format!(
147        "{name} 256-bit hash digest of raw bytes. The value stores the digest bytes and is stable across systems.\n\nUse for content-addressed identifiers, deduplication, or integrity checks. Use Handle when you need a typed blob reference with schema metadata.\n\nHashes do not carry type information; the meaning comes from the schema that uses them. If you need provenance or typed payloads, combine with handles or additional metadata."
148    ))?;
149    let name_handle = blobs.put(name.to_string())?;
150    let mut tribles = TribleSet::new();
151
152    tribles += entity! { ExclusiveId::force_ref(&id) @
153        metadata::name: name_handle,
154        metadata::description: description,
155        metadata::tag: metadata::KIND_VALUE_SCHEMA,
156    };
157
158    #[cfg(feature = "wasm")]
159    {
160        tribles += entity! { ExclusiveId::force_ref(&id) @
161            metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
162        };
163    }
164
165    Ok(tribles)
166}
167
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    /// Writes `hash:` followed by the 32 raw bytes as 64 upper-case hex digits.
    ///
    /// Any failure of the underlying writer is reported as error code `1`.
    #[value_formatter]
    pub(crate) fn hash_hex(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

        out.write_str("hash:").map_err(|_| 1u32)?;
        for byte in raw.iter().copied() {
            // High nibble first, then low nibble.
            let high = HEX_DIGITS[usize::from(byte >> 4)];
            let low = HEX_DIGITS[usize::from(byte & 0x0F)];
            out.write_char(char::from(high)).map_err(|_| 1u32)?;
            out.write_char(char::from(low)).map_err(|_| 1u32)?;
        }
        Ok(())
    }
}
187
188use blake2::Blake2b as Blake2bUnsized;
189pub type Blake2b = Blake2bUnsized<U32>;
190
191pub use blake3::Hasher as Blake3;
192
193impl HashProtocol for Blake2b {
194    const NAME: &'static str = "blake2";
195}
196
197impl HashProtocol for Blake3 {
198    const NAME: &'static str = "blake3";
199}
200
201impl ConstId for Blake2b {
202    const ID: Id = id_hex!("91F880222412A49F012BE999942E6199");
203}
204
205impl ConstDescribe for Blake2b {
206    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
207    where
208        B: BlobStore<Blake3>,
209    {
210        describe_hash::<Self, B>(blobs)
211    }
212}
213
214impl ConstId for Blake3 {
215    const ID: Id = id_hex!("4160218D6C8F620652ECFBD7FDC7BDB3");
216}
217
218impl ConstDescribe for Blake3 {
219    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
220    where
221        B: BlobStore<Blake3>,
222    {
223        describe_hash::<Self, B>(blobs)
224    }
225}
226
227/// This is a value schema for a handle.
228/// A handle to a blob is comprised of a hash of a blob and type level information about the blobs schema.
229///
230/// The handle can be stored in a Trible, while the blob can be stored in a BlobSet, allowing for a
231/// separation of the blob data from the means of identifying and accessing it.
232///
233/// The handle is generated when a blob is inserted into a BlobSet, and the handle
234/// can be used to retrieve the blob from the BlobSet later.
235#[repr(transparent)]
236pub struct Handle<H: HashProtocol, T: BlobSchema> {
237    digest: Hash<H>,
238    _type: PhantomData<T>,
239}
240
241impl<H: HashProtocol, T: BlobSchema> Handle<H, T> {
242    pub fn from_hash(hash: Value<Hash<H>>) -> Value<Self> {
243        hash.transmute()
244    }
245
246    pub fn to_hash(handle: Value<Self>) -> Value<Hash<H>> {
247        handle.transmute()
248    }
249}
250
251impl<H: HashProtocol, T: BlobSchema> From<Value<Hash<H>>> for Value<Handle<H, T>> {
252    fn from(value: Value<Hash<H>>) -> Self {
253        value.transmute()
254    }
255}
256
257impl<H: HashProtocol, T: BlobSchema> From<Value<Handle<H, T>>> for Value<Hash<H>> {
258    fn from(value: Value<Handle<H, T>>) -> Self {
259        value.transmute()
260    }
261}
262
263impl<H: HashProtocol, T: BlobSchema> ConstId for Handle<H, T> {
264    const ID: Id = {
265        let mut hasher = const_blake3::Hasher::new();
266        hasher.update(&Hash::<H>::ID.raw());
267        hasher.update(&T::ID.raw());
268        let mut digest = [0u8; 32];
269        hasher.finalize(&mut digest);
270        let mut raw = [0u8; 16];
271        let mut i = 0;
272        while i < raw.len() {
273            raw[i] = digest[16 + i];
274            i += 1;
275        }
276        match Id::new(raw) {
277            Some(id) => id,
278            None => panic!("derived handle schema id must be non-nil"),
279        }
280    };
281}
282
283impl<H, T> ConstDescribe for Handle<H, T>
284where
285    H: HashProtocol,
286    T: BlobSchema + ConstDescribe,
287{
288
289    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
290    where
291        B: BlobStore<Blake3>,
292    {
293        let id = Self::ID;
294        let name = H::NAME;
295        let schema_id = T::ID;
296        let description = blobs.put(format!(
297            "Typed handle for blobs hashed with {name}; the value stores the digest and metadata points at blob schema {schema_id:X}. The schema id is derived from the hash and blob schema.\n\nUse when referencing blobs from tribles without embedding data; the blob store holds the payload. For untyped content hashes, use the hash schema directly.\n\nHandles assume the blob store is available and consistent with the digest. If the blob is missing, the handle still validates but dereferencing will fail."
298        ))?;
299        let name_handle = blobs.put("handle".to_string())?;
300        let mut tribles = TribleSet::new();
301        tribles += H::describe(blobs)?;
302        tribles += T::describe(blobs)?;
303
304        tribles += entity! { ExclusiveId::force_ref(&id) @
305            metadata::name: name_handle,
306            metadata::description: description,
307            metadata::blob_schema: schema_id,
308            metadata::hash_schema: H::ID,
309            metadata::tag: metadata::KIND_VALUE_SCHEMA,
310        };
311
312        #[cfg(feature = "wasm")]
313        {
314            tribles += entity! { ExclusiveId::force_ref(&id) @
315                metadata::value_formatter: blobs.put(wasm_formatter::HASH_HEX_WASM)?,
316            };
317        }
318        Ok(tribles)
319    }
320}
321
322impl<H: HashProtocol, T: BlobSchema> ValueSchema for Handle<H, T> {
323    type ValidationError = Infallible;
324}
325
#[cfg(test)]
mod tests {
    use super::Blake3;
    use super::Hash;
    use crate::prelude::*;
    use crate::value::schemas::hash::HashError;

    /// A random hash survives the trip through its string form unchanged.
    #[test]
    fn value_roundtrip() {
        let v: Value<Hash<Blake3>> = Value::new(rand::random());
        let s: String = v.from_value();
        let back: Value<Hash<Blake3>> = s.try_to_value().expect("roundtrip should succeed");
        // Compare the raw bytes so the test actually verifies the roundtrip
        // instead of only checking that parsing succeeds.
        assert_eq!(v.raw, back.raw);
    }

    /// A well-formed, upper-case constant parses successfully.
    #[test]
    fn value_from_known() {
        let s: &str = "blake3:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let _: Value<Hash<Blake3>> = s
            .try_to_value()
            .expect("packing valid constant should succeed");
    }

    /// A wrong protocol prefix is rejected with `BadProtocol`.
    #[test]
    fn to_value_fail_protocol() {
        let s: &str = "bad:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid protocol should fail");
        assert_eq!(err, HashError::BadProtocol);
    }

    /// A correct prefix followed by invalid hex is rejected with `BadHex`.
    #[test]
    fn to_value_fail_hex() {
        let s: &str = "blake3:BAD!";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid hex should fail");
        assert!(std::matches!(err, HashError::BadHex(..)));
    }
}
365}