Skip to main content

triblespace_core/value/schemas/
hash.rs

1use crate::blob::schemas::longstring::LongString;
2use crate::blob::BlobSchema;
3use crate::id::ExclusiveId;
4use crate::id::Id;
5use crate::id_hex;
6use crate::macros::entity;
7use crate::metadata;
8use crate::metadata::ConstMetadata;
9use crate::repo::BlobStore;
10use crate::trible::TribleSet;
11use crate::value::FromValue;
12use crate::value::RawValue;
13use crate::value::TryToValue;
14use crate::value::Value;
15use crate::value::ValueSchema;
16use std::convert::Infallible;
17
18use anybytes::Bytes;
19use digest::typenum::U32;
20use digest::Digest;
21use hex::FromHex;
22use hex::FromHexError;
23use std::marker::PhantomData;
24
25#[cfg(feature = "wasm")]
26use crate::blob::schemas::wasmcode::WasmCode;
27/// A trait for hash functions.
28/// This trait is implemented by hash functions that can be in a value schema
29/// for example via a [struct@Hash] or a [Handle].
30pub trait HashProtocol: Digest<OutputSize = U32> + Clone + Send + 'static + ConstMetadata {
31    const NAME: &'static str;
32}
33
/// Value schema for a hash digest.
///
/// A hash is a fixed-size, 256-bit digest of a byte sequence, produced by
/// the hash protocol `H`.
///
/// See the [crate::id] module documentation for a discussion on the length
/// of the digest and its role as an intrinsic identifier.
pub struct Hash<H> {
    // Function-pointer marker: records the protocol type without storing
    // a value of type `H`.
    _hasher: PhantomData<fn(H)>,
}
42
43impl<H> ConstMetadata for Hash<H>
44where
45    H: HashProtocol,
46{
47    fn id() -> Id {
48        <H as ConstMetadata>::id()
49    }
50
51    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
52    where
53        B: BlobStore<Blake3>,
54    {
55        H::describe(blobs)
56    }
57}
58
59impl<H> ValueSchema for Hash<H>
60where
61    H: HashProtocol,
62{
63    type ValidationError = Infallible;
64}
65
66impl<H> Hash<H>
67where
68    H: HashProtocol,
69{
70    pub fn digest(blob: &Bytes) -> Value<Self> {
71        Value::new(H::digest(blob).into())
72    }
73
74    pub fn from_hex(hex: &str) -> Result<Value<Self>, FromHexError> {
75        let digest = RawValue::from_hex(hex)?;
76        Ok(Value::new(digest))
77    }
78
79    pub fn to_hex(value: &Value<Self>) -> String {
80        hex::encode_upper(value.raw)
81    }
82}
83
84impl<H> FromValue<'_, Hash<H>> for String
85where
86    H: HashProtocol,
87{
88    fn from_value(v: &Value<Hash<H>>) -> Self {
89        let mut out = String::new();
90        out.push_str(<H as HashProtocol>::NAME);
91        out.push(':');
92        out.push_str(&hex::encode(v.raw));
93        out
94    }
95}
96
97/// An error that can occur when converting a hash value from a string.
98/// The error can be caused by a bad protocol or a bad hex encoding.
99#[derive(Debug, Clone, Copy, PartialEq)]
100pub enum HashError {
101    BadProtocol,
102    BadHex(FromHexError),
103}
104
105impl From<FromHexError> for HashError {
106    fn from(value: FromHexError) -> Self {
107        HashError::BadHex(value)
108    }
109}
110
111impl<H> TryToValue<Hash<H>> for &str
112where
113    H: HashProtocol,
114{
115    type Error = HashError;
116
117    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
118        let protocol = <H as HashProtocol>::NAME;
119        if !(self.starts_with(protocol) && &self[protocol.len()..=protocol.len()] == ":") {
120            return Err(HashError::BadProtocol);
121        }
122        let digest = RawValue::from_hex(&self[protocol.len() + 1..])?;
123
124        Ok(Value::new(digest))
125    }
126}
127
128impl<H> TryToValue<Hash<H>> for String
129where
130    H: HashProtocol,
131{
132    type Error = HashError;
133
134    fn try_to_value(self) -> Result<Value<Hash<H>>, Self::Error> {
135        (&self[..]).try_to_value()
136    }
137}
138
139fn describe_hash<H, B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
140where
141    H: HashProtocol,
142    B: BlobStore<Blake3>,
143{
144    let id = H::id();
145    let name = H::NAME;
146    let description = blobs.put::<LongString, _>(format!(
147        "{name} 256-bit hash digest of raw bytes. The value stores the digest bytes and is stable across systems.\n\nUse for content-addressed identifiers, deduplication, or integrity checks. Use Handle when you need a typed blob reference with schema metadata.\n\nHashes do not carry type information; the meaning comes from the schema that uses them. If you need provenance or typed payloads, combine with handles or additional metadata."
148    ))?;
149    let mut tribles = TribleSet::new();
150
151    tribles += entity! { ExclusiveId::force_ref(&id) @
152        metadata::shortname: name,
153        metadata::description: description,
154        metadata::tag: metadata::KIND_VALUE_SCHEMA,
155    };
156
157    #[cfg(feature = "wasm")]
158    {
159        tribles += entity! { ExclusiveId::force_ref(&id) @
160            metadata::value_formatter: blobs.put::<WasmCode, _>(wasm_formatter::HASH_HEX_WASM)?,
161        };
162    }
163
164    Ok(tribles)
165}
166
/// Formatter used for hash and handle values when the `wasm` feature is
/// enabled.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes `hash:` followed by the 32 raw bytes as upper-case hex.
    // Any write failure is reported as the error code `1`.
    // NOTE(review): the `HASH_HEX_WASM` constant referenced elsewhere in
    // this file is presumably generated by `#[value_formatter]` from this
    // function - confirm against the macro.
    #[value_formatter]
    pub(crate) fn hash_hex(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
        out.write_str("hash:").map_err(|_| 1u32)?;
        for &byte in raw {
            // Split the byte into its two nibbles, most significant first.
            let high = (byte >> 4) as usize;
            let low = (byte & 0x0F) as usize;
            out.write_char(HEX_DIGITS[high] as char).map_err(|_| 1u32)?;
            out.write_char(HEX_DIGITS[low] as char).map_err(|_| 1u32)?;
        }
        Ok(())
    }
}
186
187use blake2::Blake2b as Blake2bUnsized;
188pub type Blake2b = Blake2bUnsized<U32>;
189
190pub use blake3::Hasher as Blake3;
191
192impl HashProtocol for Blake2b {
193    const NAME: &'static str = "blake2";
194}
195
196impl HashProtocol for Blake3 {
197    const NAME: &'static str = "blake3";
198}
199
200impl ConstMetadata for Blake2b {
201    fn id() -> Id {
202        id_hex!("91F880222412A49F012BE999942E6199")
203    }
204
205    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
206    where
207        B: BlobStore<Blake3>,
208    {
209        describe_hash::<Self, B>(blobs)
210    }
211}
212
213impl ConstMetadata for Blake3 {
214    fn id() -> Id {
215        id_hex!("4160218D6C8F620652ECFBD7FDC7BDB3")
216    }
217
218    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
219    where
220        B: BlobStore<Blake3>,
221    {
222        describe_hash::<Self, B>(blobs)
223    }
224}
225
226/// This is a value schema for a handle.
227/// A handle to a blob is comprised of a hash of a blob and type level information about the blobs schema.
228///
229/// The handle can be stored in a Trible, while the blob can be stored in a BlobSet, allowing for a
230/// separation of the blob data from the means of identifying and accessing it.
231///
232/// The handle is generated when a blob is inserted into a BlobSet, and the handle
233/// can be used to retrieve the blob from the BlobSet later.
234#[repr(transparent)]
235pub struct Handle<H: HashProtocol, T: BlobSchema> {
236    digest: Hash<H>,
237    _type: PhantomData<T>,
238}
239
240impl<H: HashProtocol, T: BlobSchema> Handle<H, T> {
241    pub fn from_hash(hash: Value<Hash<H>>) -> Value<Self> {
242        hash.transmute()
243    }
244
245    pub fn to_hash(handle: Value<Self>) -> Value<Hash<H>> {
246        handle.transmute()
247    }
248}
249
250impl<H: HashProtocol, T: BlobSchema> From<Value<Hash<H>>> for Value<Handle<H, T>> {
251    fn from(value: Value<Hash<H>>) -> Self {
252        value.transmute()
253    }
254}
255
256impl<H: HashProtocol, T: BlobSchema> From<Value<Handle<H, T>>> for Value<Hash<H>> {
257    fn from(value: Value<Handle<H, T>>) -> Self {
258        value.transmute()
259    }
260}
261
262impl<H: HashProtocol, T: BlobSchema> ConstMetadata for Handle<H, T> {
263    // NOTE: This can't be a `const fn` while we rely on the runtime `blake3`
264    // hasher to derive the identifier. Once a const-friendly hashing API is
265    // available we can revisit this.
266    fn id() -> Id {
267        let mut hasher = blake3::Hasher::new();
268        hasher.update(Hash::<H>::id().as_ref());
269        hasher.update(T::id().as_ref());
270        let digest = hasher.finalize();
271        let mut raw = [0u8; 16];
272        let bytes: &[u8] = digest.as_ref();
273        let lower_half = &bytes[bytes.len() - raw.len()..];
274        raw.copy_from_slice(lower_half);
275        Id::new(raw).expect("derived handle schema id must be non-nil")
276    }
277
278    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
279    where
280        B: BlobStore<Blake3>,
281    {
282        let id = Self::id();
283        let name = H::NAME;
284        let schema_id = T::id();
285        let description = blobs.put::<LongString, _>(format!(
286            "Typed handle for blobs hashed with {name}; the value stores the digest and metadata points at blob schema {schema_id:X}. The schema id is derived from the hash and blob schema.\n\nUse when referencing blobs from tribles without embedding data; the blob store holds the payload. For untyped content hashes, use the hash schema directly.\n\nHandles assume the blob store is available and consistent with the digest. If the blob is missing, the handle still validates but dereferencing will fail."
287        ))?;
288        let mut tribles = TribleSet::new();
289        tribles += H::describe(blobs)?;
290        tribles += T::describe(blobs)?;
291
292        tribles += entity! { ExclusiveId::force_ref(&id) @
293            metadata::shortname: "handle",
294            metadata::description: description,
295            metadata::blob_schema: schema_id,
296            metadata::hash_schema: H::id(),
297            metadata::tag: metadata::KIND_VALUE_SCHEMA,
298        };
299
300        #[cfg(feature = "wasm")]
301        {
302            tribles += entity! { ExclusiveId::force_ref(&id) @
303                metadata::value_formatter: blobs.put::<WasmCode, _>(wasm_formatter::HASH_HEX_WASM)?,
304            };
305        }
306        Ok(tribles)
307    }
308}
309
310impl<H: HashProtocol, T: BlobSchema> ValueSchema for Handle<H, T> {
311    type ValidationError = Infallible;
312}
313
#[cfg(test)]
mod tests {
    use super::Blake3;
    use crate::prelude::*;
    use crate::value::schemas::hash::HashError;
    use rand;

    use super::Hash;

    /// A random hash value survives conversion to its string form and back.
    #[test]
    fn value_roundtrip() {
        let v: Value<Hash<Blake3>> = Value::new(rand::random());
        let s: String = v.from_value();
        let parsed: Value<Hash<Blake3>> = s.try_to_value().expect("roundtrip should succeed");
        // Parsing must reproduce the original digest bytes, not merely succeed.
        assert_eq!(v.raw, parsed.raw);
    }

    /// A well-formed constant with the right protocol prefix parses.
    #[test]
    fn value_from_known() {
        let s: &str = "blake3:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let _: Value<Hash<Blake3>> = s
            .try_to_value()
            .expect("packing valid constant should succeed");
    }

    /// A wrong protocol prefix is reported as `BadProtocol`.
    #[test]
    fn to_value_fail_protocol() {
        let s: &str = "bad:CA98593CB9DC0FA48B2BE01E53D042E22B47862D646F9F19E2889A7961663663";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid protocol should fail");
        assert_eq!(err, HashError::BadProtocol);
    }

    /// A correct prefix followed by invalid hex is reported as `BadHex`.
    #[test]
    fn to_value_fail_hex() {
        let s: &str = "blake3:BAD!";
        let err: HashError = <&str as TryToValue<Hash<Blake3>>>::try_to_value(s)
            .expect_err("packing invalid hex should fail");
        assert!(std::matches!(err, HashError::BadHex(..)));
    }
}
353}