Skip to main content

sayiir_core/
hash32.rs

//! Fixed-length 32-byte identifiers backed by SHA-256.
//!
//! [`Hash32`] is a primitive value type — a `[u8; 32]` with cheap `Copy`,
//! fixed-cost comparison (a single SIMD-friendly 32-byte memcmp; note that
//! this is *not* a constant-time comparison in the cryptographic sense), and
//! a single hash-map probe instead of the per-character hashing a `String`
//! incurs. It is the building block for the semantic id newtypes
//! [`DefinitionHash`], [`WorkflowId`], and [`TaskId`], each of which is a
//! distinct `Hash32` newtype so the type system prevents mixing identifier
//! kinds at call sites.
//!
//! Serde encodes a `Hash32` as a 64-character lowercase hex string on
//! human-readable formats (JSON, TOML) and as a 32-byte byte string (via
//! `serialize_bytes`) on binary formats (bincode, rkyv-derived codecs). This
//! keeps user-facing snapshot blobs and API payloads readable while making
//! over-the-wire transports compact.
15
16use core::fmt;
17use core::str::FromStr;
18
19use serde::{Deserialize, Deserializer, Serialize, Serializer};
20use sha2::{Digest, Sha256};
21
/// Fixed-width 32-byte identifier; in practice usually a SHA-256 digest.
///
/// The bytes live inline in a `[u8; 32]`, so a value is `Copy` and needs no
/// heap allocation. Equality, ordering, and hashing are all derived from the
/// byte array. The derived `Default` is the all-zero value, equal to
/// [`Hash32::ZERO`], which lets newtypes wrapping it derive `Default` too.
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Hash32([u8; 32]);
29
30impl Hash32 {
31    /// Zero hash — useful as a sentinel for "uninitialised". Not a valid
32    /// SHA-256 output in practice.
33    pub const ZERO: Self = Self([0u8; 32]);
34
35    /// Construct from raw bytes.
36    #[must_use]
37    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
38        Self(bytes)
39    }
40
41    /// Construct from a byte slice, validating the length.
42    ///
43    /// Use this on the wire/storage boundary — e.g. decoding a `BYTEA`
44    /// column from sqlx — where you have a `&[u8]` rather than a
45    /// fixed-size array.
46    ///
47    /// # Errors
48    ///
49    /// Returns [`Hash32ParseError::WrongLength`] if `bytes.len() != 32`.
50    pub fn from_slice(bytes: &[u8]) -> Result<Self, Hash32ParseError> {
51        let arr: [u8; 32] = bytes
52            .try_into()
53            .map_err(|_| Hash32ParseError::WrongLength(bytes.len()))?;
54        Ok(Self(arr))
55    }
56
57    /// Borrow the underlying bytes.
58    #[must_use]
59    pub const fn as_bytes(&self) -> &[u8; 32] {
60        &self.0
61    }
62
63    /// Consume into the underlying bytes.
64    #[must_use]
65    pub const fn into_bytes(self) -> [u8; 32] {
66        self.0
67    }
68
69    /// Compute SHA-256 of the given input.
70    #[must_use]
71    pub fn sha256(input: impl AsRef<[u8]>) -> Self {
72        let mut hasher = Sha256::new();
73        hasher.update(input.as_ref());
74        Self::from_digest(hasher)
75    }
76
77    /// Finalise a hasher into a `Hash32`.
78    #[must_use]
79    pub fn from_digest(hasher: Sha256) -> Self {
80        let out = hasher.finalize();
81        let mut bytes = [0u8; 32];
82        bytes.copy_from_slice(&out);
83        Self(bytes)
84    }
85
86    /// Lowercase hex encoding (64 chars). Allocates.
87    #[must_use]
88    pub fn to_hex(&self) -> String {
89        use fmt::Write as _;
90        let mut s = String::with_capacity(64);
91        for byte in &self.0 {
92            let _ = write!(&mut s, "{byte:02x}");
93        }
94        s
95    }
96
97    /// Parse from a 64-character lowercase or uppercase hex string.
98    ///
99    /// # Errors
100    ///
101    /// Returns [`Hash32ParseError`] if the input is not 64 hex digits.
102    pub fn from_hex(s: &str) -> Result<Self, Hash32ParseError> {
103        let bytes_in = s.as_bytes();
104        if bytes_in.len() != 64 {
105            return Err(Hash32ParseError::WrongLength(bytes_in.len()));
106        }
107        let mut bytes = [0u8; 32];
108        for (i, byte) in bytes.iter_mut().enumerate() {
109            let lo = bytes_in.get(i * 2).copied().unwrap_or(0);
110            let hi = bytes_in.get(i * 2 + 1).copied().unwrap_or(0);
111            *byte = (decode_nibble(lo)? << 4) | decode_nibble(hi)?;
112        }
113        Ok(Self(bytes))
114    }
115}
116
117#[inline]
118fn decode_nibble(c: u8) -> Result<u8, Hash32ParseError> {
119    match c {
120        b'0'..=b'9' => Ok(c - b'0'),
121        b'a'..=b'f' => Ok(c - b'a' + 10),
122        b'A'..=b'F' => Ok(c - b'A' + 10),
123        _ => Err(Hash32ParseError::InvalidChar(c)),
124    }
125}
126
127/// Errors from parsing a hex-encoded [`Hash32`].
128#[derive(Debug, thiserror::Error)]
129pub enum Hash32ParseError {
130    /// Input length was not exactly 64 hex characters.
131    #[error("expected 64 hex characters, got {0}")]
132    WrongLength(usize),
133    /// Non-hex byte encountered.
134    #[error("invalid hex character: {:?}", *.0 as char)]
135    InvalidChar(u8),
136}
137
138impl fmt::Display for Hash32 {
139    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140        for byte in &self.0 {
141            write!(f, "{byte:02x}")?;
142        }
143        Ok(())
144    }
145}
146
147impl fmt::Debug for Hash32 {
148    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149        fmt::Display::fmt(self, f)
150    }
151}
152
153impl FromStr for Hash32 {
154    type Err = Hash32ParseError;
155    fn from_str(s: &str) -> Result<Self, Self::Err> {
156        Self::from_hex(s)
157    }
158}
159
160impl AsRef<[u8]> for Hash32 {
161    fn as_ref(&self) -> &[u8] {
162        &self.0
163    }
164}
165
166impl Serialize for Hash32 {
167    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
168        if serializer.is_human_readable() {
169            serializer.collect_str(self)
170        } else {
171            serializer.serialize_bytes(&self.0)
172        }
173    }
174}
175
176impl<'de> Deserialize<'de> for Hash32 {
177    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
178        if deserializer.is_human_readable() {
179            let s = <&str>::deserialize(deserializer)?;
180            Self::from_hex(s).map_err(serde::de::Error::custom)
181        } else {
182            struct V;
183            impl<'de> serde::de::Visitor<'de> for V {
184                type Value = Hash32;
185                fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
186                    f.write_str("32 raw bytes")
187                }
188                fn visit_bytes<E: serde::de::Error>(self, v: &[u8]) -> Result<Hash32, E> {
189                    if v.len() != 32 {
190                        return Err(E::invalid_length(v.len(), &self));
191                    }
192                    let mut bytes = [0u8; 32];
193                    bytes.copy_from_slice(v);
194                    Ok(Hash32(bytes))
195                }
196                fn visit_borrowed_bytes<E: serde::de::Error>(
197                    self,
198                    v: &'de [u8],
199                ) -> Result<Hash32, E> {
200                    self.visit_bytes(v)
201                }
202                fn visit_byte_buf<E: serde::de::Error>(self, v: Vec<u8>) -> Result<Hash32, E> {
203                    self.visit_bytes(&v)
204                }
205                fn visit_seq<A: serde::de::SeqAccess<'de>>(
206                    self,
207                    mut seq: A,
208                ) -> Result<Hash32, A::Error> {
209                    let mut bytes = [0u8; 32];
210                    for (i, byte) in bytes.iter_mut().enumerate() {
211                        *byte = seq
212                            .next_element()?
213                            .ok_or_else(|| serde::de::Error::invalid_length(i, &"32 bytes"))?;
214                    }
215                    Ok(Hash32(bytes))
216                }
217            }
218            deserializer.deserialize_bytes(V)
219        }
220    }
221}
222
// ============================================================================
// Semantic newtypes — `DefinitionHash`, `TaskId`, and `WorkflowId`.
//
// All are `Hash32`-backed but distinct at the type level so the compiler
// catches mixups. The macro keeps their surface in lock-step.
// ============================================================================
229
/// Generate a semantic newtype wrapping [`Hash32`].
///
/// Every generated type gets the same surface: `Copy`/`Eq`/`Ord`/`Hash`,
/// hex `Display`, a `Debug` that prefixes the type name, hex `FromStr`,
/// transparent serde, conversions to and from `Hash32`, and
/// `From<&str>`/`From<&String>`/`From<String>` impls that **SHA-256-hash**
/// their input rather than parsing it as hex. Hex round-trips go through
/// `from_hex` / `to_hex`.
macro_rules! hash32_newtype {
    ($(#[$attr:meta])* $name:ident) => {
        $(#[$attr])*
        #[derive(
            Clone, Copy, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize,
        )]
        #[serde(transparent)]
        pub struct $name(Hash32);

        impl $name {
            #[doc = concat!("Wrap an existing [`Hash32`] as a [`", stringify!($name), "`].")]
            #[must_use]
            pub const fn from_hash(hash: Hash32) -> Self {
                Self(hash)
            }

            #[doc = concat!("Build a [`", stringify!($name), "`] from raw 32 bytes.")]
            #[must_use]
            pub const fn from_bytes(bytes: [u8; 32]) -> Self {
                Self(Hash32::from_bytes(bytes))
            }

            #[doc = concat!(
                "Build a [`", stringify!($name),
                "`] from a byte slice after checking its length. See [`Hash32::from_slice`]."
            )]
            ///
            /// # Errors
            ///
            /// Returns [`Hash32ParseError::WrongLength`] if `bytes.len() != 32`.
            pub fn from_slice(bytes: &[u8]) -> Result<Self, Hash32ParseError> {
                Ok(Self(Hash32::from_slice(bytes)?))
            }

            /// Hash the given input with SHA-256 and wrap the digest.
            ///
            /// This is the canonical way to mint a fresh id from a
            /// human-readable name at construction time.
            #[must_use]
            pub fn sha256(input: impl AsRef<[u8]>) -> Self {
                Self(Hash32::sha256(input))
            }

            /// Borrow the wrapped [`Hash32`].
            #[must_use]
            pub const fn as_hash(&self) -> &Hash32 {
                &self.0
            }

            /// Borrow the raw 32 bytes.
            #[must_use]
            pub const fn as_bytes(&self) -> &[u8; 32] {
                self.0.as_bytes()
            }

            /// Lowercase hex encoding (64 chars).
            #[must_use]
            pub fn to_hex(&self) -> String {
                self.0.to_hex()
            }

            /// Parse from a 64-character hex string.
            ///
            /// # Errors
            ///
            /// See [`Hash32::from_hex`].
            pub fn from_hex(s: &str) -> Result<Self, Hash32ParseError> {
                Ok(Self(Hash32::from_hex(s)?))
            }
        }

        impl fmt::Debug for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                write!(f, "{}({})", stringify!($name), self.0)
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(self.as_hash(), f)
            }
        }

        impl FromStr for $name {
            type Err = Hash32ParseError;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                Self::from_hex(s)
            }
        }

        impl From<Hash32> for $name {
            fn from(hash: Hash32) -> Self {
                Self(hash)
            }
        }

        impl From<$name> for Hash32 {
            fn from(id: $name) -> Self {
                id.0
            }
        }

        // NOTE: the string conversions below SHA-256-hash their input. They
        // do **not** parse a hex digest — use `from_hex` for that. They exist
        // to keep builder APIs and test fixtures (`"task-1".into()`) ergonomic.
        impl From<&str> for $name {
            fn from(s: &str) -> Self {
                Self::sha256(s)
            }
        }

        impl From<&String> for $name {
            fn from(s: &String) -> Self {
                Self::from(s.as_str())
            }
        }

        impl From<String> for $name {
            fn from(s: String) -> Self {
                Self::from(s.as_str())
            }
        }
    };
}
341
hash32_newtype! {
    /// SHA-256 fingerprint of a workflow's structural definition.
    ///
    /// Computed from the workflow's continuation tree (task IDs, retry
    /// policies, fork shapes, delays, signals, loops, child workflows). Used
    /// by the runtime to detect when a serialised snapshot was written
    /// against a different workflow definition than the one currently in
    /// memory.
    ///
    /// Compares in a single 32-byte memcmp instead of a 64-character string
    /// equality, and hashes to one `u64` instead of per-character siphash.
    ///
    /// Converting from `&str`/`String` hashes the text with SHA-256; use
    /// `from_hex` or `str::parse` to read back an existing hex digest.
    DefinitionHash
}
355
hash32_newtype! {
    /// SHA-256 hash of a task's user-facing name (e.g. `"validate"`).
    ///
    /// Stored on every runtime data structure that previously held a task
    /// id as a `String` — [`ExecutionPosition`](crate::snapshot::ExecutionPosition)
    /// variants, [`TaskResult`](crate::snapshot::TaskResult), the
    /// `completed_tasks`/`task_retries`/`loop_iterations` `HashMap` keys on
    /// [`WorkflowSnapshot`](crate::snapshot::WorkflowSnapshot),
    /// [`AvailableTask`](crate::task_claim::AvailableTask), and so on. The
    /// human-readable name only lives once per workflow definition on the
    /// continuation tree.
    ///
    /// 32-byte memcmp + single-`u64` hash, same wins as [`DefinitionHash`].
    ///
    /// `"name".into()` hashes the name with SHA-256; use `from_hex` or
    /// `str::parse` to read back an existing hex digest.
    TaskId
}
371
hash32_newtype! {
    /// SHA-256 hash of a workflow's user-facing identifier (e.g.
    /// `"order-pipeline"`).
    ///
    /// Used in runtime contexts and dispatch maps where the workflow
    /// identifier needs cheap comparison without keeping the string alive.
    /// The human-readable name remains on the
    /// [`Workflow`](crate::workflow::Workflow) /
    /// [`WorkflowContext`](crate::context::WorkflowContext) for log/error
    /// display.
    ///
    /// `"name".into()` hashes the identifier with SHA-256; use `from_hex` or
    /// `str::parse` to read back an existing hex digest.
    WorkflowId
}
384
#[cfg(test)]
// Tests unwrap freely: a failed unwrap is itself the test failure.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    #[test]
    fn hash32_round_trips_via_hex() {
        let h = Hash32::sha256(b"hello world");
        let hex = h.to_hex();
        assert_eq!(hex.len(), 64);
        assert_eq!(Hash32::from_hex(&hex).unwrap(), h);
    }

    #[test]
    fn default_is_zero() {
        assert_eq!(Hash32::default(), Hash32::ZERO);
        assert_eq!(Hash32::ZERO.into_bytes(), [0u8; 32]);
    }

    #[test]
    fn from_slice_accepts_exactly_32_bytes() {
        let raw = [7u8; 32];
        assert_eq!(Hash32::from_slice(&raw).unwrap(), Hash32::from_bytes(raw));
    }

    #[test]
    fn from_slice_rejects_wrong_length() {
        assert!(matches!(
            Hash32::from_slice(&[0u8; 31]),
            Err(Hash32ParseError::WrongLength(31))
        ));
        assert!(matches!(
            Hash32::from_slice(&[0u8; 33]),
            Err(Hash32ParseError::WrongLength(33))
        ));
    }

    #[test]
    fn from_hex_rejects_wrong_length() {
        assert!(matches!(
            Hash32::from_hex("abc"),
            Err(Hash32ParseError::WrongLength(3))
        ));
    }

    #[test]
    fn from_hex_rejects_non_hex_char() {
        let bad = "z".repeat(64);
        assert!(matches!(
            Hash32::from_hex(&bad),
            Err(Hash32ParseError::InvalidChar(b'z'))
        ));
    }

    #[test]
    fn from_hex_rejects_bad_second_nibble() {
        // Only the low nibble of the first byte is invalid.
        let bad = format!("0g{}", "00".repeat(31));
        assert!(matches!(
            Hash32::from_hex(&bad),
            Err(Hash32ParseError::InvalidChar(b'g'))
        ));
    }

    #[test]
    fn from_hex_accepts_uppercase() {
        let lower = "abcd".repeat(16);
        let upper = "ABCD".repeat(16);
        assert_eq!(
            Hash32::from_hex(&lower).unwrap(),
            Hash32::from_hex(&upper).unwrap()
        );
    }

    #[test]
    fn from_hex_puts_first_char_in_high_nibble() {
        let hex = format!("1f{}", "00".repeat(31));
        let parsed = Hash32::from_hex(&hex).unwrap();
        assert_eq!(parsed.as_bytes().first().copied(), Some(0x1f));
    }

    #[test]
    fn json_round_trip_is_hex_string() {
        let h = Hash32::sha256(b"payload");
        let json = serde_json::to_string(&h).unwrap();
        // 64 hex characters plus the two surrounding quotes.
        assert_eq!(json.len(), 66);
        assert!(json.starts_with('"') && json.ends_with('"'));
        let parsed: Hash32 = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, h);
    }

    #[test]
    fn definition_hash_displays_as_hex() {
        let d = DefinitionHash::from_bytes([0xab; 32]);
        assert_eq!(format!("{d}"), "ab".repeat(32));
    }

    #[test]
    fn newtype_debug_includes_type_name() {
        let t = TaskId::from_bytes([0u8; 32]);
        assert_eq!(format!("{t:?}"), format!("TaskId({})", "00".repeat(32)));
    }

    #[test]
    fn definition_hash_serde_transparent() {
        let d = DefinitionHash::from_hash(Hash32::sha256(b"wf"));
        let as_hash_json = serde_json::to_string(d.as_hash()).unwrap();
        let as_def_json = serde_json::to_string(&d).unwrap();
        assert_eq!(as_hash_json, as_def_json);
    }

    #[test]
    fn definition_hash_round_trips_via_hex() {
        let d = DefinitionHash::from_hash(Hash32::sha256(b"abc"));
        let parsed: DefinitionHash = d.to_hex().parse().unwrap();
        assert_eq!(parsed, d);
    }

    #[test]
    fn definition_hash_from_str_hashes_input() {
        let by_str: DefinitionHash = "wf-1".into();
        let by_sha = DefinitionHash::sha256(b"wf-1");
        assert_eq!(by_str, by_sha);
    }
}