vr_jcs/digest.rs
1//! Strategy-bearing digest API over RFC 8785 canonical bytes.
2//!
3//! Canonicalization is a schema decision; digest algorithm choice
4//! (BLAKE3 vs. keyed BLAKE3 vs. domain-separated BLAKE3 vs. SHA-256
5//! vs. …) is a separate cryptographic / governance decision. This
6//! module owns that split.
7//!
8//! ## Strategy-bearing path
9//!
10//! For any site whose digest algorithm is or may become a policy
11//! variable:
12//!
13//! - [`DigestAlgorithm`] — the algorithm enum.
14//! - [`DigestStrategy`] — an algorithm plus any future policy knobs.
15//! - [`CanonicalDigest`] — typed output that remembers which algorithm
16//! produced it.
17//! - [`to_canon_digest_with`] — canonicalize `value`, digest under
18//! `strategy`.
19//!
20//! ## BLAKE3 fixed-policy convenience
21//!
22//! For sites where receipt policy has explicitly frozen the algorithm
23//! to plain BLAKE3:
24//!
25//! - [`to_canon_blake3_digest`] — `&Value` → `[u8; 32]`.
26//! - [`to_canon_blake3_digest_from_slice`] — strict-parse `&[u8]` →
27//! `[u8; 32]`.
28
29use serde_json::Value;
30
31use crate::canonicalize::to_canon_bytes_value;
32use crate::error::JcsError;
33
/// Names the digest construction applied to canonical bytes.
///
/// The algorithm is spelled out as an enum so that choosing one is a
/// visible governance decision instead of a silent default.
/// [`Blake3Untagged`](Self::Blake3Untagged) corresponds to the plain
/// `blake3::hash` call. The keyed and domain-separated variants store
/// the input that distinguishes them (a key or a context string), so
/// uses under different keys or contexts are kept apart by
/// construction.
///
/// [`Sha256`](Self::Sha256) is a placeholder: this crate does not
/// compute it yet, and requesting it yields
/// [`JcsError::UnsupportedAlgorithm`]. It is declared now so receipt
/// schemas and policy packs can already refer to it.
///
/// NOTE(review): the derived `Debug` prints the `key` of
/// [`Blake3Keyed`](Self::Blake3Keyed) verbatim — confirm the key is not
/// secret before logging values of this type.
#[non_exhaustive]
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum DigestAlgorithm {
    /// Plain BLAKE3: `hash(canonical_bytes)`.
    Blake3Untagged,
    /// Keyed BLAKE3 with a 32-byte key:
    /// `keyed_hash(key, canonical_bytes)`.
    Blake3Keyed {
        /// Domain key. The digest depends on this value, so pick it
        /// once per receipt domain and never share it between
        /// unrelated domains.
        key: [u8; 32],
    },
    /// Domain-separated BLAKE3:
    /// `derive_key(context, canonical_bytes)`.
    ///
    /// The context string identifies the domain. User code should
    /// supply it as a compile-time constant, because BLAKE3 expects the
    /// context to be globally unique per domain.
    Blake3DomainSeparated {
        /// Context string identifying the domain.
        context: String,
    },
    /// SHA-256 over the canonical bytes. Part of the API surface, but
    /// not wired yet.
    Sha256,
}
70
71impl DigestAlgorithm {
72 /// Short stable name suitable for use in receipt schemas and logs.
73 #[must_use]
74 pub const fn name(&self) -> &'static str {
75 match self {
76 Self::Blake3Untagged => "blake3-untagged",
77 Self::Blake3Keyed { .. } => "blake3-keyed",
78 Self::Blake3DomainSeparated { .. } => "blake3-domain-separated",
79 Self::Sha256 => "sha256",
80 }
81 }
82}
83
84/// A digest strategy bundles the algorithm with any future policy
85/// knobs (output truncation, pre-hash prefix, etc.). Today it's a thin
86/// newtype; the wrapper exists so extensions don't churn call sites.
87#[derive(Clone, Debug, PartialEq, Eq)]
88pub struct DigestStrategy {
89 /// The algorithm to apply.
90 pub algorithm: DigestAlgorithm,
91}
92
93impl DigestStrategy {
94 /// Plain untagged BLAKE3 over canonical bytes.
95 #[must_use]
96 pub const fn blake3_untagged() -> Self {
97 Self {
98 algorithm: DigestAlgorithm::Blake3Untagged,
99 }
100 }
101
102 /// Keyed BLAKE3 over canonical bytes.
103 #[must_use]
104 pub const fn blake3_keyed(key: [u8; 32]) -> Self {
105 Self {
106 algorithm: DigestAlgorithm::Blake3Keyed { key },
107 }
108 }
109
110 /// Domain-separated BLAKE3 over canonical bytes.
111 #[must_use]
112 pub fn blake3_domain_separated(context: impl Into<String>) -> Self {
113 Self {
114 algorithm: DigestAlgorithm::Blake3DomainSeparated {
115 context: context.into(),
116 },
117 }
118 }
119
120 /// SHA-256 over canonical bytes. Presently returns
121 /// [`JcsError::UnsupportedAlgorithm`] at call time; the
122 /// constructor is provided so policy code can reference it today.
123 #[must_use]
124 pub const fn sha256() -> Self {
125 Self {
126 algorithm: DigestAlgorithm::Sha256,
127 }
128 }
129}
130
131/// Typed output of a canonical digest computation.
132///
133/// Carries the algorithm that produced `bytes` so downstream consumers
134/// (receipt envelopes, audit logs) can record the algorithm without
135/// out-of-band convention.
136#[derive(Clone, Debug, PartialEq, Eq)]
137pub struct CanonicalDigest {
138 /// The algorithm used.
139 pub algorithm: DigestAlgorithm,
140 /// Raw digest bytes. Length depends on the algorithm (32 for
141 /// BLAKE3 variants, 32 for SHA-256 once wired).
142 pub bytes: Vec<u8>,
143}
144
145/// Canonicalize a trusted `serde_json::Value` and digest the canonical
146/// bytes under the given strategy.
147///
148/// The value is assumed to come from caller-controlled construction.
149/// For untrusted input, use [`crate::to_canon_bytes_from_slice`] first
150/// (strict admission) and then pass the resulting `Value` back through
151/// this function — or use a strict-parse sibling when one exists for
152/// the target strategy.
153///
154/// # Errors
155///
156/// Returns:
157/// - [`JcsError::Json`] if the value cannot be canonicalized
158/// - [`JcsError::InvalidString`] for I-JSON forbidden code points
159/// - [`JcsError::InvalidNumber`] for non-interoperable numbers
160/// - [`JcsError::NestingDepthExceeded`] for values beyond
161/// [`crate::MAX_NESTING_DEPTH`]
162/// - [`JcsError::UnsupportedAlgorithm`] if the strategy names an
163/// algorithm not wired in this build (e.g. SHA-256 today)
164pub fn to_canon_digest_with(
165 value: &Value,
166 strategy: &DigestStrategy,
167) -> Result<CanonicalDigest, JcsError> {
168 let bytes = to_canon_bytes_value(value)?;
169 let digest_bytes = match &strategy.algorithm {
170 DigestAlgorithm::Blake3Untagged => blake3::hash(&bytes).as_bytes().to_vec(),
171 DigestAlgorithm::Blake3Keyed { key } => blake3::keyed_hash(key, &bytes).as_bytes().to_vec(),
172 DigestAlgorithm::Blake3DomainSeparated { context } => {
173 // BLAKE3 derive_key is the standard domain-separated digest:
174 // context is the domain identifier, key_material is the data.
175 blake3::derive_key(context, &bytes).to_vec()
176 }
177 DigestAlgorithm::Sha256 => {
178 return Err(JcsError::UnsupportedAlgorithm(
179 "SHA-256 over canonical bytes is declared in the API but not \
180 wired in this build; open a follow-up to add the sha2 dep"
181 .to_string(),
182 ));
183 }
184 };
185 Ok(CanonicalDigest {
186 algorithm: strategy.algorithm.clone(),
187 bytes: digest_bytes,
188 })
189}
190
191/// BLAKE3 fixed-policy convenience. Canonicalize `value` and return
192/// `blake3::hash(canonical_bytes)` as a 32-byte array.
193///
194/// Use this only at sites where the receipt convention explicitly
195/// fixes the algorithm to plain BLAKE3. For anything else, use
196/// [`to_canon_digest_with`] with an explicit [`DigestStrategy`].
197///
198/// # Errors
199///
200/// Same as [`to_canon_digest_with`] minus
201/// [`JcsError::UnsupportedAlgorithm`].
202pub fn to_canon_blake3_digest(value: &Value) -> Result<[u8; 32], JcsError> {
203 let bytes = to_canon_bytes_value(value)?;
204 Ok(*blake3::hash(&bytes).as_bytes())
205}
206
207/// Strict-parse sibling of [`to_canon_blake3_digest`] for untrusted
208/// JSON bytes.
209///
210/// # Errors
211///
212/// Returns [`JcsError::Json`] for malformed JSON or duplicate property
213/// names, [`JcsError::InvalidString`] or [`JcsError::InvalidNumber`]
214/// for I-JSON violations, and [`JcsError::NestingDepthExceeded`] for
215/// depth limit breach.
216pub fn to_canon_blake3_digest_from_slice(json: &[u8]) -> Result<[u8; 32], JcsError> {
217 let bytes = crate::to_canon_bytes_from_slice(json)?;
218 Ok(*blake3::hash(&bytes).as_bytes())
219}