Skip to main content

vr_jcs/
digest.rs

1//! Strategy-bearing digest API over RFC 8785 canonical bytes.
2//!
3//! Canonicalization is a schema decision; digest algorithm choice
4//! (BLAKE3 vs. keyed BLAKE3 vs. domain-separated BLAKE3 vs. SHA-256
5//! vs. …) is a separate cryptographic / governance decision. This
6//! module owns that split.
7//!
8//! ## Strategy-bearing path
9//!
10//! For any site whose digest algorithm is or may become a policy
11//! variable:
12//!
13//! - [`DigestAlgorithm`] — the algorithm enum.
14//! - [`DigestStrategy`] — an algorithm plus any future policy knobs.
15//! - [`CanonicalDigest`] — typed output that remembers which algorithm
16//!   produced it.
17//! - [`to_canon_digest_with`] — canonicalize `value`, digest under
18//!   `strategy`.
19//!
20//! ## BLAKE3 fixed-policy convenience
21//!
22//! For sites where receipt policy has explicitly frozen the algorithm
23//! to plain BLAKE3:
24//!
25//! - [`to_canon_blake3_digest`] — `&Value` → `[u8; 32]`.
26//! - [`to_canon_blake3_digest_from_slice`] — strict-parse `&[u8]` →
27//!   `[u8; 32]`.
28
29use serde_json::Value;
30
31use crate::canonicalize::to_canon_bytes_value;
32use crate::error::JcsError;
33
/// The digest algorithms a [`DigestStrategy`] can name.
///
/// The choice is spelled out as an enum so that selecting an algorithm
/// is a named governance decision instead of an implicit default.
/// [`Blake3Untagged`](Self::Blake3Untagged) corresponds to the plain
/// `blake3::hash` pattern. The keyed and domain-separated variants
/// store their differentiating input (key or context) inside the
/// variant, so two uses with different keys or contexts cannot
/// accidentally collide on a digest.
///
/// [`Sha256`](Self::Sha256) is declared but not yet wired in this
/// crate; requesting it yields [`JcsError::UnsupportedAlgorithm`].
/// It is present so that receipt schemas and policy packs can refer to
/// it before the implementation lands.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DigestAlgorithm {
    /// Plain BLAKE3: `hash(canonical_bytes)`.
    Blake3Untagged,

    /// Keyed BLAKE3 with a 32-byte key:
    /// `keyed_hash(key, canonical_bytes)`.
    Blake3Keyed {
        /// Domain key. Its value is load-bearing for the digest: pick
        /// it once per receipt domain and never share it between
        /// unrelated domains.
        ///
        /// NOTE(review): the derived `Debug` impl prints these bytes;
        /// confirm the key is never treated as a secret where this
        /// type may end up in logs.
        key: [u8; 32],
    },

    /// Domain-separated BLAKE3 via
    /// `derive_key(context, canonical_bytes)`.
    ///
    /// The context string acts as the domain identifier. It must be a
    /// compile-time constant in user code, because BLAKE3 semantics
    /// require it to be globally unique per domain.
    Blake3DomainSeparated {
        /// Domain context string.
        context: String,
    },

    /// SHA-256 over canonical bytes. Part of the API surface, but not
    /// yet wired.
    Sha256,
}
70
71impl DigestAlgorithm {
72    /// Short stable name suitable for use in receipt schemas and logs.
73    #[must_use]
74    pub const fn name(&self) -> &'static str {
75        match self {
76            Self::Blake3Untagged => "blake3-untagged",
77            Self::Blake3Keyed { .. } => "blake3-keyed",
78            Self::Blake3DomainSeparated { .. } => "blake3-domain-separated",
79            Self::Sha256 => "sha256",
80        }
81    }
82}
83
84/// A digest strategy bundles the algorithm with any future policy
85/// knobs (output truncation, pre-hash prefix, etc.). Today it's a thin
86/// newtype; the wrapper exists so extensions don't churn call sites.
87#[derive(Clone, Debug, PartialEq, Eq)]
88pub struct DigestStrategy {
89    /// The algorithm to apply.
90    pub algorithm: DigestAlgorithm,
91}
92
93impl DigestStrategy {
94    /// Plain untagged BLAKE3 over canonical bytes.
95    #[must_use]
96    pub const fn blake3_untagged() -> Self {
97        Self {
98            algorithm: DigestAlgorithm::Blake3Untagged,
99        }
100    }
101
102    /// Keyed BLAKE3 over canonical bytes.
103    #[must_use]
104    pub const fn blake3_keyed(key: [u8; 32]) -> Self {
105        Self {
106            algorithm: DigestAlgorithm::Blake3Keyed { key },
107        }
108    }
109
110    /// Domain-separated BLAKE3 over canonical bytes.
111    #[must_use]
112    pub fn blake3_domain_separated(context: impl Into<String>) -> Self {
113        Self {
114            algorithm: DigestAlgorithm::Blake3DomainSeparated {
115                context: context.into(),
116            },
117        }
118    }
119
120    /// SHA-256 over canonical bytes. Presently returns
121    /// [`JcsError::UnsupportedAlgorithm`] at call time; the
122    /// constructor is provided so policy code can reference it today.
123    #[must_use]
124    pub const fn sha256() -> Self {
125        Self {
126            algorithm: DigestAlgorithm::Sha256,
127        }
128    }
129}
130
131/// Typed output of a canonical digest computation.
132///
133/// Carries the algorithm that produced `bytes` so downstream consumers
134/// (receipt envelopes, audit logs) can record the algorithm without
135/// out-of-band convention.
136#[derive(Clone, Debug, PartialEq, Eq)]
137pub struct CanonicalDigest {
138    /// The algorithm used.
139    pub algorithm: DigestAlgorithm,
140    /// Raw digest bytes. Length depends on the algorithm (32 for
141    /// BLAKE3 variants, 32 for SHA-256 once wired).
142    pub bytes: Vec<u8>,
143}
144
145/// Canonicalize a trusted `serde_json::Value` and digest the canonical
146/// bytes under the given strategy.
147///
148/// The value is assumed to come from caller-controlled construction.
149/// For untrusted input, use [`crate::to_canon_bytes_from_slice`] first
150/// (strict admission) and then pass the resulting `Value` back through
151/// this function — or use a strict-parse sibling when one exists for
152/// the target strategy.
153///
154/// # Errors
155///
156/// Returns:
157/// - [`JcsError::Json`] if the value cannot be canonicalized
158/// - [`JcsError::InvalidString`] for I-JSON forbidden code points
159/// - [`JcsError::InvalidNumber`] for non-interoperable numbers
160/// - [`JcsError::NestingDepthExceeded`] for values beyond
161///   [`crate::MAX_NESTING_DEPTH`]
162/// - [`JcsError::UnsupportedAlgorithm`] if the strategy names an
163///   algorithm not wired in this build (e.g. SHA-256 today)
164pub fn to_canon_digest_with(
165    value: &Value,
166    strategy: &DigestStrategy,
167) -> Result<CanonicalDigest, JcsError> {
168    let bytes = to_canon_bytes_value(value)?;
169    let digest_bytes = match &strategy.algorithm {
170        DigestAlgorithm::Blake3Untagged => blake3::hash(&bytes).as_bytes().to_vec(),
171        DigestAlgorithm::Blake3Keyed { key } => blake3::keyed_hash(key, &bytes).as_bytes().to_vec(),
172        DigestAlgorithm::Blake3DomainSeparated { context } => {
173            // BLAKE3 derive_key is the standard domain-separated digest:
174            // context is the domain identifier, key_material is the data.
175            blake3::derive_key(context, &bytes).to_vec()
176        }
177        DigestAlgorithm::Sha256 => {
178            return Err(JcsError::UnsupportedAlgorithm(
179                "SHA-256 over canonical bytes is declared in the API but not \
180                 wired in this build; open a follow-up to add the sha2 dep"
181                    .to_string(),
182            ));
183        }
184    };
185    Ok(CanonicalDigest {
186        algorithm: strategy.algorithm.clone(),
187        bytes: digest_bytes,
188    })
189}
190
191/// BLAKE3 fixed-policy convenience. Canonicalize `value` and return
192/// `blake3::hash(canonical_bytes)` as a 32-byte array.
193///
194/// Use this only at sites where the receipt convention explicitly
195/// fixes the algorithm to plain BLAKE3. For anything else, use
196/// [`to_canon_digest_with`] with an explicit [`DigestStrategy`].
197///
198/// # Errors
199///
200/// Same as [`to_canon_digest_with`] minus
201/// [`JcsError::UnsupportedAlgorithm`].
202pub fn to_canon_blake3_digest(value: &Value) -> Result<[u8; 32], JcsError> {
203    let bytes = to_canon_bytes_value(value)?;
204    Ok(*blake3::hash(&bytes).as_bytes())
205}
206
207/// Strict-parse sibling of [`to_canon_blake3_digest`] for untrusted
208/// JSON bytes.
209///
210/// # Errors
211///
212/// Returns [`JcsError::Json`] for malformed JSON or duplicate property
213/// names, [`JcsError::InvalidString`] or [`JcsError::InvalidNumber`]
214/// for I-JSON violations, and [`JcsError::NestingDepthExceeded`] for
215/// depth limit breach.
216pub fn to_canon_blake3_digest_from_slice(json: &[u8]) -> Result<[u8; 32], JcsError> {
217    let bytes = crate::to_canon_bytes_from_slice(json)?;
218    Ok(*blake3::hash(&bytes).as_bytes())
219}