kimberlite_crypto/hash.rs
1//! Dual-hash abstraction for compliance vs performance paths.
2//!
3//! `Kimberlite` uses FIPS-approved algorithms for compliance-critical operations.
4//! Additional cryptographic hashes may be used internally for performance.
5//!
6//! # Algorithm Selection
7//!
8//! | Purpose | Algorithm | FIPS | Use Case |
9//! |---------|-----------|------|----------|
10//! | Compliance | SHA-256 | Yes (180-4) | Audit trails, exports, proofs |
11//! | Internal | BLAKE3 | No | Dedup, Merkle trees, fingerprints |
12//!
13//! # Example
14//!
15//! ```
16//! use kimberlite_crypto::hash::{HashPurpose, InternalHash, internal_hash, hash_with_purpose};
17//!
18//! // Fast internal hash (BLAKE3) for deduplication
19//! let hash = internal_hash(b"content to fingerprint");
20//!
21//! // Purpose-driven selection
22//! let (algo, digest) = hash_with_purpose(HashPurpose::Internal, b"data");
23//! ```
24//!
25//! # Compliance Note
26//!
27//! All externally-verifiable proofs use FIPS-approved SHA-256 via [`crate::chain_hash`].
28//! BLAKE3 is used only for internal performance optimization and never appears
29//! in audit trails, checkpoints, or exported data.
30
31use std::fmt::Debug;
32
33use sha2::{Digest, Sha256};
34
35// ============================================================================
36// Constants
37// ============================================================================
38
/// Size, in bytes, of every digest handled by this module.
///
/// SHA-256 and BLAKE3 both produce 256-bit output here, so a single
/// constant covers the compliance and the internal path.
pub const HASH_LENGTH: usize = 256 / 8;
41
/// Maximum data size for hashing (64 MiB).
///
/// This is a sanity limit to catch accidental misuse, not a hard API limit:
/// it is only checked through `debug_assert!`. The assertion's condition is
/// still compiled (and then optimized away) in release builds, so the
/// constant is always referenced and needs no `dead_code` suppression.
const MAX_DATA_LENGTH: usize = 64 * 1024 * 1024;
48
49// ============================================================================
50// HashPurpose
51// ============================================================================
52
/// Why a hash is being computed: for compliance or for internal use.
///
/// Callers state their intent with this enum and the algorithm follows from
/// it, which keeps the FIPS-compliant path and the performance-optimized
/// path separated at the type level.
///
/// # Algorithm Selection
///
/// | Variant | Algorithm |
/// |---------|-----------|
/// | [`HashPurpose::Compliance`] | SHA-256 (FIPS 180-4) |
/// | [`HashPurpose::Internal`] | BLAKE3 |
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum HashPurpose {
    /// Compliance-critical hashing, backed by SHA-256 (FIPS 180-4).
    ///
    /// Required for:
    /// - Log record hash chains
    /// - Checkpoint sealing signatures
    /// - Audit exports and third-party proofs
    /// - Any data examined by regulators or auditors
    Compliance,

    /// Performance-oriented hashing for internal operations, backed by BLAKE3.
    ///
    /// Appropriate for:
    /// - Content addressing and deduplication
    /// - Merkle tree construction for snapshots
    /// - Internal consistency verification
    /// - Streaming message fingerprinting
    Internal,
}
82
83// ============================================================================
84// HashAlgorithm
85// ============================================================================
86
/// Tag identifying which algorithm produced a stored hash.
///
/// Persisting this tag next to a digest records how the digest was made,
/// which leaves room for migrating to another algorithm later. The enum is
/// `#[repr(u8)]` with explicit discriminants, so each variant has a fixed
/// one-byte value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum HashAlgorithm {
    /// SHA-256 (FIPS 180-4), the compliance path. Encoded as `1`.
    Sha256 = 1,
    /// BLAKE3, the internal performance path. Encoded as `2`.
    Blake3 = 2,
}
99
100impl HashAlgorithm {
101 /// Returns the algorithm used for the given purpose.
102 #[must_use]
103 pub const fn for_purpose(purpose: HashPurpose) -> Self {
104 match purpose {
105 HashPurpose::Compliance => Self::Sha256,
106 HashPurpose::Internal => Self::Blake3,
107 }
108 }
109}
110
111// ============================================================================
112// InternalHash
113// ============================================================================
114
115/// A 32-byte BLAKE3 hash for internal operations.
116///
117/// BLAKE3 is used for internal operations where performance matters
118/// and FIPS compliance is not required. It is ~3-5x faster than SHA-256
119/// and supports parallel hashing for large inputs.
120///
121/// # When to Use
122///
123/// - Content addressing and deduplication
124/// - Merkle tree construction for snapshots
125/// - Internal consistency verification
126/// - Streaming message fingerprinting
127///
128/// # When NOT to Use
129///
130/// For compliance-critical paths (audit trails, exports, proofs),
131/// use [`crate::ChainHash`] and [`crate::chain_hash`] instead.
132#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
133pub struct InternalHash([u8; HASH_LENGTH]);
134
135impl InternalHash {
136 /// Length of the hash in bytes.
137 pub const LENGTH: usize = HASH_LENGTH;
138
139 /// Creates an `InternalHash` from raw bytes.
140 #[must_use]
141 pub const fn from_bytes(bytes: [u8; HASH_LENGTH]) -> Self {
142 Self(bytes)
143 }
144
145 /// Returns the hash as a byte slice.
146 #[must_use]
147 pub const fn as_bytes(&self) -> &[u8; HASH_LENGTH] {
148 &self.0
149 }
150}
151
152impl From<[u8; HASH_LENGTH]> for InternalHash {
153 fn from(value: [u8; HASH_LENGTH]) -> Self {
154 Self(value)
155 }
156}
157
158impl From<InternalHash> for [u8; HASH_LENGTH] {
159 fn from(value: InternalHash) -> Self {
160 value.0
161 }
162}
163
164impl Debug for InternalHash {
165 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166 write!(
167 f,
168 "InternalHash({:016x}...)",
169 u64::from_le_bytes(self.0[..8].try_into().unwrap())
170 )
171 }
172}
173
174// ============================================================================
175// Hash Functions
176// ============================================================================
177
178/// Computes a BLAKE3 hash for internal operations.
179///
180/// This is the fast path for internal operations where FIPS compliance
181/// is not required. BLAKE3 is ~3-5x faster than SHA-256 and supports
182/// parallel hashing.
183///
184/// # Arguments
185///
186/// * `data` - The data to hash
187///
188/// # Returns
189///
190/// A 32-byte [`InternalHash`] (BLAKE3).
191///
192/// # Panics
193///
194/// Debug builds will panic if `data` exceeds 64 MiB.
195///
196/// # Example
197///
198/// ```
199/// use kimberlite_crypto::hash::internal_hash;
200///
201/// let hash = internal_hash(b"content for deduplication");
202/// ```
203#[must_use]
204pub fn internal_hash(data: &[u8]) -> InternalHash {
205 // Precondition: data length is reasonable
206 debug_assert!(
207 data.len() <= MAX_DATA_LENGTH,
208 "data exceeds {MAX_DATA_LENGTH} byte sanity limit"
209 );
210
211 let hash = blake3::hash(data);
212 let hash_bytes: [u8; HASH_LENGTH] = *hash.as_bytes();
213
214 // Postcondition: hash isn't degenerate
215 assert!(
216 hash_bytes.iter().any(|&b| b != 0),
217 "BLAKE3 produced all-zero hash - cryptographic library bug"
218 );
219
220 InternalHash(hash_bytes)
221}
222
223/// Computes a hash using the algorithm for the given purpose.
224///
225/// This function selects the appropriate algorithm based on the purpose:
226/// - [`HashPurpose::Compliance`] → SHA-256 (FIPS 180-4)
227/// - [`HashPurpose::Internal`] → BLAKE3
228///
229/// # Arguments
230///
231/// * `purpose` - The intended use of this hash
232/// * `data` - The data to hash
233///
234/// # Returns
235///
236/// A tuple of (`HashAlgorithm`, 32-byte digest).
237///
238/// # Panics
239///
240/// Debug builds will panic if `data` exceeds 64 MiB.
241///
242/// # Example
243///
244/// ```
245/// use kimberlite_crypto::hash::{HashPurpose, HashAlgorithm, hash_with_purpose};
246///
247/// // Compliance path uses SHA-256
248/// let (algo, digest) = hash_with_purpose(HashPurpose::Compliance, b"audit data");
249/// assert_eq!(algo, HashAlgorithm::Sha256);
250///
251/// // Internal path uses BLAKE3
252/// let (algo, digest) = hash_with_purpose(HashPurpose::Internal, b"internal data");
253/// assert_eq!(algo, HashAlgorithm::Blake3);
254/// ```
255#[must_use]
256pub fn hash_with_purpose(purpose: HashPurpose, data: &[u8]) -> (HashAlgorithm, [u8; HASH_LENGTH]) {
257 // Precondition: data length is reasonable
258 debug_assert!(
259 data.len() <= MAX_DATA_LENGTH,
260 "data exceeds {MAX_DATA_LENGTH} byte sanity limit"
261 );
262
263 let (algorithm, hash_bytes) = match purpose {
264 HashPurpose::Compliance => {
265 let digest = Sha256::digest(data);
266 (HashAlgorithm::Sha256, digest.into())
267 }
268 HashPurpose::Internal => {
269 let hash = blake3::hash(data);
270 (HashAlgorithm::Blake3, *hash.as_bytes())
271 }
272 };
273
274 // Postcondition: hash isn't degenerate
275 assert!(
276 hash_bytes.iter().any(|&b| b != 0),
277 "Hash produced all-zero output - cryptographic library bug"
278 );
279
280 (algorithm, hash_bytes)
281}
282
283// ============================================================================
284// Tests
285// ============================================================================
286
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same bytes twice yields the same BLAKE3 digest.
    #[test]
    fn test_internal_hash_deterministic() {
        let input = b"test data for hashing";

        assert_eq!(internal_hash(input), internal_hash(input));
    }

    /// Distinct inputs yield distinct BLAKE3 digests.
    #[test]
    fn test_internal_hash_different_inputs() {
        let first = internal_hash(b"input one");
        let second = internal_hash(b"input two");

        assert_ne!(first, second);
    }

    /// The compliance purpose is tagged as SHA-256.
    #[test]
    fn test_hash_with_purpose_compliance_uses_sha256() {
        let (algorithm, _digest) = hash_with_purpose(HashPurpose::Compliance, b"data");

        assert_eq!(algorithm, HashAlgorithm::Sha256);
    }

    /// The internal purpose is tagged as BLAKE3.
    #[test]
    fn test_hash_with_purpose_internal_uses_blake3() {
        let (algorithm, _digest) = hash_with_purpose(HashPurpose::Internal, b"data");

        assert_eq!(algorithm, HashAlgorithm::Blake3);
    }

    /// Repeated calls with the same purpose and data agree on tag and digest.
    #[test]
    fn test_hash_with_purpose_deterministic() {
        let input = b"same data";

        let first = hash_with_purpose(HashPurpose::Internal, input);
        let second = hash_with_purpose(HashPurpose::Internal, input);

        assert_eq!(first.0, second.0);
        assert_eq!(first.1, second.1);
    }

    /// The two purposes hash the same data to different digests.
    #[test]
    fn test_compliance_and_internal_differ() {
        let input = b"same data";

        let (_, sha256_digest) = hash_with_purpose(HashPurpose::Compliance, input);
        let (_, blake3_digest) = hash_with_purpose(HashPurpose::Internal, input);

        // Different algorithms produce different hashes
        assert_ne!(sha256_digest, blake3_digest);
    }

    /// `internal_hash` agrees with calling the `blake3` crate directly.
    #[test]
    fn test_internal_hash_matches_blake3_crate() {
        let input = b"verify against blake3 crate directly";

        let expected = blake3::hash(input);
        let actual = internal_hash(input);

        assert_eq!(actual.as_bytes(), expected.as_bytes());
    }

    /// The compliance path agrees with calling the `sha2` crate directly.
    #[test]
    fn test_compliance_matches_sha256_crate() {
        use sha2::{Digest, Sha256};

        let input = b"verify against sha2 crate directly";

        let expected: [u8; 32] = Sha256::digest(input).into();
        let (_, actual) = hash_with_purpose(HashPurpose::Compliance, input);

        assert_eq!(actual, expected);
    }

    /// `for_purpose` maps each purpose to its documented algorithm.
    #[test]
    fn test_algorithm_for_purpose() {
        let for_compliance = HashAlgorithm::for_purpose(HashPurpose::Compliance);
        let for_internal = HashAlgorithm::for_purpose(HashPurpose::Internal);

        assert_eq!(for_compliance, HashAlgorithm::Sha256);
        assert_eq!(for_internal, HashAlgorithm::Blake3);
    }

    /// Converting bytes into an `InternalHash` and back is lossless.
    #[test]
    fn test_internal_hash_conversions() {
        let original = [42u8; HASH_LENGTH];

        let round_tripped: [u8; HASH_LENGTH] = InternalHash::from(original).into();

        assert_eq!(original, round_tripped);
    }
}