Skip to main content

kimberlite_crypto/
hash.rs

1//! Dual-hash abstraction for compliance vs performance paths.
2//!
3//! `Kimberlite` uses FIPS-approved algorithms for compliance-critical operations.
4//! Additional cryptographic hashes may be used internally for performance.
5//!
6//! # Algorithm Selection
7//!
8//! | Purpose | Algorithm | FIPS | Use Case |
9//! |---------|-----------|------|----------|
10//! | Compliance | SHA-256 | Yes (180-4) | Audit trails, exports, proofs |
11//! | Internal | BLAKE3 | No | Dedup, Merkle trees, fingerprints |
12//!
13//! # Example
14//!
15//! ```
16//! use kimberlite_crypto::hash::{HashPurpose, InternalHash, internal_hash, hash_with_purpose};
17//!
18//! // Fast internal hash (BLAKE3) for deduplication
19//! let hash = internal_hash(b"content to fingerprint");
20//!
21//! // Purpose-driven selection
22//! let (algo, digest) = hash_with_purpose(HashPurpose::Internal, b"data");
23//! ```
24//!
25//! # Compliance Note
26//!
27//! All externally-verifiable proofs use FIPS-approved SHA-256 via [`crate::chain_hash`].
28//! BLAKE3 is used only for internal performance optimization and never appears
29//! in audit trails, checkpoints, or exported data.
30
31use std::fmt::Debug;
32
33use sha2::{Digest, Sha256};
34
35// ============================================================================
36// Constants
37// ============================================================================
38
/// Size in bytes of every digest handled by this module.
///
/// SHA-256 and BLAKE3 both emit 256 bits of output, so one constant
/// describes the digest length of either algorithm.
pub const HASH_LENGTH: usize = 32;
41
/// Upper bound on the size of a single hashing input (64 MiB).
///
/// A sanity limit intended to surface accidental misuse rather than to
/// express a hard protocol constraint.
// `dead_code` is allowed because the constant is only referenced from debug assertions.
#[allow(dead_code)]
const MAX_DATA_LENGTH: usize = 64 << 20;
48
49// ============================================================================
50// HashPurpose
51// ============================================================================
52
/// Why a hash is being computed: for compliance, or for internal use.
///
/// Callers state their intent through this enum and the algorithm follows
/// from it, which keeps the split between FIPS-compliant operations and
/// performance-oriented internal operations visible in the type system.
///
/// # Algorithm Selection
///
/// | Variant | Algorithm |
/// |---------|-----------|
/// | [`HashPurpose::Compliance`] | SHA-256 (FIPS 180-4) |
/// | [`HashPurpose::Internal`] | BLAKE3 |
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum HashPurpose {
    /// Hashing on the FIPS-compliant path, backed by SHA-256 (FIPS 180-4).
    ///
    /// Required for:
    /// - Log record hash chains
    /// - Checkpoint sealing signatures
    /// - Audit exports and third-party proofs
    /// - Any data examined by regulators or auditors
    Compliance,

    /// Hashing on the high-performance internal path, backed by BLAKE3.
    ///
    /// Appropriate for:
    /// - Content addressing and deduplication
    /// - Merkle tree construction for snapshots
    /// - Internal consistency verification
    /// - Streaming message fingerprinting
    Internal,
}
82
83// ============================================================================
84// HashAlgorithm
85// ============================================================================
86
/// Tag identifying which hash algorithm produced a digest.
///
/// Intended to be recorded next to stored hashes so the producing algorithm
/// stays known, which leaves room for a later algorithm migration.
/// The enum is `#[repr(u8)]` with explicit discriminants.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum HashAlgorithm {
    /// SHA-256 (FIPS 180-4), the compliance path. Discriminant `1`.
    Sha256 = 1,
    /// BLAKE3, the internal performance path. Discriminant `2`.
    Blake3 = 2,
}
99
100impl HashAlgorithm {
101    /// Returns the algorithm used for the given purpose.
102    #[must_use]
103    pub const fn for_purpose(purpose: HashPurpose) -> Self {
104        match purpose {
105            HashPurpose::Compliance => Self::Sha256,
106            HashPurpose::Internal => Self::Blake3,
107        }
108    }
109}
110
111// ============================================================================
112// InternalHash
113// ============================================================================
114
115/// A 32-byte BLAKE3 hash for internal operations.
116///
117/// BLAKE3 is used for internal operations where performance matters
118/// and FIPS compliance is not required. It is ~3-5x faster than SHA-256
119/// and supports parallel hashing for large inputs.
120///
121/// # When to Use
122///
123/// - Content addressing and deduplication
124/// - Merkle tree construction for snapshots
125/// - Internal consistency verification
126/// - Streaming message fingerprinting
127///
128/// # When NOT to Use
129///
130/// For compliance-critical paths (audit trails, exports, proofs),
131/// use [`crate::ChainHash`] and [`crate::chain_hash`] instead.
132#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
133pub struct InternalHash([u8; HASH_LENGTH]);
134
135impl InternalHash {
136    /// Length of the hash in bytes.
137    pub const LENGTH: usize = HASH_LENGTH;
138
139    /// Creates an `InternalHash` from raw bytes.
140    #[must_use]
141    pub const fn from_bytes(bytes: [u8; HASH_LENGTH]) -> Self {
142        Self(bytes)
143    }
144
145    /// Returns the hash as a byte slice.
146    #[must_use]
147    pub const fn as_bytes(&self) -> &[u8; HASH_LENGTH] {
148        &self.0
149    }
150}
151
152impl From<[u8; HASH_LENGTH]> for InternalHash {
153    fn from(value: [u8; HASH_LENGTH]) -> Self {
154        Self(value)
155    }
156}
157
158impl From<InternalHash> for [u8; HASH_LENGTH] {
159    fn from(value: InternalHash) -> Self {
160        value.0
161    }
162}
163
164impl Debug for InternalHash {
165    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166        write!(
167            f,
168            "InternalHash({:016x}...)",
169            u64::from_le_bytes(self.0[..8].try_into().unwrap())
170        )
171    }
172}
173
174// ============================================================================
175// Hash Functions
176// ============================================================================
177
178/// Computes a BLAKE3 hash for internal operations.
179///
180/// This is the fast path for internal operations where FIPS compliance
181/// is not required. BLAKE3 is ~3-5x faster than SHA-256 and supports
182/// parallel hashing.
183///
184/// # Arguments
185///
186/// * `data` - The data to hash
187///
188/// # Returns
189///
190/// A 32-byte [`InternalHash`] (BLAKE3).
191///
192/// # Panics
193///
194/// Debug builds will panic if `data` exceeds 64 MiB.
195///
196/// # Example
197///
198/// ```
199/// use kimberlite_crypto::hash::internal_hash;
200///
201/// let hash = internal_hash(b"content for deduplication");
202/// ```
203#[must_use]
204pub fn internal_hash(data: &[u8]) -> InternalHash {
205    // Precondition: data length is reasonable
206    debug_assert!(
207        data.len() <= MAX_DATA_LENGTH,
208        "data exceeds {MAX_DATA_LENGTH} byte sanity limit"
209    );
210
211    let hash = blake3::hash(data);
212    let hash_bytes: [u8; HASH_LENGTH] = *hash.as_bytes();
213
214    // Postcondition: hash isn't degenerate
215    assert!(
216        hash_bytes.iter().any(|&b| b != 0),
217        "BLAKE3 produced all-zero hash - cryptographic library bug"
218    );
219
220    InternalHash(hash_bytes)
221}
222
223/// Computes a hash using the algorithm for the given purpose.
224///
225/// This function selects the appropriate algorithm based on the purpose:
226/// - [`HashPurpose::Compliance`] → SHA-256 (FIPS 180-4)
227/// - [`HashPurpose::Internal`] → BLAKE3
228///
229/// # Arguments
230///
231/// * `purpose` - The intended use of this hash
232/// * `data` - The data to hash
233///
234/// # Returns
235///
236/// A tuple of (`HashAlgorithm`, 32-byte digest).
237///
238/// # Panics
239///
240/// Debug builds will panic if `data` exceeds 64 MiB.
241///
242/// # Example
243///
244/// ```
245/// use kimberlite_crypto::hash::{HashPurpose, HashAlgorithm, hash_with_purpose};
246///
247/// // Compliance path uses SHA-256
248/// let (algo, digest) = hash_with_purpose(HashPurpose::Compliance, b"audit data");
249/// assert_eq!(algo, HashAlgorithm::Sha256);
250///
251/// // Internal path uses BLAKE3
252/// let (algo, digest) = hash_with_purpose(HashPurpose::Internal, b"internal data");
253/// assert_eq!(algo, HashAlgorithm::Blake3);
254/// ```
255#[must_use]
256pub fn hash_with_purpose(purpose: HashPurpose, data: &[u8]) -> (HashAlgorithm, [u8; HASH_LENGTH]) {
257    // Precondition: data length is reasonable
258    debug_assert!(
259        data.len() <= MAX_DATA_LENGTH,
260        "data exceeds {MAX_DATA_LENGTH} byte sanity limit"
261    );
262
263    let (algorithm, hash_bytes) = match purpose {
264        HashPurpose::Compliance => {
265            let digest = Sha256::digest(data);
266            (HashAlgorithm::Sha256, digest.into())
267        }
268        HashPurpose::Internal => {
269            let hash = blake3::hash(data);
270            (HashAlgorithm::Blake3, *hash.as_bytes())
271        }
272    };
273
274    // Postcondition: hash isn't degenerate
275    assert!(
276        hash_bytes.iter().any(|&b| b != 0),
277        "Hash produced all-zero output - cryptographic library bug"
278    );
279
280    (algorithm, hash_bytes)
281}
282
283// ============================================================================
284// Tests
285// ============================================================================
286
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same bytes twice yields the same BLAKE3 digest.
    #[test]
    fn test_internal_hash_deterministic() {
        let payload = b"test data for hashing";

        assert_eq!(internal_hash(payload), internal_hash(payload));
    }

    /// Distinct inputs yield distinct BLAKE3 digests.
    #[test]
    fn test_internal_hash_different_inputs() {
        let first = internal_hash(b"input one");
        let second = internal_hash(b"input two");

        assert_ne!(first, second);
    }

    /// The compliance purpose is tagged as SHA-256.
    #[test]
    fn test_hash_with_purpose_compliance_uses_sha256() {
        let algorithm = hash_with_purpose(HashPurpose::Compliance, b"data").0;

        assert_eq!(algorithm, HashAlgorithm::Sha256);
    }

    /// The internal purpose is tagged as BLAKE3.
    #[test]
    fn test_hash_with_purpose_internal_uses_blake3() {
        let algorithm = hash_with_purpose(HashPurpose::Internal, b"data").0;

        assert_eq!(algorithm, HashAlgorithm::Blake3);
    }

    /// Repeated calls with the same purpose and data agree on tag and digest.
    #[test]
    fn test_hash_with_purpose_deterministic() {
        let payload = b"same data";

        let first = hash_with_purpose(HashPurpose::Internal, payload);
        let second = hash_with_purpose(HashPurpose::Internal, payload);

        assert_eq!(first.0, second.0);
        assert_eq!(first.1, second.1);
    }

    /// SHA-256 and BLAKE3 give different digests for the same input.
    #[test]
    fn test_compliance_and_internal_differ() {
        let payload = b"same data";

        let sha256_digest = hash_with_purpose(HashPurpose::Compliance, payload).1;
        let blake3_digest = hash_with_purpose(HashPurpose::Internal, payload).1;

        assert_ne!(sha256_digest, blake3_digest);
    }

    /// `internal_hash` returns exactly what the `blake3` crate computes.
    #[test]
    fn test_internal_hash_matches_blake3_crate() {
        let payload = b"verify against blake3 crate directly";

        let expected = blake3::hash(payload);

        assert_eq!(internal_hash(payload).as_bytes(), expected.as_bytes());
    }

    /// The compliance path returns exactly what the `sha2` crate computes.
    #[test]
    fn test_compliance_matches_sha256_crate() {
        use sha2::{Digest, Sha256};

        let payload = b"verify against sha2 crate directly";

        let expected: [u8; 32] = Sha256::digest(payload).into();
        let (_, actual) = hash_with_purpose(HashPurpose::Compliance, payload);

        assert_eq!(actual, expected);
    }

    /// Every purpose maps to its documented algorithm.
    #[test]
    fn test_algorithm_for_purpose() {
        let expectations = [
            (HashPurpose::Compliance, HashAlgorithm::Sha256),
            (HashPurpose::Internal, HashAlgorithm::Blake3),
        ];

        for (purpose, expected) in expectations {
            assert_eq!(HashAlgorithm::for_purpose(purpose), expected);
        }
    }

    /// Raw bytes survive a round trip through `InternalHash`.
    #[test]
    fn test_internal_hash_conversions() {
        let original = [42u8; HASH_LENGTH];

        let round_tripped: [u8; HASH_LENGTH] = InternalHash::from(original).into();

        assert_eq!(original, round_tripped);
    }
}