// ave_identity/hash.rs

//! Hashing primitives with algorithm identifiers.
//!
//! A digest stores the algorithm together with the hash bytes so it can be
//! serialized, parsed and verified without external metadata.
5
6use crate::common::{AlgorithmIdentifiedBytes, base64_encoding};
7use crate::error::CryptoError;
8use borsh::{BorshDeserialize, BorshSerialize};
9use serde::{Deserialize, Serialize};
10use std::fmt;
11
/// Single-byte tag that marks a digest as Blake3: ASCII `'B'`.
const BLAKE3_ID: u8 = b'B';

/// Number of bytes in a Blake3 digest, not counting the identifier byte.
pub const BLAKE3_OUTPUT_LENGTH: usize = 32;
17
18/// Common interface for supported hash algorithms.
19pub trait Hash {
20    /// Returns the one-byte identifier used by this algorithm.
21    fn algorithm_id(&self) -> u8;
22
23    /// Returns the digest length, excluding the identifier byte.
24    fn output_length(&self) -> usize;
25
26    /// Hashes `data` and returns a typed digest.
27    fn hash(&self, data: &[u8]) -> DigestIdentifier;
28
29    /// Returns the enum variant for this algorithm.
30    fn algorithm(&self) -> HashAlgorithm;
31}
32
33/// Serializes `value` with Borsh and hashes the resulting bytes.
34#[inline]
35pub fn hash_borsh<T: BorshSerialize>(
36    hasher: &dyn Hash,
37    value: &T,
38) -> Result<DigestIdentifier, CryptoError> {
39    let serialized = borsh::to_vec(value)
40        .map_err(|e| CryptoError::SerializationError(e.to_string()))?;
41    Ok(hasher.hash(&serialized))
42}
43
44/// Hash algorithms supported by this crate.
45#[derive(
46    Debug,
47    Clone,
48    Copy,
49    PartialEq,
50    Eq,
51    Hash,
52    PartialOrd,
53    Ord,
54    Serialize,
55    Deserialize,
56    BorshSerialize,
57    BorshDeserialize,
58)]
59pub enum HashAlgorithm {
60    Blake3,
61}
62
63impl HashAlgorithm {
64    /// Returns the one-byte identifier for this algorithm.
65    pub const fn identifier(&self) -> u8 {
66        match self {
67            Self::Blake3 => BLAKE3_ID,
68        }
69    }
70
71    /// Returns the digest length, excluding the identifier byte.
72    pub const fn output_length(&self) -> usize {
73        match self {
74            Self::Blake3 => BLAKE3_OUTPUT_LENGTH,
75        }
76    }
77
78    /// Parses an algorithm from its one-byte identifier.
79    pub fn from_identifier(id: u8) -> Result<Self, CryptoError> {
80        match id {
81            BLAKE3_ID => Ok(Self::Blake3),
82            _ => Err(CryptoError::UnknownAlgorithm(format!("{}", id as char))),
83        }
84    }
85
86    /// Creates a hasher instance for this algorithm.
87    pub fn hasher(&self) -> Box<dyn Hash> {
88        match self {
89            Self::Blake3 => Box::new(Blake3Hasher),
90        }
91    }
92}
93
94impl fmt::Display for HashAlgorithm {
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        match self {
97            Self::Blake3 => write!(f, "Blake3"),
98        }
99    }
100}
101
102/// Digest bytes plus the algorithm used to produce them.
103#[derive(
104    Clone,
105    PartialEq,
106    Eq,
107    Hash,
108    BorshSerialize,
109    BorshDeserialize,
110    Ord,
111    PartialOrd,
112)]
113pub struct DigestIdentifier {
114    inner: AlgorithmIdentifiedBytes<HashAlgorithm>,
115}
116
117impl DigestIdentifier {
118    /// Creates a digest and validates the byte length for `algorithm`.
119    pub fn new(
120        algorithm: HashAlgorithm,
121        hash: Vec<u8>,
122    ) -> Result<Self, CryptoError> {
123        let expected_len = algorithm.output_length();
124        Ok(Self {
125            inner: AlgorithmIdentifiedBytes::new(
126                algorithm,
127                hash,
128                expected_len,
129            )?,
130        })
131    }
132
133    /// Returns the digest algorithm.
134    #[inline]
135    pub const fn algorithm(&self) -> HashAlgorithm {
136        self.inner.algorithm
137    }
138
139    /// Returns the raw digest bytes, without the identifier.
140    #[inline]
141    pub fn hash_bytes(&self) -> &[u8] {
142        self.inner.as_bytes()
143    }
144
145    /// Converts the digest bytes into an array of size `N`.
146    ///
147    /// Returns an error when `N` does not match the length required by the
148    /// embedded algorithm.
149    pub fn hash_array<const N: usize>(&self) -> Result<[u8; N], CryptoError> {
150        let hash_bytes = self.hash_bytes();
151        let expected_len = self.algorithm().output_length();
152
153        if N != expected_len {
154            return Err(CryptoError::InvalidDataLength {
155                expected: expected_len,
156                actual: N,
157            });
158        }
159
160        hash_bytes
161            .try_into()
162            .map_err(|_| CryptoError::InvalidDataLength {
163                expected: N,
164                actual: hash_bytes.len(),
165            })
166    }
167
168    /// Serializes the digest as `identifier || digest_bytes`.
169    #[inline]
170    pub fn to_bytes(&self) -> Vec<u8> {
171        self.inner
172            .to_bytes_with_prefix(self.inner.algorithm.identifier())
173    }
174
175    /// Parses a digest from `identifier || digest_bytes`.
176    pub fn from_bytes(bytes: &[u8]) -> Result<Self, CryptoError> {
177        if bytes.is_empty() {
178            return Err(CryptoError::InvalidHashFormat(
179                "Empty bytes".to_string(),
180            ));
181        }
182
183        let algorithm = HashAlgorithm::from_identifier(bytes[0])?;
184        let expected_len = algorithm.output_length();
185
186        let inner = AlgorithmIdentifiedBytes::from_bytes_with_prefix(
187            bytes,
188            HashAlgorithm::from_identifier,
189            expected_len,
190            "DigestIdentifier",
191        )?;
192
193        Ok(Self { inner })
194    }
195
196    // Internal method for Base64 encoding
197    #[inline]
198    fn to_base64(&self) -> String {
199        // Special case: empty digest serializes as empty string
200        if self.is_empty() {
201            String::new()
202        } else {
203            // Format: algorithm_char + base64(hash_bytes)
204            // Example: "B" + base64(hash) for Blake3
205            let algorithm_char = self.inner.algorithm.identifier() as char;
206            let data_base64 = base64_encoding::encode(&self.inner.bytes);
207            format!("{}{}", algorithm_char, data_base64)
208        }
209    }
210
211    /// Hashes `data` and compares it with `self`.
212    pub fn verify(&self, data: &[u8]) -> bool {
213        let hasher = self.inner.algorithm.hasher();
214        let computed = hasher.hash(data);
215        computed == *self
216    }
217
218    /// Returns `true` when this is the empty placeholder value.
219    #[inline]
220    pub const fn is_empty(&self) -> bool {
221        self.inner.bytes.is_empty()
222    }
223}
224
225impl Default for DigestIdentifier {
226    /// Creates an empty placeholder digest using Blake3 as the default tag.
227    fn default() -> Self {
228        Self {
229            inner: AlgorithmIdentifiedBytes {
230                algorithm: HashAlgorithm::Blake3,
231                bytes: Vec::new(),
232            },
233        }
234    }
235}
236
237impl fmt::Debug for DigestIdentifier {
238    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239        f.debug_struct("DigestIdentifier")
240            .field("algorithm", &self.inner.algorithm)
241            .field("hash", &base64_encoding::encode(&self.inner.bytes))
242            .finish()
243    }
244}
245
246impl fmt::Display for DigestIdentifier {
247    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248        write!(f, "{}", self.to_base64())
249    }
250}
251
252impl Serialize for DigestIdentifier {
253    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
254    where
255        S: serde::Serializer,
256    {
257        serializer.serialize_str(&self.to_base64())
258    }
259}
260
261impl<'de> Deserialize<'de> for DigestIdentifier {
262    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
263    where
264        D: serde::Deserializer<'de>,
265    {
266        let s = <String as Deserialize>::deserialize(deserializer)?;
267        s.parse().map_err(serde::de::Error::custom)
268    }
269}
270
271impl std::str::FromStr for DigestIdentifier {
272    type Err = CryptoError;
273
274    fn from_str(s: &str) -> Result<Self, Self::Err> {
275        // Special case: empty string deserializes to default (empty) digest
276        if s.is_empty() {
277            return Ok(Self::default());
278        }
279
280        // Format: algorithm_char + base64(hash_bytes)
281        // First character is the algorithm identifier
282        let mut chars = s.chars();
283        let algorithm_char = chars.next().ok_or_else(|| {
284            CryptoError::InvalidHashFormat("Empty string".to_string())
285        })?;
286
287        let algorithm = HashAlgorithm::from_identifier(algorithm_char as u8)?;
288
289        // Rest is base64-encoded hash data
290        let data_str: String = chars.collect();
291        let hash_bytes = base64_encoding::decode(&data_str)
292            .map_err(|e| CryptoError::Base64DecodeError(e.to_string()))?;
293
294        // Validate length
295        let expected_len = algorithm.output_length();
296        if hash_bytes.len() != expected_len {
297            return Err(CryptoError::InvalidDataLength {
298                expected: expected_len,
299                actual: hash_bytes.len(),
300            });
301        }
302
303        Ok(Self {
304            inner: AlgorithmIdentifiedBytes {
305                algorithm,
306                bytes: hash_bytes,
307            },
308        })
309    }
310}
311
/// Stateless hasher for the Blake3 algorithm.
#[derive(Clone, Copy, Debug)]
pub struct Blake3Hasher;

/// Constant [`Blake3Hasher`] value that can be borrowed wherever a hasher is
/// needed.
pub const BLAKE3_HASHER: Blake3Hasher = Blake3Hasher;
318
319impl Hash for Blake3Hasher {
320    fn algorithm_id(&self) -> u8 {
321        BLAKE3_ID
322    }
323
324    fn output_length(&self) -> usize {
325        BLAKE3_OUTPUT_LENGTH
326    }
327
328    fn hash(&self, data: &[u8]) -> DigestIdentifier {
329        let hash = blake3::hash(data);
330        let hash_bytes = hash.as_bytes();
331
332        // Blake3 always produces exactly 32 bytes, so this will never fail
333        DigestIdentifier::new(HashAlgorithm::Blake3, hash_bytes.to_vec())
334            .expect("Blake3 always produces 32 bytes")
335    }
336
337    fn algorithm(&self) -> HashAlgorithm {
338        HashAlgorithm::Blake3
339    }
340}
341
/// Unit tests for hashing, the byte and text encodings, serde/Borsh
/// round-trips and the empty placeholder digest.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blake3_hash() {
        let hash = Blake3Hasher.hash(b"Hello, World!");

        assert_eq!(hash.algorithm(), HashAlgorithm::Blake3);
        assert_eq!(hash.hash_bytes().len(), 32);
    }

    #[test]
    fn test_hash_to_string() {
        let hash = Blake3Hasher.hash(b"Hello, World!");
        let hash_str = hash.to_string();

        // A real digest never renders as the empty string.
        assert!(!hash_str.is_empty());

        // The text form is prefixed with the Blake3 identifier character.
        assert!(
            hash_str.starts_with('B'),
            "Blake3 hash should start with 'B', got: {}",
            hash_str
        );

        // Text form must round-trip through `FromStr`.
        let parsed: DigestIdentifier = hash_str.parse().unwrap();
        assert_eq!(hash, parsed);
    }

    #[test]
    fn test_hash_verify() {
        let data = b"Hello, World!";
        let hash = Blake3Hasher.hash(data);

        // Matches the data it was computed from...
        assert!(hash.verify(data));

        // ...and nothing else.
        assert!(!hash.verify(b"Different data"));
    }

    #[test]
    fn test_hash_bytes_roundtrip() {
        let hash = Blake3Hasher.hash(b"Test data");
        let bytes = hash.to_bytes();

        // The byte form starts with the algorithm identifier 'B'.
        assert_eq!(bytes[0], b'B');

        // Byte form must round-trip through `from_bytes`.
        let parsed = DigestIdentifier::from_bytes(&bytes).unwrap();
        assert_eq!(hash, parsed);
    }

    #[test]
    fn test_algorithm_detection() {
        let hash_str = Blake3Hasher.hash(b"Test data").to_string();

        // Parsing picks the algorithm up from the leading character.
        let parsed: DigestIdentifier = hash_str.parse().unwrap();
        assert_eq!(parsed.algorithm(), HashAlgorithm::Blake3);
    }

    #[test]
    fn test_invalid_algorithm_identifier() {
        // Unknown identifier followed by a correctly sized payload.
        let mut bytes = vec![b'X'];
        bytes.extend_from_slice(&[0u8; 32]);

        let result = DigestIdentifier::from_bytes(&bytes);
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            CryptoError::UnknownAlgorithm(_)
        ));
    }

    #[test]
    fn test_serde_serialization() {
        let hash = Blake3Hasher.hash(b"Test serialization");

        // JSON round-trip.
        let json = serde_json::to_string(&hash).unwrap();
        let deserialized: DigestIdentifier =
            serde_json::from_str(&json).unwrap();

        assert_eq!(hash, deserialized);
    }

    #[test]
    fn test_hash_borsh() {
        #[derive(BorshSerialize)]
        struct TestData {
            value: u64,
            name: String,
        }

        let data = TestData {
            value: 42,
            name: "test".to_string(),
        };

        // Hash through the helper...
        let hash1 = hash_borsh(&BLAKE3_HASHER, &data).unwrap();

        // ...and by serializing manually, then hashing the bytes.
        let serialized = borsh::to_vec(&data).unwrap();
        let hash2 = BLAKE3_HASHER.hash(&serialized);

        // Both paths must agree.
        assert_eq!(hash1, hash2);
        assert_eq!(hash1.algorithm(), HashAlgorithm::Blake3);
    }

    #[test]
    fn test_hash_borsh_deterministic() {
        #[derive(BorshSerialize)]
        struct TestData {
            x: u32,
            y: u32,
        }

        let data1 = TestData { x: 10, y: 20 };
        let data2 = TestData { x: 10, y: 20 };

        let hash1 = hash_borsh(&BLAKE3_HASHER, &data1).unwrap();
        let hash2 = hash_borsh(&BLAKE3_HASHER, &data2).unwrap();

        // Equal values must hash to equal digests.
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_default_digest_identifier() {
        let default_digest = DigestIdentifier::default();

        // The default is the empty placeholder, tagged as Blake3.
        assert!(default_digest.is_empty());
        assert_eq!(default_digest.algorithm(), HashAlgorithm::Blake3);
        assert_eq!(default_digest.hash_bytes().len(), 0);
    }

    #[test]
    fn test_is_empty() {
        // The default digest is empty.
        let empty = DigestIdentifier::default();
        assert!(empty.is_empty());

        // A computed digest is not.
        let hash = Blake3Hasher.hash(b"test data");
        assert!(!hash.is_empty());
        assert_eq!(hash.hash_bytes().len(), 32);
    }

    #[test]
    fn test_hash_array() {
        let hash = Blake3Hasher.hash(b"Test data for array conversion");

        // Correct size (Blake3 = 32 bytes) succeeds and matches the bytes.
        let array: [u8; 32] = hash.hash_array().unwrap();
        assert_eq!(array.len(), 32);
        assert_eq!(&array[..], hash.hash_bytes());

        // Any other size is rejected with the expected/actual pair.
        let result: Result<[u8; 64], _> = hash.hash_array();
        assert!(result.is_err());
        match result.unwrap_err() {
            CryptoError::InvalidDataLength { expected, actual } => {
                assert_eq!(expected, 32);
                assert_eq!(actual, 64);
            }
            _ => panic!("Expected InvalidDataLength error"),
        }
    }

    #[test]
    fn test_hash_array_type_inference() {
        let hash = Blake3Hasher.hash(b"test");

        // The array length can also be given explicitly via turbofish,
        // leaving the binding's type to be inferred from it.
        let array = hash.hash_array::<32>().unwrap();
        assert_eq!(array.len(), 32);

        // Contents must match the digest byte for byte.
        for (i, byte) in array.iter().enumerate() {
            assert_eq!(*byte, hash.hash_bytes()[i]);
        }
    }

    #[test]
    fn test_empty_digest_serialization() {
        let empty = DigestIdentifier::default();

        // The empty digest renders as the empty string.
        assert_eq!(empty.to_string(), "");

        // The empty string parses back to the empty digest.
        let parsed: DigestIdentifier = "".parse().unwrap();
        assert!(parsed.is_empty());
        assert_eq!(parsed.algorithm(), HashAlgorithm::Blake3);

        // Full round trip.
        let serialized = empty.to_string();
        let deserialized: DigestIdentifier = serialized.parse().unwrap();
        assert!(deserialized.is_empty());
        assert_eq!(deserialized.algorithm(), empty.algorithm());
    }

    #[test]
    fn test_empty_digest_serde() {
        let empty = DigestIdentifier::default();

        // The empty digest is the empty JSON string.
        let json = serde_json::to_string(&empty).unwrap();
        assert_eq!(json, "\"\"");

        // The empty JSON string deserializes to the empty digest.
        let deserialized: DigestIdentifier =
            serde_json::from_str("\"\"").unwrap();
        assert!(deserialized.is_empty());
        assert_eq!(deserialized.algorithm(), HashAlgorithm::Blake3);
    }

    // Formerly `test_empty_digest_bincode`; the binary codec under test is
    // Borsh, so the name and comments now say so.
    #[test]
    fn test_empty_digest_borsh() {
        let empty = DigestIdentifier::default();

        // Borsh round-trip of the empty placeholder digest.
        let bytes = borsh::to_vec(&empty).unwrap();
        let result: DigestIdentifier = borsh::from_slice(&bytes).unwrap();

        assert!(result.is_empty());
        assert_eq!(result.algorithm(), HashAlgorithm::Blake3);
    }
}