Skip to main content

ave_identity/
hash.rs

//! Generic hash functions with algorithm identification
//!
//! This module provides a generic interface for hash functions with automatic
//! algorithm identification via 1-byte prefixes.
//!
//! ## Example
//!
//! ```rust
//! use ave_identity::hash::{Hash, Blake3Hasher, DigestIdentifier};
//!
//! let hasher = Blake3Hasher;
//! let data = b"Hello, World!";
//!
//! // Compute hash
//! let hash = hasher.hash(data);
//!
//! // Convert to string (includes algorithm identifier)
//! let hash_str = hash.to_string();
//!
//! // Parse from string (automatically detects algorithm)
//! let parsed: DigestIdentifier = hash_str.parse().unwrap();
//!
//! // Verify
//! assert_eq!(hash, parsed);
//! ```
26
27use crate::common::{AlgorithmIdentifiedBytes, base64_encoding};
28use crate::error::CryptoError;
29use borsh::{BorshDeserialize, BorshSerialize};
30use serde::{Deserialize, Serialize};
31use std::fmt;
32
/// Single-byte tag (`b'B'`) that marks a digest as produced by Blake3.
const BLAKE3_ID: u8 = b'B';

/// Number of bytes in a Blake3 digest, not counting the 1-byte algorithm tag.
pub const BLAKE3_OUTPUT_LENGTH: usize = 32;
38
39/// Trait for hash algorithms with algorithm identification
40pub trait Hash {
41    /// Get the algorithm identifier (1 byte)
42    fn algorithm_id(&self) -> u8;
43
44    /// Get the expected output length in bytes (excluding identifier)
45    fn output_length(&self) -> usize;
46
47    /// Compute hash of the input data
48    fn hash(&self, data: &[u8]) -> DigestIdentifier;
49
50    /// Get the algorithm enum variant
51    fn algorithm(&self) -> HashAlgorithm;
52}
53
54/// Compute hash of any value that implements BorshSerialize
55///
56/// This is a convenience function that serializes the value using Borsh
57/// and then hashes it using the specified hasher.
58///
59/// # Example
60///
61/// ```
62/// use ave_identity::{hash_borsh, BLAKE3_HASHER};
63/// use borsh::BorshSerialize;
64///
65/// #[derive(BorshSerialize)]
66/// struct MyData {
67///     value: u64,
68/// }
69///
70/// let data = MyData { value: 42 };
71/// let hash = hash_borsh(&BLAKE3_HASHER, &data).unwrap();
72/// ```
73#[inline]
74pub fn hash_borsh<T: BorshSerialize>(
75    hasher: &dyn Hash,
76    value: &T,
77) -> Result<DigestIdentifier, CryptoError> {
78    let serialized = borsh::to_vec(value)
79        .map_err(|e| CryptoError::SerializationError(e.to_string()))?;
80    Ok(hasher.hash(&serialized))
81}
82
83/// Enumeration of supported hash algorithms
84#[derive(
85    Debug,
86    Clone,
87    Copy,
88    PartialEq,
89    Eq,
90    Hash,
91    PartialOrd,
92    Ord,
93    Serialize,
94    Deserialize,
95    BorshSerialize,
96    BorshDeserialize,
97)]
98pub enum HashAlgorithm {
99    Blake3,
100}
101
102impl HashAlgorithm {
103    /// Get the 1-byte identifier for this algorithm
104    pub fn identifier(&self) -> u8 {
105        match self {
106            HashAlgorithm::Blake3 => BLAKE3_ID,
107        }
108    }
109
110    /// Get the output length for this algorithm (excluding identifier)
111    pub fn output_length(&self) -> usize {
112        match self {
113            HashAlgorithm::Blake3 => BLAKE3_OUTPUT_LENGTH,
114        }
115    }
116
117    /// Parse algorithm from 1-byte identifier
118    pub fn from_identifier(id: u8) -> Result<Self, CryptoError> {
119        match id {
120            BLAKE3_ID => Ok(HashAlgorithm::Blake3),
121            _ => Err(CryptoError::UnknownAlgorithm(format!("{}", id as char))),
122        }
123    }
124
125    /// Create a hasher instance for this algorithm
126    pub fn hasher(&self) -> Box<dyn Hash> {
127        match self {
128            HashAlgorithm::Blake3 => Box::new(Blake3Hasher),
129        }
130    }
131}
132
133impl fmt::Display for HashAlgorithm {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        match self {
136            HashAlgorithm::Blake3 => write!(f, "Blake3"),
137        }
138    }
139}
140
141/// Digest identifier with algorithm identification
142///
143/// The output contains:
144/// - 1 byte: algorithm identifier
145/// - N bytes: actual hash value (length depends on algorithm)
146#[derive(
147    Clone,
148    PartialEq,
149    Eq,
150    Hash,
151    BorshSerialize,
152    BorshDeserialize,
153    Ord,
154    PartialOrd,
155)]
156pub struct DigestIdentifier {
157    inner: AlgorithmIdentifiedBytes<HashAlgorithm>,
158}
159
160impl DigestIdentifier {
161    /// Create a new hash output
162    pub fn new(
163        algorithm: HashAlgorithm,
164        hash: Vec<u8>,
165    ) -> Result<Self, CryptoError> {
166        let expected_len = algorithm.output_length();
167        Ok(Self {
168            inner: AlgorithmIdentifiedBytes::new(
169                algorithm,
170                hash,
171                expected_len,
172            )?,
173        })
174    }
175
176    /// Get the algorithm used
177    #[inline]
178    pub fn algorithm(&self) -> HashAlgorithm {
179        self.inner.algorithm
180    }
181
182    /// Get the hash bytes (without identifier)
183    #[inline]
184    pub fn hash_bytes(&self) -> &[u8] {
185        self.inner.as_bytes()
186    }
187
188    /// Get the hash as a fixed-size array
189    ///
190    /// This method converts the hash bytes into an array of the specified size.
191    /// The size must match the algorithm's output length.
192    ///
193    /// # Errors
194    /// Returns an error if the requested size doesn't match the algorithm's output length.
195    ///
196    /// # Example
197    /// ```
198    /// use ave_identity::{BLAKE3_HASHER, hash::{BLAKE3_OUTPUT_LENGTH, Hash}};
199    ///
200    /// let hash = BLAKE3_HASHER.hash(b"Hello, World!");
201    /// let array: [u8; 32] = hash.hash_array().unwrap();
202    /// assert_eq!(array.len(), BLAKE3_OUTPUT_LENGTH);
203    /// ```
204    pub fn hash_array<const N: usize>(&self) -> Result<[u8; N], CryptoError> {
205        let hash_bytes = self.hash_bytes();
206        let expected_len = self.algorithm().output_length();
207
208        if N != expected_len {
209            return Err(CryptoError::InvalidDataLength {
210                expected: expected_len,
211                actual: N,
212            });
213        }
214
215        hash_bytes
216            .try_into()
217            .map_err(|_| CryptoError::InvalidDataLength {
218                expected: N,
219                actual: hash_bytes.len(),
220            })
221    }
222
223    /// Get the full bytes including algorithm identifier
224    #[inline]
225    pub fn to_bytes(&self) -> Vec<u8> {
226        self.inner
227            .to_bytes_with_prefix(self.inner.algorithm.identifier())
228    }
229
230    /// Parse from bytes (includes algorithm identifier)
231    pub fn from_bytes(bytes: &[u8]) -> Result<Self, CryptoError> {
232        if bytes.is_empty() {
233            return Err(CryptoError::InvalidHashFormat(
234                "Empty bytes".to_string(),
235            ));
236        }
237
238        let algorithm = HashAlgorithm::from_identifier(bytes[0])?;
239        let expected_len = algorithm.output_length();
240
241        let inner = AlgorithmIdentifiedBytes::from_bytes_with_prefix(
242            bytes,
243            HashAlgorithm::from_identifier,
244            expected_len,
245            "DigestIdentifier",
246        )?;
247
248        Ok(Self { inner })
249    }
250
251    // Internal method for Base64 encoding
252    #[inline]
253    fn to_base64(&self) -> String {
254        // Special case: empty digest serializes as empty string
255        if self.is_empty() {
256            String::new()
257        } else {
258            // Format: algorithm_char + base64(hash_bytes)
259            // Example: "B" + base64(hash) for Blake3
260            let algorithm_char = self.inner.algorithm.identifier() as char;
261            let data_base64 = base64_encoding::encode(&self.inner.bytes);
262            format!("{}{}", algorithm_char, data_base64)
263        }
264    }
265
266    /// Verify that this hash matches the given data using the embedded algorithm
267    pub fn verify(&self, data: &[u8]) -> bool {
268        let hasher = self.inner.algorithm.hasher();
269        let computed = hasher.hash(data);
270        computed == *self
271    }
272
273    /// Check if this is an empty digest (created via Default)
274    ///
275    /// Returns `true` if the hash bytes are empty, which indicates
276    /// this digest was created using `Default::default()`.
277    #[inline]
278    pub fn is_empty(&self) -> bool {
279        self.inner.bytes.is_empty()
280    }
281}
282
283impl Default for DigestIdentifier {
284    /// Creates an empty digest identifier using Blake3 algorithm
285    ///
286    /// This is primarily useful for initialization purposes.
287    /// Use `is_empty()` to check if a digest was created via `default()`.
288    fn default() -> Self {
289        Self {
290            inner: AlgorithmIdentifiedBytes {
291                algorithm: HashAlgorithm::Blake3,
292                bytes: Vec::new(),
293            },
294        }
295    }
296}
297
298impl fmt::Debug for DigestIdentifier {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        f.debug_struct("DigestIdentifier")
301            .field("algorithm", &self.inner.algorithm)
302            .field("hash", &base64_encoding::encode(&self.inner.bytes))
303            .finish()
304    }
305}
306
307impl fmt::Display for DigestIdentifier {
308    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
309        write!(f, "{}", self.to_base64())
310    }
311}
312
313impl Serialize for DigestIdentifier {
314    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
315    where
316        S: serde::Serializer,
317    {
318        serializer.serialize_str(&self.to_base64())
319    }
320}
321
322impl<'de> Deserialize<'de> for DigestIdentifier {
323    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
324    where
325        D: serde::Deserializer<'de>,
326    {
327        let s = <String as Deserialize>::deserialize(deserializer)?;
328        s.parse().map_err(serde::de::Error::custom)
329    }
330}
331
332impl std::str::FromStr for DigestIdentifier {
333    type Err = CryptoError;
334
335    fn from_str(s: &str) -> Result<Self, Self::Err> {
336        // Special case: empty string deserializes to default (empty) digest
337        if s.is_empty() {
338            return Ok(Self::default());
339        }
340
341        // Format: algorithm_char + base64(hash_bytes)
342        // First character is the algorithm identifier
343        let mut chars = s.chars();
344        let algorithm_char = chars.next().ok_or_else(|| {
345            CryptoError::InvalidHashFormat("Empty string".to_string())
346        })?;
347
348        let algorithm = HashAlgorithm::from_identifier(algorithm_char as u8)?;
349
350        // Rest is base64-encoded hash data
351        let data_str: String = chars.collect();
352        let hash_bytes = base64_encoding::decode(&data_str)
353            .map_err(|e| CryptoError::Base64DecodeError(e.to_string()))?;
354
355        // Validate length
356        let expected_len = algorithm.output_length();
357        if hash_bytes.len() != expected_len {
358            return Err(CryptoError::InvalidDataLength {
359                expected: expected_len,
360                actual: hash_bytes.len(),
361            });
362        }
363
364        Ok(Self {
365            inner: AlgorithmIdentifiedBytes {
366                algorithm,
367                bytes: hash_bytes,
368            },
369        })
370    }
371}
372
/// Zero-sized hasher that produces 32-byte Blake3 digests.
#[derive(Debug, Clone, Copy)]
pub struct Blake3Hasher;

/// Ready-made [`Blake3Hasher`] value, so callers need not construct one.
pub const BLAKE3_HASHER: Blake3Hasher = Blake3Hasher;
379
380impl Hash for Blake3Hasher {
381    fn algorithm_id(&self) -> u8 {
382        BLAKE3_ID
383    }
384
385    fn output_length(&self) -> usize {
386        BLAKE3_OUTPUT_LENGTH
387    }
388
389    fn hash(&self, data: &[u8]) -> DigestIdentifier {
390        let hash = blake3::hash(data);
391        let hash_bytes = hash.as_bytes();
392
393        // Blake3 always produces exactly 32 bytes, so this will never fail
394        DigestIdentifier::new(HashAlgorithm::Blake3, hash_bytes.to_vec())
395            .expect("Blake3 always produces 32 bytes")
396    }
397
398    fn algorithm(&self) -> HashAlgorithm {
399        HashAlgorithm::Blake3
400    }
401}
402
/// Unit tests for hashing, parsing, and the serde/Borsh round trips of
/// `DigestIdentifier`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blake3_hash() {
        let hasher = Blake3Hasher;
        let data = b"Hello, World!";

        let hash = hasher.hash(data);
        assert_eq!(hash.algorithm(), HashAlgorithm::Blake3);
        assert_eq!(hash.hash_bytes().len(), 32);
    }

    #[test]
    fn test_hash_to_string() {
        let hasher = Blake3Hasher;
        let data = b"Hello, World!";

        let hash = hasher.hash(data);
        let hash_str = hash.to_string();

        // String representation should not be empty
        assert!(!hash_str.is_empty());

        // Blake3 hash should start with 'B'
        assert!(
            hash_str.starts_with('B'),
            "Blake3 hash should start with 'B', got: {}",
            hash_str
        );

        // Should be able to parse back
        let parsed: DigestIdentifier = hash_str.parse().unwrap();
        assert_eq!(hash, parsed);
    }

    #[test]
    fn test_hash_verify() {
        let hasher = Blake3Hasher;
        let data = b"Hello, World!";

        let hash = hasher.hash(data);

        // Should verify with correct data
        assert!(hash.verify(data));

        // Should fail with incorrect data
        assert!(!hash.verify(b"Different data"));
    }

    #[test]
    fn test_hash_bytes_roundtrip() {
        let hasher = Blake3Hasher;
        let data = b"Test data";

        let hash = hasher.hash(data);
        let bytes = hash.to_bytes();

        // First byte should be algorithm identifier 'B'
        assert_eq!(bytes[0], b'B');

        // Should parse back correctly
        let parsed = DigestIdentifier::from_bytes(&bytes).unwrap();
        assert_eq!(hash, parsed);
    }

    #[test]
    fn test_algorithm_detection() {
        let hasher = Blake3Hasher;
        let data = b"Test data";

        let hash = hasher.hash(data);
        let hash_str = hash.to_string();

        // Parse should automatically detect Blake3
        let parsed: DigestIdentifier = hash_str.parse().unwrap();
        assert_eq!(parsed.algorithm(), HashAlgorithm::Blake3);
    }

    #[test]
    fn test_invalid_algorithm_identifier() {
        let mut bytes = vec![b'X']; // Invalid identifier
        bytes.extend_from_slice(&[0u8; 32]); // Add 32 bytes of data

        let result = DigestIdentifier::from_bytes(&bytes);
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            CryptoError::UnknownAlgorithm(_)
        ));
    }

    #[test]
    fn test_serde_serialization() {
        let hasher = Blake3Hasher;
        let data = b"Test serialization";

        let hash = hasher.hash(data);

        // Serialize to JSON
        let json = serde_json::to_string(&hash).unwrap();

        // Deserialize back
        let deserialized: DigestIdentifier =
            serde_json::from_str(&json).unwrap();

        assert_eq!(hash, deserialized);
    }

    #[test]
    fn test_hash_borsh() {
        use crate::hash_borsh;

        #[derive(BorshSerialize)]
        struct TestData {
            value: u64,
            name: String,
        }

        let data = TestData {
            value: 42,
            name: "test".to_string(),
        };

        // Hash using borsh serialization
        let hash1 = hash_borsh(&BLAKE3_HASHER, &data).unwrap();

        // Manually serialize and hash to verify
        let serialized = borsh::to_vec(&data).unwrap();
        let hash2 = BLAKE3_HASHER.hash(&serialized);

        // Both methods should produce the same hash
        assert_eq!(hash1, hash2);
        assert_eq!(hash1.algorithm(), HashAlgorithm::Blake3);
    }

    #[test]
    fn test_hash_borsh_deterministic() {
        use crate::hash_borsh;

        #[derive(BorshSerialize)]
        struct TestData {
            x: u32,
            y: u32,
        }

        let data1 = TestData { x: 10, y: 20 };
        let data2 = TestData { x: 10, y: 20 };

        let hash1 = hash_borsh(&BLAKE3_HASHER, &data1).unwrap();
        let hash2 = hash_borsh(&BLAKE3_HASHER, &data2).unwrap();

        // Same data should produce same hash
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_default_digest_identifier() {
        let default_digest = DigestIdentifier::default();

        // Default should be empty
        assert!(default_digest.is_empty());

        // Default should use Blake3 algorithm
        assert_eq!(default_digest.algorithm(), HashAlgorithm::Blake3);

        // Should have empty bytes
        assert_eq!(default_digest.hash_bytes().len(), 0);
    }

    #[test]
    fn test_is_empty() {
        // Default digest is empty
        let empty = DigestIdentifier::default();
        assert!(empty.is_empty());

        // Hashed data is not empty
        let hasher = Blake3Hasher;
        let hash = hasher.hash(b"test data");
        assert!(!hash.is_empty());
        assert_eq!(hash.hash_bytes().len(), 32);
    }

    #[test]
    fn test_hash_array() {
        let hasher = Blake3Hasher;
        let data = b"Test data for array conversion";
        let hash = hasher.hash(data);

        // Get as array of correct size (Blake3 = 32 bytes)
        let array: [u8; 32] = hash.hash_array().unwrap();
        assert_eq!(array.len(), 32);
        assert_eq!(&array[..], hash.hash_bytes());

        // Wrong size should fail
        let result: Result<[u8; 64], _> = hash.hash_array();
        assert!(result.is_err());
        match result.unwrap_err() {
            CryptoError::InvalidDataLength { expected, actual } => {
                assert_eq!(expected, 32);
                assert_eq!(actual, 64);
            }
            _ => panic!("Expected InvalidDataLength error"),
        }
    }

    #[test]
    fn test_hash_array_type_inference() {
        let hasher = Blake3Hasher;
        let hash = hasher.hash(b"test");

        // The array length can be given with an explicit const argument
        let array = hash.hash_array::<32>().unwrap();
        assert_eq!(array.len(), 32);

        // Verify content matches
        assert_eq!(array.as_slice(), hash.hash_bytes());
    }

    #[test]
    fn test_empty_digest_serialization() {
        let empty = DigestIdentifier::default();

        // Should serialize to empty string
        assert_eq!(empty.to_string(), "");

        // Should parse from empty string
        let parsed: DigestIdentifier = "".parse().unwrap();
        assert!(parsed.is_empty());
        assert_eq!(parsed.algorithm(), HashAlgorithm::Blake3);

        // Round trip should work
        let serialized = empty.to_string();
        let deserialized: DigestIdentifier = serialized.parse().unwrap();
        assert!(deserialized.is_empty());
        assert_eq!(deserialized.algorithm(), empty.algorithm());
    }

    #[test]
    fn test_empty_digest_serde() {
        let empty = DigestIdentifier::default();

        // Should serialize to empty string in JSON
        let json = serde_json::to_string(&empty).unwrap();
        assert_eq!(json, "\"\"");

        // Should deserialize from empty string
        let deserialized: DigestIdentifier =
            serde_json::from_str("\"\"").unwrap();
        assert!(deserialized.is_empty());
        assert_eq!(deserialized.algorithm(), HashAlgorithm::Blake3);
    }

    // Previously named `test_empty_digest_bincode`; the test has always
    // exercised Borsh, so the name and comments now say so.
    #[test]
    fn test_empty_digest_borsh() {
        let empty = DigestIdentifier::default();
        assert!(empty.is_empty());

        // Should serialize with Borsh
        let bytes = borsh::to_vec(&empty).unwrap();

        // Should deserialize with Borsh
        let result: DigestIdentifier = borsh::from_slice(&bytes).unwrap();

        assert!(result.is_empty());
        assert_eq!(result.algorithm(), HashAlgorithm::Blake3);
    }
}