atlas_common/hash/
algorithms.rs

1use crate::error::{Error, Result};
2use crate::hash::calculate_hash_with_algorithm;
3use serde::{Deserialize, Serialize};
4use std::fmt;
5use std::str::FromStr;
6
7/// Supported cryptographic hash algorithms
8///
9/// All algorithms use the SHA-2 family for security and performance.
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
11pub enum HashAlgorithm {
12    /// SHA-256 (256-bit/32-byte output)
13    #[serde(rename = "sha256")]
14    Sha256,
15    /// SHA-384 (384-bit/48-byte output) - Default
16    #[serde(rename = "sha384")]
17    Sha384,
18    /// SHA-512 (512-bit/64-byte output)
19    #[serde(rename = "sha512")]
20    Sha512,
21}
22
23impl HashAlgorithm {
24    /// Get algorithm name as string
25    ///
26    /// # Example
27    ///
28    /// ```rust
29    /// use atlas_common::hash::HashAlgorithm;
30    ///
31    /// assert_eq!(HashAlgorithm::Sha256.as_str(), "sha256");
32    /// ```
33    pub fn as_str(&self) -> &'static str {
34        match self {
35            HashAlgorithm::Sha256 => "sha256",
36            HashAlgorithm::Sha384 => "sha384",
37            HashAlgorithm::Sha512 => "sha512",
38        }
39    }
40
41    /// Get the output size in bytes
42    ///
43    /// # Example
44    ///
45    /// ```rust
46    /// use atlas_common::hash::HashAlgorithm;
47    ///
48    /// assert_eq!(HashAlgorithm::Sha256.output_size(), 32);
49    /// assert_eq!(HashAlgorithm::Sha512.output_size(), 64);
50    /// ```
51    pub fn output_size(&self) -> usize {
52        match self {
53            HashAlgorithm::Sha256 => 32,
54            HashAlgorithm::Sha384 => 48,
55            HashAlgorithm::Sha512 => 64,
56        }
57    }
58
59    /// Get the output size in hex characters
60    ///
61    /// # Example
62    ///
63    /// ```rust
64    /// use atlas_common::hash::HashAlgorithm;
65    ///
66    /// assert_eq!(HashAlgorithm::Sha256.hex_length(), 64);
67    /// ```
68    pub fn hex_length(&self) -> usize {
69        self.output_size() * 2
70    }
71
72    /// Check if a hash string matches this algorithm's expected format
73    ///
74    /// # Example
75    ///
76    /// ```rust
77    /// use atlas_common::hash::HashAlgorithm;
78    ///
79    /// let hash = "a".repeat(64);
80    /// assert!(HashAlgorithm::Sha256.validate_hash(&hash));
81    /// assert!(!HashAlgorithm::Sha384.validate_hash(&hash));
82    /// ```
83    pub fn validate_hash(&self, hash: &str) -> bool {
84        hash.len() == self.hex_length() && hash.chars().all(|c| c.is_ascii_hexdigit())
85    }
86}
87
88impl Default for HashAlgorithm {
89    fn default() -> Self {
90        HashAlgorithm::Sha384
91    }
92}
93
94impl fmt::Display for HashAlgorithm {
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        write!(f, "{}", self.as_str())
97    }
98}
99
100impl FromStr for HashAlgorithm {
101    type Err = Error;
102    /// Parse algorithm from string
103    ///
104    /// Accepts: "sha256", "sha-256", "sha384", "sha-384", "sha512", "sha-512"
105    /// (case-insensitive)
106    fn from_str(s: &str) -> Result<Self> {
107        match s.to_lowercase().as_str() {
108            "sha256" | "sha-256" => Ok(HashAlgorithm::Sha256),
109            "sha384" | "sha-384" => Ok(HashAlgorithm::Sha384),
110            "sha512" | "sha-512" => Ok(HashAlgorithm::Sha512),
111            _ => Err(Error::InvalidFormat(format!(
112                "Unknown hash algorithm: {}. Supported: sha256, sha384, sha512",
113                s
114            ))),
115        }
116    }
117}
118
/// Snapshot of CPU features relevant to hashing, probed at runtime.
///
/// Obtain one with `HardwareCapabilities::detect`. All fields are public, so
/// a value can also be built by hand (e.g. to exercise strategy selection).
#[derive(Debug, Clone, Copy)]
pub struct HardwareCapabilities {
    /// x86-64 SHA extensions (SHA-NI) are present. Always `false` on other
    /// architectures.
    pub sha_extensions: bool,
    /// AVX-512 Foundation is present. Always `false` off x86-64.
    pub avx512: bool,
    /// AArch64 crypto extensions are present. Always `false` off AArch64.
    pub arm_crypto: bool,
    /// CPU count as reported by `num_cpus::get()`.
    pub cpu_cores: usize,
}
131
132impl HardwareCapabilities {
133    /// Detect available hardware capabilities
134    pub fn detect() -> Self {
135        let cpu_cores = num_cpus::get();
136
137        #[cfg(target_arch = "x86_64")]
138        {
139            Self {
140                sha_extensions: is_x86_feature_detected!("sha"),
141                avx512: is_x86_feature_detected!("avx512f"),
142                arm_crypto: false,
143                cpu_cores,
144            }
145        }
146
147        #[cfg(target_arch = "aarch64")]
148        {
149            Self {
150                sha_extensions: false,
151                avx512: false,
152                arm_crypto: Self::detect_arm_crypto(),
153                cpu_cores,
154            }
155        }
156
157        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
158        {
159            Self {
160                sha_extensions: false,
161                avx512: false,
162                arm_crypto: false,
163                cpu_cores,
164            }
165        }
166    }
167
168    #[cfg(target_arch = "aarch64")]
169    fn detect_arm_crypto() -> bool {
170        std::arch::is_aarch64_feature_detected!("aes")
171            && std::arch::is_aarch64_feature_detected!("sha2")
172    }
173
174    /// Get optimal chunk size for parallel processing
175    pub fn optimal_chunk_size(&self) -> usize {
176        match self.cpu_cores {
177            1..=4 => 16 * 1024 * 1024,  // 16MB
178            5..=8 => 32 * 1024 * 1024,  // 32MB
179            9..=16 => 64 * 1024 * 1024, // 64MB
180            _ => 128 * 1024 * 1024,     // 128MB for high-core Xeons
181        }
182    }
183}
184
/// Internal label for the code path `calculate_hash_optimized` dispatches to.
#[derive(Debug, Clone, Copy)]
enum HashOptimization {
    /// x86-64 path chosen when SHA extensions are detected.
    IntelShaExtensions,
    /// x86-64 path chosen for large inputs on AVX-512 machines with >= 4 cores.
    XeonParallel,
    /// AArch64 path chosen when the ARM crypto extensions are detected.
    AppleSiliconCrypto,
    /// Architecture-independent path for large inputs on machines with >= 3 cores.
    MultiCore,
    /// Plain software path; the fallback when nothing else applies.
    Software,
}
199
200impl HashOptimization {
201    fn select(capabilities: &HardwareCapabilities, data_size: usize) -> Self {
202        let parallel_threshold = capabilities.optimal_chunk_size();
203
204        #[cfg(target_arch = "x86_64")]
205        {
206            if capabilities.sha_extensions && data_size < parallel_threshold {
207                return Self::IntelShaExtensions;
208            }
209            if capabilities.avx512 && data_size >= parallel_threshold && capabilities.cpu_cores >= 4
210            {
211                return Self::XeonParallel;
212            }
213            if capabilities.sha_extensions {
214                return Self::IntelShaExtensions;
215            }
216        }
217
218        #[cfg(target_arch = "aarch64")]
219        {
220            if capabilities.arm_crypto {
221                return Self::AppleSiliconCrypto;
222            }
223        }
224
225        if data_size >= parallel_threshold && capabilities.cpu_cores >= 3 {
226            return Self::MultiCore;
227        }
228
229        Self::Software
230    }
231}
232
233/// Builder for incremental hashing
234///
235/// Useful when hashing data that arrives in chunks or from multiple sources.
236///
237/// # Example
238///
239/// ```rust
240/// use atlas_common::hash::{HashBuilder, HashAlgorithm};
241///
242/// let mut builder = HashBuilder::new(HashAlgorithm::Sha256);
243/// builder.update(b"part1");
244/// builder.update(b"part2");
245/// let hash = builder.finalize();
246/// ```
247pub struct HashBuilder {
248    algorithm: HashAlgorithm,
249    hasher: HashBuilderInner,
250}
251
252enum HashBuilderInner {
253    Sha256(sha2::Sha256),
254    Sha384(sha2::Sha384),
255    Sha512(sha2::Sha512),
256}
257
258impl HashBuilder {
259    /// Create a new hash builder with the specified algorithm
260    pub fn new(algorithm: HashAlgorithm) -> Self {
261        use sha2::Digest;
262
263        let hasher = match algorithm {
264            HashAlgorithm::Sha256 => HashBuilderInner::Sha256(sha2::Sha256::new()),
265            HashAlgorithm::Sha384 => HashBuilderInner::Sha384(sha2::Sha384::new()),
266            HashAlgorithm::Sha512 => HashBuilderInner::Sha512(sha2::Sha512::new()),
267        };
268
269        Self { algorithm, hasher }
270    }
271
272    /// Update the hash with more data
273    ///
274    /// Can be called multiple times to add data incrementally.
275    pub fn update(&mut self, data: &[u8]) {
276        use sha2::Digest;
277
278        match &mut self.hasher {
279            HashBuilderInner::Sha256(h) => h.update(data),
280            HashBuilderInner::Sha384(h) => h.update(data),
281            HashBuilderInner::Sha512(h) => h.update(data),
282        }
283    }
284
285    /// Finalize and get the hash as a hex string
286    ///
287    /// Consumes the builder.
288    pub fn finalize(self) -> String {
289        use sha2::Digest;
290
291        match self.hasher {
292            HashBuilderInner::Sha256(h) => hex::encode(h.finalize()),
293            HashBuilderInner::Sha384(h) => hex::encode(h.finalize()),
294            HashBuilderInner::Sha512(h) => hex::encode(h.finalize()),
295        }
296    }
297
298    /// Get the algorithm being used
299    pub fn algorithm(&self) -> HashAlgorithm {
300        self.algorithm
301    }
302}
303
304/// Trait for types that can be hashed
305///
306/// Implemented for `[u8]`, `str`, and `String`.
307///
308/// # Example
309///
310/// ```rust
311/// use atlas_common::hash::{Hasher, HashAlgorithm};
312///
313/// let text = "Hello, World!";
314/// let hash = text.hash(HashAlgorithm::Sha256);
315///
316/// let bytes = b"raw bytes";
317/// let hash2 = bytes.hash(HashAlgorithm::Sha512);
318///
319/// let large_data = vec![0u8; 100_000_000]; // 100MB
320/// let optimized_hash = large_data.hash_optimized(HashAlgorithm::Sha384);
321/// ```
322pub trait Hasher {
323    fn hash(&self, algorithm: HashAlgorithm) -> String;
324    fn hash_default(&self) -> String {
325        self.hash(HashAlgorithm::default())
326    }
327
328    /// Hash with hardware optimization
329    ///
330    /// Uses Intel Xeon parallel processing, Apple Silicon crypto, or multi-core
331    /// optimization based on available hardware and data size.
332    fn hash_optimized(&self, algorithm: HashAlgorithm) -> String;
333}
334
335impl Hasher for [u8] {
336    fn hash(&self, algorithm: HashAlgorithm) -> String {
337        calculate_hash_with_algorithm(self, &algorithm)
338    }
339
340    fn hash_optimized(&self, algorithm: HashAlgorithm) -> String {
341        calculate_hash_optimized(self, algorithm)
342    }
343}
344
345impl Hasher for str {
346    fn hash(&self, algorithm: HashAlgorithm) -> String {
347        self.as_bytes().hash(algorithm)
348    }
349
350    fn hash_optimized(&self, algorithm: HashAlgorithm) -> String {
351        self.as_bytes().hash_optimized(algorithm)
352    }
353}
354
355impl Hasher for String {
356    fn hash(&self, algorithm: HashAlgorithm) -> String {
357        self.as_bytes().hash(algorithm)
358    }
359
360    fn hash_optimized(&self, algorithm: HashAlgorithm) -> String {
361        self.as_bytes().hash_optimized(algorithm)
362    }
363}
364
365/// Calculate hash with hardware optimization
366///
367/// Automatically selects the best optimization strategy:
368/// - Intel Xeon: Uses SHA-NI extensions + AVX-512 parallel processing
369/// - Apple Silicon: Uses ARM crypto extensions  
370/// - Other: Uses multi-core parallel processing when beneficial (Risc?)
371pub fn calculate_hash_optimized(data: &[u8], algorithm: HashAlgorithm) -> String {
372    let capabilities = HardwareCapabilities::detect();
373    let strategy = HashOptimization::select(&capabilities, data.len());
374
375    match strategy {
376        HashOptimization::IntelShaExtensions => calculate_intel_sha_ni(data, algorithm),
377        HashOptimization::XeonParallel => calculate_xeon_parallel(data, algorithm, &capabilities),
378        HashOptimization::AppleSiliconCrypto => calculate_apple_silicon(data, algorithm),
379        HashOptimization::MultiCore => calculate_multicore_parallel(data, algorithm, &capabilities),
380        HashOptimization::Software => calculate_hash_with_algorithm(data, &algorithm),
381    }
382}
383
384#[cfg(target_arch = "x86_64")]
385fn calculate_intel_sha_ni(data: &[u8], algorithm: HashAlgorithm) -> String {
386    calculate_hash_with_algorithm(data, &algorithm)
387}
388
389#[cfg(target_arch = "x86_64")]
390fn calculate_xeon_parallel(
391    data: &[u8],
392    algorithm: HashAlgorithm,
393    capabilities: &HardwareCapabilities,
394) -> String {
395    let chunk_size = capabilities.optimal_chunk_size();
396
397    if data.len() <= chunk_size {
398        return calculate_hash_with_algorithm(data, &algorithm);
399    }
400
401    use rayon::prelude::*;
402
403    let chunk_hashes: Vec<String> = data
404        .par_chunks(chunk_size)
405        .map(|chunk| calculate_hash_with_algorithm(chunk, &algorithm))
406        .collect();
407
408    let combined = chunk_hashes.join("");
409    calculate_hash_with_algorithm(combined.as_bytes(), &algorithm)
410}
411
/// AArch64 build of the ARM-crypto strategy.
///
/// Contains no architecture-specific code of its own: it forwards to
/// `calculate_hash_with_algorithm`.
#[cfg(target_arch = "aarch64")]
fn calculate_apple_silicon(data: &[u8], algorithm: HashAlgorithm) -> String {
    let algorithm_ref = &algorithm;
    calculate_hash_with_algorithm(data, algorithm_ref)
}
416
417fn calculate_multicore_parallel(
418    data: &[u8],
419    algorithm: HashAlgorithm,
420    capabilities: &HardwareCapabilities,
421) -> String {
422    let chunk_size = capabilities.optimal_chunk_size();
423
424    if data.len() <= chunk_size || capabilities.cpu_cores < 3 {
425        return calculate_hash_with_algorithm(data, &algorithm);
426    }
427
428    use rayon::prelude::*;
429
430    let chunk_hashes: Vec<String> = data
431        .par_chunks(chunk_size)
432        .map(|chunk| calculate_hash_with_algorithm(chunk, &algorithm))
433        .collect();
434
435    let combined = chunk_hashes.join("");
436    calculate_hash_with_algorithm(combined.as_bytes(), &algorithm)
437}
438
/// Non-x86-64 stand-in so the dispatcher compiles on every target; forwards
/// to `calculate_hash_with_algorithm`.
#[cfg(not(target_arch = "x86_64"))]
fn calculate_intel_sha_ni(data: &[u8], algorithm: HashAlgorithm) -> String {
    let algorithm_ref = &algorithm;
    calculate_hash_with_algorithm(data, algorithm_ref)
}
443
/// Non-x86-64 stand-in so the dispatcher compiles on every target; forwards
/// all three arguments to `calculate_multicore_parallel`.
#[cfg(not(target_arch = "x86_64"))]
fn calculate_xeon_parallel(
    data: &[u8],
    algorithm: HashAlgorithm,
    capabilities: &HardwareCapabilities,
) -> String {
    let hardware = capabilities;
    calculate_multicore_parallel(data, algorithm, hardware)
}
452
453#[cfg(not(target_arch = "aarch64"))]
454fn calculate_apple_silicon(data: &[u8], algorithm: HashAlgorithm) -> String {
455    calculate_hash_with_algorithm(data, &algorithm)
456}
457
458/// Batch hasher for processing multiple inputs efficiently
459pub struct BatchHasher {
460    capabilities: HardwareCapabilities,
461}
462
463impl BatchHasher {
464    pub fn new() -> Self {
465        Self {
466            capabilities: HardwareCapabilities::detect(),
467        }
468    }
469
470    /// Hash multiple inputs in parallel
471    pub fn hash_batch(&self, inputs: &[&[u8]], algorithm: HashAlgorithm) -> Vec<String> {
472        if inputs.len() < 4 || self.capabilities.cpu_cores < 3 {
473            return inputs
474                .iter()
475                .map(|data| calculate_hash_optimized(data, algorithm))
476                .collect();
477        }
478
479        use rayon::prelude::*;
480        inputs
481            .par_iter()
482            .map(|data| calculate_hash_optimized(data, algorithm))
483            .collect()
484    }
485}
486
487impl Default for BatchHasher {
488    fn default() -> Self {
489        Self::new()
490    }
491}
492
493/// Get hardware capabilities information
494pub fn get_hardware_capabilities() -> HardwareCapabilities {
495    HardwareCapabilities::detect()
496}
497
#[cfg(test)]
mod tests {
    use super::*;

    /// Digest sizes in bytes and hex characters for each algorithm.
    #[test]
    fn test_algorithm_properties() {
        assert_eq!(HashAlgorithm::Sha256.output_size(), 32);
        assert_eq!(HashAlgorithm::Sha256.hex_length(), 64);

        assert_eq!(HashAlgorithm::Sha384.output_size(), 48);
        assert_eq!(HashAlgorithm::Sha384.hex_length(), 96);

        assert_eq!(HashAlgorithm::Sha512.output_size(), 64);
        assert_eq!(HashAlgorithm::Sha512.hex_length(), 128);
    }

    /// Parsing accepts compact, upper-case and dashed spellings and rejects
    /// unknown names.
    #[test]
    fn test_algorithm_parsing() {
        assert_eq!(
            HashAlgorithm::from_str("sha256").unwrap(),
            HashAlgorithm::Sha256
        );
        assert_eq!(
            HashAlgorithm::from_str("SHA384").unwrap(),
            HashAlgorithm::Sha384
        );
        assert_eq!(
            HashAlgorithm::from_str("sha-512").unwrap(),
            HashAlgorithm::Sha512
        );

        assert!(HashAlgorithm::from_str("md5").is_err());
    }

    /// Incremental hashing equals one-shot hashing of the concatenation.
    #[test]
    fn test_hash_builder() {
        let mut builder = HashBuilder::new(HashAlgorithm::Sha256);
        builder.update(b"hello ");
        builder.update(b"world");
        let hash = builder.finalize();

        assert_eq!(hash.len(), 64);

        let direct_hash = "hello world".hash(HashAlgorithm::Sha256);
        assert_eq!(hash, direct_hash);
    }

    /// `str`, `[u8]` and `String` agree on the same content.
    #[test]
    fn test_hasher_trait() {
        let data = "test data";
        let hash1 = data.hash(HashAlgorithm::Sha256);
        let hash2 = data.as_bytes().hash(HashAlgorithm::Sha256);
        let hash3 = data.to_string().hash(HashAlgorithm::Sha256);

        assert_eq!(hash1, hash2);
        assert_eq!(hash2, hash3);
    }

    /// For a small input the optimized path matches the plain digest.
    #[test]
    fn test_optimized_hashing() {
        let data = "test data for optimization";
        let normal_hash = data.hash(HashAlgorithm::Sha384);
        let optimized_hash = data.hash_optimized(HashAlgorithm::Sha384);

        assert_eq!(normal_hash, optimized_hash);
    }

    /// Detection reports at least one core and a chunk size of >= 16 MiB.
    #[test]
    fn test_hardware_capabilities() {
        let caps = get_hardware_capabilities();
        assert!(caps.cpu_cores > 0);
        assert!(caps.optimal_chunk_size() >= 16 * 1024 * 1024);
    }

    /// Fewer than four inputs: always the sequential branch of `hash_batch`.
    #[test]
    fn test_batch_hasher() {
        let batch_hasher = BatchHasher::new();
        let inputs = vec![
            b"input 1".as_slice(),
            b"input 2".as_slice(),
            b"input 3".as_slice(),
        ];

        let batch_results = batch_hasher.hash_batch(&inputs, HashAlgorithm::Sha256);

        // `zip` stops at the shorter side, so without this check an empty or
        // truncated result vector would pass the loop below unnoticed.
        assert_eq!(batch_results.len(), inputs.len());

        for (input, result) in inputs.iter().zip(batch_results.iter()) {
            let expected = input.hash(HashAlgorithm::Sha256);
            assert_eq!(*result, expected);
        }
    }

    /// Four or more inputs: eligible for the rayon branch of `hash_batch`
    /// (taken when the machine reports at least 3 cores). Results must still
    /// come back complete and in input order.
    #[test]
    fn test_batch_hasher_many_inputs() {
        let batch_hasher = BatchHasher::new();
        let owned: Vec<Vec<u8>> = (0..8u8)
            .map(|i| format!("input {}", i).into_bytes())
            .collect();
        let inputs: Vec<&[u8]> = owned.iter().map(|bytes| bytes.as_slice()).collect();

        let batch_results = batch_hasher.hash_batch(&inputs, HashAlgorithm::Sha256);

        assert_eq!(batch_results.len(), inputs.len());

        for (input, result) in inputs.iter().zip(batch_results.iter()) {
            let expected = input.hash(HashAlgorithm::Sha256);
            assert_eq!(*result, expected);
        }
    }

    /// Output length is correct for small and large inputs; only the small
    /// input is compared against the plain digest here.
    #[test]
    fn test_large_data_optimization() {
        let small_data = vec![0u8; 1024];
        let large_data = vec![0u8; 50 * 1024 * 1024]; // 50MB

        let small_hash = calculate_hash_optimized(&small_data, HashAlgorithm::Sha384);
        let large_hash = calculate_hash_optimized(&large_data, HashAlgorithm::Sha384);

        assert_eq!(small_hash.len(), 96);
        assert_eq!(large_hash.len(), 96);

        let small_standard = calculate_hash_with_algorithm(&small_data, &HashAlgorithm::Sha384);
        assert_eq!(small_hash, small_standard);
    }
}