base_d/features/
hashing.rs

1use blake2::{Blake2b512, Blake2s256};
2use blake3::Hasher as Blake3Hasher;
3use md5::Md5;
4use sha2::{Digest, Sha224, Sha256, Sha384, Sha512};
5use sha3::{Keccak224, Keccak256, Keccak384, Keccak512, Sha3_224, Sha3_256, Sha3_384, Sha3_512};
6use std::hash::Hasher;
7use twox_hash::xxhash3_64::Hasher as Xxh3Hash64;
8use twox_hash::xxhash3_128::Hasher as Xxh3Hash128;
9use twox_hash::{XxHash32, XxHash64};
10
/// Configuration for xxHash algorithms.
///
/// Both fields are public, so a value can also be assembled with a struct
/// literal. Doing so skips the secret-length check performed by
/// [`XxHashConfig::with_secret`]; prefer the constructors.
#[derive(Debug, Clone, Default)]
pub struct XxHashConfig {
    /// Seed value (any `u64`; the derived default is 0).
    pub seed: u64,
    /// Optional secret for the XXH3 variants. When present it must be at
    /// least [`XxHashConfig::MIN_SECRET_LEN`] bytes long.
    pub secret: Option<Vec<u8>>,
}

impl XxHashConfig {
    /// Minimum accepted length, in bytes, of an XXH3 secret.
    pub const MIN_SECRET_LEN: usize = 136;

    /// Create a config with a custom seed and no secret.
    pub fn with_seed(seed: u64) -> Self {
        Self { seed, secret: None }
    }

    /// Create a config with a seed and a secret for the XXH3 variants.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message when `secret` is shorter than
    /// [`XxHashConfig::MIN_SECRET_LEN`] bytes.
    pub fn with_secret(seed: u64, secret: Vec<u8>) -> Result<Self, String> {
        if secret.len() < Self::MIN_SECRET_LEN {
            return Err(format!(
                "XXH3 secret must be >= {} bytes, got {}",
                Self::MIN_SECRET_LEN,
                secret.len()
            ));
        }
        Ok(Self {
            seed,
            secret: Some(secret),
        })
    }
}
41
/// Supported hash algorithms.
///
/// Values can be parsed from text with [`HashAlgorithm::from_str`] or
/// `str::parse`, and rendered back to their canonical name with
/// [`HashAlgorithm::as_str`] or `Display`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// MD5 (16-byte digest).
    Md5,
    /// SHA-224 (28-byte digest).
    Sha224,
    /// SHA-256 (32-byte digest).
    Sha256,
    /// SHA-384 (48-byte digest).
    Sha384,
    /// SHA-512 (64-byte digest).
    Sha512,
    /// SHA3-224 (28-byte digest).
    Sha3_224,
    /// SHA3-256 (32-byte digest).
    Sha3_256,
    /// SHA3-384 (48-byte digest).
    Sha3_384,
    /// SHA3-512 (64-byte digest).
    Sha3_512,
    /// Keccak-224 (28-byte digest).
    Keccak224,
    /// Keccak-256 (32-byte digest).
    Keccak256,
    /// Keccak-384 (48-byte digest).
    Keccak384,
    /// Keccak-512 (64-byte digest).
    Keccak512,
    /// BLAKE2b with a 64-byte digest.
    Blake2b,
    /// BLAKE2s with a 32-byte digest.
    Blake2s,
    /// BLAKE3 (32-byte digest).
    Blake3,
    // CRC variants
    /// 32-bit CRC (4-byte output).
    Crc32,
    /// 32-bit CRC-32C (4-byte output).
    Crc32c,
    /// 16-bit CRC (2-byte output).
    Crc16,
    /// 64-bit CRC (8-byte output).
    Crc64,
    // xxHash variants
    /// xxHash32 (4-byte output).
    XxHash32,
    /// xxHash64 (8-byte output).
    XxHash64,
    /// XXH3 with a 64-bit result (8-byte output).
    XxHash3_64,
    /// XXH3 with a 128-bit result (16-byte output).
    XxHash3_128,
}

impl HashAlgorithm {
    /// Parse a hash algorithm from its name.
    ///
    /// Matching is case-insensitive and accepts the aliases listed in the
    /// body (for example `"sha256"` and `"sha-256"`).
    ///
    /// # Errors
    ///
    /// Returns `"Unknown hash algorithm: <input>"` for unrecognised names.
    // The inherent method is kept so existing `HashAlgorithm::from_str(..)`
    // callers compile without importing `FromStr`; the trait impl below
    // delegates to it.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Result<Self, String> {
        match s.to_lowercase().as_str() {
            "md5" => Ok(Self::Md5),
            "sha224" | "sha-224" => Ok(Self::Sha224),
            "sha256" | "sha-256" => Ok(Self::Sha256),
            "sha384" | "sha-384" => Ok(Self::Sha384),
            "sha512" | "sha-512" => Ok(Self::Sha512),
            "sha3-224" | "sha3_224" => Ok(Self::Sha3_224),
            "sha3-256" | "sha3_256" => Ok(Self::Sha3_256),
            "sha3-384" | "sha3_384" => Ok(Self::Sha3_384),
            "sha3-512" | "sha3_512" => Ok(Self::Sha3_512),
            "keccak224" | "keccak-224" => Ok(Self::Keccak224),
            "keccak256" | "keccak-256" => Ok(Self::Keccak256),
            "keccak384" | "keccak-384" => Ok(Self::Keccak384),
            "keccak512" | "keccak-512" => Ok(Self::Keccak512),
            "blake2b" | "blake2b-512" => Ok(Self::Blake2b),
            "blake2s" | "blake2s-256" => Ok(Self::Blake2s),
            "blake3" => Ok(Self::Blake3),
            "crc32" => Ok(Self::Crc32),
            "crc32c" => Ok(Self::Crc32c),
            "crc16" => Ok(Self::Crc16),
            "crc64" => Ok(Self::Crc64),
            "xxhash32" | "xxh32" => Ok(Self::XxHash32),
            "xxhash64" | "xxh64" => Ok(Self::XxHash64),
            "xxhash3" | "xxh3" | "xxhash3-64" | "xxh3-64" => Ok(Self::XxHash3_64),
            "xxhash3-128" | "xxh3-128" => Ok(Self::XxHash3_128),
            _ => Err(format!("Unknown hash algorithm: {}", s)),
        }
    }

    /// Canonical lowercase name of the algorithm.
    ///
    /// Every returned name is accepted by [`HashAlgorithm::from_str`]. The
    /// string is a literal, so it outlives the value it was obtained from.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Md5 => "md5",
            Self::Sha224 => "sha224",
            Self::Sha256 => "sha256",
            Self::Sha384 => "sha384",
            Self::Sha512 => "sha512",
            Self::Sha3_224 => "sha3-224",
            Self::Sha3_256 => "sha3-256",
            Self::Sha3_384 => "sha3-384",
            Self::Sha3_512 => "sha3-512",
            Self::Keccak224 => "keccak224",
            Self::Keccak256 => "keccak256",
            Self::Keccak384 => "keccak384",
            Self::Keccak512 => "keccak512",
            Self::Blake2b => "blake2b",
            Self::Blake2s => "blake2s",
            Self::Blake3 => "blake3",
            Self::Crc32 => "crc32",
            Self::Crc32c => "crc32c",
            Self::Crc16 => "crc16",
            Self::Crc64 => "crc64",
            Self::XxHash32 => "xxhash32",
            Self::XxHash64 => "xxhash64",
            Self::XxHash3_64 => "xxhash3-64",
            Self::XxHash3_128 => "xxhash3-128",
        }
    }

    /// Get the output size in bytes for this algorithm.
    pub fn output_size(&self) -> usize {
        match self {
            Self::Crc16 => 2,
            Self::Crc32 | Self::Crc32c | Self::XxHash32 => 4,
            Self::Crc64 | Self::XxHash64 | Self::XxHash3_64 => 8,
            Self::Md5 | Self::XxHash3_128 => 16,
            Self::Sha224 | Self::Sha3_224 | Self::Keccak224 => 28,
            Self::Sha256
            | Self::Sha3_256
            | Self::Keccak256
            | Self::Blake2s
            | Self::Blake3 => 32,
            Self::Sha384 | Self::Sha3_384 | Self::Keccak384 => 48,
            Self::Sha512 | Self::Sha3_512 | Self::Keccak512 | Self::Blake2b => 64,
        }
    }
}

/// Enables `"sha256".parse::<HashAlgorithm>()`; same rules and error text as
/// the inherent [`HashAlgorithm::from_str`].
impl std::str::FromStr for HashAlgorithm {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Resolves to the inherent method (inherent items take precedence
        // over trait items), so this does not recurse.
        HashAlgorithm::from_str(s)
    }
}

/// Formats the algorithm as its canonical name (see [`HashAlgorithm::as_str`]).
impl std::fmt::Display for HashAlgorithm {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
165
166/// Compute hash of data using the specified algorithm.
167/// Uses default configuration (seed = 0, no secret).
168pub fn hash(data: &[u8], algorithm: HashAlgorithm) -> Vec<u8> {
169    hash_with_config(data, algorithm, &XxHashConfig::default())
170}
171
172/// Compute hash of data using the specified algorithm with custom configuration.
173pub fn hash_with_config(data: &[u8], algorithm: HashAlgorithm, config: &XxHashConfig) -> Vec<u8> {
174    match algorithm {
175        HashAlgorithm::Md5 => {
176            let mut hasher = Md5::new();
177            hasher.update(data);
178            hasher.finalize().to_vec()
179        }
180        HashAlgorithm::Sha224 => {
181            let mut hasher = Sha224::new();
182            hasher.update(data);
183            hasher.finalize().to_vec()
184        }
185        HashAlgorithm::Sha256 => {
186            let mut hasher = Sha256::new();
187            hasher.update(data);
188            hasher.finalize().to_vec()
189        }
190        HashAlgorithm::Sha384 => {
191            let mut hasher = Sha384::new();
192            hasher.update(data);
193            hasher.finalize().to_vec()
194        }
195        HashAlgorithm::Sha512 => {
196            let mut hasher = Sha512::new();
197            hasher.update(data);
198            hasher.finalize().to_vec()
199        }
200        HashAlgorithm::Sha3_224 => {
201            let mut hasher = Sha3_224::new();
202            hasher.update(data);
203            hasher.finalize().to_vec()
204        }
205        HashAlgorithm::Sha3_256 => {
206            let mut hasher = Sha3_256::new();
207            hasher.update(data);
208            hasher.finalize().to_vec()
209        }
210        HashAlgorithm::Sha3_384 => {
211            let mut hasher = Sha3_384::new();
212            hasher.update(data);
213            hasher.finalize().to_vec()
214        }
215        HashAlgorithm::Sha3_512 => {
216            let mut hasher = Sha3_512::new();
217            hasher.update(data);
218            hasher.finalize().to_vec()
219        }
220        HashAlgorithm::Keccak224 => {
221            let mut hasher = Keccak224::new();
222            hasher.update(data);
223            hasher.finalize().to_vec()
224        }
225        HashAlgorithm::Keccak256 => {
226            let mut hasher = Keccak256::new();
227            hasher.update(data);
228            hasher.finalize().to_vec()
229        }
230        HashAlgorithm::Keccak384 => {
231            let mut hasher = Keccak384::new();
232            hasher.update(data);
233            hasher.finalize().to_vec()
234        }
235        HashAlgorithm::Keccak512 => {
236            let mut hasher = Keccak512::new();
237            hasher.update(data);
238            hasher.finalize().to_vec()
239        }
240        HashAlgorithm::Blake2b => {
241            let mut hasher = Blake2b512::new();
242            hasher.update(data);
243            hasher.finalize().to_vec()
244        }
245        HashAlgorithm::Blake2s => {
246            let mut hasher = Blake2s256::new();
247            hasher.update(data);
248            hasher.finalize().to_vec()
249        }
250        HashAlgorithm::Blake3 => {
251            let mut hasher = Blake3Hasher::new();
252            hasher.update(data);
253            hasher.finalize().as_bytes().to_vec()
254        }
255        HashAlgorithm::Crc16 => {
256            let crc = crc::Crc::<u16>::new(&crc::CRC_16_IBM_SDLC);
257            let result = crc.checksum(data);
258            result.to_be_bytes().to_vec()
259        }
260        HashAlgorithm::Crc32 => {
261            let crc = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC);
262            let result = crc.checksum(data);
263            result.to_be_bytes().to_vec()
264        }
265        HashAlgorithm::Crc32c => {
266            let crc = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
267            let result = crc.checksum(data);
268            result.to_be_bytes().to_vec()
269        }
270        HashAlgorithm::Crc64 => {
271            let crc = crc::Crc::<u64>::new(&crc::CRC_64_ECMA_182);
272            let result = crc.checksum(data);
273            result.to_be_bytes().to_vec()
274        }
275        HashAlgorithm::XxHash32 => {
276            let mut hasher = XxHash32::with_seed(config.seed as u32);
277            hasher.write(data);
278            (hasher.finish() as u32).to_be_bytes().to_vec()
279        }
280        HashAlgorithm::XxHash64 => {
281            let mut hasher = XxHash64::with_seed(config.seed);
282            hasher.write(data);
283            hasher.finish().to_be_bytes().to_vec()
284        }
285        HashAlgorithm::XxHash3_64 => {
286            let mut hasher = if let Some(ref secret) = config.secret {
287                Xxh3Hash64::with_seed_and_secret(config.seed, secret.as_slice()).expect(
288                    "XXH3 secret validation should have been done in XxHashConfig::with_secret",
289                )
290            } else {
291                Xxh3Hash64::with_seed(config.seed)
292            };
293            hasher.write(data);
294            hasher.finish().to_be_bytes().to_vec()
295        }
296        HashAlgorithm::XxHash3_128 => {
297            let mut hasher = if let Some(ref secret) = config.secret {
298                Xxh3Hash128::with_seed_and_secret(config.seed, secret.as_slice()).expect(
299                    "XXH3 secret validation should have been done in XxHashConfig::with_secret",
300                )
301            } else {
302                Xxh3Hash128::with_seed(config.seed)
303            };
304            hasher.write(data);
305            hasher.finish_128().to_be_bytes().to_vec()
306        }
307    }
308}
309
/// Unit tests for the hashing helpers: known digests, output sizes, name
/// parsing, and xxHash seed/secret configuration.
#[cfg(test)]
// NOTE(review): presumably silences a deprecation warning raised by something
// these tests exercise — confirm it is still required.
#[allow(deprecated)]
mod tests {
    use super::*;

    /// Every variant of `HashAlgorithm`, for exhaustive checks.
    const ALL_ALGORITHMS: [HashAlgorithm; 24] = [
        HashAlgorithm::Md5,
        HashAlgorithm::Sha224,
        HashAlgorithm::Sha256,
        HashAlgorithm::Sha384,
        HashAlgorithm::Sha512,
        HashAlgorithm::Sha3_224,
        HashAlgorithm::Sha3_256,
        HashAlgorithm::Sha3_384,
        HashAlgorithm::Sha3_512,
        HashAlgorithm::Keccak224,
        HashAlgorithm::Keccak256,
        HashAlgorithm::Keccak384,
        HashAlgorithm::Keccak512,
        HashAlgorithm::Blake2b,
        HashAlgorithm::Blake2s,
        HashAlgorithm::Blake3,
        HashAlgorithm::Crc32,
        HashAlgorithm::Crc32c,
        HashAlgorithm::Crc16,
        HashAlgorithm::Crc64,
        HashAlgorithm::XxHash32,
        HashAlgorithm::XxHash64,
        HashAlgorithm::XxHash3_64,
        HashAlgorithm::XxHash3_128,
    ];

    #[test]
    fn test_md5() {
        let data = b"hello world";
        let digest = hash(data, HashAlgorithm::Md5);
        assert_eq!(digest.len(), 16);
        // MD5 of "hello world" is 5eb63bbbe01eeed093cb22bb8f5acdc3
        assert_eq!(hex::encode(&digest), "5eb63bbbe01eeed093cb22bb8f5acdc3");
    }

    #[test]
    fn test_sha256() {
        let data = b"hello world";
        let digest = hash(data, HashAlgorithm::Sha256);
        assert_eq!(digest.len(), 32);
        // SHA-256 of "hello world"
        assert_eq!(
            hex::encode(&digest),
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    #[test]
    fn test_sha512() {
        let data = b"hello world";
        let digest = hash(data, HashAlgorithm::Sha512);
        assert_eq!(digest.len(), 64);
    }

    #[test]
    fn test_sha3_256() {
        let data = b"hello world";
        let digest = hash(data, HashAlgorithm::Sha3_256);
        assert_eq!(digest.len(), 32);
    }

    #[test]
    fn test_blake2b() {
        let data = b"hello world";
        let digest = hash(data, HashAlgorithm::Blake2b);
        assert_eq!(digest.len(), 64);
    }

    #[test]
    fn test_blake2s() {
        let data = b"hello world";
        let digest = hash(data, HashAlgorithm::Blake2s);
        assert_eq!(digest.len(), 32);
    }

    #[test]
    fn test_blake3() {
        let data = b"hello world";
        let digest = hash(data, HashAlgorithm::Blake3);
        assert_eq!(digest.len(), 32);
    }

    #[test]
    fn test_empty_input() {
        let data = b"";
        let digest = hash(data, HashAlgorithm::Sha256);
        assert_eq!(digest.len(), 32);
        // SHA-256 of empty string
        assert_eq!(
            hex::encode(&digest),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    #[test]
    fn test_output_sizes() {
        assert_eq!(HashAlgorithm::Md5.output_size(), 16);
        assert_eq!(HashAlgorithm::Sha256.output_size(), 32);
        assert_eq!(HashAlgorithm::Sha512.output_size(), 64);
        assert_eq!(HashAlgorithm::Blake3.output_size(), 32);
        assert_eq!(HashAlgorithm::Crc16.output_size(), 2);
        assert_eq!(HashAlgorithm::Crc32.output_size(), 4);
        assert_eq!(HashAlgorithm::Crc64.output_size(), 8);
        assert_eq!(HashAlgorithm::XxHash32.output_size(), 4);
        assert_eq!(HashAlgorithm::XxHash64.output_size(), 8);
        assert_eq!(HashAlgorithm::XxHash3_64.output_size(), 8);
        assert_eq!(HashAlgorithm::XxHash3_128.output_size(), 16);
    }

    /// `output_size` must agree with what `hash` actually produces, for every
    /// algorithm and for both empty and non-empty input.
    #[test]
    fn test_output_size_matches_digest_length() {
        let inputs: [&[u8]; 2] = [b"", b"hello world"];
        for algorithm in ALL_ALGORITHMS.iter().copied() {
            for &data in inputs.iter() {
                assert_eq!(
                    hash(data, algorithm).len(),
                    algorithm.output_size(),
                    "length mismatch for {}",
                    algorithm.as_str()
                );
            }
        }
    }

    /// Every canonical name must parse back to the variant it came from.
    #[test]
    fn test_from_str_round_trips_as_str() {
        for algorithm in ALL_ALGORITHMS.iter().copied() {
            assert_eq!(HashAlgorithm::from_str(algorithm.as_str()), Ok(algorithm));
        }
        // Parsing is case-insensitive and accepts aliases.
        assert_eq!(HashAlgorithm::from_str("SHA-256"), Ok(HashAlgorithm::Sha256));
        assert_eq!(HashAlgorithm::from_str("xxh3"), Ok(HashAlgorithm::XxHash3_64));
        assert!(HashAlgorithm::from_str("not-a-hash").is_err());
    }

    #[test]
    fn test_crc32() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::Crc32);
        assert_eq!(result.len(), 4);
        // CRC32 is deterministic
        let result2 = hash(data, HashAlgorithm::Crc32);
        assert_eq!(result, result2);
    }

    #[test]
    fn test_crc32c() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::Crc32c);
        assert_eq!(result.len(), 4);
    }

    #[test]
    fn test_crc16() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::Crc16);
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_crc64() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::Crc64);
        assert_eq!(result.len(), 8);
    }

    /// Each CRC algorithm definition carries a `check` field, which by CRC
    /// catalogue convention is the checksum of the ASCII string "123456789".
    /// This pins both the parameter set and the big-endian byte order.
    #[test]
    fn test_crc_check_values() {
        let input = b"123456789";
        assert_eq!(
            hash(input, HashAlgorithm::Crc16),
            crc::CRC_16_IBM_SDLC.check.to_be_bytes().to_vec()
        );
        assert_eq!(
            hash(input, HashAlgorithm::Crc32),
            crc::CRC_32_ISO_HDLC.check.to_be_bytes().to_vec()
        );
        assert_eq!(
            hash(input, HashAlgorithm::Crc32c),
            crc::CRC_32_ISCSI.check.to_be_bytes().to_vec()
        );
        assert_eq!(
            hash(input, HashAlgorithm::Crc64),
            crc::CRC_64_ECMA_182.check.to_be_bytes().to_vec()
        );
    }

    #[test]
    fn test_xxhash32() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::XxHash32);
        assert_eq!(result.len(), 4);
        // xxHash is deterministic with same seed
        let result2 = hash(data, HashAlgorithm::XxHash32);
        assert_eq!(result, result2);
    }

    #[test]
    fn test_xxhash64() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::XxHash64);
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn test_xxhash3_64() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::XxHash3_64);
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn test_xxhash3_128() {
        let data = b"hello world";
        let result = hash(data, HashAlgorithm::XxHash3_128);
        assert_eq!(result.len(), 16);
    }

    #[test]
    fn test_xxhash_config_default() {
        let config = XxHashConfig::default();
        assert_eq!(config.seed, 0);
        assert!(config.secret.is_none());
    }

    #[test]
    fn test_xxhash_config_secret_too_short() {
        let result = XxHashConfig::with_secret(0, vec![0u8; 100]);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("136 bytes"));
    }

    #[test]
    fn test_xxhash_config_secret_valid() {
        let result = XxHashConfig::with_secret(42, vec![0u8; 136]);
        assert!(result.is_ok());
        let config = result.unwrap();
        assert_eq!(config.seed, 42);
        assert_eq!(config.secret.as_ref().unwrap().len(), 136);
    }

    #[test]
    fn test_hash_seed_changes_output() {
        let data = b"test";
        let h1 = hash_with_config(data, HashAlgorithm::XxHash64, &XxHashConfig::with_seed(0));
        let h2 = hash_with_config(data, HashAlgorithm::XxHash64, &XxHashConfig::with_seed(42));
        assert_ne!(h1, h2);
    }

    #[test]
    fn test_backward_compatibility() {
        let data = b"test";
        let old = hash(data, HashAlgorithm::XxHash64);
        let new = hash_with_config(data, HashAlgorithm::XxHash64, &XxHashConfig::default());
        assert_eq!(old, new);
    }

    #[test]
    fn test_xxhash3_with_seed() {
        let data = b"test data for secret hashing";

        // Test that different seeds produce different hashes
        let h1 = hash_with_config(data, HashAlgorithm::XxHash3_64, &XxHashConfig::with_seed(0));
        let h2 = hash_with_config(
            data,
            HashAlgorithm::XxHash3_64,
            &XxHashConfig::with_seed(123),
        );
        assert_ne!(h1, h2, "Different seeds should produce different hashes");

        // Test that same seed produces same hash
        let h3 = hash_with_config(
            data,
            HashAlgorithm::XxHash3_64,
            &XxHashConfig::with_seed(123),
        );
        assert_eq!(h2, h3, "Same seed should produce same hash");
    }

    /// A validated secret must be accepted by both XXH3 variants (no panic)
    /// and hash deterministically.
    #[test]
    fn test_xxhash3_with_secret() {
        let data = b"test data for secret hashing";
        let config = XxHashConfig::with_secret(7, vec![0xA5u8; 136]).unwrap();

        let h64 = hash_with_config(data, HashAlgorithm::XxHash3_64, &config);
        assert_eq!(h64.len(), 8);
        assert_eq!(
            h64,
            hash_with_config(data, HashAlgorithm::XxHash3_64, &config)
        );

        let h128 = hash_with_config(data, HashAlgorithm::XxHash3_128, &config);
        assert_eq!(h128.len(), 16);
        assert_eq!(
            h128,
            hash_with_config(data, HashAlgorithm::XxHash3_128, &config)
        );
    }

    #[test]
    fn test_xxhash32_with_seed() {
        let data = b"test";
        let h1 = hash_with_config(data, HashAlgorithm::XxHash32, &XxHashConfig::with_seed(0));
        let h2 = hash_with_config(data, HashAlgorithm::XxHash32, &XxHashConfig::with_seed(999));
        assert_ne!(h1, h2);
    }
}