1use std::{fmt, str::FromStr};
14
15use base64::{Engine, engine::general_purpose::STANDARD as BASE64_STANDARD};
16use digest::Digest;
17
/// Checksum algorithms this module can compute.
///
/// The canonical upper-case spelling of each variant is available through
/// `ChecksumAlgorithm::as_str` and the `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChecksumAlgorithm {
    /// Named `"CRC32"`; encoded as 4 big-endian bytes.
    Crc32,
    /// Named `"CRC32C"`; encoded as 4 big-endian bytes.
    Crc32c,
    /// Named `"CRC64NVME"`; encoded as 8 big-endian bytes.
    Crc64Nvme,
    /// Named `"SHA1"`.
    Sha1,
    /// Named `"SHA256"`.
    Sha256,
}
37
38impl ChecksumAlgorithm {
39 #[must_use]
41 pub fn as_str(&self) -> &'static str {
42 match self {
43 Self::Crc32 => "CRC32",
44 Self::Crc32c => "CRC32C",
45 Self::Crc64Nvme => "CRC64NVME",
46 Self::Sha1 => "SHA1",
47 Self::Sha256 => "SHA256",
48 }
49 }
50}
51
52impl fmt::Display for ChecksumAlgorithm {
53 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54 f.write_str(self.as_str())
55 }
56}
57
58#[derive(Debug, Clone, thiserror::Error)]
60#[error("unknown checksum algorithm: {0}")]
61pub struct ParseChecksumAlgorithmError(String);
62
63impl FromStr for ChecksumAlgorithm {
64 type Err = ParseChecksumAlgorithmError;
65
66 fn from_str(s: &str) -> Result<Self, Self::Err> {
67 match s.to_ascii_uppercase().as_str() {
68 "CRC32" => Ok(Self::Crc32),
69 "CRC32C" => Ok(Self::Crc32c),
70 "CRC64NVME" => Ok(Self::Crc64Nvme),
71 "SHA1" => Ok(Self::Sha1),
72 "SHA256" => Ok(Self::Sha256),
73 _ => Err(ParseChecksumAlgorithmError(s.to_owned())),
74 }
75 }
76}
77
78#[derive(Debug, Clone, PartialEq, Eq)]
84pub struct ChecksumValue {
85 pub algorithm: ChecksumAlgorithm,
87 pub value: String,
89}
90
91#[must_use]
109pub fn compute_md5(data: &[u8]) -> String {
110 let hash = md5::Md5::digest(data);
111 hex::encode(hash)
112}
113
114#[must_use]
129pub fn compute_etag(data: &[u8]) -> String {
130 let md5_hex = compute_md5(data);
131 format!("\"{md5_hex}\"")
132}
133
134#[must_use]
151pub fn compute_multipart_etag(part_md5_hexes: &[impl AsRef<str>], part_count: usize) -> String {
152 let mut combined = Vec::with_capacity(part_md5_hexes.len() * 16);
153 for hex_str in part_md5_hexes {
154 let hex_str = hex_str.as_ref().trim_matches('"');
155 if let Ok(bytes) = hex::decode(hex_str) {
156 combined.extend_from_slice(&bytes);
157 }
158 }
159 let final_md5 = hex::encode(md5::Md5::digest(&combined));
160 format!("\"{final_md5}-{part_count}\"")
161}
162
163#[must_use]
174pub fn compute_checksum(algorithm: ChecksumAlgorithm, data: &[u8]) -> String {
175 match algorithm {
176 ChecksumAlgorithm::Crc32 => {
177 let mut hasher = crc32fast::Hasher::new();
178 hasher.update(data);
179 let value = hasher.finalize();
180 BASE64_STANDARD.encode(value.to_be_bytes())
181 }
182 ChecksumAlgorithm::Crc32c => {
183 let value = crc32c::crc32c(data);
184 BASE64_STANDARD.encode(value.to_be_bytes())
185 }
186 ChecksumAlgorithm::Crc64Nvme => {
187 let mut hasher = crc64fast_nvme::Digest::new();
188 hasher.write(data);
189 let value = hasher.sum64();
190 BASE64_STANDARD.encode(value.to_be_bytes())
191 }
192 ChecksumAlgorithm::Sha1 => {
193 let hash = sha1::Sha1::digest(data);
194 BASE64_STANDARD.encode(hash)
195 }
196 ChecksumAlgorithm::Sha256 => {
197 let hash = sha2::Sha256::digest(data);
198 BASE64_STANDARD.encode(hash)
199 }
200 }
201}
202
203#[must_use]
222pub fn compute_composite_checksum(
223 algorithm: ChecksumAlgorithm,
224 part_checksums_b64: &[impl AsRef<str>],
225) -> String {
226 let mut combined = Vec::new();
227 for b64 in part_checksums_b64 {
228 if let Ok(bytes) = BASE64_STANDARD.decode(b64.as_ref()) {
229 combined.extend_from_slice(&bytes);
230 }
231 }
232 let checksum_b64 = compute_checksum(algorithm, &combined);
233 format!("{checksum_b64}-{}", part_checksums_b64.len())
234}
235
236#[derive(Debug, Clone)]
242pub struct HasherResult {
243 pub md5_hex: String,
245 pub checksums: Vec<ChecksumValue>,
248}
249
250pub struct StreamingHasher {
266 md5: md5::Md5,
267 sha1: Option<sha1::Sha1>,
268 sha256: Option<sha2::Sha256>,
269 crc32: Option<crc32fast::Hasher>,
270 crc32c: Option<u32>,
271 crc64nvme: Option<crc64fast_nvme::Digest>,
272 algorithms: Vec<ChecksumAlgorithm>,
273}
274
275impl fmt::Debug for StreamingHasher {
276 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
277 f.debug_struct("StreamingHasher")
278 .field("algorithms", &self.algorithms)
279 .finish_non_exhaustive()
280 }
281}
282
283impl StreamingHasher {
284 #[must_use]
289 pub fn new(algorithms: &[ChecksumAlgorithm]) -> Self {
290 let mut sha1 = None;
291 let mut sha256 = None;
292 let mut crc32 = None;
293 let mut crc32c = None;
294 let mut crc64nvme = None;
295
296 for &algo in algorithms {
297 match algo {
298 ChecksumAlgorithm::Sha1 => {
299 sha1 = Some(<sha1::Sha1 as Digest>::new());
300 }
301 ChecksumAlgorithm::Sha256 => {
302 sha256 = Some(<sha2::Sha256 as Digest>::new());
303 }
304 ChecksumAlgorithm::Crc32 => {
305 crc32 = Some(crc32fast::Hasher::new());
306 }
307 ChecksumAlgorithm::Crc32c => {
308 crc32c = Some(0);
309 }
310 ChecksumAlgorithm::Crc64Nvme => {
311 crc64nvme = Some(crc64fast_nvme::Digest::new());
312 }
313 }
314 }
315
316 Self {
317 md5: <md5::Md5 as Digest>::new(),
318 sha1,
319 sha256,
320 crc32,
321 crc32c,
322 crc64nvme,
323 algorithms: algorithms.to_vec(),
324 }
325 }
326
327 pub fn update(&mut self, data: &[u8]) {
329 Digest::update(&mut self.md5, data);
330
331 if let Some(ref mut h) = self.sha1 {
332 Digest::update(h, data);
333 }
334 if let Some(ref mut h) = self.sha256 {
335 Digest::update(h, data);
336 }
337 if let Some(ref mut h) = self.crc32 {
338 h.update(data);
339 }
340 if let Some(ref mut val) = self.crc32c {
341 *val = crc32c::crc32c_append(*val, data);
342 }
343 if let Some(ref mut h) = self.crc64nvme {
344 h.write(data);
345 }
346 }
347
348 #[must_use]
352 pub fn finish(self) -> HasherResult {
353 let md5_hex = hex::encode(Digest::finalize(self.md5));
354
355 let mut checksums = Vec::with_capacity(self.algorithms.len());
356 for algo in &self.algorithms {
357 let value = match algo {
358 ChecksumAlgorithm::Sha1 => {
359 let hash = Digest::finalize(self.sha1.clone().unwrap_or_default());
360 BASE64_STANDARD.encode(hash)
361 }
362 ChecksumAlgorithm::Sha256 => {
363 let hash = Digest::finalize(self.sha256.clone().unwrap_or_default());
364 BASE64_STANDARD.encode(hash)
365 }
366 ChecksumAlgorithm::Crc32 => {
367 let val = self
368 .crc32
369 .as_ref()
370 .map_or(0, crc32fast::Hasher::clone_finalize);
371 BASE64_STANDARD.encode(val.to_be_bytes())
372 }
373 ChecksumAlgorithm::Crc32c => {
374 let val = self.crc32c.unwrap_or(0);
375 BASE64_STANDARD.encode(val.to_be_bytes())
376 }
377 ChecksumAlgorithm::Crc64Nvme => {
378 let val = self
379 .crc64nvme
380 .as_ref()
381 .map_or(0, crc64fast_nvme::Digest::sum64);
382 BASE64_STANDARD.encode(val.to_be_bytes())
383 }
384 };
385 checksums.push(ChecksumValue {
386 algorithm: *algo,
387 value,
388 });
389 }
390
391 HasherResult { md5_hex, checksums }
392 }
393}
394
/// Produces a final 32-bit value from a shared reference.
///
/// Implemented in this file for `crc32fast::Hasher` so that its finalizer can
/// be passed as a plain function path where only `&Hasher` is available.
trait CloneFinalize {
    /// Returns the finalized value without consuming `self`.
    fn clone_finalize(&self) -> u32;
}
405
406impl CloneFinalize for crc32fast::Hasher {
407 fn clone_finalize(&self) -> u32 {
408 self.clone().finalize()
409 }
410}
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Every supported algorithm, in declaration order.
    const ALL_ALGORITHMS: [ChecksumAlgorithm; 5] = [
        ChecksumAlgorithm::Crc32,
        ChecksumAlgorithm::Crc32c,
        ChecksumAlgorithm::Crc64Nvme,
        ChecksumAlgorithm::Sha1,
        ChecksumAlgorithm::Sha256,
    ];

    /// Decodes a base64 checksum produced by the code under test.
    fn decode_b64(b64: &str) -> Vec<u8> {
        BASE64_STANDARD
            .decode(b64)
            .expect("checksum should be valid base64")
    }

    // ---- ChecksumAlgorithm: Display / FromStr ----

    #[test]
    fn test_should_display_checksum_algorithm() {
        assert_eq!(ChecksumAlgorithm::Crc32.to_string(), "CRC32");
        assert_eq!(ChecksumAlgorithm::Crc32c.to_string(), "CRC32C");
        assert_eq!(ChecksumAlgorithm::Crc64Nvme.to_string(), "CRC64NVME");
        assert_eq!(ChecksumAlgorithm::Sha1.to_string(), "SHA1");
        assert_eq!(ChecksumAlgorithm::Sha256.to_string(), "SHA256");
    }

    #[test]
    fn test_should_parse_checksum_algorithm() {
        assert_eq!(
            "CRC32".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Crc32)
        );
        assert_eq!(
            "crc32c".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Crc32c)
        );
        assert_eq!(
            "CRC64NVME".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Crc64Nvme)
        );
        assert_eq!(
            "sha1".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Sha1)
        );
        assert_eq!(
            "SHA256".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Sha256)
        );
        assert!("unknown".parse::<ChecksumAlgorithm>().is_err());
        assert!("".parse::<ChecksumAlgorithm>().is_err());
    }

    #[test]
    fn test_should_round_trip_display_and_parse() {
        for algo in ALL_ALGORITHMS {
            let upper = algo.to_string();
            assert_eq!(upper.parse::<ChecksumAlgorithm>().ok(), Some(algo));
            let lower = upper.to_ascii_lowercase();
            assert_eq!(lower.parse::<ChecksumAlgorithm>().ok(), Some(algo));
        }
    }

    #[test]
    fn test_should_report_rejected_input_in_parse_error() {
        let message = "unknown"
            .parse::<ChecksumAlgorithm>()
            .err()
            .map(|e| e.to_string());
        assert_eq!(
            message.as_deref(),
            Some("unknown checksum algorithm: unknown")
        );
    }

    // ---- MD5 / ETag ----

    #[test]
    fn test_should_compute_md5_empty() {
        assert_eq!(compute_md5(b""), "d41d8cd98f00b204e9800998ecf8427e");
    }

    #[test]
    fn test_should_compute_md5_hello() {
        assert_eq!(compute_md5(b"hello"), "5d41402abc4b2a76b9719d911017c592");
    }

    #[test]
    fn test_should_compute_etag_empty() {
        assert_eq!(compute_etag(b""), "\"d41d8cd98f00b204e9800998ecf8427e\"");
    }

    #[test]
    fn test_should_compute_etag_with_data() {
        let etag = compute_etag(b"hello");
        assert!(etag.starts_with('"'));
        assert!(etag.ends_with('"'));
        // 32 hex characters plus the two surrounding quotes.
        assert_eq!(etag.len(), 34);
        assert_eq!(etag, "\"5d41402abc4b2a76b9719d911017c592\"");
    }

    // ---- Multipart ETag ----

    #[test]
    fn test_should_compute_multipart_etag() {
        let part1_hex = compute_md5(b"hello");
        let part2_hex = compute_md5(b"world");
        let etag = compute_multipart_etag(&[part1_hex, part2_hex], 2);
        assert!(etag.starts_with('"'));
        assert!(etag.ends_with("-2\""));
        // Quote + 32 hex characters + "-2" + quote.
        assert_eq!(etag.len(), 36);
    }

    #[test]
    fn test_should_compute_multipart_etag_single_part() {
        let part_hex = compute_md5(b"data");
        let etag = compute_multipart_etag(&[part_hex], 1);
        assert!(etag.starts_with('"'));
        assert!(etag.ends_with("-1\""));
        assert_eq!(etag.len(), 36);
    }

    #[test]
    fn test_should_accept_quoted_part_digests_in_multipart_etag() {
        let bare = compute_md5(b"data");
        let quoted = compute_etag(b"data");
        assert_eq!(
            compute_multipart_etag(&[quoted], 1),
            compute_multipart_etag(&[bare], 1),
        );
    }

    // ---- One-shot checksums ----

    #[test]
    fn test_should_compute_crc32_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Crc32, b"hello");
        assert_eq!(decode_b64(&b64).len(), 4);
        // CRC32("hello") = 0x3610A686, big-endian, base64-encoded.
        assert_eq!(b64, "NhCmhg==");
    }

    #[test]
    fn test_should_compute_crc32c_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Crc32c, b"hello");
        assert!(!b64.is_empty());
        assert_eq!(decode_b64(&b64).len(), 4);
    }

    #[test]
    fn test_should_compute_crc64nvme_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Crc64Nvme, b"hello");
        assert!(!b64.is_empty());
        assert_eq!(decode_b64(&b64).len(), 8);
    }

    #[test]
    fn test_should_compute_sha1_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Sha1, b"hello");
        assert_eq!(decode_b64(&b64).len(), 20);
        assert_eq!(b64, "qvTGHdzF6KLavt4PO0gs2a6pQ00=");
    }

    #[test]
    fn test_should_compute_sha256_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Sha256, b"hello");
        assert_eq!(decode_b64(&b64).len(), 32);
        assert_eq!(b64, "LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=");
    }

    // ---- Composite checksum ----

    #[test]
    fn test_should_compute_composite_checksum() {
        let p1 = compute_checksum(ChecksumAlgorithm::Sha256, b"part1");
        let p2 = compute_checksum(ChecksumAlgorithm::Sha256, b"part2");
        let composite = compute_composite_checksum(ChecksumAlgorithm::Sha256, &[p1, p2]);

        // The part count must be the suffix, not merely appear somewhere.
        let digest_b64 = composite
            .strip_suffix("-2")
            .expect("composite checksum should end with the part count");
        assert_eq!(decode_b64(digest_b64).len(), 32);
    }

    // ---- StreamingHasher ----

    #[test]
    fn test_should_stream_md5_only() {
        let mut hasher = StreamingHasher::new(&[]);
        hasher.update(b"hello");
        let result = hasher.finish();
        assert_eq!(result.md5_hex, "5d41402abc4b2a76b9719d911017c592");
        assert!(result.checksums.is_empty());
    }

    #[test]
    fn test_should_stream_with_sha256() {
        let mut hasher = StreamingHasher::new(&[ChecksumAlgorithm::Sha256]);
        hasher.update(b"hello ");
        hasher.update(b"world");
        let result = hasher.finish();

        assert_eq!(result.md5_hex, compute_md5(b"hello world"));

        assert_eq!(result.checksums.len(), 1);
        assert_eq!(result.checksums[0].algorithm, ChecksumAlgorithm::Sha256);
        assert_eq!(
            result.checksums[0].value,
            compute_checksum(ChecksumAlgorithm::Sha256, b"hello world"),
        );
    }

    #[test]
    fn test_should_stream_multiple_algorithms() {
        let mut hasher = StreamingHasher::new(&ALL_ALGORITHMS);
        hasher.update(b"test data");
        let result = hasher.finish();

        assert_eq!(result.md5_hex, compute_md5(b"test data"));
        assert_eq!(result.checksums.len(), ALL_ALGORITHMS.len());
        for (checksum, algo) in result.checksums.iter().zip(ALL_ALGORITHMS) {
            assert_eq!(checksum.algorithm, algo);
            assert_eq!(checksum.value, compute_checksum(algo, b"test data"));
        }
    }

    #[test]
    fn test_should_match_single_shot_and_streaming_results() {
        let data = b"The quick brown fox jumps over the lazy dog";

        let single_md5 = compute_md5(data);
        let single_sha256 = compute_checksum(ChecksumAlgorithm::Sha256, data);

        let mut hasher = StreamingHasher::new(&[ChecksumAlgorithm::Sha256]);
        // Feed the input in three uneven chunks.
        hasher.update(&data[..10]);
        hasher.update(&data[10..30]);
        hasher.update(&data[30..]);
        let result = hasher.finish();

        assert_eq!(result.md5_hex, single_md5);
        assert_eq!(result.checksums[0].value, single_sha256);
    }
}