use std::{fmt, str::FromStr};
use base64::{Engine, engine::general_purpose::STANDARD as BASE64_STANDARD};
use digest::Digest;
/// Checksum algorithms that can be requested from the helpers in this module.
///
/// Every variant has a canonical upper-case name, available through
/// [`ChecksumAlgorithm::as_str`] and the [`fmt::Display`] implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChecksumAlgorithm {
    /// Named `CRC32`.
    Crc32,
    /// Named `CRC32C`.
    Crc32c,
    /// Named `CRC64NVME`.
    Crc64Nvme,
    /// Named `SHA1`.
    Sha1,
    /// Named `SHA256`.
    Sha256,
}

impl ChecksumAlgorithm {
    /// Returns the canonical upper-case name of the algorithm.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Crc32 => "CRC32",
            Self::Crc32c => "CRC32C",
            Self::Crc64Nvme => "CRC64NVME",
            Self::Sha1 => "SHA1",
            Self::Sha256 => "SHA256",
        }
    }
}

impl fmt::Display for ChecksumAlgorithm {
    /// Writes the canonical name, honouring any width, fill, alignment or
    /// precision given in the format specification.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `pad` applies the caller's formatting flags; `write_str` would ignore
        // them, so `{:>10}` would print unpadded. With a plain `{}` the output
        // is the same as before.
        f.pad(self.as_str())
    }
}
/// Error produced when a string cannot be parsed into a [`ChecksumAlgorithm`].
///
/// The wrapped string is interpolated into the message, which renders as
/// `unknown checksum algorithm: <string>`.
#[derive(Debug, Clone, thiserror::Error)]
#[error("unknown checksum algorithm: {0}")]
pub struct ParseChecksumAlgorithmError(String);
impl FromStr for ChecksumAlgorithm {
type Err = ParseChecksumAlgorithmError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_ascii_uppercase().as_str() {
"CRC32" => Ok(Self::Crc32),
"CRC32C" => Ok(Self::Crc32c),
"CRC64NVME" => Ok(Self::Crc64Nvme),
"SHA1" => Ok(Self::Sha1),
"SHA256" => Ok(Self::Sha256),
_ => Err(ParseChecksumAlgorithmError(s.to_owned())),
}
}
}
/// A checksum together with the algorithm that produced it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChecksumValue {
    /// Algorithm used to compute `value`.
    pub algorithm: ChecksumAlgorithm,
    /// Encoded checksum. `StreamingHasher::finish` stores the base64 form of
    /// the raw checksum bytes here.
    pub value: String,
}
/// Computes the MD5 digest of `data` and returns it hex-encoded.
#[must_use]
pub fn compute_md5(data: &[u8]) -> String {
    hex::encode(md5::Md5::digest(data))
}
/// Builds an ETag for `data`: the hex MD5 digest wrapped in double quotes.
#[must_use]
pub fn compute_etag(data: &[u8]) -> String {
    let digest_hex = compute_md5(data);

    // Two extra bytes for the surrounding quotes.
    let mut etag = String::with_capacity(digest_hex.len() + 2);
    etag.push('"');
    etag.push_str(&digest_hex);
    etag.push('"');
    etag
}
#[must_use]
pub fn compute_multipart_etag(part_md5_hexes: &[impl AsRef<str>], part_count: usize) -> String {
let mut combined = Vec::with_capacity(part_md5_hexes.len() * 16);
for hex_str in part_md5_hexes {
let hex_str = hex_str.as_ref().trim_matches('"');
if let Ok(bytes) = hex::decode(hex_str) {
combined.extend_from_slice(&bytes);
}
}
let final_md5 = hex::encode(md5::Md5::digest(&combined));
format!("\"{final_md5}-{part_count}\"")
}
/// Computes the checksum of `data` with `algorithm` and returns it base64-encoded.
///
/// CRC values are serialised as big-endian bytes before encoding; SHA digests
/// are encoded as produced by the hasher.
#[must_use]
pub fn compute_checksum(algorithm: ChecksumAlgorithm, data: &[u8]) -> String {
    match algorithm {
        ChecksumAlgorithm::Crc32 => {
            let crc = crc32fast::hash(data);
            BASE64_STANDARD.encode(crc.to_be_bytes())
        }
        ChecksumAlgorithm::Crc32c => BASE64_STANDARD.encode(crc32c::crc32c(data).to_be_bytes()),
        ChecksumAlgorithm::Crc64Nvme => {
            let mut digest = crc64fast_nvme::Digest::new();
            digest.write(data);
            BASE64_STANDARD.encode(digest.sum64().to_be_bytes())
        }
        ChecksumAlgorithm::Sha1 => BASE64_STANDARD.encode(sha1::Sha1::digest(data)),
        ChecksumAlgorithm::Sha256 => BASE64_STANDARD.encode(sha2::Sha256::digest(data)),
    }
}
#[must_use]
pub fn compute_composite_checksum(
algorithm: ChecksumAlgorithm,
part_checksums_b64: &[impl AsRef<str>],
) -> String {
let mut combined = Vec::new();
for b64 in part_checksums_b64 {
if let Ok(bytes) = BASE64_STANDARD.decode(b64.as_ref()) {
combined.extend_from_slice(&bytes);
}
}
let checksum_b64 = compute_checksum(algorithm, &combined);
format!("{checksum_b64}-{}", part_checksums_b64.len())
}
/// Final output of a [`StreamingHasher`].
#[derive(Debug, Clone)]
pub struct HasherResult {
    /// Hex-encoded MD5 digest of all data fed to the hasher.
    pub md5_hex: String,
    /// One entry per algorithm passed to `StreamingHasher::new`, in the same order.
    pub checksums: Vec<ChecksumValue>,
}
/// Incremental hasher that always computes MD5 and, optionally, any set of
/// [`ChecksumAlgorithm`]s over the same stream of data.
pub struct StreamingHasher {
    /// MD5 state; always present.
    md5: md5::Md5,
    /// SHA-1 state, present only when SHA-1 was requested.
    sha1: Option<sha1::Sha1>,
    /// SHA-256 state, present only when SHA-256 was requested.
    sha256: Option<sha2::Sha256>,
    /// CRC32 state, present only when CRC32 was requested.
    crc32: Option<crc32fast::Hasher>,
    /// Running CRC32C value, present only when CRC32C was requested.
    crc32c: Option<u32>,
    /// CRC64-NVME state, present only when CRC64NVME was requested.
    crc64nvme: Option<crc64fast_nvme::Digest>,
    /// Requested algorithms in caller order; drives the order of the results.
    algorithms: Vec<ChecksumAlgorithm>,
}
impl fmt::Debug for StreamingHasher {
    /// Shows only the requested algorithms; the hasher states are left out and
    /// the output is marked as non-exhaustive.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("StreamingHasher");
        builder.field("algorithms", &self.algorithms);
        builder.finish_non_exhaustive()
    }
}
impl StreamingHasher {
#[must_use]
pub fn new(algorithms: &[ChecksumAlgorithm]) -> Self {
let mut sha1 = None;
let mut sha256 = None;
let mut crc32 = None;
let mut crc32c = None;
let mut crc64nvme = None;
for &algo in algorithms {
match algo {
ChecksumAlgorithm::Sha1 => {
sha1 = Some(<sha1::Sha1 as Digest>::new());
}
ChecksumAlgorithm::Sha256 => {
sha256 = Some(<sha2::Sha256 as Digest>::new());
}
ChecksumAlgorithm::Crc32 => {
crc32 = Some(crc32fast::Hasher::new());
}
ChecksumAlgorithm::Crc32c => {
crc32c = Some(0);
}
ChecksumAlgorithm::Crc64Nvme => {
crc64nvme = Some(crc64fast_nvme::Digest::new());
}
}
}
Self {
md5: <md5::Md5 as Digest>::new(),
sha1,
sha256,
crc32,
crc32c,
crc64nvme,
algorithms: algorithms.to_vec(),
}
}
pub fn update(&mut self, data: &[u8]) {
Digest::update(&mut self.md5, data);
if let Some(ref mut h) = self.sha1 {
Digest::update(h, data);
}
if let Some(ref mut h) = self.sha256 {
Digest::update(h, data);
}
if let Some(ref mut h) = self.crc32 {
h.update(data);
}
if let Some(ref mut val) = self.crc32c {
*val = crc32c::crc32c_append(*val, data);
}
if let Some(ref mut h) = self.crc64nvme {
h.write(data);
}
}
#[must_use]
pub fn finish(self) -> HasherResult {
let md5_hex = hex::encode(Digest::finalize(self.md5));
let mut checksums = Vec::with_capacity(self.algorithms.len());
for algo in &self.algorithms {
let value = match algo {
ChecksumAlgorithm::Sha1 => {
let hash = Digest::finalize(self.sha1.clone().unwrap_or_default());
BASE64_STANDARD.encode(hash)
}
ChecksumAlgorithm::Sha256 => {
let hash = Digest::finalize(self.sha256.clone().unwrap_or_default());
BASE64_STANDARD.encode(hash)
}
ChecksumAlgorithm::Crc32 => {
let val = self
.crc32
.as_ref()
.map_or(0, crc32fast::Hasher::clone_finalize);
BASE64_STANDARD.encode(val.to_be_bytes())
}
ChecksumAlgorithm::Crc32c => {
let val = self.crc32c.unwrap_or(0);
BASE64_STANDARD.encode(val.to_be_bytes())
}
ChecksumAlgorithm::Crc64Nvme => {
let val = self
.crc64nvme
.as_ref()
.map_or(0, crc64fast_nvme::Digest::sum64);
BASE64_STANDARD.encode(val.to_be_bytes())
}
};
checksums.push(ChecksumValue {
algorithm: *algo,
value,
});
}
HasherResult { md5_hex, checksums }
}
}
/// Produces a final checksum from a borrowed hasher.
trait CloneFinalize {
    /// Returns the checksum of the data seen so far without consuming `self`.
    fn clone_finalize(&self) -> u32;
}

impl CloneFinalize for crc32fast::Hasher {
    fn clone_finalize(&self) -> u32 {
        // Finalize a copy so the borrowed hasher is left untouched.
        let snapshot = self.clone();
        snapshot.finalize()
    }
}
/// Unit tests for the checksum helpers and the streaming hasher.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_display_checksum_algorithm() {
        assert_eq!(ChecksumAlgorithm::Crc32.to_string(), "CRC32");
        assert_eq!(ChecksumAlgorithm::Crc32c.to_string(), "CRC32C");
        assert_eq!(ChecksumAlgorithm::Crc64Nvme.to_string(), "CRC64NVME");
        assert_eq!(ChecksumAlgorithm::Sha1.to_string(), "SHA1");
        assert_eq!(ChecksumAlgorithm::Sha256.to_string(), "SHA256");
    }

    #[test]
    fn test_should_parse_checksum_algorithm() {
        assert_eq!(
            "CRC32".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Crc32)
        );
        assert_eq!(
            "crc32c".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Crc32c)
        );
        assert_eq!(
            "CRC64NVME".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Crc64Nvme)
        );
        assert_eq!(
            "sha1".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Sha1)
        );
        assert_eq!(
            "SHA256".parse::<ChecksumAlgorithm>().ok(),
            Some(ChecksumAlgorithm::Sha256)
        );
        assert!("unknown".parse::<ChecksumAlgorithm>().is_err());
    }

    #[test]
    fn test_should_report_rejected_input_in_parse_error() {
        let message = "unknown"
            .parse::<ChecksumAlgorithm>()
            .err()
            .map(|e| e.to_string());
        assert_eq!(
            message.as_deref(),
            Some("unknown checksum algorithm: unknown")
        );
    }

    #[test]
    fn test_should_compute_md5_empty() {
        assert_eq!(compute_md5(b""), "d41d8cd98f00b204e9800998ecf8427e");
    }

    #[test]
    fn test_should_compute_md5_hello() {
        assert_eq!(compute_md5(b"hello"), "5d41402abc4b2a76b9719d911017c592");
    }

    #[test]
    fn test_should_compute_etag_empty() {
        assert_eq!(compute_etag(b""), "\"d41d8cd98f00b204e9800998ecf8427e\"");
    }

    #[test]
    fn test_should_compute_etag_with_data() {
        let etag = compute_etag(b"hello");
        assert!(etag.starts_with('"'));
        assert!(etag.ends_with('"'));
        // 32 hex digits plus the two surrounding quotes.
        assert_eq!(etag.len(), 34);
    }

    #[test]
    fn test_should_compute_multipart_etag() {
        let part1_hex = compute_md5(b"hello");
        let part2_hex = compute_md5(b"world");

        // Expected value: MD5 over the concatenated raw part digests.
        let mut raw = hex::decode(&part1_hex).expect("test decode");
        raw.extend(hex::decode(&part2_hex).expect("test decode"));
        let expected = format!("\"{}-2\"", compute_md5(&raw));

        let etag = compute_multipart_etag(&[part1_hex, part2_hex], 2);
        assert!(etag.starts_with('"'));
        assert!(etag.ends_with("-2\""));
        assert_eq!(etag, expected);
    }

    #[test]
    fn test_should_compute_multipart_etag_single_part() {
        let part_hex = compute_md5(b"data");
        let etag = compute_multipart_etag(&[part_hex], 1);
        assert!(etag.ends_with("-1\""));
    }

    #[test]
    fn test_should_accept_quoted_part_etags() {
        // Quoted part ETags must yield the same result as bare hex digests.
        let from_hex = compute_multipart_etag(&[compute_md5(b"hello")], 1);
        let from_etag = compute_multipart_etag(&[compute_etag(b"hello")], 1);
        assert_eq!(from_etag, from_hex);
    }

    #[test]
    fn test_should_compute_crc32_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Crc32, b"hello");
        assert!(!b64.is_empty());
        let decoded = BASE64_STANDARD.decode(&b64);
        assert!(decoded.is_ok());
        assert_eq!(decoded.expect("test decode").len(), 4);
    }

    #[test]
    fn test_should_compute_crc32c_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Crc32c, b"hello");
        assert!(!b64.is_empty());
        let decoded = BASE64_STANDARD.decode(&b64);
        assert!(decoded.is_ok());
        assert_eq!(decoded.expect("test decode").len(), 4);
    }

    #[test]
    fn test_should_compute_crc64nvme_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Crc64Nvme, b"hello");
        assert!(!b64.is_empty());
        let decoded = BASE64_STANDARD.decode(&b64);
        assert!(decoded.is_ok());
        assert_eq!(decoded.expect("test decode").len(), 8);
    }

    #[test]
    fn test_should_compute_sha1_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Sha1, b"hello");
        let decoded = BASE64_STANDARD.decode(&b64);
        assert!(decoded.is_ok());
        assert_eq!(decoded.expect("test decode").len(), 20);
    }

    #[test]
    fn test_should_compute_sha256_checksum() {
        let b64 = compute_checksum(ChecksumAlgorithm::Sha256, b"hello");
        let decoded = BASE64_STANDARD.decode(&b64);
        assert!(decoded.is_ok());
        assert_eq!(decoded.expect("test decode").len(), 32);
    }

    #[test]
    fn test_should_compute_composite_checksum() {
        let p1 = compute_checksum(ChecksumAlgorithm::Sha256, b"part1");
        let p2 = compute_checksum(ChecksumAlgorithm::Sha256, b"part2");

        // Expected value: checksum over the concatenated raw part checksums.
        let mut raw = BASE64_STANDARD.decode(&p1).expect("test decode");
        raw.extend(BASE64_STANDARD.decode(&p2).expect("test decode"));
        let expected = format!("{}-2", compute_checksum(ChecksumAlgorithm::Sha256, &raw));

        let composite = compute_composite_checksum(ChecksumAlgorithm::Sha256, &[p1, p2]);
        assert!(composite.ends_with("-2"));
        assert_eq!(composite, expected);
    }

    #[test]
    fn test_should_stream_md5_only() {
        let mut hasher = StreamingHasher::new(&[]);
        hasher.update(b"hello");
        let result = hasher.finish();
        assert_eq!(result.md5_hex, "5d41402abc4b2a76b9719d911017c592");
        assert!(result.checksums.is_empty());
    }

    #[test]
    fn test_should_stream_with_sha256() {
        let mut hasher = StreamingHasher::new(&[ChecksumAlgorithm::Sha256]);
        hasher.update(b"hello ");
        hasher.update(b"world");
        let result = hasher.finish();
        assert_eq!(result.md5_hex, compute_md5(b"hello world"));
        assert_eq!(result.checksums.len(), 1);
        assert_eq!(result.checksums[0].algorithm, ChecksumAlgorithm::Sha256);
        assert_eq!(
            result.checksums[0].value,
            compute_checksum(ChecksumAlgorithm::Sha256, b"hello world"),
        );
    }

    #[test]
    fn test_should_stream_multiple_algorithms() {
        let algos = [
            ChecksumAlgorithm::Crc32,
            ChecksumAlgorithm::Crc32c,
            ChecksumAlgorithm::Crc64Nvme,
            ChecksumAlgorithm::Sha1,
            ChecksumAlgorithm::Sha256,
        ];
        let mut hasher = StreamingHasher::new(&algos);
        hasher.update(b"test data");
        let result = hasher.finish();
        assert_eq!(result.checksums.len(), 5);
        for (i, algo) in algos.iter().enumerate() {
            assert_eq!(result.checksums[i].algorithm, *algo);
            assert_eq!(
                result.checksums[i].value,
                compute_checksum(*algo, b"test data"),
            );
        }
    }

    #[test]
    fn test_should_match_single_shot_and_streaming_results() {
        let data = b"The quick brown fox jumps over the lazy dog";
        let single_md5 = compute_md5(data);
        let single_sha256 = compute_checksum(ChecksumAlgorithm::Sha256, data);
        let mut hasher = StreamingHasher::new(&[ChecksumAlgorithm::Sha256]);
        hasher.update(&data[..10]);
        hasher.update(&data[10..30]);
        hasher.update(&data[30..]);
        let result = hasher.finish();
        assert_eq!(result.md5_hex, single_md5);
        assert_eq!(result.checksums[0].value, single_sha256);
    }
}