use crate::s3::MinioClient;
use crate::s3::error::ValidationErr;
use crate::s3::multimap_ext::Multimap;
use crate::s3::segmented_bytes::SegmentedBytes;
use crate::s3::sse::{Sse, SseCustomerKey};
use crate::s3::types::{BucketName, ObjectKey};
use base64::engine::Engine as _;
use chrono::{DateTime, Datelike, NaiveDateTime, Utc};
use crc_fast::{CrcAlgorithm, Digest as CrcFastDigest, checksum as crc_fast_checksum};
use lazy_static::lazy_static;
use percent_encoding::{AsciiSet, NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
use regex::Regex;
#[cfg(feature = "ring")]
use ring::digest::{Context, SHA256};
use sha1::{Digest as Sha1Digest, Sha1};
#[cfg(not(feature = "ring"))]
use sha2::Sha256;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use xmltree::Element;
/// UTC timestamp type taken and returned by the date/time helpers in this file.
pub type UtcTime = DateTime<Utc>;
/// Decodes a URL-encoded string.
///
/// `+` is turned into a space and every valid `%XX` escape into the byte it
/// encodes. A `%` that is not followed by two hex digits is kept literally.
/// Byte sequences that are not valid UTF-8 after decoding are replaced with
/// U+FFFD.
///
/// The whole input is treated as a single component: raw `&` and `=` bytes
/// are preserved. (Decoding it as a form-urlencoded query string and keeping
/// only the keys would silently drop everything after a raw `=` and remove
/// every raw `&`.)
pub fn url_decode(s: &str) -> String {
    /// Numeric value of one ASCII hex digit, or `None` if it is not one.
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    let raw = s.as_bytes();
    let mut decoded = Vec::with_capacity(raw.len());
    let mut i = 0;
    while i < raw.len() {
        match raw[i] {
            b'+' => decoded.push(b' '),
            b'%' => {
                let hi = raw.get(i + 1).copied().and_then(hex_value);
                let lo = raw.get(i + 2).copied().and_then(hex_value);
                if let (Some(hi), Some(lo)) = (hi, lo) {
                    decoded.push((hi << 4) | lo);
                    // Skip the '%' and both hex digits.
                    i += 3;
                    continue;
                }
                // Malformed escape: keep the '%' and decode what follows normally.
                decoded.push(b'%');
            }
            other => decoded.push(other),
        }
        i += 1;
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
/// Percent-encodes `s` with `urlencoding::encode` and returns an owned `String`.
pub fn url_encode(s: &str) -> String {
    let encoded = urlencoding::encode(s);
    encoded.into_owned()
}
/// Encodes `input` with the `STANDARD` base64 engine of the `base64` crate.
pub fn b64_encode(input: impl AsRef<[u8]>) -> String {
    use base64::engine::general_purpose::STANDARD;

    STANDARD.encode(input)
}
/// Computes the `Crc32IsoHdlc` checksum of `data`.
///
/// The value returned by `crc_fast` is narrowed to `u32` with an `as` cast.
pub fn crc32(data: &[u8]) -> u32 {
    let wide = crc_fast_checksum(CrcAlgorithm::Crc32IsoHdlc, data);
    wide as u32
}
/// Computes the `Crc64Nvme` checksum of `data`.
pub fn crc64nvme(data: &[u8]) -> u64 {
    let algorithm = CrcAlgorithm::Crc64Nvme;
    crc_fast_checksum(algorithm, data)
}
/// Reads a big-endian `u32` from the first four bytes of `data`.
///
/// Bytes after the fourth are ignored.
///
/// # Errors
///
/// Returns `ValidationErr::InvalidIntegerValue` when `data` holds fewer than
/// four bytes.
pub fn uint32(data: &[u8]) -> Result<u32, ValidationErr> {
    match data.first_chunk::<4>() {
        Some(prefix) => Ok(u32::from_be_bytes(*prefix)),
        None => Err(ValidationErr::InvalidIntegerValue {
            message: "data is not a valid 32-bit BigEndian unsigned integer".into(),
            source: Box::new(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "not enough bytes",
            )),
        }),
    }
}
/// Lowercase hex SHA-256 digest of empty input; the tests in this file check it against `sha256_hash(b"")`.
pub const EMPTY_SHA256: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
/// Returns the SHA-256 digest of `data` as a lowercase hex string.
///
/// The digest comes from `ring` when the `ring` feature is enabled and from
/// `sha2` otherwise.
pub fn sha256_hash(data: &[u8]) -> String {
    #[cfg(feature = "ring")]
    {
        let digest = ring::digest::digest(&SHA256, data);
        hex_encode(digest.as_ref())
    }
    #[cfg(not(feature = "ring"))]
    {
        let digest = Sha256::new_with_prefix(data).finalize();
        hex_encode(digest.as_ref())
    }
}
/// Encodes `bytes` as a lowercase hexadecimal string, two digits per byte.
///
/// The output buffer is pre-sized to `2 * bytes.len()` and filled with safe
/// `String::push` calls. No `unsafe` is needed, and the string's length never
/// covers uninitialised memory.
pub fn hex_encode(bytes: &[u8]) -> String {
    const LUT: &[u8; 16] = b"0123456789abcdef";
    let mut s = String::with_capacity(bytes.len() * 2);
    for &b in bytes {
        // Both indices are at most 15, so the table lookups cannot go out of bounds.
        s.push(char::from(LUT[usize::from(b >> 4)]));
        s.push(char::from(LUT[usize::from(b & 0x0F)]));
    }
    s
}
/// Returns the SHA-256 digest, as lowercase hex, of all segments of `sb` fed
/// to the hasher in iteration order.
///
/// Uses `ring` when the `ring` feature is enabled and `sha2` otherwise.
pub fn sha256_hash_sb(sb: Arc<SegmentedBytes>) -> String {
    #[cfg(feature = "ring")]
    {
        let mut ctx = Context::new(&SHA256);
        for segment in sb.iter() {
            ctx.update(segment.as_ref());
        }
        let digest = ctx.finish();
        hex_encode(digest.as_ref())
    }
    #[cfg(not(feature = "ring"))]
    {
        let mut sha = Sha256::new();
        for segment in sb.iter() {
            sha.update(segment);
        }
        let digest = sha.finalize();
        hex_encode(digest.as_ref())
    }
}
/// Checksum algorithms understood by `compute_checksum` and `compute_checksum_sb`.
///
/// Each variant has a canonical upper-case name (`as_str`) and an associated
/// checksum header constant (`header_name`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumAlgorithm {
/// Computed with `CrcAlgorithm::Crc32IsoHdlc`.
CRC32,
/// Computed with `CrcAlgorithm::Crc32Iscsi`.
CRC32C,
/// Computed with the `sha1` crate.
SHA1,
/// Computed with `ring` or `sha2`, depending on the `ring` feature.
SHA256,
/// Computed with `CrcAlgorithm::Crc64Nvme`.
CRC64NVME,
}
impl ChecksumAlgorithm {
    /// Canonical upper-case name of the algorithm.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::CRC32 => "CRC32",
            Self::CRC32C => "CRC32C",
            Self::SHA1 => "SHA1",
            Self::SHA256 => "SHA256",
            Self::CRC64NVME => "CRC64NVME",
        }
    }

    /// Header-name constant (from `header_constants`) associated with the algorithm.
    pub fn header_name(&self) -> &'static str {
        use crate::s3::types::header_constants::*;

        match *self {
            Self::CRC32 => X_AMZ_CHECKSUM_CRC32,
            Self::CRC32C => X_AMZ_CHECKSUM_CRC32C,
            Self::SHA1 => X_AMZ_CHECKSUM_SHA1,
            Self::SHA256 => X_AMZ_CHECKSUM_SHA256,
            Self::CRC64NVME => X_AMZ_CHECKSUM_CRC64NVME,
        }
    }
}
impl FromStr for ChecksumAlgorithm {
    type Err = String;

    /// Parses an algorithm name, ignoring case (the input is upper-cased
    /// before matching).
    ///
    /// # Errors
    ///
    /// Returns a message naming the original input when it matches no variant.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let upper = s.to_uppercase();
        let algorithm = match upper.as_str() {
            "CRC32" => Self::CRC32,
            "CRC32C" => Self::CRC32C,
            "SHA1" => Self::SHA1,
            "SHA256" => Self::SHA256,
            "CRC64NVME" => Self::CRC64NVME,
            _ => return Err(format!("Unknown checksum algorithm: {}", s)),
        };
        Ok(algorithm)
    }
}
/// Base64 of the big-endian `Crc32Iscsi` checksum of `data`.
pub fn crc32c(data: &[u8]) -> String {
    let crc = crc_fast_checksum(CrcAlgorithm::Crc32Iscsi, data) as u32;
    let be_bytes: [u8; 4] = crc.to_be_bytes();
    b64_encode(be_bytes)
}
/// Base64 of the SHA-1 digest of `data`.
pub fn sha1_hash(data: &[u8]) -> String {
    let digest = Sha1::new_with_prefix(data).finalize();
    b64_encode(&digest[..])
}
/// Base64 of the SHA-256 digest of `data`.
///
/// Uses `ring` when the `ring` feature is enabled and `sha2` otherwise.
pub fn sha256_checksum(data: &[u8]) -> String {
    #[cfg(feature = "ring")]
    {
        let digest = ring::digest::digest(&SHA256, data);
        b64_encode(digest.as_ref())
    }
    #[cfg(not(feature = "ring"))]
    {
        let digest = Sha256::new_with_prefix(data).finalize();
        b64_encode(&digest[..])
    }
}
/// Base64 of the big-endian bytes of `crc32(data)`.
pub fn crc32_checksum(data: &[u8]) -> String {
    let be_bytes = crc32(data).to_be_bytes();
    b64_encode(be_bytes)
}
/// Base64 of the big-endian bytes of `crc64nvme(data)`.
pub fn crc64nvme_checksum(data: &[u8]) -> String {
    let be_bytes = crc64nvme(data).to_be_bytes();
    b64_encode(be_bytes)
}
pub fn compute_checksum(algorithm: ChecksumAlgorithm, data: &[u8]) -> String {
match algorithm {
ChecksumAlgorithm::CRC32 => crc32_checksum(data),
ChecksumAlgorithm::CRC32C => crc32c(data),
ChecksumAlgorithm::SHA1 => sha1_hash(data),
ChecksumAlgorithm::SHA256 => sha256_checksum(data),
ChecksumAlgorithm::CRC64NVME => crc64nvme_checksum(data),
}
}
/// Computes the base64 checksum of all segments of `sb` using `algorithm`.
///
/// Segments are fed to the hasher in iteration order, so the result does not
/// depend on how the data is split (this file's tests compare it against
/// `compute_checksum`).
pub fn compute_checksum_sb(algorithm: ChecksumAlgorithm, sb: &Arc<SegmentedBytes>) -> String {
    match algorithm {
        // Both 32-bit CRCs share the same flow; only the polynomial differs.
        ChecksumAlgorithm::CRC32 | ChecksumAlgorithm::CRC32C => {
            let crc_algorithm = if algorithm == ChecksumAlgorithm::CRC32 {
                CrcAlgorithm::Crc32IsoHdlc
            } else {
                CrcAlgorithm::Crc32Iscsi
            };
            let mut crc = CrcFastDigest::new(crc_algorithm);
            for segment in sb.iter() {
                crc.update(segment.as_ref());
            }
            let value = crc.finalize() as u32;
            b64_encode(value.to_be_bytes())
        }
        ChecksumAlgorithm::CRC64NVME => {
            let mut crc = CrcFastDigest::new(CrcAlgorithm::Crc64Nvme);
            for segment in sb.iter() {
                crc.update(segment.as_ref());
            }
            let value = crc.finalize();
            b64_encode(value.to_be_bytes())
        }
        ChecksumAlgorithm::SHA1 => {
            let mut sha = Sha1::new();
            for segment in sb.iter() {
                sha.update(segment.as_ref());
            }
            let digest = sha.finalize();
            b64_encode(&digest[..])
        }
        ChecksumAlgorithm::SHA256 => {
            #[cfg(feature = "ring")]
            {
                let mut ctx = Context::new(&SHA256);
                for segment in sb.iter() {
                    ctx.update(segment.as_ref());
                }
                let digest = ctx.finish();
                b64_encode(digest.as_ref())
            }
            #[cfg(not(feature = "ring"))]
            {
                let mut sha = Sha256::new();
                for segment in sb.iter() {
                    sha.update(segment.as_ref());
                }
                let digest = sha.finalize();
                b64_encode(&digest[..])
            }
        }
    }
}
/// Unit tests for the helpers in this file.
///
/// Checksum and digest tests pin published check values rather than only
/// asserting that the output is non-empty, and every `ChecksumAlgorithm`
/// variant is covered by the name round-trip tests.
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;
    use std::collections::HashMap;

    #[test]
    fn test_url_decode_spaces() {
        assert_eq!(url_decode("hello%20world"), "hello world");
        assert_eq!(url_decode("hello+world"), "hello world");
    }

    #[test]
    fn test_url_decode_plus_sign() {
        assert_eq!(url_decode("a%2Bb"), "a+b");
        assert_eq!(url_decode("a%2bb"), "a+b");
    }

    #[test]
    fn test_url_decode_special_chars() {
        assert_eq!(url_decode("a%26b"), "a&b");
        assert_eq!(url_decode("a%3Db"), "a=b");
        assert_eq!(url_decode("a%2Fb"), "a/b");
    }

    #[test]
    fn test_url_encode_spaces() {
        assert_eq!(url_encode("hello world"), "hello%20world");
    }

    #[test]
    fn test_url_encode_plus_sign() {
        assert_eq!(url_encode("a+b"), "a%2Bb");
    }

    #[test]
    fn test_url_encode_special_chars() {
        assert_eq!(url_encode("a&b=c"), "a%26b%3Dc");
        assert_eq!(url_encode("a/b"), "a%2Fb");
    }

    #[test]
    fn test_b64_encode() {
        assert_eq!(b64_encode("hello"), "aGVsbG8=");
        assert_eq!(b64_encode(""), "");
        assert_eq!(b64_encode([0xFF, 0x00, 0xFF]), "/wD/");
        assert_eq!(
            b64_encode("The quick brown fox"),
            "VGhlIHF1aWNrIGJyb3duIGZveA=="
        );
    }

    #[test]
    fn test_crc32() {
        assert_eq!(crc32(b"hello"), 0x3610a686);
        assert_eq!(crc32(b""), 0);
        assert_eq!(crc32(b"123456789"), 0xcbf43926);
    }

    #[test]
    fn test_crc64nvme() {
        assert_eq!(crc64nvme(b""), 0);
        // Published check value for CRC-64/NVME.
        assert_eq!(crc64nvme(b"123456789"), 0xae8b14860a799888);
    }

    #[test]
    fn test_uint32_valid() {
        assert_eq!(uint32(&[0x00, 0x00, 0x00, 0x42]).unwrap(), 66);
        assert_eq!(uint32(&[0xFF, 0xFF, 0xFF, 0xFF]).unwrap(), 4294967295);
        assert_eq!(uint32(&[0x00, 0x00, 0x00, 0x00]).unwrap(), 0);
        assert_eq!(uint32(&[0x12, 0x34, 0x56, 0x78]).unwrap(), 0x12345678);
    }

    #[test]
    fn test_uint32_insufficient_bytes() {
        assert!(uint32(&[]).is_err());
        assert!(uint32(&[0x00]).is_err());
        assert!(uint32(&[0x00, 0x01]).is_err());
        assert!(uint32(&[0x00, 0x01, 0x02]).is_err());
    }

    #[test]
    fn test_uint32_extra_bytes() {
        assert_eq!(uint32(&[0x00, 0x00, 0x00, 0x42, 0xFF, 0xFF]).unwrap(), 66);
    }

    #[test]
    fn test_sha256_hash() {
        assert_eq!(sha256_hash(b""), EMPTY_SHA256);
        assert_eq!(
            sha256_hash(b"hello"),
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
        assert_eq!(
            sha256_hash(b"The quick brown fox jumps over the lazy dog"),
            "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592"
        );
    }

    #[test]
    fn test_hex_encode() {
        assert_eq!(hex_encode(&[]), "");
        assert_eq!(hex_encode(&[0x00]), "00");
        assert_eq!(hex_encode(&[0xFF]), "ff");
        assert_eq!(hex_encode(&[0xDE, 0xAD, 0xBE, 0xEF]), "deadbeef");
        assert_eq!(
            hex_encode(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC]),
            "123456789abc"
        );
    }

    #[test]
    fn test_empty_sha256_segmented_bytes() {
        assert_eq!(
            EMPTY_SHA256,
            sha256_hash_sb(Arc::new(SegmentedBytes::new()))
        );
    }

    #[test]
    fn test_md5sum_hash() {
        let hash = md5sum_hash(b"hello");
        assert!(!hash.is_empty());
        assert_eq!(hash, "XUFAKrxLKna5cZ2REBfFkg==");
        let empty_hash = md5sum_hash(b"");
        assert_eq!(empty_hash, "1B2M2Y8AsgTpgAmY7PhCfg==");
    }

    #[test]
    fn test_crc32c() {
        let checksum = crc32c(b"hello");
        assert!(!checksum.is_empty());
        // CRC of empty input is zero: four zero bytes in base64.
        let checksum_empty = crc32c(b"");
        assert_eq!(checksum_empty, "AAAAAA==");
        // Published CRC-32C check value 0xE3069283, big-endian, base64.
        let checksum_standard = crc32c(b"123456789");
        assert_eq!(checksum_standard, "4waSgw==");
    }

    #[test]
    fn test_sha1_hash() {
        let hash = sha1_hash(b"hello");
        assert!(!hash.is_empty());
        // SHA-1 of empty input is da39a3ee5e6b4b0d3255bfef95601890afd80709.
        let hash_empty = sha1_hash(b"");
        assert_eq!(hash_empty, "2jmj7l5rSw0yVb/vlWAYkK/YBwk=");
        let hash_fox = sha1_hash(b"The quick brown fox jumps over the lazy dog");
        assert!(!hash_fox.is_empty());
        assert_ne!(hash_fox, hash_empty);
    }

    #[test]
    fn test_sha256_checksum() {
        let checksum = sha256_checksum(b"hello");
        assert!(!checksum.is_empty());
        // Base64 form of EMPTY_SHA256.
        let checksum_empty = sha256_checksum(b"");
        assert_eq!(
            checksum_empty,
            "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
        );
    }

    #[test]
    fn test_crc32_checksum() {
        let checksum = crc32_checksum(b"hello");
        assert!(!checksum.is_empty());
        let checksum_empty = crc32_checksum(b"");
        assert_eq!(checksum_empty, "AAAAAA==");
        // 0xCBF43926 (see test_crc32), big-endian, base64.
        let checksum_standard = crc32_checksum(b"123456789");
        assert_eq!(checksum_standard, "y/Q5Jg==");
    }

    #[test]
    fn test_checksum_algorithm_as_str() {
        assert_eq!(ChecksumAlgorithm::CRC32.as_str(), "CRC32");
        assert_eq!(ChecksumAlgorithm::CRC32C.as_str(), "CRC32C");
        assert_eq!(ChecksumAlgorithm::SHA1.as_str(), "SHA1");
        assert_eq!(ChecksumAlgorithm::SHA256.as_str(), "SHA256");
        assert_eq!(ChecksumAlgorithm::CRC64NVME.as_str(), "CRC64NVME");
    }

    #[test]
    fn test_checksum_algorithm_from_str() {
        assert_eq!(
            "CRC32".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::CRC32
        );
        assert_eq!(
            "crc32c".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::CRC32C
        );
        assert_eq!(
            "SHA1".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::SHA1
        );
        assert_eq!(
            "sha256".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::SHA256
        );
        assert_eq!(
            "crc64nvme".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::CRC64NVME
        );
        assert!("invalid".parse::<ChecksumAlgorithm>().is_err());
    }

    #[test]
    fn test_compute_checksum() {
        let data = b"hello world";
        let crc32_result = compute_checksum(ChecksumAlgorithm::CRC32, data);
        assert!(!crc32_result.is_empty());
        let crc32c_result = compute_checksum(ChecksumAlgorithm::CRC32C, data);
        assert!(!crc32c_result.is_empty());
        let sha1_result = compute_checksum(ChecksumAlgorithm::SHA1, data);
        assert!(!sha1_result.is_empty());
        let sha256_result = compute_checksum(ChecksumAlgorithm::SHA256, data);
        assert!(!sha256_result.is_empty());
        let crc64_result = compute_checksum(ChecksumAlgorithm::CRC64NVME, data);
        assert_eq!(crc64_result, crc64nvme_checksum(data));
        assert_ne!(crc32_result, crc32c_result);
        assert_ne!(sha1_result, sha256_result);
    }

    #[test]
    fn test_parse_bool_true() {
        assert!(parse_bool("true").unwrap());
        assert!(parse_bool("True").unwrap());
        assert!(parse_bool("TRUE").unwrap());
        assert!(parse_bool("TrUe").unwrap());
    }

    #[test]
    fn test_parse_bool_false() {
        assert!(!parse_bool("false").unwrap());
        assert!(!parse_bool("False").unwrap());
        assert!(!parse_bool("FALSE").unwrap());
        assert!(!parse_bool("FaLsE").unwrap());
    }

    #[test]
    fn test_parse_bool_invalid() {
        assert!(parse_bool("yes").is_err());
        assert!(parse_bool("no").is_err());
        assert!(parse_bool("1").is_err());
        assert!(parse_bool("0").is_err());
        assert!(parse_bool("").is_err());
    }

    #[test]
    fn test_match_hostname_valid() {
        assert!(match_hostname("example.com"));
        assert!(match_hostname("sub.example.com"));
        assert!(match_hostname("my-server"));
        assert!(match_hostname("server123"));
        assert!(match_hostname("a.b.c.d.example.com"));
    }

    #[test]
    fn test_match_hostname_invalid() {
        assert!(!match_hostname("-invalid"));
        assert!(!match_hostname("invalid-"));
        assert!(!match_hostname("_invalid"));
        assert!(!match_hostname("invalid_"));
        assert!(!match_hostname("in..valid"));
    }

    #[test]
    fn test_check_bucket_name_valid() {
        assert!(check_bucket_name("mybucket", false).is_ok());
        assert!(check_bucket_name("my-bucket", true).is_ok());
        assert!(check_bucket_name("my.bucket", true).is_ok());
        assert!(check_bucket_name("bucket123", false).is_ok());
        assert!(check_bucket_name("abc", false).is_ok());
    }

    #[test]
    fn test_check_bucket_name_empty() {
        assert!(check_bucket_name("", false).is_err());
        assert!(check_bucket_name(" ", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_too_short() {
        assert!(check_bucket_name("ab", false).is_err());
        assert!(check_bucket_name("a", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_too_long() {
        let long_name = "a".repeat(64);
        assert!(check_bucket_name(&long_name, false).is_err());
    }

    #[test]
    fn test_check_bucket_name_ip_address() {
        assert!(check_bucket_name("192.168.1.1", false).is_err());
        assert!(check_bucket_name("10.0.0.1", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_invalid_successive_chars() {
        assert!(check_bucket_name("my..bucket", false).is_err());
        assert!(check_bucket_name("my.-bucket", false).is_err());
        assert!(check_bucket_name("my-.bucket", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_strict() {
        assert!(check_bucket_name("My-Bucket", false).is_ok());
        assert!(check_bucket_name("My-Bucket", true).is_err());
        assert!(check_bucket_name("my_bucket", false).is_ok());
        assert!(check_bucket_name("my_bucket", true).is_err());
        assert!(check_bucket_name("xn--bucket", false).is_ok());
        assert!(check_bucket_name("xn--bucket", true).is_err());
        assert!(check_bucket_name("sthree-bucket", false).is_ok());
        assert!(check_bucket_name("sthree-bucket", true).is_err());
        assert!(check_bucket_name("bucket-s3alias", false).is_ok());
        assert!(check_bucket_name("bucket-s3alias", true).is_err());
        assert!(check_bucket_name("my-bucket", true).is_ok());
        assert!(check_bucket_name("bucket123", true).is_ok());
        assert!(check_bucket_name("my.bucket.name", true).is_ok());
    }

    #[test]
    fn test_check_object_name_valid() {
        assert!(check_object_name("myobject").is_ok());
        assert!(check_object_name("my/object/path").is_ok());
        assert!(check_object_name("object-with-dashes").is_ok());
        assert!(check_object_name("a").is_ok());
    }

    #[test]
    fn test_check_object_name_empty() {
        assert!(check_object_name("").is_err());
    }

    #[test]
    fn test_check_object_name_too_long() {
        let long_name = "a".repeat(1025);
        assert!(check_object_name(&long_name).is_err());
    }

    #[test]
    fn test_trim_quotes() {
        assert_eq!(trim_quotes("\"hello\"".to_string()), "hello");
        assert_eq!(trim_quotes("\"\"".to_string()), "");
        assert_eq!(trim_quotes("hello".to_string()), "hello");
        assert_eq!(trim_quotes("\"hello".to_string()), "\"hello");
        assert_eq!(trim_quotes("hello\"".to_string()), "hello\"");
        assert_eq!(trim_quotes("\"".to_string()), "\"");
    }

    #[test]
    fn test_copy_slice() {
        let src = [1, 2, 3, 4, 5];
        let mut dst = [0; 5];
        let copied = copy_slice(&mut dst, &src);
        assert_eq!(copied, 5);
        assert_eq!(dst, [1, 2, 3, 4, 5]);
    }

    #[test]
    fn test_copy_slice_partial() {
        let src = [1, 2, 3, 4, 5];
        let mut dst = [0; 3];
        let copied = copy_slice(&mut dst, &src);
        assert_eq!(copied, 3);
        assert_eq!(dst, [1, 2, 3]);
    }

    #[test]
    fn test_copy_slice_empty() {
        let src: [u8; 0] = [];
        let mut dst: [u8; 0] = [];
        let copied = copy_slice(&mut dst, &src);
        assert_eq!(copied, 0);
    }

    #[test]
    fn test_encode_tags() {
        let mut tags = HashMap::new();
        tags.insert("key1".to_string(), "value1".to_string());
        tags.insert("key2".to_string(), "value2".to_string());
        let encoded = encode_tags(&tags);
        assert!(encoded.contains("key1=value1"));
        assert!(encoded.contains("key2=value2"));
    }

    #[test]
    fn test_encode_tags_special_chars() {
        let mut tags = HashMap::new();
        tags.insert("key with spaces".to_string(), "value&special".to_string());
        let encoded = encode_tags(&tags);
        assert!(encoded.contains("key%20with%20spaces=value%26special"));
    }

    #[test]
    fn test_parse_tags() {
        let tags = parse_tags("key1=value1&key2=value2").unwrap();
        assert_eq!(tags.get("key1"), Some(&"value1".to_string()));
        assert_eq!(tags.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_parse_tags_encoded() {
        let tags = parse_tags("key%20one=value%26special").unwrap();
        assert_eq!(tags.get("key one"), Some(&"value&special".to_string()));
    }

    #[test]
    fn test_parse_tags_empty_value() {
        let tags = parse_tags("key1=&key2=value2").unwrap();
        assert_eq!(tags.get("key1"), Some(&"".to_string()));
        assert_eq!(tags.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_parse_tags_no_value() {
        let tags = parse_tags("key1&key2=value2").unwrap();
        assert_eq!(tags.get("key1"), Some(&"".to_string()));
        assert_eq!(tags.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_parse_tags_too_many_equals() {
        assert!(parse_tags("key1=value1=extra").is_err());
    }

    #[test]
    fn test_urlencode_object_key() {
        assert_eq!(urlencode_object_key("file.txt"), "file.txt");
        assert_eq!(urlencode_object_key("my/path/file.txt"), "my/path/file.txt");
        assert_eq!(urlencode_object_key("file name.txt"), "file%20name.txt");
        assert_eq!(urlencode_object_key("special&chars"), "special%26chars");
    }

    #[test]
    fn test_insert_multimap() {
        let result = insert(None, "key1");
        assert!(result.contains_key("key1"));
        assert_eq!(result.get_vec("key1"), Some(&vec!["".to_string()]));
        let mut existing = Multimap::new();
        existing.insert("existing".to_string(), "value".to_string());
        let result = insert(Some(existing), "key2");
        assert_eq!(result.get_vec("existing"), Some(&vec!["value".to_string()]));
        assert_eq!(result.get_vec("key2"), Some(&vec!["".to_string()]));
    }

    #[test]
    fn test_to_signer_date() {
        let time = from_iso8601utc("2024-01-15T10:30:45.000Z").unwrap();
        assert_eq!(to_signer_date(time), "20240115");
    }

    #[test]
    fn test_to_amz_date() {
        let time = from_iso8601utc("2024-01-15T10:30:45.000Z").unwrap();
        assert_eq!(to_amz_date(time), "20240115T103045Z");
    }

    #[test]
    fn test_to_http_header_value() {
        let time = from_iso8601utc("2024-01-15T10:30:45Z").unwrap();
        let header = to_http_header_value(time);
        assert_eq!(header, "Mon, 15 Jan 2024 10:30:45 GMT");
        // The formatted value must parse back to the same instant.
        assert_eq!(from_http_header_value(&header).unwrap(), time);
    }

    #[test]
    fn test_to_iso8601utc() {
        let time = from_iso8601utc("2024-01-15T10:30:45.123Z").unwrap();
        let result = to_iso8601utc(time);
        assert!(result.starts_with("2024-01-15T10:30:45"));
    }

    #[test]
    fn test_from_iso8601utc_with_millis() {
        let result = from_iso8601utc("2024-01-15T10:30:45.123Z");
        assert!(result.is_ok());
        let time = result.unwrap();
        assert_eq!(time.year(), 2024);
        assert_eq!(time.month(), 1);
        assert_eq!(time.day(), 15);
    }

    #[test]
    fn test_from_iso8601utc_without_millis() {
        let result = from_iso8601utc("2024-01-15T10:30:45Z");
        assert!(result.is_ok());
        let time = result.unwrap();
        assert_eq!(time.year(), 2024);
    }

    #[test]
    fn test_from_iso8601utc_invalid() {
        assert!(from_iso8601utc("invalid").is_err());
        assert!(from_iso8601utc("2024-13-45T25:70:80Z").is_err());
    }

    #[test]
    fn test_from_http_header_value_edge_cases() {
        let result = from_http_header_value("Mon, 15 Jan 2024 10:30:45 GMT");
        assert!(result.is_ok());
    }

    #[test]
    fn test_from_http_header_value_invalid_format() {
        assert!(from_http_header_value("invalid").is_err());
    }

    #[test]
    fn test_match_region_basic() {
        // `match_region` returns `true` when the value is NOT a well-formed
        // region, so a valid region yields `false`.
        assert!(!match_region("us-east-1"));
        assert!(match_region("-us-east-1"));
        assert!(match_region("us-east-1_"));
        assert!(match_region("us east"));
        assert!(match_region(""));
    }

    #[test]
    fn test_check_ssec_valid_length() {
        let key_32_bytes = vec![0u8; 32];
        let key_64_encoded = b64_encode(&key_32_bytes);
        assert!(!key_64_encoded.is_empty());
        // 32 bytes encode to 44 base64 characters, padding included.
        assert_eq!(key_64_encoded.len(), 44);
    }

    #[test]
    fn test_get_text_default() {
        let xml_str = r#"<root><name>test</name></root>"#;
        let root = xmltree::Element::parse(xml_str.as_bytes()).unwrap();
        let value = get_text_default(&root, "name");
        assert_eq!(value, "test");
    }

    #[test]
    fn test_get_text_default_missing() {
        let xml_str = r#"<root><other>test</other></root>"#;
        let root = xmltree::Element::parse(xml_str.as_bytes()).unwrap();
        let value = get_text_default(&root, "name");
        assert_eq!(value, "");
    }

    #[test]
    fn test_get_text_option_present() {
        let xml_str = r#"<root><name>test value</name></root>"#;
        let root = xmltree::Element::parse(xml_str.as_bytes()).unwrap();
        let value = get_text_option(&root, "name");
        assert_eq!(value, Some("test value".to_string()));
    }

    #[test]
    fn test_get_text_option_missing() {
        let xml_str = r#"<root><other>test</other></root>"#;
        let root = xmltree::Element::parse(xml_str.as_bytes()).unwrap();
        let value = get_text_option(&root, "name");
        assert_eq!(value, None);
    }

    #[test]
    fn test_get_text_result_present() {
        let xml_str = r#"<root><name>test value</name></root>"#;
        let root = xmltree::Element::parse(xml_str.as_bytes()).unwrap();
        let value = get_text_result(&root, "name");
        assert!(value.is_ok());
        assert_eq!(value.unwrap(), "test value");
    }

    #[test]
    fn test_get_text_result_missing() {
        let xml_str = r#"<root><other>test</other></root>"#;
        let root = xmltree::Element::parse(xml_str.as_bytes()).unwrap();
        let value = get_text_result(&root, "name");
        assert!(value.is_err());
    }

    #[test]
    fn test_insert_multimap_new() {
        let map = insert(None, "key1");
        assert_eq!(map.len(), 1);
    }

    #[test]
    fn test_insert_multimap_existing() {
        let mut map = insert(None, "key1");
        map = insert(Some(map), "key2");
        assert_eq!(map.len(), 2);
    }

    #[test]
    fn test_parse_tags_valid_tags() {
        let tags = parse_tags("key1=value1&key2=value2").unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_parse_tags_encoded_values() {
        let tags = parse_tags("Environment=Production").unwrap();
        assert!(!tags.is_empty());
    }

    #[test]
    fn test_url_encode_equals_escape() {
        let test_cases = [
            "simple",
            "with spaces",
            "special&chars",
            "path/like",
            "equals=sign",
            "plus+sign",
            "asterisk*here",
            "tilde~ok",
            "dash-ok",
            "underscore_ok",
            "dot.ok",
            "mixed Key & Value = test",
        ];
        for input in test_cases {
            assert_eq!(
                url_encode(input),
                escape(input),
                "Encoding mismatch for: {input}"
            );
        }
    }

    #[test]
    fn test_compute_checksum_sb_matches_compute_checksum() {
        let test_data = b"The quick brown fox jumps over the lazy dog";
        let mut sb = SegmentedBytes::new();
        sb.append(Bytes::from(&test_data[0..10]));
        sb.append(Bytes::from(&test_data[10..25]));
        sb.append(Bytes::from(&test_data[25..]));
        let sb = Arc::new(sb);
        for algo in [
            ChecksumAlgorithm::CRC32,
            ChecksumAlgorithm::CRC32C,
            ChecksumAlgorithm::CRC64NVME,
            ChecksumAlgorithm::SHA1,
            ChecksumAlgorithm::SHA256,
        ] {
            let from_bytes = compute_checksum(algo, test_data);
            let from_sb = compute_checksum_sb(algo, &sb);
            assert_eq!(
                from_bytes, from_sb,
                "Mismatch for {:?}: bytes='{}' vs sb='{}'",
                algo, from_bytes, from_sb
            );
        }
    }
}
/// Base64 of the MD5 digest of `data`.
pub fn md5sum_hash(data: &[u8]) -> String {
    let digest = md5::compute(data);
    b64_encode(digest.as_slice())
}
pub fn utc_now() -> UtcTime {
chrono::offset::Utc::now()
}
/// Formats `time` as `YYYYMMDD`.
pub fn to_signer_date(time: UtcTime) -> String {
    let formatted = time.format("%Y%m%d");
    formatted.to_string()
}
/// Formats `time` as `YYYYMMDDTHHMMSSZ`.
pub fn to_amz_date(time: UtcTime) -> String {
    let formatted = time.format("%Y%m%dT%H%M%SZ");
    formatted.to_string()
}
/// Formats `time` as an HTTP date header value, e.g.
/// `Mon, 05 Feb 2024 10:30:45 GMT`.
///
/// The day of month is always two digits, as the IMF-fixdate grammar
/// requires; formatting the raw day number would emit `5` instead of `05`
/// for the first nine days of a month. The pattern mirrors the one parsed by
/// `from_http_header_value`.
pub fn to_http_header_value(time: UtcTime) -> String {
    // `%a` and `%b` produce the same abbreviated English day and month names
    // as the hand-written table this replaces.
    time.format("%a, %d %b %Y %H:%M:%S GMT").to_string()
}
/// Formats `time` as `YYYY-MM-DDTHH:MM:SS.mmmZ` (millisecond precision).
pub fn to_iso8601utc(time: UtcTime) -> String {
    let formatted = time.format("%Y-%m-%dT%H:%M:%S.%3fZ");
    formatted.to_string()
}
/// Parses a UTC timestamp of the form `YYYY-MM-DDTHH:MM:SS.mmmZ`, falling
/// back to `YYYY-MM-DDTHH:MM:SSZ` when the first pattern does not match.
///
/// # Errors
///
/// Returns the parse error of the fallback pattern (converted into
/// `ValidationErr`) when neither pattern matches.
pub fn from_iso8601utc(s: &str) -> Result<UtcTime, ValidationErr> {
    let naive = match NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%3fZ") {
        Ok(with_millis) => with_millis,
        Err(_) => NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%SZ")?,
    };
    let utc: UtcTime = DateTime::from_naive_utc_and_offset(naive, Utc);
    Ok(utc)
}
/// Percent-encoding set used by `urlencode_object_key`: `NON_ALPHANUMERIC`
/// with `-`, `_`, `.`, `~` and `/` removed, so those five characters are left
/// unescaped in object keys.
const OBJECT_KEY_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
.remove(b'-')
.remove(b'_')
.remove(b'.')
.remove(b'~')
.remove(b'/');
/// Percent-encodes an object key using `OBJECT_KEY_ENCODE_SET`, which leaves
/// `/` (and `-`, `_`, `.`, `~`) unescaped.
pub fn urlencode_object_key(key: &str) -> String {
    let encoded = utf8_percent_encode(key, OBJECT_KEY_ENCODE_SET);
    encoded.to_string()
}
pub mod aws_date_format {
use super::{UtcTime, from_iso8601utc, to_iso8601utc};
use serde::{Deserialize, Deserializer, Serializer};
pub fn serialize<S>(date: &UtcTime, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&to_iso8601utc(*date))
}
pub fn deserialize<'de, D>(deserializer: D) -> Result<UtcTime, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
from_iso8601utc(&s).map_err(serde::de::Error::custom)
}
}
/// Parses `"true"` / `"false"`, ignoring ASCII case.
///
/// # Errors
///
/// Returns `ValidationErr::InvalidBooleanValue` carrying the input for any
/// other value.
pub fn parse_bool(value: &str) -> Result<bool, ValidationErr> {
    let is_true = value.eq_ignore_ascii_case("true");
    if is_true || value.eq_ignore_ascii_case("false") {
        return Ok(is_true);
    }
    Err(ValidationErr::InvalidBooleanValue(value.to_string()))
}
/// Parses an HTTP date header value such as `Mon, 15 Jan 2024 10:30:45 GMT`
/// and interprets it as UTC.
///
/// # Errors
///
/// Returns the chrono parse error, converted into `ValidationErr`, when the
/// input does not match the pattern.
pub fn from_http_header_value(s: &str) -> Result<UtcTime, ValidationErr> {
    let naive = NaiveDateTime::parse_from_str(s, "%a, %d %b %Y %H:%M:%S GMT")?;
    let utc: UtcTime = DateTime::from_naive_utc_and_offset(naive, Utc);
    Ok(utc)
}
/// Returns `true` when `value` looks like a valid hostname.
///
/// The lower-cased value must consist of dot-separated labels of 1–63
/// characters drawn from letters, digits, `_` and `-`, and no label may start
/// or end with `-` or `_`.
pub fn match_hostname(value: &str) -> bool {
    lazy_static! {
        static ref HOSTNAME_REGEX: Regex =
            Regex::new(r"^([a-z_\d-]{1,63}\.)*([a-z_\d-]{1,63})$").unwrap();
    }

    let lowered = value.to_lowercase();
    if !HOSTNAME_REGEX.is_match(&lowered) {
        return false;
    }
    value
        .split('.')
        .all(|label| !(label.starts_with(['-', '_']) || label.ends_with(['-', '_'])))
}
/// Returns `true` when `value` is NOT a well-formed region name.
///
/// A value is rejected (result `true`) when its lower-cased form is not 1–63
/// characters of letters, digits, `_` or `-`, or when it starts or ends with
/// `-` or `_`.
///
/// NOTE(review): the polarity is the opposite of `match_hostname`, which
/// returns `true` for valid input. Confirm callers expect this before
/// changing it.
pub fn match_region(value: &str) -> bool {
    lazy_static! {
        static ref REGION_REGEX: Regex = Regex::new(r"^([a-z_\d-]{1,63})$").unwrap();
    }

    let lowered = value.to_lowercase();
    if !REGION_REGEX.is_match(&lowered) {
        return true;
    }
    value.starts_with(['-', '_']) || value.ends_with(['-', '_'])
}
/// Validates a bucket name (after trimming surrounding whitespace).
///
/// Checks, in order: length between 3 and 63 bytes, not an IPv4 address, no
/// `..`, `.-` or `-.` sequences, then the character-set pattern. With
/// `strict` set, the stricter lowercase pattern applies and the reserved
/// `xn--` / `sthree-` prefixes and `-s3alias` suffix are rejected as well.
///
/// # Errors
///
/// Returns `ValidationErr::InvalidBucketName` with the trimmed name and the
/// reason for the first check that fails.
pub fn check_bucket_name(bucket: impl AsRef<str>, strict: bool) -> Result<(), ValidationErr> {
    lazy_static! {
        static ref IPV4_REGEX: Regex = Regex::new(r"^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$").unwrap();
        static ref VALID_BUCKET_NAME_REGEX: Regex =
            Regex::new("^[A-Za-z0-9][A-Za-z0-9\\.\\-_:]{1,61}[A-Za-z0-9]$").unwrap();
        static ref VALID_BUCKET_NAME_STRICT_REGEX: Regex =
            Regex::new("^[a-z0-9][a-z0-9\\.\\-]{1,61}[a-z0-9]$").unwrap();
    }

    let bucket: &str = bucket.as_ref().trim();
    // Every failure carries the trimmed name; for the empty case that is "".
    let reject = |reason: String| -> Result<(), ValidationErr> {
        Err(ValidationErr::InvalidBucketName {
            name: bucket.into(),
            reason,
        })
    };

    match bucket.len() {
        0 => return reject("bucket name cannot be empty".into()),
        1..=2 => return reject("bucket name cannot be less than 3 characters".into()),
        64.. => return reject("bucket name cannot be greater than 63 characters".into()),
        _ => {}
    }

    if IPV4_REGEX.is_match(bucket) {
        return reject("bucket name cannot be an IP address".into());
    }

    if ["..", ".-", "-."].into_iter().any(|pair| bucket.contains(pair)) {
        return reject(
            "bucket name contains invalid successive characters '..', '.-' or '-.'".into(),
        );
    }

    if !strict {
        if VALID_BUCKET_NAME_REGEX.is_match(bucket) {
            return Ok(());
        }
        return reject(format!(
            "bucket name does not follow S3 standards, according to {}",
            *VALID_BUCKET_NAME_REGEX
        ));
    }

    if !VALID_BUCKET_NAME_STRICT_REGEX.is_match(bucket) {
        return reject(format!(
            "bucket name does not follow S3 standards strictly, according to {}",
            *VALID_BUCKET_NAME_STRICT_REGEX
        ));
    }
    if bucket.starts_with("xn--") {
        return reject("bucket name cannot start with 'xn--' (reserved for IDN)".into());
    }
    if bucket.starts_with("sthree-") {
        return reject("bucket name cannot start with 'sthree-' (reserved by AWS)".into());
    }
    if bucket.ends_with("-s3alias") {
        return reject(
            "bucket name cannot end with '-s3alias' (reserved for S3 Access Points)".into(),
        );
    }
    Ok(())
}
/// Validates an object name: it must be non-empty and at most 1024 bytes.
///
/// # Errors
///
/// Returns `ValidationErr::InvalidObjectName` describing which limit was
/// violated.
pub fn check_object_name(object: impl AsRef<str>) -> Result<(), ValidationErr> {
    let object: &str = object.as_ref();
    if object.is_empty() {
        return Err(ValidationErr::InvalidObjectName(
            "object name cannot be empty".into(),
        ));
    }
    if object.len() > 1024 {
        return Err(ValidationErr::InvalidObjectName(format!(
            "Object name ('{object}') cannot be greater than 1024 bytes"
        )));
    }
    Ok(())
}
/// Rejects an SSE configuration that requires TLS when `client` is not
/// secure.
///
/// # Errors
///
/// Returns `ValidationErr::SseTlsRequired(None)` in that case; otherwise
/// `Ok(())`.
pub fn check_sse(sse: &Option<Arc<dyn Sse>>, client: &MinioClient) -> Result<(), ValidationErr> {
    let needs_tls_but_insecure = sse
        .as_ref()
        .is_some_and(|v| v.tls_required() && !client.is_secure());
    if needs_tls_but_insecure {
        Err(ValidationErr::SseTlsRequired(None))
    } else {
        Ok(())
    }
}
/// Rejects use of an SSE-C key when `client` is not secure.
///
/// # Errors
///
/// Returns `ValidationErr::SseTlsRequired(None)` when a key is supplied and
/// the client is not secure.
pub fn check_ssec(
    ssec: &Option<SseCustomerKey>,
    client: &MinioClient,
) -> Result<(), ValidationErr> {
    match ssec {
        Some(_) if !client.is_secure() => Err(ValidationErr::SseTlsRequired(None)),
        _ => Ok(()),
    }
}
/// Like `check_ssec`, but the error carries a prefix naming the source
/// object (`source <bucket>/<object>[?versionId=<version>]: `).
///
/// # Errors
///
/// Returns `ValidationErr::SseTlsRequired(Some(prefix))` when a key is
/// supplied and the client is not secure.
pub fn check_ssec_with_log(
    ssec: &Option<SseCustomerKey>,
    client: &MinioClient,
    bucket: &BucketName,
    object: &ObjectKey,
    version: &Option<String>,
) -> Result<(), ValidationErr> {
    if ssec.is_none() || client.is_secure() {
        return Ok(());
    }
    let version_suffix = match version {
        Some(v) => ["?versionId=", v.as_str()].concat(),
        None => String::new(),
    };
    Err(ValidationErr::SseTlsRequired(Some(format!(
        "source {bucket}/{object}{}: ",
        version_suffix
    ))))
}
/// Returns the text of the first child named `tag`, or an empty string when
/// the child or its text is missing.
pub fn get_text_default(element: &Element, tag: &str) -> String {
    match element.get_child(tag) {
        Some(child) => child.get_text().unwrap_or_default().to_string(),
        None => String::new(),
    }
}
/// Returns the text of the first child named `tag`.
///
/// # Errors
///
/// Returns an XML `ValidationErr` when the child is missing or has no text.
pub fn get_text_result(element: &Element, tag: &str) -> Result<String, ValidationErr> {
    let child = element
        .get_child(tag)
        .ok_or_else(|| ValidationErr::xml_error(format!("<{tag}> tag not found")))?;
    let text = child
        .get_text()
        .ok_or_else(|| ValidationErr::xml_error(format!("text of <{tag}> tag not found")))?;
    Ok(text.to_string())
}
/// Returns the text of the first child named `tag`, or `None` when the child
/// or its text is missing.
pub fn get_text_option(element: &Element, tag: &str) -> Option<String> {
    let child = element.get_child(tag)?;
    child.get_text().map(|text| text.to_string())
}
/// Removes one pair of surrounding double quotes from `s`, if present.
///
/// Strings shorter than two bytes, or quoted on one side only, are returned
/// unchanged.
pub fn trim_quotes(mut s: String) -> String {
    let quoted = s.len() >= 2 && s.starts_with('"') && s.ends_with('"');
    if quoted {
        s.pop();
        s.remove(0);
    }
    s
}
/// Copies as many leading bytes of `src` into `dst` as fit and returns the
/// number of bytes copied (`min(dst.len(), src.len())`).
pub fn copy_slice(dst: &mut [u8], src: &[u8]) -> usize {
    let count = dst.len().min(src.len());
    dst[..count].copy_from_slice(&src[..count]);
    count
}
/// Percent-encoding set used by `escape`: `NON_ALPHANUMERIC` with `-`, `_`,
/// `.` and `~` removed, so those four characters are left unescaped.
const QUERY_ESCAPE: &AsciiSet = &NON_ALPHANUMERIC
.remove(b'-')
.remove(b'_')
.remove(b'.')
.remove(b'~');
/// Percent-decodes `s` and requires the result to be valid UTF-8.
///
/// # Errors
///
/// Returns `ValidationErr::TagDecodingError` carrying the input and the
/// UTF-8 error message when the decoded bytes are not valid UTF-8.
fn unescape(s: &str) -> Result<String, ValidationErr> {
    match percent_decode_str(s).decode_utf8() {
        Ok(decoded) => Ok(decoded.to_string()),
        Err(e) => Err(ValidationErr::TagDecodingError {
            input: s.to_string(),
            error_message: e.to_string(),
        }),
    }
}
/// Percent-encodes `s` using `QUERY_ESCAPE`.
fn escape(s: &str) -> String {
    let encoded = utf8_percent_encode(s, QUERY_ESCAPE);
    encoded.to_string()
}
/// Encodes tags as `key=value` pairs joined by `&`, percent-escaping each
/// key and value. Pairs appear in the map's iteration order.
pub fn encode_tags(h: &HashMap<String, String>) -> String {
    let pairs: Vec<String> = h
        .iter()
        .map(|(key, value)| format!("{}={}", escape(key), escape(value)))
        .collect();
    pairs.join("&")
}
pub fn parse_tags(s: &str) -> Result<HashMap<String, String>, ValidationErr> {
let mut tags = HashMap::new();
for tag in s.split('&') {
let mut kv = tag.split('=');
let k = match kv.next() {
Some(v) => unescape(v)?,
None => {
return Err(ValidationErr::TagDecodingError {
input: s.into(),
error_message: "tag key was empty".into(),
});
}
};
let v = match kv.next() {
Some(v) => unescape(v)?,
None => "".to_owned(),
};
if kv.next().is_some() {
return Err(ValidationErr::TagDecodingError {
input: s.into(),
error_message: "tag had too many values for a key".into(),
});
}
tags.insert(k, v);
}
Ok(tags)
}
#[must_use]
pub fn insert(data: Option<Multimap>, key: impl Into<String>) -> Multimap {
let mut result: Multimap = data.unwrap_or_default();
result.insert(key.into(), String::new());
result
}
/// Thin indexed view over `xmltree::Element` for repeated child lookups by
/// tag name.
pub mod xml {
    use crate::s3::error::ValidationErr;
    use std::collections::HashMap;

    /// Maps each child element name to the positions, within the parent's
    /// `children` list, of the element nodes carrying that name.
    #[derive(Debug, Clone)]
    struct XmlElementIndex {
        children: HashMap<String, Vec<usize>>,
    }

    impl XmlElementIndex {
        /// Position of the first child element named `tag`, if any.
        fn get_first(&self, tag: &str) -> Option<usize> {
            self.children
                .get(tag)
                .and_then(|positions| positions.first().copied())
        }

        /// All positions of child elements named `tag`, if any.
        fn get(&self, tag: &str) -> Option<&Vec<usize>> {
            self.children.get(tag)
        }
    }

    impl From<&xmltree::Element> for XmlElementIndex {
        fn from(value: &xmltree::Element) -> Self {
            let mut children: HashMap<String, Vec<usize>> = HashMap::new();
            for (position, node) in value.children.iter().enumerate() {
                // Only element nodes are indexed; other node kinds are skipped.
                if let Some(child) = node.as_element() {
                    children
                        .entry(child.name.clone())
                        .or_default()
                        .push(position);
                }
            }
            Self { children }
        }
    }

    /// Borrowed XML element together with an index of its child elements.
    #[derive(Debug, Clone)]
    pub struct Element<'a> {
        inner: &'a xmltree::Element,
        child_element_index: XmlElementIndex,
    }

    impl<'a> From<&'a xmltree::Element> for Element<'a> {
        fn from(value: &'a xmltree::Element) -> Self {
            Self {
                inner: value,
                child_element_index: XmlElementIndex::from(value),
            }
        }
    }

    impl Element<'_> {
        /// Name of the wrapped element.
        pub fn name(&self) -> &str {
            self.inner.name.as_str()
        }

        /// Text of the first child named `tag`, or `None` when the child or
        /// its text is missing.
        pub fn get_child_text(&self, tag: &str) -> Option<String> {
            let position = self.child_element_index.get_first(tag)?;
            let child = self.inner.children[position].as_element()?;
            child.get_text().map(|text| text.to_string())
        }

        /// Text of the first child named `tag`.
        ///
        /// # Errors
        ///
        /// Returns an XML `ValidationErr` when the child is missing or has no
        /// text.
        pub fn get_child_text_or_error(&self, tag: &str) -> Result<String, ValidationErr> {
            let position = self
                .child_element_index
                .get_first(tag)
                .ok_or_else(|| ValidationErr::xml_error(format!("<{tag}> tag not found")))?;
            // The index only records positions of element nodes, so this
            // `unwrap` is not expected to fail.
            let child = self.inner.children[position].as_element().unwrap();
            match child.get_text() {
                Some(text) => Ok(text.to_string()),
                None => Err(ValidationErr::xml_error(format!(
                    "text of <{tag}> tag not found"
                ))),
            }
        }

        /// All child elements named `tag`, each paired with its position in
        /// the parent's `children` list.
        pub fn get_matching_children(&self, tag: &str) -> Vec<(usize, Element<'_>)> {
            self.child_element_index
                .get(tag)
                .into_iter()
                .flatten()
                .map(|&position| {
                    let child = self.inner.children[position].as_element().unwrap();
                    (position, Element::from(child))
                })
                .collect()
        }

        /// First child element named `tag`, if any.
        pub fn get_child(&self, tag: &str) -> Option<Element<'_>> {
            let position = self.child_element_index.get_first(tag)?;
            self.inner.children[position]
                .as_element()
                .map(Element::from)
        }

        /// All child element nodes of the wrapped element, in document order.
        pub fn get_xmltree_children(&self) -> Vec<&xmltree::Element> {
            let mut elements = Vec::new();
            for node in &self.inner.children {
                if let Some(child) = node.as_element() {
                    elements.push(child);
                }
            }
            elements
        }
    }

    /// Iterator that merges two `(position, element)` lists, yielding the
    /// entry with the smaller position first at each step.
    pub struct MergeXmlElements<'a> {
        v1: &'a Vec<(usize, Element<'a>)>,
        v2: &'a Vec<(usize, Element<'a>)>,
        i1: usize,
        i2: usize,
    }

    impl<'a> MergeXmlElements<'a> {
        /// Creates a merge iterator over `v1` and `v2`, starting at the
        /// beginning of both lists.
        pub fn new(v1: &'a Vec<(usize, Element<'a>)>, v2: &'a Vec<(usize, Element<'a>)>) -> Self {
            Self {
                v1,
                v2,
                i1: 0,
                i2: 0,
            }
        }
    }

    impl<'a> Iterator for MergeXmlElements<'a> {
        type Item = &'a Element<'a>;

        fn next(&mut self) -> Option<Self::Item> {
            let left = self.v1.get(self.i1);
            let right = self.v2.get(self.i2);
            // On equal positions the entry from `v2` is taken first.
            let take_left = match (left, right) {
                (Some(l), Some(r)) => l.0 < r.0,
                (Some(_), None) => true,
                (None, Some(_)) => false,
                (None, None) => return None,
            };
            if take_left {
                self.i1 += 1;
                left.map(|entry| &entry.1)
            } else {
                self.i2 += 1;
                right.map(|entry| &entry.1)
            }
        }
    }
}