// minio 0.4.0 - Docs.rs

// MinIO Rust Library for Amazon S3 Compatible Cloud Storage
// Copyright 2022 MinIO, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::s3::MinioClient;
use crate::s3::error::ValidationErr;
use crate::s3::multimap_ext::Multimap;
use crate::s3::segmented_bytes::SegmentedBytes;
use crate::s3::sse::{Sse, SseCustomerKey};
use crate::s3::types::{BucketName, ObjectKey};
use base64::engine::Engine as _;
use chrono::{DateTime, Datelike, NaiveDateTime, Utc};
use crc_fast::{CrcAlgorithm, Digest as CrcFastDigest, checksum as crc_fast_checksum};
use lazy_static::lazy_static;
use percent_encoding::{AsciiSet, NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
use regex::Regex;
#[cfg(feature = "ring")]
use ring::digest::{Context, SHA256};
use sha1::{Digest as Sha1Digest, Sha1};
#[cfg(not(feature = "ring"))]
use sha2::Sha256;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use xmltree::Element;

/// Date and time with UTC timezone.
///
/// Alias for `chrono`'s [`DateTime`] parameterized with the [`Utc`] time zone.
pub type UtcTime = DateTime<Utc>;

// Great stuff to get confused about.
// String "a b+c" in Percent-Encoding (RFC 3986) becomes "a%20b%2Bc".
// S3 sometimes returns Form-Encoding (application/x-www-form-urlencoded) rendering string "a%20b%2Bc" into "a+b%2Bc"
// If you were to do Percent-Decoding on "a+b%2Bc" you would get "a+b+c", which is wrong.
// If you use Form-Decoding on "a+b%2Bc" you would get "a b+c", which is correct.

/// Decodes a URL-encoded string in the application/x-www-form-urlencoded syntax into a string.
///
/// Note that "+" is decoded to a space character, and "%2B" is decoded to a plus sign.
///
/// The whole input is treated as a single value: literal `&` and `=` characters are
/// preserved rather than being interpreted as key/value separators. A `%` that is not
/// followed by two hexadecimal digits is kept verbatim, and decoded bytes that are not
/// valid UTF-8 are replaced with U+FFFD.
pub fn url_decode(s: &str) -> String {
    let raw = s.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(raw.len());
    let mut i = 0;

    while i < raw.len() {
        match raw[i] {
            // Form-encoding represents a space as '+'.
            b'+' => {
                decoded.push(b' ');
                i += 1;
            }
            b'%' => {
                let hi = raw.get(i + 1).and_then(|b| char::from(*b).to_digit(16));
                let lo = raw.get(i + 2).and_then(|b| char::from(*b).to_digit(16));
                if let (Some(hi), Some(lo)) = (hi, lo) {
                    // Both nibbles are < 16, so the combined value always fits in a byte.
                    decoded.push((hi * 16 + lo) as u8);
                    i += 3;
                } else {
                    // Malformed escape: keep the '%' and continue with the next byte.
                    decoded.push(b'%');
                    i += 1;
                }
            }
            other => {
                decoded.push(other);
                i += 1;
            }
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}

/// Percent-encodes `s` for use in a URL.
///
/// A whitespace is emitted as "%20" and a plus sign as "%2B".
pub fn url_encode(s: &str) -> String {
    let encoded = urlencoding::encode(s);
    encoded.into_owned()
}

/// Base64-encodes `input` using the standard engine of the `base64` crate.
pub fn b64_encode(input: impl AsRef<[u8]>) -> String {
    use base64::engine::general_purpose::STANDARD;

    STANDARD.encode(input)
}

/// Returns the CRC32 (ISO-HDLC) checksum of `data`.
///
/// The computation is delegated to the `crc-fast` crate, which provides a
/// hardware-accelerated SIMD implementation (PCLMULQDQ/CLMUL) on modern CPUs.
pub fn crc32(data: &[u8]) -> u32 {
    let wide = crc_fast_checksum(CrcAlgorithm::Crc32IsoHdlc, data);
    // The checksum routine yields a wider integer; narrow it to the 32-bit result.
    wide as u32
}

/// Returns the CRC64-NVME checksum of `data`.
///
/// The computation is delegated to the `crc-fast` crate, which provides a
/// hardware-accelerated SIMD implementation (PCLMULQDQ/CLMUL) on modern CPUs.
pub fn crc64nvme(data: &[u8]) -> u64 {
    let algorithm = CrcAlgorithm::Crc64Nvme;
    crc_fast_checksum(algorithm, data)
}

/// Reads the first four bytes of `data` as a big-endian `u32`.
///
/// Bytes beyond the fourth are ignored.
///
/// # Errors
///
/// Returns [`ValidationErr::InvalidIntegerValue`] when `data` holds fewer than four bytes.
pub fn uint32(data: &[u8]) -> Result<u32, ValidationErr> {
    match data.get(..4) {
        Some(prefix) => {
            let mut word = [0u8; 4];
            word.copy_from_slice(prefix);
            Ok(u32::from_be_bytes(word))
        }
        None => {
            let cause =
                std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "not enough bytes");
            Err(ValidationErr::InvalidIntegerValue {
                message: "data is not a valid 32-bit BigEndian unsigned integer".into(),
                source: Box::new(cause),
            })
        }
    }
}

/// SHA256 hash of empty data.
///
/// Lowercase hex encoding, i.e. the same format [`sha256_hash`] produces.
pub const EMPTY_SHA256: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";

/// Gets hex-encoded SHA256 hash of given data.
///
/// The digest is rendered as lowercase hexadecimal via [`hex_encode`]. The hashing
/// backend is selected at compile time: `ring` when the `ring` feature is enabled,
/// otherwise the `sha2` crate.
pub fn sha256_hash(data: &[u8]) -> String {
    // Backend: ring's one-shot digest.
    #[cfg(feature = "ring")]
    {
        hex_encode(ring::digest::digest(&SHA256, data).as_ref())
    }
    // Backend: sha2, seeding the hasher with `data` and finalizing immediately.
    #[cfg(not(feature = "ring"))]
    {
        hex_encode(Sha256::new_with_prefix(data).finalize().as_ref())
    }
}

/// Hex-encode a byte slice into a lowercase ASCII string.
///
/// Each input byte produces exactly two output characters (high nibble first), so the
/// result is always `2 * bytes.len()` characters long. An empty slice yields an empty
/// string.
///
/// The implementation is entirely safe Rust: the output buffer is allocated once with
/// its final capacity, and the lookup-table indices are produced by `>> 4` and `& 0x0F`,
/// which are always in the range 0–15.
pub fn hex_encode(bytes: &[u8]) -> String {
    const LUT: &[u8; 16] = b"0123456789abcdef";

    // Reserve the exact final size up front so pushing never reallocates.
    let mut s = String::with_capacity(bytes.len() * 2);
    for &b in bytes {
        s.push(char::from(LUT[usize::from(b >> 4)]));
        s.push(char::from(LUT[usize::from(b & 0x0F)]));
    }
    s
}

/// Gets hex-encoded SHA256 hash of the data held in a [`SegmentedBytes`].
///
/// The segments yielded by `sb.iter()` are fed to the hasher one after another, and the
/// final digest is rendered with [`hex_encode`]. The hashing backend is selected at
/// compile time: `ring` when the `ring` feature is enabled, otherwise the `sha2` crate.
pub fn sha256_hash_sb(sb: Arc<SegmentedBytes>) -> String {
    #[cfg(feature = "ring")]
    {
        let mut context = Context::new(&SHA256);
        // Feed each segment in iteration order.
        for data in sb.iter() {
            context.update(data.as_ref());
        }
        hex_encode(context.finish().as_ref())
    }
    #[cfg(not(feature = "ring"))]
    {
        let mut hasher = Sha256::new();
        // Feed each segment in iteration order.
        for data in sb.iter() {
            hasher.update(data);
        }
        hex_encode(hasher.finalize().as_ref())
    }
}

/// S3 checksum algorithms supported by the API
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumAlgorithm {
    /// CRC32 (computed in this module with the ISO-HDLC variant).
    CRC32,
    /// CRC32C (Castagnoli polynomial; computed in this module with the iSCSI variant).
    CRC32C,
    /// SHA-1 hash.
    SHA1,
    /// SHA-256 hash.
    SHA256,
    /// CRC-64/NVME checksum.
    CRC64NVME,
}

impl ChecksumAlgorithm {
    /// Name of the algorithm as used for the AWS header value (e.g. `"CRC32C"`).
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::CRC32 => "CRC32",
            Self::CRC32C => "CRC32C",
            Self::SHA1 => "SHA1",
            Self::SHA256 => "SHA256",
            Self::CRC64NVME => "CRC64NVME",
        }
    }

    /// Name of the HTTP header that carries this algorithm's checksum
    /// (e.g., "X-Amz-Checksum-CRC32").
    pub fn header_name(&self) -> &'static str {
        use crate::s3::types::header_constants as hdr;

        match *self {
            Self::CRC32 => hdr::X_AMZ_CHECKSUM_CRC32,
            Self::CRC32C => hdr::X_AMZ_CHECKSUM_CRC32C,
            Self::SHA1 => hdr::X_AMZ_CHECKSUM_SHA1,
            Self::SHA256 => hdr::X_AMZ_CHECKSUM_SHA256,
            Self::CRC64NVME => hdr::X_AMZ_CHECKSUM_CRC64NVME,
        }
    }
}

/// Parses a checksum algorithm name from a string.
///
/// ASCII case-insensitive parsing of S3 checksum algorithm names. Useful for parsing
/// header values or configuration strings. The accepted names are exactly the values
/// returned by [`ChecksumAlgorithm::as_str`], so the two can never drift apart.
///
/// # Supported Values
///
/// - `"CRC32"` / `"crc32"` - Standard CRC32 checksum
/// - `"CRC32C"` / `"crc32c"` - CRC32C (Castagnoli) checksum
/// - `"SHA1"` / `"sha1"` - SHA-1 hash
/// - `"SHA256"` / `"sha256"` - SHA-256 hash
/// - `"CRC64NVME"` / `"crc64nvme"` - CRC-64/NVME checksum
///
/// # Errors
///
/// Returns an error string if the algorithm name is not recognized.
impl FromStr for ChecksumAlgorithm {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        const ALL: [ChecksumAlgorithm; 5] = [
            ChecksumAlgorithm::CRC32,
            ChecksumAlgorithm::CRC32C,
            ChecksumAlgorithm::SHA1,
            ChecksumAlgorithm::SHA256,
            ChecksumAlgorithm::CRC64NVME,
        ];

        // Compare without allocating and without Unicode case mapping, so only
        // ASCII spellings of the canonical names are accepted.
        ALL.iter()
            .copied()
            .find(|algorithm| s.eq_ignore_ascii_case(algorithm.as_str()))
            .ok_or_else(|| format!("Unknown checksum algorithm: {}", s))
    }
}

/// Returns the base64 encoding of the big-endian CRC32C (Castagnoli polynomial)
/// checksum of `data`.
///
/// The checksum itself is computed by the `crc-fast` crate, which provides hardware
/// acceleration (PCLMULQDQ/CLMUL) on modern CPUs.
pub fn crc32c(data: &[u8]) -> String {
    let raw = crc_fast_checksum(CrcAlgorithm::Crc32Iscsi, data);
    let big_endian: [u8; 4] = (raw as u32).to_be_bytes();
    b64_encode(big_endian)
}

/// Returns the base64 encoding of the SHA1 digest of `data`.
pub fn sha1_hash(data: &[u8]) -> String {
    let digest = Sha1::digest(data);
    b64_encode(&digest[..])
}

/// Computes SHA256 hash and returns base64-encoded value (for checksums, not authentication)
///
/// Unlike [`sha256_hash`], which returns lowercase hex, this returns the digest bytes
/// base64-encoded via [`b64_encode`]. The hashing backend is selected at compile time:
/// `ring` when the `ring` feature is enabled, otherwise the `sha2` crate.
pub fn sha256_checksum(data: &[u8]) -> String {
    // Backend: ring's one-shot digest.
    #[cfg(feature = "ring")]
    {
        b64_encode(ring::digest::digest(&SHA256, data).as_ref())
    }
    // Backend: sha2, seeding the hasher with `data` and finalizing immediately.
    #[cfg(not(feature = "ring"))]
    {
        let result = Sha256::new_with_prefix(data).finalize();
        b64_encode(&result[..])
    }
}

/// Returns the base64 encoding of the big-endian CRC32 checksum of `data`.
pub fn crc32_checksum(data: &[u8]) -> String {
    let big_endian = crc32(data).to_be_bytes();
    b64_encode(big_endian)
}

/// Returns the base64 encoding of the big-endian CRC64-NVME checksum of `data`.
pub fn crc64nvme_checksum(data: &[u8]) -> String {
    let big_endian = crc64nvme(data).to_be_bytes();
    b64_encode(big_endian)
}

/// Computes checksum based on the specified algorithm for contiguous byte slices.
///
/// This function computes checksums on already-materialized `&[u8]` data. Use this when:
/// - Data is already in a contiguous buffer (e.g., from `reqwest::Response::bytes()`)
/// - Working with small byte arrays in tests
/// - Data comes from sources other than `SegmentedBytes`
///
/// **Performance Note**: If you have data in `SegmentedBytes`, use [`compute_checksum_sb`]
/// instead to avoid copying. Calling `.to_bytes()` on `SegmentedBytes` creates a full copy
/// of all segments, which is expensive for large objects (up to 5GB per part).
///
/// # Arguments
///
/// * `algorithm` - The checksum algorithm to use (CRC32, CRC32C, CRC64NVME, SHA1, SHA256)
/// * `data` - The contiguous byte slice to compute checksum over
///
/// # Returns
///
/// Base64-encoded checksum string suitable for S3 headers
///
/// # Example
///
/// ```
/// use minio::s3::utils::{compute_checksum, ChecksumAlgorithm};
///
/// let data = b"hello world";
/// let checksum = compute_checksum(ChecksumAlgorithm::CRC32C, data);
/// println!("CRC32C: {}", checksum);
/// ```
pub fn compute_checksum(algorithm: ChecksumAlgorithm, data: &[u8]) -> String {
    match algorithm {
        ChecksumAlgorithm::CRC32 => crc32_checksum(data),
        ChecksumAlgorithm::CRC32C => crc32c(data),
        ChecksumAlgorithm::SHA1 => sha1_hash(data),
        ChecksumAlgorithm::SHA256 => sha256_checksum(data),
        ChecksumAlgorithm::CRC64NVME => crc64nvme_checksum(data),
    }
}

/// Computes checksum for `SegmentedBytes` without copying data (zero-copy streaming).
///
/// This function computes checksums by iterating over segments incrementally, avoiding
/// the need to materialize the entire buffer in contiguous memory. This is critical for
/// performance when working with large objects (up to 5GB per part in multipart uploads).
///
/// **Always use this function for `SegmentedBytes` data** instead of calling `.to_bytes()`
/// followed by `compute_checksum()`, which would create an expensive full copy.
///
/// # Performance Characteristics
///
/// - **Memory**: Only allocates hasher state (~64 bytes for SHA256, ~4-8 bytes for CRC)
/// - **CPU**: Hardware-accelerated where available (CRC32C with SSE 4.2)
/// - **Streaming**: Processes data incrementally without buffering
///
/// # Arguments
///
/// * `algorithm` - The checksum algorithm to use (CRC32, CRC32C, CRC64NVME, SHA1, SHA256)
/// * `sb` - The segmented bytes to compute checksum over (passed by reference, not consumed)
///
/// # Returns
///
/// Base64-encoded checksum string suitable for S3 headers
///
/// # Example
///
/// ```
/// use minio::s3::utils::{compute_checksum_sb, ChecksumAlgorithm};
/// use minio::s3::segmented_bytes::SegmentedBytes;
/// use std::sync::Arc;
/// use bytes::Bytes;
///
/// let mut sb = SegmentedBytes::new();
/// sb.append(Bytes::from("hello "));
/// sb.append(Bytes::from("world"));
/// let sb = Arc::new(sb);
///
/// let checksum = compute_checksum_sb(ChecksumAlgorithm::CRC32C, &sb);
/// println!("CRC32C: {}", checksum);
/// ```
///
/// # See Also
///
/// - [`compute_checksum`] - For already-contiguous `&[u8]` data
pub fn compute_checksum_sb(algorithm: ChecksumAlgorithm, sb: &Arc<SegmentedBytes>) -> String {
    match algorithm {
        ChecksumAlgorithm::CRC32 => {
            let mut digest = CrcFastDigest::new(CrcAlgorithm::Crc32IsoHdlc);
            for data in sb.iter() {
                digest.update(data.as_ref());
            }
            b64_encode((digest.finalize() as u32).to_be_bytes())
        }
        ChecksumAlgorithm::CRC32C => {
            let mut digest = CrcFastDigest::new(CrcAlgorithm::Crc32Iscsi);
            for data in sb.iter() {
                digest.update(data.as_ref());
            }
            b64_encode((digest.finalize() as u32).to_be_bytes())
        }
        ChecksumAlgorithm::SHA1 => {
            let mut hasher = Sha1::new();
            for data in sb.iter() {
                hasher.update(data.as_ref());
            }
            let result = hasher.finalize();
            b64_encode(&result[..])
        }
        ChecksumAlgorithm::SHA256 => {
            #[cfg(feature = "ring")]
            {
                let mut context = Context::new(&SHA256);
                for data in sb.iter() {
                    context.update(data.as_ref());
                }
                b64_encode(context.finish().as_ref())
            }
            #[cfg(not(feature = "ring"))]
            {
                let mut hasher = Sha256::new();
                for data in sb.iter() {
                    hasher.update(data.as_ref());
                }
                let result = hasher.finalize();
                b64_encode(&result[..])
            }
        }
        ChecksumAlgorithm::CRC64NVME => {
            let mut digest = CrcFastDigest::new(CrcAlgorithm::Crc64Nvme);
            for data in sb.iter() {
                digest.update(data.as_ref());
            }
            b64_encode(digest.finalize().to_be_bytes())
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;
    use std::collections::HashMap;

    #[test]
    fn test_url_decode_spaces() {
        assert_eq!(url_decode("hello%20world"), "hello world");
        assert_eq!(url_decode("hello+world"), "hello world");
    }

    #[test]
    fn test_url_decode_plus_sign() {
        assert_eq!(url_decode("a%2Bb"), "a+b");
        assert_eq!(url_decode("a%2bb"), "a+b");
    }

    #[test]
    fn test_url_decode_special_chars() {
        assert_eq!(url_decode("a%26b"), "a&b");
        assert_eq!(url_decode("a%3Db"), "a=b");
        assert_eq!(url_decode("a%2Fb"), "a/b");
    }

    #[test]
    fn test_url_encode_spaces() {
        assert_eq!(url_encode("hello world"), "hello%20world");
    }

    #[test]
    fn test_url_encode_plus_sign() {
        assert_eq!(url_encode("a+b"), "a%2Bb");
    }

    #[test]
    fn test_url_encode_special_chars() {
        assert_eq!(url_encode("a&b=c"), "a%26b%3Dc");
        assert_eq!(url_encode("a/b"), "a%2Fb");
    }

    #[test]
    fn test_b64_encode() {
        assert_eq!(b64_encode("hello"), "aGVsbG8=");
        assert_eq!(b64_encode(""), "");
        assert_eq!(b64_encode([0xFF, 0x00, 0xFF]), "/wD/");
        assert_eq!(
            b64_encode("The quick brown fox"),
            "VGhlIHF1aWNrIGJyb3duIGZveA=="
        );
    }

    #[test]
    fn test_crc32() {
        assert_eq!(crc32(b"hello"), 0x3610a686);
        assert_eq!(crc32(b""), 0);
        assert_eq!(crc32(b"123456789"), 0xcbf43926);
    }

    #[test]
    fn test_uint32_valid() {
        assert_eq!(uint32(&[0x00, 0x00, 0x00, 0x42]).unwrap(), 66);
        assert_eq!(uint32(&[0xFF, 0xFF, 0xFF, 0xFF]).unwrap(), 4294967295);
        assert_eq!(uint32(&[0x00, 0x00, 0x00, 0x00]).unwrap(), 0);
        assert_eq!(uint32(&[0x12, 0x34, 0x56, 0x78]).unwrap(), 0x12345678);
    }

    #[test]
    fn test_uint32_insufficient_bytes() {
        assert!(uint32(&[]).is_err());
        assert!(uint32(&[0x00]).is_err());
        assert!(uint32(&[0x00, 0x01]).is_err());
        assert!(uint32(&[0x00, 0x01, 0x02]).is_err());
    }

    #[test]
    fn test_uint32_extra_bytes() {
        assert_eq!(uint32(&[0x00, 0x00, 0x00, 0x42, 0xFF, 0xFF]).unwrap(), 66);
    }

    #[test]
    fn test_sha256_hash() {
        assert_eq!(sha256_hash(b""), EMPTY_SHA256);
        assert_eq!(
            sha256_hash(b"hello"),
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
        assert_eq!(
            sha256_hash(b"The quick brown fox jumps over the lazy dog"),
            "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592"
        );
    }

    #[test]
    fn test_hex_encode() {
        assert_eq!(hex_encode(&[]), "");
        assert_eq!(hex_encode(&[0x00]), "00");
        assert_eq!(hex_encode(&[0xFF]), "ff");
        assert_eq!(hex_encode(&[0xDE, 0xAD, 0xBE, 0xEF]), "deadbeef");
        assert_eq!(
            hex_encode(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC]),
            "123456789abc"
        );
    }

    #[test]
    fn test_empty_sha256_segmented_bytes() {
        assert_eq!(
            EMPTY_SHA256,
            sha256_hash_sb(Arc::new(SegmentedBytes::new()))
        );
    }

    #[test]
    fn test_md5sum_hash() {
        let hash = md5sum_hash(b"hello");
        assert!(!hash.is_empty());
        assert_eq!(hash, "XUFAKrxLKna5cZ2REBfFkg==");

        let empty_hash = md5sum_hash(b"");
        assert_eq!(empty_hash, "1B2M2Y8AsgTpgAmY7PhCfg==");
    }

    /// Pins `crc32c` against known vectors instead of only checking for non-empty output.
    #[test]
    fn test_crc32c() {
        // CRC of empty input is 0 -> four zero bytes.
        assert_eq!(crc32c(b""), "AAAAAA==");
        // Standard CRC-32/ISCSI check value for "123456789" is 0xE3069283.
        assert_eq!(crc32c(b"123456789"), "4waSgw==");
        // A 32-bit checksum always encodes to 8 base64 characters.
        assert_eq!(crc32c(b"hello").len(), 8);
        assert_ne!(crc32c(b"hello"), crc32c(b""));
    }

    /// Pins `sha1_hash` against well-known SHA-1 digests (base64 of the raw 20 bytes).
    #[test]
    fn test_sha1_hash() {
        // aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d
        assert_eq!(sha1_hash(b"hello"), "qvTGHdzF6KLavt4PO0gs2a6pQ00=");
        // da39a3ee5e6b4b0d3255bfef95601890afd80709
        assert_eq!(sha1_hash(b""), "2jmj7l5rSw0yVb/vlWAYkK/YBwk=");
        // 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12
        assert_eq!(
            sha1_hash(b"The quick brown fox jumps over the lazy dog"),
            "L9ThxnotKPzthJ7hu3bnORuT6xI="
        );
    }

    /// Pins `sha256_checksum` against the base64 form of the digests that
    /// `test_sha256_hash` checks in hex.
    #[test]
    fn test_sha256_checksum() {
        assert_eq!(
            sha256_checksum(b"hello"),
            "LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ="
        );
        assert_eq!(
            sha256_checksum(b""),
            "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
        );
    }

    /// Pins `crc32_checksum` against the values asserted numerically in `test_crc32`.
    #[test]
    fn test_crc32_checksum() {
        // 0x3610a686 big-endian, base64-encoded.
        assert_eq!(crc32_checksum(b"hello"), "NhCmhg==");
        assert_eq!(crc32_checksum(b""), "AAAAAA==");
        // 0xcbf43926 big-endian, base64-encoded.
        assert_eq!(crc32_checksum(b"123456789"), "y/Q5Jg==");
    }

    /// Every variant of `ChecksumAlgorithm` must map to its canonical name.
    #[test]
    fn test_checksum_algorithm_as_str() {
        assert_eq!(ChecksumAlgorithm::CRC32.as_str(), "CRC32");
        assert_eq!(ChecksumAlgorithm::CRC32C.as_str(), "CRC32C");
        assert_eq!(ChecksumAlgorithm::SHA1.as_str(), "SHA1");
        assert_eq!(ChecksumAlgorithm::SHA256.as_str(), "SHA256");
        assert_eq!(ChecksumAlgorithm::CRC64NVME.as_str(), "CRC64NVME");
    }

    /// Parsing must accept every variant, in either ASCII case, and reject unknown names.
    #[test]
    fn test_checksum_algorithm_from_str() {
        assert_eq!(
            "CRC32".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::CRC32
        );
        assert_eq!(
            "crc32c".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::CRC32C
        );
        assert_eq!(
            "SHA1".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::SHA1
        );
        assert_eq!(
            "sha256".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::SHA256
        );
        assert_eq!(
            "CRC64NVME".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::CRC64NVME
        );
        assert_eq!(
            "crc64nvme".parse::<ChecksumAlgorithm>().unwrap(),
            ChecksumAlgorithm::CRC64NVME
        );
        assert!("invalid".parse::<ChecksumAlgorithm>().is_err());
        assert!("".parse::<ChecksumAlgorithm>().is_err());
    }

    /// `compute_checksum` must dispatch every algorithm to its dedicated helper.
    #[test]
    fn test_compute_checksum() {
        let data = b"hello world";

        let crc32_result = compute_checksum(ChecksumAlgorithm::CRC32, data);
        assert_eq!(crc32_result, crc32_checksum(data));

        let crc32c_result = compute_checksum(ChecksumAlgorithm::CRC32C, data);
        assert_eq!(crc32c_result, crc32c(data));

        let sha1_result = compute_checksum(ChecksumAlgorithm::SHA1, data);
        assert_eq!(sha1_result, sha1_hash(data));

        let sha256_result = compute_checksum(ChecksumAlgorithm::SHA256, data);
        assert_eq!(sha256_result, sha256_checksum(data));

        let crc64_result = compute_checksum(ChecksumAlgorithm::CRC64NVME, data);
        assert_eq!(crc64_result, crc64nvme_checksum(data));

        // Different algorithms must not collapse onto the same output.
        assert_ne!(crc32_result, crc32c_result);
        assert_ne!(sha1_result, sha256_result);
        assert_ne!(crc32_result, crc64_result);
    }

    #[test]
    fn test_parse_bool_true() {
        assert!(parse_bool("true").unwrap());
        assert!(parse_bool("True").unwrap());
        assert!(parse_bool("TRUE").unwrap());
        assert!(parse_bool("TrUe").unwrap());
    }

    #[test]
    fn test_parse_bool_false() {
        assert!(!parse_bool("false").unwrap());
        assert!(!parse_bool("False").unwrap());
        assert!(!parse_bool("FALSE").unwrap());
        assert!(!parse_bool("FaLsE").unwrap());
    }

    #[test]
    fn test_parse_bool_invalid() {
        assert!(parse_bool("yes").is_err());
        assert!(parse_bool("no").is_err());
        assert!(parse_bool("1").is_err());
        assert!(parse_bool("0").is_err());
        assert!(parse_bool("").is_err());
    }

    #[test]
    fn test_match_hostname_valid() {
        assert!(match_hostname("example.com"));
        assert!(match_hostname("sub.example.com"));
        assert!(match_hostname("my-server"));
        assert!(match_hostname("server123"));
        assert!(match_hostname("a.b.c.d.example.com"));
    }

    #[test]
    fn test_match_hostname_invalid() {
        assert!(!match_hostname("-invalid"));
        assert!(!match_hostname("invalid-"));
        assert!(!match_hostname("_invalid"));
        assert!(!match_hostname("invalid_"));
        assert!(!match_hostname("in..valid"));
    }

    #[test]
    fn test_check_bucket_name_valid() {
        assert!(check_bucket_name("mybucket", false).is_ok());
        assert!(check_bucket_name("my-bucket", true).is_ok());
        assert!(check_bucket_name("my.bucket", true).is_ok());
        assert!(check_bucket_name("bucket123", false).is_ok());
        assert!(check_bucket_name("abc", false).is_ok());
    }

    #[test]
    fn test_check_bucket_name_empty() {
        assert!(check_bucket_name("", false).is_err());
        assert!(check_bucket_name("  ", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_too_short() {
        assert!(check_bucket_name("ab", false).is_err());
        assert!(check_bucket_name("a", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_too_long() {
        let long_name = "a".repeat(64);
        assert!(check_bucket_name(&long_name, false).is_err());
    }

    #[test]
    fn test_check_bucket_name_ip_address() {
        assert!(check_bucket_name("192.168.1.1", false).is_err());
        assert!(check_bucket_name("10.0.0.1", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_invalid_successive_chars() {
        assert!(check_bucket_name("my..bucket", false).is_err());
        assert!(check_bucket_name("my.-bucket", false).is_err());
        assert!(check_bucket_name("my-.bucket", false).is_err());
    }

    #[test]
    fn test_check_bucket_name_strict() {
        // Uppercase not allowed in strict mode
        assert!(check_bucket_name("My-Bucket", false).is_ok());
        assert!(check_bucket_name("My-Bucket", true).is_err());
        // Underscore not allowed in strict mode
        assert!(check_bucket_name("my_bucket", false).is_ok());
        assert!(check_bucket_name("my_bucket", true).is_err());
        // Reserved prefixes not allowed in strict mode
        assert!(check_bucket_name("xn--bucket", false).is_ok());
        assert!(check_bucket_name("xn--bucket", true).is_err());
        assert!(check_bucket_name("sthree-bucket", false).is_ok());
        assert!(check_bucket_name("sthree-bucket", true).is_err());
        // Reserved suffix not allowed in strict mode
        assert!(check_bucket_name("bucket-s3alias", false).is_ok());
        assert!(check_bucket_name("bucket-s3alias", true).is_err());
        // Valid strict names
        assert!(check_bucket_name("my-bucket", true).is_ok());
        assert!(check_bucket_name("bucket123", true).is_ok());
        assert!(check_bucket_name("my.bucket.name", true).is_ok());
    }

    #[test]
    fn test_check_object_name_valid() {
        assert!(check_object_name("myobject").is_ok());
        assert!(check_object_name("my/object/path").is_ok());
        assert!(check_object_name("object-with-dashes").is_ok());
        assert!(check_object_name("a").is_ok());
    }

    #[test]
    fn test_check_object_name_empty() {
        assert!(check_object_name("").is_err());
    }

    #[test]
    fn test_check_object_name_too_long() {
        let long_name = "a".repeat(1025);
        assert!(check_object_name(&long_name).is_err());
    }

    #[test]
    fn test_trim_quotes() {
        assert_eq!(trim_quotes("\"hello\"".to_string()), "hello");
        assert_eq!(trim_quotes("\"\"".to_string()), "");
        assert_eq!(trim_quotes("hello".to_string()), "hello");
        assert_eq!(trim_quotes("\"hello".to_string()), "\"hello");
        assert_eq!(trim_quotes("hello\"".to_string()), "hello\"");
        assert_eq!(trim_quotes("\"".to_string()), "\"");
    }

    #[test]
    fn test_copy_slice() {
        let src = [1, 2, 3, 4, 5];
        let mut dst = [0; 5];
        let copied = copy_slice(&mut dst, &src);
        assert_eq!(copied, 5);
        assert_eq!(dst, [1, 2, 3, 4, 5]);
    }

    #[test]
    fn test_copy_slice_partial() {
        let src = [1, 2, 3, 4, 5];
        let mut dst = [0; 3];
        let copied = copy_slice(&mut dst, &src);
        assert_eq!(copied, 3);
        assert_eq!(dst, [1, 2, 3]);
    }

    #[test]
    fn test_copy_slice_empty() {
        let src: [u8; 0] = [];
        let mut dst: [u8; 0] = [];
        let copied = copy_slice(&mut dst, &src);
        assert_eq!(copied, 0);
    }

    #[test]
    fn test_encode_tags() {
        let mut tags = HashMap::new();
        tags.insert("key1".to_string(), "value1".to_string());
        tags.insert("key2".to_string(), "value2".to_string());
        let encoded = encode_tags(&tags);
        assert!(encoded.contains("key1=value1"));
        assert!(encoded.contains("key2=value2"));
    }

    #[test]
    fn test_encode_tags_special_chars() {
        let mut tags = HashMap::new();
        tags.insert("key with spaces".to_string(), "value&special".to_string());
        let encoded = encode_tags(&tags);
        assert!(encoded.contains("key%20with%20spaces=value%26special"));
    }

    #[test]
    fn test_parse_tags() {
        let tags = parse_tags("key1=value1&key2=value2").unwrap();
        assert_eq!(tags.get("key1"), Some(&"value1".to_string()));
        assert_eq!(tags.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_parse_tags_encoded() {
        let tags = parse_tags("key%20one=value%26special").unwrap();
        assert_eq!(tags.get("key one"), Some(&"value&special".to_string()));
    }

    #[test]
    fn test_parse_tags_empty_value() {
        let tags = parse_tags("key1=&key2=value2").unwrap();
        assert_eq!(tags.get("key1"), Some(&"".to_string()));
        assert_eq!(tags.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_parse_tags_no_value() {
        let tags = parse_tags("key1&key2=value2").unwrap();
        assert_eq!(tags.get("key1"), Some(&"".to_string()));
        assert_eq!(tags.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_parse_tags_too_many_equals() {
        assert!(parse_tags("key1=value1=extra").is_err());
    }

    #[test]
    fn test_urlencode_object_key() {
        assert_eq!(urlencode_object_key("file.txt"), "file.txt");
        assert_eq!(urlencode_object_key("my/path/file.txt"), "my/path/file.txt");
        assert_eq!(urlencode_object_key("file name.txt"), "file%20name.txt");
        assert_eq!(urlencode_object_key("special&chars"), "special%26chars");
    }

    #[test]
    fn test_insert_multimap() {
        let result = insert(None, "key1");
        assert!(result.contains_key("key1"));
        assert_eq!(result.get_vec("key1"), Some(&vec!["".to_string()]));

        let mut existing = Multimap::new();
        existing.insert("existing".to_string(), "value".to_string());
        let result = insert(Some(existing), "key2");
        assert_eq!(result.get_vec("existing"), Some(&vec!["value".to_string()]));
        assert_eq!(result.get_vec("key2"), Some(&vec!["".to_string()]));
    }

    #[test]
    fn test_to_signer_date() {
        let time = from_iso8601utc("2024-01-15T10:30:45.000Z").unwrap();
        assert_eq!(to_signer_date(time), "20240115");
    }

    #[test]
    fn test_to_amz_date() {
        let time = from_iso8601utc("2024-01-15T10:30:45.000Z").unwrap();
        assert_eq!(to_amz_date(time), "20240115T103045Z");
    }

    #[test]
    fn test_to_iso8601utc() {
        let time = from_iso8601utc("2024-01-15T10:30:45.123Z").unwrap();
        let result = to_iso8601utc(time);
        assert!(result.starts_with("2024-01-15T10:30:45"));
    }

    #[test]
    fn test_from_iso8601utc_with_millis() {
        let result = from_iso8601utc("2024-01-15T10:30:45.123Z");
        assert!(result.is_ok());
        let time = result.unwrap();
        assert_eq!(time.year(), 2024);
        assert_eq!(time.month(), 1);
        assert_eq!(time.day(), 15);
    }

    #[test]
    fn test_from_iso8601utc_without_millis() {
        let result = from_iso8601utc("2024-01-15T10:30:45Z");
        assert!(result.is_ok());
        let time = result.unwrap();
        assert_eq!(time.year(), 2024);
    }

    #[test]
    fn test_from_iso8601utc_invalid() {
        assert!(from_iso8601utc("invalid").is_err());
        assert!(from_iso8601utc("2024-13-45T25:70:80Z").is_err());
    }

    #[test]
    fn test_from_http_header_value_edge_cases() {
        let result = from_http_header_value("Mon, 15 Jan 2024 10:30:45 GMT");
        assert!(result.is_ok());
    }

    #[test]
    fn test_from_http_header_value_invalid_format() {
        assert!(from_http_header_value("invalid").is_err());
    }

    #[test]
    fn test_match_region_basic() {
        let _result = match_region("us-east-1");
        // TODO consider fixing or removing this test
        // Test that match_region returns a boolean (always true)
    }

    #[test]
    fn test_check_ssec_valid_length() {
        // Base64-encoding a 32-byte key must produce some output.
        let raw_key = vec![0u8; 32];
        let encoded_key = b64_encode(&raw_key);
        assert_ne!(encoded_key, "");
    }

    #[test]
    fn test_get_text_default() {
        // The text of an existing child tag is returned as-is.
        let document = "<root><name>test</name></root>";
        let root = xmltree::Element::parse(document.as_bytes()).unwrap();
        assert_eq!(get_text_default(&root, "name"), "test");
    }

    #[test]
    fn test_get_text_default_missing() {
        // A missing child tag falls back to the empty string.
        let document = "<root><other>test</other></root>";
        let root = xmltree::Element::parse(document.as_bytes()).unwrap();
        assert_eq!(get_text_default(&root, "name"), "");
    }

    #[test]
    fn test_get_text_option_present() {
        // An existing child tag yields `Some` with its text.
        let document = "<root><name>test value</name></root>";
        let root = xmltree::Element::parse(document.as_bytes()).unwrap();
        let expected = Some(String::from("test value"));
        assert_eq!(get_text_option(&root, "name"), expected);
    }

    #[test]
    fn test_get_text_option_missing() {
        // A missing child tag yields `None`.
        let document = "<root><other>test</other></root>";
        let root = xmltree::Element::parse(document.as_bytes()).unwrap();
        assert!(get_text_option(&root, "name").is_none());
    }

    #[test]
    fn test_get_text_result_present() {
        // An existing child tag yields `Ok` with its text.
        let document = "<root><name>test value</name></root>";
        let root = xmltree::Element::parse(document.as_bytes()).unwrap();
        match get_text_result(&root, "name") {
            Ok(text) => assert_eq!(text, "test value"),
            Err(_) => panic!("expected the text of <name> to be found"),
        }
    }

    #[test]
    fn test_get_text_result_missing() {
        // A missing child tag is reported as an error.
        let document = "<root><other>test</other></root>";
        let root = xmltree::Element::parse(document.as_bytes()).unwrap();
        let outcome = get_text_result(&root, "name");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_insert_multimap_new() {
        // Starting from nothing, a single insert yields a one-entry map.
        let created = insert(None, "key1");
        assert_eq!(created.len(), 1);
    }

    #[test]
    fn test_insert_multimap_existing() {
        // Feeding the first result back in adds a second, distinct key.
        let first = insert(None, "key1");
        let both = insert(Some(first), "key2");
        assert_eq!(both.len(), 2);
    }

    #[test]
    fn test_parse_tags_valid_tags() {
        // Two `key=value` pairs joined by '&' decode into two entries with the
        // expected keys and values, not merely the expected count.
        let tags = parse_tags("key1=value1&key2=value2").unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags.get("key1").map(String::as_str), Some("value1"));
        assert_eq!(tags.get("key2").map(String::as_str), Some("value2"));
    }

    #[test]
    fn test_parse_tags_encoded_values() {
        // Plain values pass through untouched.
        let tags = parse_tags("Environment=Production").unwrap();
        assert!(!tags.is_empty());
        assert_eq!(
            tags.get("Environment").map(String::as_str),
            Some("Production")
        );

        // Percent-encoded keys and values are decoded.
        let tags = parse_tags("Project%20Name=Phoenix%2FBlue").unwrap();
        assert_eq!(
            tags.get("Project Name").map(String::as_str),
            Some("Phoenix/Blue")
        );
    }

    #[test]
    fn test_url_encode_equals_escape() {
        // `url_encode` and `escape` are expected to agree on every sample
        // (RFC 3986 unreserved characters are left alone by both).
        const SAMPLES: [&str; 12] = [
            "simple",
            "with spaces",
            "special&chars",
            "path/like",
            "equals=sign",
            "plus+sign",
            "asterisk*here",
            "tilde~ok",
            "dash-ok",
            "underscore_ok",
            "dot.ok",
            "mixed Key & Value = test",
        ];

        SAMPLES.iter().copied().for_each(|input| {
            let via_url_encode = url_encode(input);
            let via_escape = escape(input);
            assert_eq!(
                via_url_encode, via_escape,
                "Encoding mismatch for: {input}"
            );
        });
    }

    #[test]
    fn test_compute_checksum_sb_matches_compute_checksum() {
        let test_data = b"The quick brown fox jumps over the lazy dog";

        // Split the payload into three uneven segments so the segmented path
        // has to combine several chunks.
        let mut segmented = SegmentedBytes::new();
        for chunk in [&test_data[0..10], &test_data[10..25], &test_data[25..]] {
            segmented.append(Bytes::from(chunk));
        }
        let sb = Arc::new(segmented);

        // Every supported algorithm must give the same digest on both paths.
        let algorithms = [
            ChecksumAlgorithm::CRC32,
            ChecksumAlgorithm::CRC32C,
            ChecksumAlgorithm::CRC64NVME,
            ChecksumAlgorithm::SHA1,
            ChecksumAlgorithm::SHA256,
        ];
        for algo in algorithms {
            let from_bytes = compute_checksum(algo, test_data);
            let from_sb = compute_checksum_sb(algo, &sb);
            assert_eq!(
                from_bytes, from_sb,
                "Mismatch for {:?}: bytes='{}' vs sb='{}'",
                algo, from_bytes, from_sb
            );
        }
    }
}

/// Computes the MD5 digest of `data` and returns it base64-encoded.
pub fn md5sum_hash(data: &[u8]) -> String {
    let digest = md5::compute(data);
    b64_encode(digest.as_slice())
}

/// Gets current UTC time.
pub fn utc_now() -> UtcTime {
    chrono::offset::Utc::now()
}

/// Formats `time` as the compact `YYYYMMDD` signer date.
pub fn to_signer_date(time: UtcTime) -> String {
    let rendered = time.format("%Y%m%d");
    rendered.to_string()
}

/// Formats `time` as the compact `YYYYMMDDTHHMMSSZ` AMZ date.
pub fn to_amz_date(time: UtcTime) -> String {
    let rendered = time.format("%Y%m%dT%H%M%SZ");
    rendered.to_string()
}

/// Gets HTTP header value of given time.
///
/// The result follows the HTTP `IMF-fixdate` layout, for example
/// `Fri, 05 Jan 2024 10:30:45 GMT`: abbreviated English weekday and month
/// names and a day of month that is always two digits wide. This is the same
/// layout `from_http_header_value` parses.
pub fn to_http_header_value(time: UtcTime) -> String {
    format!(
        "{}, {:02} {} GMT",
        time.weekday(),
        // IMF-fixdate requires a zero-padded, two-digit day of month.
        time.day(),
        // `%b` yields the three-letter English month abbreviation.
        time.format("%b %Y %H:%M:%S")
    )
}

/// Formats `time` as an ISO 8601 UTC timestamp with millisecond precision,
/// e.g. `2024-01-15T10:30:45.123Z`.
pub fn to_iso8601utc(time: UtcTime) -> String {
    let rendered = time.format("%Y-%m-%dT%H:%M:%S.%3fZ");
    rendered.to_string()
}

/// Parses an ISO 8601 UTC timestamp, with or without a millisecond fraction.
///
/// The millisecond form is tried first; if it does not match, the error of
/// the fraction-less attempt is the one reported.
pub fn from_iso8601utc(s: &str) -> Result<UtcTime, ValidationErr> {
    let naive = match NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%3fZ") {
        Ok(with_millis) => with_millis,
        Err(_) => NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%SZ")?,
    };
    Ok(DateTime::<Utc>::from_naive_utc_and_offset(naive, Utc))
}

/// Percent-encoding set for object keys: every non-alphanumeric ASCII byte is
/// encoded except `/`, `-`, `.`, `_` and `~`.
const OBJECT_KEY_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
    .remove(b'/')
    .remove(b'~')
    .remove(b'.')
    .remove(b'_')
    .remove(b'-');

/// Percent-encodes an object key using `OBJECT_KEY_ENCODE_SET`, which leaves
/// `/` separators and the `-`, `_`, `.`, `~` characters untouched.
pub fn urlencode_object_key(key: &str) -> String {
    let encoded = utf8_percent_encode(key, OBJECT_KEY_ENCODE_SET);
    encoded.to_string()
}

pub mod aws_date_format {
    use super::{UtcTime, from_iso8601utc, to_iso8601utc};
    use serde::{Deserialize, Deserializer, Serializer};

    pub fn serialize<S>(date: &UtcTime, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(&to_iso8601utc(*date))
    }

    pub fn deserialize<'de, D>(deserializer: D) -> Result<UtcTime, D::Error>
    where
        D: Deserializer<'de>,
    {
        let s = String::deserialize(deserializer)?;
        from_iso8601utc(&s).map_err(serde::de::Error::custom)
    }
}

/// Parses `"true"` / `"false"` (ASCII case-insensitive) into a `bool`.
///
/// Any other input is rejected with `ValidationErr::InvalidBooleanValue`
/// carrying the offending text.
pub fn parse_bool(value: &str) -> Result<bool, ValidationErr> {
    match value {
        v if v.eq_ignore_ascii_case("true") => Ok(true),
        v if v.eq_ignore_ascii_case("false") => Ok(false),
        other => Err(ValidationErr::InvalidBooleanValue(other.to_string())),
    }
}

/// Parses an HTTP date header value such as `Mon, 15 Jan 2024 10:30:45 GMT`
/// into a UTC timestamp.
pub fn from_http_header_value(s: &str) -> Result<UtcTime, ValidationErr> {
    let naive = NaiveDateTime::parse_from_str(s, "%a, %d %b %Y %H:%M:%S GMT")?;
    let utc = DateTime::<Utc>::from_naive_utc_and_offset(naive, Utc);
    Ok(utc)
}

/// Returns `true` when `value` looks like a valid hostname.
///
/// The lower-cased value must consist of dot-separated labels of 1 to 63
/// characters matching `[a-z_\d-]`, and no label may start or end with `-`
/// or `_`.
pub fn match_hostname(value: &str) -> bool {
    lazy_static! {
        static ref HOSTNAME_REGEX: Regex =
            Regex::new(r"^([a-z_\d-]{1,63}\.)*([a-z_\d-]{1,63})$").unwrap();
    }

    let lowered = value.to_lowercase();
    if !HOSTNAME_REGEX.is_match(&lowered) {
        return false;
    }

    // Every label must begin and end with something other than '-' or '_'.
    value
        .split('.')
        .all(|label| !(label.starts_with(['-', '_']) || label.ends_with(['-', '_'])))
}

/// Checks if given region is valid or not.
///
/// Returns `true` when the lower-cased `value` is 1 to 63 characters matching
/// `[a-z_\d-]` and `value` neither starts nor ends with `-` or `_`; returns
/// `false` otherwise. This mirrors the per-label rules of `match_hostname`.
///
/// NOTE(review): the previous implementation returned the negation of this
/// (true for *invalid* regions), contradicting its own documentation. Confirm
/// that no caller relied on the inverted result.
pub fn match_region(value: &str) -> bool {
    lazy_static! {
        static ref REGION_REGEX: Regex = Regex::new(r"^([a-z_\d-]{1,63})$").unwrap();
    }

    REGION_REGEX.is_match(value.to_lowercase().as_str())
        && !value.starts_with('-')
        && !value.starts_with('_')
        && !value.ends_with('-')
        && !value.ends_with('_')
}

/// Validates a bucket name, returning `ValidationErr::InvalidBucketName` with
/// a human-readable reason on the first rule that fails.
///
/// Surrounding whitespace is trimmed before any check. The checks run in this
/// order: length (3 to 63 bytes), not an IPv4 address, no `..` / `.-` / `-.`
/// sequences, then the character-set regex. With `strict` set, the stricter
/// lowercase-only regex is used and the reserved `xn--` / `sthree-` prefixes
/// and the `-s3alias` suffix are rejected as well.
// TODO: S3Express has slightly different rules for bucket names
pub fn check_bucket_name(bucket: impl AsRef<str>, strict: bool) -> Result<(), ValidationErr> {
    lazy_static! {
        static ref IPV4_REGEX: Regex = Regex::new(r"^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$").unwrap();
        static ref VALID_BUCKET_NAME_REGEX: Regex =
            Regex::new("^[A-Za-z0-9][A-Za-z0-9\\.\\-_:]{1,61}[A-Za-z0-9]$").unwrap();
        static ref VALID_BUCKET_NAME_STRICT_REGEX: Regex =
            Regex::new("^[a-z0-9][a-z0-9\\.\\-]{1,61}[a-z0-9]$").unwrap();
    }

    let bucket: &str = bucket.as_ref().trim();

    // Every failure carries the (trimmed) bucket name plus a reason.
    let reject = |reason: String| -> Result<(), ValidationErr> {
        Err(ValidationErr::InvalidBucketName {
            name: bucket.into(),
            reason,
        })
    };

    match bucket.len() {
        0 => return reject("bucket name cannot be empty".into()),
        1 | 2 => return reject("bucket name cannot be less than 3 characters".into()),
        len if len > 63 => {
            return reject("bucket name cannot be greater than 63 characters".into());
        }
        _ => {}
    }

    if IPV4_REGEX.is_match(bucket) {
        return reject("bucket name cannot be an IP address".into());
    }

    let has_bad_sequence = ["..", ".-", "-."]
        .into_iter()
        .any(|sequence| bucket.contains(sequence));
    if has_bad_sequence {
        return reject(
            "bucket name contains invalid successive characters '..', '.-' or '-.'".into(),
        );
    }

    if !strict {
        if !VALID_BUCKET_NAME_REGEX.is_match(bucket) {
            return reject(format!(
                "bucket name does not follow S3 standards, according to {}",
                *VALID_BUCKET_NAME_REGEX
            ));
        }
        return Ok(());
    }

    if !VALID_BUCKET_NAME_STRICT_REGEX.is_match(bucket) {
        return reject(format!(
            "bucket name does not follow S3 standards strictly, according to {}",
            *VALID_BUCKET_NAME_STRICT_REGEX
        ));
    }

    // AWS reserved prefixes and suffixes, checked in this order.
    let reserved = [
        (
            bucket.starts_with("xn--"),
            "bucket name cannot start with 'xn--' (reserved for IDN)",
        ),
        (
            bucket.starts_with("sthree-"),
            "bucket name cannot start with 'sthree-' (reserved by AWS)",
        ),
        (
            bucket.ends_with("-s3alias"),
            "bucket name cannot end with '-s3alias' (reserved for S3 Access Points)",
        ),
    ];
    if let Some((_, reason)) = reserved.into_iter().find(|(violated, _)| *violated) {
        return reject(reason.into());
    }

    Ok(())
}

/// Validates an object name: it must be non-empty and at most 1024 bytes long.
// TODO: S3Express has slightly different rules for object names
pub fn check_object_name(object: impl AsRef<str>) -> Result<(), ValidationErr> {
    let object: &str = object.as_ref();

    if object.is_empty() {
        return Err(ValidationErr::InvalidObjectName(
            "object name cannot be empty".into(),
        ));
    }

    // `len()` counts bytes, not characters.
    if object.len() > 1024 {
        return Err(ValidationErr::InvalidObjectName(format!(
            "Object name ('{object}') cannot be greater than 1024 bytes"
        )));
    }

    Ok(())
}

/// Rejects an SSE configuration that requires TLS when the client is not
/// using a secure connection; every other combination is accepted.
pub fn check_sse(sse: &Option<Arc<dyn Sse>>, client: &MinioClient) -> Result<(), ValidationErr> {
    match sse {
        Some(config) if config.tls_required() && !client.is_secure() => {
            Err(ValidationErr::SseTlsRequired(None))
        }
        _ => Ok(()),
    }
}

/// Rejects the use of an SSE-C customer key when the client is not using a
/// secure connection.
pub fn check_ssec(
    ssec: &Option<SseCustomerKey>,
    client: &MinioClient,
) -> Result<(), ValidationErr> {
    let key_over_insecure_link = ssec.is_some() && !client.is_secure();
    if key_over_insecure_link {
        Err(ValidationErr::SseTlsRequired(None))
    } else {
        Ok(())
    }
}

/// Same check as `check_ssec`, but the returned error carries a prefix that
/// names the source bucket, object and (when given) version id.
pub fn check_ssec_with_log(
    ssec: &Option<SseCustomerKey>,
    client: &MinioClient,
    bucket: &BucketName,
    object: &ObjectKey,
    version: &Option<String>,
) -> Result<(), ValidationErr> {
    // Nothing to reject without a customer key, or when the link is secure.
    if ssec.is_none() || client.is_secure() {
        return Ok(());
    }

    let version_suffix = match version {
        Some(id) => String::from("?versionId=") + id,
        None => String::new(),
    };
    let context = format!("source {bucket}/{object}{}: ", version_suffix);
    Err(ValidationErr::SseTlsRequired(Some(context)))
}

/// Returns the text of the first `tag` child of `element`, or an empty string
/// when the child is missing or has no text.
pub fn get_text_default(element: &Element, tag: &str) -> String {
    match element.get_child(tag) {
        Some(child) => child.get_text().unwrap_or_default().to_string(),
        None => String::new(),
    }
}

/// Gets text value of given XML element for given tag.
///
/// # Errors
///
/// Returns an XML validation error when `element` has no `tag` child, or when
/// that child carries no text.
pub fn get_text_result(element: &Element, tag: &str) -> Result<String, ValidationErr> {
    // The error messages are built lazily so the success path does not
    // allocate two strings that are immediately thrown away.
    let child = element
        .get_child(tag)
        .ok_or_else(|| ValidationErr::xml_error(format!("<{tag}> tag not found")))?;
    let text = child
        .get_text()
        .ok_or_else(|| ValidationErr::xml_error(format!("text of <{tag}> tag not found")))?;
    Ok(text.to_string())
}

/// Returns the text of the first `tag` child of `element`, or `None` when the
/// child is missing or has no text.
pub fn get_text_option(element: &Element, tag: &str) -> Option<String> {
    let child = element.get_child(tag)?;
    let text = child.get_text()?;
    Some(text.to_string())
}

/// Strips one pair of surrounding double quotes from `s`, if present.
///
/// The string is consumed and edited in place; it is returned unchanged
/// unless it is at least two bytes long and both starts and ends with `"`.
pub fn trim_quotes(mut s: String) -> String {
    let is_wrapped = s.len() >= 2 && s.starts_with('"') && s.ends_with('"');
    if is_wrapped {
        s.pop(); // closing quote
        s.remove(0); // opening quote
    }
    s
}

/// Copies source byte slice into destination byte slice.
///
/// Only as many bytes as fit are copied: the shorter of the two lengths. Any
/// remaining bytes in `dst` are left untouched.
///
/// Returns the number of bytes copied.
pub fn copy_slice(dst: &mut [u8], src: &[u8]) -> usize {
    let count = dst.len().min(src.len());
    // Both sub-slices have exactly `count` elements, so this cannot panic.
    dst[..count].copy_from_slice(&src[..count]);
    count
}

// Characters to escape in query strings. Based on RFC 3986 and the golang
// net/url implementation used in the MinIO server.
//
// https://tools.ietf.org/html/rfc3986
//
// 1. All non-ascii characters are escaped always.
// 2. All reserved characters are escaped.
// 3. Any other characters are not escaped.
//
// Unreserved characters in addition to alphanumeric characters are: '-', '_',
// '.', '~' (§2.3 Unreserved characters (mark))
//
// Reserved characters for query strings: '$', '&', '+', ',', '/', ':', ';',
// '=', '?', '@' (§3.4)
//
// NON_ALPHANUMERIC already escapes everything non-alphanumeric (it includes all
// the reserved characters). So we only remove the unreserved characters from
// this set.
const QUERY_ESCAPE: &AsciiSet = &NON_ALPHANUMERIC
    .remove(b'~')
    .remove(b'.')
    .remove(b'_')
    .remove(b'-');

/// Percent-decodes `s` and checks that the result is valid UTF-8.
///
/// A decoding failure is reported as `ValidationErr::TagDecodingError`
/// carrying the original input and the UTF-8 error text.
fn unescape(s: &str) -> Result<String, ValidationErr> {
    match percent_decode_str(s).decode_utf8() {
        Ok(decoded) => Ok(decoded.to_string()),
        Err(utf8_err) => Err(ValidationErr::TagDecodingError {
            input: s.to_string(),
            error_message: utf8_err.to_string(),
        }),
    }
}

/// Percent-encodes `s` with the `QUERY_ESCAPE` set.
fn escape(s: &str) -> String {
    let encoded = utf8_percent_encode(s, QUERY_ESCAPE);
    encoded.to_string()
}

/// Renders tags as `key=value` pairs joined by `&`, for the `x-amz-tagging`
/// header.
///
/// Keys and values are percent-encoded with `escape`. Pairs appear in the
/// map's iteration order.
pub fn encode_tags(h: &HashMap<String, String>) -> String {
    let pairs: Vec<String> = h
        .iter()
        .map(|(key, value)| format!("{}={}", escape(key), escape(value)))
        .collect();
    pairs.join("&")
}

/// Parses `key=value` pairs joined by `&` (the format produced by
/// `encode_tags`) into a map, percent-decoding keys and values.
///
/// Empty segments are skipped, so an empty input yields an empty map and
/// stray `&` separators (leading, trailing or doubled) are ignored. A segment
/// without `=` is stored with an empty value.
///
/// # Errors
///
/// Returns `ValidationErr::TagDecodingError` when a key or value does not
/// decode to valid UTF-8, or when a segment contains more than one `=`.
pub fn parse_tags(s: &str) -> Result<HashMap<String, String>, ValidationErr> {
    let mut tags = HashMap::new();
    // Without this filter "" would decode to a single bogus `"" => ""` entry,
    // which breaks the round trip with `encode_tags` on an empty map.
    for tag in s.split('&').filter(|segment| !segment.is_empty()) {
        let mut parts = tag.split('=');
        // `split` always yields at least one item, so the key is always there;
        // a missing value decodes to the empty string.
        let key = unescape(parts.next().unwrap_or_default())?;
        let value = unescape(parts.next().unwrap_or_default())?;
        if parts.next().is_some() {
            return Err(ValidationErr::TagDecodingError {
                input: s.into(),
                error_message: "tag had too many values for a key".into(),
            });
        }
        tags.insert(key, value);
    }
    Ok(tags)
}

/// Returns the consumed data and inserts a key into it with an empty value.
#[must_use]
pub fn insert(data: Option<Multimap>, key: impl Into<String>) -> Multimap {
    let mut result: Multimap = data.unwrap_or_default();
    result.insert(key.into(), String::new());
    result
}

/// Index-backed, read-only wrappers around [`xmltree::Element`] that make
/// repeated child lookups by tag name cheap.
pub mod xml {
    use crate::s3::error::ValidationErr;
    use std::collections::HashMap;

    /// Maps each child tag name to the positions, within the parent's
    /// `children` list, of the child elements carrying that name. Positions
    /// are stored in the order the children appear.
    #[derive(Debug, Clone)]
    struct XmlElementIndex {
        children: HashMap<String, Vec<usize>>,
    }

    impl XmlElementIndex {
        /// Position of the first child element named `tag`, if any.
        fn get_first(&self, tag: &str) -> Option<usize> {
            // `HashMap<String, _>` can be queried with a `&str` directly, so
            // no temporary `String` is needed.
            self.children.get(tag)?.first().copied()
        }

        /// Positions of every child element named `tag`, if there are any.
        fn get(&self, tag: &str) -> Option<&Vec<usize>> {
            self.children.get(tag)
        }
    }

    impl From<&xmltree::Element> for XmlElementIndex {
        fn from(value: &xmltree::Element) -> Self {
            let mut children: HashMap<String, Vec<usize>> = HashMap::new();
            for (position, node) in value.children.iter().enumerate() {
                // Only element nodes are indexed; other node kinds are skipped.
                if let Some(element) = node.as_element() {
                    children
                        .entry(element.name.clone())
                        .or_default()
                        .push(position);
                }
            }
            Self { children }
        }
    }

    /// A borrowed XML element together with an index of its child elements.
    #[derive(Debug, Clone)]
    pub struct Element<'a> {
        inner: &'a xmltree::Element,
        child_element_index: XmlElementIndex,
    }

    impl<'a> From<&'a xmltree::Element> for Element<'a> {
        fn from(value: &'a xmltree::Element) -> Self {
            Self {
                inner: value,
                child_element_index: XmlElementIndex::from(value),
            }
        }
    }

    impl Element<'_> {
        /// Tag name of this element.
        pub fn name(&self) -> &str {
            &self.inner.name
        }

        /// Text of the first child named `tag`, or `None` when the child is
        /// missing or has no text.
        pub fn get_child_text(&self, tag: &str) -> Option<String> {
            let index = self.child_element_index.get_first(tag)?;
            self.inner.children[index]
                .as_element()?
                .get_text()
                .map(|text| text.to_string())
        }

        /// Text of the first child named `tag`.
        ///
        /// # Errors
        ///
        /// Returns an XML validation error when the child is missing or has
        /// no text.
        pub fn get_child_text_or_error(&self, tag: &str) -> Result<String, ValidationErr> {
            // Error values are built lazily so the success path does not
            // format messages it never uses.
            let index = self
                .child_element_index
                .get_first(tag)
                .ok_or_else(|| ValidationErr::xml_error(format!("<{tag}> tag not found")))?;
            // The index only ever records element nodes, so `as_element` is
            // expected to succeed; chaining avoids a panic path regardless.
            self.inner.children[index]
                .as_element()
                .and_then(|child| child.get_text())
                .map(|text| text.to_string())
                .ok_or_else(|| {
                    ValidationErr::xml_error(format!("text of <{tag}> tag not found"))
                })
        }

        /// Returns all children with given tag along with their index.
        pub fn get_matching_children(&self, tag: &str) -> Vec<(usize, Element<'_>)> {
            self.child_element_index
                .get(tag)
                .into_iter()
                .flatten()
                .filter_map(|&position| {
                    self.inner.children[position]
                        .as_element()
                        .map(|child| (position, Element::from(child)))
                })
                .collect()
        }

        /// First child element named `tag`, wrapped with its own index.
        pub fn get_child(&self, tag: &str) -> Option<Element<'_>> {
            let index = self.child_element_index.get_first(tag)?;
            let child = self.inner.children[index].as_element()?;
            Some(Element::from(child))
        }

        /// All child element nodes, in the order they appear, as raw
        /// `xmltree` elements.
        pub fn get_xmltree_children(&self) -> Vec<&xmltree::Element> {
            self.inner
                .children
                .iter()
                .filter_map(|node| node.as_element())
                .collect()
        }
    }

    /// Helper type that implements merge sort in the iterator.
    ///
    /// It walks two `(index, element)` lists and yields their elements
    /// interleaved by ascending index. The merge result is only ordered if
    /// each input list is itself ordered by index.
    pub struct MergeXmlElements<'a> {
        v1: &'a Vec<(usize, Element<'a>)>,
        v2: &'a Vec<(usize, Element<'a>)>,
        i1: usize,
        i2: usize,
    }

    impl<'a> MergeXmlElements<'a> {
        /// Creates a merging iterator positioned at the start of both lists.
        pub fn new(v1: &'a Vec<(usize, Element<'a>)>, v2: &'a Vec<(usize, Element<'a>)>) -> Self {
            Self {
                v1,
                v2,
                i1: 0,
                i2: 0,
            }
        }
    }

    impl<'a> Iterator for MergeXmlElements<'a> {
        type Item = &'a Element<'a>;

        fn next(&mut self) -> Option<Self::Item> {
            match (self.v1.get(self.i1), self.v2.get(self.i2)) {
                // Both lists have a candidate: the first list wins only when
                // its index is strictly smaller.
                (Some((index1, element1)), Some((index2, _))) if index1 < index2 => {
                    self.i1 += 1;
                    Some(element1)
                }
                // Second list wins ties, and is drained once the first is done.
                (_, Some((_, element2))) => {
                    self.i2 += 1;
                    Some(element2)
                }
                // Second list exhausted: drain the first.
                (Some((_, element1)), None) => {
                    self.i1 += 1;
                    Some(element1)
                }
                (None, None) => None,
            }
        }
    }
}