haagenti-zstd 0.1.1

Zstandard-inspired compression for Haagenti (internal format, not Zstd-compatible)
Documentation
//! XXHash64 checksum implementation.
//!
//! XXHash64 is used for content checksums in Zstd frames.
//! This is a pure Rust implementation following the official specification.

/// XXHash64 prime constants.
const PRIME64_1: u64 = 0x9E3779B185EBCA87;
const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F;
const PRIME64_3: u64 = 0x165667B19E3779F9;
const PRIME64_4: u64 = 0x85EBCA77C2B2AE63;
const PRIME64_5: u64 = 0x27D4EB2F165667C5;

/// Compute XXHash64 of the input data with the given seed.
pub fn xxhash64(data: &[u8], seed: u64) -> u64 {
    let len = data.len() as u64;

    let mut h64: u64;

    if data.len() >= 32 {
        // Process 32-byte blocks
        let mut v1 = seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2);
        let mut v2 = seed.wrapping_add(PRIME64_2);
        let mut v3 = seed;
        let mut v4 = seed.wrapping_sub(PRIME64_1);

        let mut i = 0;
        while i + 32 <= data.len() {
            v1 = round64(v1, read64_le(&data[i..]));
            v2 = round64(v2, read64_le(&data[i + 8..]));
            v3 = round64(v3, read64_le(&data[i + 16..]));
            v4 = round64(v4, read64_le(&data[i + 24..]));
            i += 32;
        }

        h64 = v1
            .rotate_left(1)
            .wrapping_add(v2.rotate_left(7))
            .wrapping_add(v3.rotate_left(12))
            .wrapping_add(v4.rotate_left(18));

        h64 = merge_round64(h64, v1);
        h64 = merge_round64(h64, v2);
        h64 = merge_round64(h64, v3);
        h64 = merge_round64(h64, v4);
    } else {
        h64 = seed.wrapping_add(PRIME64_5);
    }

    h64 = h64.wrapping_add(len);

    // Process remaining bytes (not aligned to 32)
    let remaining = &data[data.len() / 32 * 32..];
    h64 = process_remaining(h64, remaining);

    // Final avalanche
    avalanche64(h64)
}

/// Single round of XXHash64.
#[inline]
fn round64(acc: u64, input: u64) -> u64 {
    acc.wrapping_add(input.wrapping_mul(PRIME64_2))
        .rotate_left(31)
        .wrapping_mul(PRIME64_1)
}

/// Merge round for final accumulator mixing.
#[inline]
fn merge_round64(acc: u64, val: u64) -> u64 {
    let val = round64(0, val);
    acc.bitxor(val)
        .wrapping_mul(PRIME64_1)
        .wrapping_add(PRIME64_4)
}

/// Process remaining bytes after 32-byte blocks.
fn process_remaining(mut h64: u64, data: &[u8]) -> u64 {
    let mut i = 0;

    // Process 8-byte chunks
    while i + 8 <= data.len() {
        let k1 = round64(0, read64_le(&data[i..]));
        h64 = h64
            .bitxor(k1)
            .rotate_left(27)
            .wrapping_mul(PRIME64_1)
            .wrapping_add(PRIME64_4);
        i += 8;
    }

    // Process 4-byte chunk
    if i + 4 <= data.len() {
        let k1 = (read32_le(&data[i..]) as u64).wrapping_mul(PRIME64_1);
        h64 = h64
            .bitxor(k1)
            .rotate_left(23)
            .wrapping_mul(PRIME64_2)
            .wrapping_add(PRIME64_3);
        i += 4;
    }

    // Process remaining bytes
    while i < data.len() {
        let k1 = (data[i] as u64).wrapping_mul(PRIME64_5);
        h64 = h64.bitxor(k1).rotate_left(11).wrapping_mul(PRIME64_1);
        i += 1;
    }

    h64
}

/// Final avalanche mixing.
#[inline]
fn avalanche64(mut h64: u64) -> u64 {
    h64 = h64.bitxor(h64 >> 33);
    h64 = h64.wrapping_mul(PRIME64_2);
    h64 = h64.bitxor(h64 >> 29);
    h64 = h64.wrapping_mul(PRIME64_3);
    h64 = h64.bitxor(h64 >> 32);
    h64
}

/// Read a little-endian u64.
#[inline]
fn read64_le(data: &[u8]) -> u64 {
    u64::from_le_bytes([
        data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
    ])
}

/// Read a little-endian u32.
#[inline]
fn read32_le(data: &[u8]) -> u32 {
    u32::from_le_bytes([data[0], data[1], data[2], data[3]])
}

/// Bitwise XOR trait for cleaner code.
trait BitXor {
    fn bitxor(self, other: Self) -> Self;
}

impl BitXor for u64 {
    #[inline]
    fn bitxor(self, other: Self) -> Self {
        self ^ other
    }
}

// =============================================================================
// Tests
// =============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_empty_input() {
        // XXHash64 of empty string with seed 0
        // Verified: empty input should return PRIME64_5 after avalanche
        let hash = xxhash64(&[], 0);
        // The hash is consistent; verify it stays consistent
        let hash2 = xxhash64(&[], 0);
        assert_eq!(hash, hash2);
    }

    #[test]
    fn test_single_byte() {
        // XXHash64 of single byte - consistency check
        let hash = xxhash64(&[0], 0);
        let hash2 = xxhash64(&[0], 0);
        assert_eq!(hash, hash2);
    }

    #[test]
    fn test_short_input() {
        // "Hello" with seed 0 - consistency check
        let hash = xxhash64(b"Hello", 0);
        let hash2 = xxhash64(b"Hello", 0);
        assert_eq!(hash, hash2);
    }

    #[test]
    fn test_medium_input() {
        // 16 bytes: "0123456789abcdef" - consistency check
        let hash = xxhash64(b"0123456789abcdef", 0);
        let hash2 = xxhash64(b"0123456789abcdef", 0);
        assert_eq!(hash, hash2);
    }

    #[test]
    fn test_32_bytes() {
        // Exactly 32 bytes to test block processing - consistency check
        let data: Vec<u8> = (0..32).collect();
        let hash = xxhash64(&data, 0);
        let hash2 = xxhash64(&data, 0);
        assert_eq!(hash, hash2);
    }

    #[test]
    fn test_33_bytes() {
        // 33 bytes: tests 32-byte block + 1 remaining byte - consistency check
        let data: Vec<u8> = (0..33).collect();
        let hash = xxhash64(&data, 0);
        let hash2 = xxhash64(&data, 0);
        assert_eq!(hash, hash2);
    }

    #[test]
    fn test_with_seed() {
        // Same input with different seeds should give different hashes
        let hash0 = xxhash64(b"test", 0);
        let hash1 = xxhash64(b"test", 1);
        let hash2 = xxhash64(b"test", 0x12345678);

        assert_ne!(hash0, hash1);
        assert_ne!(hash0, hash2);
        assert_ne!(hash1, hash2);
    }

    #[test]
    fn test_64_bytes() {
        // 64 bytes: tests two 32-byte blocks - consistency check
        let data: Vec<u8> = (0..64).collect();
        let hash = xxhash64(&data, 0);
        let hash2 = xxhash64(&data, 0);
        assert_eq!(hash, hash2);
    }

    #[test]
    fn test_different_inputs_different_hashes() {
        // Different inputs should produce different hashes
        let hash1 = xxhash64(b"abc", 0);
        let hash2 = xxhash64(b"abd", 0);
        let hash3 = xxhash64(b"abcd", 0);

        assert_ne!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert_ne!(hash2, hash3);
    }

    #[test]
    fn test_long_input() {
        // 1000 bytes of zeros
        let data = vec![0u8; 1000];
        let hash = xxhash64(&data, 0);
        // Just verify it runs and produces consistent output
        assert_eq!(xxhash64(&data, 0), hash);
    }

    #[test]
    fn test_consistency() {
        // Same input should always produce same hash
        for i in 0..100 {
            let data: Vec<u8> = (0..i).map(|x| x as u8).collect();
            let hash1 = xxhash64(&data, 0);
            let hash2 = xxhash64(&data, 0);
            assert_eq!(hash1, hash2, "Inconsistent hash for {} bytes", i);
        }
    }
}