clock-hash 1.0.0

//! Integration tests for SIMD operations
//!
//! Tests full hash computation pipelines using both SIMD and scalar implementations
//! to ensure end-to-end correctness.

#[cfg(test)]
mod integration_tests {
    extern crate alloc;
    use crate::padding::{BLOCK_SIZE, pad_message_in_place};
    use crate::simd::dispatch::*;
    use crate::simd::scalar::*;
    use alloc::vec::Vec;

    /// Computes the full hash of `input` using the scalar block function.
    ///
    /// This is the reference pipeline the tests compare against:
    /// 1. start from `IV`,
    /// 2. stream `input` through a block buffer, calling
    ///    `process_block_simd_scalar` each time the buffer fills,
    /// 3. pad and process whatever is left in the buffer (also done for the
    ///    empty message),
    /// 4. XOR the resulting state with `IV`.
    ///
    /// Note that a non-empty input whose length is an exact multiple of
    /// `BLOCK_SIZE` leaves the buffer empty, so step 3 is skipped for it.
    ///
    /// Returns the eight-word final state.
    fn scalar_hash_compute(input: &[u8]) -> [u64; 8] {
        use crate::constants::IV;
        use crate::padding::{BLOCK_SIZE, pad_message_in_place};

        let mut state = IV;
        let mut buffer = [0u8; 128];
        let mut buffer_len = 0;
        // The total length is known up front and never changes, so it is kept
        // as an immutable `usize` (the form `pad_message_in_place` receives).
        let message_len = input.len();
        let mut input_offset = 0;

        // Stream the input through the buffer, compressing every full block.
        while input_offset < input.len() {
            let to_copy = core::cmp::min(BLOCK_SIZE - buffer_len, input.len() - input_offset);

            buffer[buffer_len..buffer_len + to_copy]
                .copy_from_slice(&input[input_offset..input_offset + to_copy]);
            buffer_len += to_copy;
            input_offset += to_copy;

            if buffer_len == BLOCK_SIZE {
                process_block_simd_scalar(&buffer, &mut state);
                buffer_len = 0;
            }
        }

        // Pad and process the trailing partial block (or the empty message).
        if buffer_len > 0 || input.is_empty() {
            pad_message_in_place(&mut buffer, buffer_len, 0, message_len);

            // NOTE(review): `buffer` holds 128 bytes; this assumes
            // `padded_length(buffer_len)` never exceeds that for a partial
            // block, otherwise the slice below would panic — confirm against
            // `crate::padding`.
            let padded_len = crate::padding::padded_length(buffer_len);
            for start in (0..padded_len).step_by(BLOCK_SIZE) {
                let end = core::cmp::min(start + BLOCK_SIZE, padded_len);
                let mut block = [0u8; BLOCK_SIZE];
                block[..end - start].copy_from_slice(&buffer[start..end]);

                process_block_simd_scalar(&block, &mut state);
            }
        }

        // Finalization: XOR state with IV
        for (word, iv) in state.iter_mut().zip(IV.iter()) {
            *word ^= *iv;
        }

        state
    }

    /// Computes the full hash of `input` using the dispatched `process_block_simd`.
    ///
    /// Mirrors `scalar_hash_compute` step for step so the two results can be
    /// compared directly:
    /// 1. start from `IV`,
    /// 2. stream `input` through a block buffer, calling `process_block_simd`
    ///    each time the buffer fills,
    /// 3. pad and process whatever is left in the buffer (also done for the
    ///    empty message),
    /// 4. XOR the resulting state with `IV`.
    ///
    /// Note that a non-empty input whose length is an exact multiple of
    /// `BLOCK_SIZE` leaves the buffer empty, so step 3 is skipped for it.
    ///
    /// Returns the eight-word final state.
    fn simd_hash_compute(input: &[u8]) -> [u64; 8] {
        use crate::constants::IV;
        use crate::padding::{BLOCK_SIZE, pad_message_in_place};

        let mut state = IV;
        let mut buffer = [0u8; 128];
        let mut buffer_len = 0;
        // The total length is known up front and never changes, so it is kept
        // as an immutable `usize` (the form `pad_message_in_place` receives).
        let message_len = input.len();
        let mut input_offset = 0;

        // Stream the input through the buffer, compressing every full block.
        while input_offset < input.len() {
            let to_copy = core::cmp::min(BLOCK_SIZE - buffer_len, input.len() - input_offset);

            buffer[buffer_len..buffer_len + to_copy]
                .copy_from_slice(&input[input_offset..input_offset + to_copy]);
            buffer_len += to_copy;
            input_offset += to_copy;

            if buffer_len == BLOCK_SIZE {
                process_block_simd(&buffer, &mut state);
                buffer_len = 0;
            }
        }

        // Pad and process the trailing partial block (or the empty message).
        if buffer_len > 0 || input.is_empty() {
            pad_message_in_place(&mut buffer, buffer_len, 0, message_len);

            // NOTE(review): `buffer` holds 128 bytes; this assumes
            // `padded_length(buffer_len)` never exceeds that for a partial
            // block, otherwise the slice below would panic — confirm against
            // `crate::padding`.
            let padded_len = crate::padding::padded_length(buffer_len);
            for start in (0..padded_len).step_by(BLOCK_SIZE) {
                let end = core::cmp::min(start + BLOCK_SIZE, padded_len);
                let mut block = [0u8; BLOCK_SIZE];
                block[..end - start].copy_from_slice(&buffer[start..end]);

                process_block_simd(&block, &mut state);
            }
        }

        // Finalization: XOR state with IV
        for (word, iv) in state.iter_mut().zip(IV.iter()) {
            *word ^= *iv;
        }

        state
    }

    /// The zero-length message must hash identically through both pipelines.
    #[test]
    fn test_empty_input_consistency() {
        // An empty slice drives both helpers into their `message_len == 0`
        // padding branch.
        let empty: &[u8] = &[];

        let from_scalar = scalar_hash_compute(empty);
        let from_simd = simd_hash_compute(empty);

        assert_eq!(
            from_scalar, from_simd,
            "Empty input should produce consistent results"
        );
    }

    /// A short message that fits inside one block must hash identically
    /// through both pipelines.
    #[test]
    fn test_single_block_consistency() {
        let message: &[u8] = b"Hello, World! This is a test message for ClockHash.";

        let from_scalar = scalar_hash_compute(message);
        let from_simd = simd_hash_compute(message);

        assert_eq!(
            from_scalar, from_simd,
            "Single block input should produce consistent results"
        );
    }

    /// A 512-byte message (several 128-byte blocks) must hash identically
    /// through both pipelines.
    #[test]
    fn test_multi_block_consistency() {
        // Bytes 0, 1, ..., 255 repeated twice.
        let message: Vec<u8> = (0..512).map(|i| (i % 256) as u8).collect();

        let from_scalar = scalar_hash_compute(&message);
        let from_simd = simd_hash_compute(&message);

        assert_eq!(
            from_scalar, from_simd,
            "Multi-block input should produce consistent results"
        );
    }

    /// A message of exactly 128 bytes (one full block, nothing left over) must
    /// hash identically through both pipelines.
    #[test]
    fn test_exact_block_boundary() {
        let message = [0xABu8; 128];

        let from_scalar = scalar_hash_compute(&message);
        let from_simd = simd_hash_compute(&message);

        assert_eq!(
            from_scalar, from_simd,
            "Exact block boundary should produce consistent results"
        );
    }

    /// A 100-byte message leaves a partially filled 128-byte block that has to
    /// be padded; both pipelines must agree on the result.
    #[test]
    fn test_partial_final_block() {
        let message = [0xCDu8; 100];

        let from_scalar = scalar_hash_compute(&message);
        let from_simd = simd_hash_compute(&message);

        assert_eq!(
            from_scalar, from_simd,
            "Partial final block should produce consistent results"
        );
    }

    /// A 2048-byte message with a non-trivial byte pattern must hash
    /// identically through both pipelines.
    #[test]
    fn test_large_input_consistency() {
        // Byte at index i is (7 * i + 13) mod 256.
        let message: Vec<u8> = (0..2048).map(|i| ((i * 7 + 13) % 256) as u8).collect();

        let from_scalar = scalar_hash_compute(&message);
        let from_simd = simd_hash_compute(&message);

        assert_eq!(
            from_scalar, from_simd,
            "Large input should produce consistent results"
        );
    }

    /// Checks the SIMD pipeline against the scalar pipeline on the classic
    /// "quick brown fox" sentence.
    ///
    /// The expected value is computed at run time by `scalar_hash_compute`
    /// rather than taken from a stored test vector, so this verifies agreement
    /// between the two implementations, not a fixed digest.
    #[test]
    fn test_known_answer_test() {
        let message: &[u8] = b"The quick brown fox jumps over the lazy dog";

        // Scalar output serves as the reference answer.
        let expected = scalar_hash_compute(message);
        let actual = simd_hash_compute(message);

        assert_eq!(actual, expected, "SIMD should match scalar known answer");
    }

    /// Feeding a message to the scalar block function in 32-byte pieces must
    /// give the same result as hashing it in one call to `scalar_hash_compute`.
    #[test]
    fn test_incremental_vs_batch_consistency() {
        let data = b"This is a test message that will be processed incrementally";

        // One-shot reference.
        let batch_result = scalar_hash_compute(data);

        // Streaming state: running hash state, block buffer, fill level and
        // total number of bytes absorbed so far.
        let mut state = crate::constants::IV;
        let mut buffer = [0u8; 128];
        let mut buffered = 0;
        let mut total_len = 0u64;

        // Absorb the message 32 bytes at a time (the last piece may be shorter).
        for piece in data.chunks(32) {
            total_len = total_len.wrapping_add(piece.len() as u64);
            let mut consumed = 0;

            // Top up a partially filled buffer first.
            if buffered > 0 {
                let take = core::cmp::min(128 - buffered, piece.len());
                buffer[buffered..buffered + take].copy_from_slice(&piece[..take]);
                buffered += take;
                consumed = take;

                if buffered == 128 {
                    process_block_simd_scalar(&buffer, &mut state);
                    buffered = 0;
                }
            }

            // Whole blocks can be processed straight from the piece.
            while piece.len() - consumed >= 128 {
                let mut block = [0u8; 128];
                block.copy_from_slice(&piece[consumed..consumed + 128]);
                process_block_simd_scalar(&block, &mut state);
                consumed += 128;
            }

            // Anything left over starts a fresh buffer.
            let tail = &piece[consumed..];
            if !tail.is_empty() {
                buffer[..tail.len()].copy_from_slice(tail);
                buffered = tail.len();
            }
        }

        // Pad and process the trailing partial block (or the empty message).
        if buffered > 0 || total_len == 0 {
            pad_message_in_place(&mut buffer, buffered, 0, total_len as usize);

            let padded_len = crate::padding::padded_length(buffered);
            for start in (0..padded_len).step_by(BLOCK_SIZE) {
                let end = core::cmp::min(start + BLOCK_SIZE, padded_len);
                let mut block = [0u8; BLOCK_SIZE];
                block[..end - start].copy_from_slice(&buffer[start..end]);

                process_block_simd_scalar(&block, &mut state);
            }
        }

        // Finalization: XOR state with IV
        for (word, iv) in state.iter_mut().zip(crate::constants::IV.iter()) {
            *word ^= *iv;
        }

        assert_eq!(
            state, batch_result,
            "Incremental processing should match batch processing"
        );
    }

    /// Feeding a message to `process_block_simd` in 47-byte pieces must give
    /// the same result as hashing it in one call to `simd_hash_compute`.
    ///
    /// The finalization step uses the same guard as the batch helper
    /// (`buffer_len > 0 || message_len == 0`) so the two paths stay in step
    /// even if the test data is ever changed to an empty message.
    #[test]
    fn test_simd_incremental_consistency() {
        use crate::constants::IV;
        let data = b"This is a test message that will be processed incrementally with SIMD";

        // One-shot reference computed with the SIMD pipeline.
        let batch_result = simd_hash_compute(data);

        // Streaming state: running hash state, block buffer, fill level and
        // total number of bytes absorbed so far.
        let mut state = IV;
        let mut buffer = [0u8; 128];
        let mut buffer_len = 0;
        let mut message_len = 0u64;

        // 47 is deliberately not a divisor of the block size, so pieces
        // straddle block boundaries.
        for chunk_data in data.chunks(47) {
            message_len = message_len.wrapping_add(chunk_data.len() as u64);
            let mut chunk_offset = 0;

            // Top up a partially filled buffer first.
            if buffer_len > 0 {
                let to_copy = core::cmp::min(128 - buffer_len, chunk_data.len());
                buffer[buffer_len..buffer_len + to_copy].copy_from_slice(&chunk_data[..to_copy]);
                buffer_len += to_copy;
                chunk_offset = to_copy;

                if buffer_len == 128 {
                    process_block_simd(&buffer, &mut state);
                    buffer_len = 0;
                }
            }

            // Whole blocks can be processed straight from the chunk.
            while chunk_offset + 128 <= chunk_data.len() {
                let mut block = [0u8; 128];
                block.copy_from_slice(&chunk_data[chunk_offset..chunk_offset + 128]);
                process_block_simd(&block, &mut state);
                chunk_offset += 128;
            }

            // Anything left over starts a fresh buffer.
            let tail = &chunk_data[chunk_offset..];
            if !tail.is_empty() {
                buffer[..tail.len()].copy_from_slice(tail);
                buffer_len = tail.len();
            }
        }

        // Pad and process the trailing partial block, using the same guard as
        // `simd_hash_compute`.
        if buffer_len > 0 || message_len == 0 {
            pad_message_in_place(&mut buffer, buffer_len, 0, message_len as usize);

            let padded_len = crate::padding::padded_length(buffer_len);
            for start in (0..padded_len).step_by(BLOCK_SIZE) {
                let end = core::cmp::min(start + BLOCK_SIZE, padded_len);
                let mut block = [0u8; BLOCK_SIZE];
                block[..end - start].copy_from_slice(&buffer[start..end]);

                process_block_simd(&block, &mut state);
            }
        }

        // Finalization: XOR state with IV (same as batch processing)
        for (word, iv) in state.iter_mut().zip(IV.iter()) {
            *word ^= *iv;
        }

        assert_eq!(
            state, batch_result,
            "SIMD incremental processing should match SIMD batch processing"
        );
    }

    /// Runs a table of assorted inputs through both pipelines and requires
    /// identical results for each one.
    #[test]
    fn test_cross_implementation_consistency() {
        let zeros_128 = [0u8; 128];
        let ones_128 = [255u8; 128];
        let zeros_256 = [0u8; 256];

        // Empty, tiny, sentence-length, exactly one block, and two blocks.
        let cases: [&[u8]; 8] = [
            b"",
            b"A",
            b"Hello",
            b"The quick brown fox jumps over the lazy dog",
            b"Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
            &zeros_128, // one full block of 0x00
            &ones_128,  // one full block of 0xFF
            &zeros_256, // two full blocks of 0x00
        ];

        for (index, case) in cases.iter().enumerate() {
            let from_scalar = scalar_hash_compute(case);
            let from_simd = simd_hash_compute(case);

            assert_eq!(
                from_scalar, from_simd,
                "Cross-implementation consistency failed for input {}",
                index
            );
        }
    }

    /// Processes one zero-filled block holding a short message through the
    /// scalar and SIMD block functions and checks that the states agree.
    ///
    /// The two scalar buffers are byte-identical by construction (one relies
    /// on the zero initializer, the other zeroes its tail explicitly), so the
    /// first assertion checks that the scalar block function is deterministic.
    #[test]
    fn test_padding_consistency() {
        let message = b"Short";
        let used = message.len();

        // Tail left at its initial zero value.
        let mut implicit_zero = [0u8; 128];
        implicit_zero[..used].copy_from_slice(message);

        // Tail overwritten with zeros by hand.
        let mut explicit_zero = [0u8; 128];
        explicit_zero[..used].copy_from_slice(message);
        for byte in explicit_zero[used..].iter_mut() {
            *byte = 0;
        }

        let mut state1 = crate::constants::IV;
        process_block_simd_scalar(&implicit_zero, &mut state1);

        let mut state2 = crate::constants::IV;
        process_block_simd_scalar(&explicit_zero, &mut state2);

        assert_eq!(
            state1, state2,
            "Different padding strategies should produce same result"
        );

        // The dispatched SIMD block function must agree with the scalar one.
        let mut state3 = crate::constants::IV;
        process_block_simd(&implicit_zero, &mut state3);

        assert_eq!(state1, state3, "SIMD should match scalar padding behavior");
    }

    /// Sanity checks on the shared initialization vector: it has eight words
    /// and at least one of them is non-zero.
    #[test]
    fn test_state_initialization() {
        let iv = crate::constants::IV;

        assert_eq!(iv.len(), 8, "IV should have 8 elements");

        let has_nonzero_word = iv.iter().any(|&word| word != 0);
        assert!(has_nonzero_word, "IV should not be all zeros");
    }

    /// Streams a 300-byte message through `process_block_simd` using several
    /// chunk sizes and checks every run against the scalar batch result.
    ///
    /// The chunk sizes cover single bytes, sizes that do not divide the block
    /// size, and a full block at a time.
    #[test]
    fn test_different_chunk_sizes() {
        use crate::constants::IV;
        let data = [0x42u8; 300];

        // The reference does not depend on the chunk size, so compute it once.
        let reference_result = scalar_hash_compute(&data);

        for &chunk_size in &[1, 7, 13, 31, 47, 64, 97, 128] {
            // Streaming state: running hash state, block buffer, fill level
            // and total number of bytes absorbed so far.
            let mut state = IV;
            let mut buffer = [0u8; 128];
            let mut buffer_len = 0;
            let mut message_len = 0u64;

            for chunk_data in data.chunks(chunk_size) {
                message_len = message_len.wrapping_add(chunk_data.len() as u64);
                let mut chunk_offset = 0;

                // Top up a partially filled buffer first.
                if buffer_len > 0 {
                    let to_copy = core::cmp::min(128 - buffer_len, chunk_data.len());
                    buffer[buffer_len..buffer_len + to_copy]
                        .copy_from_slice(&chunk_data[..to_copy]);
                    buffer_len += to_copy;
                    chunk_offset = to_copy;

                    if buffer_len == 128 {
                        process_block_simd(&buffer, &mut state);
                        buffer_len = 0;
                    }
                }

                // Whole blocks can be processed straight from the chunk.
                while chunk_offset + 128 <= chunk_data.len() {
                    let mut block = [0u8; 128];
                    block.copy_from_slice(&chunk_data[chunk_offset..chunk_offset + 128]);
                    process_block_simd(&block, &mut state);
                    chunk_offset += 128;
                }

                // Anything left over starts a fresh buffer.
                let tail = &chunk_data[chunk_offset..];
                if !tail.is_empty() {
                    buffer[..tail.len()].copy_from_slice(tail);
                    buffer_len = tail.len();
                }
            }

            // Pad and process the trailing partial block, using the same guard
            // as the batch helpers.
            if buffer_len > 0 || message_len == 0 {
                pad_message_in_place(&mut buffer, buffer_len, 0, message_len as usize);

                let padded_len = crate::padding::padded_length(buffer_len);
                for start in (0..padded_len).step_by(BLOCK_SIZE) {
                    let end = core::cmp::min(start + BLOCK_SIZE, padded_len);
                    let mut block = [0u8; BLOCK_SIZE];
                    block[..end - start].copy_from_slice(&buffer[start..end]);

                    process_block_simd(&block, &mut state);
                }
            }

            // Finalization: XOR state with IV
            for (word, iv) in state.iter_mut().zip(IV.iter()) {
                *word ^= *iv;
            }

            // All chunk sizes should produce the same final state
            assert_eq!(
                state, reference_result,
                "Chunk size {} should produce consistent results",
                chunk_size
            );
        }
    }

    /// Scalar counterpart of `process_block_simd`, used as the test reference.
    ///
    /// Absorbs one 128-byte `block` into `state`:
    /// 1. decode the block as sixteen little-endian `u64` words,
    /// 2. run `scalar_clock_mix` over those words,
    /// 3. fold the first eight mixed words into `state`,
    /// 4. apply `clock_permute` to `state`.
    fn process_block_simd_scalar(block: &[u8; 128], state: &mut [u64; 8]) {
        // Decode the block eight bytes at a time, little-endian.
        let mut words = [0u64; 16];
        for (word, bytes) in words.iter_mut().zip(block.chunks_exact(8)) {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(bytes);
            *word = u64::from_le_bytes(raw);
        }

        // Apply ClockMix
        scalar_clock_mix(&mut words);

        // Fold into the state. The lanes are updated in place and in order, so
        // lanes 4..8 read the already-updated values of lanes 0..4.
        for lane in 0..8 {
            let partner = (lane + 4) % 8;
            let summed = state[lane].wrapping_add(words[lane]);
            state[lane] = summed ^ crate::utils::rotl64(state[partner], 17);
        }

        crate::clockpermute::clock_permute(state);
    }
}