crc32_light 0.1.2

Calculate CRC-32 checksum
Documentation
//! Calculate the CRC-32 checksum of binary data.
//!
//! # Examples
//!
//! ```rust
//! use crc32_light::crc32;
//!
//! assert_eq!(crc32(b"cat"), 0x9E5E43A8);
//! assert_eq!(crc32(b"dog"), 0x812C397D);
//! ```

// ------------------------------------------------------------------------------

/// Byte-indexed CRC-32 lookup table for the polynomial constant `0xEDB88320`.
///
/// Entry `i` is the value obtained by running the byte `i` through eight
/// shift-right / conditional-xor rounds. The entries are produced at compile
/// time by `build_crc32_table`, so nothing is computed at run time.
const CRC32_TABLE: [u32; 256] = build_crc32_table();

/// Builds the 256-entry CRC-32 table in a `const` context.
const fn build_crc32_table() -> [u32; 256] {
    // Polynomial constant used by the right-shifting table algorithm.
    const CRC32_POLY: u32 = 0xEDB8_8320;

    let mut table = [0u32; 256];
    let mut index = 0;
    // `for` loops are not available in `const fn`, hence the manual counters.
    while index < 256 {
        let mut entry = index as u32;
        let mut round = 0;
        while round < 8 {
            entry = if entry & 1 == 1 {
                (entry >> 1) ^ CRC32_POLY
            } else {
                entry >> 1
            };
            round += 1;
        }
        table[index] = entry;
        index += 1;
    }
    table
}

/// Computes the CRC-32 checksum of `bin_data`, one byte per table lookup.
///
/// This is the plain reference implementation: the running state starts at
/// `0xFFFFFFFF`, every input byte is folded in through `CRC32_TABLE`, and the
/// final state is bit-inverted before being returned.
pub fn crc32basic(bin_data: &[u8]) -> u32 {
    let state = bin_data.iter().fold(u32::MAX, |state, &octet| {
        // The low byte of the state xor the input byte selects the table entry.
        let slot = usize::from((state as u8) ^ octet);
        (state >> 8) ^ CRC32_TABLE[slot]
    });
    // Inverting every bit is the same as xor-ing with 0xFFFFFFFF.
    !state
}

// ------------------------------------------------------------------------------

/// CRC32 calculation using optimized chunking
/// This function processes multiple bytes at a time for better performance
pub fn crc32(bin_data: &[u8]) -> u32 {
    let mut crc: u32 = 0xFFFFFFFF;
    let len = bin_data.len();
    let mut i = 0;

    // Process 8 bytes at a time using unrolled loop for better performance
    while i + 8 <= len {
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i] as u32)) as usize];
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i + 1] as u32)) as usize];
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i + 2] as u32)) as usize];
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i + 3] as u32)) as usize];
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i + 4] as u32)) as usize];
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i + 5] as u32)) as usize];
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i + 6] as u32)) as usize];
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i + 7] as u32)) as usize];
        i += 8;
    }

    // Process remaining bytes (less than 8)
    while i < len {
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[i] as u32)) as usize];
        i += 1;
    }

    crc ^ 0xFFFFFFFF
}

// ------------------------------------------------------------------------------

/// High-speed CRC32 calculation with aggressive optimizations
/// Uses unsafe code and processes data in large chunks for maximum throughput
pub fn crc32speed(bin_data: &[u8]) -> u32 {
    let mut crc: u32 = 0xFFFFFFFF;
    let len = bin_data.len();
    let mut pos = 0;
    
    // Macro for inlining byte processing without function call overhead
    macro_rules! process_bytes {
        ($ptr:expr, $($offset:expr),+) => {
            $(
                crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (*$ptr.add($offset) as u32)) as usize];
            )+
        };
    }
    
    // Process 32 bytes at a time for optimal balance
    while pos + 32 <= len {
        unsafe {
            let ptr = bin_data.as_ptr().add(pos);
            process_bytes!(ptr, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                               16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        }
        pos += 32;
    }
    
    // Process 8 bytes at a time for remaining data
    while pos + 8 <= len {
        unsafe {
            let ptr = bin_data.as_ptr().add(pos);
            process_bytes!(ptr, 0, 1, 2, 3, 4, 5, 6, 7);
        }
        pos += 8;
    }

    // Process remaining bytes one at a time
    while pos < len {
        crc = (crc >> 8) ^ CRC32_TABLE[((crc & 0xFF) ^ (bin_data[pos] as u32)) as usize];
        pos += 1;
    }

    crc ^ 0xFFFFFFFF
}

// ------------------------------------------------------------------------------

/// Incremental CRC-32 calculator that keeps its state between calls.
///
/// Feed data in any number of pieces with [`Crc32Stream::update`]; the
/// checksum read back afterwards is the same as if all pieces had been
/// processed as one contiguous slice. Cloning a stream copies its running
/// state, so a partially fed stream can be forked and continued separately.
#[derive(Debug, Clone)]
pub struct Crc32Stream {
    /// Running state before the final inversion; starts at `0xFFFFFFFF`.
    crc: u32,
}

impl Crc32Stream {
    /// Creates a stream in its initial state (no data processed yet).
    pub fn new() -> Self {
        Self { crc: 0xFFFF_FFFF }
    }

    /// Folds `data` into the running checksum.
    ///
    /// Passing an empty slice leaves the state unchanged.
    pub fn update(&mut self, data: &[u8]) {
        self.crc = data.iter().fold(self.crc, |crc, &byte| {
            // Low byte of the state xor the input byte selects the table entry.
            (crc >> 8) ^ CRC32_TABLE[usize::from((crc as u8) ^ byte)]
        });
    }

    /// Returns the checksum of everything fed so far without consuming the
    /// stream; may be called repeatedly and followed by further updates.
    pub fn get(&self) -> u32 {
        // Final inversion (equivalent to xor with 0xFFFFFFFF).
        !self.crc
    }

    /// Consumes the stream and returns the checksum of everything fed to it.
    pub fn finalize(self) -> u32 {
        self.get()
    }

    /// Discards all processed data and returns the stream to its initial
    /// state so it can be reused.
    pub fn reset(&mut self) {
        self.crc = 0xFFFF_FFFF;
    }
}

impl Default for Crc32Stream {
    /// Equivalent to [`Crc32Stream::new`].
    fn default() -> Self {
        Self::new()
    }
}

/// Starts a new streaming CRC-32 computation.
///
/// Shorthand that hands back a [`Crc32Stream`] in its initial state, ready to
/// receive data.
pub fn crc32stream() -> Crc32Stream {
    Crc32Stream::default()
}

// ------------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Known-answer vectors (ASCII input, expected CRC-32) shared by the tests.
    const KNOWN_ANSWERS: &[(&str, u32)] = &[
        ("cat", 0x9E5E43A8),
        ("dog", 0x812C397D),
        ("bird", 0xA0BBAE0E),
        ("FISH", 0x09E1A987),
        ("snake", 0x516D6B9E),
        ("Python", 0xA378BD8E),
        ("This is a pen.", 0x3BCADC93),
        ("sakura", 0x01871032),
    ];

    /// Longer input used to compare implementations against `crc32basic`.
    const PANGRAM: &[u8] = b"The quick brown fox jumps over the lazy dog";

    /// Builds `len` bytes where byte `i` is `i * 7` truncated to eight bits.
    fn patterned_bytes(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i * 7) as u8).collect()
    }

    /// Checks `candidate` against the known answers, and against `crc32basic`
    /// on the pangram and on patterned inputs of every length in `lengths`.
    fn assert_matches_basic(candidate: fn(&[u8]) -> u32, lengths: std::ops::Range<usize>) {
        for &(text, expected) in KNOWN_ANSWERS {
            assert_eq!(candidate(text.as_bytes()), expected);
        }

        assert_eq!(candidate(PANGRAM), crc32basic(PANGRAM));

        for len in lengths {
            let data = patterned_bytes(len);
            assert_eq!(candidate(&data), crc32basic(&data), "Failed for length {}", len);
        }
    }

    #[test]
    fn it_works() {
        for &(text, expected) in KNOWN_ANSWERS {
            assert_eq!(crc32basic(text.as_bytes()), expected);
        }
    }

    #[test]
    fn test_crc32_optimized() {
        // Lengths 0..20 cross the 8-byte group boundary twice.
        assert_matches_basic(crc32, 0..20);
    }

    #[test]
    fn test_crc32speed_correctness() {
        // Lengths 0..50 cross the 32-byte block boundary once.
        assert_matches_basic(crc32speed, 0..50);
    }

    #[test]
    fn test_crc32stream() {
        // A single update through the convenience constructor.
        let mut stream = crc32stream();
        stream.update(b"cat");
        assert_eq!(stream.finalize(), 0x9E5E43A8);

        // Several updates must equal one pass over the concatenated input.
        let mut stream = Crc32Stream::new();
        for piece in ["The quick ", "brown fox ", "jumps over ", "the lazy dog"].iter() {
            stream.update(piece.as_bytes());
        }
        assert_eq!(stream.finalize(), crc32basic(PANGRAM));

        // `get` is non-consuming and repeatable; `finalize` agrees with it.
        let mut stream = Crc32Stream::new();
        stream.update(b"dog");
        assert_eq!(stream.get(), 0x812C397D);
        assert_eq!(stream.get(), 0x812C397D);
        assert_eq!(stream.finalize(), 0x812C397D);

        // `reset` discards everything fed before it.
        let mut stream = Crc32Stream::new();
        stream.update(b"test");
        stream.reset();
        stream.update(b"cat");
        assert_eq!(stream.finalize(), 0x9E5E43A8);

        // Every possible chunk size over the same input gives the same result.
        let test_data = b"This is a pen.";
        let expected = crc32basic(test_data);
        for chunk_size in 1..=test_data.len() {
            let mut stream = Crc32Stream::new();
            test_data
                .chunks(chunk_size)
                .for_each(|chunk| stream.update(chunk));
            assert_eq!(stream.finalize(), expected, "Failed for chunk size {}", chunk_size);
        }

        // A stream that never saw data matches the checksum of empty input.
        assert_eq!(Crc32Stream::new().finalize(), crc32basic(b""));

        // One-byte updates.
        let mut stream = Crc32Stream::new();
        b"Python".iter().for_each(|&byte| stream.update(&[byte]));
        assert_eq!(stream.finalize(), 0xA378BD8E);
    }

    #[test]
    fn test_crc32_speed_test() {
        use std::time::{Duration, Instant};

        /// How many times each implementation hashes the buffer per size.
        const ROUNDS: u32 = 100;

        // Runs `hash` over `data` ROUNDS times; returns the last checksum and
        // the total elapsed time.
        let time_rounds = |hash: fn(&[u8]) -> u32, data: &[u8]| -> (u32, Duration) {
            let start = Instant::now();
            let mut last = 0u32;
            for _ in 0..ROUNDS {
                last = hash(data);
            }
            (last, start.elapsed())
        };

        // 1KB, 10KB, 100KB, 1MB
        for &size in [1024usize, 10240, 102400, 1024000].iter() {
            let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();

            // Byte-at-a-time reference.
            let (result_basic, duration_basic) = time_rounds(crc32basic, &data);
            // 8-byte grouped variant.
            let (result_optimized, duration_optimized) = time_rounds(crc32, &data);
            // 32-byte block variant.
            let (result_speed, duration_speed) = time_rounds(crc32speed, &data);

            // All three implementations must agree.
            assert_eq!(result_basic, result_optimized);
            assert_eq!(result_basic, result_speed);

            // Throughput in MB/s for a single pass (total time / ROUNDS).
            let mb = size as f64 / 1_000_000.0;
            let per_pass = |total: Duration| total.as_secs_f64() / f64::from(ROUNDS);
            let throughput_basic = mb / per_pass(duration_basic);
            let throughput_optimized = mb / per_pass(duration_optimized);
            let throughput_speed = mb / per_pass(duration_speed);

            println!("Data size: {} bytes ({:.2} KB)", size, size as f64 / 1024.0);
            println!("  crc32basic:  {:?} ({:.2} MB/s)", duration_basic, throughput_basic);
            println!("  crc32:       {:?} ({:.2} MB/s)", duration_optimized, throughput_optimized);
            println!("  crc32speed:  {:?} ({:.2} MB/s)", duration_speed, throughput_speed);
            println!("  Speedup (crc32):      {:.2}x", 
                duration_basic.as_secs_f64() / duration_optimized.as_secs_f64());
            println!("  Speedup (crc32speed): {:.2}x", 
                duration_basic.as_secs_f64() / duration_speed.as_secs_f64());
            println!("  Throughput improvement: crc32={:.1}%, crc32speed={:.1}%",
                (throughput_optimized / throughput_basic - 1.0) * 100.0,
                (throughput_speed / throughput_basic - 1.0) * 100.0);
            println!();
        }
    }
}