commonware_cryptography/crc32/
mod.rs

1//! CRC32C implementation of the `Hasher` trait.
2//!
3//! This implementation uses the `crc-fast` crate to generate CRC32C (iSCSI/Castagnoli)
4//! checksums as specified in RFC 3720. CRC32C uses polynomial 0x1EDC6F41.
5//!
6//! # Warning
7//!
8//! CRC32 is not a cryptographic hash function. It is designed for error
9//! detection, not security. Use SHA-256 or Blake3 for cryptographic purposes.
10//!
11//! # Example
12//!
13//! ```rust
14//! use commonware_cryptography::{Hasher, Crc32};
15//!
16//! // One-shot checksum (returns u32 directly)
17//! let checksum: u32 = Crc32::checksum(b"hello world");
18//!
19//! // Using the Hasher trait
20//! let mut hasher = Crc32::new();
21//! hasher.update(b"hello ");
22//! hasher.update(b"world");
23//! let digest = hasher.finalize();
24//!
25//! // Convert digest to u32
26//! assert_eq!(digest.as_u32(), checksum);
27//! ```
28
29use crate::Hasher;
30#[cfg(not(feature = "std"))]
31use alloc::vec;
32use bytes::{Buf, BufMut};
33use commonware_codec::{Error as CodecError, FixedSize, Read, ReadExt, Write};
34use commonware_math::algebra::Random;
35use commonware_utils::{hex, Array, Span};
36use core::{
37    fmt::{Debug, Display},
38    ops::Deref,
39};
40use rand_core::CryptoRngCore;
41
/// Byte length of a CRC32 checksum, i.e. the width of one `u32`.
const SIZE: usize = core::mem::size_of::<u32>();
44
45/// The CRC32 algorithm used (CRC32C/iSCSI/Castagnoli).
46const ALGORITHM: crc_fast::CrcAlgorithm = crc_fast::CrcAlgorithm::Crc32Iscsi;
47
48/// CRC32C hasher.
49///
50/// Uses the iSCSI polynomial (0x1EDC6F41) as specified in RFC 3720.
51#[derive(Debug)]
52pub struct Crc32 {
53    inner: crc_fast::Digest,
54}
55
56impl Default for Crc32 {
57    fn default() -> Self {
58        Self {
59            inner: crc_fast::Digest::new(ALGORITHM),
60        }
61    }
62}
63
64impl Clone for Crc32 {
65    fn clone(&self) -> Self {
66        // We manually implement `Clone` to avoid cloning the hasher state.
67        Self::default()
68    }
69}
70
71impl Crc32 {
72    /// Compute a CRC32 checksum of the given data (one-shot).
73    ///
74    /// Returns the checksum as a `u32` directly.
75    #[inline]
76    pub fn checksum(data: &[u8]) -> u32 {
77        crc_fast::checksum(ALGORITHM, data) as u32
78    }
79}
80
81impl Hasher for Crc32 {
82    type Digest = Digest;
83
84    fn update(&mut self, message: &[u8]) -> &mut Self {
85        self.inner.update(message);
86        self
87    }
88
89    fn finalize(&mut self) -> Self::Digest {
90        Self::Digest::from(self.inner.finalize_reset() as u32)
91    }
92
93    fn reset(&mut self) -> &mut Self {
94        self.inner = crc_fast::Digest::new(ALGORITHM);
95        self
96    }
97}
98
99/// Digest of a CRC32 hashing operation (4 bytes).
100#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
101#[repr(transparent)]
102pub struct Digest(pub [u8; SIZE]);
103
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for Digest {
    /// Builds a digest by checksumming between 0 and 256 bytes drawn from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // Choose the input length first, then pull exactly that many bytes to hash.
        let len = u.int_in_range(0..=256)?;
        u.bytes(len).map(Crc32::hash)
    }
}
113
114impl Digest {
115    /// Get the digest as a `u32` value.
116    #[inline]
117    pub const fn as_u32(&self) -> u32 {
118        u32::from_be_bytes(self.0)
119    }
120}
121
122impl Write for Digest {
123    fn write(&self, buf: &mut impl BufMut) {
124        self.0.write(buf);
125    }
126}
127
128impl Read for Digest {
129    type Cfg = ();
130
131    fn read_cfg(buf: &mut impl Buf, _: &()) -> Result<Self, CodecError> {
132        let array = <[u8; SIZE]>::read(buf)?;
133        Ok(Self(array))
134    }
135}
136
137impl FixedSize for Digest {
138    const SIZE: usize = SIZE;
139}
140
141impl Span for Digest {}
142
143impl Array for Digest {}
144
145impl From<[u8; SIZE]> for Digest {
146    fn from(value: [u8; SIZE]) -> Self {
147        Self(value)
148    }
149}
150
151impl From<u32> for Digest {
152    fn from(value: u32) -> Self {
153        Self(value.to_be_bytes())
154    }
155}
156
157impl AsRef<[u8]> for Digest {
158    fn as_ref(&self) -> &[u8] {
159        &self.0
160    }
161}
162
163impl Deref for Digest {
164    type Target = [u8];
165    fn deref(&self) -> &[u8] {
166        &self.0
167    }
168}
169
170impl Debug for Digest {
171    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
172        write!(f, "{}", hex(&self.0))
173    }
174}
175
176impl Display for Digest {
177    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
178        write!(f, "{}", hex(&self.0))
179    }
180}
181
182impl crate::Digest for Digest {
183    const EMPTY: Self = Self([0u8; SIZE]);
184}
185
186impl Random for Digest {
187    fn random(mut rng: impl CryptoRngCore) -> Self {
188        let mut array = [0u8; SIZE];
189        rng.fill_bytes(&mut array);
190        Self(array)
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Hasher;
    use commonware_codec::{DecodeExt, Encode};
    use crc::{Crc, CRC_32_ISCSI};

    /// Reference CRC32C implementation from the [`crc`](https://crates.io/crates/crc) crate.
    const CRC32C_REF: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);

    /// Assert that both the reference `crc` crate and our implementation produce
    /// `expected` for `data`.
    fn verify(data: &[u8], expected: u32) {
        assert_eq!(CRC32C_REF.checksum(data), expected);
        assert_eq!(Crc32::checksum(data), expected);
    }

    /// Generate deterministic test data: sequential bytes wrapping at 256.
    fn sequential_data(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i & 0xFF) as u8).collect()
    }

    /// Test vectors from RFC 3720 Appendix B.4 "CRC Examples".
    /// https://datatracker.ietf.org/doc/html/rfc3720#appendix-B.4
    ///
    /// Each comment lists the CRC bytes in the order the RFC prints them; the `u32`
    /// constant is that byte sequence read as a little-endian integer.
    #[test]
    fn rfc3720_test_vectors() {
        // 32 bytes of zeros -> CRC = aa 36 91 8a
        verify(&[0x00; 32], 0x8A9136AA);

        // 32 bytes of 0xFF -> CRC = 43 ab a8 62
        verify(&[0xFF; 32], 0x62A8AB43);

        // 32 bytes ascending (0x00..0x1F) -> CRC = 4e 79 dd 46
        let ascending: Vec<u8> = (0x00..0x20).collect();
        verify(&ascending, 0x46DD794E);

        // 32 bytes descending (0x1F..0x00) -> CRC = 5c db 3f 11
        let descending: Vec<u8> = (0x00..0x20).rev().collect();
        verify(&descending, 0x113FDB5C);

        // iSCSI SCSI Read (10) Command PDU -> CRC = 56 3a 96 d9
        let iscsi_read_pdu: [u8; 48] = [
            0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14,
            0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        verify(&iscsi_read_pdu, 0xD9963A56);
    }

    /// Additional test vectors from external sources.
    /// https://reveng.sourceforge.io/crc-catalogue/17plus.htm#crc.cat.crc-32c
    /// https://github.com/ICRAR/crc32c/blob/master/test/test_crc32c.py
    /// https://github.com/google/leveldb/blob/main/util/crc32c_test.cc
    #[test]
    fn external_test_vectors() {
        // CRC catalogue test vector
        verify(b"", 0x00000000);
        verify(b"123456789", 0xE3069283);

        // ICRAR test vectors
        verify(b"23456789", 0xBFE92A83);
        verify(b"The quick brown fox jumps over the lazy dog", 0x22620404);

        // LevelDB test vector: sequential 0x01-0xF0 (240 bytes)
        let sequential_240: Vec<u8> = (0x01..=0xF0).collect();
        verify(&sequential_240, 0x24C5D375);
    }

    /// SIMD boundary tests.
    ///
    /// SIMD implementations (PCLMULQDQ, ARM CRC) have different code paths
    /// based on input size. These tests verify correctness at critical boundaries.
    #[test]
    fn simd_boundaries() {
        // Critical sizes where SIMD implementations change code paths:
        // - 16: single 128-bit register
        // - 32: two 128-bit registers / one 256-bit register
        // - 64: fold-by-4 block size
        // - 128: large data threshold
        // - 256, 512, 1024: power-of-2 boundaries
        // - 4096: page boundary (common in storage)
        const BOUNDARY_SIZES: &[usize] = &[
            0, 1, 2, 3, 4, 7, 8, 9, // Small sizes
            15, 16, 17, // 128-bit boundary
            31, 32, 33, // 256-bit boundary
            63, 64, 65, // Fold-by-4 boundary
            127, 128, 129, // Large threshold
            255, 256, 257, // 256-byte boundary
            511, 512, 513, // 512-byte boundary
            1023, 1024, 1025, // 1KB boundary
            4095, 4096, 4097, // Page boundary
        ];

        // Pre-computed expected values for sequential data pattern.
        // Generated with the [`crc`](https://crates.io/crates/crc) crate.
        const EXPECTED: &[(usize, u32)] = &[
            (0, 0x00000000),
            (1, 0x527D5351),
            (2, 0x030AF4D1),
            (3, 0x92FD4BFA),
            (4, 0xD9331AA3),
            (7, 0xA359ED4C),
            (8, 0x8A2CBC3B),
            (9, 0x7144C5A8),
            (15, 0x68EF03F6),
            (16, 0xD9C908EB),
            (17, 0x38435E17),
            (31, 0xE95CABCB),
            (32, 0x46DD794E), // Matches RFC 3720
            (33, 0x9F85A26D),
            (63, 0x7A873004),
            (64, 0xFB6D36EB),
            (65, 0x694420FA),
            (127, 0x6C31BD0C),
            (128, 0x30D9C515),
            (129, 0xF514629F),
            (255, 0x8953C482),
            (256, 0x9C44184B),
            (257, 0x8A13A1CE),
            (511, 0x35348950),
            (512, 0xAE10EE5A),
            (513, 0x6814B154),
            (1023, 0x0C8F24D0),
            (1024, 0x2CDF6E8F),
            (1025, 0x8EB48B63),
            (4095, 0xBCB5BD82),
            (4096, 0x9C71FE32),
            (4097, 0x83391BE9),
        ];

        // Guard against the two tables drifting apart: every boundary size must have
        // exactly one expected checksum, in the same order.
        assert_eq!(
            BOUNDARY_SIZES,
            EXPECTED.iter().map(|(size, _)| *size).collect::<Vec<_>>()
        );

        for &(size, expected) in EXPECTED {
            let data = sequential_data(size);
            verify(&data, expected);
        }
    }

    /// Verify incremental hashing produces the same result regardless of chunk size.
    #[test]
    fn chunk_size_independence() {
        let data = sequential_data(1024);
        let expected = CRC32C_REF.checksum(&data);

        // Test chunk sizes from 1 to 64 bytes
        for chunk_size in 1..=64 {
            let mut hasher = Crc32::new();
            for chunk in data.chunks(chunk_size) {
                hasher.update(chunk);
            }
            assert_eq!(hasher.finalize().as_u32(), expected);
        }
    }

    /// Test with unaligned data: a checksum must not depend on where the input starts
    /// in memory.
    #[test]
    fn alignment_independence() {
        const TEST_LEN: usize = 64;
        const OFFSETS: usize = 16;

        // Part 1: slide a fixed-size window across a larger buffer. The window content
        // changes with the offset, so each slice is compared against the reference
        // implementation's checksum of that same slice.
        let base_data = sequential_data(256);
        for offset in 0..OFFSETS {
            let window = &base_data[offset..offset + TEST_LEN];
            assert_eq!(
                Crc32::checksum(window),
                CRC32C_REF.checksum(window),
                "window starting at offset {offset}"
            );
        }

        // Part 2: place the *same* bytes at every start offset and require the same
        // checksum each time. Sixteen consecutive offsets cover every residue modulo
        // 16 regardless of how the scratch buffer itself happens to be aligned.
        let pattern = &base_data[..TEST_LEN];
        let reference = CRC32C_REF.checksum(pattern);
        verify(pattern, reference);

        // Filler differs from the pattern so stale bytes cannot mask a bad slice.
        let mut scratch = [0xA5u8; OFFSETS + TEST_LEN];
        for offset in 0..OFFSETS {
            scratch[offset..offset + TEST_LEN].copy_from_slice(pattern);
            let placed = &scratch[offset..offset + TEST_LEN];

            assert_eq!(
                Crc32::checksum(placed),
                reference,
                "one-shot checksum at offset {offset}"
            );

            let mut hasher = Crc32::new();
            hasher.update(placed);
            assert_eq!(
                hasher.finalize().as_u32(),
                reference,
                "incremental checksum at offset {offset}"
            );
        }
    }

    /// Exercise the `Hasher` trait surface: update/finalize, reuse after finalize, and
    /// the `hash` convenience method.
    #[test]
    fn test_crc32_hasher_trait() {
        let msg = b"hello world";

        // Generate initial hash using Hasher trait
        let mut hasher = Crc32::new();
        hasher.update(msg);
        let digest = hasher.finalize();
        assert!(Digest::decode(digest.as_ref()).is_ok());

        // Verify against reference
        let expected = CRC32C_REF.checksum(msg);
        assert_eq!(digest.as_u32(), expected);

        // Reuse hasher (should auto-reset after finalize)
        hasher.update(msg);
        let digest2 = hasher.finalize();
        assert_eq!(digest, digest2);

        // Test Hasher::hash convenience method
        let hash = Crc32::hash(msg);
        assert_eq!(hash.as_u32(), expected);
    }

    /// The codec-reported size and the module constant must both be 4 bytes.
    #[test]
    fn test_crc32_len() {
        assert_eq!(Digest::SIZE, SIZE);
        assert_eq!(SIZE, 4);
    }

    /// A digest must encode to exactly its raw bytes and decode back to itself.
    #[test]
    fn test_codec() {
        let msg = b"hello world";
        let mut hasher = Crc32::new();
        hasher.update(msg);
        let digest = hasher.finalize();

        let encoded = digest.encode();
        assert_eq!(encoded.len(), SIZE);
        assert_eq!(encoded, digest.as_ref());

        let decoded = Digest::decode(encoded).unwrap();
        assert_eq!(digest, decoded);
    }

    /// `From<u32>` must store big-endian bytes and round-trip through `as_u32`.
    #[test]
    fn test_digest_from_u32() {
        let value: u32 = 0xDEADBEEF;
        let digest = Digest::from(value);
        assert_eq!(digest.as_u32(), value);
        assert_eq!(digest.0, [0xDE, 0xAD, 0xBE, 0xEF]);
    }

    /// The one-shot helper must return a bare `u32` that matches the reference.
    #[test]
    fn test_checksum_returns_u32() {
        // Verify the one-shot checksum returns u32 directly
        let checksum: u32 = Crc32::checksum(b"test");
        let expected = CRC32C_REF.checksum(b"test");
        assert_eq!(checksum, expected);
    }

    // Codec conformance tests for `Digest`; compiled only with the `arbitrary` feature.
    #[cfg(feature = "arbitrary")]
    mod conformance {
        use super::*;
        use commonware_codec::conformance::CodecConformance;

        commonware_conformance::conformance_tests! {
            CodecConformance<Digest>,
        }
    }
}