Skip to main content

commonware_cryptography/crc32/
mod.rs

1//! CRC32C implementation of the `Hasher` trait.
2//!
3//! This implementation uses the `crc-fast` crate to generate CRC32C (iSCSI/Castagnoli)
4//! checksums as specified in RFC 3720. CRC32C uses polynomial 0x1EDC6F41.
5//!
6//! # Warning
7//!
8//! CRC32 is not a cryptographic hash function. It is designed for error
9//! detection, not security. Use SHA-256 or Blake3 for cryptographic purposes.
10//!
11//! # Example
12//!
13//! ```rust
14//! use commonware_cryptography::{Hasher, Crc32};
15//!
16//! // One-shot checksum (returns u32 directly)
17//! let checksum: u32 = Crc32::checksum(b"hello world");
18//!
19//! // Using the Hasher trait
20//! let mut hasher = Crc32::new();
21//! hasher.update(b"hello ");
22//! hasher.update(b"world");
23//! let digest = hasher.finalize();
24//!
25//! // Convert digest to u32
26//! assert_eq!(digest.as_u32(), checksum);
27//! ```
28
29use crate::Hasher;
30use bytes::{Buf, BufMut};
31use commonware_codec::{Error as CodecError, FixedSize, Read, ReadExt, Write};
32use commonware_formatting::Hex;
33use commonware_math::algebra::Random;
34use commonware_utils::{Array, Span};
35use core::{
36    fmt::{Debug, Display},
37    ops::Deref,
38};
39use rand_core::CryptoRngCore;
40
/// Size of a CRC32 checksum in bytes.
///
/// Derived from the width of `u32`, the integer type the checksum is exposed as.
const SIZE: usize = (u32::BITS / 8) as usize;
43
/// The CRC32 algorithm used (CRC32C/iSCSI/Castagnoli).
///
/// Passed both to the one-shot `crc_fast::checksum` call and to every
/// `crc_fast::Digest` constructed in this module, so the two paths always
/// compute the same CRC variant.
const ALGORITHM: crc_fast::CrcAlgorithm = crc_fast::CrcAlgorithm::Crc32Iscsi;
46
/// CRC32C hasher.
///
/// Uses the iSCSI polynomial (0x1EDC6F41) as specified in RFC 3720.
///
/// Thin wrapper around a `crc_fast::Digest`, which holds the running checksum
/// between `update` calls.
#[derive(Debug)]
pub struct Crc32 {
    /// Incremental CRC state; always constructed with `ALGORITHM`.
    inner: crc_fast::Digest,
}
54
55impl Default for Crc32 {
56    fn default() -> Self {
57        Self {
58            inner: crc_fast::Digest::new(ALGORITHM),
59        }
60    }
61}
62
63impl Clone for Crc32 {
64    fn clone(&self) -> Self {
65        // We manually implement `Clone` to avoid cloning the hasher state.
66        Self::default()
67    }
68}
69
70impl Crc32 {
71    /// Compute a CRC32 checksum of the given data (one-shot).
72    ///
73    /// Returns the checksum as a `u32` directly.
74    #[inline]
75    pub fn checksum(data: &[u8]) -> u32 {
76        crc_fast::checksum(ALGORITHM, data) as u32
77    }
78}
79
80impl Hasher for Crc32 {
81    type Digest = Digest;
82
83    fn update(&mut self, message: &[u8]) -> &mut Self {
84        self.inner.update(message);
85        self
86    }
87
88    fn finalize(&mut self) -> Self::Digest {
89        Self::Digest::from(self.inner.finalize_reset() as u32)
90    }
91
92    fn reset(&mut self) -> &mut Self {
93        self.inner = crc_fast::Digest::new(ALGORITHM);
94        self
95    }
96}
97
/// Digest of a CRC32 hashing operation (4 bytes).
///
/// The wrapped array holds the checksum in big-endian byte order: the
/// `From<u32>` impl stores `to_be_bytes()` and `as_u32` reads the bytes back
/// with `from_be_bytes`.
#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
// Same in-memory layout as the wrapped `[u8; SIZE]`.
#[repr(transparent)]
pub struct Digest(pub [u8; SIZE]);
102
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for Digest {
    /// Builds a digest by hashing between 0 and 256 bytes drawn from `u`.
    ///
    /// Any error reported by `u` while drawing the length or the bytes is
    /// propagated to the caller.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // Draw a length first, then that many input bytes, and checksum them
        let len: usize = u.int_in_range(0..=256)?;
        Ok(Crc32::hash(u.bytes(len)?))
    }
}
112
113impl Digest {
114    /// Get the digest as a `u32` value.
115    #[inline]
116    pub const fn as_u32(&self) -> u32 {
117        u32::from_be_bytes(self.0)
118    }
119}
120
121impl Write for Digest {
122    fn write(&self, buf: &mut impl BufMut) {
123        self.0.write(buf);
124    }
125}
126
127impl Read for Digest {
128    type Cfg = ();
129
130    fn read_cfg(buf: &mut impl Buf, _: &()) -> Result<Self, CodecError> {
131        let array = <[u8; SIZE]>::read(buf)?;
132        Ok(Self(array))
133    }
134}
135
impl FixedSize for Digest {
    // The right-hand `SIZE` is the module-level constant (4 bytes), not this
    // associated constant.
    const SIZE: usize = SIZE;
}
139
// Empty impl: no items are defined here; it only declares that `Digest`
// satisfies `commonware_utils::Span`.
impl Span for Digest {}
141
// Empty impl: no items are defined here; it only declares that `Digest`
// satisfies `commonware_utils::Array`.
impl Array for Digest {}
143
144impl From<[u8; SIZE]> for Digest {
145    fn from(value: [u8; SIZE]) -> Self {
146        Self(value)
147    }
148}
149
150impl From<u32> for Digest {
151    fn from(value: u32) -> Self {
152        Self(value.to_be_bytes())
153    }
154}
155
156impl AsRef<[u8]> for Digest {
157    fn as_ref(&self) -> &[u8] {
158        &self.0
159    }
160}
161
162impl Deref for Digest {
163    type Target = [u8];
164    fn deref(&self) -> &[u8] {
165        &self.0
166    }
167}
168
169impl Debug for Digest {
170    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
171        write!(f, "{}", Hex(&self.0))
172    }
173}
174
175impl Display for Digest {
176    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
177        write!(f, "{}", Hex(&self.0))
178    }
179}
180
impl crate::Digest for Digest {
    /// All-zero digest.
    ///
    /// Note: this equals the CRC32C of empty input (the tests in this file
    /// check `verify(b"", 0x00000000)`), so `EMPTY` is also a value the hasher
    /// can produce.
    const EMPTY: Self = Self([0u8; SIZE]);
}
184
185impl Random for Digest {
186    fn random(mut rng: impl CryptoRngCore) -> Self {
187        let mut array = [0u8; SIZE];
188        rng.fill_bytes(&mut array);
189        Self(array)
190    }
191}
192
193#[cfg(test)]
194mod tests {
195    use super::*;
196    use crate::Hasher;
197    use commonware_codec::{DecodeExt, Encode};
198    use crc::{Crc, CRC_32_ISCSI};
199
    /// Reference CRC32C implementation from the [`crc`](https://crates.io/crates/crc) crate.
    ///
    /// Used by the tests below as a second implementation to compare `Crc32`
    /// against.
    const CRC32C_REF: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);
202
203    /// Verify checksum against both the reference `crc` crate and our implementation.
204    fn verify(data: &[u8], expected: u32) {
205        assert_eq!(CRC32C_REF.checksum(data), expected);
206        assert_eq!(Crc32::checksum(data), expected);
207    }
208
209    /// Generate deterministic test data: sequential bytes wrapping at 256.
210    fn sequential_data(len: usize) -> Vec<u8> {
211        (0..len).map(|i| (i & 0xFF) as u8).collect()
212    }
213
214    /// Test vectors from RFC 3720 Appendix B.4 "CRC Examples".
215    /// https://datatracker.ietf.org/doc/html/rfc3720#appendix-B.4
216    #[test]
217    fn rfc3720_test_vectors() {
218        // 32 bytes of zeros -> CRC = aa 36 91 8a
219        verify(&[0x00; 32], 0x8A9136AA);
220
221        // 32 bytes of 0xFF -> CRC = 43 ab a8 62
222        verify(&[0xFF; 32], 0x62A8AB43);
223
224        // 32 bytes ascending (0x00..0x1F) -> CRC = 4e 79 dd 46
225        let ascending: Vec<u8> = (0x00..0x20).collect();
226        verify(&ascending, 0x46DD794E);
227
228        // 32 bytes descending (0x1F..0x00) -> CRC = 5c db 3f 11
229        let descending: Vec<u8> = (0x00..0x20).rev().collect();
230        verify(&descending, 0x113FDB5C);
231
232        // iSCSI SCSI Read (10) Command PDU -> CRC = 56 3a 96 d9
233        let iscsi_read_pdu: [u8; 48] = [
234            0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
235            0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14,
236            0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
237            0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
238        ];
239        verify(&iscsi_read_pdu, 0xD9963A56);
240    }
241
242    /// Additional test vectors from external sources.
243    /// https://reveng.sourceforge.io/crc-catalogue/17plus.htm#crc.cat.crc-32c
244    /// https://github.com/ICRAR/crc32c/blob/master/test/test_crc32c.py
245    /// https://github.com/google/leveldb/blob/main/util/crc32c_test.cc
246    #[test]
247    fn external_test_vectors() {
248        // CRC catalogue test vector
249        verify(b"", 0x00000000);
250        verify(b"123456789", 0xE3069283);
251
252        // ICRAR test vectors
253        verify(b"23456789", 0xBFE92A83);
254        verify(b"The quick brown fox jumps over the lazy dog", 0x22620404);
255
256        // LevelDB test vector: sequential 0x01-0xF0 (240 bytes)
257        let sequential_240: Vec<u8> = (0x01..=0xF0).collect();
258        verify(&sequential_240, 0x24C5D375);
259    }
260
261    /// SIMD boundary tests.
262    ///
263    /// SIMD implementations (PCLMULQDQ, ARM CRC) have different code paths
264    /// based on input size. These tests verify correctness at critical boundaries.
265    #[test]
266    fn simd_boundaries() {
267        // Critical sizes where SIMD implementations change code paths:
268        // - 16: single 128-bit register
269        // - 32: two 128-bit registers / one 256-bit register
270        // - 64: fold-by-4 block size
271        // - 128: large data threshold
272        // - 256, 512, 1024: power-of-2 boundaries
273        // - 4096: page boundary (common in storage)
274        const BOUNDARY_SIZES: &[usize] = &[
275            0, 1, 2, 3, 4, 7, 8, 9, // Small sizes
276            15, 16, 17, // 128-bit boundary
277            31, 32, 33, // 256-bit boundary
278            63, 64, 65, // Fold-by-4 boundary
279            127, 128, 129, // Large threshold
280            255, 256, 257, // 256-byte boundary
281            511, 512, 513, // 512-byte boundary
282            1023, 1024, 1025, // 1KB boundary
283            4095, 4096, 4097, // Page boundary
284        ];
285
286        // Pre-computed expected values for sequential data pattern.
287        // Generated with the [`crc`](https://crates.io/crates/crc) crate.
288        const EXPECTED: &[(usize, u32)] = &[
289            (0, 0x00000000),
290            (1, 0x527D5351),
291            (2, 0x030AF4D1),
292            (3, 0x92FD4BFA),
293            (4, 0xD9331AA3),
294            (7, 0xA359ED4C),
295            (8, 0x8A2CBC3B),
296            (9, 0x7144C5A8),
297            (15, 0x68EF03F6),
298            (16, 0xD9C908EB),
299            (17, 0x38435E17),
300            (31, 0xE95CABCB),
301            (32, 0x46DD794E), // Matches RFC 3720
302            (33, 0x9F85A26D),
303            (63, 0x7A873004),
304            (64, 0xFB6D36EB),
305            (65, 0x694420FA),
306            (127, 0x6C31BD0C),
307            (128, 0x30D9C515),
308            (129, 0xF514629F),
309            (255, 0x8953C482),
310            (256, 0x9C44184B),
311            (257, 0x8A13A1CE),
312            (511, 0x35348950),
313            (512, 0xAE10EE5A),
314            (513, 0x6814B154),
315            (1023, 0x0C8F24D0),
316            (1024, 0x2CDF6E8F),
317            (1025, 0x8EB48B63),
318            (4095, 0xBCB5BD82),
319            (4096, 0x9C71FE32),
320            (4097, 0x83391BE9),
321        ];
322
323        assert_eq!(
324            BOUNDARY_SIZES,
325            EXPECTED.iter().map(|(size, _)| *size).collect::<Vec<_>>()
326        );
327
328        for &(size, expected) in EXPECTED {
329            let data = sequential_data(size);
330            verify(&data, expected);
331        }
332    }
333
334    /// Verify incremental hashing produces the same result regardless of chunk size.
335    #[test]
336    fn chunk_size_independence() {
337        let data = sequential_data(1024);
338        let expected = CRC32C_REF.checksum(&data);
339
340        // Test chunk sizes from 1 to 64 bytes
341        for chunk_size in 1..=64 {
342            let mut hasher = Crc32::new();
343            for chunk in data.chunks(chunk_size) {
344                hasher.update(chunk);
345            }
346            assert_eq!(hasher.finalize().as_u32(), expected);
347        }
348    }
349
350    /// Test with unaligned data by processing at different offsets within a buffer.
351    #[test]
352    fn alignment_independence() {
353        // Create a larger buffer and test CRC of a fixed-size window at different offsets
354        let base_data: Vec<u8> = (0..256).map(|i| i as u8).collect();
355        let test_len = 64;
356
357        // Get reference CRC for the first 64 bytes
358        let reference = CRC32C_REF.checksum(&base_data[..test_len]);
359
360        // Verify the same 64-byte pattern produces the same CRC regardless of where
361        // it appears in the source buffer (tests alignment handling)
362        for offset in 0..16 {
363            let data = &base_data[offset..offset + test_len];
364            let expected = CRC32C_REF.checksum(data);
365            assert_eq!(Crc32::checksum(data), expected);
366        }
367
368        // Also verify that the first 64 bytes always produce the reference CRC
369        verify(&base_data[..test_len], reference);
370    }
371
372    #[test]
373    fn test_crc32_hasher_trait() {
374        let msg = b"hello world";
375
376        // Generate initial hash using Hasher trait
377        let mut hasher = Crc32::new();
378        hasher.update(msg);
379        let digest = hasher.finalize();
380        assert!(Digest::decode(digest.as_ref()).is_ok());
381
382        // Verify against reference
383        let expected = CRC32C_REF.checksum(msg);
384        assert_eq!(digest.as_u32(), expected);
385
386        // Reuse hasher (should auto-reset after finalize)
387        hasher.update(msg);
388        let digest2 = hasher.finalize();
389        assert_eq!(digest, digest2);
390
391        // Test Hasher::hash convenience method
392        let hash = Crc32::hash(msg);
393        assert_eq!(hash.as_u32(), expected);
394    }
395
396    #[test]
397    fn test_crc32_len() {
398        assert_eq!(Digest::SIZE, SIZE);
399        assert_eq!(SIZE, 4);
400    }
401
402    #[test]
403    fn test_codec() {
404        let msg = b"hello world";
405        let mut hasher = Crc32::new();
406        hasher.update(msg);
407        let digest = hasher.finalize();
408
409        let encoded = digest.encode();
410        assert_eq!(encoded.len(), SIZE);
411        assert_eq!(encoded, digest.as_ref());
412
413        let decoded = Digest::decode(encoded).unwrap();
414        assert_eq!(digest, decoded);
415    }
416
417    #[test]
418    fn test_digest_from_u32() {
419        let value: u32 = 0xDEADBEEF;
420        let digest = Digest::from(value);
421        assert_eq!(digest.as_u32(), value);
422        assert_eq!(digest.0, [0xDE, 0xAD, 0xBE, 0xEF]);
423    }
424
425    #[test]
426    fn test_checksum_returns_u32() {
427        // Verify the one-shot checksum returns u32 directly
428        let checksum: u32 = Crc32::checksum(b"test");
429        let expected = CRC32C_REF.checksum(b"test");
430        assert_eq!(checksum, expected);
431    }
432
    // Codec conformance checks for `Digest`. Compiled only when the `arbitrary`
    // feature is enabled, matching the `Arbitrary` impl for `Digest` above.
    #[cfg(feature = "arbitrary")]
    mod conformance {
        use super::*;
        use commonware_codec::conformance::CodecConformance;

        // NOTE(review): presumably expands to the generated conformance tests for
        // the listed types — confirm against `commonware_conformance`.
        commonware_conformance::conformance_tests! {
            CodecConformance<Digest>,
        }
    }
442}