crc64fast_nvme/
lib.rs

1// Copyright 2019 TiKV Project Authors. Licensed under MIT or Apache-2.0.
2
3//! `crc64fast-nvme`
4//! ===========
5//!
6//! SIMD-accelerated CRC-64/NVME computation
7//! (similar to [`crc32fast`](https://crates.io/crates/crc32fast)).
8//!
9//! ## Usage
10//!
11//! ### Rust
12//!
13//! ```rust
14//! use crc64fast_nvme::Digest;
15//!
16//! let mut c = Digest::new();
17//! c.write(b"hello ");
18//! c.write(b"world!");
19//! let checksum = c.sum64();
20//! assert_eq!(checksum, 0xd9160d1fa8e418e3);
21//! ```
22//! ### C-compatible shared library example (PHP)
23//!
24//! ```php
25//! $digest = $ffi->digest_new();
26//! $ffi->digest_write($digest, 'hello world!', 12);
27//! $checksum = $ffi->digest_sum64($digest); // 0xd9160d1fa8e418e3
28//! ```
29//!
//! Tracking issues for the unstable Rust features required by the
//! [experimental VPCLMULQDQ](https://github.com/awesomized/crc64fast-nvme?tab=readme-ov-file#experimental-vector-carry-less-multiplication-of-quadwords-vpclmulqdq-support)
//! support (which requires nightly builds):
33//!
34//! - [simd_ffi](https://github.com/rust-lang/rust/issues/27731)
35//! - [link_llvm_intrinsics](https://github.com/rust-lang/rust/issues/29602)
36//! - [avx512_target_feature](https://github.com/rust-lang/rust/issues/111137)
37
38#![cfg_attr(
39    feature = "vpclmulqdq",
40    feature(avx512_target_feature, stdarch_x86_avx512)
41)]
42
43use std::os::raw::c_char;
44use std::slice;
45
46mod pclmulqdq;
47mod table;
48
/// Signature shared by the CRC update routines a `Digest` can dispatch to.
///
/// `Digest::write` calls the routine with the current state and the input
/// bytes, and stores the returned value as the new state.
// NOTE(review): presumably `unsafe` because the accelerated routine needs CPU
// features that are only checked at runtime — confirm in `pclmulqdq`.
type UpdateFn = unsafe fn(u64, &[u8]) -> u64;
50
/// Represents an in-progress CRC-64 computation.
///
/// Cloning a `Digest` copies the accumulated state, so the clone can be fed
/// further data independently of the original.
#[derive(Clone)]
pub struct Digest {
    /// Update routine selected when the digest was constructed.
    computer: UpdateFn,
    /// Running CRC state; starts as all ones and is bit-inverted by `sum64`.
    state: u64,
}
57
58// begin C-compatible shared library methods
59
/// Opaque type for C for use in FFI (C-compatible shared library)
///
/// Wraps the raw pointer to the heap-allocated `Digest` that `digest_new`
/// creates and `digest_free` releases.
#[repr(C)]
pub struct DigestHandle(*mut Digest);
63
64/// Creates a new Digest (C-compatible shared library)
65#[no_mangle]
66pub extern "C" fn digest_new() -> *mut DigestHandle {
67    let digest = Box::new(Digest::new());
68    let handle = Box::new(DigestHandle(Box::into_raw(digest)));
69    Box::into_raw(handle)
70}
71
72/// Writes data to the Digest (C-compatible shared library)
73///
74/// # Safety
75///
76/// Uses unsafe method calls
77#[no_mangle]
78pub unsafe extern "C" fn digest_write(handle: *mut DigestHandle, data: *const c_char, len: usize) {
79    if handle.is_null() || data.is_null() {
80        return;
81    }
82
83    let digest = &mut *(*handle).0;
84    let bytes = slice::from_raw_parts(data as *const u8, len);
85    digest.write(bytes);
86}
87
88/// Calculates the CRC-64 checksum from the Digest (C-compatible shared library)
89///
90/// # Safety
91///
92/// Uses unsafe method calls
93#[no_mangle]
94pub unsafe extern "C" fn digest_sum64(handle: *const DigestHandle) -> u64 {
95    if handle.is_null() {
96        return 0;
97    }
98
99    let digest = &*(*handle).0;
100    digest.sum64()
101}
102
103/// Frees the Digest (C-compatible shared library)
104///
105/// # Safety
106///
107/// Uses unsafe method calls
108#[no_mangle]
109pub unsafe extern "C" fn digest_free(handle: *mut DigestHandle) {
110    if !handle.is_null() {
111        let handle = Box::from_raw(handle);
112        let _ = Box::from_raw(handle.0);
113    }
114}
115
116// end C-compatible shared library methods
117
118impl Digest {
119    /// Creates a new `Digest`.
120    ///
121    /// It will perform runtime CPU feature detection to determine which
122    /// algorithm to choose.
123    pub fn new() -> Self {
124        Self {
125            computer: pclmulqdq::get_update(),
126            state: !0,
127        }
128    }
129
130    /// Creates a new `Digest` using table-based algorithm.
131    pub fn new_table() -> Self {
132        Self {
133            computer: table::update,
134            state: !0,
135        }
136    }
137
138    /// Writes some data into the digest.
139    pub fn write(&mut self, bytes: &[u8]) {
140        unsafe {
141            self.state = (self.computer)(self.state, bytes);
142        }
143    }
144
145    /// Computes the current CRC-64/NVME value.
146    pub fn sum64(&self) -> u64 {
147        !self.state
148    }
149}
150
151impl Default for Digest {
152    fn default() -> Self {
153        Self::new()
154    }
155}
156
157impl core::hash::Hasher for Digest {
158    fn finish(&self) -> u64 {
159        self.sum64()
160    }
161
162    fn write(&mut self, bytes: &[u8]) {
163        self.write(bytes);
164    }
165}
166
167#[cfg(test)]
168mod tests {
169    use super::*;
170    use proptest::collection::size_range;
171    use proptest::prelude::*;
172    use std::fs::{read, write};
173    use std::ptr;
174    extern crate cbindgen;
175
    // CRC-64/NVME parameters for the reference `crc` crate implementation that
    // the property tests in this module compare `Digest` against.
    //
    // NVM Express® NVM Command Set Specification (Revision 1.0d, December 2023)
    //
    // https://nvmexpress.org/wp-content/uploads/NVM-Express-NVM-Command-Set-Specification-1.0d-2023.12.28-Ratified.pdf
    //
    // Note: The Check value published in the spec is incorrect (Section 5.2.1.3.4, Figure 120, page 83).
    const CRC_NVME: crc::Algorithm<u64> = crc::Algorithm {
        width: 64,
        poly: 0xAD93D23594C93659,
        init: 0xFFFFFFFFFFFFFFFF,
        refin: true,
        refout: true,
        xorout: 0xFFFFFFFFFFFFFFFF,
        // Same value the standard-vector tests expect for the input b"123456789".
        check: 0xae8b14860a799888,
        residue: 0x0000000000000000,
    };
193
194    #[test]
195    fn test_standard_vectors() {
196        static CASES: &[(&[u8], u64)] = &[
197            // from the NVM Express® NVM Command Set Specification (Revision 1.0d, December 2023),
198            // Section 5.2.1.3.5, Figure 122, page 84.
199            // https://nvmexpress.org/wp-content/uploads/NVM-Express-NVM-Command-Set-Specification-1.0d-2023.12.28-Ratified.pdf
200            // and the Linux kernel
201            // https://github.com/torvalds/linux/blob/f3813f4b287e480b1fcd62ca798d8556644b8278/crypto/testmgr.h#L3685-L3695
202            (&[0; 4096], 0x6482d367eb22b64e),
203            (&[255; 4096], 0xc0ddba7302eca3ac),
204
205            // from our own internal tests, since the Check value in the  NVM Express® NVM Command
206            // Set Specification (Revision 1.0d, December 2023) is incorrect (Section 5.2.1.3.4, Figure 120, page 83).
207            (b"123456789", 0xae8b14860a799888),
208
209            // updated values from the original CRC-64/XZ fork of this project
210            (b"", 0),
211            (b"@", 0x2808afa9582aa47),
212            (b"1\x97", 0xb4af0ae0feb08e0f),
213            (b"M\"\xdf", 0x85d7cd041a2a8a5d),
214            (b"l\xcd\x13\xd7", 0x1860820ea79b0fa3),
215
216            (&[0; 32], 0xcf3473434d4ecf3b),
217            (&[255; 32], 0xa0a06974c34d63c4),
218            (b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", 0xb9d9d4a8492cbd7f),
219
220            (&[0; 1024], 0x691bb2b09be5498a),
221
222            (b"hello world!", 0xd9160d1fa8e418e3),
223        ];
224
225        for (input, result) in CASES {
226            let mut hasher = Digest::new();
227            hasher.write(input);
228            assert_eq!(hasher.sum64(), *result, "test case {:x?}", input);
229        }
230    }
231
232    #[test]
233    fn test_core_hasher_impl() {
234        use core::hash::Hasher;
235
236        let mut hasher = Digest::new();
237
238        Hasher::write(&mut hasher, b"hello ");
239        Hasher::write(&mut hasher, b"world!");
240        assert_eq!(Hasher::finish(&hasher), 0xd9160d1fa8e418e3);
241    }
242
243    fn any_buffer() -> <Box<[u8]> as Arbitrary>::Strategy {
244        any_with::<Box<[u8]>>(size_range(..65536).lift())
245    }
246
    // Strategy yielding a byte buffer together with an index at which it can be
    // split: the buffer is drawn first, then the index is drawn from
    // `0..=bytes.len()`, so both ends of the buffer are valid split points.
    prop_compose! {
        fn bytes_and_split_index()
            (bytes in any_buffer())
            (index in 0..=bytes.len(), bytes in Just(bytes)) -> (Box<[u8]>, usize)
        {
            (bytes, index)
        }
    }
255
256    proptest! {
257        #[test]
258        fn equivalent_to_crc(bytes in any_buffer()) {
259            let mut hasher = Digest::new();
260            hasher.write(&bytes);
261
262            // CRC-64/NVME
263            let crc = crc::Crc::<u64>::new(&CRC_NVME);
264            let mut digest = crc.digest();
265            digest.update(&bytes);
266
267            prop_assert_eq!(hasher.sum64(), digest.finalize());
268        }
269
270        #[test]
271        fn concatenation((bytes, split_index) in bytes_and_split_index()) {
272            let mut hasher_1 = Digest::new();
273            hasher_1.write(&bytes);
274            let mut hasher_2 = Digest::new();
275            let (left, right) = bytes.split_at(split_index);
276            hasher_2.write(left);
277            hasher_2.write(right);
278            prop_assert_eq!(hasher_1.sum64(), hasher_2.sum64());
279        }
280
281        #[test]
282        fn state_cloning(left in any_buffer(), right in any_buffer()) {
283            let mut hasher_1 = Digest::new();
284            hasher_1.write(&left);
285            let mut hasher_2 = hasher_1.clone();
286            hasher_1.write(&right);
287            hasher_2.write(&right);
288            prop_assert_eq!(hasher_1.sum64(), hasher_2.sum64());
289        }
290    }
291
292    // test the FFI Digest functions
293    #[test]
294    fn test_ffi_digest_lifecycle() {
295        unsafe {
296            // Create new digest
297            let handle = digest_new();
298            assert!(!handle.is_null(), "Digest creation failed");
299
300            // Write some data
301            let data = b"hello world!";
302            digest_write(handle, data.as_ptr() as *const c_char, data.len());
303
304            // Get sum and verify against known value
305            let sum = digest_sum64(handle);
306            assert_eq!(sum, 0xd9160d1fa8e418e3, "CRC64 calculation incorrect");
307
308            // Clean up
309            digest_free(handle);
310        }
311    }
312
313    #[test]
314    fn test_ffi_null_handling() {
315        unsafe {
316            // Test null handle with write
317            digest_write(ptr::null_mut(), b"test".as_ptr() as *const c_char, 4);
318
319            // Test null data with valid handle
320            let handle = digest_new();
321            digest_write(handle, ptr::null(), 0);
322
323            // Test null handle with sum64
324            let sum = digest_sum64(ptr::null());
325            assert_eq!(sum, 0, "Null handle should return 0");
326
327            // Clean up
328            digest_free(handle);
329        }
330    }
331
332    #[test]
333    fn test_ffi_empty_data() {
334        unsafe {
335            let handle = digest_new();
336
337            // Write empty data
338            digest_write(handle, b"".as_ptr() as *const c_char, 0);
339            let sum = digest_sum64(handle);
340            assert_eq!(sum, 0, "Empty data should produce 0");
341
342            digest_free(handle);
343        }
344    }
345
346    #[test]
347    fn test_ffi_binary_data() {
348        unsafe {
349            let handle = digest_new();
350
351            // Test with binary data including null bytes
352            let data = [0u8, 1, 2, 3, 0, 4, 5, 0, 6];
353            digest_write(handle, data.as_ptr() as *const c_char, data.len());
354
355            // Write additional data to test streaming
356            let more_data = [7u8, 8, 9];
357            digest_write(handle, more_data.as_ptr() as *const c_char, more_data.len());
358
359            let sum = digest_sum64(handle);
360            assert_ne!(sum, 0, "Binary data should produce non-zero CRC");
361
362            digest_free(handle);
363        }
364    }
365
366    #[test]
367    fn test_ffi_large_vectors() {
368        unsafe {
369            let zeros = vec![0u8; 4096];
370            let ones = vec![255u8; 4096];
371
372            let handle = digest_new();
373            digest_write(handle, zeros.as_ptr() as *const c_char, zeros.len());
374            let sum = digest_sum64(handle);
375            assert_eq!(sum, 0x6482d367eb22b64e, "Failed on 4096 zeros");
376            digest_free(handle);
377
378            let handle = digest_new();
379            digest_write(handle, ones.as_ptr() as *const c_char, ones.len());
380            let sum = digest_sum64(handle);
381            assert_eq!(sum, 0xc0ddba7302eca3ac, "Failed on 4096 ones");
382            digest_free(handle);
383        }
384    }
385
386    #[test]
387    fn test_ffi_standard_strings() {
388        unsafe {
389            let test_cases: Vec<(&[u8], u64)> = vec![(b"123456789", 0xae8b14860a799888), (b"", 0)];
390
391            for (input, expected) in test_cases {
392                let handle = digest_new();
393                digest_write(handle, input.as_ptr() as *const c_char, input.len());
394                let sum = digest_sum64(handle);
395                assert_eq!(sum, expected, "Failed on test vector: {:?}", input);
396                digest_free(handle);
397            }
398        }
399    }
400
401    #[test]
402    fn test_ffi_incremental_update() {
403        unsafe {
404            let handle = digest_new();
405
406            // Write data incrementally
407            let data = "hello world!";
408            for byte in data.bytes() {
409                digest_write(handle, &byte as *const u8 as *const c_char, 1);
410            }
411
412            let sum = digest_sum64(handle);
413            assert_eq!(sum, 0xd9160d1fa8e418e3, "Incremental update failed");
414
415            digest_free(handle);
416        }
417    }
418
419    #[test]
420    fn test_crc64fast_nvme_bindings() -> Result<(), String> {
421        const BINDING: &str = "crc64fast_nvme.h";
422        let crate_dir = std::env::var("CARGO_MANIFEST_DIR").map_err(|error| error.to_string())?;
423
424        let mut expected = Vec::new();
425        cbindgen::generate(crate_dir)
426            .map_err(|error| error.to_string())?
427            .write(&mut expected);
428
429        let actual = read(BINDING).map_err(|error| error.to_string())?;
430
431        if expected != actual {
432            write(BINDING, expected).map_err(|error| error.to_string())?;
433            return Err(format!(
434                "{BINDING} is not up-to-date, commit the generated file and try again"
435            ));
436        }
437
438        Ok(())
439    }
440}