// crc_fast/traits.rs

1// Copyright 2025 Don MacAskill. Licensed under MIT or Apache-2.0.
2
3#![allow(dead_code)]
4
5#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
6use crate::enums::Reflector;
7
8use crate::CrcParams;
9
10#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
11use crate::structs::CrcState;
12
13use core::ops::BitXor;
14
/// Marker trait describing a CRC variant's bit width and its backing integer type.
pub trait CrcWidth {
    /// The natural value type for this width; must be cheaply copyable and
    /// support bitwise XOR, the core operation of CRC arithmetic.
    type Value: Copy + BitXor<Output = Self::Value>;
    /// The width of the CRC, in bits.
    const WIDTH: u32;
}
22
23pub(crate) trait CrcCalculator {
24    fn update(data: &[u8], state: u64, params: CrcParams) -> u64 {
25        Self::calculate(state, data, params)
26    }
27
28    fn checksum(data: &[u8], params: CrcParams) -> u64 {
29        Self::calculate(params.init, data, params) ^ params.xorout
30    }
31
32    fn calculate(state: u64, data: &[u8], params: CrcParams) -> u64;
33}
34
35#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
36/// Trait defining architecture-specific SIMD operations for CRC calculation
37pub trait ArchOps: Sized + Copy + Clone {
38    /// The SIMD vector type used by this architecture
39    type Vector;
40
41    /// Process aligned blocks using potentially accelerated SIMD operations
42    ///
43    /// Returns true if the operation was handled by the accelerated path (for example,
44    /// using VPCLMULQDQ)
45    unsafe fn process_enhanced_simd_blocks<W: EnhancedCrcWidth>(
46        &self,
47        _state: &mut CrcState<Self::Vector>,
48        _first: &[Self::Vector; 8],
49        _rest: &[[Self::Vector; 8]],
50        _reflector: &Reflector<Self::Vector>,
51        _keys: [u64; 23],
52    ) -> bool
53    where
54        Self::Vector: Copy,
55    {
56        // Default implementation just returns false
57        // indicating the non-enhanced algorithm should be used
58        false
59    }
60
61    /// Create a SIMD vector from a u64 pair
62    ///
63    /// # Safety
64    /// May use native CPU features
65    unsafe fn create_vector_from_u64_pair(
66        &self,
67        high: u64,
68        low: u64,
69        reflected: bool,
70    ) -> Self::Vector;
71
72    /// Create a SIMD vector from a u64 pair without reflection
73    ///
74    /// TODO: I have no idea (yet) why CRC-32 doesn't use reflection, but CRC-64 does.
75    ///
76    /// # Safety
77    /// May use native CPU features
78    unsafe fn create_vector_from_u64_pair_non_reflected(&self, high: u64, low: u64)
79        -> Self::Vector;
80
81    /// Create a SIMD vector with a single u64 value
82    ///
83    /// # Safety
84    /// May use native CPU features
85    unsafe fn create_vector_from_u64(&self, value: u64, high: bool) -> Self::Vector;
86
87    /// Extract two u64 values from a SIMD vector
88    ///
89    /// # Safety
90    /// May use native CPU features
91    unsafe fn extract_u64s(&self, vector: Self::Vector) -> [u64; 2];
92
93    /// Extract two polynomial values (for carryless multiplication)
94    ///
95    /// # Safety
96    /// May use native CPU features
97    unsafe fn extract_poly64s(&self, vector: Self::Vector) -> [u64; 2];
98
99    /// XOR two SIMD vectors
100    ///
101    /// # Safety
102    /// May use native CPU features
103    unsafe fn xor_vectors(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;
104
105    /// Load bytes from memory into a SIMD vector
106    ///
107    /// # Safety
108    /// May use native CPU features
109    unsafe fn load_bytes(&self, ptr: *const u8) -> Self::Vector;
110
111    /// Load aligned bytes from memory
112    ///
113    /// # Safety
114    /// May use native CPU features
115    unsafe fn load_aligned(&self, ptr: *const [u64; 2]) -> Self::Vector;
116
117    //unsafe fn load_aligned(&self, ptr: &[u64]) -> Self::Vector;
118
119    //unsafe fn load_aligned_const(&self, ptr: *const [u64; 2]) -> Self::Vector;
120
121    /// Shuffle/permute bytes according to a mask
122    ///
123    /// # Safety
124    /// May use native CPU features
125    unsafe fn shuffle_bytes(&self, data: Self::Vector, mask: Self::Vector) -> Self::Vector;
126
127    /// Blend two vectors using a mask (select from a or b based on mask bits)
128    ///
129    /// # Safety
130    /// May use native CPU features
131    unsafe fn blend_vectors(
132        &self,
133        a: Self::Vector,
134        b: Self::Vector,
135        mask: Self::Vector,
136    ) -> Self::Vector;
137
138    /// Shift a vector left by 8 bytes
139    ///
140    /// # Safety
141    /// May use native CPU features
142    unsafe fn shift_left_8(&self, vector: Self::Vector) -> Self::Vector;
143
144    /// Create a vector with all bytes set to the same value
145    ///
146    /// # Safety
147    /// May use native CPU features
148    unsafe fn set_all_bytes(&self, value: u8) -> Self::Vector;
149
150    /// Create a comparison mask (for blending operations)
151    ///
152    /// # Safety
153    /// May use native CPU features
154    unsafe fn create_compare_mask(&self, vector: Self::Vector) -> Self::Vector;
155
156    /// AND two vectors
157    ///
158    /// # Safety
159    /// May use native CPU features
160    unsafe fn and_vectors(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;
161
162    /// Shift a vector right by 32 bits (4 bytes)
163    ///
164    /// # Safety
165    /// May use native CPU features
166    unsafe fn shift_right_32(&self, vector: Self::Vector) -> Self::Vector;
167
168    /// Shift a vector left by 32 bits (4 bytes)
169    ///
170    /// # Safety
171    /// May use native CPU features
172    unsafe fn shift_left_32(&self, vector: Self::Vector) -> Self::Vector;
173
174    /// Create a SIMD vector with a single u32 value
175    ///
176    /// # Safety
177    /// May use native CPU features
178    unsafe fn create_vector_from_u32(&self, value: u32, high: bool) -> Self::Vector;
179
180    /// Shift a vector left by 4 bytes (32 bits)
181    ///
182    /// # Safety
183    /// May use native CPU features
184    unsafe fn shift_left_4(&self, vector: Self::Vector) -> Self::Vector;
185
186    /// Shift a vector right by 4 bytes (32 bits)
187    ///
188    /// # Safety
189    /// May use native CPU features
190    unsafe fn shift_right_4(&self, vector: Self::Vector) -> Self::Vector;
191
192    /// Shift a vector right by 8 bytes (64 bits)
193    ///
194    /// # Safety
195    /// May use native CPU features
196    unsafe fn shift_right_8(&self, vector: Self::Vector) -> Self::Vector;
197
198    /// Shift a vector right by 5 bytes
199    unsafe fn shift_right_5(&self, vector: Self::Vector) -> Self::Vector;
200
201    /// Shift a vector right by 6 bytes
202    unsafe fn shift_right_6(&self, vector: Self::Vector) -> Self::Vector;
203
204    /// Shift a vector right by 7 bytes
205    unsafe fn shift_right_7(&self, vector: Self::Vector) -> Self::Vector;
206
207    /// Shift a vector right by 12 bytes
208    unsafe fn shift_right_12(&self, vector: Self::Vector) -> Self::Vector;
209
210    /// Shift a vector left by 12 bytes
211    unsafe fn shift_left_12(&self, vector: Self::Vector) -> Self::Vector;
212
213    /// Perform carryless multiplication with immediate value 0x00 (low 64 bits of both vectors)
214    unsafe fn carryless_mul_00(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;
215
216    /// Perform carryless multiplication with immediate value 0x01 (low 64 bits of a, high 64 bits of b)
217    unsafe fn carryless_mul_01(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;
218
219    /// Perform carryless multiplication with immediate value 0x10 (high 64 bits of a, low 64 bits of b)
220    unsafe fn carryless_mul_10(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;
221
222    /// Perform carryless multiplication with immediate value 0x11 (high 64 bits of both vectors)
223    unsafe fn carryless_mul_11(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;
224
225    /// XOR three vectors together: a XOR b XOR c
226    /// Uses native XOR3 instructions when available, falls back to two XOR operations otherwise
227    unsafe fn xor3_vectors(
228        &self,
229        a: Self::Vector,
230        b: Self::Vector,
231        c: Self::Vector,
232    ) -> Self::Vector;
233}
234
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
/// Enhanced CrcWidth trait with additional operations for generic CRC implementation
///
/// Extends [`CrcWidth`] with the width-specific SIMD steps (state setup, folding,
/// Barrett reduction, result extraction) driven through an [`ArchOps`] backend.
pub trait EnhancedCrcWidth: CrcWidth {
    /// Load constants specific to CRC width
    ///
    /// `reflected` selects the constant set for reflected (bit-reversed) vs.
    /// non-reflected polynomials.
    fn load_constants(reflected: bool) -> [[u64; 2]; 4];

    /// Create a CRC state with the initial value positioned correctly for the width
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_state<T: ArchOps>(
        value: Self::Value,
        reflected: bool,
        ops: &T,
    ) -> CrcState<T::Vector>
    where
        T::Vector: Copy;

    /// Extract the final CRC result from a SIMD vector
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn extract_result<T: ArchOps>(
        vector: T::Vector,
        reflected: bool,
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy;

    /// Perform width-specific folding operations using CLMUL and two XOR operations (or one XOR3)
    ///
    /// Folds `data_to_xor` into `state` in place using `coefficient`.
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn fold_16<T: ArchOps>(
        state: &mut CrcState<T::Vector>,
        coefficient: T::Vector,
        data_to_xor: T::Vector,
        ops: &T,
    ) where
        T::Vector: Copy;

    /// Fold width-specific number of bytes
    ///
    /// `high`/`low` are the two 64-bit halves of the folding coefficient.
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn fold_width<T: ArchOps>(state: &mut CrcState<T::Vector>, high: u64, low: u64, ops: &T)
    where
        T::Vector: Copy;

    /// Width-specific Barrett reduction
    ///
    /// Reduces the folded state to the final value using the polynomial `poly`
    /// and the precomputed Barrett constant `mu`.
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn barrett_reduction<T: ArchOps>(
        state: &CrcState<T::Vector>,
        poly: u64,
        mu: u64,
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy;

    /// Create a coefficient vector for folding operations
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_coefficient<T: ArchOps>(
        high: u64,
        low: u64,
        reflected: bool,
        ops: &T,
    ) -> T::Vector
    where
        T::Vector: Copy;

    /// Perform final reduction for the specific width
    ///
    /// `keys` is the per-parameter folding-key table (same layout as in
    /// [`ArchOps::process_enhanced_simd_blocks`]).
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn perform_final_reduction<T: ArchOps>(
        state: T::Vector,
        reflected: bool,
        keys: [u64; 23],
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy;

    /// Get the appropriate shuffle table pointer and offset for handling last bytes
    ///
    /// Returns a raw pointer into a static shuffle table plus an offset for the
    /// final partial block of `remaining_len` bytes.
    fn get_last_bytes_table_ptr(reflected: bool, remaining_len: usize) -> (*const u8, usize);
}