crc_fast/traits.rs
1// Copyright 2025 Don MacAskill. Licensed under MIT or Apache-2.0.
2
3#![allow(dead_code)]
4
5#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
6use crate::enums::Reflector;
7
8use crate::CrcParams;
9
10#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
11use crate::structs::CrcState;
12
13use core::ops::BitXor;
14
/// Marker trait for CRC width
///
/// Associates a bit width (e.g. 32 or 64) with the natural unsigned integer
/// type used to hold checksum values of that width.
pub trait CrcWidth {
    /// The width in bits
    const WIDTH: u32;
    /// The natural value type for this width (must support XOR, the core CRC operation)
    type Value: Copy + BitXor<Output = Self::Value>;
}
22
/// Internal trait for CRC calculation back-ends.
///
/// `update` and `checksum` are thin convenience wrappers around `calculate`,
/// which each back-end must provide. State values are carried as `u64`
/// regardless of the CRC width.
pub(crate) trait CrcCalculator {
    /// Continue an in-progress CRC computation: folds `data` into an existing
    /// `state` without applying init or final XOR.
    fn update(data: &[u8], state: u64, params: &CrcParams) -> u64 {
        Self::calculate(state, data, params)
    }

    /// Compute a complete one-shot checksum: starts from `params.init`,
    /// processes `data`, then applies the final `params.xorout`.
    fn checksum(data: &[u8], params: &CrcParams) -> u64 {
        Self::calculate(params.init, data, params) ^ params.xorout
    }

    /// Core CRC routine: folds `data` into `state` using `params`.
    /// Performs no init/xorout handling — the wrappers above do that.
    fn calculate(state: u64, data: &[u8], params: &CrcParams) -> u64;
}
34
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
/// Trait defining architecture-specific SIMD operations for CRC calculation
///
/// Implementations wrap the platform's 128-bit SIMD and carryless-multiply
/// primitives behind a common interface so the folding algorithm can be
/// written once, generically over `Self::Vector`.
pub trait ArchOps: Sized + Copy + Clone {
    /// The SIMD vector type used by this architecture
    type Vector;

    /// Process aligned blocks using potentially accelerated SIMD operations
    ///
    /// Returns true if the operation was handled by the accelerated path (for example,
    /// using VPCLMULQDQ)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn process_enhanced_simd_blocks<W: EnhancedCrcWidth>(
        &self,
        _state: &mut CrcState<Self::Vector>,
        _first: &[Self::Vector; 8],
        _rest: &[[Self::Vector; 8]],
        _reflector: &Reflector<Self::Vector>,
        _keys: &[u64; 23],
    ) -> bool
    where
        Self::Vector: Copy,
    {
        // Default implementation just returns false
        // indicating the non-enhanced algorithm should be used
        false
    }

    /// Create a SIMD vector from a u64 pair
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_vector_from_u64_pair(
        &self,
        high: u64,
        low: u64,
        reflected: bool,
    ) -> Self::Vector;

    /// Create a SIMD vector from a u64 pair without reflection
    ///
    /// TODO: I have no idea (yet) why CRC-32 doesn't use reflection, but CRC-64 does.
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_vector_from_u64_pair_non_reflected(&self, high: u64, low: u64)
        -> Self::Vector;

    /// Create a SIMD vector with a single u64 value
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_vector_from_u64(&self, value: u64, high: bool) -> Self::Vector;

    /// Extract two u64 values from a SIMD vector
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn extract_u64s(&self, vector: Self::Vector) -> [u64; 2];

    /// Extract two polynomial values (for carryless multiplication)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn extract_poly64s(&self, vector: Self::Vector) -> [u64; 2];

    /// XOR two SIMD vectors
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn xor_vectors(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;

    /// Load bytes from memory into a SIMD vector
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn load_bytes(&self, ptr: *const u8) -> Self::Vector;

    /// Load aligned bytes from memory
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn load_aligned(&self, ptr: *const [u64; 2]) -> Self::Vector;

    //unsafe fn load_aligned(&self, ptr: &[u64]) -> Self::Vector;

    //unsafe fn load_aligned_const(&self, ptr: *const [u64; 2]) -> Self::Vector;

    /// Shuffle/permute bytes according to a mask
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shuffle_bytes(&self, data: Self::Vector, mask: Self::Vector) -> Self::Vector;

    /// Blend two vectors using a mask (select from a or b based on mask bits)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn blend_vectors(
        &self,
        a: Self::Vector,
        b: Self::Vector,
        mask: Self::Vector,
    ) -> Self::Vector;

    /// Shift a vector left by 8 bytes
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_left_8(&self, vector: Self::Vector) -> Self::Vector;

    /// Create a vector with all bytes set to the same value
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn set_all_bytes(&self, value: u8) -> Self::Vector;

    /// Create a comparison mask (for blending operations)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_compare_mask(&self, vector: Self::Vector) -> Self::Vector;

    /// AND two vectors
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn and_vectors(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;

    /// Shift a vector right by 32 bits (4 bytes)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_right_32(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector left by 32 bits (4 bytes)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_left_32(&self, vector: Self::Vector) -> Self::Vector;

    /// Create a SIMD vector with a single u32 value
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_vector_from_u32(&self, value: u32, high: bool) -> Self::Vector;

    /// Shift a vector left by 4 bytes (32 bits)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_left_4(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector right by 4 bytes (32 bits)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_right_4(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector right by 8 bytes (64 bits)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_right_8(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector right by 5 bytes
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_right_5(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector right by 6 bytes
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_right_6(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector right by 7 bytes
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_right_7(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector right by 12 bytes
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_right_12(&self, vector: Self::Vector) -> Self::Vector;

    /// Shift a vector left by 12 bytes
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn shift_left_12(&self, vector: Self::Vector) -> Self::Vector;

    /// Perform carryless multiplication with immediate value 0x00 (low 64 bits of both vectors)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn carryless_mul_00(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;

    /// Perform carryless multiplication with immediate value 0x01 (low 64 bits of a, high 64 bits of b)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn carryless_mul_01(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;

    /// Perform carryless multiplication with immediate value 0x10 (high 64 bits of a, low 64 bits of b)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn carryless_mul_10(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;

    /// Perform carryless multiplication with immediate value 0x11 (high 64 bits of both vectors)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn carryless_mul_11(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector;

    /// XOR three vectors together: a XOR b XOR c
    /// Uses native XOR3 instructions when available, falls back to two XOR operations otherwise
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn xor3_vectors(
        &self,
        a: Self::Vector,
        b: Self::Vector,
        c: Self::Vector,
    ) -> Self::Vector;
}
234
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
/// Enhanced CrcWidth trait with additional operations for generic CRC implementation
///
/// Extends [`CrcWidth`] with the width-specific SIMD folding and reduction
/// steps, parameterized over an [`ArchOps`] implementation.
pub trait EnhancedCrcWidth: CrcWidth {
    /// Load constants specific to CRC width
    fn load_constants(reflected: bool) -> [[u64; 2]; 4];

    /// Create a CRC state with the initial value positioned correctly for the width
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_state<T: ArchOps>(
        value: Self::Value,
        reflected: bool,
        ops: &T,
    ) -> CrcState<T::Vector>
    where
        T::Vector: Copy;

    /// Extract the final CRC result from a SIMD vector
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn extract_result<T: ArchOps>(
        vector: T::Vector,
        reflected: bool,
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy;

    /// Perform width-specific folding operations using CLMUL and two XOR operations (or one XOR3)
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn fold_16<T: ArchOps>(
        state: &mut CrcState<T::Vector>,
        coefficient: T::Vector,
        data_to_xor: T::Vector,
        ops: &T,
    ) where
        T::Vector: Copy;

    /// Fold width-specific number of bytes
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn fold_width<T: ArchOps>(state: &mut CrcState<T::Vector>, high: u64, low: u64, ops: &T)
    where
        T::Vector: Copy;

    /// Width-specific Barrett reduction
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn barrett_reduction<T: ArchOps>(
        state: &CrcState<T::Vector>,
        poly: u64,
        mu: u64,
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy;

    /// Create a coefficient vector for folding operations
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn create_coefficient<T: ArchOps>(
        high: u64,
        low: u64,
        reflected: bool,
        ops: &T,
    ) -> T::Vector
    where
        T::Vector: Copy;

    /// Perform final reduction for the specific width
    ///
    /// # Safety
    /// May use native CPU features
    unsafe fn perform_final_reduction<T: ArchOps>(
        state: T::Vector,
        reflected: bool,
        keys: &[u64; 23],
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy;

    /// Get the appropriate shuffle table pointer and offset for handling last bytes
    fn get_last_bytes_table_ptr(reflected: bool, remaining_len: usize) -> (*const u8, usize);
}
305}