1#![cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
7
8use crate::algorithm;
9use crate::consts::CRC_CHUNK_SIZE;
10use crate::crc32::consts::{PSHUFB_SHF_TABLE_FORWARD, PSHUFB_SHF_TABLE_REVERSE, SIMD_CONSTANTS};
11use crate::enums::Reflector;
12use crate::structs::CrcState;
13use crate::traits::{ArchOps, EnhancedCrcWidth};
14
impl EnhancedCrcWidth for crate::structs::Width32 {
    /// Load the four 128-bit SIMD constants used by the CRC-32 kernels.
    ///
    /// One `[u64; 2]` per vector, in order:
    /// - `[0]`: byte indices `0x0f..0x00` — a full byte-reverse shuffle pattern
    ///   (same for both orientations)
    /// - `[1]`: all bytes `0x80` — the "zero this lane" marker for byte shuffles
    /// - `[2]`: a 96-bit mask; reflected clears the low 32 bits of the 128-bit
    ///   value, non-reflected clears the high 32 bits
    /// - `[3]`: zero
    #[inline(always)]
    fn load_constants(reflected: bool) -> [[u64; 2]; 4] {
        if reflected {
            [
                [0x08090a0b0c0d0e0f, 0x0001020304050607],
                [0x8080808080808080, 0x8080808080808080],
                [0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF],
                [0x0000000000000000, 0x0000000000000000],
            ]
        } else {
            [
                [0x08090a0b0c0d0e0f, 0x0001020304050607],
                [0x8080808080808080, 0x8080808080808080],
                [0xffffffffffffffff, 0x00000000ffffffff],
                [0x0000000000000000, 0x0000000000000000],
            ]
        }
    }

    /// Place the initial 32-bit CRC value into a SIMD vector.
    ///
    /// The boolean passed to `create_vector_from_u32` selects which end of the
    /// vector receives the value: low end for reflected algorithms, high end
    /// otherwise (mirroring where `extract_result` reads it back).
    #[inline(always)]
    unsafe fn create_state<T: ArchOps>(
        value: Self::Value,
        reflected: bool,
        ops: &T,
    ) -> CrcState<T::Vector>
    where
        T::Vector: Copy,
    {
        let vector = if reflected {
            ops.create_vector_from_u32(value, false)
        } else {
            ops.create_vector_from_u32(value, true)
        };

        CrcState {
            value: vector,
            reflected,
        }
    }

    /// Pull the final 32-bit CRC back out of a SIMD vector.
    ///
    /// Reflected: low 32 bits of the low 64-bit lane.
    /// Non-reflected: high 32 bits of the high 64-bit lane.
    #[inline(always)]
    unsafe fn extract_result<T: ArchOps>(vector: T::Vector, reflected: bool, ops: &T) -> Self::Value
    where
        T::Vector: Copy,
    {
        let u64s = ops.extract_u64s(vector);

        if reflected {
            u64s[0] as u32
        } else {
            (u64s[1] >> 32) as u32
        }
    }

    /// Fold 16 bytes into the running state.
    ///
    /// Performs two 64x64 carryless multiplies of the state against the fold
    /// coefficient pair in `coeff`, then XORs both products together with the
    /// next 16 bytes of input (`data_to_xor`). Which operand halves are
    /// multiplied (`_10`/`_01` vs `_00`/`_11`) depends on bit orientation.
    #[inline(always)]
    unsafe fn fold_16<T: ArchOps>(
        state: &mut CrcState<T::Vector>,
        coeff: T::Vector,
        data_to_xor: T::Vector,
        ops: &T,
    ) where
        T::Vector: Copy,
    {
        let (h, l) = if state.reflected {
            (
                ops.carryless_mul_10(state.value, coeff),
                ops.carryless_mul_01(state.value, coeff),
            )
        } else {
            (
                ops.carryless_mul_00(state.value, coeff),
                ops.carryless_mul_11(state.value, coeff),
            )
        };

        state.value = ops.xor3_vectors(h, l, data_to_xor);
    }

    /// Reduce the 128-bit folded state toward the 32-bit width in two CLMUL
    /// steps, using the precomputed fold constants `high` and `low`.
    ///
    /// Step 1 folds one 64-bit half of the state into the other (the shift
    /// direction follows the bit orientation). Step 2 folds the remaining
    /// 32-bit excess: the state is masked to keep 96 bits, the other 32 bits
    /// are multiplied by `high`, and the two are XORed. The masks here match
    /// `load_constants()[2]` for each orientation.
    #[inline(always)]
    unsafe fn fold_width<T: ArchOps>(state: &mut CrcState<T::Vector>, high: u64, low: u64, ops: &T)
    where
        T::Vector: Copy,
    {
        // Coefficients are placed in opposite 64-bit lanes so each clmul
        // selects exactly one of them.
        let coeff_vector_low = ops.create_vector_from_u64_pair_non_reflected(0, low);
        let coeff_vector_high = ops.create_vector_from_u64_pair_non_reflected(high, 0);

        // Step 1: fold 128 -> 96 bits (one 64-bit half times `low`, XOR the
        // byte-shifted other half).
        state.value = if state.reflected {
            ops.xor_vectors(
                ops.carryless_mul_00(state.value, coeff_vector_low),
                ops.shift_right_8(state.value),
            )
        } else {
            ops.xor_vectors(
                ops.carryless_mul_01(state.value, coeff_vector_low),
                ops.shift_left_8(state.value),
            )
        };

        // Step 2: fold 96 -> 64 bits. Mask keeps 96 bits of the state; the
        // remaining 32-bit chunk (isolated via a 12-byte shift) is multiplied
        // by `high` and folded back in.
        let (clmul, masked) = if state.reflected {
            let mask2 = ops.load_aligned(&[0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF]);
            let masked = ops.and_vectors(state.value, mask2);
            let shifted = ops.shift_left_12(state.value);
            let clmul = ops.carryless_mul_11(shifted, coeff_vector_high);

            (clmul, masked)
        } else {
            let mask2 = ops.load_aligned(&[0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF]);
            let masked = ops.and_vectors(state.value, mask2);
            let shifted = ops.shift_right_12(state.value);
            let clmul = ops.carryless_mul_10(shifted, coeff_vector_high);

            (clmul, masked)
        };

        state.value = ops.xor_vectors(clmul, masked);
    }

    /// Barrett reduction: final 64 -> 32 bit reduction without division.
    ///
    /// `mu` is the precomputed reciprocal constant and `poly` the CRC
    /// polynomial; both are packed into one vector so each clmul picks one.
    /// Two carryless multiplies (by `mu`, then by `poly`) followed by an XOR
    /// with the input yield the remainder; the 32-bit result is then read
    /// from the lane matching the orientation (cf. `extract_result`).
    #[inline(always)]
    unsafe fn barrett_reduction<T: ArchOps>(
        state: &CrcState<T::Vector>,
        poly: u64,
        mu: u64,
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy,
    {
        let x = state.value;
        let mu_poly = ops.create_vector_from_u64_pair_non_reflected(poly, mu);

        if state.reflected {
            let clmul1 = ops.carryless_mul_00(x, mu_poly);
            let clmul2 = ops.carryless_mul_10(clmul1, mu_poly);
            let xorred = ops.xor_vectors(x, clmul2);

            ops.extract_u64s(xorred)[1] as u32
        } else {
            // Non-reflected variant needs 4-byte realignment between steps.
            let clmul1 = ops.shift_left_4(ops.carryless_mul_01(x, mu_poly));
            let clmul2_shifted = ops.shift_left_4(ops.carryless_mul_11(clmul1, mu_poly));
            let final_xor = ops.xor_vectors(clmul2_shifted, x);

            (ops.extract_u64s(final_xor)[0] >> 32) as u32
        }
    }

    /// Pack a (high, low) fold-constant pair into a single coefficient vector.
    ///
    /// The `_reflected` flag is intentionally unused for 32-bit CRCs: the same
    /// lane layout serves both orientations here.
    #[inline(always)]
    unsafe fn create_coefficient<T: ArchOps>(
        high: u64,
        low: u64,
        _reflected: bool,
        ops: &T,
    ) -> T::Vector
    where
        T::Vector: Copy,
    {
        ops.create_vector_from_u64_pair_non_reflected(high, low)
    }

    /// Collapse a fully folded 128-bit state to the final 32-bit CRC:
    /// width fold with keys[6]/keys[5], then Barrett reduction with
    /// keys[8] (poly) and keys[7] (mu).
    #[inline(always)]
    unsafe fn perform_final_reduction<T: ArchOps>(
        state: T::Vector,
        reflected: bool,
        keys: [u64; 23],
        ops: &T,
    ) -> Self::Value
    where
        T::Vector: Copy,
    {
        let mut state = CrcState {
            value: state,
            reflected,
        };

        Self::fold_width(&mut state, keys[6], keys[5], ops);

        Self::barrett_reduction(&state, keys[8], keys[7], ops)
    }

    /// Return (table base pointer, byte offset) into the PSHUFB shift table
    /// used to build the shuffle mask for a partial (< 16 byte) final block.
    ///
    /// Reflected lookups index forward by `remaining_len`; non-reflected
    /// lookups index from the other end (`16 - remaining_len`).
    #[inline(always)]
    fn get_last_bytes_table_ptr(reflected: bool, remaining_len: usize) -> (*const u8, usize) {
        // NOTE(review): this local `use` duplicates the module-level import of
        // the same names; it is redundant but harmless.
        use crate::crc32::consts::{PSHUFB_SHF_TABLE_FORWARD, PSHUFB_SHF_TABLE_REVERSE};

        if reflected {
            let base_ptr = &PSHUFB_SHF_TABLE_REVERSE as *const _ as *const u8;
            let offset = remaining_len;

            (base_ptr, offset)
        } else {
            let base_ptr = &PSHUFB_SHF_TABLE_FORWARD as *const _ as *const u8;
            let offset = 16 - remaining_len;

            (base_ptr, offset)
        }
    }
}
228
/// Compute the CRC of a short input (0–15 bytes) in a single pass.
///
/// The data is staged into a zeroed 16-byte buffer, combined with the current
/// CRC state, aligned via PSHUFB shift-table masks, and reduced to the final
/// value. Very short inputs (< 4 bytes) skip the width fold and go straight
/// to Barrett reduction with keys[8] (poly) and keys[7] (mu).
///
/// # Safety
/// Caller must ensure the required SIMD target features (SSSE3/SSE4.1/
/// PCLMULQDQ on x86, AES/PMULL on aarch64) are available, per the
/// `target_feature` attributes below.
#[inline]
#[cfg_attr(
    any(target_arch = "x86", target_arch = "x86_64"),
    target_feature(enable = "ssse3,sse4.1,pclmulqdq")
)]
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "aes"))]
pub(crate) unsafe fn process_0_to_15<T: ArchOps, W: EnhancedCrcWidth>(
    data: &[u8],
    state: &mut CrcState<T::Vector>,
    reflector: &Reflector<T::Vector>,
    keys: [u64; 23],
    ops: &T,
) -> W::Value
where
    T::Vector: Copy,
{
    // Stage the input into a zero-padded 16-byte buffer. Reflected
    // algorithms right-align the bytes; non-reflected left-align them.
    let mut buffer = [0u8; CRC_CHUNK_SIZE];
    if state.reflected {
        buffer[CRC_CHUNK_SIZE - data.len()..].copy_from_slice(data);
    } else {
        buffer[..data.len()].copy_from_slice(data);
    }

    let len = data.len() as i32;
    // Base pointer for the reflected shuffle table (used in the branch below;
    // the non-reflected branch shadows `base` with the forward table).
    let base = &PSHUFB_SHF_TABLE_REVERSE as *const _ as *const u8;

    let xmm7 = if state.reflected {
        let data = ops.load_bytes(buffer.as_ptr());
        // SIMD_CONSTANTS[1] is the all-0x80 shuffle mask (see load_constants).
        let mask1 = ops.load_aligned(&SIMD_CONSTANTS[1]);

        // Table offset: lengths < 4 use the `8 + len` entries so the CRC can
        // be re-aligned after the extra 8-byte shift applied to `data` below.
        let ptr = base.add(if len < 4 {
            8 + len as usize
        } else {
            len as usize
        });
        let mask = ops.load_bytes(ptr);
        let modified_mask = ops.xor_vectors(mask, mask1);
        // Shift the existing CRC into position relative to the staged bytes.
        let shuffled_crc = ops.shuffle_bytes(state.value, modified_mask);

        ops.xor_vectors(
            if len < 4 {
                ops.shift_right_8(data)
            } else {
                data
            },
            shuffled_crc,
        )
    } else {
        let data_arr = ops.load_bytes(buffer.as_ptr());
        // Byte-reverse the loaded data to match the non-reflected bit order.
        let reflected_data = algorithm::reflect_bytes(reflector, data_arr, ops);
        let data_with_crc = ops.xor_vectors(reflected_data, state.value);

        if len < 4 {
            // Fewer than 4 bytes: shift the combined value into the reduction
            // position (shift amount presumably in bytes: 5/6/7 for lengths
            // 3/2/1 — TODO confirm against ArchOps) and reduce immediately.
            let result = match len {
                3 => ops.shift_right_5(data_with_crc),
                2 => ops.shift_right_6(data_with_crc),
                1 => ops.shift_right_7(data_with_crc),
                _ => data_with_crc,
            };

            return W::barrett_reduction(
                &CrcState {
                    value: result,
                    reflected: false,
                },
                keys[8],
                keys[7],
                ops,
            );
        }

        // 4..=15 bytes: build an alignment mask from the forward table.
        let base = &PSHUFB_SHF_TABLE_FORWARD as *const _ as *const u8;
        let ptr = base.add(16 - len as usize);
        let x0 = ops.load_bytes(ptr);
        let mask1 = ops.load_aligned(&SIMD_CONSTANTS[1]);
        let x0 = ops.xor_vectors(x0, mask1);

        if len < 8 {
            ops.shuffle_bytes(data_with_crc, x0)
        } else {
            // NOTE(review): this branch reloads the buffer and recomputes the
            // reflect/xor/mask sequence instead of reusing `data_with_crc` and
            // `x0` above — it appears to mirror a reference implementation;
            // confirm before consolidating.
            let mut xmm7 = ops.load_bytes(buffer.as_ptr());
            if let Reflector::ForwardReflector { smask } = reflector {
                xmm7 = ops.shuffle_bytes(xmm7, *smask);
            }
            xmm7 = ops.xor_vectors(xmm7, state.value);
            let ptr = base.add(16 - len as usize);
            let x0 = ops.load_bytes(ptr);
            let xmm0 = ops.xor_vectors(x0, mask1);

            ops.shuffle_bytes(xmm7, xmm0)
        }
    };

    // 4 or more bytes: full width fold + Barrett reduction.
    if len >= 4 {
        return W::perform_final_reduction(xmm7, state.reflected, keys, ops);
    }

    // Only the reflected `len < 4` path reaches here (the non-reflected one
    // returned early above): Barrett reduction alone suffices.
    let final_state = CrcState {
        value: xmm7,
        reflected: state.reflected,
    };

    W::barrett_reduction(&final_state, keys[8], keys[7], ops)
}