1use super::Block;
2
3const MOD: u64 = 0b10000111; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
7cpufeatures::new!(target_feature_pclmulqdq, "pclmulqdq");
8
9impl Block {
10 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
14 #[inline]
15 pub fn clmul(&self, rhs: &Self) -> (Self, Self) {
16 if target_feature_pclmulqdq::get() {
17 unsafe {
19 let (low, high) = clmul::clmul128(self.into(), rhs.into());
20 (low.into(), high.into())
21 }
22 } else {
23 let (low, high) = scalar::clmul128(self.into(), rhs.into());
24 (low.into(), high.into())
25 }
26 }
27
28 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
32 #[inline]
33 pub fn clmul(&self, rhs: &Self) -> (Self, Self) {
34 let (low, high) = scalar::clmul128(self.into(), rhs.into());
35 (low.into(), high.into())
36 }
37
38 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
42 #[inline]
43 pub fn gf_mul(&self, rhs: &Self) -> Self {
44 if target_feature_pclmulqdq::get() {
45 unsafe { clmul::gf128_mul(self.into(), rhs.into()).into() }
47 } else {
48 scalar::gf128_mul(self.into(), rhs.into()).into()
49 }
50 }
51
52 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
56 #[inline]
57 pub fn gf_mul(&self, rhs: &Self) -> Self {
58 scalar::gf128_mul(self.into(), rhs.into()).into()
59 }
60
61 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
63 #[inline]
64 pub fn gf_reduce(low: &Self, high: &Self) -> Self {
65 if target_feature_pclmulqdq::get() {
66 unsafe { clmul::gf128_reduce(low.into(), high.into()).into() }
68 } else {
69 scalar::gf128_reduce(low.into(), high.into()).into()
70 }
71 }
72
73 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
75 #[inline]
76 pub fn gf_reduce(low: &Self, high: &Self) -> Self {
77 scalar::gf128_reduce(low.into(), high.into()).into()
78 }
79
80 #[inline]
81 pub fn gf_pow(&self, mut exp: u64) -> Block {
82 let mut s = Block::ONE;
83 let mut pow2 = *self;
84
85 while exp != 0 {
86 if exp & 1 != 0 {
87 s = s.gf_mul(&pow2);
88 }
89 pow2 = pow2.gf_mul(&pow2);
90 exp >>= 1;
91 }
92 s
93 }
94}
95
96#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
97mod clmul {
98 #[cfg(target_arch = "x86")]
99 use std::arch::x86::*;
100 #[cfg(target_arch = "x86_64")]
101 use std::arch::x86_64::*;
102
103 use super::MOD;
104
105 #[target_feature(enable = "pclmulqdq")]
106 #[inline]
107 pub fn gf128_mul(a: __m128i, b: __m128i) -> __m128i {
108 let (low, high) = clmul128(a, b);
109 gf128_reduce(low, high)
110 }
111
112 #[target_feature(enable = "pclmulqdq")]
116 #[inline]
117 pub fn clmul128(a: __m128i, b: __m128i) -> (__m128i, __m128i) {
118 let ab_low = _mm_clmulepi64_si128::<0x00>(a, b);
121 let ab_high = _mm_clmulepi64_si128::<0x11>(a, b);
122 let ab_lohi1 = _mm_clmulepi64_si128::<0x01>(a, b);
123 let ab_lohi2 = _mm_clmulepi64_si128::<0x10>(a, b);
124 let ab_mid = _mm_xor_si128(ab_lohi1, ab_lohi2);
125 let low = _mm_xor_si128(ab_low, _mm_slli_si128::<8>(ab_mid));
126 let high = _mm_xor_si128(ab_high, _mm_srli_si128::<8>(ab_mid));
127 (low, high)
128 }
129
130 #[target_feature(enable = "pclmulqdq")]
131 #[inline]
132 pub fn gf128_reduce(mut low: __m128i, mut high: __m128i) -> __m128i {
133 let modulus = [MOD, 0];
136 let modulus = unsafe { _mm_loadu_si64(modulus.as_ptr().cast()) };
138
139 let tmp = _mm_clmulepi64_si128::<0x01>(high, modulus);
140 let tmp_shifted = _mm_slli_si128::<8>(tmp);
141 low = _mm_xor_si128(low, tmp_shifted);
142 high = _mm_xor_si128(high, tmp_shifted);
143
144 let tmp = _mm_clmulepi64_si128::<0x01>(tmp, modulus);
146 low = _mm_xor_si128(low, tmp);
147
148 let tmp = _mm_clmulepi64_si128::<0x00>(high, modulus);
149 _mm_xor_si128(low, tmp)
150 }
151
152 #[cfg(all(test, target_feature = "pclmulqdq"))]
153 mod test {
154 use std::{arch::x86_64::__m128i, mem::transmute};
155
156 use crate::block::gf128::clmul::{clmul128, gf128_mul, gf128_reduce};
157
158 #[test]
159 fn test_gf128_mul_zero() {
160 unsafe {
161 let a = transmute(0x19831239123916248127031273012381_u128);
162 let b = transmute(0_u128);
163 let exp = 0_u128;
164 let mul = transmute(gf128_mul(a, b));
165 assert_eq!(exp, mul);
166 }
167 }
168
169 #[test]
170 fn test_gf128_mul_onw() {
171 unsafe {
172 let a = transmute(0x19831239123916248127031273012381_u128);
173 let b = transmute(0x1_u128);
174 let exp = 0x19831239123916248127031273012381_u128;
175 let mul = transmute(gf128_mul(a, b));
176 assert_eq!(exp, mul);
177 }
178 }
179
180 #[test]
181 fn test_gf128_mul() {
182 unsafe {
183 let a = transmute(0x19831239123916248127031273012381_u128);
184 let b = transmute(0xabcdef0123456789abcdef0123456789_u128);
185 let exp = 0x63a033d0ed643e85153c50f4268a7d9_u128;
186 let mul = transmute(gf128_mul(a, b));
187 assert_eq!(exp, mul);
188 }
189 }
190
191 #[test]
192 fn test_clmul128() {
193 unsafe {
194 let a: __m128i = transmute(0x19831239123916248127031273012381_u128);
195 let b: __m128i = transmute(0xabcdef0123456789abcdef0123456789_u128);
196 let (low, high) = clmul128(a, b);
197 let [low, high] = transmute([low, high]);
198 let exp_low: u128 = 0xa5de9b50e6db7b5147e92b99ee261809;
199 let exp_high: u128 = 0xf1d6d37d58114afed2addfedd7c77f7;
200 assert_eq!(exp_low, low);
201 assert_eq!(exp_high, high);
202 }
203 }
204
205 #[test]
206 fn test_gf128_reduce() {
207 unsafe {
208 let low: __m128i = transmute(0x0123456789abcdef0123456789abcdef_u128);
210 let high: __m128i = transmute(0xabcdef0123456789abcdef0123456789_u128);
211 let exp = 0xb4b548f1c3c23f86b4b548f1c3c21572_u128;
212 let res: u128 = transmute(gf128_reduce(low, high));
213
214 println!("res: {res:b}");
215 println!("exp: {exp:b}");
216 assert_eq!(exp, res);
217 }
218 }
219 }
220
221 #[cfg(all(is_nightly, test, target_feature = "pclmulqdq"))]
222 mod benches {
223 extern crate test;
224
225 use std::{hint::black_box, mem::transmute};
226
227 use rand::{RngExt, rng};
228 use test::Bencher;
229
230 #[bench]
231 fn bench_gf128_mul(b: &mut Bencher) {
232 let [low, high] = unsafe { transmute(rng().random::<[u128; 2]>()) };
233 b.iter(|| black_box(unsafe { super::gf128_mul(black_box(low), black_box(high)) }));
234 }
235
236 #[bench]
237 fn bench_gf128_reduce(b: &mut Bencher) {
238 let [low, high] = unsafe { transmute(rng().random::<[u128; 2]>()) };
239 b.iter(|| black_box(unsafe { super::gf128_reduce(black_box(low), black_box(high)) }));
240 }
241 }
242}
243
244#[allow(dead_code)]
247mod scalar {
248 #[inline]
249 pub fn gf128_mul(a: u128, b: u128) -> u128 {
250 let (low, high) = clmul128(a, b);
251 gf128_reduce(low, high)
252 }
253
254 #[inline]
258 pub fn clmul128(a: u128, b: u128) -> (u128, u128) {
259 let (a_low, a_high) = (a as u64, (a >> 64) as u64);
260 let (b_low, b_high) = (b as u64, (b >> 64) as u64);
261
262 let ab_low = clmul64(a_low, b_low);
264 let ab_high = clmul64(a_high, b_high);
265 let ab_mid = clmul64(a_low ^ a_high, b_low ^ b_high) ^ ab_low ^ ab_high;
266 let low = ab_low ^ (ab_mid << 64);
267 let high = ab_high ^ (ab_mid >> 64);
268 (low, high)
269 }
270
271 #[inline]
282 fn clmul64(x: u64, y: u64) -> u128 {
283 pub const fn mask(offset: u32) -> u128 {
286 let mut mask: u128 = 0;
287 let mut i = offset;
288 while i < 128 {
289 mask |= 1 << i;
290 i += 5;
291 }
292
293 mask
294 }
295 let x0 = x as u128 & const { mask(0) };
296 let x1 = x as u128 & const { mask(1) };
297 let x2 = x as u128 & const { mask(2) };
298 let x3 = x as u128 & const { mask(3) };
299 let x4 = x as u128 & const { mask(4) };
300 let y0 = y as u128 & const { mask(0) };
301 let y1 = y as u128 & const { mask(1) };
302 let y2 = y as u128 & const { mask(2) };
303 let y3 = y as u128 & const { mask(3) };
304 let y4 = y as u128 & const { mask(4) };
305
306 let mut z0 = (x0 * y0) ^ (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1);
307 let mut z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2);
308 let mut z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y4) ^ (x4 * y3);
309 let mut z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0) ^ (x4 * y4);
310 let mut z4 = (x0 * y4) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y0);
311
312 z0 &= const { mask(0) };
313 z1 &= const { mask(1) };
314 z2 &= const { mask(2) };
315 z3 &= const { mask(3) };
316 z4 &= const { mask(4) };
317
318 z0 | z1 | z2 | z3 | z4
319 }
320
321 #[inline]
329 pub fn gf128_reduce(low: u128, high: u128) -> u128 {
330 #[inline]
334 fn shift_u128(x: u128, shift: u32) -> (u128, u128) {
335 let overflow = x >> (128 - shift);
337 let lower = x << shift;
338 (overflow, lower)
339 }
340
341 let (ov7, lo7) = shift_u128(high, 7);
347 let (ov2, lo2) = shift_u128(high, 2);
348 let (ov1, lo1) = shift_u128(high, 1);
349 let lo0 = high; let combined_low = lo7 ^ lo2 ^ lo1 ^ lo0;
353 let combined_overflow = ov7 ^ ov2 ^ ov1;
355
356 let reduced_overflow = (combined_overflow << 7)
360 ^ (combined_overflow << 2)
361 ^ (combined_overflow << 1)
362 ^ combined_overflow;
363
364 let poly_contrib = combined_low ^ reduced_overflow;
367
368 low ^ poly_contrib
370 }
371
372 #[cfg(test)]
373 mod tests {
374 use super::{clmul128, gf128_mul, gf128_reduce};
375
376 #[test]
377 fn test_gf128_mul_zero() {
378 let a = 0x19831239123916248127031273012381;
379 let b = 0;
380 let exp = 0;
381 let mul = gf128_mul(a, b);
382 assert_eq!(exp, mul);
383 }
384
385 #[test]
386 fn test_gf128_mul_one() {
387 let a = 0x19831239123916248127031273012381;
388 let b = 1;
389 let exp = 0x19831239123916248127031273012381;
390 let mul = gf128_mul(a, b);
391 assert_eq!(exp, mul);
392 }
393
394 #[test]
395 fn test_gf128_mul() {
396 let a = 0x19831239123916248127031273012381;
397 let b = 0xabcdef0123456789abcdef0123456789;
398 let exp = 0x63a033d0ed643e85153c50f4268a7d9;
399 let mul = gf128_mul(a, b);
400 assert_eq!(exp, mul);
401 }
402
403 #[test]
404 fn test_gf128_reduce_zero() {
405 assert_eq!(gf128_reduce(0, 0), 0);
406 }
407
408 #[test]
409 fn test_gf128_reduce_low_only() {
410 assert_eq!(gf128_reduce(1, 0), 1);
411 assert_eq!(gf128_reduce(0x87, 0), 0x87); assert_eq!(gf128_reduce(0xFFFFFFFFFFFFFFFF, 0), 0xFFFFFFFFFFFFFFFF);
413 }
414
415 #[test]
416 fn test_gf128_reduce_high_only() {
417 assert_eq!(gf128_reduce(0, 1), 0x87);
419 assert_eq!(gf128_reduce(0, 2), 0x87 << 1);
420 assert_eq!(gf128_reduce(0, 3), (0x87 << 1) ^ 0x87);
421
422 assert_eq!(gf128_reduce(0, 1 << 63), 0x87 << 63);
423 }
424
425 #[test]
426 fn test_gf128_reduce_overflow() {
427 let high = u128::MAX; let low = u128::MAX; assert_eq!(gf128_reduce(low, high), 0xffffffffffffffffffffffffffffc071);
430 }
431
432 #[test]
433 fn tests_gf128_reduce() {
434 let low = 0x0123456789abcdef0123456789abcdef;
436 let high = 0xabcdef0123456789abcdef0123456789;
437 let exp = 0xb4b548f1c3c23f86b4b548f1c3c21572;
438 let res = gf128_reduce(low, high);
439
440 println!("res: {res:b}");
441 println!("exp: {exp:b}");
442 assert_eq!(exp, res);
443 }
444
445 #[test]
446 fn test_clmul128() {
447 let a = 0x19831239123916248127031273012381;
448 let b = 0xabcdef0123456789abcdef0123456789;
449 let (low, high) = clmul128(a, b);
450 let exp_low = 0xa5de9b50e6db7b5147e92b99ee261809;
451 let exp_high = 0xf1d6d37d58114afed2addfedd7c77f7;
452 assert_eq!(exp_low, low);
453 assert_eq!(exp_high, high);
454 }
455 }
456
457 #[cfg(all(is_nightly, test))]
458 mod benches {
459 extern crate test;
460
461 use criterion::black_box;
462 use rand::{RngExt, rng};
463 use test::Bencher;
464
465 #[bench]
466 fn bench_gf128_mul(b: &mut Bencher) {
467 let [low, high] = rng().random::<[u128; 2]>();
468 b.iter(|| black_box(super::gf128_mul(black_box(low), black_box(high))));
469 }
470
471 #[bench]
472 fn bench_gf128_reduce(b: &mut Bencher) {
473 let [low, high] = rng().random::<[u128; 2]>();
474 b.iter(|| black_box(super::gf128_reduce(black_box(low), black_box(high))));
475 }
476 }
477}
478
479#[cfg(all(test, not(miri), target_feature = "pclmulqdq"))]
482mod scalar_simd_tests {
483 use std::mem::transmute;
484
485 use proptest::prelude::*;
486
487 use super::{clmul, scalar};
488
489 fn u128_with_edges() -> impl Strategy<Value = u128> {
492 prop_oneof![
493 1 => Just(0u128),
495 1 => Just(1u128),
496 1 => Just(u128::MAX),
497 1 => Just(u128::MAX - 1),
498 6 => any::<u128>(),
499 ]
500 }
501
502 proptest! {
503 #[test]
504 fn test_clmul128(a in u128_with_edges(), b in u128_with_edges()) {
505 unsafe {
506 let clmul_res = clmul::clmul128(transmute(a), transmute(b));
507 let scalar_res = scalar::clmul128(a, b);
508 assert_eq!(scalar_res.0, transmute(clmul_res.0));
509 }
510 }
511 }
512
513 proptest! {
514 #[test]
515 fn test_gf128_reduce(a in u128_with_edges(), b in u128_with_edges()) {
516 unsafe {
517 let clmul_res = clmul::gf128_reduce(transmute(a), transmute(b));
518 let scalar_res = scalar::gf128_reduce(a, b);
519 assert_eq!(scalar_res, transmute(clmul_res));
520 }
521 }
522 }
523
524 proptest! {
525 #[test]
526 fn test_gf128_mul(a in u128_with_edges(), b in u128_with_edges()) {
527 unsafe {
528 let clmul_res = clmul::gf128_mul(transmute(a), transmute(b));
529 let scalar_res = scalar::gf128_mul(a, b);
530 assert_eq!(scalar_res, transmute(clmul_res));
531 }
532 }
533 }
534}
535
536#[cfg(test)]
537mod tests {
538 use crate::Block;
539
540 #[test]
541 fn test_gf_pow() {
542 let b: Block = 24646523424323_u128.into();
543 assert_eq!(Block::ONE, b.gf_pow(0));
544 assert_eq!(b, b.gf_pow(1));
545 assert_eq!(b.gf_mul(&b), b.gf_pow(2));
546 assert_eq!(b.gf_mul(&b.gf_mul(&b)), b.gf_pow(3));
547 }
548}