1use crate::towers::bit::Bit;
20use crate::towers::block8::Block8;
21use crate::towers::block16::Block16;
22use crate::towers::block32::Block32;
23use crate::towers::block64::Block64;
24use crate::{
25 CanonicalDeserialize, CanonicalSerialize, Flat, FlatPromote, HardwareField, PackableField,
26 PackedFlat, TowerField, constants,
27};
28use core::ops::{Add, AddAssign, BitXor, BitXorAssign, Mul, MulAssign, Sub, SubAssign};
29use serde::{Deserialize, Serialize};
30use zeroize::Zeroize;
31
// Cache-line-aligned wrapper for the constant-time basis-conversion tables.
// The 64-byte alignment is presumably chosen to match a cache line — the CT
// conversion touches every row regardless of the secret, so alignment (not
// data-dependent indexing) is the point here. TODO confirm target cache size.
#[cfg(not(feature = "table-math"))]
#[repr(align(64))]
struct CtConvertBasisU128<const N: usize>([u128; N]);

// GF(2) basis matrix mapping tower-basis elements to the flat (hardware)
// basis: one u128 row per input bit, consumed by `map_ct_128_split`.
#[cfg(not(feature = "table-math"))]
static TOWER_TO_FLAT_BASIS_128: CtConvertBasisU128<128> =
    CtConvertBasisU128(constants::RAW_TOWER_TO_FLAT_128);

// Inverse matrix: flat (hardware) basis back to the tower basis.
#[cfg(not(feature = "table-math"))]
static FLAT_TO_TOWER_BASIS_128: CtConvertBasisU128<128> =
    CtConvertBasisU128(constants::RAW_FLAT_TO_TOWER_128);
43
/// Element of the 128-bit binary tower field, stored as raw bits.
///
/// Values are held in the tower basis by default; `to_hardware` /
/// `from_hardware` convert to and from the flat basis used by the
/// carry-less-multiply kernels. `Zeroize` is derived so secret field
/// elements can be wiped.
#[derive(Copy, Clone, Default, Debug, Eq, PartialEq, Serialize, Deserialize, Zeroize)]
#[repr(transparent)]
pub struct Block128(pub u128);
47
48impl Block128 {
49 const TAU: Self = Block128(0x2000_0000_0000_0000_0000_0000_0000_0000);
51
52 pub fn new(lo: Block64, hi: Block64) -> Self {
53 Self((hi.0 as u128) << 64 | (lo.0 as u128))
54 }
55
56 #[inline(always)]
57 pub fn split(self) -> (Block64, Block64) {
58 (Block64(self.0 as u64), Block64((self.0 >> 64) as u64))
59 }
60}
61
impl TowerField for Block128 {
    const BITS: usize = 128;
    const ZERO: Self = Block128(0);
    const ONE: Self = Block128(1);

    const EXTENSION_TAU: Self = Self::TAU;

    /// Multiplicative inverse via the quadratic-extension norm trick.
    ///
    /// Writing `x = h·X + l` with `X² = X + τ`, the conjugate of `x` is
    /// `h·X + (h + l)`, and `x · conj(x) = h²τ + hl + l²` (the norm) lies in
    /// the Block64 subfield. So only one 64-bit inversion is needed:
    /// `x⁻¹ = conj(x) · norm⁻¹`.
    ///
    /// NOTE(review): invert(ZERO) returns ZERO, assuming `Block64::invert`
    /// maps ZERO to ZERO (the `invert_zero` test below pins this convention).
    fn invert(&self) -> Self {
        let (l, h) = self.split();
        let h2 = h * h;
        let l2 = l * l;
        let hl = h * l;
        let norm = (h2 * Block64::TAU) + hl + l2;

        let norm_inv = norm.invert();
        let res_hi = h * norm_inv;
        let res_lo = (h + l) * norm_inv;

        Self::new(res_lo, res_hi)
    }

    /// Builds an element from uniform randomness; only the first 16 of the
    /// 32 bytes are consumed (little-endian).
    fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
        let mut buf = [0u8; 16];
        buf.copy_from_slice(&bytes[0..16]);

        Self(u128::from_le_bytes(buf))
    }
}
90
91impl Add for Block128 {
92 type Output = Self;
93
94 fn add(self, rhs: Self) -> Self {
95 Self(self.0.bitxor(rhs.0))
96 }
97}
98
99impl Sub for Block128 {
100 type Output = Self;
101
102 fn sub(self, rhs: Self) -> Self {
103 self.add(rhs)
104 }
105}
106
impl Mul for Block128 {
    type Output = Self;

    /// Tower multiplication via one Karatsuba level over Block64.
    ///
    /// With elements written as `a1·X + a0` and `X² = X + τ`
    /// (τ = `Block64::TAU`), the product is
    ///   hi = a0b1 + a1b0 + a1b1 = v0 + v_sum   (v1 cancels in GF(2))
    ///   lo = a0b0 + a1b1·τ      = v0 + v1·τ
    /// using the Karatsuba identity
    /// `v_sum = (a0+a1)(b0+b1) = v0 + v1 + a0b1 + a1b0`.
    fn mul(self, rhs: Self) -> Self {
        let (a0, a1) = self.split();
        let (b0, b1) = rhs.split();

        let v0 = a0 * b0;
        let v1 = a1 * b1;
        let v_sum = (a0 + a1) * (b0 + b1);

        let c_hi = v0 + v_sum;
        let c_lo = v0 + (v1 * Block64::TAU);

        Self::new(c_lo, c_hi)
    }
}
124
125impl AddAssign for Block128 {
126 fn add_assign(&mut self, rhs: Self) {
127 self.0.bitxor_assign(rhs.0);
128 }
129}
130
131impl SubAssign for Block128 {
132 fn sub_assign(&mut self, rhs: Self) {
133 self.0.bitxor_assign(rhs.0);
134 }
135}
136
137impl MulAssign for Block128 {
138 fn mul_assign(&mut self, rhs: Self) {
139 *self = *self * rhs;
140 }
141}
142
143impl CanonicalSerialize for Block128 {
144 fn serialized_size(&self) -> usize {
145 16
146 }
147
148 fn serialize(&self, writer: &mut [u8]) -> Result<(), ()> {
149 if writer.len() < 16 {
150 return Err(());
151 }
152
153 writer.copy_from_slice(&self.0.to_le_bytes());
154
155 Ok(())
156 }
157}
158
159impl CanonicalDeserialize for Block128 {
160 fn deserialize(bytes: &[u8]) -> Result<Self, ()> {
161 if bytes.len() < 16 {
162 return Err(());
163 }
164
165 let mut buf = [0u8; 16];
166 buf.copy_from_slice(&bytes[0..16]);
167
168 Ok(Self(u128::from_le_bytes(buf)))
169 }
170}
171
172impl From<u8> for Block128 {
173 fn from(val: u8) -> Self {
174 Self(val as u128)
175 }
176}
177
178impl From<u32> for Block128 {
179 #[inline]
180 fn from(val: u32) -> Self {
181 Self(val as u128)
182 }
183}
184
185impl From<u64> for Block128 {
186 #[inline]
187 fn from(val: u64) -> Self {
188 Self::from(val as u128)
189 }
190}
191
192impl From<u128> for Block128 {
193 #[inline]
194 fn from(val: u128) -> Self {
195 Self(val)
196 }
197}
198
// Subfield embeddings: a smaller tower element lifts into Block128 by
// zero-extending its bits into the low half. This is the tower-basis
// embedding (the low subfield occupies the low coordinates), matching the
// `tower_embedding` test below.

impl From<Bit> for Block128 {
    #[inline(always)]
    fn from(val: Bit) -> Self {
        Self(val.0 as u128)
    }
}

impl From<Block8> for Block128 {
    #[inline(always)]
    fn from(val: Block8) -> Self {
        Self(val.0 as u128)
    }
}

impl From<Block16> for Block128 {
    #[inline(always)]
    fn from(val: Block16) -> Self {
        Self(val.0 as u128)
    }
}

impl From<Block32> for Block128 {
    #[inline(always)]
    fn from(val: Block32) -> Self {
        Self(val.0 as u128)
    }
}

impl From<Block64> for Block128 {
    #[inline(always)]
    fn from(val: Block64) -> Self {
        Self(val.0 as u128)
    }
}
237
/// Number of `Block128` lanes in one packed value.
pub const PACKED_WIDTH_128: usize = 4;

/// SIMD-friendly bundle of four field elements (64 bytes).
///
/// `repr(C, align(32))` fixes the layout so the NEON code can transmute the
/// lane array to vector registers (see `neon::add_packed_128`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
#[repr(C, align(32))]
pub struct PackedBlock128(pub [Block128; PACKED_WIDTH_128]);
250
251impl PackedBlock128 {
252 #[inline(always)]
254 pub fn zero() -> Self {
255 Self([Block128::ZERO; PACKED_WIDTH_128])
256 }
257
258 #[inline(always)]
260 pub fn broadcast(val: Block128) -> Self {
261 Self([val; PACKED_WIDTH_128])
262 }
263}
264
265impl PackableField for Block128 {
266 type Packed = PackedBlock128;
267
268 const WIDTH: usize = PACKED_WIDTH_128;
269
270 #[inline(always)]
271 fn pack(chunk: &[Self]) -> Self::Packed {
272 assert!(
273 chunk.len() >= PACKED_WIDTH_128,
274 "PackableField::pack: input slice too short",
275 );
276
277 let mut arr = [Self::ZERO; PACKED_WIDTH_128];
278 arr.copy_from_slice(&chunk[..PACKED_WIDTH_128]);
279
280 PackedBlock128(arr)
281 }
282
283 #[inline(always)]
284 fn unpack(packed: Self::Packed, output: &mut [Self]) {
285 assert!(
286 output.len() >= PACKED_WIDTH_128,
287 "PackableField::unpack: output slice too short",
288 );
289
290 output[..PACKED_WIDTH_128].copy_from_slice(&packed.0);
291 }
292}
293
294impl Add for PackedBlock128 {
299 type Output = Self;
300
301 #[inline(always)]
302 fn add(self, rhs: Self) -> Self {
303 let mut res = [Block128::ZERO; PACKED_WIDTH_128];
304 for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
305 *out = *l + *r;
306 }
307
308 Self(res)
309 }
310}
311
312impl AddAssign for PackedBlock128 {
313 #[inline(always)]
314 fn add_assign(&mut self, rhs: Self) {
315 for (l, r) in self.0.iter_mut().zip(rhs.0.iter()) {
316 *l += *r;
317 }
318 }
319}
320
321impl Sub for PackedBlock128 {
324 type Output = Self;
325
326 #[inline(always)]
327 fn sub(self, rhs: Self) -> Self {
328 self.add(rhs)
329 }
330}
331
332impl SubAssign for PackedBlock128 {
333 #[inline(always)]
334 fn sub_assign(&mut self, rhs: Self) {
335 self.add_assign(rhs);
336 }
337}
338
impl Mul for PackedBlock128 {
    type Output = Self;

    /// Lane-wise field multiplication.
    ///
    /// On aarch64 each lane is converted to the flat basis, multiplied with
    /// the PMULL-based kernel, and converted back; on other targets it falls
    /// back to the scalar tower multiplication per lane.
    #[inline(always)]
    fn mul(self, rhs: Self) -> Self {
        #[cfg(target_arch = "aarch64")]
        {
            let mut res = [Block128::ZERO; PACKED_WIDTH_128];
            for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
                let a_flat = l.to_hardware();
                let b_flat = r.to_hardware();
                let c_flat =
                    Flat::from_raw(neon::mul_flat_128(a_flat.into_raw(), b_flat.into_raw()));

                *out = c_flat.to_tower();
            }

            Self(res)
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            let mut res = [Block128::ZERO; PACKED_WIDTH_128];
            for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
                *out = *l * *r;
            }

            Self(res)
        }
    }
}
372
373impl MulAssign for PackedBlock128 {
374 #[inline(always)]
375 fn mul_assign(&mut self, rhs: Self) {
376 for (l, r) in self.0.iter_mut().zip(rhs.0.iter()) {
377 *l *= *r;
378 }
379 }
380}
381
382impl Mul<Block128> for PackedBlock128 {
386 type Output = Self;
387
388 #[inline(always)]
389 fn mul(self, rhs: Block128) -> Self {
390 let mut res = [Block128::ZERO; PACKED_WIDTH_128];
391 for (out, v) in res.iter_mut().zip(self.0.iter()) {
392 *out = *v * rhs;
393 }
394
395 Self(res)
396 }
397}
398
399impl MulAssign<Block128> for PackedBlock128 {
400 #[inline(always)]
401 fn mul_assign(&mut self, rhs: Block128) {
402 for v in self.0.iter_mut() {
403 *v *= rhs;
404 }
405 }
406}
407
impl HardwareField for Block128 {
    /// Converts tower basis -> flat (hardware) basis, either via the
    /// byte-indexed table (table-math) or the constant-time masked-XOR path.
    #[inline(always)]
    fn to_hardware(self) -> Flat<Self> {
        #[cfg(feature = "table-math")]
        {
            Flat::from_raw(apply_matrix_128(self, &constants::TOWER_TO_FLAT_128))
        }

        #[cfg(not(feature = "table-math"))]
        {
            Flat::from_raw(Block128(map_ct_128_split(
                self.0,
                &TOWER_TO_FLAT_BASIS_128.0,
            )))
        }
    }

    /// Inverse conversion: flat (hardware) basis -> tower basis.
    #[inline(always)]
    fn from_hardware(value: Flat<Self>) -> Self {
        let value = value.into_raw();

        #[cfg(feature = "table-math")]
        {
            apply_matrix_128(value, &constants::FLAT_TO_TOWER_128)
        }

        #[cfg(not(feature = "table-math"))]
        {
            Block128(map_ct_128_split(value.0, &FLAT_TO_TOWER_BASIS_128.0))
        }
    }

    /// Addition is XOR in both bases, so no conversion is needed.
    #[inline(always)]
    fn add_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        Flat::from_raw(lhs.into_raw() + rhs.into_raw())
    }

    /// Packed addition; uses the NEON XOR kernel on aarch64.
    #[inline(always)]
    fn add_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();

        #[cfg(target_arch = "aarch64")]
        {
            PackedFlat::from_raw(neon::add_packed_128(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            PackedFlat::from_raw(lhs + rhs)
        }
    }

    /// Flat-basis multiplication. On aarch64 this is the PMULL kernel; the
    /// portable fallback round-trips through the tower basis and the scalar
    /// tower multiply.
    #[inline(always)]
    fn mul_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();

        #[cfg(target_arch = "aarch64")]
        {
            Flat::from_raw(neon::mul_flat_128(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            let a_tower = Self::from_hardware(Flat::from_raw(lhs));
            let b_tower = Self::from_hardware(Flat::from_raw(rhs));

            (a_tower * b_tower).to_hardware()
        }
    }

    /// Packed flat-basis multiplication, lane by lane.
    #[inline(always)]
    fn mul_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();

        #[cfg(target_arch = "aarch64")]
        {
            let mut res = [Block128::ZERO; PACKED_WIDTH_128];
            for ((out, l), r) in res.iter_mut().zip(lhs.0.iter()).zip(rhs.0.iter()) {
                *out = neon::mul_flat_128(*l, *r);
            }

            PackedFlat::from_raw(PackedBlock128(res))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            let mut l = [Self::ZERO; <Self as PackableField>::WIDTH];
            let mut r = [Self::ZERO; <Self as PackableField>::WIDTH];
            let mut res = [Self::ZERO; <Self as PackableField>::WIDTH];

            Self::unpack(lhs, &mut l);
            Self::unpack(rhs, &mut r);

            for i in 0..<Self as PackableField>::WIDTH {
                res[i] = Self::mul_hardware(Flat::from_raw(l[i]), Flat::from_raw(r[i])).into_raw();
            }

            PackedFlat::from_raw(Self::pack(&res))
        }
    }

    /// Scalar-times-packed multiply: broadcast the scalar, then reuse the
    /// packed kernel.
    #[inline(always)]
    fn mul_hardware_scalar_packed(lhs: PackedFlat<Self>, rhs: Flat<Self>) -> PackedFlat<Self> {
        let broadcasted = PackedBlock128::broadcast(rhs.into_raw());
        Self::mul_hardware_packed(lhs, PackedFlat::from_raw(broadcasted))
    }

    /// Extracts tower-basis bit `bit_idx` directly from a flat-basis value.
    ///
    /// The mask selects the flat coordinates whose GF(2) parity equals the
    /// requested tower bit; the shift cascade folds the masked 128-bit word
    /// down to its overall parity.
    ///
    /// NOTE(review): `bit_idx` indexes the mask table unchecked beyond the
    /// slice's own bounds check; callers presumably keep it < 128.
    #[inline(always)]
    fn tower_bit_from_hardware(value: Flat<Self>, bit_idx: usize) -> u8 {
        let mask = constants::FLAT_TO_TOWER_BIT_MASKS_128[bit_idx];

        let mut v = value.into_raw().0 & mask;
        v ^= v >> 64;
        v ^= v >> 32;
        v ^= v >> 16;
        v ^= v >> 8;
        v ^= v >> 4;
        v ^= v >> 2;
        v ^= v >> 1;

        (v & 1) as u8
    }
}
541
#[cfg(not(feature = "table-math"))]
impl FlatPromote<Block8> for Block128 {
    /// Lifts one flat Block8 into a flat Block128 via the constant-time
    /// masked-XOR basis expansion.
    #[inline(always)]
    fn promote_flat(val: Flat<Block8>) -> Flat<Self> {
        let val = val.into_raw();
        Flat::from_raw(Block128(lift_ct::<8>(
            val.0 as u64,
            &constants::LIFT_BASIS_8.0,
        )))
    }

    /// Batch lift: on aarch64, 16 elements per NEON call with a scalar tail;
    /// elsewhere a plain scalar loop. Only `min(input.len(), output.len())`
    /// elements are processed.
    fn promote_flat_batch(input: &[Flat<Block8>], output: &mut [Flat<Self>]) {
        let n = input.len().min(output.len());

        #[cfg(target_arch = "aarch64")]
        {
            let full = n / 16;
            for chunk in 0..full {
                let i = chunk * 16;
                // SAFETY(review): `i + 16 <= n` by construction of `full`,
                // so both accesses stay in bounds. The casts additionally
                // assume Flat<Block8> is layout-identical to u8 and
                // Flat<Block128> to u128 (repr(transparent) chains) —
                // TODO confirm against the Flat definition.
                unsafe {
                    neon::promote_batch_8_to_128(
                        input.as_ptr().add(i).cast::<u8>(),
                        output.as_mut_ptr().add(i).cast::<u128>(),
                    );
                }
            }

            let tail = full * 16;
            for i in tail..n {
                output[i] = Self::promote_flat(input[i]);
            }
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            for i in 0..n {
                output[i] = Self::promote_flat(input[i]);
            }
        }
    }
}
591
#[cfg(not(feature = "table-math"))]
impl FlatPromote<Block16> for Block128 {
    /// Lifts one flat Block16 into a flat Block128 (constant-time path).
    #[inline(always)]
    fn promote_flat(val: Flat<Block16>) -> Flat<Self> {
        Flat::from_raw(Block128(lift_ct::<16>(
            val.into_raw().0 as u64,
            &constants::LIFT_BASIS_16.0,
        )))
    }

    /// Batch lift: 16 elements (32 input bytes) per NEON call on aarch64,
    /// scalar tail/fallback otherwise.
    fn promote_flat_batch(input: &[Flat<Block16>], output: &mut [Flat<Self>]) {
        let n = input.len().min(output.len());

        #[cfg(target_arch = "aarch64")]
        {
            let full = n / 16;
            for chunk in 0..full {
                let i = chunk * 16;
                // SAFETY(review): `i + 16 <= n` keeps the 16-element window
                // in bounds; the byte cast assumes Flat<Block16> is two
                // plain little-endian bytes — TODO confirm.
                unsafe {
                    neon::promote_batch_16_to_128(
                        input.as_ptr().add(i).cast::<u8>(),
                        output.as_mut_ptr().add(i).cast::<u128>(),
                    );
                }
            }

            let tail = full * 16;
            for i in tail..n {
                output[i] = Self::promote_flat(input[i]);
            }
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            for i in 0..n {
                output[i] = Self::promote_flat(input[i]);
            }
        }
    }
}
632
#[cfg(not(feature = "table-math"))]
impl FlatPromote<Block32> for Block128 {
    /// Lifts one flat Block32 into a flat Block128 (constant-time path).
    #[inline(always)]
    fn promote_flat(val: Flat<Block32>) -> Flat<Self> {
        Flat::from_raw(Block128(lift_ct::<32>(
            val.into_raw().0 as u64,
            &constants::LIFT_BASIS_32.0,
        )))
    }

    /// Batch lift: 16 elements (64 input bytes) per NEON call on aarch64,
    /// scalar tail/fallback otherwise.
    fn promote_flat_batch(input: &[Flat<Block32>], output: &mut [Flat<Self>]) {
        let n = input.len().min(output.len());

        #[cfg(target_arch = "aarch64")]
        {
            let full = n / 16;
            for chunk in 0..full {
                let i = chunk * 16;
                // SAFETY(review): `i + 16 <= n` keeps the window in bounds;
                // the cast assumes Flat<Block32> is four plain bytes —
                // TODO confirm.
                unsafe {
                    neon::promote_batch_32_to_128(
                        input.as_ptr().add(i).cast::<u8>(),
                        output.as_mut_ptr().add(i).cast::<u128>(),
                    );
                }
            }

            let tail = full * 16;
            for i in tail..n {
                output[i] = Self::promote_flat(input[i]);
            }
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            for i in 0..n {
                output[i] = Self::promote_flat(input[i]);
            }
        }
    }
}
673
#[cfg(not(feature = "table-math"))]
impl FlatPromote<Block64> for Block128 {
    /// Lifts one flat Block64 into a flat Block128 (constant-time path).
    /// Uses the trait's default batch implementation.
    #[inline(always)]
    fn promote_flat(val: Flat<Block64>) -> Flat<Self> {
        Flat::from_raw(Block128(lift_ct::<64>(
            val.into_raw().0,
            &constants::LIFT_BASIS_64.0,
        )))
    }
}
684
#[cfg(feature = "table-math")]
impl FlatPromote<Block8> for Block128 {
    /// Table-driven lift: one direct lookup by the full 8-bit value.
    #[inline(always)]
    fn promote_flat(val: Flat<Block8>) -> Flat<Self> {
        let idx = val.into_raw().0 as usize;
        // SAFETY(review): idx comes from a u8, so idx <= 255; sound assuming
        // LIFT_TABLE_8_TO_128 has (at least) 256 entries — TODO confirm in
        // `constants`.
        Flat::from_raw(Block128(unsafe {
            *constants::LIFT_TABLE_8_TO_128.get_unchecked(idx)
        }))
    }
}
696
#[cfg(feature = "table-math")]
impl FlatPromote<Block16> for Block128 {
    /// Table-driven lift: one 256-entry lookup per input byte, XORed
    /// together (the lift is GF(2)-linear, so it splits by byte).
    #[inline(always)]
    fn promote_flat(val: Flat<Block16>) -> Flat<Self> {
        let v = val.into_raw().0;
        // SAFETY(review): each index is masked to 0..=255; sound assuming
        // the PROMOTE_16_BYTE_* tables have 256 entries — TODO confirm.
        let res = unsafe {
            *constants::PROMOTE_16_BYTE_0_TO_128.get_unchecked((v & 0xFF) as usize)
                ^ *constants::PROMOTE_16_BYTE_1_TO_128.get_unchecked(((v >> 8) & 0xFF) as usize)
        };

        Flat::from_raw(Block128(res))
    }
}
710
#[cfg(feature = "table-math")]
impl FlatPromote<Block32> for Block128 {
    /// Table-driven lift: per-byte lookups XORed together (GF(2)-linearity).
    #[inline(always)]
    fn promote_flat(val: Flat<Block32>) -> Flat<Self> {
        let v = val.into_raw().0;
        // SAFETY(review): each index is masked to 0..=255; sound assuming
        // the PROMOTE_32_BYTE_* tables have 256 entries — TODO confirm.
        let res = unsafe {
            *constants::PROMOTE_32_BYTE_0_TO_128.get_unchecked((v & 0xFF) as usize)
                ^ *constants::PROMOTE_32_BYTE_1_TO_128.get_unchecked(((v >> 8) & 0xFF) as usize)
                ^ *constants::PROMOTE_32_BYTE_2_TO_128.get_unchecked(((v >> 16) & 0xFF) as usize)
                ^ *constants::PROMOTE_32_BYTE_3_TO_128.get_unchecked(((v >> 24) & 0xFF) as usize)
        };

        Flat::from_raw(Block128(res))
    }
}
726
#[cfg(feature = "table-math")]
impl FlatPromote<Block64> for Block128 {
    /// Table-driven lift: eight per-byte lookups XORed together.
    #[inline(always)]
    fn promote_flat(val: Flat<Block64>) -> Flat<Self> {
        let v = val.into_raw().0;
        // SAFETY(review): each index is masked to 0..=255; sound assuming
        // the PROMOTE_64_BYTE_* tables have 256 entries — TODO confirm.
        let res = unsafe {
            *constants::PROMOTE_64_BYTE_0_TO_128.get_unchecked((v & 0xFF) as usize)
                ^ *constants::PROMOTE_64_BYTE_1_TO_128.get_unchecked(((v >> 8) & 0xFF) as usize)
                ^ *constants::PROMOTE_64_BYTE_2_TO_128.get_unchecked(((v >> 16) & 0xFF) as usize)
                ^ *constants::PROMOTE_64_BYTE_3_TO_128.get_unchecked(((v >> 24) & 0xFF) as usize)
                ^ *constants::PROMOTE_64_BYTE_4_TO_128.get_unchecked(((v >> 32) & 0xFF) as usize)
                ^ *constants::PROMOTE_64_BYTE_5_TO_128.get_unchecked(((v >> 40) & 0xFF) as usize)
                ^ *constants::PROMOTE_64_BYTE_6_TO_128.get_unchecked(((v >> 48) & 0xFF) as usize)
                ^ *constants::PROMOTE_64_BYTE_7_TO_128.get_unchecked(((v >> 56) & 0xFF) as usize)
        };

        Flat::from_raw(Block128(res))
    }
}
746
#[cfg(feature = "table-math")]
/// Applies a 128x128 GF(2) matrix to `val` using a byte-indexed table.
///
/// The 4096-entry table holds 16 chunks of 256 precomputed partial results,
/// one chunk per input byte position; the full product is the XOR of one
/// entry per byte.
#[inline(always)]
pub fn apply_matrix_128(val: Block128, table: &[u128; 4096]) -> Block128 {
    let mut res = 0u128;
    let v = val.0;

    for i in 0..16 {
        let byte = (v >> (i * 8)) & 0xFF;
        let idx = (i * 256) + (byte as usize);
        // SAFETY: i <= 15 and byte <= 255, so idx <= 15 * 256 + 255 = 4095,
        // in bounds for [u128; 4096].
        res ^= unsafe { *table.get_unchecked(idx) };
    }

    Block128(res)
}
768
#[cfg(not(feature = "table-math"))]
/// Constant-time 128x128 GF(2) matrix-vector product: XORs together the
/// basis rows selected by the set bits of `x`.
///
/// Every row is read and combined under an all-ones/all-zeros mask derived
/// from the corresponding bit, so there is no secret-dependent branch or
/// table index. The accumulator is kept as two u64 halves (presumably for
/// better 64-bit codegen — behavior is identical to one u128 accumulator).
#[inline(always)]
fn map_ct_128_split(x: u128, basis: &[u128; 128]) -> u128 {
    let mut acc_lo = 0u64;
    let mut acc_hi = 0u64;
    let mut i = 0usize;

    while i < 128 {
        // mask = 0xFFFF... if bit i of x is set, else 0.
        let bit = ((x >> i) & 1) as u64;
        let mask = 0u64.wrapping_sub(bit);

        let b = basis[i];
        acc_lo ^= (b as u64) & mask;
        acc_hi ^= ((b >> 64) as u64) & mask;

        i += 1;
    }

    (acc_lo as u128) | ((acc_hi as u128) << 64)
}
789
#[cfg(not(feature = "table-math"))]
/// Constant-time subfield lift: XORs together the `N` lift-basis rows
/// selected by the low `N` bits of `x`, with the same masked-row technique
/// as `map_ct_128_split` (no secret-dependent branching or indexing).
#[inline(always)]
fn lift_ct<const N: usize>(x: u64, basis: &'static [u128; N]) -> u128 {
    let mut acc = 0u128;

    let mut i = 0usize;
    while i < N {
        let bit = (x >> i) & 1;
        let mask = 0u128.wrapping_sub(bit as u128);
        acc ^= basis[i] & mask;
        i += 1;
    }

    acc
}
805
806#[cfg(target_arch = "aarch64")]
811mod neon {
812 use super::*;
813 use core::arch::aarch64::*;
814 use core::mem::transmute;
815
    /// XORs two packed values lane-by-lane in 128-bit NEON registers.
    #[inline(always)]
    pub fn add_packed_128(lhs: PackedBlock128, rhs: PackedBlock128) -> PackedBlock128 {
        unsafe {
            // SAFETY(review): transmuting [Block128; 4] <-> [uint8x16_t; 4]
            // treats both as 64 bytes of plain bits; relies on
            // PackedBlock128's #[repr(C, align(32))] layout.
            let l: [uint8x16_t; 4] = transmute(lhs.0);
            let r: [uint8x16_t; 4] = transmute(rhs.0);

            let res = [
                veorq_u8(l[0], r[0]),
                veorq_u8(l[1], r[1]),
                veorq_u8(l[2], r[2]),
                veorq_u8(l[3], r[3]),
            ];

            transmute(res)
        }
    }
834
    /// Carry-less 128x128-bit multiplication in the flat basis, reduced
    /// modulo the field polynomial, built on the aarch64 PMULL (`vmull_p64`)
    /// instruction: Karatsuba for the 256-bit product, then a two-step fold.
    #[inline(always)]
    pub fn mul_flat_128(a: Block128, b: Block128) -> Block128 {
        unsafe {
            let a_vec: uint64x2_t = transmute(a.0);
            let b_vec: uint64x2_t = transmute(b.0);

            let a0 = vgetq_lane_u64(a_vec, 0);
            let a1 = vgetq_lane_u64(a_vec, 1);
            let b0 = vgetq_lane_u64(b_vec, 0);
            let b1 = vgetq_lane_u64(b_vec, 1);

            // Karatsuba: three 64x64 carry-less products instead of four.
            let d0 = vmull_p64(a0, b0);
            let d2 = vmull_p64(a1, b1);
            let d1 = vmull_p64(a0 ^ a1, b0 ^ b1);

            let d0_v: uint8x16_t = transmute(d0);
            let d1_v: uint8x16_t = transmute(d1);
            let d2_v: uint8x16_t = transmute(d2);

            // mid = d1 ^ d0 ^ d2 recovers the cross terms a0*b1 ^ a1*b0.
            let mid_v = veorq_u8(d1_v, veorq_u8(d0_v, d2_v));

            let d0_u64: uint64x2_t = transmute(d0);
            let mid_u64: uint64x2_t = transmute(mid_v);
            let d2_u64: uint64x2_t = transmute(d2);

            // Assemble the 256-bit product as four 64-bit limbs c0..c3.
            let c0 = vgetq_lane_u64(d0_u64, 0);
            let c1 = vgetq_lane_u64(d0_u64, 1) ^ vgetq_lane_u64(mid_u64, 0);
            let c2 = vgetq_lane_u64(d2_u64, 0) ^ vgetq_lane_u64(mid_u64, 1);
            let c3 = vgetq_lane_u64(d2_u64, 1);

            let r_val = constants::POLY_128 as u64;

            // First fold: x^128 == POLY_128 (mod f), so the high limbs c2/c3
            // fold down as carry-less products with POLY_128.
            let p0 = vmull_p64(c2, r_val);
            let p1 = vmull_p64(c3, r_val);

            let p0_u64: uint64x2_t = transmute(p0);
            let p1_u64: uint64x2_t = transmute(p1);

            let folded_0 = vgetq_lane_u64(p0_u64, 0);
            let folded_1 = vgetq_lane_u64(p0_u64, 1) ^ vgetq_lane_u64(p1_u64, 0);
            let carry = vgetq_lane_u64(p1_u64, 1);

            let final_0 = c0 ^ folded_0;
            let final_1 = c1 ^ folded_1;

            // Second fold for the few bits that spilled past bit 127.
            let carry_mul = vmull_p64(carry, r_val);

            // Only the low lane is needed: `carry` has at most deg(POLY_128)
            // bits, so carry * POLY_128 fits in 64 bits (assuming POLY_128
            // is a low-degree reduction polynomial — TODO confirm).
            let carry_res_vec: uint64x2_t = transmute(carry_mul);
            let carry_res = vgetq_lane_u64(carry_res_vec, 0);

            let res_lo = final_0 ^ carry_res;
            let res_hi = final_1;

            Block128((res_lo as u128) | ((res_hi as u128) << 64))
        }
    }
905
    /// Lifts 16 flat Block8 values to 16 flat Block128 values in one pass:
    /// per-nibble `vqtbl1q_u8` lookups produce each of the 16 output byte
    /// columns, then a 16x16 byte transpose turns columns into elements.
    ///
    /// # Safety
    ///
    /// `input` must be valid for reading 16 bytes and `output` must be valid
    /// for writing 16 `u128` values.
    #[cfg(not(feature = "table-math"))]
    #[inline(always)]
    pub unsafe fn promote_batch_8_to_128(input: *const u8, output: *mut u128) {
        unsafe {
            let vals = vld1q_u8(input);

            // Split each input byte into low/high nibble lookup indices.
            let mask_0f = vdupq_n_u8(0x0F);
            let lo_nib = vandq_u8(vals, mask_0f);
            let hi_nib = vshrq_n_u8::<4>(vals);

            let mut out = [vdupq_n_u8(0); 16];

            // Output byte column $j = table0[lo] ^ table1[hi], lifted per
            // nibble thanks to GF(2)-linearity.
            macro_rules! lookup {
                ($j:expr, $lo:ident, $hi:ident, $dst:ident) => {{
                    let t0 = vld1q_u8(constants::NIBBLE_PROMOTE_8_0_TO_128[$j].as_ptr());
                    let t1 = vld1q_u8(constants::NIBBLE_PROMOTE_8_1_TO_128[$j].as_ptr());

                    $dst[$j] = veorq_u8(vqtbl1q_u8(t0, $lo), vqtbl1q_u8(t1, $hi));
                }};
            }

            lookup!(0, lo_nib, hi_nib, out);
            lookup!(1, lo_nib, hi_nib, out);
            lookup!(2, lo_nib, hi_nib, out);
            lookup!(3, lo_nib, hi_nib, out);
            lookup!(4, lo_nib, hi_nib, out);
            lookup!(5, lo_nib, hi_nib, out);
            lookup!(6, lo_nib, hi_nib, out);
            lookup!(7, lo_nib, hi_nib, out);
            lookup!(8, lo_nib, hi_nib, out);
            lookup!(9, lo_nib, hi_nib, out);
            lookup!(10, lo_nib, hi_nib, out);
            lookup!(11, lo_nib, hi_nib, out);
            lookup!(12, lo_nib, hi_nib, out);
            lookup!(13, lo_nib, hi_nib, out);
            lookup!(14, lo_nib, hi_nib, out);
            lookup!(15, lo_nib, hi_nib, out);

            // Columns -> 16-byte elements.
            let elems = transpose_16x16(&out);
            for (i, elem) in elems.iter().enumerate() {
                vst1q_u8(output.add(i).cast::<u8>(), *elem);
            }
        }
    }
952
    /// Lifts 16 flat Block16 values (32 input bytes, little-endian pairs) to
    /// 16 flat Block128 values; same nibble-table + transpose scheme as the
    /// 8-bit batch, with four nibbles per element.
    ///
    /// # Safety
    ///
    /// `input` must be valid for reading 32 bytes and `output` must be valid
    /// for writing 16 `u128` values.
    #[cfg(not(feature = "table-math"))]
    #[inline(always)]
    pub unsafe fn promote_batch_16_to_128(input: *const u8, output: *mut u128) {
        unsafe {
            let raw0 = vld1q_u8(input);
            let raw1 = vld1q_u8(input.add(16));

            // De-interleave the 16-bit elements into their low/high bytes.
            let lo_bytes = vuzp1q_u8(raw0, raw1);
            let hi_bytes = vuzp2q_u8(raw0, raw1);

            let mask_0f = vdupq_n_u8(0x0F);
            let n0 = vandq_u8(lo_bytes, mask_0f);
            let n1 = vshrq_n_u8::<4>(lo_bytes);
            let n2 = vandq_u8(hi_bytes, mask_0f);
            let n3 = vshrq_n_u8::<4>(hi_bytes);

            let mut out = [vdupq_n_u8(0); 16];

            // Output byte column $j = XOR of four per-nibble table lookups.
            macro_rules! lookup {
                ($j:expr, $n0:ident, $n1:ident, $n2:ident, $n3:ident, $dst:ident) => {{
                    let t0 = vld1q_u8(constants::NIBBLE_PROMOTE_16_0_TO_128[$j].as_ptr());
                    let t1 = vld1q_u8(constants::NIBBLE_PROMOTE_16_1_TO_128[$j].as_ptr());
                    let t2 = vld1q_u8(constants::NIBBLE_PROMOTE_16_2_TO_128[$j].as_ptr());
                    let t3 = vld1q_u8(constants::NIBBLE_PROMOTE_16_3_TO_128[$j].as_ptr());

                    $dst[$j] = veorq_u8(
                        veorq_u8(vqtbl1q_u8(t0, $n0), vqtbl1q_u8(t1, $n1)),
                        veorq_u8(vqtbl1q_u8(t2, $n2), vqtbl1q_u8(t3, $n3)),
                    );
                }};
            }

            lookup!(0, n0, n1, n2, n3, out);
            lookup!(1, n0, n1, n2, n3, out);
            lookup!(2, n0, n1, n2, n3, out);
            lookup!(3, n0, n1, n2, n3, out);
            lookup!(4, n0, n1, n2, n3, out);
            lookup!(5, n0, n1, n2, n3, out);
            lookup!(6, n0, n1, n2, n3, out);
            lookup!(7, n0, n1, n2, n3, out);
            lookup!(8, n0, n1, n2, n3, out);
            lookup!(9, n0, n1, n2, n3, out);
            lookup!(10, n0, n1, n2, n3, out);
            lookup!(11, n0, n1, n2, n3, out);
            lookup!(12, n0, n1, n2, n3, out);
            lookup!(13, n0, n1, n2, n3, out);
            lookup!(14, n0, n1, n2, n3, out);
            lookup!(15, n0, n1, n2, n3, out);

            // Columns -> 16-byte elements.
            let elems = transpose_16x16(&out);
            for (i, elem) in elems.iter().enumerate() {
                vst1q_u8(output.add(i).cast::<u8>(), *elem);
            }
        }
    }
1010
    /// Lifts 16 flat Block32 values (64 input bytes) to 16 flat Block128
    /// values; two de-interleave rounds separate the four byte positions,
    /// then eight per-nibble table lookups feed the 16x16 transpose.
    ///
    /// # Safety
    ///
    /// `input` must be valid for reading 64 bytes and `output` must be valid
    /// for writing 16 `u128` values.
    #[cfg(not(feature = "table-math"))]
    #[inline(always)]
    pub unsafe fn promote_batch_32_to_128(input: *const u8, output: *mut u128) {
        unsafe {
            let raw0 = vld1q_u8(input);
            let raw1 = vld1q_u8(input.add(16));
            let raw2 = vld1q_u8(input.add(32));
            let raw3 = vld1q_u8(input.add(48));

            // Round 1: separate even/odd byte positions.
            let a02 = vuzp1q_u8(raw0, raw1);
            let a13 = vuzp2q_u8(raw0, raw1);
            let b02 = vuzp1q_u8(raw2, raw3);
            let b13 = vuzp2q_u8(raw2, raw3);

            // Round 2: isolate byte 0..3 of every 32-bit element.
            let byte0 = vuzp1q_u8(a02, b02);
            let byte2 = vuzp2q_u8(a02, b02);
            let byte1 = vuzp1q_u8(a13, b13);
            let byte3 = vuzp2q_u8(a13, b13);

            let mask_0f = vdupq_n_u8(0x0F);
            let n0 = vandq_u8(byte0, mask_0f);
            let n1 = vshrq_n_u8::<4>(byte0);
            let n2 = vandq_u8(byte1, mask_0f);
            let n3 = vshrq_n_u8::<4>(byte1);
            let n4 = vandq_u8(byte2, mask_0f);
            let n5 = vshrq_n_u8::<4>(byte2);
            let n6 = vandq_u8(byte3, mask_0f);
            let n7 = vshrq_n_u8::<4>(byte3);

            let mut out = [vdupq_n_u8(0); 16];

            // Output byte column $j = XOR of eight per-nibble table lookups.
            macro_rules! lookup {
                ($j:expr, $n0:ident, $n1:ident, $n2:ident, $n3:ident,
                 $n4:ident, $n5:ident, $n6:ident, $n7:ident, $dst:ident) => {{
                    let t0 = vld1q_u8(constants::NIBBLE_PROMOTE_32_0_TO_128[$j].as_ptr());
                    let t1 = vld1q_u8(constants::NIBBLE_PROMOTE_32_1_TO_128[$j].as_ptr());
                    let t2 = vld1q_u8(constants::NIBBLE_PROMOTE_32_2_TO_128[$j].as_ptr());
                    let t3 = vld1q_u8(constants::NIBBLE_PROMOTE_32_3_TO_128[$j].as_ptr());
                    let t4 = vld1q_u8(constants::NIBBLE_PROMOTE_32_4_TO_128[$j].as_ptr());
                    let t5 = vld1q_u8(constants::NIBBLE_PROMOTE_32_5_TO_128[$j].as_ptr());
                    let t6 = vld1q_u8(constants::NIBBLE_PROMOTE_32_6_TO_128[$j].as_ptr());
                    let t7 = vld1q_u8(constants::NIBBLE_PROMOTE_32_7_TO_128[$j].as_ptr());

                    $dst[$j] = veorq_u8(
                        veorq_u8(
                            veorq_u8(vqtbl1q_u8(t0, $n0), vqtbl1q_u8(t1, $n1)),
                            veorq_u8(vqtbl1q_u8(t2, $n2), vqtbl1q_u8(t3, $n3)),
                        ),
                        veorq_u8(
                            veorq_u8(vqtbl1q_u8(t4, $n4), vqtbl1q_u8(t5, $n5)),
                            veorq_u8(vqtbl1q_u8(t6, $n6), vqtbl1q_u8(t7, $n7)),
                        ),
                    );
                }};
            }

            lookup!(0, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(1, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(2, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(3, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(4, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(5, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(6, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(7, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(8, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(9, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(10, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(11, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(12, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(13, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(14, n0, n1, n2, n3, n4, n5, n6, n7, out);
            lookup!(15, n0, n1, n2, n3, n4, n5, n6, n7, out);

            // Columns -> 16-byte elements.
            let elems = transpose_16x16(&out);
            for (i, elem) in elems.iter().enumerate() {
                vst1q_u8(output.add(i).cast::<u8>(), *elem);
            }
        }
    }
1092
    /// Transposes a 16x16 byte matrix held in 16 NEON registers, using four
    /// rounds of TRN (interleave) at element widths 1, 2, 4, and 8 bytes.
    /// The type-pun helpers only re-label the 128-bit registers for the
    /// wider-width TRN intrinsics; no data moves.
    #[cfg(not(feature = "table-math"))]
    #[inline(always)]
    unsafe fn transpose_16x16(r: &[uint8x16_t; 16]) -> [uint8x16_t; 16] {
        #[inline(always)]
        const fn u8_to_u16(v: uint8x16_t) -> uint16x8_t {
            unsafe { transmute::<uint8x16_t, uint16x8_t>(v) }
        }

        #[inline(always)]
        const fn u16_to_u32(v: uint16x8_t) -> uint32x4_t {
            unsafe { transmute::<uint16x8_t, uint32x4_t>(v) }
        }

        #[inline(always)]
        const fn u32_to_u64(v: uint32x4_t) -> uint64x2_t {
            unsafe { transmute::<uint32x4_t, uint64x2_t>(v) }
        }

        #[inline(always)]
        const fn u64_to_u8(v: uint64x2_t) -> uint8x16_t {
            unsafe { transmute::<uint64x2_t, uint8x16_t>(v) }
        }

        unsafe {
            // Round 1: interleave adjacent row pairs at byte width.
            let a0 = vtrn1q_u8(r[0], r[1]);
            let a1 = vtrn2q_u8(r[0], r[1]);
            let a2 = vtrn1q_u8(r[2], r[3]);
            let a3 = vtrn2q_u8(r[2], r[3]);
            let a4 = vtrn1q_u8(r[4], r[5]);
            let a5 = vtrn2q_u8(r[4], r[5]);
            let a6 = vtrn1q_u8(r[6], r[7]);
            let a7 = vtrn2q_u8(r[6], r[7]);
            let a8 = vtrn1q_u8(r[8], r[9]);
            let a9 = vtrn2q_u8(r[8], r[9]);
            let a10 = vtrn1q_u8(r[10], r[11]);
            let a11 = vtrn2q_u8(r[10], r[11]);
            let a12 = vtrn1q_u8(r[12], r[13]);
            let a13 = vtrn2q_u8(r[12], r[13]);
            let a14 = vtrn1q_u8(r[14], r[15]);
            let a15 = vtrn2q_u8(r[14], r[15]);

            // Round 2: interleave at 16-bit width (distance-2 rows).
            let b0 = vtrn1q_u16(u8_to_u16(a0), u8_to_u16(a2));
            let b2 = vtrn2q_u16(u8_to_u16(a0), u8_to_u16(a2));
            let b1 = vtrn1q_u16(u8_to_u16(a1), u8_to_u16(a3));
            let b3 = vtrn2q_u16(u8_to_u16(a1), u8_to_u16(a3));
            let b4 = vtrn1q_u16(u8_to_u16(a4), u8_to_u16(a6));
            let b6 = vtrn2q_u16(u8_to_u16(a4), u8_to_u16(a6));
            let b5 = vtrn1q_u16(u8_to_u16(a5), u8_to_u16(a7));
            let b7 = vtrn2q_u16(u8_to_u16(a5), u8_to_u16(a7));
            let b8 = vtrn1q_u16(u8_to_u16(a8), u8_to_u16(a10));
            let b10 = vtrn2q_u16(u8_to_u16(a8), u8_to_u16(a10));
            let b9 = vtrn1q_u16(u8_to_u16(a9), u8_to_u16(a11));
            let b11 = vtrn2q_u16(u8_to_u16(a9), u8_to_u16(a11));
            let b12 = vtrn1q_u16(u8_to_u16(a12), u8_to_u16(a14));
            let b14 = vtrn2q_u16(u8_to_u16(a12), u8_to_u16(a14));
            let b13 = vtrn1q_u16(u8_to_u16(a13), u8_to_u16(a15));
            let b15 = vtrn2q_u16(u8_to_u16(a13), u8_to_u16(a15));

            // Round 3: interleave at 32-bit width (distance-4 rows).
            let c0 = vtrn1q_u32(u16_to_u32(b0), u16_to_u32(b4));
            let c4 = vtrn2q_u32(u16_to_u32(b0), u16_to_u32(b4));
            let c1 = vtrn1q_u32(u16_to_u32(b1), u16_to_u32(b5));
            let c5 = vtrn2q_u32(u16_to_u32(b1), u16_to_u32(b5));
            let c2 = vtrn1q_u32(u16_to_u32(b2), u16_to_u32(b6));
            let c6 = vtrn2q_u32(u16_to_u32(b2), u16_to_u32(b6));
            let c3 = vtrn1q_u32(u16_to_u32(b3), u16_to_u32(b7));
            let c7 = vtrn2q_u32(u16_to_u32(b3), u16_to_u32(b7));
            let c8 = vtrn1q_u32(u16_to_u32(b8), u16_to_u32(b12));
            let c12 = vtrn2q_u32(u16_to_u32(b8), u16_to_u32(b12));
            let c9 = vtrn1q_u32(u16_to_u32(b9), u16_to_u32(b13));
            let c13 = vtrn2q_u32(u16_to_u32(b9), u16_to_u32(b13));
            let c10 = vtrn1q_u32(u16_to_u32(b10), u16_to_u32(b14));
            let c14 = vtrn2q_u32(u16_to_u32(b10), u16_to_u32(b14));
            let c11 = vtrn1q_u32(u16_to_u32(b11), u16_to_u32(b15));
            let c15 = vtrn2q_u32(u16_to_u32(b11), u16_to_u32(b15));

            // Round 4: interleave at 64-bit width (distance-8 rows) and
            // assemble the transposed rows in order.
            [
                u64_to_u8(vtrn1q_u64(u32_to_u64(c0), u32_to_u64(c8))),
                u64_to_u8(vtrn1q_u64(u32_to_u64(c1), u32_to_u64(c9))),
                u64_to_u8(vtrn1q_u64(u32_to_u64(c2), u32_to_u64(c10))),
                u64_to_u8(vtrn1q_u64(u32_to_u64(c3), u32_to_u64(c11))),
                u64_to_u8(vtrn1q_u64(u32_to_u64(c4), u32_to_u64(c12))),
                u64_to_u8(vtrn1q_u64(u32_to_u64(c5), u32_to_u64(c13))),
                u64_to_u8(vtrn1q_u64(u32_to_u64(c6), u32_to_u64(c14))),
                u64_to_u8(vtrn1q_u64(u32_to_u64(c7), u32_to_u64(c15))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c0), u32_to_u64(c8))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c1), u32_to_u64(c9))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c2), u32_to_u64(c10))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c3), u32_to_u64(c11))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c4), u32_to_u64(c12))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c5), u32_to_u64(c13))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c6), u32_to_u64(c14))),
                u64_to_u8(vtrn2q_u64(u32_to_u64(c7), u32_to_u64(c15))),
            ]
        }
    }
1199}
1200
1201#[cfg(test)]
1206mod tests {
1207 use super::*;
1208 use proptest::prelude::*;
1209 use rand::{RngExt, rng};
1210
1211 #[test]
1216 fn tower_constants() {
1217 let tau128 = Block128::EXTENSION_TAU;
1220 let (lo128, hi128) = tau128.split();
1221 assert_eq!(lo128, Block64::ZERO);
1222 assert_eq!(hi128, Block64::TAU);
1223 }
1224
1225 #[test]
1226 fn add_truth() {
1227 let zero = Block128::ZERO;
1228 let one = Block128::ONE;
1229
1230 assert_eq!(zero + zero, zero);
1231 assert_eq!(zero + one, one);
1232 assert_eq!(one + zero, one);
1233 assert_eq!(one + one, zero);
1234 }
1235
1236 #[test]
1237 fn mul_truth() {
1238 let zero = Block128::ZERO;
1239 let one = Block128::ONE;
1240
1241 assert_eq!(zero * zero, zero);
1242 assert_eq!(zero * one, zero);
1243 assert_eq!(one * one, one);
1244 }
1245
1246 #[test]
1247 fn add() {
1248 assert_eq!(Block128(5) + Block128(3), Block128(6));
1251 }
1252
1253 #[test]
1254 fn mul_simple() {
1255 assert_eq!(Block128(2) * Block128(2), Block128(4));
1258 }
1259
    #[test]
    fn mul_overflow() {
        // Known-answer test whose carry-less product exceeds the low bits,
        // exercising the reduction path of the multiplier.
        assert_eq!(Block128(0x57) * Block128(0x83), Block128(0xC1));
    }
1267
1268 #[test]
1269 fn karatsuba_correctness() {
1270 let x = Block128::new(Block64::ZERO, Block64::ONE);
1282 let squared = x * x;
1283
1284 let (res_lo, res_hi) = squared.split();
1286
1287 assert_eq!(res_hi, Block64::ONE, "X^2 should contain X component");
1288 assert_eq!(
1289 res_lo,
1290 Block64(0x2000_0000_0000_0000),
1291 "X^2 should contain tau component (0x2000_0000_0000_0000)"
1292 );
1293 }
1294
    #[test]
    fn security_zeroize() {
        // `zeroize` must wipe the wrapped u128 in place.
        let mut secret_val = Block128::from(0xDEAD_BEEF_CAFE_BABE_u128);
        assert_ne!(secret_val, Block128::ZERO);

        secret_val.zeroize();

        assert_eq!(secret_val, Block128::ZERO, "Memory was not wiped!");

        // Also check the raw field directly, not only via PartialEq.
        assert_eq!(secret_val.0, 0u128, "Underlying memory leak detected");
    }
1310
1311 #[test]
1312 fn invert_zero() {
1313 assert_eq!(
1315 Block128::ZERO.invert(),
1316 Block128::ZERO,
1317 "invert(0) must return 0"
1318 );
1319 }
1320
1321 #[test]
1322 fn inversion_random() {
1323 let mut rng = rng();
1324 for _i in 0..1000 {
1325 let val = Block128(rng.random());
1326
1327 if val != Block128::ZERO {
1328 let inv = val.invert();
1329 let identity = val * inv;
1330
1331 assert_eq!(
1332 identity,
1333 Block128::ONE,
1334 "Inversion identity failed: a * a^-1 != 1"
1335 );
1336 }
1337 }
1338 }
1339
    #[test]
    fn tower_embedding() {
        // Lifting Block64 into Block128 must be a structural embedding
        // (value lands in the low limb, high limb zero) and must commute
        // with both addition and multiplication.
        let mut rng = rng();
        for _ in 0..100 {
            let a = Block64(rng.random());
            let b = Block64(rng.random());

            let a_lifted: Block128 = a.into();
            let (lo, hi) = a_lifted.split();

            assert_eq!(lo, a, "Embedding structure failed: low part mismatch");
            assert_eq!(
                hi,
                Block64::ZERO,
                "Embedding structure failed: high part must be zero"
            );

            // lift(a + b) == lift(a) + lift(b)
            let sum_sub = a + b;
            let sum_lifted: Block128 = sum_sub.into();
            let sum_in_super = Block128::from(a) + Block128::from(b);

            assert_eq!(sum_lifted, sum_in_super, "Homomorphism failed: add");

            // lift(a * b) == lift(a) * lift(b)
            let prod_sub = a * b;
            let prod_lifted: Block128 = prod_sub.into();
            let prod_in_super = Block128::from(a) * Block128::from(b);

            assert_eq!(prod_lifted, prod_in_super, "Homomorphism failed: mul");
        }
    }
1376
1377 #[test]
1382 fn isomorphism_roundtrip() {
1383 let mut rng = rng();
1384 for _ in 0..1000 {
1385 let val = Block128(rng.random::<u128>());
1386 assert_eq!(val.to_hardware().to_tower(), val);
1387 }
1388 }
1389
    #[test]
    fn flat_mul_homomorphism() {
        // The basis change must commute with multiplication:
        // (a * b)^H == a^H * b^H.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block128(rng.random());
            let b = Block128(rng.random());

            let expected_flat = (a * b).to_hardware();
            let actual_flat = a.to_hardware() * b.to_hardware();

            assert_eq!(
                actual_flat, expected_flat,
                "Block128 flat multiplication mismatch: (a*b)^H != a^H * b^H"
            );
        }
    }
1406
    #[test]
    fn packed_consistency() {
        // Packed (hardware) add/mul must agree lane-by-lane with the scalar
        // tower operations after the basis change.
        let mut rng = rng();
        for _ in 0..100 {
            let mut a_vals = [Block128::ZERO; 4];
            let mut b_vals = [Block128::ZERO; 4];

            for i in 0..4 {
                a_vals[i] = Block128(rng.random::<u128>());
                b_vals[i] = Block128(rng.random::<u128>());
            }

            let a_flat_vals = a_vals.map(|x| x.to_hardware());
            let b_flat_vals = b_vals.map(|x| x.to_hardware());
            let a_packed = Flat::<Block128>::pack(&a_flat_vals);
            let b_packed = Flat::<Block128>::pack(&b_flat_vals);

            // Packed addition vs scalar tower addition, per lane.
            let add_res = Block128::add_hardware_packed(a_packed, b_packed);

            let mut add_out = [Block128::ZERO.to_hardware(); 4];
            Flat::<Block128>::unpack(add_res, &mut add_out);

            for i in 0..4 {
                assert_eq!(
                    add_out[i],
                    (a_vals[i] + b_vals[i]).to_hardware(),
                    "Block128 SIMD add mismatch at index {}",
                    i
                );
            }

            // Packed multiplication vs scalar tower multiplication, per lane.
            let mul_res = Block128::mul_hardware_packed(a_packed, b_packed);

            let mut mul_out = [Block128::ZERO.to_hardware(); 4];
            Flat::<Block128>::unpack(mul_res, &mut mul_out);

            for i in 0..4 {
                let expected_flat = (a_vals[i] * b_vals[i]).to_hardware();
                assert_eq!(
                    mul_out[i], expected_flat,
                    "Block128 SIMD mul mismatch at index {}",
                    i
                );
            }
        }
    }
1455
1456 #[test]
1461 fn pack_unpack_roundtrip() {
1462 let mut rng = rng();
1463 let mut data = [Block128::ZERO; PACKED_WIDTH_128];
1464 for v in data.iter_mut() {
1465 *v = Block128(rng.random());
1466 }
1467
1468 let packed = Block128::pack(&data);
1469 let mut unpacked = [Block128::ZERO; PACKED_WIDTH_128];
1470 Block128::unpack(packed, &mut unpacked);
1471 assert_eq!(data, unpacked);
1472 }
1473
    #[test]
    fn packed_add_consistency() {
        // Packed `+` must equal element-wise scalar `+` in every lane.
        let mut rng = rng();
        let mut a_vals = [Block128::ZERO; PACKED_WIDTH_128];
        let mut b_vals = [Block128::ZERO; PACKED_WIDTH_128];

        for i in 0..PACKED_WIDTH_128 {
            a_vals[i] = Block128(rng.random());
            b_vals[i] = Block128(rng.random());
        }

        let res_packed = Block128::pack(&a_vals) + Block128::pack(&b_vals);
        let mut res_unpacked = [Block128::ZERO; PACKED_WIDTH_128];
        Block128::unpack(res_packed, &mut res_unpacked);

        for i in 0..PACKED_WIDTH_128 {
            assert_eq!(res_unpacked[i], a_vals[i] + b_vals[i]);
        }
    }
1493
    #[test]
    fn packed_mul_consistency() {
        // Packed `*` must equal element-wise scalar `*` in every lane,
        // checked over many random lane sets.
        let mut rng = rng();

        for _ in 0..1000 {
            let mut a_arr = [Block128::ZERO; PACKED_WIDTH_128];
            let mut b_arr = [Block128::ZERO; PACKED_WIDTH_128];

            for i in 0..PACKED_WIDTH_128 {
                let val_a: u128 = rng.random();
                let val_b: u128 = rng.random();
                a_arr[i] = Block128(val_a);
                b_arr[i] = Block128(val_b);
            }

            let a_packed = PackedBlock128(a_arr);
            let b_packed = PackedBlock128(b_arr);

            let c_packed = a_packed * b_packed;

            // Scalar reference result, lane by lane.
            let mut c_expected = [Block128::ZERO; PACKED_WIDTH_128];
            for i in 0..PACKED_WIDTH_128 {
                c_expected[i] = a_arr[i] * b_arr[i];
            }

            assert_eq!(c_packed.0, c_expected, "SIMD multiplication mismatch!");
        }
    }
1527
1528 #[inline(always)]
1533 fn promote_block8_tables(val: Block8) -> Block128 {
1534 let idx_flat = val.0 as usize;
1536 let tower_byte = unsafe { *constants::FLAT_TO_TOWER_8.get_unchecked(idx_flat) };
1537 let idx_tower = tower_byte as usize;
1538
1539 Block128(unsafe { *constants::TOWER_TO_FLAT_128.get_unchecked(idx_tower) })
1540 }
1541
1542 #[inline(always)]
1543 fn promote_block16_tables(val: Block16) -> Block128 {
1544 let v_flat = val.0;
1545
1546 let mut v_tower = 0u16;
1547 for i in 0..2 {
1548 let byte = ((v_flat >> (i * 8)) & 0xFF) as usize;
1549 let idx = (i * 256) + byte;
1550 v_tower ^= unsafe { *constants::FLAT_TO_TOWER_16.get_unchecked(idx) };
1551 }
1552
1553 let mut res = 0u128;
1554 for i in 0..2 {
1555 let byte = ((v_tower >> (i * 8)) & 0xFF) as usize;
1556 let idx = (i * 256) + byte;
1557 res ^= unsafe { *constants::TOWER_TO_FLAT_128.get_unchecked(idx) };
1558 }
1559
1560 Block128(res)
1561 }
1562
1563 #[inline(always)]
1564 fn promote_block32_tables(val: Block32) -> Block128 {
1565 let v_flat = val.0;
1566
1567 let mut v_tower = 0u32;
1568 for i in 0..4 {
1569 let byte = ((v_flat >> (i * 8)) & 0xFF) as usize;
1570 let idx = (i * 256) + byte;
1571 v_tower ^= unsafe { *constants::FLAT_TO_TOWER_32.get_unchecked(idx) };
1572 }
1573
1574 let mut res = 0u128;
1575 for i in 0..4 {
1576 let byte = ((v_tower >> (i * 8)) & 0xFF) as usize;
1577 let idx = (i * 256) + byte;
1578 res ^= unsafe { *constants::TOWER_TO_FLAT_128.get_unchecked(idx) };
1579 }
1580
1581 Block128(res)
1582 }
1583
1584 #[inline(always)]
1585 fn promote_block64_tables(val: Block64) -> Block128 {
1586 let v_flat = val.0;
1587
1588 let mut v_tower = 0u64;
1589 for i in 0..8 {
1590 let byte = ((v_flat >> (i * 8)) & 0xFF) as usize;
1591 let idx = (i * 256) + byte;
1592 v_tower ^= unsafe { *constants::FLAT_TO_TOWER_64.get_unchecked(idx) };
1593 }
1594
1595 let mut res = 0u128;
1596 for i in 0..8 {
1597 let byte = ((v_tower >> (i * 8)) & 0xFF) as usize;
1598 let idx = (i * 256) + byte;
1599 res ^= unsafe { *constants::TOWER_TO_FLAT_128.get_unchecked(idx) };
1600 }
1601
1602 Block128(res)
1603 }
1604
1605 #[test]
1606 fn lift_from_partial_hardware_matches_tables_block8_exhaustive() {
1607 for x in 0u16..=u8::MAX as u16 {
1608 let v = Block8(x as u8);
1609 let got = Block128::promote_flat(Flat::from_raw(v)).into_raw();
1610 let expected = promote_block8_tables(v);
1611
1612 assert_eq!(got, expected);
1613 }
1614 }
1615
1616 #[test]
1617 fn lift_from_partial_hardware_matches_tables_block16_exhaustive() {
1618 for x in 0..=u16::MAX {
1619 let v = Block16(x);
1620 let got = Block128::promote_flat(Flat::from_raw(v)).into_raw();
1621 let expected = promote_block16_tables(v);
1622
1623 assert_eq!(got, expected);
1624 }
1625 }
1626
    #[test]
    fn lift_from_partial_hardware_matches_tables_block32_random() {
        // Randomized check of the production promotion against the
        // table-based reference (2^32 inputs is too many to enumerate).
        let mut rng = rng();
        for _ in 0..10_000 {
            let v = Block32(rng.random::<u32>());
            let got = Block128::promote_flat(Flat::from_raw(v)).into_raw();
            let expected = promote_block32_tables(v);

            assert_eq!(got, expected);
        }
    }
1638
    #[test]
    fn lift_from_partial_hardware_matches_tables_block64_random() {
        // Randomized check of the production promotion against the
        // table-based reference for 64-bit inputs.
        let mut rng = rng();
        for _ in 0..10_000 {
            let v = Block64(rng.random::<u64>());
            let got = Block128::promote_flat(Flat::from_raw(v)).into_raw();
            let expected = promote_block64_tables(v);

            assert_eq!(got, expected);
        }
    }
1650
    #[test]
    fn promote_flat_batch_matches_scalar_block8() {
        // Batched promotion must agree element-wise with the scalar path.
        let mut rng = rng();
        let input: Vec<Flat<Block8>> = (0..64)
            .map(|_| Block8(rng.random::<u8>()).to_hardware())
            .collect();

        let mut batch_out = vec![Flat::from_raw(Block128::ZERO); 64];
        Block128::promote_flat_batch(&input, &mut batch_out);

        for (i, &v) in input.iter().enumerate() {
            let scalar = Block128::promote_flat(v);
            assert_eq!(batch_out[i], scalar, "batch/scalar mismatch at index {}", i);
        }
    }
1670
    #[test]
    fn promote_flat_batch_matches_scalar_block16() {
        // Batched promotion must agree element-wise with the scalar path.
        let mut rng = rng();
        let input: Vec<Flat<Block16>> = (0..32)
            .map(|_| Block16(rng.random::<u16>()).to_hardware())
            .collect();

        let mut batch_out = vec![Flat::from_raw(Block128::ZERO); 32];
        Block128::promote_flat_batch(&input, &mut batch_out);

        for (i, &v) in input.iter().enumerate() {
            assert_eq!(
                batch_out[i],
                Block128::promote_flat(v),
                "batch/scalar mismatch at index {}",
                i
            );
        }
    }
1690
    #[test]
    fn promote_flat_batch_matches_scalar_block32() {
        // Batched promotion must agree element-wise with the scalar path.
        let mut rng = rng();
        let input: Vec<Flat<Block32>> = (0..16)
            .map(|_| Block32(rng.random::<u32>()).to_hardware())
            .collect();

        let mut batch_out = vec![Flat::from_raw(Block128::ZERO); 16];
        Block128::promote_flat_batch(&input, &mut batch_out);

        for (i, &v) in input.iter().enumerate() {
            assert_eq!(
                batch_out[i],
                Block128::promote_flat(v),
                "batch/scalar mismatch at index {}",
                i
            );
        }
    }
1710
    #[test]
    fn promote_flat_batch_matches_scalar_block64() {
        // Batched promotion must agree element-wise with the scalar path.
        let mut rng = rng();
        let input: Vec<Flat<Block64>> = (0..8)
            .map(|_| Block64(rng.random::<u64>()).to_hardware())
            .collect();

        let mut batch_out = vec![Flat::from_raw(Block128::ZERO); 8];
        Block128::promote_flat_batch(&input, &mut batch_out);

        for (i, &v) in input.iter().enumerate() {
            assert_eq!(
                batch_out[i],
                Block128::promote_flat(v),
                "batch/scalar mismatch at index {}",
                i
            );
        }
    }
1730
    #[test]
    fn promote_flat_batch_partial_slice() {
        // Behavior with mismatched input/output lengths.
        let input: Vec<Flat<Block8>> = (0..16).map(|i| Block8(i as u8).to_hardware()).collect();

        // Output shorter than input: the first 5 lanes match the scalar path.
        let mut out_short = vec![Flat::from_raw(Block128::ZERO); 5];
        Block128::promote_flat_batch(&input, &mut out_short);

        for i in 0..5 {
            assert_eq!(out_short[i], Block128::promote_flat(input[i]));
        }

        // Input shorter than output: the first 3 lanes are written ...
        let short_input = &input[..3];
        let mut out_long = vec![Flat::from_raw(Block128::ZERO); 10];

        Block128::promote_flat_batch(short_input, &mut out_long);

        for i in 0..3 {
            assert_eq!(out_long[i], Block128::promote_flat(short_input[i]));
        }

        // ... and the tail keeps its original (zero) contents.
        for val in &out_long[3..10] {
            assert_eq!(*val, Flat::from_raw(Block128::ZERO));
        }
    }
1758
1759 #[test]
1760 fn promote_edge_zero() {
1761 let zero = Flat::from_raw(Block8(0));
1762 let promoted = Block128::promote_flat(zero);
1763
1764 assert_eq!(
1765 promoted,
1766 Flat::from_raw(Block128::ZERO),
1767 "promote(0) must be 0"
1768 );
1769
1770 let input = vec![zero; 16];
1773 let mut output = vec![Flat::from_raw(Block128(0xDEAD)); 16];
1774
1775 Block128::promote_flat_batch(&input, &mut output);
1776
1777 for o in &output {
1778 assert_eq!(*o, Flat::from_raw(Block128::ZERO));
1779 }
1780 }
1781
1782 #[test]
1783 fn promote_edge_one() {
1784 let one_flat8 = Block8::ONE.to_hardware();
1785 let one_flat128 = Block128::ONE.to_hardware();
1786
1787 assert_eq!(
1788 Block128::promote_flat(one_flat8),
1789 one_flat128,
1790 "promote(1) must equal 1 in target field"
1791 );
1792 }
1793
    #[test]
    fn promote_edge_max_block8() {
        // All-ones input: the promotion must be non-zero and, after
        // converting back to the tower basis, must agree with the direct
        // tower-side embedding.
        let max = Flat::from_raw(Block8(0xFF));
        let promoted = Block128::promote_flat(max);

        assert_ne!(promoted, Flat::from_raw(Block128::ZERO));

        let tower_8 = max.to_tower();
        let tower_128 = Block128::from(tower_8);

        assert_eq!(promoted.to_tower(), tower_128);
    }
1809
    #[test]
    fn promote_edge_single_bits() {
        // Every single-bit Block8 input must promote to a non-zero element
        // that matches the direct tower-side embedding.
        for bit in 0..8 {
            let val = Flat::from_raw(Block8(1u8 << bit));
            let promoted = Block128::promote_flat(val);

            assert_ne!(
                promoted,
                Flat::from_raw(Block128::ZERO),
                "single-bit {} promoted to zero",
                bit
            );

            // Cross-check against the tower-basis embedding.
            let tower_8 = val.to_tower();
            let tower_128 = Block128::from(tower_8);

            assert_eq!(
                promoted.to_tower(),
                tower_128,
                "tower roundtrip failed for bit {}",
                bit
            );
        }
    }
1836
    #[test]
    fn promote_edge_alternating_packed() {
        // Alternating 0x00 / 0xFF lanes: the batch path must keep each lane
        // independent and agree with the scalar path.
        let input: Vec<Flat<Block8>> = (0..16)
            .map(|i| {
                if i % 2 == 0 {
                    Flat::from_raw(Block8(0x00))
                } else {
                    Flat::from_raw(Block8(0xFF))
                }
            })
            .collect();

        let mut output = vec![Flat::from_raw(Block128::ZERO); 16];
        Block128::promote_flat_batch(&input, &mut output);

        for (i, &v) in input.iter().enumerate() {
            assert_eq!(
                output[i],
                Block128::promote_flat(v),
                "alternating mismatch at {}",
                i
            );
        }
    }
1861
1862 #[test]
1863 fn promote_edge_all_same_packed() {
1864 let val = Flat::from_raw(Block8(0x42));
1865 let expected = Block128::promote_flat(val);
1866
1867 let input = vec![val; 16];
1868 let mut output = vec![Flat::from_raw(Block128::ZERO); 16];
1869
1870 Block128::promote_flat_batch(&input, &mut output);
1871
1872 for (i, o) in output.iter().enumerate() {
1873 assert_eq!(*o, expected, "all-same mismatch at {}", i);
1874 }
1875 }
1876
    #[test]
    fn promote_tower_roundtrip_block8() {
        // For every Block8: promote in the flat basis, convert back to the
        // tower basis, and compare with the direct tower embedding.
        for x in 0u16..=u8::MAX as u16 {
            let b8 = Block8(x as u8);
            let promoted = Block128::promote_flat(b8.to_hardware());
            let tower_128 = promoted.to_tower();
            let embedded = Block128::from(b8);

            assert_eq!(
                tower_128, embedded,
                "tower roundtrip failed for Block8({})",
                x
            );
        }
    }
1892
    #[test]
    fn promote_tower_roundtrip_block16() {
        // Randomized: flat-basis promotion converted back to the tower basis
        // must equal the direct tower embedding.
        let mut rng = rng();
        for _ in 0..10_000 {
            let v = Block16(rng.random::<u16>());
            let promoted = Block128::promote_flat(v.to_hardware());
            let tower_128 = promoted.to_tower();
            let embedded = Block128::from(v);

            assert_eq!(
                tower_128, embedded,
                "tower roundtrip failed for Block16({})",
                v.0
            );
        }
    }
1909
    #[test]
    fn promote_tower_roundtrip_block32() {
        // Randomized: flat-basis promotion converted back to the tower basis
        // must equal the direct tower embedding.
        let mut rng = rng();
        for _ in 0..10_000 {
            let v = Block32(rng.random::<u32>());
            let promoted = Block128::promote_flat(v.to_hardware());
            let tower_128 = promoted.to_tower();
            let embedded = Block128::from(v);

            assert_eq!(
                tower_128, embedded,
                "tower roundtrip failed for Block32({})",
                v.0
            );
        }
    }
1926
    #[test]
    fn promote_tower_roundtrip_block64() {
        // Randomized: flat-basis promotion converted back to the tower basis
        // must equal the direct tower embedding.
        let mut rng = rng();
        for _ in 0..10_000 {
            let v = Block64(rng.random::<u64>());
            let promoted = Block128::promote_flat(v.to_hardware());
            let tower_128 = promoted.to_tower();
            let embedded = Block128::from(v);

            assert_eq!(
                tower_128, embedded,
                "tower roundtrip failed for Block64({})",
                v.0
            );
        }
    }
1943
    #[test]
    fn promote_algebraic_homomorphism_add_block8() {
        // promote(a) + promote(b) == promote(a + b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block8(rng.random::<u8>());
            let b = Block8(rng.random::<u8>());

            let promote_a = Block128::promote_flat(a.to_hardware());
            let promote_b = Block128::promote_flat(b.to_hardware());
            let promote_sum = Block128::promote_flat((a + b).to_hardware());

            assert_eq!(
                promote_a + promote_b,
                promote_sum,
                "add homomorphism: promote(a)+promote(b) != promote(a+b)"
            );
        }
    }
1962
    #[test]
    fn promote_algebraic_homomorphism_mul_block8() {
        // promote(a) * promote(b) == promote(a * b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block8(rng.random::<u8>());
            let b = Block8(rng.random::<u8>());

            let promote_a = Block128::promote_flat(a.to_hardware());
            let promote_b = Block128::promote_flat(b.to_hardware());
            let promote_prod = Block128::promote_flat((a * b).to_hardware());

            assert_eq!(
                promote_a * promote_b,
                promote_prod,
                "mul homomorphism: promote(a)*promote(b) != promote(a*b)"
            );
        }
    }
1983
    #[test]
    fn promote_algebraic_homomorphism_add_block16() {
        // promote(a) + promote(b) == promote(a + b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block16(rng.random::<u16>());
            let b = Block16(rng.random::<u16>());

            let pa = Block128::promote_flat(a.to_hardware());
            let pb = Block128::promote_flat(b.to_hardware());
            let p_sum = Block128::promote_flat((a + b).to_hardware());

            assert_eq!(pa + pb, p_sum, "Block16 add homomorphism failed");
        }
    }
1998
    #[test]
    fn promote_algebraic_homomorphism_mul_block16() {
        // promote(a) * promote(b) == promote(a * b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block16(rng.random::<u16>());
            let b = Block16(rng.random::<u16>());

            let pa = Block128::promote_flat(a.to_hardware());
            let pb = Block128::promote_flat(b.to_hardware());
            let p_prod = Block128::promote_flat((a * b).to_hardware());

            assert_eq!(pa * pb, p_prod, "Block16 mul homomorphism failed");
        }
    }
2013
    #[test]
    fn promote_algebraic_homomorphism_add_block32() {
        // promote(a) + promote(b) == promote(a + b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block32(rng.random::<u32>());
            let b = Block32(rng.random::<u32>());

            let pa = Block128::promote_flat(a.to_hardware());
            let pb = Block128::promote_flat(b.to_hardware());
            let p_sum = Block128::promote_flat((a + b).to_hardware());

            assert_eq!(pa + pb, p_sum, "Block32 add homomorphism failed");
        }
    }
2028
    #[test]
    fn promote_algebraic_homomorphism_mul_block32() {
        // promote(a) * promote(b) == promote(a * b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block32(rng.random::<u32>());
            let b = Block32(rng.random::<u32>());

            let pa = Block128::promote_flat(a.to_hardware());
            let pb = Block128::promote_flat(b.to_hardware());
            let p_prod = Block128::promote_flat((a * b).to_hardware());

            assert_eq!(pa * pb, p_prod, "Block32 mul homomorphism failed");
        }
    }
2043
    #[test]
    fn promote_algebraic_homomorphism_add_block64() {
        // promote(a) + promote(b) == promote(a + b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block64(rng.random::<u64>());
            let b = Block64(rng.random::<u64>());

            let pa = Block128::promote_flat(a.to_hardware());
            let pb = Block128::promote_flat(b.to_hardware());
            let p_sum = Block128::promote_flat((a + b).to_hardware());

            assert_eq!(pa + pb, p_sum, "Block64 add homomorphism failed");
        }
    }
2058
    #[test]
    fn promote_algebraic_homomorphism_mul_block64() {
        // promote(a) * promote(b) == promote(a * b) in the flat basis.
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block64(rng.random::<u64>());
            let b = Block64(rng.random::<u64>());

            let pa = Block128::promote_flat(a.to_hardware());
            let pb = Block128::promote_flat(b.to_hardware());
            let p_prod = Block128::promote_flat((a * b).to_hardware());

            assert_eq!(pa * pb, p_prod, "Block64 mul homomorphism failed");
        }
    }
2073
    #[test]
    fn promote_generator_preserves_order() {
        // g = 3 in Block8; presumably chosen as a generator of GF(2^8)^* —
        // the test itself only verifies g^255 == 1 on both sides.
        let g = Block8(3);
        let g_promoted = Block128::promote_flat(g.to_hardware());

        // Fermat check in the subfield: g^255 == 1.
        let mut acc8 = Block8::ONE;
        for _ in 0..255 {
            acc8 *= g;
        }

        assert_eq!(acc8, Block8::ONE, "Block8 Fermat: g^255 must be 1");

        // The promoted element must satisfy the same relation in the
        // flat 128-bit representation.
        let mut acc128 = Flat::from_raw(Block128::ONE);
        for _ in 0..255 {
            acc128 *= g_promoted;
        }

        assert_eq!(
            acc128,
            Flat::from_raw(Block128::ONE),
            "promoted element lost multiplicative order"
        );
    }
2102
    proptest! {
        #[test]
        fn parity_masks_match_from_hardware(x_flat in any::<u128>()) {
            // Each tower-basis bit extracted via the `tower_bit` API must
            // match the corresponding bit of the full basis conversion.
            let tower = Block128::from_hardware(Flat::from_raw(Block128(x_flat))).0;

            for k in 0..128 {
                let bit = ((tower >> k) & 1) as u8;
                let via_api = Flat::from_raw(Block128(x_flat)).tower_bit(k);

                prop_assert_eq!(
                    via_api, bit,
                    "Block128 tower_bit_from_hardware mismatch at x_flat={:#034x}, bit_idx={}",
                    x_flat, k
                );
            }
        }
    }
2120}