1use crate::constants::FLAT_TO_TOWER_BIT_MASKS_8;
20use crate::towers::bit::Bit;
21use crate::{
22 CanonicalDeserialize, CanonicalSerialize, Flat, HardwareField, PackableField, PackedFlat,
23 TowerField, constants,
24};
25use core::ops::{Add, AddAssign, BitXor, Mul, MulAssign, Sub, SubAssign};
26use serde::{Deserialize, Serialize};
27use zeroize::Zeroize;
28
/// Wrapper for the small change-of-basis tables used by the constant-time
/// conversion path. The 64-byte alignment keeps the whole 8-byte table inside
/// a single cache line, so every lookup touches the same line regardless of
/// index (NOTE(review): presumed rationale for `align(64)` — confirm).
#[cfg(not(feature = "table-math"))]
#[repr(align(64))]
struct CtConvertBasisU8<const N: usize>([u8; N]);
32
/// Basis columns for the tower -> flat change of basis (constant-time path);
/// row `i` is XORed into the result when bit `i` of the input is set.
#[cfg(not(feature = "table-math"))]
static TOWER_TO_FLAT_BASIS_8: CtConvertBasisU8<8> =
    CtConvertBasisU8(constants::RAW_TOWER_TO_FLAT_8);
36
/// Basis columns for the flat -> tower change of basis (constant-time path);
/// inverse of `TOWER_TO_FLAT_BASIS_8`.
#[cfg(not(feature = "table-math"))]
static FLAT_TO_TOWER_BASIS_8: CtConvertBasisU8<8> =
    CtConvertBasisU8(constants::RAW_FLAT_TO_TOWER_8);
40
/// `EXP_TABLE[i] = g^i` for the generator `g = 0x03` (see
/// `generate_exp_table`). `EXP_TABLE[255]` wraps back to `g^255 = 1`, which
/// lets callers index with exponents up to 255 without reducing first.
#[cfg(feature = "table-math")]
const EXP_TABLE: [u8; 256] = generate_exp_table();
53
/// `LOG_TABLE[v]` = discrete log of `v` base `g = 0x03` (see
/// `generate_log_table`). `LOG_TABLE[0]` stays 0 and is meaningless —
/// callers must special-case zero before indexing.
#[cfg(feature = "table-math")]
const LOG_TABLE: [u8; 256] = generate_log_table();
60
/// An element of the 8-bit field GF(2^8) in the tower representation.
///
/// Wraps a single byte: addition is XOR, multiplication is defined by the
/// `Mul` impl below. `#[repr(transparent)]` guarantees the same layout as
/// `u8`, which the NEON transmutes in `mod neon` rely on.
#[derive(Copy, Clone, Default, Debug, Eq, PartialEq, Serialize, Deserialize, Zeroize)]
#[repr(transparent)]
pub struct Block8(pub u8);
65
66impl Block8 {
67 pub const fn new(val: u8) -> Self {
68 Self(val)
69 }
70}
71
72impl TowerField for Block8 {
73 const BITS: usize = 8;
74 const ZERO: Self = Block8(0);
75 const ONE: Self = Block8(1);
76
77 const EXTENSION_TAU: Self = Block8(0x20);
78
79 fn invert(&self) -> Self {
80 #[cfg(feature = "table-math")]
81 {
82 if self.0 == 0 {
83 return Self::ZERO;
84 }
85
86 let i = LOG_TABLE[self.0 as usize] as usize;
87 Block8(EXP_TABLE[255 - i])
88 }
89
90 #[cfg(not(feature = "table-math"))]
91 {
92 let x = *self;
96 let x2 = x * x;
97 let x4 = x2 * x2;
98 let x8 = x4 * x4;
99 let x16 = x8 * x8;
100 let x32 = x16 * x16;
101 let x64 = x32 * x32;
102 let x128 = x64 * x64;
103
104 x128 * x64 * x32 * x16 * x8 * x4 * x2
106 }
107 }
108
109 fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
110 Self(bytes[0])
111 }
112}
113
114impl Add for Block8 {
116 type Output = Self;
117
118 fn add(self, rhs: Self) -> Self::Output {
119 Self(self.0.bitxor(rhs.0))
120 }
121}
122
123impl Sub for Block8 {
124 type Output = Self;
125
126 fn sub(self, rhs: Self) -> Self::Output {
127 self.add(rhs)
128 }
129}
130
impl Mul for Block8 {
    type Output = Self;

    /// Field multiplication in GF(2^8).
    ///
    /// Three strategies, chosen at compile time:
    /// - `table-math`: log/exp lookup — fast, but data-dependent branches
    ///   and table indices, so NOT constant-time.
    /// - aarch64 (default feature set): NEON carry-less multiply.
    ///   NOTE(review): `neon::mul_8` is the same routine used for the flat
    ///   representation in `mul_hardware`; sharing it here is sound only if
    ///   both representations reduce by the same polynomial — the
    ///   `flat_mul_homomorphism` test exercises this.
    /// - fallback: branch-free, constant-time double-and-add.
    fn mul(self, rhs: Self) -> Self::Output {
        #[cfg(feature = "table-math")]
        {
            // log(0) is undefined; 0 * x = x * 0 = 0 is handled up front.
            if self.0 == 0 || rhs.0 == 0 {
                return Self::ZERO;
            }

            let i = LOG_TABLE[self.0 as usize] as usize;
            let j = LOG_TABLE[rhs.0 as usize] as usize;

            // a * b = g^(log a + log b), exponent reduced mod 255 (g^255 = 1).
            // i + j <= 508, so a single conditional subtraction suffices.
            let k = i + j;
            let idx = if k >= 255 { k - 255 } else { k };

            Self(EXP_TABLE[idx])
        }

        #[cfg(not(feature = "table-math"))]
        {
            #[cfg(target_arch = "aarch64")]
            {
                neon::mul_8(self, rhs)
            }

            #[cfg(not(target_arch = "aarch64"))]
            {
                // Constant-time schoolbook multiply: walk the 8 bits of `b`,
                // conditionally XOR-ing `a` into the accumulator via a mask
                // (no secret-dependent branches).
                let mut a = self.0;
                let mut b = rhs.0;
                let mut res = 0u8;

                for _ in 0..8 {
                    // mask = 0xFF when the low bit of b is set, else 0x00.
                    let bit = b & 1;
                    let mask = 0u8.wrapping_sub(bit);
                    res ^= a & mask;

                    // a = a * x, folding x^8 back in as 0x1B
                    // (x^4 + x^3 + x + 1) when `a` overflows 8 bits.
                    let high_bit = a >> 7;
                    let overflow_mask = 0u8.wrapping_sub(high_bit);
                    a = (a << 1) ^ (0x1B & overflow_mask);

                    b >>= 1;
                }

                Self(res)
            }
        }
    }
}
191
192impl AddAssign for Block8 {
193 fn add_assign(&mut self, rhs: Self) {
194 *self = *self + rhs;
195 }
196}
197
198impl SubAssign for Block8 {
199 fn sub_assign(&mut self, rhs: Self) {
200 *self = *self - rhs;
201 }
202}
203
204impl MulAssign for Block8 {
205 fn mul_assign(&mut self, rhs: Self) {
206 *self = *self * rhs;
207 }
208}
209
210impl CanonicalSerialize for Block8 {
211 #[inline]
212 fn serialized_size(&self) -> usize {
213 1
214 }
215
216 #[inline]
217 fn serialize(&self, writer: &mut [u8]) -> Result<(), ()> {
218 if writer.is_empty() {
219 return Err(());
220 }
221
222 writer[0] = self.0;
223
224 Ok(())
225 }
226}
227
228impl CanonicalDeserialize for Block8 {
229 fn deserialize(bytes: &[u8]) -> Result<Self, ()> {
230 if bytes.is_empty() {
231 return Err(());
232 }
233
234 Ok(Self(bytes[0]))
235 }
236}
237
238impl From<u8> for Block8 {
239 #[inline]
240 fn from(val: u8) -> Self {
241 Self::new(val)
242 }
243}
244
245impl From<u32> for Block8 {
246 #[inline]
247 fn from(val: u32) -> Self {
248 Self(val as u8)
249 }
250}
251
252impl From<u64> for Block8 {
253 #[inline]
254 fn from(val: u64) -> Self {
255 Self(val as u8)
256 }
257}
258
259impl From<u128> for Block8 {
260 #[inline]
261 fn from(val: u128) -> Self {
262 Self(val as u8)
263 }
264}
265
266impl From<Bit> for Block8 {
271 #[inline(always)]
272 fn from(val: Bit) -> Self {
273 Self(val.0)
274 }
275}
276
/// Number of `Block8` lanes in a `PackedBlock8`: 16 bytes, i.e. one 128-bit
/// NEON register.
pub const PACKED_WIDTH_8: usize = 16;
283
/// Sixteen `Block8` elements packed for SIMD processing. `align(16)` lets
/// the aarch64 code load/store the whole value as a single 128-bit register.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
#[repr(C, align(16))]
pub struct PackedBlock8(pub [Block8; PACKED_WIDTH_8]);
287
288impl PackedBlock8 {
289 #[inline(always)]
290 pub fn zero() -> Self {
291 Self([Block8::ZERO; PACKED_WIDTH_8])
292 }
293}
294
295impl PackableField for Block8 {
296 type Packed = PackedBlock8;
297
298 const WIDTH: usize = PACKED_WIDTH_8;
299
300 #[inline(always)]
301 fn pack(chunk: &[Self]) -> Self::Packed {
302 assert!(
303 chunk.len() >= PACKED_WIDTH_8,
304 "PackableField::pack: input slice too short",
305 );
306
307 let mut arr = [Self::ZERO; PACKED_WIDTH_8];
308 arr.copy_from_slice(&chunk[..PACKED_WIDTH_8]);
309
310 PackedBlock8(arr)
311 }
312
313 #[inline(always)]
314 fn unpack(packed: Self::Packed, output: &mut [Self]) {
315 assert!(
316 output.len() >= PACKED_WIDTH_8,
317 "PackableField::unpack: output slice too short",
318 );
319
320 output[..PACKED_WIDTH_8].copy_from_slice(&packed.0);
321 }
322}
323
324impl Add for PackedBlock8 {
325 type Output = Self;
326
327 #[inline(always)]
328 fn add(self, rhs: Self) -> Self {
329 let mut res = [Block8::ZERO; PACKED_WIDTH_8];
330 for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
331 *out = *l + *r;
332 }
333
334 Self(res)
335 }
336}
337
338impl AddAssign for PackedBlock8 {
339 #[inline(always)]
340 fn add_assign(&mut self, rhs: Self) {
341 for (l, r) in self.0.iter_mut().zip(rhs.0.iter()) {
342 *l += *r;
343 }
344 }
345}
346
347impl Sub for PackedBlock8 {
348 type Output = Self;
349
350 #[inline(always)]
351 fn sub(self, rhs: Self) -> Self {
352 self.add(rhs)
353 }
354}
355
356impl SubAssign for PackedBlock8 {
357 #[inline(always)]
358 fn sub_assign(&mut self, rhs: Self) {
359 self.add_assign(rhs);
360 }
361}
362
363impl Mul for PackedBlock8 {
364 type Output = Self;
365
366 #[inline(always)]
367 fn mul(self, rhs: Self) -> Self {
368 #[cfg(target_arch = "aarch64")]
369 {
370 let mut res = [Block8::ZERO; PACKED_WIDTH_8];
371 for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
372 *out = mul_iso_8(*l, *r);
373 }
374
375 Self(res)
376 }
377
378 #[cfg(not(target_arch = "aarch64"))]
379 {
380 let mut res = [Block8::ZERO; PACKED_WIDTH_8];
381 for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
382 *out = *l * *r;
383 }
384
385 Self(res)
386 }
387 }
388}
389
390impl MulAssign for PackedBlock8 {
391 #[inline(always)]
392 fn mul_assign(&mut self, rhs: Self) {
393 *self = *self * rhs;
394 }
395}
396
397impl Mul<Block8> for PackedBlock8 {
398 type Output = Self;
399
400 #[inline(always)]
401 fn mul(self, rhs: Block8) -> Self {
402 let mut res = [Block8::ZERO; PACKED_WIDTH_8];
403 for (out, v) in res.iter_mut().zip(self.0.iter()) {
404 *out = *v * rhs;
405 }
406
407 Self(res)
408 }
409}
410
impl HardwareField for Block8 {
    /// Change of basis: tower representation -> "flat" hardware
    /// representation. Both paths apply the same F2-linear map, either via
    /// a 256-entry lookup (`table-math`) or a branch-free per-bit XOR of
    /// basis columns (constant-time default).
    #[inline(always)]
    fn to_hardware(self) -> Flat<Self> {
        #[cfg(feature = "table-math")]
        {
            Flat::from_raw(apply_matrix_8(self, &constants::TOWER_TO_FLAT_8))
        }

        #[cfg(not(feature = "table-math"))]
        {
            Flat::from_raw(Block8(map_ct_8(self.0, &TOWER_TO_FLAT_BASIS_8.0)))
        }
    }

    /// Inverse of `to_hardware`: flat basis back to the tower basis.
    #[inline(always)]
    fn from_hardware(value: Flat<Self>) -> Self {
        let value = value.into_raw();
        #[cfg(feature = "table-math")]
        {
            apply_matrix_8(value, &constants::FLAT_TO_TOWER_8)
        }

        #[cfg(not(feature = "table-math"))]
        {
            Block8(map_ct_8(value.0, &FLAT_TO_TOWER_BASIS_8.0))
        }
    }

    /// Flat-basis addition. Addition is XOR in either basis (the basis
    /// change is F2-linear), so no conversion is needed.
    #[inline(always)]
    fn add_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        Flat::from_raw(lhs.into_raw() + rhs.into_raw())
    }

    /// Lane-wise flat-basis addition over all 16 packed lanes; one 128-bit
    /// XOR on aarch64, the scalar loop elsewhere.
    #[inline(always)]
    fn add_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();
        #[cfg(target_arch = "aarch64")]
        {
            PackedFlat::from_raw(neon::add_packed_8(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            PackedFlat::from_raw(lhs + rhs)
        }
    }

    /// Flat-basis multiplication. aarch64 multiplies directly with NEON;
    /// other targets round-trip through the tower basis, where a software
    /// multiplier exists.
    #[inline(always)]
    fn mul_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();
        #[cfg(target_arch = "aarch64")]
        {
            Flat::from_raw(neon::mul_8(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            let a_tower = Self::from_hardware(Flat::from_raw(lhs));
            let b_tower = Self::from_hardware(Flat::from_raw(rhs));

            (a_tower * b_tower).to_hardware()
        }
    }

    /// Packed flat-basis multiplication: vectorized carry-less multiply on
    /// aarch64, otherwise per-lane `mul_hardware`.
    #[inline(always)]
    fn mul_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();

        #[cfg(target_arch = "aarch64")]
        {
            PackedFlat::from_raw(neon::mul_flat_packed_8(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            let mut l = [Self::ZERO; <Self as PackableField>::WIDTH];
            let mut r = [Self::ZERO; <Self as PackableField>::WIDTH];
            let mut res = [Self::ZERO; <Self as PackableField>::WIDTH];

            Self::unpack(lhs, &mut l);
            Self::unpack(rhs, &mut r);

            for i in 0..<Self as PackableField>::WIDTH {
                res[i] = Self::mul_hardware(Flat::from_raw(l[i]), Flat::from_raw(r[i])).into_raw();
            }

            PackedFlat::from_raw(Self::pack(&res))
        }
    }

    /// Multiplies every packed lane by the same flat-basis scalar by
    /// broadcasting it into all 16 lanes first.
    #[inline(always)]
    fn mul_hardware_scalar_packed(lhs: PackedFlat<Self>, rhs: Flat<Self>) -> PackedFlat<Self> {
        let broadcasted = PackedBlock8([rhs.into_raw(); PACKED_WIDTH_8]);
        Self::mul_hardware_packed(lhs, PackedFlat::from_raw(broadcasted))
    }

    /// Extracts tower-basis bit `bit_idx` directly from a flat-basis value.
    ///
    /// Each tower bit is an F2-linear functional of the flat bits: mask out
    /// the contributing flat bits, then XOR-fold to their parity (verified
    /// against `from_hardware` by the `parity_masks_match_from_hardware`
    /// test). Indexing the mask table panics if `bit_idx` is out of range.
    #[inline(always)]
    fn tower_bit_from_hardware(value: Flat<Self>, bit_idx: usize) -> u8 {
        let mask = FLAT_TO_TOWER_BIT_MASKS_8[bit_idx];

        // Parity of the masked byte via a logarithmic XOR fold.
        let mut v = value.into_raw().0 & mask;
        v ^= v >> 4;
        v ^= v >> 2;
        v ^= v >> 1;

        v & 1
    }
}
527
/// Multiplies two tower-basis elements via the flat-basis NEON multiplier:
/// convert both operands to the flat basis, multiply there, convert back.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn mul_iso_8(a: Block8, b: Block8) -> Block8 {
    let product = neon::mul_8(a.to_hardware().into_raw(), b.to_hardware().into_raw());
    Flat::from_raw(product).to_tower()
}
541
/// Applies a precomputed change-of-basis map via a 256-entry lookup table.
///
/// `val.0 as usize` is always `< 256`, so safe indexing is provably
/// in-bounds and the compiler elides the bounds check — the previous
/// `get_unchecked` bought nothing and carried an undocumented `unsafe`.
#[cfg(feature = "table-math")]
#[inline(always)]
fn apply_matrix_8(val: Block8, table: &[u8; 256]) -> Block8 {
    Block8(table[val.0 as usize])
}
548
/// Constant-time F2-linear map: XORs together the `basis` rows selected by
/// the set bits of `x`, using a branch-free mask per bit so timing does not
/// depend on the value of `x`.
#[cfg(not(feature = "table-math"))]
#[inline(always)]
fn map_ct_8(x: u8, basis: &[u8; 8]) -> u8 {
    let mut acc = 0u8;

    for (i, &row) in basis.iter().enumerate() {
        // mask = 0xFF when bit i of x is set, else 0x00.
        let mask = 0u8.wrapping_sub((x >> i) & 1);
        acc ^= row & mask;
    }

    acc
}
564
/// Builds `EXP_TABLE`: `table[i] = g^i` for the generator `g = 0x03`
/// (x + 1), reducing modulo the polynomial x^8 + x^4 + x^3 + x + 1 (0x11B).
/// Entry 255 wraps back to `g^255 = 1`, duplicating entry 0, which lets
/// multiplication index with unreduced exponent sums up to 255.
#[cfg(feature = "table-math")]
const fn generate_exp_table() -> [u8; 256] {
    let mut table = [0u8; 256];
    let mut power: u8 = 1; // g^0

    // const fn: `while` is required, `for` is not yet allowed here.
    let mut idx = 0;
    while idx < 256 {
        table[idx] = power;

        // power *= 0x03, i.e. (power * x) ^ power, folding x^8 in as 0x1B.
        let doubled = if power & 0x80 != 0 {
            (power << 1) ^ 0x1B
        } else {
            power << 1
        };

        power ^= doubled;
        idx += 1;
    }

    table
}
599
/// Builds `LOG_TABLE`, the inverse of `EXP_TABLE`: `table[g^i] = i` for
/// i in 0..255 with `g = 0x03`. `table[0]` is left as 0 — log(0) is
/// undefined and callers special-case zero before indexing.
#[cfg(feature = "table-math")]
const fn generate_log_table() -> [u8; 256] {
    let mut table = [0u8; 256];

    let mut power: u8 = 1; // g^0
    let mut exp = 0;

    while exp < 255 {
        table[power as usize] = exp as u8;

        // power *= 0x03, same recurrence as generate_exp_table.
        let doubled = if power & 0x80 != 0 {
            (power << 1) ^ 0x1B
        } else {
            power << 1
        };

        power ^= doubled;
        exp += 1;
    }

    table
}
634
/// aarch64 NEON back-end: carry-less (polynomial) multiplication and packed
/// XOR for the 8-bit field. Reduction uses `constants::POLY_8`
/// (NOTE(review): presumably 0x1B, the fold-back term of the field
/// polynomial — confirm against `constants`).
#[cfg(target_arch = "aarch64")]
mod neon {
    use super::*;
    use core::arch::aarch64::*;
    use core::mem::transmute;

    /// Lane-wise addition of 16 field elements: one 128-bit XOR.
    #[inline(always)]
    pub fn add_packed_8(lhs: PackedBlock8, rhs: PackedBlock8) -> PackedBlock8 {
        // SAFETY: `PackedBlock8` is `#[repr(C, align(16))]` over
        // `[Block8; 16]`, and `Block8` is `#[repr(transparent)]` over `u8`,
        // so all transmutes are between layout-identical 16-byte types.
        // NEON is baseline on aarch64.
        unsafe {
            let res = veorq_u8(
                transmute::<[Block8; 16], uint8x16_t>(lhs.0),
                transmute::<[Block8; 16], uint8x16_t>(rhs.0),
            );
            transmute(res)
        }
    }

    /// Scalar GF(2^8) multiply: `vmull_p8` carry-less product (only lane 0
    /// is used), then two fold steps of the high byte through `POLY_8`.
    /// Two folds suffice: the first carry is at most 7 bits, the second at
    /// most 3, so the third product fits in 8 bits.
    #[inline(always)]
    pub fn mul_8(a: Block8, b: Block8) -> Block8 {
        // SAFETY: NEON is baseline on aarch64; all transmutes reinterpret
        // between same-sized vector types (uint8x8_t <-> poly8x8_t,
        // poly16x8_t <-> uint16x8_t).
        unsafe {
            let a_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(a.0));
            let b_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(b.0));

            // 16-bit carry-less product in every lane; extract lane 0.
            let prod = vmull_p8(a_poly, b_poly);

            let prod_u16 = vgetq_lane_u16(transmute::<poly16x8_t, uint16x8_t>(prod), 0);

            let l = (prod_u16 & 0xFF) as u8;
            let h = (prod_u16 >> 8) as u8;

            // First fold: high byte * POLY_8 (carry-less).
            let r_val = constants::POLY_8;
            let h_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(h));
            let r_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(r_val));
            let h_red = vmull_p8(h_poly, r_poly);

            let h_red_u16 = vgetq_lane_u16(transmute::<poly16x8_t, uint16x8_t>(h_red), 0);

            let folded = (h_red_u16 & 0xFF) as u8;
            let carry = (h_red_u16 >> 8) as u8;

            let mut res = l ^ folded;

            // Second fold: the (small) carry of the first fold.
            let c_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(carry));
            let c_red = vmull_p8(c_poly, r_poly);
            let c_red_u16 = vgetq_lane_u16(transmute::<poly16x8_t, uint16x8_t>(c_red), 0);

            res ^= (c_red_u16 & 0xFF) as u8;

            Block8(res)
        }
    }

    /// Packed flat-basis multiply: 16 lanes of carry-less multiply, then a
    /// single table-driven reduction per 8-lane half.
    #[inline(always)]
    pub fn mul_flat_packed_8(lhs: PackedBlock8, rhs: PackedBlock8) -> PackedBlock8 {
        // SAFETY: same layout argument as `add_packed_8` for the
        // PackedBlock8 transmutes; `vld1q_u8` reads 16 bytes from the
        // stack-local arrays, which live for the duration of the call.
        unsafe {
            let a: uint8x16_t = transmute(lhs.0);
            let b: uint8x16_t = transmute(rhs.0);

            // vmull_p8 works on 8-lane halves, so split each input.
            let a_lo = vget_low_u8(a);
            let a_hi = vget_high_u8(a);
            let b_lo = vget_low_u8(b);
            let b_hi = vget_high_u8(b);

            let res_lo = vmull_p8(
                transmute::<uint8x8_t, poly8x8_t>(a_lo),
                transmute::<uint8x8_t, poly8x8_t>(b_lo),
            );
            let res_hi = vmull_p8(
                transmute::<uint8x8_t, poly8x8_t>(a_hi),
                transmute::<uint8x8_t, poly8x8_t>(b_hi),
            );

            // tbl_lo[n] = n * 0x1B (carry-less; fits in 8 bits for n < 16).
            let tbl_lo = vld1q_u8(
                [
                    0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4,
                    0xaf, 0x82, 0x99,
                ]
                .as_ptr(),
            );

            // tbl_hi[n] = (n << 4) * 0x1B, fully reduced mod the polynomial.
            let tbl_hi = vld1q_u8(
                [
                    0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c, 0x2f, 0x84, 0x62, 0xc9, 0xb5,
                    0x1e, 0xf8, 0x53,
                ]
                .as_ptr(),
            );

            // Reduce each 16-bit product: low byte XOR the high byte's
            // reduction, looked up per nibble. One pass suffices because
            // the high byte is at most 7 bits and both tables hold fully
            // reduced values.
            let reduce_tbl = |val_poly: poly16x8_t| -> uint8x8_t {
                let val: uint16x8_t = transmute(val_poly);

                let data = vmovn_u16(val);
                let carry_u16 = vshrq_n_u16(val, 8);
                let carry = vmovn_u16(carry_u16);

                let mask_lo = vdup_n_u8(0x0F);
                let h_lo = vand_u8(carry, mask_lo);
                let h_hi = vshr_n_u8(carry, 4);

                let r_lo = vqtbl1_u8(tbl_lo, h_lo);
                let r_hi = vqtbl1_u8(tbl_hi, h_hi);

                veor_u8(data, veor_u8(r_lo, r_hi))
            };

            let final_lo = reduce_tbl(res_lo);
            let final_hi = reduce_tbl(res_hi);

            let res = vcombine_u8(final_lo, final_hi);

            PackedBlock8(transmute::<uint8x16_t, [Block8; 16]>(res))
        }
    }
}
783
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{RngExt, rng};

    // The tower extension element tau must match the documented constant.
    #[test]
    fn tower_constants() {
        assert_eq!(Block8::EXTENSION_TAU, Block8(0x20));
    }

    // XOR truth table for addition on {0, 1}.
    #[test]
    fn add_truth() {
        let zero = Block8::ZERO;
        let one = Block8::ONE;

        assert_eq!(zero + zero, zero);
        assert_eq!(zero + one, one);
        assert_eq!(one + zero, one);
        assert_eq!(one + one, zero);
    }

    // Multiplicative identities and zero absorption.
    #[test]
    fn mul_truth() {
        let zero = Block8::ZERO;
        let one = Block8::ONE;

        assert_eq!(zero * zero, zero);
        assert_eq!(zero * one, zero);
        assert_eq!(one * one, one);
    }

    // 5 ^ 3 = 6: addition is XOR, not integer addition.
    #[test]
    fn add() {
        assert_eq!(Block8(5) + Block8(3), Block8(6));
    }

    // 2 * 2 = 4: no reduction needed below degree 8.
    #[test]
    fn mul_simple() {
        assert_eq!(Block8(2) * Block8(2), Block8(4));
    }

    // Classic worked example (0x57 * 0x83 = 0xC1) exercising reduction.
    #[test]
    fn mul_overflow() {
        assert_eq!(Block8(0x57) * Block8(0x83), Block8(0xC1));
    }

    // Zeroize must clear the underlying byte.
    #[test]
    fn security_zeroize() {
        let mut secret_val = Block8::from(0xFF_u32);
        assert_ne!(secret_val, Block8::ZERO);

        secret_val.zeroize();

        assert_eq!(secret_val, Block8::ZERO);
        assert_eq!(secret_val.0, 0, "Block8 memory leak detected");
    }

    // a * a^-1 = 1 for every nonzero element; invert(0) = 0 by convention.
    #[test]
    fn inversion_exhaustive() {
        for i in 0u8..=255 {
            let val = Block8(i);

            if val == Block8::ZERO {
                assert_eq!(val.invert(), Block8::ZERO, "invert(0) must return 0");
            } else {
                let inv = val.invert();
                let product = val * inv;

                assert_eq!(
                    product,
                    Block8::ONE,
                    "Inversion identity failed: a * a^-1 != 1"
                );
            }
        }
    }

    // to_hardware followed by to_tower must be the identity.
    #[test]
    fn isomorphism_roundtrip() {
        let mut rng = rng();
        for _ in 0..1000 {
            let val = Block8::from(rng.random::<u8>());

            assert_eq!(
                val.to_hardware().to_tower(),
                val,
                "Block8 isomorphism roundtrip failed"
            );
        }
    }

    // Every per-bit parity mask must agree with the full basis conversion,
    // and with the tower_bit accessor, for all 256 flat values.
    #[test]
    fn parity_masks_match_from_hardware() {
        for x in 0u16..=255 {
            let x_flat = x as u8;
            let tower = Block8::from_hardware(Flat::from_raw(Block8(x_flat))).0;

            for (k, &mask) in FLAT_TO_TOWER_BIT_MASKS_8.iter().enumerate() {
                let parity = ((x_flat & mask).count_ones() & 1) as u8;
                let bit = (tower >> k) & 1;
                assert_eq!(
                    parity, bit,
                    "Block8 mask mismatch at x={x_flat:#04x}, k={k}"
                );

                let via_api = Flat::from_raw(Block8(x_flat)).tower_bit(k);
                assert_eq!(via_api, bit, "Block8 tower_bit_from_hardware mismatch");
            }
        }
    }

    // The basis change must be a field isomorphism:
    // flat(a) * flat(b) == flat(a * b).
    #[test]
    fn flat_mul_homomorphism() {
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block8::from(rng.random::<u8>());
            let b = Block8::from(rng.random::<u8>());

            let expected_flat = (a * b).to_hardware();
            let actual_flat = a.to_hardware() * b.to_hardware();

            assert_eq!(
                actual_flat, expected_flat,
                "Block8 flat multiplication mismatch"
            );
        }
    }

    // Packed hardware add/mul must agree lane-by-lane with the scalar path.
    #[test]
    fn packed_consistency() {
        let mut rng = rng();
        for _ in 0..100 {
            let mut a_vals = [Block8::ZERO; 16];
            let mut b_vals = [Block8::ZERO; 16];

            for i in 0..16 {
                a_vals[i] = Block8::from(rng.random::<u8>());
                b_vals[i] = Block8::from(rng.random::<u8>());
            }

            let a_flat_vals = a_vals.map(|x| x.to_hardware());
            let b_flat_vals = b_vals.map(|x| x.to_hardware());
            let a_packed = Flat::<Block8>::pack(&a_flat_vals);
            let b_packed = Flat::<Block8>::pack(&b_flat_vals);

            let add_res = Block8::add_hardware_packed(a_packed, b_packed);

            let mut add_out = [Block8::ZERO.to_hardware(); 16];
            Flat::<Block8>::unpack(add_res, &mut add_out);

            for i in 0..16 {
                assert_eq!(
                    add_out[i],
                    (a_vals[i] + b_vals[i]).to_hardware(),
                    "Block8 packed add mismatch"
                );
            }

            let mul_res = Block8::mul_hardware_packed(a_packed, b_packed);

            let mut mul_out = [Block8::ZERO.to_hardware(); 16];
            Flat::<Block8>::unpack(mul_res, &mut mul_out);

            for i in 0..16 {
                assert_eq!(
                    mul_out[i],
                    (a_vals[i] * b_vals[i]).to_hardware(),
                    "Block8 packed mul mismatch"
                );
            }
        }
    }

    // pack then unpack must reproduce the input exactly.
    #[test]
    fn pack_unpack_roundtrip() {
        let mut rng = rng();
        let mut data = [Block8::ZERO; PACKED_WIDTH_8];

        for v in data.iter_mut() {
            *v = Block8(rng.random());
        }

        let packed = Block8::pack(&data);
        let mut unpacked = [Block8::ZERO; PACKED_WIDTH_8];
        Block8::unpack(packed, &mut unpacked);

        assert_eq!(data, unpacked, "Block8 pack/unpack roundtrip failed");
    }

    // Packed (tower) addition must match scalar addition per lane.
    #[test]
    fn packed_add_consistency() {
        let mut rng = rng();
        let mut a_vals = [Block8::ZERO; PACKED_WIDTH_8];
        let mut b_vals = [Block8::ZERO; PACKED_WIDTH_8];

        for i in 0..PACKED_WIDTH_8 {
            a_vals[i] = Block8(rng.random());
            b_vals[i] = Block8(rng.random());
        }

        let a_packed = Block8::pack(&a_vals);
        let b_packed = Block8::pack(&b_vals);
        let res_packed = a_packed + b_packed;

        let mut res_unpacked = [Block8::ZERO; PACKED_WIDTH_8];
        Block8::unpack(res_packed, &mut res_unpacked);

        for i in 0..PACKED_WIDTH_8 {
            assert_eq!(
                res_unpacked[i],
                a_vals[i] + b_vals[i],
                "Block8 packed add mismatch at index {}",
                i
            );
        }
    }

    // Packed (tower) multiplication must match scalar multiplication per
    // lane; exercises the mul_iso_8 path on aarch64.
    #[test]
    fn packed_mul_consistency() {
        let mut rng = rng();

        for _ in 0..1000 {
            let mut a_arr = [Block8::ZERO; PACKED_WIDTH_8];
            let mut b_arr = [Block8::ZERO; PACKED_WIDTH_8];

            for i in 0..PACKED_WIDTH_8 {
                let val_a: u8 = rng.random();
                let val_b: u8 = rng.random();
                a_arr[i] = Block8(val_a);
                b_arr[i] = Block8(val_b);
            }

            let a_packed = PackedBlock8(a_arr);
            let b_packed = PackedBlock8(b_arr);
            let c_packed = a_packed * b_packed;

            let mut c_expected = [Block8::ZERO; PACKED_WIDTH_8];
            for i in 0..PACKED_WIDTH_8 {
                c_expected[i] = a_arr[i] * b_arr[i];
            }

            assert_eq!(c_packed.0, c_expected, "SIMD Block8 mismatch!");
        }
    }
}