use crate::constants::FLAT_TO_TOWER_BIT_MASKS_8;
use crate::towers::bit::Bit;
use crate::{
CanonicalDeserialize, CanonicalSerialize, Flat, HardwareField, PackableField, PackedFlat,
TowerField, constants,
};
use core::ops::{Add, AddAssign, BitXor, Mul, MulAssign, Sub, SubAssign};
use serde::{Deserialize, Serialize};
use zeroize::Zeroize;
/// Byte-array wrapper that forces 64-byte alignment on an `N`-entry conversion basis.
///
/// Only compiled when the `table-math` feature is disabled, where the basis is
/// consumed by `map_ct_8` instead of a 256-entry lookup table.
// NOTE(review): the 64-byte alignment presumably keeps the whole basis inside one
// cache line — confirm the intent.
#[cfg(not(feature = "table-math"))]
#[repr(align(64))]
struct CtConvertBasisU8<const N: usize>([u8; N]);
/// Basis used by `HardwareField::to_hardware` (via `map_ct_8`) to convert a
/// tower-basis byte into the flat basis: entry `i` is XORed into the result
/// when bit `i` of the input is set.
#[cfg(not(feature = "table-math"))]
static TOWER_TO_FLAT_BASIS_8: CtConvertBasisU8<8> =
CtConvertBasisU8(constants::RAW_TOWER_TO_FLAT_8);
/// Basis used by `HardwareField::from_hardware` (via `map_ct_8`) to convert a
/// flat-basis byte back into the tower basis: entry `i` is XORed into the
/// result when bit `i` of the input is set.
#[cfg(not(feature = "table-math"))]
static FLAT_TO_TOWER_BASIS_8: CtConvertBasisU8<8> =
CtConvertBasisU8(constants::RAW_FLAT_TO_TOWER_8);
/// Compile-time exponent table: `EXP_TABLE[i]` is the `i`-th power of the
/// generator stepped by `generate_exp_table` (each step multiplies by `0x03`
/// with reduction constant `0x1B`). Only present with the `table-math` feature.
#[cfg(feature = "table-math")]
const EXP_TABLE: [u8; 256] = generate_exp_table();
/// Compile-time logarithm table: for a non-zero byte `v`, `LOG_TABLE[v]` is the
/// exponent written by `generate_log_table` for that value. Entry `0` is left
/// at `0` as a placeholder; callers handle zero before indexing.
#[cfg(feature = "table-math")]
const LOG_TABLE: [u8; 256] = generate_log_table();
/// An 8-bit binary-field element stored in a single byte.
///
/// Addition is XOR and multiplication reduces with the constant `0x1B`
/// (see the `Add` and `Mul` implementations). `#[repr(transparent)]` guarantees
/// the same layout as `u8`, which the NEON kernels rely on when transmuting.
#[derive(Copy, Clone, Default, Debug, Eq, PartialEq, Serialize, Deserialize, Zeroize)]
#[repr(transparent)]
pub struct Block8(pub u8);
impl Block8 {
pub const fn new(val: u8) -> Self {
Self(val)
}
}
impl TowerField for Block8 {
    /// Number of bits in one element.
    const BITS: usize = 8;
    /// Additive identity.
    const ZERO: Self = Block8(0);
    /// Multiplicative identity.
    const ONE: Self = Block8(1);
    // NOTE(review): presumably the constant used when extending this field to the
    // next tower level — confirm against the `TowerField` trait documentation.
    const EXTENSION_TAU: Self = Block8(0x20);

    /// Returns the multiplicative inverse of `self`; zero maps to zero.
    ///
    /// With `table-math` the inverse is looked up as `EXP_TABLE[255 - log(self)]`.
    /// Otherwise it is computed as `self^254`, i.e. the product of the squares
    /// `self^2 * self^4 * ... * self^128`.
    fn invert(&self) -> Self {
        #[cfg(feature = "table-math")]
        {
            match self.0 {
                // Zero has no logarithm; it is defined to invert to itself.
                0 => Self::ZERO,
                raw => {
                    let log = usize::from(LOG_TABLE[usize::from(raw)]);
                    Block8(EXP_TABLE[255 - log])
                }
            }
        }
        #[cfg(not(feature = "table-math"))]
        {
            // `square` walks x^2, x^4, ..., x^128 while `acc` collects their product,
            // whose exponent is 2 + 4 + ... + 128 = 254.
            let mut square = *self * *self;
            let mut acc = square;
            for _ in 0..6 {
                square = square * square;
                acc = acc * square;
            }
            acc
        }
    }

    /// Builds an element from the first of the 32 supplied bytes; the
    /// remaining bytes are ignored.
    fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
        let [first, ..] = *bytes;
        Self(first)
    }
}
impl Add for Block8 {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
Self(self.0.bitxor(rhs.0))
}
}
impl Sub for Block8 {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
self.add(rhs)
}
}
impl Mul for Block8 {
    type Output = Self;

    /// Field multiplication.
    ///
    /// * `table-math`: adds the logarithms of both operands and looks the sum up
    ///   in `EXP_TABLE`; this path branches on zero and indexes tables by value.
    /// * otherwise on `aarch64`: delegates to `neon::mul_8`.
    /// * otherwise: a branch-free shift-and-add loop that reduces with `0x1B`.
    fn mul(self, rhs: Self) -> Self::Output {
        #[cfg(feature = "table-math")]
        {
            match (self.0, rhs.0) {
                // Zero has no logarithm, so it is handled before any lookup.
                (0, _) | (_, 0) => Self::ZERO,
                (lhs_raw, rhs_raw) => {
                    let log_sum = usize::from(LOG_TABLE[usize::from(lhs_raw)])
                        + usize::from(LOG_TABLE[usize::from(rhs_raw)]);
                    // Exponents wrap at 255; subtract once when the sum reaches it.
                    let exponent = log_sum.checked_sub(255).unwrap_or(log_sum);
                    Self(EXP_TABLE[exponent])
                }
            }
        }
        #[cfg(not(feature = "table-math"))]
        {
            #[cfg(target_arch = "aarch64")]
            {
                neon::mul_8(self, rhs)
            }
            #[cfg(not(target_arch = "aarch64"))]
            {
                let mut multiplicand = self.0;
                let mut multiplier = rhs.0;
                let mut product = 0u8;
                for _ in 0..8 {
                    // All-ones when the current multiplier bit is set, else zero.
                    let select = (multiplier & 1).wrapping_neg();
                    product ^= multiplicand & select;
                    // All-ones when doubling would overflow and needs reduction.
                    let reduce = (multiplicand >> 7).wrapping_neg();
                    multiplicand = (multiplicand << 1) ^ (0x1B & reduce);
                    multiplier >>= 1;
                }
                Self(product)
            }
        }
    }
}
impl AddAssign for Block8 {
    /// In-place field addition; stores `*self + rhs` back into `self`.
    fn add_assign(&mut self, rhs: Self) {
        let sum = *self + rhs;
        *self = sum;
    }
}
impl SubAssign for Block8 {
    /// In-place field subtraction; stores `*self - rhs` back into `self`.
    fn sub_assign(&mut self, rhs: Self) {
        let difference = *self - rhs;
        *self = difference;
    }
}
impl MulAssign for Block8 {
    /// In-place field multiplication; stores `*self * rhs` back into `self`.
    fn mul_assign(&mut self, rhs: Self) {
        let product = *self * rhs;
        *self = product;
    }
}
impl CanonicalSerialize for Block8 {
    /// A `Block8` always serializes to exactly one byte.
    #[inline]
    fn serialized_size(&self) -> usize {
        1
    }

    /// Writes the element into the first byte of `writer`.
    ///
    /// Returns `Err(())` when `writer` is empty; bytes past the first are untouched.
    #[inline]
    fn serialize(&self, writer: &mut [u8]) -> Result<(), ()> {
        let slot = writer.first_mut().ok_or(())?;
        *slot = self.0;
        Ok(())
    }
}
impl CanonicalDeserialize for Block8 {
    /// Reads an element from the first byte of `bytes`.
    ///
    /// Returns `Err(())` when `bytes` is empty; any trailing bytes are ignored.
    fn deserialize(bytes: &[u8]) -> Result<Self, ()> {
        match bytes.first() {
            Some(&raw) => Ok(Self(raw)),
            None => Err(()),
        }
    }
}
impl From<u8> for Block8 {
#[inline]
fn from(val: u8) -> Self {
Self::new(val)
}
}
impl From<u32> for Block8 {
    /// Keeps only the least-significant byte of `val`; higher bits are discarded.
    #[inline]
    fn from(val: u32) -> Self {
        let low_byte = (val & 0xFF) as u8;
        Self(low_byte)
    }
}
impl From<u64> for Block8 {
    /// Keeps only the least-significant byte of `val`; higher bits are discarded.
    #[inline]
    fn from(val: u64) -> Self {
        let low_byte = (val & 0xFF) as u8;
        Self(low_byte)
    }
}
impl From<u128> for Block8 {
    /// Keeps only the least-significant byte of `val`; higher bits are discarded.
    #[inline]
    fn from(val: u128) -> Self {
        let low_byte = (val & 0xFF) as u8;
        Self(low_byte)
    }
}
impl From<Bit> for Block8 {
    /// Embeds a `Bit` by reusing its inner value as the element's byte.
    #[inline(always)]
    fn from(val: Bit) -> Self {
        let raw = val.0;
        Self(raw)
    }
}
/// Number of `Block8` lanes held by one `PackedBlock8` (16 one-byte lanes).
pub const PACKED_WIDTH_8: usize = 16;
/// A fixed group of `PACKED_WIDTH_8` field elements processed lane by lane.
///
/// `#[repr(C, align(16))]` gives the 16-byte array a 16-byte alignment; the
/// NEON kernels transmute it to and from `uint8x16_t`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
#[repr(C, align(16))]
pub struct PackedBlock8(pub [Block8; PACKED_WIDTH_8]);
impl PackedBlock8 {
    /// Returns a packed value whose every lane is `Block8::ZERO`.
    #[inline(always)]
    pub fn zero() -> Self {
        let lanes = [Block8::ZERO; PACKED_WIDTH_8];
        Self(lanes)
    }
}
impl PackableField for Block8 {
    type Packed = PackedBlock8;
    const WIDTH: usize = PACKED_WIDTH_8;

    /// Packs the first `PACKED_WIDTH_8` elements of `chunk`; extra elements are ignored.
    ///
    /// # Panics
    /// Panics when `chunk` holds fewer than `PACKED_WIDTH_8` elements.
    #[inline(always)]
    fn pack(chunk: &[Self]) -> Self::Packed {
        assert!(
            chunk.len() >= PACKED_WIDTH_8,
            "PackableField::pack: input slice too short",
        );
        let (head, _) = chunk.split_at(PACKED_WIDTH_8);
        let mut lanes = [Self::ZERO; PACKED_WIDTH_8];
        lanes.copy_from_slice(head);
        PackedBlock8(lanes)
    }

    /// Writes every lane of `packed` into the first `PACKED_WIDTH_8` slots of
    /// `output`; later slots are left untouched.
    ///
    /// # Panics
    /// Panics when `output` holds fewer than `PACKED_WIDTH_8` elements.
    #[inline(always)]
    fn unpack(packed: Self::Packed, output: &mut [Self]) {
        assert!(
            output.len() >= PACKED_WIDTH_8,
            "PackableField::unpack: output slice too short",
        );
        let (head, _) = output.split_at_mut(PACKED_WIDTH_8);
        head.copy_from_slice(&packed.0);
    }
}
impl Add for PackedBlock8 {
type Output = Self;
#[inline(always)]
fn add(self, rhs: Self) -> Self {
let mut res = [Block8::ZERO; PACKED_WIDTH_8];
for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
*out = *l + *r;
}
Self(res)
}
}
impl AddAssign for PackedBlock8 {
    /// Lane-wise in-place field addition.
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        self.0
            .iter_mut()
            .zip(rhs.0.iter())
            .for_each(|(acc, addend)| *acc += *addend);
    }
}
impl Sub for PackedBlock8 {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: Self) -> Self {
self.add(rhs)
}
}
impl SubAssign for PackedBlock8 {
#[inline(always)]
fn sub_assign(&mut self, rhs: Self) {
self.add_assign(rhs);
}
}
impl Mul for PackedBlock8 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: Self) -> Self {
#[cfg(target_arch = "aarch64")]
{
let mut res = [Block8::ZERO; PACKED_WIDTH_8];
for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
*out = mul_iso_8(*l, *r);
}
Self(res)
}
#[cfg(not(target_arch = "aarch64"))]
{
let mut res = [Block8::ZERO; PACKED_WIDTH_8];
for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
*out = *l * *r;
}
Self(res)
}
}
}
impl MulAssign for PackedBlock8 {
    /// Lane-wise in-place multiplication; stores `*self * rhs` back into `self`.
    #[inline(always)]
    fn mul_assign(&mut self, rhs: Self) {
        let product = *self * rhs;
        *self = product;
    }
}
impl Mul<Block8> for PackedBlock8 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: Block8) -> Self {
let mut res = [Block8::ZERO; PACKED_WIDTH_8];
for (out, v) in res.iter_mut().zip(self.0.iter()) {
*out = *v * rhs;
}
Self(res)
}
}
impl HardwareField for Block8 {
    /// Converts a tower-basis element into its flat (hardware) representation.
    ///
    /// Uses a 256-entry table with `table-math`, otherwise the branch-free
    /// basis combination in `map_ct_8`.
    #[inline(always)]
    fn to_hardware(self) -> Flat<Self> {
        #[cfg(feature = "table-math")]
        {
            let flat = apply_matrix_8(self, &constants::TOWER_TO_FLAT_8);
            Flat::from_raw(flat)
        }
        #[cfg(not(feature = "table-math"))]
        {
            let flat_bits = map_ct_8(self.0, &TOWER_TO_FLAT_BASIS_8.0);
            Flat::from_raw(Block8(flat_bits))
        }
    }

    /// Converts a flat (hardware) value back into the tower basis.
    #[inline(always)]
    fn from_hardware(value: Flat<Self>) -> Self {
        let flat = value.into_raw();
        #[cfg(feature = "table-math")]
        {
            apply_matrix_8(flat, &constants::FLAT_TO_TOWER_8)
        }
        #[cfg(not(feature = "table-math"))]
        {
            let tower_bits = map_ct_8(flat.0, &FLAT_TO_TOWER_BASIS_8.0);
            Block8(tower_bits)
        }
    }

    /// Adds two flat values by adding their raw elements.
    #[inline(always)]
    fn add_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        let sum = lhs.into_raw() + rhs.into_raw();
        Flat::from_raw(sum)
    }

    /// Lane-wise addition of two packed flat values (NEON XOR on `aarch64`).
    #[inline(always)]
    fn add_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let left = lhs.into_raw();
        let right = rhs.into_raw();
        #[cfg(target_arch = "aarch64")]
        {
            let sum = neon::add_packed_8(left, right);
            PackedFlat::from_raw(sum)
        }
        #[cfg(not(target_arch = "aarch64"))]
        {
            let sum = left + right;
            PackedFlat::from_raw(sum)
        }
    }

    /// Multiplies two flat values.
    ///
    /// On `aarch64` the raw values are multiplied directly with `neon::mul_8`;
    /// elsewhere both operands are converted to the tower basis, multiplied
    /// there, and the product converted back.
    #[inline(always)]
    fn mul_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        let left = lhs.into_raw();
        let right = rhs.into_raw();
        #[cfg(target_arch = "aarch64")]
        {
            let product = neon::mul_8(left, right);
            Flat::from_raw(product)
        }
        #[cfg(not(target_arch = "aarch64"))]
        {
            let left_tower = Self::from_hardware(Flat::from_raw(left));
            let right_tower = Self::from_hardware(Flat::from_raw(right));
            let product_tower = left_tower * right_tower;
            product_tower.to_hardware()
        }
    }

    /// Lane-wise multiplication of two packed flat values.
    ///
    /// Uses the NEON kernel on `aarch64`; elsewhere unpacks both operands and
    /// multiplies lane by lane with `mul_hardware`.
    #[inline(always)]
    fn mul_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let left = lhs.into_raw();
        let right = rhs.into_raw();
        #[cfg(target_arch = "aarch64")]
        {
            let product = neon::mul_flat_packed_8(left, right);
            PackedFlat::from_raw(product)
        }
        #[cfg(not(target_arch = "aarch64"))]
        {
            let mut left_lanes = [Self::ZERO; <Self as PackableField>::WIDTH];
            let mut right_lanes = [Self::ZERO; <Self as PackableField>::WIDTH];
            Self::unpack(left, &mut left_lanes);
            Self::unpack(right, &mut right_lanes);

            let mut products = [Self::ZERO; <Self as PackableField>::WIDTH];
            for ((out, &a), &b) in products
                .iter_mut()
                .zip(left_lanes.iter())
                .zip(right_lanes.iter())
            {
                *out = Self::mul_hardware(Flat::from_raw(a), Flat::from_raw(b)).into_raw();
            }
            PackedFlat::from_raw(Self::pack(&products))
        }
    }

    /// Multiplies every lane of `lhs` by the scalar `rhs` by broadcasting the
    /// scalar to all lanes and reusing `mul_hardware_packed`.
    #[inline(always)]
    fn mul_hardware_scalar_packed(lhs: PackedFlat<Self>, rhs: Flat<Self>) -> PackedFlat<Self> {
        let scalar = rhs.into_raw();
        let splat = PackedBlock8([scalar; PACKED_WIDTH_8]);
        Self::mul_hardware_packed(lhs, PackedFlat::from_raw(splat))
    }

    /// Extracts tower bit `bit_idx` directly from a flat value.
    ///
    /// The bit is the parity of the flat byte masked with
    /// `FLAT_TO_TOWER_BIT_MASKS_8[bit_idx]`.
    ///
    /// # Panics
    /// Panics when `bit_idx` is outside the mask table.
    #[inline(always)]
    fn tower_bit_from_hardware(value: Flat<Self>, bit_idx: usize) -> u8 {
        let mask = FLAT_TO_TOWER_BIT_MASKS_8[bit_idx];
        let selected = value.into_raw().0 & mask;
        // Parity of the selected bits: 1 when an odd number of them are set.
        (selected.count_ones() & 1) as u8
    }
}
/// Multiplies two tower-basis elements by converting both to the flat basis,
/// multiplying there with `neon::mul_8`, and converting the product back.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn mul_iso_8(a: Block8, b: Block8) -> Block8 {
    let lhs_flat = a.to_hardware().into_raw();
    let rhs_flat = b.to_hardware().into_raw();
    let product_flat = neon::mul_8(lhs_flat, rhs_flat);
    Flat::from_raw(product_flat).to_tower()
}
/// Applies a precomputed basis-change map to `val` by table lookup.
///
/// `table[b]` must hold the image of byte `b`; the result wraps `table[val.0]`.
#[cfg(feature = "table-math")]
#[inline(always)]
fn apply_matrix_8(val: Block8, table: &[u8; 256]) -> Block8 {
    // A `u8` index can never exceed 255, so plain indexing into a 256-entry
    // table is always in bounds. No `unsafe` is needed, and the compiler can
    // prove the bounds check away.
    Block8(table[usize::from(val.0)])
}
/// Branch-free linear map over 8 bits: XORs together `basis[i]` for every set
/// bit `i` of `x`.
///
/// Each basis entry is selected with an all-ones/all-zeros mask derived from
/// the corresponding input bit, so no branch or index depends on `x`.
#[cfg(not(feature = "table-math"))]
#[inline(always)]
fn map_ct_8(x: u8, basis: &[u8; 8]) -> u8 {
    basis
        .iter()
        .enumerate()
        .fold(0u8, |acc, (bit_pos, &column)| {
            // 0xFF when bit `bit_pos` of `x` is set, 0x00 otherwise.
            let select = ((x >> bit_pos) & 1).wrapping_neg();
            acc ^ (column & select)
        })
}
/// Builds the exponent table at compile time.
///
/// Entry `i` holds the `i`-th power of the generator `0x03`: each step doubles
/// the running value (reducing with `0x1B` when the top bit was set) and XORs
/// the original value back in, i.e. multiplies by `x + 1`.
#[cfg(feature = "table-math")]
const fn generate_exp_table() -> [u8; 256] {
    let mut powers = [0u8; 256];
    let mut current: u8 = 1;
    let mut exponent = 0;
    while exponent < 256 {
        powers[exponent] = current;
        let doubled = if current & 0x80 != 0 {
            (current << 1) ^ 0x1B
        } else {
            current << 1
        };
        current ^= doubled;
        exponent += 1;
    }
    powers
}
/// Builds the logarithm table at compile time.
///
/// Walks the first 255 powers of the generator `0x03` (same stepping rule as
/// the exponent table) and records, for each value reached, the exponent that
/// produced it. Entry `0` is never written and stays `0`.
#[cfg(feature = "table-math")]
const fn generate_log_table() -> [u8; 256] {
    let mut logs = [0u8; 256];
    let mut current: u8 = 1;
    let mut exponent = 0;
    while exponent < 255 {
        logs[current as usize] = exponent as u8;
        let doubled = if current & 0x80 != 0 {
            (current << 1) ^ 0x1B
        } else {
            current << 1
        };
        current ^= doubled;
        exponent += 1;
    }
    logs
}
// NEON kernels for `Block8` arithmetic; compiled only for `target_arch = "aarch64"`.
// NOTE(review): every `unsafe` block below assumes the NEON intrinsics are usable
// on all aarch64 targets this crate supports — confirm.
#[cfg(target_arch = "aarch64")]
mod neon {
use super::*;
use core::arch::aarch64::*;
use core::mem::transmute;
/// Lane-wise XOR of two packed values, i.e. field addition on all 16 lanes.
#[inline(always)]
pub fn add_packed_8(lhs: PackedBlock8, rhs: PackedBlock8) -> PackedBlock8 {
// SAFETY: `Block8` is `#[repr(transparent)]` over `u8`, so `[Block8; 16]` is 16
// plain bytes, the same size as `uint8x16_t`, and every bit pattern is valid on
// both sides. `PackedBlock8` is a 16-byte `repr(C)` wrapper around that array,
// so transmuting the result back is size-compatible as well.
unsafe {
let res = veorq_u8(
transmute::<[Block8; 16], uint8x16_t>(lhs.0),
transmute::<[Block8; 16], uint8x16_t>(rhs.0),
);
transmute(res)
}
}
/// Multiplies two elements with a carry-less multiply followed by two folding
/// steps that reduce the 16-bit product with `constants::POLY_8`.
#[inline(always)]
pub fn mul_8(a: Block8, b: Block8) -> Block8 {
// SAFETY: the transmutes only reinterpret between same-sized NEON vector types
// (`uint8x8_t`/`poly8x8_t`, `poly16x8_t`/`uint16x8_t`); no memory is accessed.
unsafe {
// Broadcast each operand to all 8 lanes; only lane 0 of the product is read.
let a_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(a.0));
let b_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(b.0));
// Carry-less (polynomial) 8x8 -> 16-bit multiply per lane.
let prod = vmull_p8(a_poly, b_poly);
let prod_u16 = vgetq_lane_u16(transmute::<poly16x8_t, uint16x8_t>(prod), 0);
// Split the unreduced product into its low and high bytes.
let l = (prod_u16 & 0xFF) as u8;
let h = (prod_u16 >> 8) as u8;
// NOTE(review): presumably `POLY_8 == 0x1B`, matching the scalar fallback and
// the tables in `mul_flat_packed_8` — confirm in `constants`.
let r_val = constants::POLY_8;
// First fold: multiply the high byte by the reduction constant.
let h_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(h));
let r_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(r_val));
let h_red = vmull_p8(h_poly, r_poly);
let h_red_u16 = vgetq_lane_u16(transmute::<poly16x8_t, uint16x8_t>(h_red), 0);
let folded = (h_red_u16 & 0xFF) as u8;
// The first fold can itself spill past 8 bits; keep that spill as `carry`.
let carry = (h_red_u16 >> 8) as u8;
let mut res = l ^ folded;
// Second fold: reduce the spill. Its own high byte is dropped; with a 5-bit
// reduction constant such as 0x1B the second product fits in 8 bits.
let c_poly = transmute::<uint8x8_t, poly8x8_t>(vdup_n_u8(carry));
let c_red = vmull_p8(c_poly, r_poly);
let c_red_u16 = vgetq_lane_u16(transmute::<poly16x8_t, uint16x8_t>(c_red), 0);
res ^= (c_red_u16 & 0xFF) as u8;
Block8(res)
}
}
/// Lane-wise multiplication of two packed values: carry-less multiply on each
/// half, then a table-driven reduction of every 16-bit product to 8 bits.
#[inline(always)]
pub fn mul_flat_packed_8(lhs: PackedBlock8, rhs: PackedBlock8) -> PackedBlock8 {
// SAFETY: `[Block8; 16]` and `uint8x16_t` are both 16 plain bytes (`Block8` is
// `repr(transparent)` over `u8`); the other transmutes reinterpret same-sized
// NEON vector types. Each `vld1q_u8` reads exactly 16 bytes from a 16-element
// `u8` array literal that outlives the load.
unsafe {
let a: uint8x16_t = transmute(lhs.0);
let b: uint8x16_t = transmute(rhs.0);
// `vmull_p8` handles 8 lanes at a time, so split both operands into halves.
let a_lo = vget_low_u8(a);
let a_hi = vget_high_u8(a);
let b_lo = vget_low_u8(b);
let b_hi = vget_high_u8(b);
let res_lo = vmull_p8(
transmute::<uint8x8_t, poly8x8_t>(a_lo),
transmute::<uint8x8_t, poly8x8_t>(b_lo),
);
let res_hi = vmull_p8(
transmute::<uint8x8_t, poly8x8_t>(a_hi),
transmute::<uint8x8_t, poly8x8_t>(b_hi),
);
// Reduction contribution of the low nibble `n` of a product's high byte:
// entry `n` is the carry-less product `n * 0x1B`.
let tbl_lo = vld1q_u8(
[
0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4,
0xaf, 0x82, 0x99,
]
.as_ptr(),
);
// Reduction contribution of the high nibble `n` of a product's high byte:
// entry `n` is `(n << 4) * 0x1B` fully reduced to 8 bits with 0x1B.
// A product of two 8-bit polynomials has at most 15 bits, so `n <= 7` here.
let tbl_hi = vld1q_u8(
[
0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c, 0x2f, 0x84, 0x62, 0xc9, 0xb5,
0x1e, 0xf8, 0x53,
]
.as_ptr(),
);
// Reduces eight 16-bit products to eight bytes.
let reduce_tbl = |val_poly: poly16x8_t| -> uint8x8_t {
let val: uint16x8_t = transmute(val_poly);
// Low byte of each product.
let data = vmovn_u16(val);
// High byte of each product, which still has to be folded back in.
let carry_u16 = vshrq_n_u16(val, 8);
let carry = vmovn_u16(carry_u16);
// Split the high byte into nibbles and look up each nibble's contribution.
let mask_lo = vdup_n_u8(0x0F);
let h_lo = vand_u8(carry, mask_lo);
let h_hi = vshr_n_u8(carry, 4);
let r_lo = vqtbl1_u8(tbl_lo, h_lo);
let r_hi = vqtbl1_u8(tbl_hi, h_hi);
veor_u8(data, veor_u8(r_lo, r_hi))
};
let final_lo = reduce_tbl(res_lo);
let final_hi = reduce_tbl(res_hi);
let res = vcombine_u8(final_lo, final_hi);
PackedBlock8(transmute::<uint8x16_t, [Block8; 16]>(res))
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{RngExt, rng};

    /// Pins the value of the tower extension constant.
    #[test]
    fn tower_constants() {
        let expected = Block8(0x20);
        assert_eq!(Block8::EXTENSION_TAU, expected);
    }

    /// Addition on the two identities behaves like XOR.
    #[test]
    fn add_truth() {
        let (zero, one) = (Block8::ZERO, Block8::ONE);
        let cases = [
            (zero, zero, zero),
            (zero, one, one),
            (one, zero, one),
            (one, one, zero),
        ];
        for (lhs, rhs, sum) in cases {
            assert_eq!(lhs + rhs, sum);
        }
    }

    /// Multiplication on the two identities behaves like AND.
    #[test]
    fn mul_truth() {
        let (zero, one) = (Block8::ZERO, Block8::ONE);
        let cases = [(zero, zero, zero), (zero, one, zero), (one, one, one)];
        for (lhs, rhs, product) in cases {
            assert_eq!(lhs * rhs, product);
        }
    }

    /// 5 XOR 3 is 6.
    #[test]
    fn add() {
        let sum = Block8(5) + Block8(3);
        assert_eq!(sum, Block8(6));
    }

    /// A product that needs no reduction.
    #[test]
    fn mul_simple() {
        let product = Block8(2) * Block8(2);
        assert_eq!(product, Block8(4));
    }

    /// A product that overflows 8 bits and must be reduced.
    #[test]
    fn mul_overflow() {
        let product = Block8(0x57) * Block8(0x83);
        assert_eq!(product, Block8(0xC1));
    }

    /// `zeroize` must clear the stored byte.
    #[test]
    fn security_zeroize() {
        let mut secret_val = Block8::from(0xFF_u32);
        assert_ne!(secret_val, Block8::ZERO);

        secret_val.zeroize();

        assert_eq!(secret_val, Block8::ZERO);
        assert_eq!(secret_val.0, 0, "Block8 memory leak detected");
    }

    /// Every non-zero element times its inverse is one; zero inverts to zero.
    #[test]
    fn inversion_exhaustive() {
        for raw in 0u8..=255 {
            let element = Block8(raw);
            let inverse = element.invert();
            if element == Block8::ZERO {
                assert_eq!(inverse, Block8::ZERO, "invert(0) must return 0");
                continue;
            }
            assert_eq!(
                element * inverse,
                Block8::ONE,
                "Inversion identity failed: a * a^-1 != 1"
            );
        }
    }

    /// Tower -> flat -> tower must be the identity.
    #[test]
    fn isomorphism_roundtrip() {
        let mut rng = rng();
        for _ in 0..1000 {
            let original = Block8::from(rng.random::<u8>());
            let recovered = original.to_hardware().to_tower();
            assert_eq!(recovered, original, "Block8 isomorphism roundtrip failed");
        }
    }

    /// The per-bit parity masks must agree with the full flat -> tower conversion.
    #[test]
    fn parity_masks_match_from_hardware() {
        for x_flat in 0u8..=255 {
            let tower = Block8::from_hardware(Flat::from_raw(Block8(x_flat))).0;
            for (k, &mask) in FLAT_TO_TOWER_BIT_MASKS_8.iter().enumerate() {
                let bit = (tower >> k) & 1;
                let parity = ((x_flat & mask).count_ones() & 1) as u8;
                assert_eq!(
                    parity, bit,
                    "Block8 mask mismatch at x={x_flat:#04x}, k={k}"
                );

                let via_api = Flat::from_raw(Block8(x_flat)).tower_bit(k);
                assert_eq!(via_api, bit, "Block8 tower_bit_from_hardware mismatch");
            }
        }
    }

    /// Multiplying in the flat basis must match multiplying in the tower basis.
    #[test]
    fn flat_mul_homomorphism() {
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block8::from(rng.random::<u8>());
            let b = Block8::from(rng.random::<u8>());

            let actual_flat = a.to_hardware() * b.to_hardware();
            let expected_flat = (a * b).to_hardware();
            assert_eq!(
                actual_flat, expected_flat,
                "Block8 flat multiplication mismatch"
            );
        }
    }

    /// Packed flat add/mul must agree lane by lane with the scalar operations.
    #[test]
    fn packed_consistency() {
        let mut rng = rng();
        for _ in 0..100 {
            let mut a_vals = [Block8::ZERO; 16];
            let mut b_vals = [Block8::ZERO; 16];
            for (a, b) in a_vals.iter_mut().zip(b_vals.iter_mut()) {
                *a = Block8::from(rng.random::<u8>());
                *b = Block8::from(rng.random::<u8>());
            }

            let a_flat_vals = a_vals.map(|x| x.to_hardware());
            let b_flat_vals = b_vals.map(|x| x.to_hardware());
            let a_packed = Flat::<Block8>::pack(&a_flat_vals);
            let b_packed = Flat::<Block8>::pack(&b_flat_vals);

            let add_res = Block8::add_hardware_packed(a_packed, b_packed);
            let mut add_out = [Block8::ZERO.to_hardware(); 16];
            Flat::<Block8>::unpack(add_res, &mut add_out);
            for ((got, a), b) in add_out.iter().zip(a_vals.iter()).zip(b_vals.iter()) {
                assert_eq!(
                    *got,
                    (*a + *b).to_hardware(),
                    "Block8 packed add mismatch"
                );
            }

            let mul_res = Block8::mul_hardware_packed(a_packed, b_packed);
            let mut mul_out = [Block8::ZERO.to_hardware(); 16];
            Flat::<Block8>::unpack(mul_res, &mut mul_out);
            for ((got, a), b) in mul_out.iter().zip(a_vals.iter()).zip(b_vals.iter()) {
                assert_eq!(
                    *got,
                    (*a * *b).to_hardware(),
                    "Block8 packed mul mismatch"
                );
            }
        }
    }

    /// `pack` followed by `unpack` must return the original lanes.
    #[test]
    fn pack_unpack_roundtrip() {
        let mut rng = rng();
        let mut data = [Block8::ZERO; PACKED_WIDTH_8];
        data.iter_mut()
            .for_each(|slot| *slot = Block8(rng.random()));

        let mut unpacked = [Block8::ZERO; PACKED_WIDTH_8];
        Block8::unpack(Block8::pack(&data), &mut unpacked);

        assert_eq!(data, unpacked, "Block8 pack/unpack roundtrip failed");
    }

    /// Packed tower addition must agree lane by lane with scalar addition.
    #[test]
    fn packed_add_consistency() {
        let mut rng = rng();
        let mut a_vals = [Block8::ZERO; PACKED_WIDTH_8];
        let mut b_vals = [Block8::ZERO; PACKED_WIDTH_8];
        for (a, b) in a_vals.iter_mut().zip(b_vals.iter_mut()) {
            *a = Block8(rng.random());
            *b = Block8(rng.random());
        }

        let res_packed = Block8::pack(&a_vals) + Block8::pack(&b_vals);
        let mut res_unpacked = [Block8::ZERO; PACKED_WIDTH_8];
        Block8::unpack(res_packed, &mut res_unpacked);

        for (i, got) in res_unpacked.iter().enumerate() {
            assert_eq!(
                *got,
                a_vals[i] + b_vals[i],
                "Block8 packed add mismatch at index {}",
                i
            );
        }
    }

    /// Packed tower multiplication must agree lane by lane with scalar multiplication.
    #[test]
    fn packed_mul_consistency() {
        let mut rng = rng();
        for _ in 0..1000 {
            let mut a_arr = [Block8::ZERO; PACKED_WIDTH_8];
            let mut b_arr = [Block8::ZERO; PACKED_WIDTH_8];
            for (a, b) in a_arr.iter_mut().zip(b_arr.iter_mut()) {
                let val_a: u8 = rng.random();
                let val_b: u8 = rng.random();
                *a = Block8(val_a);
                *b = Block8(val_b);
            }

            let c_packed = PackedBlock8(a_arr) * PackedBlock8(b_arr);

            let mut c_expected = [Block8::ZERO; PACKED_WIDTH_8];
            for ((want, a), b) in c_expected.iter_mut().zip(a_arr.iter()).zip(b_arr.iter()) {
                *want = *a * *b;
            }
            assert_eq!(c_packed.0, c_expected, "SIMD Block8 mismatch!");
        }
    }
}