// hekate_math/towers/block16.rs

1// SPDX-License-Identifier: Apache-2.0
2// This file is part of the hekate-math project.
3// Copyright (C) 2026 Andrei Kochergin <zeek@tuta.com>
4// Copyright (C) 2026 Oumuamua Labs. All rights reserved.
5//
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9//
10//     http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
//! BLOCK 16 (GF(2^16)) — quadratic extension of Block8 (GF(2^8)) with
//! X^2 = X + tau, plus packed 8-lane arithmetic and the flat ("hardware")
//! basis used for PMULL-accelerated multiplication.
19use crate::towers::bit::Bit;
20use crate::towers::block8::Block8;
21use crate::{
22    CanonicalDeserialize, CanonicalSerialize, Flat, FlatPromote, HardwareField, PackableField,
23    PackedFlat, TowerField, constants,
24};
25use core::ops::{Add, AddAssign, BitXor, BitXorAssign, Mul, MulAssign, Sub, SubAssign};
26use serde::{Deserialize, Serialize};
27use zeroize::Zeroize;
28
// Cache-line-aligned wrapper for the 16-entry basis-conversion tables used
// by the constant-time (non-table) path. The align(64) presumably keeps each
// table within a single cache line — TODO confirm that is the intent.
#[cfg(not(feature = "table-math"))]
#[repr(align(64))]
struct CtConvertBasisU16<const N: usize>([u16; N]);

// Column basis mapping tower coordinates to flat (hardware) coordinates.
#[cfg(not(feature = "table-math"))]
static TOWER_TO_FLAT_BASIS_16: CtConvertBasisU16<16> =
    CtConvertBasisU16(constants::RAW_TOWER_TO_FLAT_16);

// Inverse mapping: flat (hardware) coordinates back to tower coordinates.
#[cfg(not(feature = "table-math"))]
static FLAT_TO_TOWER_BASIS_16: CtConvertBasisU16<16> =
    CtConvertBasisU16(constants::RAW_FLAT_TO_TOWER_16);
40
/// An element of GF(2^16) in the tower basis, stored as a bare `u16`.
/// The low byte is the GF(2^8) "lo" half, the high byte the "hi" half
/// (see [`Block16::split`]). `repr(transparent)` guarantees the same
/// layout as `u16`, which the NEON code relies on when transmuting.
#[derive(Copy, Clone, Default, Debug, Eq, PartialEq, Serialize, Deserialize, Zeroize)]
#[repr(transparent)]
pub struct Block16(pub u16);
44
45impl Block16 {
46    pub const TAU: Self = Block16(0x2000);
47
48    pub fn new(lo: Block8, hi: Block8) -> Self {
49        Self((hi.0 as u16) << 8 | (lo.0 as u16))
50    }
51
52    #[inline(always)]
53    pub fn split(self) -> (Block8, Block8) {
54        (Block8(self.0 as u8), Block8((self.0 >> 8) as u8))
55    }
56}
57
58impl TowerField for Block16 {
59    const BITS: usize = 16;
60    const ZERO: Self = Block16(0);
61    const ONE: Self = Block16(1);
62
63    const EXTENSION_TAU: Self = Self::TAU;
64
65    fn invert(&self) -> Self {
66        let (l, h) = self.split();
67
68        // Norm = h^2 * tau + h*l + l^2
69        let h2 = h * h;
70        let l2 = l * l;
71        let hl = h * l;
72        let norm = (h2 * Block8::EXTENSION_TAU) + hl + l2;
73
74        let norm_inv = norm.invert();
75
76        // Res = (h*norm_inv) X + (h+l)*norm_inv
77        let res_hi = h * norm_inv;
78        let res_lo = (h + l) * norm_inv;
79
80        Self::new(res_lo, res_hi)
81    }
82
83    fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
84        let mut buf = [0u8; 2];
85        buf.copy_from_slice(&bytes[0..2]);
86
87        Self(u16::from_le_bytes(buf))
88    }
89}
90
91impl Add for Block16 {
92    type Output = Self;
93
94    fn add(self, rhs: Self) -> Self {
95        Self(self.0.bitxor(rhs.0))
96    }
97}
98
99impl Sub for Block16 {
100    type Output = Self;
101
102    fn sub(self, rhs: Self) -> Self {
103        Self(self.0.bitxor(rhs.0))
104    }
105}
106
107impl Mul for Block16 {
108    type Output = Self;
109
110    fn mul(self, rhs: Self) -> Self {
111        let (a0, a1) = self.split();
112        let (b0, b1) = rhs.split();
113
114        // Karatsuba
115        let v0 = a0 * b0;
116        let v1 = a1 * b1;
117        let v_sum = (a0 + a1) * (b0 + b1);
118
119        // Reconstruction with reduction X^2 = X + tau
120        // Hi
121        let c_hi = v0 + v_sum;
122
123        // Lo
124        let c_lo = v0 + (v1 * Block8::EXTENSION_TAU);
125
126        Self::new(c_lo, c_hi)
127    }
128}
129
130impl AddAssign for Block16 {
131    fn add_assign(&mut self, rhs: Self) {
132        self.0.bitxor_assign(rhs.0);
133    }
134}
135
136impl SubAssign for Block16 {
137    fn sub_assign(&mut self, rhs: Self) {
138        self.0.bitxor_assign(rhs.0);
139    }
140}
141
142impl MulAssign for Block16 {
143    fn mul_assign(&mut self, rhs: Self) {
144        *self = *self * rhs;
145    }
146}
147
148impl CanonicalSerialize for Block16 {
149    fn serialized_size(&self) -> usize {
150        2
151    }
152
153    fn serialize(&self, writer: &mut [u8]) -> Result<(), ()> {
154        if writer.len() < 2 {
155            return Err(());
156        }
157
158        writer.copy_from_slice(&self.0.to_le_bytes());
159
160        Ok(())
161    }
162}
163
164impl CanonicalDeserialize for Block16 {
165    fn deserialize(bytes: &[u8]) -> Result<Self, ()> {
166        if bytes.len() < 2 {
167            return Err(());
168        }
169
170        let mut buf = [0u8; 2];
171        buf.copy_from_slice(&bytes[0..2]);
172
173        Ok(Self(u16::from_le_bytes(buf)))
174    }
175}
176
177impl From<u8> for Block16 {
178    fn from(val: u8) -> Self {
179        Self(val as u16)
180    }
181}
182
183impl From<u16> for Block16 {
184    #[inline]
185    fn from(val: u16) -> Self {
186        Self(val)
187    }
188}
189
190impl From<u32> for Block16 {
191    #[inline]
192    fn from(val: u32) -> Self {
193        Self(val as u16)
194    }
195}
196
197impl From<u64> for Block16 {
198    #[inline]
199    fn from(val: u64) -> Self {
200        Self(val as u16)
201    }
202}
203
204impl From<u128> for Block16 {
205    #[inline]
206    fn from(val: u128) -> Self {
207        Self(val as u16)
208    }
209}
210
211// ========================================
212// FIELD LIFTING
213// ========================================
214
impl From<Bit> for Block16 {
    /// Embeds a GF(2) element into GF(2^16): the bit lands in the lowest
    /// coordinate, all others are zero.
    #[inline(always)]
    fn from(val: Bit) -> Self {
        Self(val.0 as u16)
    }
}

impl From<Block8> for Block16 {
    /// Embeds a GF(2^8) element into GF(2^16): the value becomes the low
    /// half, the high half is zero. This is a field homomorphism in the
    /// tower basis (exercised by the `tower_embedding` test below).
    #[inline(always)]
    fn from(val: Block8) -> Self {
        Self(val.0 as u16)
    }
}
228
229// ===================================
230// PACKED BLOCK 16 (Width = 8)
231// ===================================
232
233// 128 bits / 16 = 8 elements
// 128 bits / 16 = 8 elements per 128-bit register.
pub const PACKED_WIDTH_16: usize = 8;

/// Eight Block16 lanes packed into one 16-byte, 16-aligned unit, so the
/// whole value maps onto a single 128-bit NEON register (the neon module
/// transmutes it to `uint8x16_t`, which requires exactly this layout).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
#[repr(C, align(16))]
pub struct PackedBlock16(pub [Block16; PACKED_WIDTH_16]);
239
240impl PackedBlock16 {
241    #[inline(always)]
242    pub fn zero() -> Self {
243        Self([Block16::ZERO; PACKED_WIDTH_16])
244    }
245}
246
247impl PackableField for Block16 {
248    type Packed = PackedBlock16;
249
250    const WIDTH: usize = PACKED_WIDTH_16;
251
252    #[inline(always)]
253    fn pack(chunk: &[Self]) -> Self::Packed {
254        assert!(
255            chunk.len() >= PACKED_WIDTH_16,
256            "PackableField::pack: input slice too short",
257        );
258
259        let mut arr = [Self::ZERO; PACKED_WIDTH_16];
260        arr.copy_from_slice(&chunk[..PACKED_WIDTH_16]);
261
262        PackedBlock16(arr)
263    }
264
265    #[inline(always)]
266    fn unpack(packed: Self::Packed, output: &mut [Self]) {
267        assert!(
268            output.len() >= PACKED_WIDTH_16,
269            "PackableField::unpack: output slice too short",
270        );
271
272        output[..PACKED_WIDTH_16].copy_from_slice(&packed.0);
273    }
274}
275
276impl Add for PackedBlock16 {
277    type Output = Self;
278
279    #[inline(always)]
280    fn add(self, rhs: Self) -> Self {
281        let mut res = [Block16::ZERO; PACKED_WIDTH_16];
282        for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
283            *out = *l + *r;
284        }
285
286        Self(res)
287    }
288}
289
290impl AddAssign for PackedBlock16 {
291    #[inline(always)]
292    fn add_assign(&mut self, rhs: Self) {
293        for (l, r) in self.0.iter_mut().zip(rhs.0.iter()) {
294            *l += *r;
295        }
296    }
297}
298
299impl Sub for PackedBlock16 {
300    type Output = Self;
301
302    #[inline(always)]
303    fn sub(self, rhs: Self) -> Self {
304        self.add(rhs)
305    }
306}
307
308impl SubAssign for PackedBlock16 {
309    #[inline(always)]
310    fn sub_assign(&mut self, rhs: Self) {
311        self.add_assign(rhs);
312    }
313}
314
315impl Mul for PackedBlock16 {
316    type Output = Self;
317
318    #[inline(always)]
319    fn mul(self, rhs: Self) -> Self {
320        #[cfg(target_arch = "aarch64")]
321        {
322            let mut res = [Block16::ZERO; PACKED_WIDTH_16];
323            for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
324                *out = mul_iso_16(*l, *r);
325            }
326
327            Self(res)
328        }
329
330        #[cfg(not(target_arch = "aarch64"))]
331        {
332            let mut res = [Block16::ZERO; PACKED_WIDTH_16];
333            for ((out, l), r) in res.iter_mut().zip(self.0.iter()).zip(rhs.0.iter()) {
334                *out = *l * *r;
335            }
336
337            Self(res)
338        }
339    }
340}
341
342impl MulAssign for PackedBlock16 {
343    #[inline(always)]
344    fn mul_assign(&mut self, rhs: Self) {
345        *self = *self * rhs;
346    }
347}
348
349impl Mul<Block16> for PackedBlock16 {
350    type Output = Self;
351
352    #[inline(always)]
353    fn mul(self, rhs: Block16) -> Self {
354        let mut res = [Block16::ZERO; PACKED_WIDTH_16];
355        for (out, v) in res.iter_mut().zip(self.0.iter()) {
356            *out = *v * rhs;
357        }
358
359        Self(res)
360    }
361}
362
363// ===================================
364// Hardware Field
365// ===================================
366
impl HardwareField for Block16 {
    /// Converts a tower-basis element into the flat ("hardware") basis used
    /// for carry-less multiplication. With `table-math` this is two 256-entry
    /// table lookups; otherwise a constant-time basis change.
    #[inline(always)]
    fn to_hardware(self) -> Flat<Self> {
        #[cfg(feature = "table-math")]
        {
            Flat::from_raw(apply_matrix_16(self, &constants::TOWER_TO_FLAT_16))
        }

        #[cfg(not(feature = "table-math"))]
        {
            Flat::from_raw(Block16(map_ct_16(self.0, &TOWER_TO_FLAT_BASIS_16.0)))
        }
    }

    /// Inverse of [`HardwareField::to_hardware`]: flat basis back to tower.
    #[inline(always)]
    fn from_hardware(value: Flat<Self>) -> Self {
        let value = value.into_raw();

        #[cfg(feature = "table-math")]
        {
            apply_matrix_16(value, &constants::FLAT_TO_TOWER_16)
        }

        #[cfg(not(feature = "table-math"))]
        {
            Block16(map_ct_16(value.0, &FLAT_TO_TOWER_BASIS_16.0))
        }
    }

    /// Addition is XOR in both bases, so no basis change is needed.
    #[inline(always)]
    fn add_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        Flat::from_raw(lhs.into_raw() + rhs.into_raw())
    }

    /// Lane-wise flat addition; one 128-bit XOR on aarch64, a scalar loop
    /// (via PackedBlock16's Add) elsewhere.
    #[inline(always)]
    fn add_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();

        #[cfg(target_arch = "aarch64")]
        {
            PackedFlat::from_raw(neon::add_packed_16(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            PackedFlat::from_raw(lhs + rhs)
        }
    }

    /// Flat-basis multiplication. On aarch64 this uses PMULL directly; on
    /// other targets it round-trips through the tower basis and multiplies
    /// there, which is slower but keeps results identical.
    #[inline(always)]
    fn mul_hardware(lhs: Flat<Self>, rhs: Flat<Self>) -> Flat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();

        #[cfg(target_arch = "aarch64")]
        {
            Flat::from_raw(neon::mul_flat_16(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            let a_tower = Self::from_hardware(Flat::from_raw(lhs));
            let b_tower = Self::from_hardware(Flat::from_raw(rhs));

            (a_tower * b_tower).to_hardware()
        }
    }

    /// Lane-wise flat multiplication of all eight lanes.
    #[inline(always)]
    fn mul_hardware_packed(lhs: PackedFlat<Self>, rhs: PackedFlat<Self>) -> PackedFlat<Self> {
        let lhs = lhs.into_raw();
        let rhs = rhs.into_raw();

        #[cfg(target_arch = "aarch64")]
        {
            PackedFlat::from_raw(neon::mul_flat_packed_16(lhs, rhs))
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            // Portable fallback: unpack, multiply lane by lane through
            // `mul_hardware`, repack.
            let mut l = [Self::ZERO; <Self as PackableField>::WIDTH];
            let mut r = [Self::ZERO; <Self as PackableField>::WIDTH];
            let mut res = [Self::ZERO; <Self as PackableField>::WIDTH];

            Self::unpack(lhs, &mut l);
            Self::unpack(rhs, &mut r);

            for i in 0..<Self as PackableField>::WIDTH {
                res[i] = Self::mul_hardware(Flat::from_raw(l[i]), Flat::from_raw(r[i])).into_raw();
            }

            PackedFlat::from_raw(Self::pack(&res))
        }
    }

    /// Scalar-times-vector: broadcasts `rhs` into all lanes and reuses the
    /// packed multiplication.
    #[inline(always)]
    fn mul_hardware_scalar_packed(lhs: PackedFlat<Self>, rhs: Flat<Self>) -> PackedFlat<Self> {
        let broadcasted = PackedBlock16([rhs.into_raw(); PACKED_WIDTH_16]);
        Self::mul_hardware_packed(lhs, PackedFlat::from_raw(broadcasted))
    }

    /// Extracts tower-basis bit `bit_idx` straight from a flat-basis value:
    /// each tower bit is a GF(2) linear form of the flat bits, i.e. the
    /// parity of `value & mask` for a precomputed mask.
    ///
    /// Panics if `bit_idx >= 16` (mask table lookup is bounds-checked).
    #[inline(always)]
    fn tower_bit_from_hardware(value: Flat<Self>, bit_idx: usize) -> u8 {
        let mask = constants::FLAT_TO_TOWER_BIT_MASKS_16[bit_idx];

        // Parity of (x & mask) without
        // popcount. Folds 16 bits down
        // to 1 using a binary XOR tree.
        let mut v = value.into_raw().0 & mask;
        v ^= v >> 8;
        v ^= v >> 4;
        v ^= v >> 2;
        v ^= v >> 1;

        (v & 1) as u8
    }
}
485
impl FlatPromote<Block8> for Block16 {
    /// Promotes a flat-basis GF(2^8) element to a flat-basis GF(2^16)
    /// element by applying the precomputed 8→16 lifting basis.
    #[inline(always)]
    fn promote_flat(val: Flat<Block8>) -> Flat<Self> {
        let val = val.into_raw();

        #[cfg(not(feature = "table-math"))]
        {
            // Constant-time path: XOR in the basis image of every set
            // input bit using a branch-free all-zeros/all-ones mask.
            let mut acc = 0u16;
            for i in 0..8 {
                let bit = (val.0 >> i) & 1;
                let mask = 0u16.wrapping_sub(bit as u16);
                acc ^= constants::LIFT_BASIS_8_TO_16[i] & mask;
            }

            Flat::from_raw(Block16(acc))
        }

        #[cfg(feature = "table-math")]
        {
            // Table path: a single 256-entry lookup (faster, but data-
            // dependent cache access — not constant-time).
            Flat::from_raw(Block16(constants::LIFT_TABLE_8_TO_16[val.0 as usize]))
        }
    }
}
509
510// ===========================================
511// UTILS
512// ===========================================
513
/// Multiplies two tower-basis elements by round-tripping through the flat
/// (hardware) basis: convert both operands, multiply with PMULL, convert
/// back. Used by the packed tower `Mul` on aarch64.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
pub fn mul_iso_16(a: Block16, b: Block16) -> Block16 {
    let a_f = a.to_hardware();
    let b_f = b.to_hardware();
    let c_f = Flat::from_raw(neon::mul_flat_16(a_f.into_raw(), b_f.into_raw()));

    c_f.to_tower()
}
523
/// Applies a GF(2)-linear basis-change matrix to `val` using two 8-bit
/// window lookups. `table` is laid out as two banks of 256 entries, one
/// bank per input byte.
#[cfg(feature = "table-math")]
#[inline(always)]
pub fn apply_matrix_16(val: Block16, table: &[u16; 512]) -> Block16 {
    let v = val.0;
    let mut res = 0u16;

    // 2 lookups (8-bit window). The index is at most 256 + 0xFF = 511,
    // always within the 512-entry table, so safe indexing never panics and
    // the bounds check is provably dead — no need for `get_unchecked`.
    for i in 0..2 {
        let idx = (i * 256) + ((v >> (i * 8)) & 0xFF) as usize;
        res ^= table[idx];
    }

    Block16(res)
}
538
/// Constant-time GF(2) matrix–vector product: XORs `basis[i]` into the
/// accumulator for every set bit `i` of `x`. The branch-free mask
/// (0x0000 or 0xFFFF) keeps the instruction schedule independent of the
/// (potentially secret) value of `x`.
#[cfg(not(feature = "table-math"))]
#[inline(always)]
fn map_ct_16(x: u16, basis: &[u16; 16]) -> u16 {
    let mut acc = 0u16;

    for (i, &row) in basis.iter().enumerate() {
        let mask = 0u16.wrapping_sub((x >> i) & 1);
        acc ^= row & mask;
    }

    acc
}
554
555// ===========================================
556// SIMD INSTRUCTIONS
557// ===========================================
558
#[cfg(target_arch = "aarch64")]
mod neon {
    use super::*;
    use core::arch::aarch64::*;
    use core::mem::transmute;

    // NOTE(review): `vmull_p64` requires the PMULL (`aes`) target feature;
    // this module presumably relies on it being enabled for the build
    // (e.g. Apple Silicon defaults) — confirm against the crate's build
    // flags / `target_feature` configuration.

    /// XORs all eight 16-bit lanes at once with a single 128-bit `veorq_u8`
    /// (characteristic-2 addition is lane-width agnostic).
    #[inline(always)]
    pub fn add_packed_16(lhs: PackedBlock16, rhs: PackedBlock16) -> PackedBlock16 {
        unsafe {
            // SAFETY: PackedBlock16 is #[repr(C, align(16))] over
            // [Block16; 8] (16 bytes, Block16 is repr(transparent) u16),
            // matching uint8x16_t in size and alignment, so the transmutes
            // are layout-compatible.
            let res = veorq_u8(
                transmute::<[Block16; 8], uint8x16_t>(lhs.0),
                transmute::<[Block16; 8], uint8x16_t>(rhs.0),
            );
            transmute(res)
        }
    }

    /// Carry-less multiplication of two flat-basis 16-bit elements,
    /// followed by reduction modulo P(x) = x^16 + R (R = constants::POLY_16).
    #[inline(always)]
    pub fn mul_flat_16(a: Block16, b: Block16) -> Block16 {
        unsafe {
            // Note: Using 64-bit PMULL for 16-bit blocks
            // is optimal on Apple Silicon. The pipeline
            // parallelism of scalar `vmull_p64` outperforms
            // complex SIMD Karatsuba.
            let prod = vmull_p64(a.0 as u64, b.0 as u64);
            let prod_val = vgetq_lane_u64(transmute::<u128, uint64x2_t>(prod), 0);

            let l = (prod_val & 0xFFFF) as u16;
            let h = (prod_val >> 16) as u16; // The rest fits in u16 for 16x16

            // P(x) = x^16 + R
            let r_val = constants::POLY_16 as u64;

            // h * R
            let h_red = vmull_p64(h as u64, r_val);
            let h_red_val = vgetq_lane_u64(transmute::<u128, uint64x2_t>(h_red), 0);

            // Result of h*R fits in 32 bits max (16+16).
            // It's x^16 * H = H * R.
            // res = L ^ (H*R)
            // Since H*R > 16 bits, we have carry.

            let folded = (h_red_val & 0xFFFF) as u16;
            let carry = (h_red_val >> 16) as u16;

            let mut res = l ^ folded;

            // Unconditional reduction
            // ensures constant-time.
            // (The second fold terminates: carry * R stays below 2^16 for
            // a low-degree R, so no third fold is needed.)
            let c_red = vmull_p64(carry as u64, r_val);
            let c_val = vgetq_lane_u64(transmute::<u128, uint64x2_t>(c_red), 0);

            res ^= c_val as u16;

            Block16(res)
        }
    }

    /// Lane-wise flat-basis multiplication for the 8-lane packed type.
    /// Despite living in the NEON module, this currently issues eight
    /// scalar PMULL multiplications — see the note in `mul_flat_16` on
    /// why scalar `vmull_p64` beats a vectorized Karatsuba here.
    #[inline(always)]
    pub fn mul_flat_packed_16(lhs: PackedBlock16, rhs: PackedBlock16) -> PackedBlock16 {
        let r0 = mul_flat_16(lhs.0[0], rhs.0[0]);
        let r1 = mul_flat_16(lhs.0[1], rhs.0[1]);
        let r2 = mul_flat_16(lhs.0[2], rhs.0[2]);
        let r3 = mul_flat_16(lhs.0[3], rhs.0[3]);
        let r4 = mul_flat_16(lhs.0[4], rhs.0[4]);
        let r5 = mul_flat_16(lhs.0[5], rhs.0[5]);
        let r6 = mul_flat_16(lhs.0[6], rhs.0[6]);
        let r7 = mul_flat_16(lhs.0[7], rhs.0[7]);

        PackedBlock16([r0, r1, r2, r3, r4, r5, r6, r7])
    }
}
635
// Unit tests: basic field axioms, inversion, embedding homomorphism,
// tower<->flat isomorphism, and packed/SIMD consistency.
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{RngExt, rng};

    // ==================================
    // BASIC
    // ==================================

    #[test]
    fn tower_constants() {
        // Check that tau is propagated correctly
        // For Block16, tau must be (0, 1) from Block8.
        // (TAU = 0x2000 splits into lo = 0x00, hi = 0x20.)
        let tau16 = Block16::EXTENSION_TAU;
        let (lo16, hi16) = tau16.split();
        assert_eq!(lo16, Block8::ZERO);
        assert_eq!(hi16, Block8(0x20));
    }

    #[test]
    fn add_truth() {
        let zero = Block16::ZERO;
        let one = Block16::ONE;

        assert_eq!(zero + zero, zero);
        assert_eq!(zero + one, one);
        assert_eq!(one + zero, one);
        assert_eq!(one + one, zero);
    }

    #[test]
    fn mul_truth() {
        let zero = Block16::ZERO;
        let one = Block16::ONE;

        assert_eq!(zero * zero, zero);
        assert_eq!(zero * one, zero);
        assert_eq!(one * one, one);
    }

    #[test]
    fn add() {
        // 5 ^ 3 = 6
        // 101 ^ 011 = 110
        assert_eq!(Block16(5) + Block16(3), Block16(6));
    }

    #[test]
    fn mul_simple() {
        // Check for prime numbers (without overflow)
        // x^1 * x^1 = x^2 (2 * 2 = 4)
        assert_eq!(Block16(2) * Block16(2), Block16(4));
    }

    #[test]
    fn mul_overflow() {
        // Reduction verification (AES test vectors)
        // Example from the AES specification:
        // 0x57 * 0x83 = 0xC1
        // NOTE(review): 0x57 * 0x83 = 0xC1 is an identity of the AES
        // (Rijndael polynomial-basis) GF(2^8), while Block8 is a tower
        // construction — confirm this vector actually holds in the tower
        // basis rather than assuming it carries over.
        assert_eq!(Block16(0x57) * Block16(0x83), Block16(0xC1));
    }

    #[test]
    fn karatsuba_correctness() {
        // Let A = X (hi=1, lo=0)
        // Let B = X (hi=1, lo=0)
        // A * B = X^2
        // According to the rule:
        // X^2 = X + tau
        // Where tau for Block8 = 0x20.
        // So the result should be:
        // hi=1 (X), lo=0x20 (tau)

        // Construct X manually
        let x = Block16::new(Block8::ZERO, Block8::ONE);
        let squared = x * x;

        // Verify result via splitting
        let (res_lo, res_hi) = squared.split();

        assert_eq!(res_hi, Block8::ONE, "X^2 should contain X component");
        assert_eq!(
            res_lo,
            Block8(0x20),
            "X^2 should contain tau component (0x20)"
        );
    }

    #[test]
    fn security_zeroize() {
        let mut secret_val = Block16::from(0xDEAD_u16);
        assert_ne!(secret_val, Block16::ZERO);

        secret_val.zeroize();

        assert_eq!(secret_val, Block16::ZERO);
        assert_eq!(secret_val.0, 0, "Block16 memory leak detected");
    }

    #[test]
    fn invert_zero() {
        // Critical safety check:
        // Inverting zero must return 0 by convention.
        assert_eq!(
            Block16::ZERO.invert(),
            Block16::ZERO,
            "invert(0) must return 0"
        );
    }

    #[test]
    fn inversion_random() {
        let mut rng = rng();

        // Test a significant number of random elements
        for _ in 0..1000 {
            let val_u16: u16 = rng.random();
            let val = Block16(val_u16);

            if val != Block16::ZERO {
                let inv = val.invert();
                let res = val * inv;

                assert_eq!(
                    res,
                    Block16::ONE,
                    "Inversion identity failed: a * a^-1 != 1"
                );
            }
        }
    }

    #[test]
    fn tower_embedding() {
        let mut rng = rng();
        for _ in 0..100 {
            let a = Block8(rng.random());
            let b = Block8(rng.random());

            // 1. Structure check:
            // Lifting puts value in low part,
            // zero in high part Subfield element
            // 'a' inside extension must look like (a, 0)
            let a_lifted: Block16 = a.into();
            let (lo, hi) = a_lifted.split();

            assert_eq!(lo, a, "Embedding structure failed: low part mismatch");
            assert_eq!(
                hi,
                Block8::ZERO,
                "Embedding structure failed: high part must be zero"
            );

            // 2. Addition Homomorphism:
            // lift(a + b) == lift(a) + lift(b)
            let sum_sub = a + b;
            let sum_lifted: Block16 = sum_sub.into();
            let sum_manual = Block16::from(a) + Block16::from(b);

            assert_eq!(sum_lifted, sum_manual, "Homomorphism failed: add");

            // 3. Multiplication Homomorphism:
            // lift(a * b) == lift(a) * lift(b)
            // Operations in the subfield must
            // match operations in the superfield.
            let prod_sub = a * b;
            let prod_lifted: Block16 = prod_sub.into();
            let prod_manual = Block16::from(a) * Block16::from(b);

            assert_eq!(prod_lifted, prod_manual, "Homomorphism failed: mul");
        }
    }

    // ==================================
    // HARDWARE
    // ==================================

    #[test]
    fn isomorphism_roundtrip() {
        let mut rng = rng();
        for _ in 0..1000 {
            let val = Block16(rng.random::<u16>());
            assert_eq!(
                val.to_hardware().to_tower(),
                val,
                "Block16 isomorphism roundtrip failed"
            );
        }
    }

    #[test]
    fn flat_mul_homomorphism() {
        let mut rng = rng();
        for _ in 0..1000 {
            let a = Block16(rng.random::<u16>());
            let b = Block16(rng.random::<u16>());

            let expected_flat = (a * b).to_hardware();
            let actual_flat = a.to_hardware() * b.to_hardware();

            assert_eq!(
                actual_flat, expected_flat,
                "Block16 flat multiplication mismatch"
            );
        }
    }

    #[test]
    fn packed_consistency() {
        let mut rng = rng();
        for _ in 0..100 {
            let mut a_vals = [Block16::ZERO; 8];
            let mut b_vals = [Block16::ZERO; 8];

            for i in 0..8 {
                a_vals[i] = Block16(rng.random::<u16>());
                b_vals[i] = Block16(rng.random::<u16>());
            }

            let a_flat_vals = a_vals.map(|x| x.to_hardware());
            let b_flat_vals = b_vals.map(|x| x.to_hardware());
            let a_packed = Flat::<Block16>::pack(&a_flat_vals);
            let b_packed = Flat::<Block16>::pack(&b_flat_vals);

            // Test SIMD Add
            let add_res = Block16::add_hardware_packed(a_packed, b_packed);

            let mut add_out = [Block16::ZERO.to_hardware(); 8];
            Flat::<Block16>::unpack(add_res, &mut add_out);

            for i in 0..8 {
                assert_eq!(
                    add_out[i],
                    (a_vals[i] + b_vals[i]).to_hardware(),
                    "Block16 packed add mismatch"
                );
            }

            // Test SIMD Mul
            let mul_res = Block16::mul_hardware_packed(a_packed, b_packed);

            let mut mul_out = [Block16::ZERO.to_hardware(); 8];
            Flat::<Block16>::unpack(mul_res, &mut mul_out);

            for i in 0..8 {
                assert_eq!(
                    mul_out[i],
                    (a_vals[i] * b_vals[i]).to_hardware(),
                    "Block16 packed mul mismatch"
                );
            }
        }
    }

    // ==================================
    // PACKED
    // ==================================

    #[test]
    fn pack_unpack_roundtrip() {
        let mut rng = rng();
        let mut data = [Block16::ZERO; PACKED_WIDTH_16];

        for v in data.iter_mut() {
            *v = Block16(rng.random());
        }

        let packed = Block16::pack(&data);
        let mut unpacked = [Block16::ZERO; PACKED_WIDTH_16];
        Block16::unpack(packed, &mut unpacked);

        assert_eq!(data, unpacked, "Block16 pack/unpack roundtrip failed");
    }

    #[test]
    fn packed_add_consistency() {
        let mut rng = rng();
        let mut a_vals = [Block16::ZERO; PACKED_WIDTH_16];
        let mut b_vals = [Block16::ZERO; PACKED_WIDTH_16];

        for i in 0..PACKED_WIDTH_16 {
            a_vals[i] = Block16(rng.random());
            b_vals[i] = Block16(rng.random());
        }

        let res_packed = Block16::pack(&a_vals) + Block16::pack(&b_vals);
        let mut res_unpacked = [Block16::ZERO; PACKED_WIDTH_16];
        Block16::unpack(res_packed, &mut res_unpacked);

        for i in 0..PACKED_WIDTH_16 {
            assert_eq!(
                res_unpacked[i],
                a_vals[i] + b_vals[i],
                "Block16 packed add mismatch"
            );
        }
    }

    #[test]
    fn packed_mul_consistency() {
        let mut rng = rng();

        for _ in 0..1000 {
            let mut a_arr = [Block16::ZERO; PACKED_WIDTH_16];
            let mut b_arr = [Block16::ZERO; PACKED_WIDTH_16];

            for i in 0..PACKED_WIDTH_16 {
                let val_a_u16: u16 = rng.random();
                let val_b_u16: u16 = rng.random();

                a_arr[i] = Block16(val_a_u16);
                b_arr[i] = Block16(val_b_u16);
            }

            let a_packed = PackedBlock16(a_arr);
            let b_packed = PackedBlock16(b_arr);
            let c_packed = a_packed * b_packed;

            let mut c_expected = [Block16::ZERO; PACKED_WIDTH_16];
            for i in 0..PACKED_WIDTH_16 {
                c_expected[i] = a_arr[i] * b_arr[i];
            }

            assert_eq!(c_packed.0, c_expected, "SIMD Block16 mismatch!");
        }
    }

    #[test]
    fn parity_masks_match_from_hardware() {
        // Exhaustive for Block16:
        // 65536 values * 16 bits.
        for x_flat in 0u16..=u16::MAX {
            let tower = Block16::from_hardware(Flat::from_raw(Block16(x_flat))).0;

            for k in 0..16 {
                let bit = ((tower >> k) & 1) as u8;
                let via_api = Flat::from_raw(Block16(x_flat)).tower_bit(k);

                assert_eq!(
                    via_api, bit,
                    "Block16 tower_bit_from_hardware mismatch at x_flat={x_flat:#06x}, bit_idx={k}"
                );
            }
        }
    }
}