Skip to main content

vaea_ntt/ntt32/
mod.rs

1// Copyright (C) 2024-2026 Vaea SAS
2// SPDX-License-Identifier: AGPL-3.0-or-later
3//
4// This file is part of VaeaNTT.
5//
6// VaeaNTT is free software: you can redistribute it and/or modify it under
7// the terms of the GNU Affero General Public License as published by the
8// Free Software Foundation, either version 3 of the License, or (at your
9// option) any later version.
10//
11// VaeaNTT is distributed in the hope that it will be useful, but WITHOUT
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
14// License for more details.
15//
16// You should have received a copy of the GNU Affero General Public License
17// along with VaeaNTT. If not, see <https://www.gnu.org/licenses/>.
18
19//! # ntt32 — 28-bit NTT Pipeline
20//!
21//! High-performance Number Theoretic Transform for primes < 2^28.
22//!
23//! ## Architecture
24//!
25//! | Module     | Description |
26//! |------------|-------------|
27//! | `arith`    | Branchless modular arithmetic (add, sub, mul, pow, inv) |
28//! | `prime`    | NTT-friendly prime generation and primitive root finding |
29//! | `scalar`   | Scalar NTT with Shoup trick + Harvey lazy butterfly |
30//! | `neon`     | NEON-vectorized NTT (aarch64 only, all stages) |
31//! | `context`  | Unified `Ntt32Context` with automatic NEON/scalar dispatch |
32//!
33//! ## Quick Start
34//!
35//! ```
36//! use vaea_ntt::ntt32::{Ntt32Context, generate_primes_28};
37//!
38//! let primes = generate_primes_28(1024, 1);
39//! let ctx = Ntt32Context::new(1024, primes[0]);
40//!
41//! let a = vec![1u32; 1024];
42//! let b = vec![2u32; 1024];
43//! let product = ctx.negacyclic_mul(&a, &b);
44//! assert_eq!(product.len(), 1024);
45//! ```
46//!
47//! ## Performance
48//!
49//! Measured on Apple M3 Pro (single core):
50//!
51//! | Operation              | N = 256 | Throughput               |
52//! |------------------------|---------|--------------------------|
53//! | Forward NTT            | 234 ns  | 1.09 billion coeff/s     |
54//! | Negacyclic multiply    | 1.08 µs | —                        |
55//!
56//! ## Security
57//!
58//! All operations are **constant-time**:
59//!
60//! - No data-dependent branches in any arithmetic or butterfly routine.
61//! - Modular reductions use branchless wrapping arithmetic
62//!   ([`u32::wrapping_add`] / [`u32::wrapping_sub`]).
63//! - Safe to use on secret polynomial coefficients (e.g. ML-DSA keys).
64//!
65//! ## Primes
66//!
67//! NTT-friendly primes must satisfy two constraints:
68//!
69//! 1. **Bit-width**: `q < 2^28` (required by the Shoup / Harvey reduction).
70//! 2. **Divisibility**: `q ≡ 1 (mod 2N)` so that a principal 2N-th root
71//!    of unity exists in `Z_q`.
72//!
73//! Use [`generate_primes_28`] to find valid primes for a given `N`:
74//!
75//! ```
76//! use vaea_ntt::ntt32::generate_primes_28;
77//!
78//! let primes = generate_primes_28(256, 3);
79//! assert_eq!(primes.len(), 3);
80//! for &q in &primes {
81//!     assert!(q < (1 << 28));
82//!     assert_eq!(q % (2 * 256), 1);
83//! }
84//! ```
85//!
86//! The ML-DSA (Dilithium) prime **8 380 417** is NTT-friendly for `N = 256`
87//! (`8_380_417 % 512 == 1`).
88//!
89//! ## Forward / Inverse Roundtrip
90//!
91//! ```
92//! use vaea_ntt::ntt32::Ntt32Context;
93//!
94//! let ctx = Ntt32Context::new(256, 8_380_417);
95//! let mut data = vec![0u32; 256];
96//! data[0] = 1;
97//! data[1] = 2;
98//! let original = data.clone();
99//!
100//! ctx.forward(&mut data);
101//! // data is now in NTT domain
102//! assert_ne!(data, original);
103//!
104//! ctx.inverse(&mut data);
105//! // roundtrip: data restored
106//! assert_eq!(data, original);
107//! ```
108
109pub mod arith;
110pub mod context;
111#[cfg(target_arch = "aarch64")]
112pub mod neon;
113pub mod prime;
114pub mod scalar;
115
116// Re-exports for convenience
117pub use arith::{mod_add_28, mod_inv_32, mod_mul_28, mod_pow_32, mod_sub_28};
118pub use context::Ntt32Context;
119pub use prime::generate_primes_28;
120pub use prime::{find_primitive_root, is_prime_32};
121pub use scalar::{compute_shoup, shoup_mul};