Skip to main content

vaea_ntt/
lib.rs

1// Copyright (C) 2024-2026 Vaea SAS
2// SPDX-License-Identifier: AGPL-3.0-or-later
3//
4// This file is part of VaeaNTT.
5//
6// VaeaNTT is free software: you can redistribute it and/or modify it under
7// the terms of the GNU Affero General Public License as published by the
8// Free Software Foundation, either version 3 of the License, or (at your
9// option) any later version.
10//
11// VaeaNTT is distributed in the hope that it will be useful, but WITHOUT
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
14// License for more details.
15//
16// You should have received a copy of the GNU Affero General Public License
17// along with VaeaNTT. If not, see <https://www.gnu.org/licenses/>.
18
19//! # VaeaNTT — High-Performance Number Theoretic Transforms
20//!
21//! **VaeaNTT** is a production-grade NTT engine for lattice-based cryptography
22//! and Fully Homomorphic Encryption (FHE), optimized for ARM NEON (aarch64)
23//! with a portable scalar fallback.
24//!
25//! ## What is NTT and why does it matter?
26//!
27//! The **Number Theoretic Transform** is the finite-field analogue of the FFT.
28//! It maps a polynomial from coefficient representation to evaluation
29//! representation in O(N log N) over Z_q, where q is a prime modulus. In
30//! the evaluation domain, polynomial multiplication becomes pointwise — O(N)
31//! instead of the naïve O(N²). This makes NTT the critical hot-path in:
32//!
33//! - **Post-quantum cryptography** — ML-DSA (formerly Dilithium)
34//!   and other NIST lattice standards multiply polynomials in the ring
35//!   Z_q\[X\]/(X^N+1) via NTT.
36//! - **Fully Homomorphic Encryption (FHE)** — CKKS, BFV, and BGV schemes
37//!   (SEAL, OpenFHE) rely on multi-prime RNS-NTT with 60–62 bit primes.
38//!
39//! VaeaNTT covers both use-cases with a single engine: 28-bit NTT for
40//! post-quantum signatures and 64-bit NTT for FHE workloads.
41//!
42//! ## Quick Start
43//!
44//! ```
45//! use vaea_ntt::ntt32::Ntt32Context;
46//!
47//! // Any NTT-friendly prime < 2^28
48//! let ctx = Ntt32Context::new(256, 8_380_417);
49//!
50//! let mut data = vec![42u32; 256];
51//! ctx.forward(&mut data);
52//! ctx.inverse(&mut data);
53//! assert!(data.iter().all(|&x| x == 42));
54//! ```
55//!
56//! ## Architecture Overview
57//!
58//! | Module | Pipeline | Description |
59//! |--------|----------|-------------|
60//! | [`ntt32`] | 28-bit primes (< 2²⁸) | ARM NEON vectorized butterfly (Shoup + Harvey lazy reduction). Scalar fallback on non-aarch64 targets. |
61//! | [`ntt64`] | 60–62 bit primes | Barrett reduction. SEAL/OpenFHE-compatible. Cooley-Tukey forward, Gentleman-Sande inverse. |
62//! | [`poly`] | Polynomial ring | Polynomials over Z_q\[X\]/(X^N+1) with 64-bit coefficients. Tracks coefficient/NTT domain. |
63//! | [`rns`] | Multi-prime CRT | Residue Number System decomposition for FHE. Component-wise NTT on each limb. |
64//! | [`pq`] | NIST presets | One-line constructors for ML-DSA-44/65/87. |
65//!
66//! ## Negacyclic Polynomial Multiplication (ntt32)
67//!
68//! Multiply two polynomials in Z_q\[X\]/(X^N+1) using the 28-bit pipeline:
69//!
70//! ```
71//! use vaea_ntt::ntt32::{Ntt32Context, generate_primes_28};
72//!
73//! // Generate an NTT-friendly prime for N = 256
74//! let primes = generate_primes_28(256, 1);
75//! let q = primes[0];
76//! let ctx = Ntt32Context::new(256, q);
77//!
78//! // a(X) = 1 + 2X
79//! let mut a = vec![0u32; 256];
80//! a[0] = 1;
81//! a[1] = 2;
82//!
83//! // b(X) = 3 + X
84//! let mut b = vec![0u32; 256];
85//! b[0] = 3;
86//! b[1] = 1;
87//!
88//! // c(X) = a(X) · b(X) mod (X^256 + 1, q) = 3 + 7X + 2X²
89//! let c = ctx.negacyclic_mul(&a, &b);
90//! assert_eq!(c[0], 3);
91//! assert_eq!(c[1], 7);
92//! assert_eq!(c[2], 2);
93//! for i in 3..256 {
94//!     assert_eq!(c[i], 0);
95//! }
96//! ```
97//!
98//! ## 64-bit Pipeline (FHE)
99//!
100//! For FHE workloads requiring 60–62 bit primes, use [`ntt64::Ntt64Arith`]
101//! and [`ntt64::Ntt64Context`]:
102//!
103//! ```
104//! use vaea_ntt::ntt64::{Ntt64Arith, Ntt64Context, generate_primes_60};
105//!
106//! // Generate a 60-bit NTT-friendly prime for N = 4096
107//! let primes = generate_primes_60(4096, 60, 1);
108//! let arith = Ntt64Arith::new(primes[0]);
109//! let ctx = Ntt64Context::new(4096, arith);
110//!
111//! // Forward + inverse roundtrip
112//! let mut data = vec![0u64; 4096];
113//! data[0] = 42;
114//! data[1] = 100;
115//! let original = data.clone();
116//!
117//! ctx.forward(&mut data);
118//! assert_ne!(data, original); // now in NTT domain
119//! ctx.inverse(&mut data);
120//! assert_eq!(data, original); // roundtrip restored
121//! ```
122//!
123//! ## Post-Quantum Presets
124//!
125//! Zero-configuration NTT for NIST post-quantum standards:
126//!
127//! ```
128//! use vaea_ntt::pq::{PqScheme, PqNtt};
129//!
130//! // ML-DSA-65 — digital signatures, NIST Level 3
131//! let ntt = PqNtt::new(PqScheme::MlDsa65);
132//! assert_eq!(ntt.n(), 256);
133//! assert_eq!(ntt.q(), 8_380_417);
134//! assert_eq!(ntt.security_level(), 3);
135//!
136//! // Multiply two polynomials
137//! let mut a = vec![0u32; 256];
138//! a[0] = 5;
139//! let mut b = vec![0u32; 256];
140//! b[0] = 7;
141//! let c = ntt.multiply(&a, &b);
142//! assert_eq!(c[0], 35);
143//! ```
144//!
145//! ## Modules
146//!
147//! | Module | Use case |
148//! |--------|----------|
149//! | [`pq`] | Post-quantum presets for ML-DSA |
150//! | [`ntt32`] | 28-bit primes (< 2²⁸), ARM NEON vectorized |
151//! | [`ntt64`] | 60–62 bit primes for FHE (SEAL/OpenFHE compatible) |
152//! | [`poly`] | Polynomials over Z_q\[X\]/(X^N+1) with 64-bit coefficients |
153//! | [`rns`] | Multi-prime CRT (Residue Number System) |
154//!
155//! ## Performance
156//!
157//! Measured on Apple M3 Pro (single core), `ntt32` pipeline:
158//!
159//! | Operation | N = 256 | Throughput |
160//! |-----------|---------|------------|
161//! | Forward NTT | 234 ns | 1.09 billion coeff/s |
162//! | Negacyclic multiply | 1.08 µs | — |
163//!
164//! The `ntt32` pipeline uses the Shoup precomputed-quotient trick with
165//! Harvey lazy butterfly reductions. On aarch64, all butterfly stages are
166//! NEON-vectorized (4×u32 lanes). The scalar fallback is used on other
167//! architectures.
168//!
169//! ## Security
170//!
171//! All arithmetic and butterfly routines are **constant-time**:
172//!
173//! - **No data-dependent branches** — modular reductions use branchless
174//!   wrapping arithmetic ([`u32::wrapping_add`] / [`u32::wrapping_sub`]).
175//! - **No secret-dependent memory access patterns** — twiddle factor
176//!   indexing depends only on the public transform size N.
177//! - Safe to use on secret polynomial coefficients (e.g. ML-DSA signing keys).
178//!
179//! ## Features
180//!
181//! | Feature | Default | Description |
182//! |---------|---------|-------------|
183//! | `std` | **on** | Enables [`std::error::Error`] impl on [`NttError`] |
184//! | `rand` | off | Random polynomial generation |
185//! | `ffi` | off | C/C++/JS bindings via Diplomat |
186//!
187//! ## `no_std`
188//!
189//! This crate is `no_std` compatible (requires `alloc`).
190//! Disable default features to use without `std`.
191//!
192//! ## License
193//!
194//! VaeaNTT is dual-licensed:
195//!
196//! - **AGPL-3.0-or-later** for open-source use.
197//! - **Commercial license** available from [Vaea SAS](https://vaea.tech)
198//!   for proprietary / closed-source integration.
199
200#![no_std]
201#![warn(missing_docs)]
202
203extern crate alloc;
204
205#[cfg(feature = "std")]
206extern crate std;
207
208/// Errors returned by NTT context construction.
209#[derive(Debug, Clone, PartialEq, Eq)]
210pub enum NttError {
211    /// N must be a power of 2 >= 2.
212    InvalidSize(usize),
213    /// q must be prime.
214    NotPrime(u64),
215    /// q must satisfy q ≡ 1 (mod 2N) for NTT.
216    NotNttFriendly {
217        /// The modulus that failed the NTT-friendly check.
218        q: u64,
219        /// The polynomial size N.
220        n: usize,
221    },
222    /// q must be < 2^28 for the 32-bit pipeline.
223    PrimeTooLarge(u64),
224}
225
226impl core::fmt::Display for NttError {
227    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
228        match self {
229            NttError::InvalidSize(n) => write!(f, "N={n} must be a power of 2 >= 2"),
230            NttError::NotPrime(q) => write!(f, "q={q} is not prime"),
231            NttError::NotNttFriendly { q, n } => {
232                write!(f, "q={q} does not satisfy q ≡ 1 (mod {})", 2 * n)
233            }
234            NttError::PrimeTooLarge(q) => write!(f, "q={q} must be < 2^28"),
235        }
236    }
237}
238
239#[cfg(feature = "std")]
240impl std::error::Error for NttError {}
241
242pub mod ntt32;
243pub mod ntt64;
244pub mod poly;
245pub mod pq;
246pub mod rns;
247
248#[cfg(feature = "ffi")]
249pub mod ffi;