Skip to main content

p3_field/
field.rs

1use alloc::vec;
2use alloc::vec::Vec;
3use core::fmt::{Debug, Display};
4use core::hash::Hash;
5use core::iter::{Product, Sum, zip};
6use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
7use core::{array, slice};
8
9use num_bigint::BigUint;
10use p3_maybe_rayon::prelude::*;
11use p3_util::{flatten_to_base, iter_array_chunks_padded};
12use serde::Serialize;
13use serde::de::DeserializeOwned;
14
15use crate::exponentiation::bits_u64;
16use crate::integers::{QuotientMap, from_integer_types};
17use crate::packed::PackedField;
18use crate::{Dup, Packable, PackedFieldExtension, PackedValue};
19
20/// A commutative ring, `R`, with prime characteristic, `p`.
21///
22/// This permits elements like:
23/// - A single finite field element.
24/// - A symbolic expression which would evaluate to a field element.
25/// - An array of finite field elements.
26/// - A polynomial with coefficients in a finite field.
27///
28/// ### Mathematical Description
29///
30/// Mathematically, a commutative ring is a set of objects which supports an addition-like
31/// like operation, `+`, and a multiplication-like operation `*`.
32///
33/// Let `x, y, z` denote arbitrary elements of the set.
34///
35/// Then, an operation is addition-like if it satisfies the following properties:
36/// - Commutativity => `x + y = y + x`
37/// - Associativity => `x + (y + z) = (x + y) + z`
38/// - Unit => There exists an identity element `ZERO` satisfying `x + ZERO = x`.
39/// - Inverses => For every `x` there exists a unique inverse `(-x)` satisfying `x + (-x) = ZERO`
40///
41/// Similarly, an operation is multiplication-like if it satisfies the following properties:
42/// - Commutativity => `x * y = y * x`
43/// - Associativity => `x * (y * z) = (x * y) * z`
44/// - Unit => There exists an identity element `ONE` satisfying `x * ONE = x`.
45/// - Distributivity => The two operations `+` and `*` must together satisfy `x * (y + z) = (x * y) + (x * z)`
46///
47/// Unlike in the addition case, we do not require inverses to exist with respect to `*`.
48///
49/// The simplest examples of commutative rings are the integers (`ℤ`), and the integers mod `N` (`ℤ/N`).
50///
51/// The characteristic of a ring is the smallest positive integer `r` such that `0 = r . 1 = 1 + 1 + ... + 1 (r times)`.
52/// For example, the characteristic of the modulo ring `ℤ/N` is `N`.
53///
54/// Rings with prime characteristic are particularly special due to their close relationship with finite fields.
55pub trait PrimeCharacteristicRing:
56    Sized
57    + Default
58    + Dup
59    + Add<Output = Self>
60    + AddAssign
61    + Sub<Output = Self>
62    + SubAssign
63    + Neg<Output = Self>
64    + Mul<Output = Self>
65    + MulAssign
66    + Sum
67    + Product
68    + Debug
69{
70    /// The field `ℤ/p` where the characteristic of this ring is p.
71    type PrimeSubfield: PrimeField;
72
73    /// The additive identity of the ring.
74    ///
75    /// For every element `a` in the ring we require the following properties:
76    ///
77    /// `a + ZERO = ZERO + a = a,`
78    ///
79    /// `a + (-a) = (-a) + a = ZERO.`
80    const ZERO: Self;
81
82    /// The multiplicative identity of the ring.
83    ///
84    /// For every element `a` in the ring we require the following property:
85    ///
86    /// `a*ONE = ONE*a = a.`
87    const ONE: Self;
88
89    /// The element in the ring given by `ONE + ONE`.
90    ///
91    /// This is provided as a convenience as `TWO` occurs regularly in
92    /// the proving system. This also is slightly faster than computing
93    /// it via addition. Note that multiplication by `TWO` is discouraged.
94    /// Instead of `a * TWO` use `a.double()` which will be faster.
95    ///
96    /// If the field has characteristic 2 this is equal to ZERO.
97    const TWO: Self;
98
99    /// The element in the ring given by `-ONE`.
100    ///
101    /// This is provided as a convenience as `NEG_ONE` occurs regularly in
102    /// the proving system. This also is slightly faster than computing
103    /// it via negation. Note that where possible `NEG_ONE` should be absorbed
104    /// into mathematical operations. For example `a - b` will be faster
105    /// than `a + NEG_ONE * b` and similarly `(-b)` is faster than `NEG_ONE * b`.
106    ///
107    /// If the field has characteristic 2 this is equal to ONE.
108    const NEG_ONE: Self;
109
110    /// Embed an element of the prime field `ℤ/p` into the ring `R`.
111    ///
112    /// Given any element `[r] ∈ ℤ/p`, represented by an integer `r` between `0` and `p - 1`
113    /// `from_prime_subfield([r])` will be equal to:
114    ///
115    /// `Self::ONE + ... + Self::ONE (r times)`
116    #[must_use]
117    fn from_prime_subfield(f: Self::PrimeSubfield) -> Self;
118
119    /// Return `Self::ONE` if `b` is `true` and `Self::ZERO` if `b` is `false`.
120    #[must_use]
121    #[inline(always)]
122    fn from_bool(b: bool) -> Self {
123        // Some rings might reimplement this to avoid the branch.
124        if b { Self::ONE } else { Self::ZERO }
125    }
126
127    from_integer_types!(
128        u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize
129    );
130
131    /// The elementary function `double(a) = 2*a`.
132    ///
133    /// This function should be preferred over calling `a + a` or `TWO * a` as a faster implementation may be available for some rings.
134    /// If the field has characteristic 2 then this returns 0.
135    #[must_use]
136    #[inline(always)]
137    fn double(&self) -> Self {
138        self.dup() + self.dup()
139    }
140
141    /// The elementary function `halve(a) = a/2`.
142    ///
143    /// # Panics
144    /// The function will panic if the field has characteristic 2.
145    #[must_use]
146    #[inline]
147    fn halve(&self) -> Self {
148        // This must be overwritten by PrimeField implementations as this definition
149        // is circular when PrimeSubfield = Self. It should also be overwritten by
150        // most rings to avoid the multiplication.
151        let half = Self::from_prime_subfield(Self::PrimeSubfield::ONE.halve());
152        self.dup() * half
153    }
154
155    /// The elementary function `square(a) = a^2`.
156    ///
157    /// This function should be preferred over calling `a * a`, as a faster implementation may be available for some rings.
158    #[must_use]
159    #[inline(always)]
160    fn square(&self) -> Self {
161        self.dup() * self.dup()
162    }
163
164    /// The elementary function `cube(a) = a^3`.
165    ///
166    /// This function should be preferred over calling `a * a * a`, as a faster implementation may be available for some rings.
167    #[must_use]
168    #[inline(always)]
169    fn cube(&self) -> Self {
170        self.square() * self.dup()
171    }
172
173    /// Computes the arithmetic generalization of boolean `xor`.
174    ///
175    /// For boolean inputs, `x ^ y = x + y - 2 xy`.
176    #[must_use]
177    #[inline(always)]
178    fn xor(&self, y: &Self) -> Self {
179        self.dup() + y.dup() - self.dup() * y.dup().double()
180    }
181
182    /// Computes the arithmetic generalization of a triple `xor`.
183    ///
184    /// For boolean inputs `x ^ y ^ z = x + y + z - 2(xy + xz + yz) + 4xyz`.
185    #[must_use]
186    #[inline(always)]
187    fn xor3(&self, y: &Self, z: &Self) -> Self {
188        self.xor(y).xor(z)
189    }
190
191    /// Computes the arithmetic generalization of `andnot`.
192    ///
193    /// For boolean inputs `(!x) & y = (1 - x)y`.
194    #[must_use]
195    #[inline(always)]
196    fn andn(&self, y: &Self) -> Self {
197        (Self::ONE - self.dup()) * y.dup()
198    }
199
200    /// The vanishing polynomial for boolean values: `x * (x - 1)`.
201    ///
202    /// This is a polynomial of degree `2` that evaluates to `0` if the input is `0` or `1`.
203    /// If our space is a field, then this will be nonzero on all other inputs.
204    #[must_use]
205    #[inline(always)]
206    fn bool_check(&self) -> Self {
207        // Note: We could delegate to `andn`, but to maintain backwards
208        // compatible AIR definitions, we stick with `x * (x - 1)` here.
209        self.dup() * (self.dup() - Self::ONE)
210    }
211
212    /// Exponentiation by a `u64` power.
213    ///
214    /// This uses the standard square and multiply approach.
215    /// For specific powers regularly used and known in advance,
216    /// this will be slower than custom addition chain exponentiation.
217    #[must_use]
218    #[inline]
219    fn exp_u64(&self, power: u64) -> Self {
220        let mut current = self.dup();
221        let mut product = Self::ONE;
222
223        for j in 0..bits_u64(power) {
224            if (power >> j) & 1 != 0 {
225                product *= current.dup();
226            }
227            current = current.square();
228        }
229        product
230    }
231
232    /// Exponentiation by a small constant power.
233    ///
234    /// For a collection of small values we implement custom multiplication chain circuits which can be faster than the
235    /// simpler square and multiply approach.
236    ///
237    /// For large values this defaults back to `self.exp_u64`.
238    #[must_use]
239    #[inline(always)]
240    fn exp_const_u64<const POWER: u64>(&self) -> Self {
241        match POWER {
242            0 => Self::ONE,
243            1 => self.dup(),
244            2 => self.square(),
245            3 => self.cube(),
246            4 => self.square().square(),
247            5 => self.square().square() * self.dup(),
248            6 => self.square().cube(),
249            7 => {
250                let x2 = self.square();
251                let x3 = x2.dup() * self.dup();
252                let x4 = x2.square();
253                x3 * x4
254            }
255            _ => self.exp_u64(POWER),
256        }
257    }
258
259    /// The elementary function `exp_power_of_2(a, power_log) = a^{2^power_log}`.
260    ///
261    /// Computed via repeated squaring.
262    #[must_use]
263    #[inline]
264    fn exp_power_of_2(&self, power_log: usize) -> Self {
265        let mut res = self.dup();
266        for _ in 0..power_log {
267            res = res.square();
268        }
269        res
270    }
271
272    /// The elementary function `mul_2exp_u64(a, exp) = a * 2^{exp}`.
273    ///
274    /// Here `2^{exp}` is computed using the square and multiply approach.
275    #[must_use]
276    #[inline]
277    fn mul_2exp_u64(&self, exp: u64) -> Self {
278        // Some rings might want to reimplement this to avoid the
279        // exponentiations (and potentially even the multiplication).
280        self.dup() * Self::TWO.exp_u64(exp)
281    }
282
283    /// Divide by a given power of two. `div_2exp_u64(a, exp) = a/2^exp`
284    ///
285    /// # Panics
286    /// The function will panic if the field has characteristic 2.
287    #[must_use]
288    #[inline]
289    fn div_2exp_u64(&self, exp: u64) -> Self {
290        // Some rings might want to reimplement this to avoid the
291        // exponentiations (and potentially even the multiplication).
292        self.dup() * Self::from_prime_subfield(Self::PrimeSubfield::ONE.halve().exp_u64(exp))
293    }
294
295    /// Construct an iterator which returns powers of `self`: `self^0, self^1, self^2, ...`.
296    #[must_use]
297    #[inline]
298    fn powers(&self) -> Powers<Self> {
299        self.shifted_powers(Self::ONE)
300    }
301
302    /// Construct an iterator which returns powers of `self` shifted by `start`: `start, start*self^1, start*self^2, ...`.
303    #[must_use]
304    #[inline]
305    fn shifted_powers(&self, start: Self) -> Powers<Self> {
306        Powers {
307            base: self.dup(),
308            current: start,
309        }
310    }
311
312    /// Compute the dot product of two vectors.
313    #[must_use]
314    #[inline]
315    fn dot_product<const N: usize>(u: &[Self; N], v: &[Self; N]) -> Self {
316        u.iter().zip(v).map(|(x, y)| x.dup() * y.dup()).sum()
317    }
318
319    /// Compute the sum of a slice of elements whose length is a compile time constant.
320    ///
321    /// The rust compiler doesn't realize that add is associative
322    /// so we help it out and minimize the dependency chains by hand.
323    /// Thus while this function has the same throughput as `input.iter().sum()`,
324    /// it will usually have much lower latency.
325    ///
326    /// # Panics
327    ///
328    /// May panic if the length of the input slice is not equal to `N`.
329    #[must_use]
330    #[inline]
331    fn sum_array<const N: usize>(input: &[Self]) -> Self {
332        // It looks a little strange but using a const parameter and an assert_eq! instead of
333        // using input.len() leads to a significant performance improvement.
334        // We could make this input &[Self; N] but that would require sticking .try_into().unwrap() everywhere.
335        // Checking godbolt, the compiler seems to unroll everything anyway.
336        assert_eq!(N, input.len());
337
338        // For `N <= 8` we implement a tree sum structure and for `N > 8` we break the input into
339        // chunks of `8`, perform a tree sum on each chunk and sum the results. The parameter `8`
340        // was determined experimentally by testing the speed of the poseidon2 internal layer computations.
341        // This is a useful benchmark as we have a mix of summations of size 15, 23 with other work in between.
342        // I only tested this on `AVX2` though so there might be a better value for other architectures.
343        match N {
344            0 => Self::ZERO,
345            1 => input[0].dup(),
346            2 => input[0].dup() + input[1].dup(),
347            3 => input[0].dup() + input[1].dup() + input[2].dup(),
348            4 => (input[0].dup() + input[1].dup()) + (input[2].dup() + input[3].dup()),
349            5 => Self::sum_array::<4>(&input[..4]) + Self::sum_array::<1>(&input[4..]),
350            6 => Self::sum_array::<4>(&input[..4]) + Self::sum_array::<2>(&input[4..]),
351            7 => Self::sum_array::<4>(&input[..4]) + Self::sum_array::<3>(&input[4..]),
352            8 => Self::sum_array::<4>(&input[..4]) + Self::sum_array::<4>(&input[4..]),
353            _ => {
354                // We know that N > 8 here so this saves an add over the usual
355                // initialisation of acc to Self::ZERO.
356                let mut acc = Self::sum_array::<8>(&input[..8]);
357                for i in (16..=N).step_by(8) {
358                    acc += Self::sum_array::<8>(&input[(i - 8)..i]);
359                }
360                // This would be much cleaner if we could use const generic expressions but
361                // this will do for now.
362                match N & 7 {
363                    0 => acc,
364                    1 => acc + Self::sum_array::<1>(&input[(8 * (N / 8))..]),
365                    2 => acc + Self::sum_array::<2>(&input[(8 * (N / 8))..]),
366                    3 => acc + Self::sum_array::<3>(&input[(8 * (N / 8))..]),
367                    4 => acc + Self::sum_array::<4>(&input[(8 * (N / 8))..]),
368                    5 => acc + Self::sum_array::<5>(&input[(8 * (N / 8))..]),
369                    6 => acc + Self::sum_array::<6>(&input[(8 * (N / 8))..]),
370                    7 => acc + Self::sum_array::<7>(&input[(8 * (N / 8))..]),
371                    _ => unreachable!(),
372                }
373            }
374        }
375    }
376
377    /// Allocates a vector of zero elements of length `len`. Many operating systems zero pages
378    /// before assigning them to a userspace process. In that case, our process should not need to
379    /// write zeros, which would be redundant. However, the compiler may not always recognize this.
380    ///
381    /// In particular, `vec![Self::ZERO; len]` appears to result in redundant userspace zeroing.
382    /// This is the default implementation, but implementers may wish to provide their own
383    /// implementation which transmutes something like `vec![0u32; len]`.
384    #[must_use]
385    #[inline]
386    fn zero_vec(len: usize) -> Vec<Self> {
387        vec![Self::ZERO; len]
388    }
389}
390
391/// A vector space `V` over `F` with a fixed basis. Fixing the basis allows elements of `V` to be
392/// converted to and from `DIMENSION` many elements of `F` which are interpreted as basis coefficients.
393///
394/// We usually expect `F` to be a field but do not enforce this and so allow it to be just a ring.
395/// This lets every ring implement `BasedVectorSpace<Self>` and is useful in a couple of other cases.
396///
397/// ## Safety
398/// We make no guarantees about consistency of the choice of basis across different versions of Plonky3.
399/// If this choice of basis changes, the behaviour of `BasedVectorSpace` will also change. Due to this,
400/// we recommend avoiding using this trait unless absolutely necessary.
401///
402/// ### Mathematical Description
403/// Given a vector space, `A` over `F`, a basis is a set of elements `B = {b_0, ..., b_{n-1}}`
404/// in `A` such that, given any element `a`, we can find a unique set of `n` elements of `F`,
405/// `f_0, ..., f_{n - 1}` satisfying `a = f_0 b_0 + ... + f_{n - 1} b_{n - 1}`. Thus the choice
406/// of `B` gives rise to a natural linear map between the vector space `A` and the canonical
407/// `n` dimensional vector space `F^n`.
408///
409/// This allows us to map between elements of `A` and arrays of `n` elements of `F`.
410/// Clearly this map depends entirely on the choice of basis `B` which may change
411/// across versions of Plonky3.
412///
413/// The situation is slightly more complicated in cases where `F` is not a field but boils down
414/// to an identical description once we enforce that `A` is a free module over `F`.
415pub trait BasedVectorSpace<F: PrimeCharacteristicRing>: Sized {
416    /// The dimension of the vector space, i.e. the number of elements in
417    /// its basis.
418    const DIMENSION: usize;
419
420    /// Fixes a basis for the algebra `A` and uses this to
421    /// map an element of `A` to a slice of `DIMENSION` `F` elements.
422    ///
423    /// # Safety
424    ///
425    /// The value produced by this function fundamentally depends
426    /// on the choice of basis. Care must be taken
427    /// to ensure portability if these values might ever be passed to
428    /// (or rederived within) another compilation environment where a
429    /// different basis might have been used.
430    #[must_use]
431    fn as_basis_coefficients_slice(&self) -> &[F];
432
433    /// Fixes a basis for the algebra `A` and uses this to
434    /// map `DIMENSION` `F` elements to an element of `A`.
435    ///
436    /// # Safety
437    ///
438    /// The value produced by this function fundamentally depends
439    /// on the choice of basis. Care must be taken
440    /// to ensure portability if these values might ever be passed to
441    /// (or rederived within) another compilation environment where a
442    /// different basis might have been used.
443    ///
444    /// Returns `None` if the length of the slice is different to `DIMENSION`.
445    #[must_use]
446    #[inline]
447    fn from_basis_coefficients_slice(slice: &[F]) -> Option<Self> {
448        Self::from_basis_coefficients_iter(slice.iter().cloned())
449    }
450
451    /// Fixes a basis for the algebra `A` and uses this to
452    /// map `DIMENSION` `F` elements to an element of `A`. Similar
453    /// to `core:array::from_fn`, the `DIMENSION` `F` elements are
454    /// given by `Fn(0), ..., Fn(DIMENSION - 1)` called in that order.
455    ///
456    /// # Safety
457    ///
458    /// The value produced by this function fundamentally depends
459    /// on the choice of basis. Care must be taken
460    /// to ensure portability if these values might ever be passed to
461    /// (or rederived within) another compilation environment where a
462    /// different basis might have been used.
463    #[must_use]
464    fn from_basis_coefficients_fn<Fn: FnMut(usize) -> F>(f: Fn) -> Self;
465
466    /// Fixes a basis for the algebra `A` and uses this to
467    /// map `DIMENSION` `F` elements to an element of `A`.
468    ///
469    /// # Safety
470    ///
471    /// The value produced by this function fundamentally depends
472    /// on the choice of basis. Care must be taken
473    /// to ensure portability if these values might ever be passed to
474    /// (or rederived within) another compilation environment where a
475    /// different basis might have been used.
476    ///
477    /// Returns `None` if the length of the iterator is different to `DIMENSION`.
478    #[must_use]
479    fn from_basis_coefficients_iter<I: ExactSizeIterator<Item = F>>(iter: I) -> Option<Self>;
480
481    /// Given a basis for the Algebra `A`, return the i'th basis element.
482    ///
483    /// # Safety
484    ///
485    /// The value produced by this function fundamentally depends
486    /// on the choice of basis. Care must be taken
487    /// to ensure portability if these values might ever be passed to
488    /// (or rederived within) another compilation environment where a
489    /// different basis might have been used.
490    ///
491    /// Returns `None` if `i` is greater than or equal to `DIMENSION`.
492    #[must_use]
493    #[inline]
494    fn ith_basis_element(i: usize) -> Option<Self> {
495        (i < Self::DIMENSION).then(|| Self::from_basis_coefficients_fn(|j| F::from_bool(i == j)))
496    }
497
498    /// Convert from a vector of `Self` to a vector of `F` by flattening the basis coefficients.
499    ///
500    /// Depending on the `BasedVectorSpace` this may be essentially a no-op and should certainly
501    /// be reimplemented in those cases.
502    ///
503    /// # Safety
504    ///
505    /// The value produced by this function fundamentally depends
506    /// on the choice of basis. Care must be taken
507    /// to ensure portability if these values might ever be passed to
508    /// (or rederived within) another compilation environment where a
509    /// different basis might have been used.
510    #[must_use]
511    #[inline]
512    fn flatten_to_base(vec: Vec<Self>) -> Vec<F> {
513        vec.into_iter()
514            .flat_map(|x| x.as_basis_coefficients_slice().to_vec())
515            .collect()
516    }
517
518    /// Convert from a vector of `F` to a vector of `Self` by combining the basis coefficients.
519    ///
520    /// Depending on the `BasedVectorSpace` this may be essentially a no-op and should certainly
521    /// be reimplemented in those cases.
522    ///
523    /// # Panics
524    /// This will panic if the length of `vec` is not a multiple of `Self::DIMENSION`.
525    ///
526    /// # Safety
527    ///
528    /// The value produced by this function fundamentally depends
529    /// on the choice of basis. Care must be taken
530    /// to ensure portability if these values might ever be passed to
531    /// (or rederived within) another compilation environment where a
532    /// different basis might have been used.
533    #[must_use]
534    #[inline]
535    fn reconstitute_from_base(vec: Vec<F>) -> Vec<Self>
536    where
537        F: Sync,
538        Self: Send,
539    {
540        assert_eq!(vec.len() % Self::DIMENSION, 0);
541
542        vec.par_chunks_exact(Self::DIMENSION)
543            .map(|chunk| {
544                Self::from_basis_coefficients_slice(chunk)
545                    .expect("Chunk length not equal to dimension")
546            })
547            .collect()
548    }
549}
550
551/// Values that can act as sponge lanes for delimiter padding.
552///
553/// This is used by symmetric sponge adapters that need canonical `0` and `1` symbols while
554/// supporting both field/ring-based lanes and `u64`-based Keccak lanes behind one API.
555pub trait SpongePaddingValue: Copy {
556    /// The empty-lane value.
557    const PAD_ZERO: Self;
558
559    /// The delimiter value injected after the final absorbed element.
560    const PAD_ONE: Self;
561}
562
563impl<T: PrimeCharacteristicRing + Copy> SpongePaddingValue for T {
564    const PAD_ZERO: Self = Self::ZERO;
565    const PAD_ONE: Self = Self::ONE;
566}
567
568impl SpongePaddingValue for u64 {
569    const PAD_ZERO: Self = 0;
570    const PAD_ONE: Self = 1;
571}
572
573impl<const N: usize> SpongePaddingValue for [u64; N] {
574    const PAD_ZERO: Self = [0; N];
575    const PAD_ONE: Self = [1; N];
576}
577
578impl<F: PrimeCharacteristicRing> BasedVectorSpace<F> for F {
579    const DIMENSION: usize = 1;
580
581    #[inline]
582    fn as_basis_coefficients_slice(&self) -> &[F] {
583        slice::from_ref(self)
584    }
585
586    #[inline]
587    fn from_basis_coefficients_fn<Fn: FnMut(usize) -> F>(mut f: Fn) -> Self {
588        f(0)
589    }
590
591    #[inline]
592    fn from_basis_coefficients_iter<I: ExactSizeIterator<Item = F>>(mut iter: I) -> Option<Self> {
593        (iter.len() == 1).then(|| iter.next().unwrap()) // Unwrap will not panic as we know the length is 1.
594    }
595
596    #[inline]
597    fn flatten_to_base(vec: Vec<Self>) -> Vec<F> {
598        vec
599    }
600
601    #[inline]
602    fn reconstitute_from_base(vec: Vec<F>) -> Vec<Self> {
603        vec
604    }
605}
606
607/// A ring implements `InjectiveMonomial<N>` if the algebraic function
608/// `f(x) = x^N` is an injective map on elements of the ring.
609///
610/// We do not enforce that this map be invertible as there are useful
611/// cases such as polynomials or symbolic expressions where no inverse exists.
612///
613/// However, if the ring is a field with order `q` or an array of such field elements,
614/// then `f(x) = x^N` will be injective if and only if it is invertible and so in
615/// such cases this monomial acts as a permutation. Moreover, this will occur
616/// exactly when `N` and `q - 1` are relatively prime i.e. `gcd(N, q - 1) = 1`.
617pub trait InjectiveMonomial<const N: u64>: PrimeCharacteristicRing {
618    /// Compute `x -> x^n` for a given `n > 1` such that this
619    /// map is injective.
620    #[must_use]
621    #[inline]
622    fn injective_exp_n(&self) -> Self {
623        self.exp_const_u64::<N>()
624    }
625}
626
627/// A ring implements `PermutationMonomial<N>` if the algebraic function
628/// `f(x) = x^N` is invertible and thus acts as a permutation on elements of the ring.
629///
630/// In all cases we care about, this means that we can find another integer `K` such
631/// that `x = x^{NK}` for all elements of our ring.
632pub trait PermutationMonomial<const N: u64>: InjectiveMonomial<N> {
633    /// Compute `x -> x^K` for a given `K > 1` such that
634    /// `x^{NK} = x` for all elements `x`.
635    #[must_use]
636    fn injective_exp_root_n(&self) -> Self;
637}
638
639/// A ring `R` implements `Algebra<F>` if there is an injective homomorphism
640///  from `F` into `R`; in particular only `F::ZERO` maps to `R::ZERO`.
641///
642/// For the most part, we will usually expect `F` to be a field but there
643/// are a few cases where it is handy to allow it to just be a ring. In
644/// particular, every ring naturally implements `Algebra<Self>`.
645///
646/// ### Mathematical Description
647///
648/// Let `x` and `y` denote arbitrary elements of `F`. Then
649/// we require that our map `from` has the properties:
650/// - Preserves Identity: `from(F::ONE) = R::ONE`
651/// - Commutes with Addition: `from(x + y) = from(x) + from(y)`
652/// - Commutes with Multiplication: `from(x * y) = from(x) * from(y)`
653///
654/// Such maps are known as ring homomorphisms and are injective if the
655/// only element which maps to `R::ZERO` is `F::ZERO`.
656///
657/// The existence of this map makes `R` into an `F`-module and hence an `F`-algebra.
658/// If, additionally, `R` is a field, then this makes `R` a field extension of `F`.
659pub trait Algebra<F>:
660    PrimeCharacteristicRing
661    + From<F>
662    + Add<F, Output = Self>
663    + AddAssign<F>
664    + Sub<F, Output = Self>
665    + SubAssign<F>
666    + Mul<F, Output = Self>
667    + MulAssign<F>
668{
669    /// Dot product between algebra elements and base field scalars.
670    ///
671    /// Given arrays `a` (algebra) and `f` (scalars), computes:
672    ///
673    /// ```text
674    ///   result = a[0]*f[0] + a[1]*f[1] + ... + a[N-1]*f[N-1]
675    /// ```
676    ///
677    /// Uses a tree-structured summation to minimize dependency chains and
678    /// maximize throughput on pipelined architectures.
679    #[must_use]
680    #[inline]
681    fn mixed_dot_product<const N: usize>(a: &[Self; N], f: &[F; N]) -> Self
682    where
683        F: Dup,
684    {
685        let products: [Self; N] = core::array::from_fn(|i| a[i].dup() * f[i].dup());
686        Self::sum_array::<N>(&products)
687    }
688
689    /// Optimal chunk size for [`batched_linear_combination`](Self::batched_linear_combination).
690    ///
691    /// Override in implementations where a different chunk size is faster.
692    /// Must be one of 1, 2, 4, 8, 16, 32, or 64; other values cause a compile error.
693    const BATCHED_LC_CHUNK: usize = 8;
694
695    /// Runtime-length linear combination: `Σ values[i] * coeffs[i]`.
696    ///
697    /// Like [`mixed_dot_product`](Self::mixed_dot_product) but for slices whose
698    /// length is not known at compile time. Processes in chunks of
699    /// [`BATCHED_LC_CHUNK`](Self::BATCHED_LC_CHUNK), delegating each chunk to
700    /// `mixed_dot_product` to leverage SIMD-specialized overrides.
701    #[must_use]
702    #[inline]
703    fn batched_linear_combination(values: &[Self], coeffs: &[F]) -> Self
704    where
705        F: Dup,
706    {
707        const {
708            assert!(
709                matches!(Self::BATCHED_LC_CHUNK, 1 | 2 | 4 | 8 | 16 | 32 | 64),
710                "BATCHED_LC_CHUNK must be one of 1, 2, 4, 8, 16, 32, or 64"
711            );
712        }
713        match Self::BATCHED_LC_CHUNK {
714            1 => chunked_linear_combination::<1, Self, F>(values, coeffs),
715            2 => chunked_linear_combination::<2, Self, F>(values, coeffs),
716            4 => chunked_linear_combination::<4, Self, F>(values, coeffs),
717            8 => chunked_linear_combination::<8, Self, F>(values, coeffs),
718            16 => chunked_linear_combination::<16, Self, F>(values, coeffs),
719            32 => chunked_linear_combination::<32, Self, F>(values, coeffs),
720            64 => chunked_linear_combination::<64, Self, F>(values, coeffs),
721            _ => unreachable!(),
722        }
723    }
724}
725
726/// Compute `Σ values[i] * coeffs[i]` over `N` pairs.
727///
728/// A single long sum forces every add to wait for the previous one. Instead,
729/// we split the pairs into groups of `CHUNK`, sum each group on its own, and
730/// add up the group totals. Several partial sums run in parallel on the CPU,
731/// so the total latency is shorter than one straight chain.
732///
733/// The result is the same for every valid `CHUNK` — only the speed changes.
734///
735/// # Layout
736///
737/// For `N = q * CHUNK + r` with `0 <= r < CHUNK`:
738///
739/// ```text
740///     ┌── group 0 ──┬── group 1 ──┬─ ... ─┬── tail (r) ──┐
741///     │   CHUNK     │   CHUNK     │       │   r pairs    │
742///     └──────┬──────┴──────┬──────┴───────┴──────┬───────┘
743///            ▼             ▼                     ▼
744///       tree-sum      tree-sum             scalar adds
745///            └──► acc ◄────┴──────► acc ◄────────┘
746/// ```
747///
748/// # Panics
749///
750/// Compile-time panic if `CHUNK` is zero.
751#[must_use]
752#[inline]
753pub fn chunked_mixed_dot_product<
754    const CHUNK: usize,
755    A: Algebra<F> + Dup,
756    F: Dup,
757    const N: usize,
758>(
759    values: &[A; N],
760    coeffs: &[F; N],
761) -> A {
762    // CHUNK = 0 would make the group count undefined.
763    const { assert!(CHUNK != 0, "chunked_mixed_dot_product requires CHUNK > 0") }
764
765    // Fast path: N fits in one group → single balanced tree, no outer loop.
766    if N <= CHUNK {
767        let products: [A; N] = core::array::from_fn(|i| values[i].dup() * coeffs[i].dup());
768        return A::sum_array::<N>(&products);
769    }
770
771    // Split off q complete groups; r leftover pairs go to the tail.
772    let (val_chunks, val_rem) = values.as_slice().as_chunks::<CHUNK>();
773    let (coeff_chunks, coeff_rem) = coeffs.as_slice().as_chunks::<CHUNK>();
774    debug_assert_eq!(val_chunks.len(), coeff_chunks.len());
775
776    // One add per group; runs in parallel with the next group's multiplies.
777    let mut acc = A::ZERO;
778    for (vc, cc) in zip(val_chunks, coeff_chunks) {
779        let products: [A; CHUNK] = core::array::from_fn(|i| vc[i].dup() * cc[i].dup());
780        // Balanced tree of depth log2(CHUNK), folded into acc.
781        acc += A::sum_array::<CHUNK>(&products);
782    }
783
784    // Tail: at most CHUNK - 1 pairs as a serial multiply-add chain.
785    debug_assert_eq!(val_rem.len(), coeff_rem.len());
786    for (v, c) in zip(val_rem, coeff_rem) {
787        acc += v.dup() * c.dup();
788    }
789    acc
790}
791
792/// Lower a runtime chunk size into a const-generic call to the fixed-chunk dot product.
793///
794/// Each backend picks its preferred chunk size at runtime; the inner routine
795/// needs it as a const for unrolling. This wrapper bridges the gap.
796///
797/// Supported sizes: `1, 2, 4, 8, 16, 32, 64` — powers of two only, so the
798/// inner balanced tree stays balanced.
799///
800/// # Panics
801///
802/// Runtime panic if `chunk` is outside the supported set.
803#[must_use]
804#[inline]
805pub fn dispatch_chunked_mixed_dot_product<A: Algebra<F> + Dup, F: Dup, const N: usize>(
806    values: &[A; N],
807    coeffs: &[F; N],
808    chunk: usize,
809) -> A {
810    match chunk {
811        1 => chunked_mixed_dot_product::<1, A, F, N>(values, coeffs),
812        2 => chunked_mixed_dot_product::<2, A, F, N>(values, coeffs),
813        4 => chunked_mixed_dot_product::<4, A, F, N>(values, coeffs),
814        8 => chunked_mixed_dot_product::<8, A, F, N>(values, coeffs),
815        16 => chunked_mixed_dot_product::<16, A, F, N>(values, coeffs),
816        32 => chunked_mixed_dot_product::<32, A, F, N>(values, coeffs),
817        64 => chunked_mixed_dot_product::<64, A, F, N>(values, coeffs),
818        // Unsupported chunk = configuration bug in a backend.
819        _ => panic!("mixed_dot_product chunk must be one of 1, 2, 4, 8, 16, 32, or 64"),
820    }
821}
822
823/// Linear combination over runtime-length slices, processing in chunks of `CHUNK`.
824///
825/// Computes `Σ values[i] * coeffs[i]` by batching into fixed-size chunks and
826/// delegating each to [`Algebra::mixed_dot_product`], which SIMD implementations
827/// override with fused multiply-accumulate intrinsics.
828///
829/// This is the implementation backing [`Algebra::batched_linear_combination`].
830/// Use it directly when overriding that method with a different chunk size.
831#[must_use]
832#[inline]
833pub fn chunked_linear_combination<const CHUNK: usize, A: Algebra<F> + Dup, F: Dup>(
834    values: &[A],
835    coeffs: &[F],
836) -> A {
837    const { assert!(CHUNK != 0, "chunked_linear_combination requires CHUNK > 0") }
838    assert_eq!(values.len(), coeffs.len());
839
840    let (val_chunks, val_rem) = values.as_chunks::<CHUNK>();
841    let (coeff_chunks, coeff_rem) = coeffs.as_chunks::<CHUNK>();
842
843    debug_assert_eq!(val_chunks.len(), coeff_chunks.len());
844    let mut acc = A::ZERO;
845    for (vc, cc) in zip(val_chunks, coeff_chunks) {
846        acc += A::mixed_dot_product::<CHUNK>(vc, cc);
847    }
848
849    debug_assert_eq!(val_rem.len(), coeff_rem.len());
850    for (v, c) in zip(val_rem, coeff_rem) {
851        acc += v.dup() * c.dup();
852    }
853    acc
854}
855
856// Every ring is an algebra over itself.
857impl<R: PrimeCharacteristicRing> Algebra<R> for R {}
858
859/// A collection of methods designed to help hash field elements.
860///
861/// Most fields will want to reimplement many/all of these methods as the default implementations
862/// are slow and involve converting to/from byte representations.
863pub trait RawDataSerializable: Sized {
864    /// The number of bytes which this field element occupies in memory.
865    /// Must be equal to the length of self.into_bytes().
866    const NUM_BYTES: usize;
867
868    /// Convert a field element into a collection of bytes.
869    #[must_use]
870    fn into_bytes(self) -> impl IntoIterator<Item = u8>;
871
872    /// Convert an iterator of field elements into an iterator of bytes.
873    #[must_use]
874    fn into_byte_stream(input: impl IntoIterator<Item = Self>) -> impl IntoIterator<Item = u8> {
875        input.into_iter().flat_map(|elem| elem.into_bytes())
876    }
877
878    /// Convert an iterator of field elements into an iterator of u32s.
879    ///
880    /// If `NUM_BYTES` does not divide `4`, multiple `F`s may be packed together to make a single `u32`. Furthermore,
881    /// if `NUM_BYTES * input.len()` does not divide `4`, the final `u32` will involve padding bytes which are set to `0`.
882    #[must_use]
883    fn into_u32_stream(input: impl IntoIterator<Item = Self>) -> impl IntoIterator<Item = u32> {
884        let bytes = Self::into_byte_stream(input);
885        iter_array_chunks_padded(bytes, 0).map(u32::from_le_bytes)
886    }
887
888    /// Convert an iterator of field elements into an iterator of u64s.
889    ///
890    /// If `NUM_BYTES` does not divide `8`, multiple `F`s may be packed together to make a single `u64`. Furthermore,
891    /// if `NUM_BYTES * input.len()` does not divide `8`, the final `u64` will involve padding bytes which are set to `0`.
892    #[must_use]
893    fn into_u64_stream(input: impl IntoIterator<Item = Self>) -> impl IntoIterator<Item = u64> {
894        let bytes = Self::into_byte_stream(input);
895        iter_array_chunks_padded(bytes, 0).map(u64::from_le_bytes)
896    }
897
898    /// Convert an iterator of field element arrays into an iterator of byte arrays.
899    ///
900    /// Converts an element `[F; N]` into the byte array `[[u8; N]; NUM_BYTES]`. This is
901    /// intended for use with vectorized hash functions which use vector operations
902    /// to compute several hashes in parallel.
903    #[must_use]
904    fn into_parallel_byte_streams<const N: usize>(
905        input: impl IntoIterator<Item = [Self; N]>,
906    ) -> impl IntoIterator<Item = [u8; N]> {
907        input.into_iter().flat_map(|vector| {
908            let bytes = vector.map(|elem| elem.into_bytes().into_iter().collect::<Vec<_>>());
909            (0..Self::NUM_BYTES).map(move |i| array::from_fn(|j| bytes[j][i]))
910        })
911    }
912
913    /// Convert an iterator of field element arrays into an iterator of u32 arrays.
914    ///
915    /// Converts an element `[F; N]` into the u32 array `[[u32; N]; NUM_BYTES/4]`. This is
916    /// intended for use with vectorized hash functions which use vector operations
917    /// to compute several hashes in parallel.
918    ///
919    /// This function is guaranteed to be equivalent to starting with `Iterator<[F; N]>` performing a transpose
920    /// operation to get `[Iterator<F>; N]`, calling `into_u32_stream` on each element to get `[Iterator<u32>; N]` and then
921    /// performing another transpose operation to get `Iterator<[u32; N]>`.
922    ///
923    /// If `NUM_BYTES` does not divide `4`, multiple `[F; N]`s may be packed together to make a single `[u32; N]`. Furthermore,
924    /// if `NUM_BYTES * input.len()` does not divide `4`, the final `[u32; N]` will involve padding bytes which are set to `0`.
925    #[must_use]
926    fn into_parallel_u32_streams<const N: usize>(
927        input: impl IntoIterator<Item = [Self; N]>,
928    ) -> impl IntoIterator<Item = [u32; N]> {
929        let bytes = Self::into_parallel_byte_streams(input);
930        iter_array_chunks_padded(bytes, [0; N]).map(|byte_array: [[u8; N]; 4]| {
931            array::from_fn(|i| u32::from_le_bytes(array::from_fn(|j| byte_array[j][i])))
932        })
933    }
934
935    /// Convert an iterator of field element arrays into an iterator of u64 arrays.
936    ///
937    /// Converts an element `[F; N]` into the u64 array `[[u64; N]; NUM_BYTES/8]`. This is
938    /// intended for use with vectorized hash functions which use vector operations
939    /// to compute several hashes in parallel.
940    ///
941    /// This function is guaranteed to be equivalent to starting with `Iterator<[F; N]>` performing a transpose
942    /// operation to get `[Iterator<F>; N]`, calling `into_u64_stream` on each element to get `[Iterator<u64>; N]` and then
943    /// performing another transpose operation to get `Iterator<[u64; N]>`.
944    ///
945    /// If `NUM_BYTES` does not divide `8`, multiple `[F; N]`s may be packed together to make a single `[u64; N]`. Furthermore,
946    /// if `NUM_BYTES * input.len()` does not divide `8`, the final `[u64; N]` will involve padding bytes which are set to `0`.
947    #[must_use]
948    fn into_parallel_u64_streams<const N: usize>(
949        input: impl IntoIterator<Item = [Self; N]>,
950    ) -> impl IntoIterator<Item = [u64; N]> {
951        let bytes = Self::into_parallel_byte_streams(input);
952        iter_array_chunks_padded(bytes, [0; N]).map(|byte_array: [[u8; N]; 8]| {
953            array::from_fn(|i| u64::from_le_bytes(array::from_fn(|j| byte_array[j][i])))
954        })
955    }
956}
957
958/// A field `F`. This permits both modular fields `ℤ/p` along with their field extensions.
959///
960/// A ring is a field if every element `x` has a unique multiplicative inverse `x^{-1}`
961/// which satisfies `x * x^{-1} = F::ONE`.
962pub trait Field:
963    Algebra<Self>
964    + RawDataSerializable
965    + Packable
966    + 'static
967    + Copy
968    + Div<Self, Output = Self>
969    + DivAssign
970    + Add<Self::Packing, Output = Self::Packing>
971    + Sub<Self::Packing, Output = Self::Packing>
972    + Mul<Self::Packing, Output = Self::Packing>
973    + Eq
974    + Hash
975    + Send
976    + Sync
977    + Display
978    + Serialize
979    + DeserializeOwned
980{
981    type Packing: PackedField<Scalar = Self>;
982
983    /// A generator of this field's multiplicative group.
984    const GENERATOR: Self;
985
986    /// Check if the given field element is equal to the unique additive identity (ZERO).
987    #[must_use]
988    #[inline]
989    fn is_zero(&self) -> bool {
990        *self == Self::ZERO
991    }
992
993    /// Check if the given field element is equal to the unique multiplicative identity (ONE).
994    #[must_use]
995    #[inline]
996    fn is_one(&self) -> bool {
997        *self == Self::ONE
998    }
999
1000    /// The multiplicative inverse of this field element, if it exists.
1001    ///
1002    /// NOTE: The inverse of `0` is undefined and will return `None`.
1003    #[must_use]
1004    fn try_inverse(&self) -> Option<Self>;
1005
1006    /// The multiplicative inverse of this field element.
1007    ///
1008    /// # Panics
1009    /// The function will panic if the field element is `0`.
1010    /// Use try_inverse if you want to handle this case.
1011    #[must_use]
1012    fn inverse(&self) -> Self {
1013        self.try_inverse().expect("Tried to invert zero")
1014    }
1015
1016    /// Add two slices of field elements together, returning the result in the first slice.
1017    ///
1018    /// Makes use of packing to speed up the addition.
1019    ///
1020    /// This is optimal for cases where the two slices are small to medium length. E.g. between
1021    /// `F::Packing::WIDTH` and roughly however many elements fit in a cache line.
1022    ///
1023    /// For larger slices, it's likely worthwhile to use parallelization before calling this.
1024    /// Similarly if you need to add a large number of slices together, it's best to
1025    /// break them into small chunks and call this on the smaller chunks.
1026    ///
1027    /// # Panics
1028    /// The function will panic if the lengths of the two slices are not equal.
1029    #[inline]
1030    fn add_slices(slice_1: &mut [Self], slice_2: &[Self]) {
1031        let (shorts_1, suffix_1) = Self::Packing::pack_slice_with_suffix_mut(slice_1);
1032        let (shorts_2, suffix_2) = Self::Packing::pack_slice_with_suffix(slice_2);
1033        debug_assert_eq!(shorts_1.len(), shorts_2.len());
1034        debug_assert_eq!(suffix_1.len(), suffix_2.len());
1035        for (x_1, &x_2) in shorts_1.iter_mut().zip(shorts_2) {
1036            *x_1 += x_2;
1037        }
1038        for (x_1, &x_2) in suffix_1.iter_mut().zip(suffix_2) {
1039            *x_1 += x_2;
1040        }
1041    }
1042
1043    /// The number of elements in the field.
1044    ///
1045    /// This will either be prime if the field is a PrimeField or a power of a
1046    /// prime if the field is an extension field.
1047    #[must_use]
1048    fn order() -> BigUint;
1049
1050    /// The number of bits required to define an element of this field.
1051    ///
1052    /// Usually due to storage and practical reasons the memory size of
1053    /// a field element will be a little larger than bits().
1054    #[must_use]
1055    #[inline]
1056    fn bits() -> usize {
1057        Self::order().bits() as usize
1058    }
1059}
1060
1061/// A field isomorphic to `ℤ/p` for some prime `p`.
1062///
1063/// There is a natural map from `ℤ` to `ℤ/p` which sends an integer `r` to its conjugacy class `[r]`.
1064/// Canonically, each conjugacy class `[r]` can be represented by the unique integer `s` in `[0, p - 1)`
1065/// satisfying `s = r mod p`. This however is often not the most convenient computational representation
1066/// and so internal representations of field elements might differ from this and may change over time.
1067pub trait PrimeField:
1068    Field
1069    + Ord
1070    + QuotientMap<u8>
1071    + QuotientMap<u16>
1072    + QuotientMap<u32>
1073    + QuotientMap<u64>
1074    + QuotientMap<u128>
1075    + QuotientMap<usize>
1076    + QuotientMap<i8>
1077    + QuotientMap<i16>
1078    + QuotientMap<i32>
1079    + QuotientMap<i64>
1080    + QuotientMap<i128>
1081    + QuotientMap<isize>
1082{
1083    /// Return the representative of `value` in canonical form
1084    /// which lies in the range `0 <= x < self.order()`.
1085    #[must_use]
1086    fn as_canonical_biguint(&self) -> BigUint;
1087}
1088
1089/// A prime field `ℤ/p` with order, `p < 2^64`.
1090pub trait PrimeField64: PrimeField {
1091    const ORDER_U64: u64;
1092
1093    /// Return the representative of `value` in canonical form
1094    /// which lies in the range `0 <= x < ORDER_U64`.
1095    #[must_use]
1096    fn as_canonical_u64(&self) -> u64;
1097
1098    /// Convert a field element to a `u64` such that any two field elements
1099    /// are converted to the same `u64` if and only if they represent the same value.
1100    ///
1101    /// This will be the fastest way to convert a field element to a `u64` and
1102    /// is intended for use in hashing. It will also be consistent across different targets.
1103    #[must_use]
1104    #[inline(always)]
1105    fn to_unique_u64(&self) -> u64 {
1106        // A simple default which is optimal for some fields.
1107        self.as_canonical_u64()
1108    }
1109}
1110
1111/// A prime field `ℤ/p` with order `p < 2^32`.
1112pub trait PrimeField32: PrimeField64 {
1113    const ORDER_U32: u32;
1114
1115    /// Return the representative of `value` in canonical form
1116    /// which lies in the range `0 <= x < ORDER_U64`.
1117    #[must_use]
1118    fn as_canonical_u32(&self) -> u32;
1119
1120    /// Convert a field element to a `u32` such that any two field elements
1121    /// are converted to the same `u32` if and only if they represent the same value.
1122    ///
1123    /// This will be the fastest way to convert a field element to a `u32` and
1124    /// is intended for use in hashing. It will also be consistent across different targets.
1125    #[must_use]
1126    #[inline(always)]
1127    fn to_unique_u32(&self) -> u32 {
1128        // A simple default which is optimal for some fields.
1129        self.as_canonical_u32()
1130    }
1131}
1132
1133/// A field `EF` which is also an algebra over a field `F`.
1134///
1135/// This provides a couple of convenience methods on top of the
1136/// standard methods provided by `Field`, `Algebra<F>` and `BasedVectorSpace<F>`.
1137///
1138/// It also provides a type which handles packed vectors of extension field elements.
1139pub trait ExtensionField<Base: Field>: Field + Algebra<Base> + BasedVectorSpace<Base> {
1140    type ExtensionPacking: PackedFieldExtension<Base, Self> + 'static + Copy + Send + Sync;
1141
1142    /// Determine if the given element lies in the base field.
1143    #[must_use]
1144    fn is_in_basefield(&self) -> bool;
1145
1146    /// If the element lies in the base field project it down.
1147    /// Otherwise return None.
1148    #[must_use]
1149    fn as_base(&self) -> Option<Base>;
1150}
1151
1152// Every field is trivially a one dimensional extension over itself.
1153impl<F: Field> ExtensionField<F> for F {
1154    type ExtensionPacking = F::Packing;
1155
1156    #[inline]
1157    fn is_in_basefield(&self) -> bool {
1158        true
1159    }
1160
1161    #[inline]
1162    fn as_base(&self) -> Option<F> {
1163        Some(*self)
1164    }
1165}
1166
1167/// A field which supplies information like the two-adicity of its multiplicative group, and methods
1168/// for obtaining two-adic generators.
1169pub trait TwoAdicField: Field {
1170    /// The number of factors of two in this field's multiplicative group.
1171    const TWO_ADICITY: usize;
1172
1173    /// Returns a generator of the multiplicative group of order `2^bits`.
1174    /// Assumes `bits <= TWO_ADICITY`, otherwise the result is undefined.
1175    #[must_use]
1176    fn two_adic_generator(bits: usize) -> Self;
1177}
1178
1179/// An iterator which returns the powers of a base element `b` shifted by current `c`: `c, c * b, c * b^2, ...`.
1180#[derive(Clone, Debug)]
1181pub struct Powers<R: PrimeCharacteristicRing> {
1182    pub base: R,
1183    pub current: R,
1184}
1185
1186impl<R: PrimeCharacteristicRing> Iterator for Powers<R> {
1187    type Item = R;
1188
1189    fn next(&mut self) -> Option<R> {
1190        let result = self.current.dup();
1191        self.current *= self.base.dup();
1192        Some(result)
1193    }
1194}
1195
1196impl<R: PrimeCharacteristicRing> Powers<R> {
1197    /// Returns an iterator yielding the first `n` powers.
1198    #[inline]
1199    #[must_use]
1200    pub const fn take(self, n: usize) -> BoundedPowers<R> {
1201        BoundedPowers { iter: self, n }
1202    }
1203
1204    /// Fills `slice` with the next `slice.len()` powers yielded by the iterator.
1205    #[inline]
1206    pub fn fill(self, slice: &mut [R]) {
1207        slice
1208            .iter_mut()
1209            .zip(self)
1210            .for_each(|(out, next)| *out = next);
1211    }
1212
1213    /// Wrapper for `self.take(n).collect()`.
1214    #[inline]
1215    #[must_use]
1216    pub fn collect_n(self, n: usize) -> Vec<R> {
1217        self.take(n).collect()
1218    }
1219}
1220
1221impl<F: Field> BoundedPowers<F> {
1222    /// Collect exactly `num_powers` ascending powers of `self.base`, starting at `self.current`.
1223    ///
1224    /// # Details
1225    ///
1226    /// The computation is split evenly amongst available threads, and each chunk is computed
1227    /// using packed fields.
1228    ///
1229    /// # Performance
1230    ///
1231    /// Enable the `parallel` feature to enable parallelization.
1232    #[must_use]
1233    pub fn collect(self) -> Vec<F> {
1234        let num_powers = self.n;
1235
1236        // When num_powers is small, fallback to serial computation
1237        if num_powers < 16 {
1238            return self.take(num_powers).collect();
1239        }
1240
1241        // Allocate buffer storing packed powers, containing at least `num_powers` scalars.
1242        let width = F::Packing::WIDTH;
1243        let num_packed = num_powers.div_ceil(width);
1244        let mut points_packed = F::Packing::zero_vec(num_packed);
1245
1246        // Split computation evenly among threads
1247        let num_threads = current_num_threads().max(1);
1248        let chunk_size = num_packed.div_ceil(num_threads);
1249
1250        // Precompute base for each chunk.
1251        let base = self.iter.base;
1252        let chunk_base = base.exp_u64((chunk_size * width) as u64);
1253        let shift = self.iter.current;
1254
1255        points_packed
1256            .par_chunks_mut(chunk_size)
1257            .enumerate()
1258            .for_each(|(chunk_idx, chunk_slice)| {
1259                // First power in this chunk
1260                let chunk_start = shift * chunk_base.exp_u64(chunk_idx as u64);
1261
1262                // Fill the chunk with packed powers.
1263                F::Packing::packed_shifted_powers(base, chunk_start).fill(chunk_slice);
1264            });
1265
1266        // return the number of requested points, discarding the unused packed powers
1267        // SAFETY: size_of::<F::Packing> always divides size_of::<F::Packing>.
1268        let mut points = unsafe { flatten_to_base(points_packed) };
1269        points.truncate(num_powers);
1270        points
1271    }
1272}
1273
1274/// Same as [`Powers`], but returns a bounded number of powers.
1275#[derive(Clone, Debug)]
1276pub struct BoundedPowers<R: PrimeCharacteristicRing> {
1277    iter: Powers<R>,
1278    n: usize,
1279}
1280
1281impl<R: PrimeCharacteristicRing> Iterator for BoundedPowers<R> {
1282    type Item = R;
1283
1284    fn next(&mut self) -> Option<R> {
1285        (self.n != 0).then(|| {
1286            self.n -= 1;
1287            self.iter.next().unwrap()
1288        })
1289    }
1290}