p3_goldilocks/
lib.rs

1//! The prime field known as Goldilocks, defined as `F_p` where `p = 2^64 - 2^32 + 1`.
2
3#![no_std]
4
5extern crate alloc;
6
7mod extension;
8mod mds;
9mod poseidon2;
10
11use core::fmt;
12use core::fmt::{Debug, Display, Formatter};
13use core::hash::{Hash, Hasher};
14use core::iter::{Product, Sum};
15use core::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Sub, SubAssign};
16
17pub use mds::*;
18use num_bigint::BigUint;
19use p3_field::{
20    exp_10540996611094048183, exp_u64_by_squaring, halve_u64, AbstractField, Field, Packable,
21    PrimeField, PrimeField64, TwoAdicField,
22};
23use p3_util::{assume, branch_hint};
24pub use poseidon2::*;
25use rand::distributions::{Distribution, Standard};
26use rand::Rng;
27use serde::{Deserialize, Serialize};
28
29/// The Goldilocks prime
30const P: u64 = 0xFFFF_FFFF_0000_0001;
31
32/// The prime field known as Goldilocks, defined as `F_p` where `p = 2^64 - 2^32 + 1`.
33#[derive(Copy, Clone, Default, Serialize, Deserialize)]
34pub struct Goldilocks {
35    /// Not necessarily canonical.
36    value: u64,
37}
38
39impl Goldilocks {
40    const fn new(value: u64) -> Self {
41        Self { value }
42    }
43
44    /// Two's complement of `ORDER`, i.e. `2^64 - ORDER = 2^32 - 1`.
45    const NEG_ORDER: u64 = Self::ORDER_U64.wrapping_neg();
46}
47
48impl PartialEq for Goldilocks {
49    fn eq(&self, other: &Self) -> bool {
50        self.as_canonical_u64() == other.as_canonical_u64()
51    }
52}
53
54impl Eq for Goldilocks {}
55
56impl Packable for Goldilocks {}
57
58impl Hash for Goldilocks {
59    fn hash<H: Hasher>(&self, state: &mut H) {
60        state.write_u64(self.as_canonical_u64());
61    }
62}
63
64impl Ord for Goldilocks {
65    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
66        self.as_canonical_u64().cmp(&other.as_canonical_u64())
67    }
68}
69
70impl PartialOrd for Goldilocks {
71    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
72        Some(self.cmp(other))
73    }
74}
75
76impl Display for Goldilocks {
77    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
78        Display::fmt(&self.value, f)
79    }
80}
81
82impl Debug for Goldilocks {
83    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
84        Debug::fmt(&self.value, f)
85    }
86}
87
88impl Distribution<Goldilocks> for Standard {
89    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Goldilocks {
90        loop {
91            let next_u64 = rng.next_u64();
92            let is_canonical = next_u64 < Goldilocks::ORDER_U64;
93            if is_canonical {
94                return Goldilocks::new(next_u64);
95            }
96        }
97    }
98}
99
100impl AbstractField for Goldilocks {
101    type F = Self;
102
103    fn zero() -> Self {
104        Self::new(0)
105    }
106    fn one() -> Self {
107        Self::new(1)
108    }
109    fn two() -> Self {
110        Self::new(2)
111    }
112    fn neg_one() -> Self {
113        Self::new(Self::ORDER_U64 - 1)
114    }
115
116    #[inline]
117    fn from_f(f: Self::F) -> Self {
118        f
119    }
120
121    fn from_bool(b: bool) -> Self {
122        Self::new(u64::from(b))
123    }
124
125    fn from_canonical_u8(n: u8) -> Self {
126        Self::new(u64::from(n))
127    }
128
129    fn from_canonical_u16(n: u16) -> Self {
130        Self::new(u64::from(n))
131    }
132
133    fn from_canonical_u32(n: u32) -> Self {
134        Self::new(u64::from(n))
135    }
136
137    fn from_canonical_u64(n: u64) -> Self {
138        Self::new(n)
139    }
140
141    fn from_canonical_usize(n: usize) -> Self {
142        Self::new(n as u64)
143    }
144
145    fn from_wrapped_u32(n: u32) -> Self {
146        // A u32 must be canonical, plus we don't store canonical encodings anyway, so there's no
147        // need for a reduction.
148        Self::new(u64::from(n))
149    }
150
151    fn from_wrapped_u64(n: u64) -> Self {
152        // There's no need to reduce `n` to canonical form, as our internal encoding is
153        // non-canonical, so there's no need for a reduction.
154        Self::new(n)
155    }
156
157    // Sage: GF(2^64 - 2^32 + 1).multiplicative_generator()
158    fn generator() -> Self {
159        Self::new(7)
160    }
161}
162
163impl Field for Goldilocks {
164    // TODO: Add cfg-guarded Packing for AVX2, NEON, etc.
165    type Packing = Self;
166
167    fn is_zero(&self) -> bool {
168        self.value == 0 || self.value == Self::ORDER_U64
169    }
170
171    #[inline]
172    fn exp_u64_generic<AF: AbstractField<F = Self>>(val: AF, power: u64) -> AF {
173        match power {
174            10540996611094048183 => exp_10540996611094048183(val), // used to compute x^{1/7}
175            _ => exp_u64_by_squaring(val, power),
176        }
177    }
178
179    fn try_inverse(&self) -> Option<Self> {
180        if self.is_zero() {
181            return None;
182        }
183
184        // From Fermat's little theorem, in a prime field `F_p`, the inverse of `a` is `a^(p-2)`.
185        //
186        // compute a^(p - 2) using 72 multiplications
187        // The exponent p - 2 is represented in binary as:
188        // 0b1111111111111111111111111111111011111111111111111111111111111111
189        // Adapted from: https://github.com/facebook/winterfell/blob/d238a1/math/src/field/f64/mod.rs#L136-L164
190
191        // compute base^11
192        let t2 = self.square() * *self;
193
194        // compute base^111
195        let t3 = t2.square() * *self;
196
197        // compute base^111111 (6 ones)
198        // repeatedly square t3 3 times and multiply by t3
199        let t6 = exp_acc::<3>(t3, t3);
200        let t60 = t6.square();
201        let t7 = t60 * *self;
202
203        // compute base^111111111111 (12 ones)
204        // repeatedly square t6 6 times and multiply by t6
205        let t12 = exp_acc::<5>(t60, t6);
206
207        // compute base^111111111111111111111111 (24 ones)
208        // repeatedly square t12 12 times and multiply by t12
209        let t24 = exp_acc::<12>(t12, t12);
210
211        // compute base^1111111111111111111111111111111 (31 ones)
212        // repeatedly square t24 6 times and multiply by t6 first. then square t30 and
213        // multiply by base
214        let t31 = exp_acc::<7>(t24, t7);
215
216        // compute base^111111111111111111111111111111101111111111111111111111111111111
217        // repeatedly square t31 32 times and multiply by t31
218        let t63 = exp_acc::<32>(t31, t31);
219
220        // compute base^1111111111111111111111111111111011111111111111111111111111111111
221        Some(t63.square() * *self)
222    }
223
224    #[inline]
225    fn halve(&self) -> Self {
226        Goldilocks::new(halve_u64::<P>(self.value))
227    }
228
229    #[inline]
230    fn order() -> BigUint {
231        P.into()
232    }
233}
234
235impl PrimeField for Goldilocks {
236    fn as_canonical_biguint(&self) -> BigUint {
237        <Self as PrimeField64>::as_canonical_u64(self).into()
238    }
239}
240
241impl PrimeField64 for Goldilocks {
242    const ORDER_U64: u64 = P;
243
244    #[inline]
245    fn as_canonical_u64(&self) -> u64 {
246        let mut c = self.value;
247        // We only need one condition subtraction, since 2 * ORDER would not fit in a u64.
248        if c >= Self::ORDER_U64 {
249            c -= Self::ORDER_U64;
250        }
251        c
252    }
253}
254
255impl TwoAdicField for Goldilocks {
256    const TWO_ADICITY: usize = 32;
257
258    fn two_adic_generator(bits: usize) -> Self {
259        // TODO: Consider a `match` which may speed this up.
260        assert!(bits <= Self::TWO_ADICITY);
261        let base = Self::new(1_753_635_133_440_165_772); // generates the whole 2^TWO_ADICITY group
262        base.exp_power_of_2(Self::TWO_ADICITY - bits)
263    }
264}
265
266impl Add for Goldilocks {
267    type Output = Self;
268
269    fn add(self, rhs: Self) -> Self {
270        let (sum, over) = self.value.overflowing_add(rhs.value);
271        let (mut sum, over) = sum.overflowing_add(u64::from(over) * Self::NEG_ORDER);
272        if over {
273            // NB: self.value > Self::ORDER && rhs.value > Self::ORDER is necessary but not
274            // sufficient for double-overflow.
275            // This assume does two things:
276            //  1. If compiler knows that either self.value or rhs.value <= ORDER, then it can skip
277            //     this check.
278            //  2. Hints to the compiler how rare this double-overflow is (thus handled better with
279            //     a branch).
280            assume(self.value > Self::ORDER_U64 && rhs.value > Self::ORDER_U64);
281            branch_hint();
282            sum += Self::NEG_ORDER; // Cannot overflow.
283        }
284        Self::new(sum)
285    }
286}
287
288impl AddAssign for Goldilocks {
289    fn add_assign(&mut self, rhs: Self) {
290        *self = *self + rhs;
291    }
292}
293
294impl Sum for Goldilocks {
295    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
296        // This is faster than iter.reduce(|x, y| x + y).unwrap_or(Self::zero()) for iterators of length > 2.
297
298        // This sum will not overflow so long as iter.len() < 2^64.
299        let sum = iter.map(|x| x.value as u128).sum::<u128>();
300        reduce128(sum)
301    }
302}
303
304impl Sub for Goldilocks {
305    type Output = Self;
306
307    fn sub(self, rhs: Self) -> Self {
308        let (diff, under) = self.value.overflowing_sub(rhs.value);
309        let (mut diff, under) = diff.overflowing_sub(u64::from(under) * Self::NEG_ORDER);
310        if under {
311            // NB: self.value < NEG_ORDER - 1 && rhs.value > ORDER is necessary but not
312            // sufficient for double-underflow.
313            // This assume does two things:
314            //  1. If compiler knows that either self.value >= NEG_ORDER - 1 or rhs.value <= ORDER,
315            //     then it can skip this check.
316            //  2. Hints to the compiler how rare this double-underflow is (thus handled better
317            //     with a branch).
318            assume(self.value < Self::NEG_ORDER - 1 && rhs.value > Self::ORDER_U64);
319            branch_hint();
320            diff -= Self::NEG_ORDER; // Cannot underflow.
321        }
322        Self::new(diff)
323    }
324}
325
326impl SubAssign for Goldilocks {
327    fn sub_assign(&mut self, rhs: Self) {
328        *self = *self - rhs;
329    }
330}
331
332impl Neg for Goldilocks {
333    type Output = Self;
334
335    fn neg(self) -> Self::Output {
336        Self::new(Self::ORDER_U64 - self.as_canonical_u64())
337    }
338}
339
340impl Mul for Goldilocks {
341    type Output = Self;
342
343    fn mul(self, rhs: Self) -> Self {
344        reduce128(u128::from(self.value) * u128::from(rhs.value))
345    }
346}
347
348impl MulAssign for Goldilocks {
349    fn mul_assign(&mut self, rhs: Self) {
350        *self = *self * rhs;
351    }
352}
353
354impl Product for Goldilocks {
355    fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
356        iter.reduce(|x, y| x * y).unwrap_or(Self::one())
357    }
358}
359
360impl Div for Goldilocks {
361    type Output = Self;
362
363    #[allow(clippy::suspicious_arithmetic_impl)]
364    fn div(self, rhs: Self) -> Self {
365        self * rhs.inverse()
366    }
367}
368
369/// Squares the base N number of times and multiplies the result by the tail value.
370#[inline(always)]
371fn exp_acc<const N: usize>(base: Goldilocks, tail: Goldilocks) -> Goldilocks {
372    base.exp_power_of_2(N) * tail
373}
374
375/// Reduces to a 64-bit value. The result might not be in canonical form; it could be in between the
376/// field order and `2^64`.
377#[inline]
378pub(crate) fn reduce128(x: u128) -> Goldilocks {
379    let (x_lo, x_hi) = split(x); // This is a no-op
380    let x_hi_hi = x_hi >> 32;
381    let x_hi_lo = x_hi & Goldilocks::NEG_ORDER;
382
383    let (mut t0, borrow) = x_lo.overflowing_sub(x_hi_hi);
384    if borrow {
385        branch_hint(); // A borrow is exceedingly rare. It is faster to branch.
386        t0 -= Goldilocks::NEG_ORDER; // Cannot underflow.
387    }
388    let t1 = x_hi_lo * Goldilocks::NEG_ORDER;
389    let t2 = unsafe { add_no_canonicalize_trashing_input(t0, t1) };
390    Goldilocks::new(t2)
391}
392
393#[inline]
394#[allow(clippy::cast_possible_truncation)]
395const fn split(x: u128) -> (u64, u64) {
396    (x as u64, (x >> 64) as u64)
397}
398
399/// Fast addition modulo ORDER for x86-64.
400/// This function is marked unsafe for the following reasons:
401///   - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001.
402///   - It is only faster in some circumstances. In particular, on x86 it overwrites both inputs in
403///     the registers, so its use is not recommended when either input will be used again.
404#[inline(always)]
405#[cfg(target_arch = "x86_64")]
406unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
407    let res_wrapped: u64;
408    let adjustment: u64;
409    core::arch::asm!(
410        "add {0}, {1}",
411        // Trick. The carry flag is set iff the addition overflowed.
412        // sbb x, y does x := x - y - CF. In our case, x and y are both {1:e}, so it simply does
413        // {1:e} := 0xffffffff on overflow and {1:e} := 0 otherwise. {1:e} is the low 32 bits of
414        // {1}; the high 32-bits are zeroed on write. In the end, we end up with 0xffffffff in {1}
415        // on overflow; this happens be NEG_ORDER.
416        // Note that the CPU does not realize that the result of sbb x, x does not actually depend
417        // on x. We must write the result to a register that we know to be ready. We have a
418        // dependency on {1} anyway, so let's use it.
419        "sbb {1:e}, {1:e}",
420        inlateout(reg) x => res_wrapped,
421        inlateout(reg) y => adjustment,
422        options(pure, nomem, nostack),
423    );
424    assume(x != 0 || (res_wrapped == y && adjustment == 0));
425    assume(y != 0 || (res_wrapped == x && adjustment == 0));
426    // Add NEG_ORDER == subtract ORDER.
427    // Cannot overflow unless the assumption if x + y < 2**64 + ORDER is incorrect.
428    res_wrapped + adjustment
429}
430
431#[inline(always)]
432#[cfg(not(target_arch = "x86_64"))]
433unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
434    let (res_wrapped, carry) = x.overflowing_add(y);
435    // Below cannot overflow unless the assumption if x + y < 2**64 + ORDER is incorrect.
436    res_wrapped + Goldilocks::NEG_ORDER * u64::from(carry)
437}
438
439/// Convert a constant u64 array into a constant Goldilocks array.
440#[inline]
441#[must_use]
442pub(crate) const fn to_goldilocks_array<const N: usize>(input: [u64; N]) -> [Goldilocks; N] {
443    let mut output = [Goldilocks { value: 0 }; N];
444    let mut i = 0;
445    loop {
446        if i == N {
447            break;
448        }
449        output[i].value = input[i];
450        i += 1;
451    }
452    output
453}
454
455#[cfg(test)]
456mod tests {
457    use p3_field_testing::{test_field, test_two_adic_field};
458
459    use super::*;
460
461    type F = Goldilocks;
462
463    #[test]
464    fn test_goldilocks() {
465        let f = F::new(100);
466        assert_eq!(f.as_canonical_u64(), 100);
467
468        // Over the Goldilocks field, the following set of equations hold
469        // p               = 0
470        // 2^64 - 2^32 + 1 = 0
471        // 2^64            = 2^32 - 1
472        let f = F::new(u64::MAX);
473        assert_eq!(f.as_canonical_u64(), u32::MAX as u64 - 1);
474
475        let f = F::from_canonical_u64(u64::MAX);
476        assert_eq!(f.as_canonical_u64(), u32::MAX as u64 - 1);
477
478        let f = F::from_canonical_u64(0);
479        assert!(f.is_zero());
480
481        let f = F::from_canonical_u64(F::ORDER_U64);
482        assert!(f.is_zero());
483
484        assert_eq!(F::generator().as_canonical_u64(), 7_u64);
485
486        let f_1 = F::new(1);
487        let f_1_copy = F::new(1);
488
489        let expected_result = F::zero();
490        assert_eq!(f_1 - f_1_copy, expected_result);
491
492        let expected_result = F::new(2);
493        assert_eq!(f_1 + f_1_copy, expected_result);
494
495        let f_2 = F::new(2);
496        let expected_result = F::new(3);
497        assert_eq!(f_1 + f_1_copy * f_2, expected_result);
498
499        let expected_result = F::new(5);
500        assert_eq!(f_1 + f_2 * f_2, expected_result);
501
502        let f_p_minus_1 = F::from_canonical_u64(F::ORDER_U64 - 1);
503        let expected_result = F::zero();
504        assert_eq!(f_1 + f_p_minus_1, expected_result);
505
506        let f_p_minus_2 = F::from_canonical_u64(F::ORDER_U64 - 2);
507        let expected_result = F::from_canonical_u64(F::ORDER_U64 - 3);
508        assert_eq!(f_p_minus_1 + f_p_minus_2, expected_result);
509
510        let expected_result = F::new(1);
511        assert_eq!(f_p_minus_1 - f_p_minus_2, expected_result);
512
513        let expected_result = f_p_minus_1;
514        assert_eq!(f_p_minus_2 - f_p_minus_1, expected_result);
515
516        let expected_result = f_p_minus_2;
517        assert_eq!(f_p_minus_1 - f_1, expected_result);
518
519        let expected_result = F::new(3);
520        assert_eq!(f_2 * f_2 - f_1, expected_result);
521
522        // Generator check
523        let expected_multiplicative_group_generator = F::new(7);
524        assert_eq!(F::generator(), expected_multiplicative_group_generator);
525
526        // Check on `reduce_u128`
527        let x = u128::MAX;
528        let y = reduce128(x);
529        // The following equalitiy sequence holds, modulo p = 2^64 - 2^32 + 1
530        // 2^128 - 1 = (2^64 - 1) * (2^64 + 1)
531        //           = (2^32 - 1 - 1) * (2^32 - 1 + 1)
532        //           = (2^32 - 2) * (2^32)
533        //           = 2^64 - 2 * 2^32
534        //           = 2^64 - 2^33
535        //           = 2^32 - 1 - 2^33
536        //           = - 2^32 - 1
537        let expected_result = -F::new(2_u64.pow(32)) - F::new(1);
538        assert_eq!(y, expected_result);
539
540        assert_eq!(f.exp_u64(10540996611094048183).exp_const_u64::<7>(), f);
541        assert_eq!(y.exp_u64(10540996611094048183).exp_const_u64::<7>(), y);
542        assert_eq!(f_2.exp_u64(10540996611094048183).exp_const_u64::<7>(), f_2);
543    }
544
545    test_field!(crate::Goldilocks);
546    test_two_adic_field!(crate::Goldilocks);
547}
p3_goldilocks/lib.rs

p3_goldilocks/
lib.rs