Skip to main content

p3_goldilocks/
goldilocks.rs

1use alloc::vec;
2use alloc::vec::Vec;
3use core::fmt::{Debug, Display, Formatter};
4use core::hash::{Hash, Hasher};
5use core::iter::{Product, Sum};
6use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
7use core::{array, fmt};
8
9use num_bigint::BigUint;
10use p3_challenger::UniformSamplingField;
11use p3_field::exponentiation::exp_10540996611094048183;
12use p3_field::integers::QuotientMap;
13use p3_field::op_assign_macros::{
14    impl_add_assign, impl_div_methods, impl_mul_methods, impl_sub_assign,
15};
16use p3_field::{
17    Field, InjectiveMonomial, Packable, PermutationMonomial, PrimeCharacteristicRing, PrimeField,
18    PrimeField64, RawDataSerializable, TwoAdicField, impl_raw_serializable_primefield64,
19    quotient_map_large_iint, quotient_map_large_uint, quotient_map_small_int,
20};
21use p3_util::{assume, branch_hint, flatten_to_base, gcd_inner};
22use rand::Rng;
23use rand::distr::{Distribution, StandardUniform};
24use serde::de::Error;
25use serde::{Deserialize, Deserializer, Serialize};
26
27/// The Goldilocks prime
28pub(crate) const P: u64 = 0xFFFF_FFFF_0000_0001;
29
30/// The prime field known as Goldilocks, defined as `F_p` where `p = 2^64 - 2^32 + 1`.
31///
32/// The serde encoding is canonical: every field element has exactly one valid byte representation.
33#[derive(Copy, Clone, Default)]
34#[repr(transparent)] // Important for reasoning about memory layout
35#[must_use]
36pub struct Goldilocks {
37    /// Not necessarily canonical.
38    pub(crate) value: u64,
39}
40
41impl Serialize for Goldilocks {
42    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
43        // Emit the canonical representative so every field element has one encoding.
44        serializer.serialize_u64(self.as_canonical_u64())
45    }
46}
47
48impl<'de> Deserialize<'de> for Goldilocks {
49    fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
50        let val = u64::deserialize(d)?;
51        // Reject non-canonical encodings so a proof cannot be re-encoded without the witness.
52        if val < P {
53            Ok(Self::new(val))
54        } else {
55            Err(D::Error::custom("Goldilocks value is out of range"))
56        }
57    }
58}
59
60impl Goldilocks {
61    /// Create a new field element from any `u64`.
62    ///
63    /// Any `u64` value is accepted. No reduction is performed since
64    /// Goldilocks uses a non-canonical internal representation.
65    #[inline]
66    pub const fn new(value: u64) -> Self {
67        Self { value }
68    }
69
70    /// Convert a `[u64; N]` array to an array of field elements.
71    ///
72    /// Const version of `input.map(Goldilocks::new)`.
73    #[inline]
74    pub const fn new_array<const N: usize>(input: [u64; N]) -> [Self; N] {
75        let mut output = [Self::ZERO; N];
76        let mut i = 0;
77        while i < N {
78            output[i].value = input[i];
79            i += 1;
80        }
81        output
82    }
83
84    /// Convert a `[[u64; N]; M]` array to a 2D array of field elements.
85    ///
86    /// Const version of `input.map(Goldilocks::new_array)`.
87    #[inline]
88    pub const fn new_2d_array<const N: usize, const M: usize>(
89        input: [[u64; N]; M],
90    ) -> [[Self; N]; M] {
91        let mut output = [[Self::ZERO; N]; M];
92        let mut i = 0;
93        while i < M {
94            output[i] = Self::new_array(input[i]);
95            i += 1;
96        }
97        output
98    }
99
100    /// Two's complement of `ORDER`, i.e. `2^64 - ORDER = 2^32 - 1`.
101    const NEG_ORDER: u64 = Self::ORDER_U64.wrapping_neg();
102
103    /// A list of generators for the two-adic subgroups of the goldilocks field.
104    ///
105    /// These satisfy the properties that `TWO_ADIC_GENERATORS[0] = 1` and `TWO_ADIC_GENERATORS[i+1]^2 = TWO_ADIC_GENERATORS[i]`.
106    pub const TWO_ADIC_GENERATORS: [Self; 33] = Self::new_array([
107        0x0000000000000001,
108        0xffffffff00000000,
109        0x0001000000000000,
110        0xfffffffeff000001,
111        0xefffffff00000001,
112        0x00003fffffffc000,
113        0x0000008000000000,
114        0xf80007ff08000001,
115        0xbf79143ce60ca966,
116        0x1905d02a5c411f4e,
117        0x9d8f2ad78bfed972,
118        0x0653b4801da1c8cf,
119        0xf2c35199959dfcb6,
120        0x1544ef2335d17997,
121        0xe0ee099310bba1e2,
122        0xf6b2cffe2306baac,
123        0x54df9630bf79450e,
124        0xabd0a6e8aa3d8a0e,
125        0x81281a7b05f9beac,
126        0xfbd41c6b8caa3302,
127        0x30ba2ecd5e93e76d,
128        0xf502aef532322654,
129        0x4b2a18ade67246b5,
130        0xea9d5a1336fbc98b,
131        0x86cdcc31c307e171,
132        0x4bbaf5976ecfefd8,
133        0xed41d05b78d6e286,
134        0x10d78dd8915a171d,
135        0x59049500004a4485,
136        0xdfa8c93ba46d2666,
137        0x7e9bd009b86a0845,
138        0x400a7f755588e659,
139        0x185629dcda58878c,
140    ]);
141
142    /// A list of powers of two from 0 to 95.
143    ///
144    /// Note that 2^{96} = -1 mod P so all powers of two can be simply
145    /// derived from this list.
146    const POWERS_OF_TWO: [Self; 96] = {
147        let mut powers_of_two = [Self::ONE; 96];
148
149        let mut i = 1;
150        while i < 64 {
151            powers_of_two[i] = Self::new(1 << i);
152            i += 1;
153        }
154        let mut var = Self::new(1 << 63);
155        while i < 96 {
156            var = const_add(var, var);
157            powers_of_two[i] = var;
158            i += 1;
159        }
160        powers_of_two
161    };
162}
163
164impl PartialEq for Goldilocks {
165    fn eq(&self, other: &Self) -> bool {
166        self.as_canonical_u64() == other.as_canonical_u64()
167    }
168}
169
170impl Eq for Goldilocks {}
171
172impl Packable for Goldilocks {}
173
174impl Hash for Goldilocks {
175    fn hash<H: Hasher>(&self, state: &mut H) {
176        state.write_u64(self.as_canonical_u64());
177    }
178}
179
180impl Ord for Goldilocks {
181    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
182        self.as_canonical_u64().cmp(&other.as_canonical_u64())
183    }
184}
185
186impl PartialOrd for Goldilocks {
187    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
188        Some(self.cmp(other))
189    }
190}
191
192impl Display for Goldilocks {
193    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
194        Display::fmt(&self.as_canonical_u64(), f)
195    }
196}
197
198impl Debug for Goldilocks {
199    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
200        Debug::fmt(&self.as_canonical_u64(), f)
201    }
202}
203
204impl Distribution<Goldilocks> for StandardUniform {
205    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Goldilocks {
206        loop {
207            let next_u64 = rng.next_u64();
208            let is_canonical = next_u64 < Goldilocks::ORDER_U64;
209            if is_canonical {
210                return Goldilocks::new(next_u64);
211            }
212        }
213    }
214}
215
216impl UniformSamplingField for Goldilocks {
217    const MAX_SINGLE_SAMPLE_BITS: usize = 24;
218    const SAMPLING_BITS_M: [u64; 64] = {
219        let prime: u64 = P;
220        let mut a = [0u64; 64];
221        let mut k = 0;
222        while k < 64 {
223            if k == 0 {
224                a[k] = prime; // This value is irrelevant in practice. `bits = 0` returns 0 always.
225            } else {
226                // Create a mask to zero out the last k bits
227                let mask = !((1u64 << k) - 1);
228                a[k] = prime & mask;
229            }
230            k += 1;
231        }
232        a
233    };
234}
235
236impl PrimeCharacteristicRing for Goldilocks {
237    type PrimeSubfield = Self;
238
239    const ZERO: Self = Self::new(0);
240    const ONE: Self = Self::new(1);
241    const TWO: Self = Self::new(2);
242    const NEG_ONE: Self = Self::new(Self::ORDER_U64 - 1);
243
244    #[inline]
245    fn from_prime_subfield(f: Self::PrimeSubfield) -> Self {
246        f
247    }
248
249    #[inline]
250    fn from_bool(b: bool) -> Self {
251        Self::new(b.into())
252    }
253
254    #[inline]
255    fn halve(&self) -> Self {
256        // Branchless halving: x/2 = (x >> 1) + ((x & 1) * (p+1)/2).
257        // When x is odd, add (p+1)/2 to compensate for the lost bit.
258        // Uses mask arithmetic to avoid the 50/50 unpredictable branch.
259        const HALF_P_PLUS_1: u64 = (P + 1) >> 1; // 0x7FFFFFFF80000001
260        let lo_bit = self.value & 1;
261        let half = self.value >> 1;
262        let mask = 0u64.wrapping_sub(lo_bit); // all-ones when odd, zero when even
263        Self::new(half.wrapping_add(mask & HALF_P_PLUS_1))
264    }
265
266    #[inline]
267    fn mul_2exp_u64(&self, exp: u64) -> Self {
268        // In the Goldilocks field, 2^96 = -1 mod P and 2^192 = 1 mod P.
269        match exp {
270            0 => *self,
271            1 => *self + *self,
272            _ => {
273                if exp < 96 {
274                    *self * Self::POWERS_OF_TWO[exp as usize]
275                } else if exp < 192 {
276                    -*self * Self::POWERS_OF_TWO[(exp - 96) as usize]
277                } else {
278                    self.mul_2exp_u64(exp % 192)
279                }
280            }
281        }
282    }
283
284    #[inline]
285    fn div_2exp_u64(&self, mut exp: u64) -> Self {
286        // In the goldilocks field, 2^192 = 1 mod P.
287        // Thus 2^{-n} = 2^{192 - n} mod P.
288        exp %= 192;
289        match exp {
290            0 => *self,
291            1 => self.halve(),
292            _ => self.mul_2exp_u64(192 - exp),
293        }
294    }
295
296    #[inline]
297    fn sum_array<const N: usize>(input: &[Self]) -> Self {
298        assert_eq!(N, input.len());
299        // Benchmarking shows that for N <= 3 it's faster to sum the elements directly
300        // but for N > 3 it's faster to use the .sum() methods which passes through u128's
301        // allowing for delayed reductions.
302        match N {
303            0 => Self::ZERO,
304            1 => input[0],
305            2 => input[0] + input[1],
306            3 => input[0] + input[1] + input[2],
307            _ => input.iter().copied().sum(),
308        }
309    }
310
311    #[inline]
312    fn dot_product<const N: usize>(lhs: &[Self; N], rhs: &[Self; N]) -> Self {
313        // The constant OFFSET has 2 important properties:
314        // 1. It is a multiple of P.
315        // 2. It is greater than the maximum possible value of the sum of the products of two u64s.
316        const OFFSET: u128 = ((P as u128) << 64) - (P as u128) + ((P as u128) << 32);
317        const {
318            assert!((N as u32) <= (1 << 31));
319        }
320        match N {
321            0 => Self::ZERO,
322            1 => lhs[0] * rhs[0],
323            2 => {
324                // We unroll the N = 2 case as it is slightly faster and this is an important case
325                // as a major use is in extension field arithmetic and Goldilocks has a degree 2 extension.
326                let long_prod_0 = (lhs[0].value as u128) * (rhs[0].value as u128);
327                let long_prod_1 = (lhs[1].value as u128) * (rhs[1].value as u128);
328
329                // We know that long_prod_0, long_prod_1 < OFFSET.
330                // Thus if long_prod_0 + long_prod_1 overflows, we can just subtract OFFSET.
331                let (sum, over) = long_prod_0.overflowing_add(long_prod_1);
332                // Compiler really likes defining sum_corr here instead of in the if/else.
333                let sum_corr = sum.wrapping_sub(OFFSET);
334                if over {
335                    reduce128(sum_corr)
336                } else {
337                    reduce128(sum)
338                }
339            }
340            _ => {
341                let (lo_plus_hi, hi) = lhs
342                    .iter()
343                    .zip(rhs)
344                    .map(|(x, y)| (x.value as u128) * (y.value as u128))
345                    .fold((0_u128, 0_u64), |(acc_lo, acc_hi), val| {
346                        // Split val into (hi, lo) where hi is the upper 32 bits and lo is the lower 96 bits.
347                        let val_hi = (val >> 96) as u64;
348                        // acc_hi accumulates hi, acc_lo accumulates lo + 2^{96}hi.
349                        // As N <= 2^32, acc_hi cannot overflow.
350                        unsafe { (acc_lo.wrapping_add(val), acc_hi.unchecked_add(val_hi)) }
351                    });
352                // First, remove the hi part from lo_plus_hi.
353                let lo = lo_plus_hi.wrapping_sub((hi as u128) << 96);
354                // As 2^{96} = -1 mod P, we simply need to reduce lo - hi.
355                // As N <= 2^31, lo < 2^127 and hi < 2^63 < P. Hence the equation below will not over or underflow.
356                let sum = unsafe { lo.unchecked_add(P.unchecked_sub(hi) as u128) };
357                reduce128(sum)
358            }
359        }
360    }
361
362    #[inline]
363    fn zero_vec(len: usize) -> Vec<Self> {
364        // SAFETY:
365        // Due to `#[repr(transparent)]`, Goldilocks and u64 have the same size, alignment
366        // and memory layout making `flatten_to_base` safe. This will create
367        // a vector of Goldilocks elements with value set to 0.
368        unsafe { flatten_to_base(vec![0u64; len]) }
369    }
370}
371
372/// Degree of the smallest permutation polynomial for Goldilocks.
373///
374/// As p - 1 = 2^32 * 3 * 5 * 17 * ... the smallest choice for a degree D satisfying gcd(p - 1, D) = 1 is 7.
375impl InjectiveMonomial<7> for Goldilocks {}
376
377impl PermutationMonomial<7> for Goldilocks {
378    /// In the field `Goldilocks`, `a^{1/7}` is equal to a^{10540996611094048183}.
379    ///
380    /// This follows from the calculation `7*10540996611094048183 = 4*(2^64 - 2**32) + 1 = 1 mod (p - 1)`.
381    fn injective_exp_root_n(&self) -> Self {
382        exp_10540996611094048183(*self)
383    }
384}
385
386impl RawDataSerializable for Goldilocks {
387    impl_raw_serializable_primefield64!();
388}
389
390impl Field for Goldilocks {
391    #[cfg(all(
392        target_arch = "x86_64",
393        target_feature = "avx2",
394        not(target_feature = "avx512f")
395    ))]
396    type Packing = crate::PackedGoldilocksAVX2;
397
398    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
399    type Packing = crate::PackedGoldilocksAVX512;
400
401    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
402    type Packing = crate::PackedGoldilocksNeon;
403
404    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
405    type Packing = crate::PackedGoldilocksWasmSimd128;
406
407    #[cfg(not(any(
408        all(
409            target_arch = "x86_64",
410            target_feature = "avx2",
411            not(target_feature = "avx512f")
412        ),
413        all(target_arch = "x86_64", target_feature = "avx512f"),
414        target_arch = "aarch64",
415        all(target_arch = "wasm32", target_feature = "simd128"),
416    )))]
417    type Packing = Self;
418
419    // Sage: GF(2^64 - 2^32 + 1).multiplicative_generator()
420    const GENERATOR: Self = Self::new(7);
421
422    fn is_zero(&self) -> bool {
423        self.value == 0 || self.value == Self::ORDER_U64
424    }
425
426    fn try_inverse(&self) -> Option<Self> {
427        if self.is_zero() {
428            return None;
429        }
430
431        Some(gcd_inversion(*self))
432    }
433
434    #[inline]
435    fn order() -> BigUint {
436        P.into()
437    }
438}
439
440// We use macros to implement QuotientMap<Int> for all integer types except for u64 and i64.
441quotient_map_small_int!(Goldilocks, u64, [u8, u16, u32]);
442quotient_map_small_int!(Goldilocks, i64, [i8, i16, i32]);
443quotient_map_large_uint!(
444    Goldilocks,
445    u64,
446    Goldilocks::ORDER_U64,
447    "`[0, 2^64 - 2^32]`",
448    "`[0, 2^64 - 1]`",
449    [u128]
450);
451quotient_map_large_iint!(
452    Goldilocks,
453    i64,
454    "`[-(2^63 - 2^31), 2^63 - 2^31]`",
455    "`[1 + 2^32 - 2^64, 2^64 - 1]`",
456    [(i128, u128)]
457);
458
459impl QuotientMap<u64> for Goldilocks {
460    /// Convert a given `u64` integer into an element of the `Goldilocks` field.
461    ///
462    /// No reduction is needed as the internal value is allowed
463    /// to be any u64.
464    #[inline]
465    fn from_int(int: u64) -> Self {
466        Self::new(int)
467    }
468
469    /// Convert a given `u64` integer into an element of the `Goldilocks` field.
470    ///
471    /// Return `None` if the given integer is greater than `p = 2^64 - 2^32 + 1`.
472    #[inline]
473    fn from_canonical_checked(int: u64) -> Option<Self> {
474        (int < Self::ORDER_U64).then(|| Self::new(int))
475    }
476
477    /// Convert a given `u64` integer into an element of the `Goldilocks` field.
478    ///
479    /// # Safety
480    /// In this case this function is actually always safe as the internal
481    /// value is allowed to be any u64.
482    #[inline(always)]
483    unsafe fn from_canonical_unchecked(int: u64) -> Self {
484        Self::new(int)
485    }
486}
487
488impl QuotientMap<i64> for Goldilocks {
489    /// Convert a given `i64` integer into an element of the `Goldilocks` field.
490    ///
491    /// We simply need to deal with the sign.
492    #[inline]
493    fn from_int(int: i64) -> Self {
494        if int >= 0 {
495            Self::new(int as u64)
496        } else {
497            Self::new(Self::ORDER_U64.wrapping_add_signed(int))
498        }
499    }
500
501    /// Convert a given `i64` integer into an element of the `Goldilocks` field.
502    ///
503    /// Returns none if the input does not lie in the range `(-(2^63 - 2^31), 2^63 - 2^31)`.
504    #[inline]
505    fn from_canonical_checked(int: i64) -> Option<Self> {
506        const POS_BOUND: i64 = (P >> 1) as i64;
507        const NEG_BOUND: i64 = -POS_BOUND;
508        match int {
509            0..=POS_BOUND => Some(Self::new(int as u64)),
510            NEG_BOUND..0 => Some(Self::new(Self::ORDER_U64.wrapping_add_signed(int))),
511            _ => None,
512        }
513    }
514
515    /// Convert a given `i64` integer into an element of the `Goldilocks` field.
516    ///
517    /// # Safety
518    /// In this case this function is actually always safe as the internal
519    /// value is allowed to be any u64.
520    #[inline(always)]
521    unsafe fn from_canonical_unchecked(int: i64) -> Self {
522        Self::from_int(int)
523    }
524}
525
526impl PrimeField for Goldilocks {
527    fn as_canonical_biguint(&self) -> BigUint {
528        self.as_canonical_u64().into()
529    }
530}
531
532impl PrimeField64 for Goldilocks {
533    const ORDER_U64: u64 = P;
534
535    #[inline]
536    fn as_canonical_u64(&self) -> u64 {
537        let mut c = self.value;
538        // We only need one condition subtraction, since 2 * ORDER would not fit in a u64.
539        if c >= Self::ORDER_U64 {
540            c -= Self::ORDER_U64;
541        }
542        c
543    }
544}
545
546impl TwoAdicField for Goldilocks {
547    const TWO_ADICITY: usize = 32;
548
549    fn two_adic_generator(bits: usize) -> Self {
550        assert!(bits <= Self::TWO_ADICITY);
551        Self::TWO_ADIC_GENERATORS[bits]
552    }
553}
554
555/// A const version of the addition function.
556///
557/// Useful for constructing constants values in const contexts. Outside of
558/// const contexts, Add should be used instead.
559#[inline]
560const fn const_add(lhs: Goldilocks, rhs: Goldilocks) -> Goldilocks {
561    let (sum, over) = lhs.value.overflowing_add(rhs.value);
562    let (mut sum, over) = sum.overflowing_add((over as u64) * Goldilocks::NEG_ORDER);
563    if over {
564        sum += Goldilocks::NEG_ORDER;
565    }
566    Goldilocks::new(sum)
567}
568
569impl Add for Goldilocks {
570    type Output = Self;
571
572    #[inline]
573    fn add(self, rhs: Self) -> Self {
574        let (sum, over) = self.value.overflowing_add(rhs.value);
575        let (mut sum, over) = sum.overflowing_add(u64::from(over) * Self::NEG_ORDER);
576        if over {
577            // NB: self.value > Self::ORDER && rhs.value > Self::ORDER is necessary but not
578            // sufficient for double-overflow.
579            // This assume does two things:
580            //  1. If compiler knows that either self.value or rhs.value <= ORDER, then it can skip
581            //     this check.
582            //  2. Hints to the compiler how rare this double-overflow is (thus handled better with
583            //     a branch).
584            unsafe {
585                assume(self.value > Self::ORDER_U64 && rhs.value > Self::ORDER_U64);
586            }
587            branch_hint();
588            sum += Self::NEG_ORDER; // Cannot overflow.
589        }
590        Self::new(sum)
591    }
592}
593
594impl Sub for Goldilocks {
595    type Output = Self;
596
597    #[inline]
598    fn sub(self, rhs: Self) -> Self {
599        let (diff, under) = self.value.overflowing_sub(rhs.value);
600        let (mut diff, under) = diff.overflowing_sub(u64::from(under) * Self::NEG_ORDER);
601        if under {
602            // NB: self.value < NEG_ORDER - 1 && rhs.value > ORDER is necessary but not
603            // sufficient for double-underflow.
604            // This assume does two things:
605            //  1. If compiler knows that either self.value >= NEG_ORDER - 1 or rhs.value <= ORDER,
606            //     then it can skip this check.
607            //  2. Hints to the compiler how rare this double-underflow is (thus handled better
608            //     with a branch).
609            unsafe {
610                assume(self.value < Self::NEG_ORDER - 1 && rhs.value > Self::ORDER_U64);
611            }
612            branch_hint();
613            diff -= Self::NEG_ORDER; // Cannot underflow.
614        }
615        Self::new(diff)
616    }
617}
618
619impl Neg for Goldilocks {
620    type Output = Self;
621
622    #[inline]
623    fn neg(self) -> Self::Output {
624        Self::new(Self::ORDER_U64 - self.as_canonical_u64())
625    }
626}
627
628impl Mul for Goldilocks {
629    type Output = Self;
630
631    #[inline]
632    fn mul(self, rhs: Self) -> Self {
633        reduce128(u128::from(self.value) * u128::from(rhs.value))
634    }
635}
636
637impl_add_assign!(Goldilocks);
638impl_sub_assign!(Goldilocks);
639impl_mul_methods!(Goldilocks);
640impl_div_methods!(Goldilocks, Goldilocks);
641
642impl Sum for Goldilocks {
643    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
644        // This is faster than iter.reduce(|x, y| x + y).unwrap_or(Self::ZERO) for iterators of length > 2.
645
646        // This sum will not overflow so long as iter.len() < 2^64.
647        let sum = iter.map(|x| x.value as u128).sum::<u128>();
648        reduce128(sum)
649    }
650}
651
652/// Reduces to a 64-bit value. The result might not be in canonical form; it could be in between the
653/// field order and `2^64`.
654#[inline]
655pub(crate) fn reduce128(x: u128) -> Goldilocks {
656    let (x_lo, x_hi) = split(x); // This is a no-op
657    let x_hi_hi = x_hi >> 32;
658    let x_hi_lo = x_hi & Goldilocks::NEG_ORDER;
659
660    let (mut t0, borrow) = x_lo.overflowing_sub(x_hi_hi);
661    if borrow {
662        branch_hint(); // A borrow is exceedingly rare. It is faster to branch.
663        t0 -= Goldilocks::NEG_ORDER; // Cannot underflow.
664    }
665    let t1 = x_hi_lo * Goldilocks::NEG_ORDER;
666    let t2 = unsafe { add_no_canonicalize_trashing_input(t0, t1) };
667    Goldilocks::new(t2)
668}
669
670#[inline]
671#[allow(clippy::cast_possible_truncation)]
672const fn split(x: u128) -> (u64, u64) {
673    (x as u64, (x >> 64) as u64)
674}
675
676/// Fast addition modulo ORDER for x86-64.
677/// This function is marked unsafe for the following reasons:
678///   - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001.
679///   - It is only faster in some circumstances. In particular, on x86 it overwrites both inputs in
680///     the registers, so its use is not recommended when either input will be used again.
681#[inline(always)]
682#[cfg(target_arch = "x86_64")]
683unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
684    unsafe {
685        let res_wrapped: u64;
686        let adjustment: u64;
687        core::arch::asm!(
688            "add {0}, {1}",
689            // Trick. The carry flag is set iff the addition overflowed.
690            // sbb x, y does x := x - y - CF. In our case, x and y are both {1:e}, so it simply does
691            // {1:e} := 0xffffffff on overflow and {1:e} := 0 otherwise. {1:e} is the low 32 bits of
692            // {1}; the high 32-bits are zeroed on write. In the end, we end up with 0xffffffff in {1}
693            // on overflow; this happens be NEG_ORDER.
694            // Note that the CPU does not realize that the result of sbb x, x does not actually depend
695            // on x. We must write the result to a register that we know to be ready. We have a
696            // dependency on {1} anyway, so let's use it.
697            "sbb {1:e}, {1:e}",
698            inlateout(reg) x => res_wrapped,
699            inlateout(reg) y => adjustment,
700            options(pure, nomem, nostack),
701        );
702        assume(x != 0 || (res_wrapped == y && adjustment == 0));
703        assume(y != 0 || (res_wrapped == x && adjustment == 0));
704        // Add NEG_ORDER == subtract ORDER.
705        // Cannot overflow unless the assumption if x + y < 2**64 + ORDER is incorrect.
706        res_wrapped + adjustment
707    }
708}
709
710#[inline(always)]
711#[cfg(not(target_arch = "x86_64"))]
712unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
713    let (res_wrapped, carry) = x.overflowing_add(y);
714    // Below cannot overflow unless the assumption if x + y < 2**64 + ORDER is incorrect.
715    res_wrapped + Goldilocks::NEG_ORDER * u64::from(carry)
716}
717
718/// Compute the inverse of a Goldilocks element `a` using the binary GCD algorithm.
719///
720/// Instead of applying the standard algorithm this uses a variant inspired by <https://eprint.iacr.org/2020/972.pdf>.
721/// The key idea is to compute update factors which are incorrect by a known power of 2 which
722/// can be corrected at the end. These update factors can then be used to construct the inverse
723/// via a simple linear combination.
724///
725/// This is much faster than the standard algorithm as we avoid most of the (more expensive) field arithmetic.
726fn gcd_inversion(input: Goldilocks) -> Goldilocks {
727    // Initialise our values to the value we want to invert and the prime.
728    let (mut a, mut b) = (input.value, P);
729
730    // As the goldilocks prime is 64 bit, initially `len(a) + len(b) ≤ 2 * 64 = 128`.
731    // This means we will need `126` iterations of the inner loop ensure `len(a) + len(b) ≤ 2`.
732    // We split the iterations into 2 rounds of length 63.
733    const ROUND_SIZE: usize = 63;
734
735    // In theory we could make this slightly faster by replacing the first `gcd_inner` by a copy-pasted
736    // version which doesn't do any computations involving g. But either the compiler works this out
737    // for itself or the speed up is negligible as I couldn't notice any difference in benchmarks.
738    let (f00, _, f10, _) = gcd_inner::<ROUND_SIZE>(&mut a, &mut b);
739    let (_, _, f11, g11) = gcd_inner::<ROUND_SIZE>(&mut a, &mut b);
740
741    // The update factors are i64's except we need to interpret -2^63 as 2^63.
742    // This is because the outputs of `gcd_inner` are always in the range `(-2^ROUND_SIZE, 2^ROUND_SIZE]`.
743    let u = from_unusual_int(f00);
744    let v = from_unusual_int(f10);
745    let u_fac11 = from_unusual_int(f11);
746    let v_fac11 = from_unusual_int(g11);
747
748    // Each iteration introduced a factor of 2 and so we need to divide by 2^{126}.
749    // But 2^{192} = 1 mod P, so we can instead multiply by 2^{66} as 192 - 126 = 66.
750    (u * u_fac11 + v * v_fac11).mul_2exp_u64(66)
751}
752
753/// Convert from an i64 to a Goldilocks element but interpret -2^63 as 2^63.
754const fn from_unusual_int(int: i64) -> Goldilocks {
755    if (int >= 0) || (int == i64::MIN) {
756        Goldilocks::new(int as u64)
757    } else {
758        Goldilocks::new(Goldilocks::ORDER_U64.wrapping_add_signed(int))
759    }
760}
761
762#[cfg(test)]
763mod tests {
764    use p3_field::extension::BinomialExtensionField;
765    use p3_field_testing::{
766        test_field, test_field_dft, test_prime_field, test_prime_field_64, test_two_adic_field,
767    };
768
769    use super::*;
770
771    type F = Goldilocks;
772    type EF = BinomialExtensionField<F, 5>;
773
774    #[test]
775    fn deserialize_rejects_non_canonical_encodings() {
776        // p, p + i, and u64::MAX are all field-equal to canonical values.
777        // Only the canonical encoding may deserialize.
778        // This blocks re-encoding a proof as p + i without the witness.
779        for non_canonical in [P, P + 5, u64::MAX] {
780            let json = serde_json::to_string(&non_canonical).unwrap();
781            assert!(serde_json::from_str::<F>(&json).is_err());
782        }
783
784        // The largest canonical value, p - 1, still deserializes.
785        let max_canonical_json = serde_json::to_string(&(P - 1)).unwrap();
786        let max_canonical: F = serde_json::from_str(&max_canonical_json).unwrap();
787        assert_eq!(max_canonical.as_canonical_u64(), P - 1);
788    }
789
790    #[test]
791    fn serialize_is_canonical() {
792        // A non-canonical in-memory value serializes to its canonical representative.
793        //     in memory : p + 5
794        //     canonical : 5
795        let non_canonical = F::new(P + 5);
796        let json = serde_json::to_string(&non_canonical).unwrap();
797        assert_eq!(json, "5");
798
799        // The canonical encoding round-trips back to the same field element.
800        let roundtrip: F = serde_json::from_str(&json).unwrap();
801        assert_eq!(roundtrip, non_canonical);
802    }
803
804    #[test]
805    fn test_goldilocks() {
806        let f = F::new(100);
807        assert_eq!(f.as_canonical_u64(), 100);
808
809        // Over the Goldilocks field, the following set of equations hold
810        // p               = 0
811        // 2^64 - 2^32 + 1 = 0
812        // 2^64            = 2^32 - 1
813        let f = F::new(u64::MAX);
814        assert_eq!(f.as_canonical_u64(), u32::MAX as u64 - 1);
815
816        let f = F::from_u64(u64::MAX);
817        assert_eq!(f.as_canonical_u64(), u32::MAX as u64 - 1);
818
819        // Generator check
820        let expected_multiplicative_group_generator = F::new(7);
821        assert_eq!(F::GENERATOR, expected_multiplicative_group_generator);
822        assert_eq!(F::GENERATOR.as_canonical_u64(), 7_u64);
823
824        // Check on `reduce_u128`
825        let x = u128::MAX;
826        let y = reduce128(x);
827        // The following equality sequence holds, modulo p = 2^64 - 2^32 + 1
828        // 2^128 - 1 = (2^64 - 1) * (2^64 + 1)
829        //           = (2^32 - 1 - 1) * (2^32 - 1 + 1)
830        //           = (2^32 - 2) * (2^32)
831        //           = 2^64 - 2 * 2^32
832        //           = 2^64 - 2^33
833        //           = 2^32 - 1 - 2^33
834        //           = - 2^32 - 1
835        let expected_result = -F::TWO.exp_power_of_2(5) - F::ONE;
836        assert_eq!(y, expected_result);
837
838        let f = F::new(100);
839        assert_eq!(f.injective_exp_n().injective_exp_root_n(), f);
840        assert_eq!(y.injective_exp_n().injective_exp_root_n(), y);
841        assert_eq!(F::TWO.injective_exp_n().injective_exp_root_n(), F::TWO);
842    }
843
844    // Goldilocks has a redundant representation for both 0 and 1.
845    const ZEROS: [Goldilocks; 2] = [Goldilocks::ZERO, Goldilocks::new(P)];
846    const ONES: [Goldilocks; 2] = [Goldilocks::ONE, Goldilocks::new(P + 1)];
847
848    // Get the prime factorization of the order of the multiplicative group.
849    // i.e. the prime factorization of P - 1.
850    fn multiplicative_group_prime_factorization() -> [(BigUint, u32); 6] {
851        [
852            (BigUint::from(2u8), 32),
853            (BigUint::from(3u8), 1),
854            (BigUint::from(5u8), 1),
855            (BigUint::from(17u8), 1),
856            (BigUint::from(257u16), 1),
857            (BigUint::from(65537u32), 1),
858        ]
859    }
860
861    test_field!(
862        crate::Goldilocks,
863        &super::ZEROS,
864        &super::ONES,
865        &super::multiplicative_group_prime_factorization()
866    );
867    test_prime_field!(crate::Goldilocks);
868    test_prime_field_64!(crate::Goldilocks, &super::ZEROS, &super::ONES);
869    test_two_adic_field!(crate::Goldilocks);
870
871    test_field_dft!(
872        radix2dit,
873        crate::Goldilocks,
874        super::EF,
875        p3_dft::Radix2Dit<_>
876    );
877    test_field_dft!(bowers, crate::Goldilocks, super::EF, p3_dft::Radix2Bowers);
878    test_field_dft!(
879        parallel,
880        crate::Goldilocks,
881        super::EF,
882        p3_dft::Radix2DitParallel<crate::Goldilocks>
883    );
884}