Skip to main content

poseidon_hash/
lib.rs

1#![forbid(unsafe_code)]
2//! # Poseidon Hash (Goldilocks)
3//!
4//! Rust implementation of Poseidon2 hash function and Goldilocks field arithmetic.
5//!
6//! ## ⚠️ Security Warning
7//!
8//! **This library has NOT been audited and is provided as-is. Use with caution.**
9//!
10//! - Prototype implementation focused on correctness
11//! - **Not security audited** - do not use in production without proper security review
12//! - While the implementation appears to work correctly, cryptographic software requires careful auditing
13//! - This is an open-source contribution and not an official Lighter Protocol library
14//! - Use at your own risk
15//!
16//! ## Overview
17//!
18//! This crate provides essential cryptographic primitives for Zero-Knowledge proof systems:
19//!
20//! - **Goldilocks Field**: A special prime field (p = 2^64 - 2^32 + 1) optimized for 64-bit CPU operations
21//! - **Poseidon2 Hash**: A ZK-friendly hash function designed for low constraint counts in ZK circuits
22//! - **Fp5 Extension Field**: Quintic extension field (GF(p^5)) for elliptic curve operations
23//!
24//! ## Features
25//!
26//! - Fast field arithmetic with optimized modular reduction
27//! - Efficient Poseidon2 hash implementation
28//! - 40-byte field elements for cryptographic operations
29//! - Production-grade performance and security
30//!
31//! ## Example
32//!
33//! ```rust
34//! use poseidon_hash::{Goldilocks, hash_to_quintic_extension};
35//!
36//! // Field arithmetic
37//! let a = Goldilocks::from_canonical_u64(42);
38//! let b = Goldilocks::from_canonical_u64(10);
39//! let sum = a.add(&b);
40//! let product = a.mul(&b);
41//!
42//! // Poseidon2 hashing
43//! let elements = vec![
44//!     Goldilocks::from_canonical_u64(1),
45//!     Goldilocks::from_canonical_u64(2),
46//!     Goldilocks::from_canonical_u64(3),
47//! ];
48//! let hash = hash_to_quintic_extension(&elements);
49//! ```
50
51use zeroize::Zeroize;
52
53/// Goldilocks field element.
54///
55/// The Goldilocks field uses prime modulus p = 2^64 - 2^32 + 1, which is optimized for:
56/// - Fast modular reduction using bit manipulation
57/// - Efficient 64-bit CPU operations
58/// - Zero-Knowledge proof systems (Plonky2, STARKs)
59///
60/// # Example
61///
62/// ```rust
63/// use poseidon_hash::Goldilocks;
64///
65/// let a = Goldilocks::from_canonical_u64(42);
66/// let b = Goldilocks::from_canonical_u64(10);
67/// let sum = a.add(&b);
68/// let product = a.mul(&b);
69/// ```
70#[derive(Debug, Clone, Copy, Zeroize)]
71pub struct Goldilocks(pub u64);
72
73/// Two Goldilocks elements are equal iff they represent the same field element,
74/// i.e., their *canonical* (reduced mod p) values are identical.
75/// This is necessary because arithmetic operations (especially `mul`) can produce
76/// non-canonical representations where the raw u64 exceeds the modulus.
77impl PartialEq for Goldilocks {
78    #[inline]
79    fn eq(&self, other: &Self) -> bool {
80        self.to_canonical_u64() == other.to_canonical_u64()
81    }
82}
83impl Eq for Goldilocks {}
84
85impl Goldilocks {
86    /// Field modulus: p = 2^64 - 2^32 + 1 = 0xffffffff00000001
87    pub const MODULUS: u64 = 0xffffffff00000001;
88    
89    /// Epsilon constant: (1 << 32) - 1 = 0xffffffff
90    /// Used for efficient modular reduction
91    pub const EPSILON: u64 = 0xffffffff;
92    
93    /// The order of the field (same as MODULUS)
94    pub const ORDER: u64 = Self::MODULUS;
95    
96    /// Returns the zero element of the field.
97    pub fn zero() -> Self {
98        Goldilocks(0)
99    }
100    
101    /// Returns the multiplicative identity (one) of the field.
102    pub fn one() -> Self {
103        Goldilocks(1)
104    }
105    
106    /// Checks if this element is zero.
107    pub fn is_zero(&self) -> bool {
108        self.to_canonical_u64() == 0
109    }
110    
111    /// Converts this field element to its canonical representation as a u64.
112    ///
113    /// The canonical form ensures the value is in the range [0, MODULUS).
114    pub fn to_canonical_u64(&self) -> u64 {
115        let x = self.0;
116        if x >= Self::MODULUS {
117            x - Self::MODULUS
118        } else {
119            x
120        }
121    }
122    
123    /// Adds two field elements with modular reduction.
124    ///
125    /// # Example
126    ///
127    /// ```rust
128    /// use poseidon_hash::Goldilocks;
129    ///
130    /// let a = Goldilocks::from_canonical_u64(100);
131    /// let b = Goldilocks::from_canonical_u64(50);
132    /// let sum = a.add(&b);
133    /// assert_eq!(sum.to_canonical_u64(), 150);
134    /// ```
135    pub fn add(&self, other: &Goldilocks) -> Goldilocks {
136        // Field addition with modular reduction using epsilon optimization
137        let (sum, over) = self.0.overflowing_add(other.0);
138        let (sum, over) = sum.overflowing_add(over as u64 * Self::EPSILON);
139        let final_sum = if over {
140            sum + Self::EPSILON
141        } else {
142            sum
143        };
144        Goldilocks(final_sum)
145    }
146    
147    /// Subtracts two field elements with modular reduction.
148    ///
149    /// # Example
150    ///
151    /// ```rust
152    /// use poseidon_hash::Goldilocks;
153    ///
154    /// let a = Goldilocks::from_canonical_u64(100);
155    /// let b = Goldilocks::from_canonical_u64(50);
156    /// let diff = a.sub(&b);
157    /// assert_eq!(diff.to_canonical_u64(), 50);
158    /// ```
159    pub fn sub(&self, other: &Goldilocks) -> Goldilocks {
160        // Field subtraction with modular reduction
161        let (diff, borrow) = self.0.overflowing_sub(other.0);
162        let (diff, borrow) = diff.overflowing_sub(borrow as u64 * Self::EPSILON);
163        let final_diff = if borrow {
164            diff - Self::EPSILON
165        } else {
166            diff
167        };
168        Goldilocks(final_diff)
169    }
170    
171    /// Multiplies two field elements with modular reduction.
172    ///
173    /// Uses optimized reduction algorithm for the Goldilocks prime.
174    ///
175    /// # Example
176    ///
177    /// ```rust
178    /// use poseidon_hash::Goldilocks;
179    ///
180    /// let a = Goldilocks::from_canonical_u64(10);
181    /// let b = Goldilocks::from_canonical_u64(5);
182    /// let product = a.mul(&b);
183    /// assert_eq!(product.to_canonical_u64(), 50);
184    /// ```
185    pub fn mul(&self, other: &Goldilocks) -> Goldilocks {
186        // Field multiplication with optimized modular reduction
187        // Algorithm: Compute product as u128, then reduce using Goldilocks prime properties
188        let product = (self.0 as u128) * (other.0 as u128);
189        let x_hi = (product >> 64) as u64;
190        let x_lo = product as u64;
191        
192        let x_hi_hi = x_hi >> 32;
193        let x_hi_lo = x_hi & Self::EPSILON;
194        
195        let (t0, borrow) = x_lo.overflowing_sub(x_hi_hi);
196        let t0 = if borrow { t0 - Self::EPSILON } else { t0 };
197        let t1 = x_hi_lo * Self::EPSILON;
198        
199        let (sum, over) = t0.overflowing_add(t1);
200        let t2 = sum + Self::EPSILON * over as u64;
201        Goldilocks(t2)
202    }
203    
204    /// Computes the square of this field element.
205    ///
206    /// More efficient than `self.mul(self)` as it can use optimized squaring formulas.
207    pub fn square(&self) -> Goldilocks {
208        self.mul(self)
209    }
210    
211    /// Doubles this field element (multiplies by 2).
212    pub fn double(&self) -> Goldilocks {
213        self.add(self)
214    }
215
216    /// Computes the additive inverse (negation) of this field element.
217    ///
218    /// Returns `0` for the zero element; otherwise returns `p - x` where `p` is the modulus.
219    ///
220    /// # Example
221    ///
222    /// ```rust
223    /// use poseidon_hash::Goldilocks;
224    ///
225    /// let a = Goldilocks::from_canonical_u64(42);
226    /// let neg_a = a.neg();
227    /// assert!(neg_a.add(&a).is_zero(), "a + (-a) must be zero");
228    /// assert_eq!(neg_a.neg().to_canonical_u64(), a.to_canonical_u64(), "-(-a) must equal a");
229    /// ```
230    pub fn neg(&self) -> Goldilocks {
231        let x = self.to_canonical_u64();
232        if x == 0 {
233            Goldilocks::zero()
234        } else {
235            Goldilocks(Self::MODULUS - x)
236        }
237    }
238
239    /// Computes the multiplicative inverse of this field element.
240    ///
241    /// Uses Fermat's little theorem: `a^(-1) ≡ a^(p-2) mod p`
242    ///
243    /// Returns `0` when called on the zero element (analogous to `inverse_or_zero()`).
244    /// Use the explicit check `if self.is_zero()` before calling when a zero input
245    /// indicates a logic error in your code.
246    pub fn inverse(&self) -> Goldilocks {
247        // Fermat's little theorem: a^(p-2) ≡ a^(-1) mod p
248        // p = 2^64 - 2^32 + 1
249        // p - 2 = 2^64 - 2^32 - 1
250        let mut result = Goldilocks::one();
251        let mut base = *self;
252        let mut exp = Self::MODULUS - 2;
253        
254        while exp > 0 {
255            if exp & 1 == 1 {
256                result = result.mul(&base);
257            }
258            base = base.mul(&base);
259            exp >>= 1;
260        }
261        
262        result
263    }
264    
265    /// Creates a field element from a canonical u64 value.
266    ///
267    /// The input value is unconditionally reduced modulo MODULUS, so values
268    /// in the range `[MODULUS, u64::MAX]` are accepted and normalised.
269    /// In debug builds an assertion fires for non-canonical inputs so callers
270    /// can catch unintended use early.
271    ///
272    /// # Example
273    ///
274    /// ```rust
275    /// use poseidon_hash::Goldilocks;
276    ///
277    /// let a = Goldilocks::from_canonical_u64(42);
278    /// // Values above MODULUS are silently reduced.
279    /// let b = Goldilocks::from_canonical_u64(Goldilocks::MODULUS + 1);
280    /// assert_eq!(b.to_canonical_u64(), 1);
281    /// ```
282    pub fn from_canonical_u64(val: u64) -> Goldilocks {
283        debug_assert!(
284            val < Self::MODULUS,
285            "from_canonical_u64: value 0x{val:016x} exceeds MODULUS — did you mean from_noncanonical_u64?"
286        );
287        // Unconditional branchless reduction keeps the representation canonical
288        // in both debug and release builds.
289        Goldilocks(if val >= Self::MODULUS { val - Self::MODULUS } else { val })
290    }
291
292    /// Creates a field element from any u64 value, reducing modulo MODULUS.
293    ///
294    /// Use this when the input may legitimately exceed MODULUS (e.g. when
295    /// deserialising untrusted data or computing intermediate results).
296    pub fn from_noncanonical_u64(val: u64) -> Goldilocks {
297        Goldilocks(if val >= Self::MODULUS { val - Self::MODULUS } else { val })
298    }
299    
300    /// Creates a field element from an i64 value.
301    ///
302    /// Negative values are handled using two's complement representation.
303    /// The field operations will automatically reduce the value modulo MODULUS.
304    ///
305    /// # Example
306    ///
307    /// ```rust
308    /// use poseidon_hash::Goldilocks;
309    ///
310    /// let a = Goldilocks::from_i64(-10);
311    /// ```
312    pub fn from_i64(val: i64) -> Goldilocks {
313        // Direct cast - two's complement representation is valid in the field
314        // Field operations will reduce modulo MODULUS automatically
315        Goldilocks(val as u64)
316    }
317    
318    /// Computes the square root of this field element using Tonelli-Shanks algorithm.
319    ///
320    /// Returns `Some(sqrt)` if the square root exists, `None` otherwise.
321    ///
322    /// For Goldilocks field p = 2^64 - 2^32 + 1, implements full Tonelli-Shanks.
323    /// p - 1 = 2^32 * (2^32 - 1) = 2^32 * q where q = 0xFFFFFFFF is odd.
324    pub fn sqrt(&self) -> Option<Goldilocks> {
325        if self.is_zero() {
326            return Some(Goldilocks::zero());
327        }
328        
329        // Tonelli-Shanks algorithm for Goldilocks field
330        // p = 2^64 - 2^32 + 1 = 0xffffffff00000001
331        // p - 1 = 2^32 * (2^32 - 1) = 2^e * q
332        // where e = 32 and q = 2^32 - 1 = 0xFFFFFFFF (odd)
333        
334        const E: usize = 32;
335        const Q: u64 = 0xFFFFFFFFu64; // q = 2^32 - 1
336        
337        // Step 1: Find a quadratic non-residue z
338        // For Goldilocks, z = 11 is a known quadratic non-residue
339        let z = Goldilocks::from_canonical_u64(11);
340        
341        // Step 2: Initialize
342        // c = z^q mod p
343        let mut c = z.exp(Q);
344        
345        // t = self^q mod p
346        let mut t = self.exp(Q);
347        
348        // r = self^((q+1)/2) mod p
349        let mut r = self.exp((Q + 1) / 2);
350        
351        let mut m = E;
352        
353        // Step 3: Main loop
354        while t.to_canonical_u64() != 1 {
355            // Find smallest i such that t^(2^i) = 1
356            let mut i = 0;
357            let mut tt = t;
358            
359            while i < m && tt.to_canonical_u64() != 1 {
360                tt = tt.square();
361                i += 1;
362            }
363            
364            if i == m {
365                // No square root exists
366                return None;
367            }
368            
369            // b = c^(2^(m-i-1)) mod p
370            let mut b = c;
371            for _ in 0..(m - i - 1) {
372                b = b.square();
373            }
374            
375            // Update variables
376            // r = r * b mod p
377            r = r.mul(&b);
378            
379            // c = b^2 mod p
380            c = b.square();
381            
382            // t = t * c mod p
383            t = t.mul(&c);
384            
385            // m = i
386            m = i;
387        }
388        
389        // Verify that r^2 == self
390        let r_sq = r.square();
391        if r_sq.to_canonical_u64() == self.to_canonical_u64() {
392            Some(r)
393        } else {
394            None
395        }
396    }
397    
398    /// Raises this element to the power of 2^n by repeated squaring.
399    ///
400    /// # Example
401    ///
402    /// ```rust
403    /// use poseidon_hash::Goldilocks;
404    ///
405    /// let a = Goldilocks::from_canonical_u64(5);
406    /// let result = a.exp_power_of_2(3); // a^(2^3) = a^8
407    /// ```
408    pub fn exp_power_of_2(&self, n: usize) -> Goldilocks {
409        let mut result = *self;
410        for _ in 0..n {
411            result = result.square();
412        }
413        result
414    }
415    
416    /// Checks if two Goldilocks elements are equal.
417    pub fn equals(&self, other: &Goldilocks) -> bool {
418        self.to_canonical_u64() == other.to_canonical_u64()
419    }
420    
421    /// Exponentiation: raises this element to a power.
422    ///
423    /// Uses binary exponentiation (square-and-multiply algorithm).
424    pub fn exp(&self, exponent: u64) -> Goldilocks {
425        if exponent == 0 {
426            return Goldilocks::one();
427        }
428        if exponent == 1 {
429            return *self;
430        }
431        
432        let mut result = Goldilocks::one();
433        let mut base = *self;
434        let mut exp = exponent;
435        
436        while exp > 0 {
437            if exp & 1 == 1 {
438                result = result.mul(&base);
439            }
440            base = base.square();
441            exp >>= 1;
442        }
443        
444        result
445    }
446}
447
448impl From<u64> for Goldilocks {
449    fn from(val: u64) -> Self {
450        Goldilocks::from_canonical_u64(val)
451    }
452}
453
454impl std::fmt::Display for Goldilocks {
455    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
456        write!(f, "{}", self.to_canonical_u64())
457    }
458}
459
460impl Goldilocks {
461    /// Serialises this element as 8 little-endian bytes.
462    ///
463    /// The output always uses the *canonical* representation (value reduced mod MODULUS).
464    pub fn to_bytes_le(&self) -> [u8; 8] {
465        self.to_canonical_u64().to_le_bytes()
466    }
467
468    /// Deserialises a Goldilocks element from 8 little-endian bytes.
469    ///
470    /// Returns `Err` if `bytes` is not exactly 8 bytes long or if the decoded
471    /// u64 value is ≥ MODULUS (i.e. not a canonical field element).
472    pub fn from_bytes_le(bytes: &[u8]) -> Result<Self, String> {
473        if bytes.len() != 8 {
474            return Err(format!("expected 8 bytes, got {}", bytes.len()));
475        }
476        let mut arr = [0u8; 8];
477        arr.copy_from_slice(bytes);
478        let val = u64::from_le_bytes(arr);
479        if val >= Self::MODULUS {
480            return Err(format!(
481                "value 0x{val:016x} is not a canonical Goldilocks element (>= MODULUS)"
482            ));
483        }
484        Ok(Goldilocks(val))
485    }
486
487    /// Encodes this element as a 16-character lowercase hex string.
488    ///
489    /// The hex string always represents canonical (reduced) bytes, little-endian.
490    pub fn to_hex(&self) -> String {
491        hex::encode(self.to_bytes_le())
492    }
493
494    /// Decodes a Goldilocks element from a 16-character hex string.
495    ///
496    /// Accepts an optional `0x` prefix.  Returns `Err` if the hex is malformed,
497    /// the wrong length, or the decoded value ≥ MODULUS.
498    pub fn from_hex(s: &str) -> Result<Self, String> {
499        let s = s.strip_prefix("0x").unwrap_or(s);
500        if s.len() != 16 {
501            return Err(format!("expected 16 hex chars, got {}", s.len()));
502        }
503        let bytes = hex::decode(s).map_err(|e| format!("hex decode: {e}"))?;
504        Self::from_bytes_le(&bytes)
505    }
506}
507
508#[allow(dead_code)]
509fn reduce_u128(x: u128) -> u64 {
510    let low = x as u64;
511    let high = (x >> 64) as u64;
512    
513    // Reduce high part
514    let reduced_high = high.wrapping_sub(high >> 32);
515    let result = low.wrapping_add(reduced_high << 32);
516    
517    // Final reduction
518    if result >= Goldilocks::MODULUS {
519        result - Goldilocks::MODULUS
520    } else {
521        result
522    }
523}
524
525/// Fp5 extension field element.
526///
527/// Represents an element of the quintic extension field GF(p^5) where p is the Goldilocks prime.
528/// Each element is represented as a polynomial of degree at most 4 over the base field.
529///
530/// The extension field uses the irreducible polynomial x^5 = w where w = 3.
531///
532/// # Example
533///
534/// ```rust
535/// use poseidon_hash::{Fp5Element, Goldilocks};
536///
537/// let a = Fp5Element::from_uint64_array([1, 2, 3, 4, 5]);
538/// let b = Fp5Element::one();
539/// let product = a.mul(&b);
540/// ```
541#[derive(Debug, Clone, Copy)]
542pub struct Fp5Element(pub [Goldilocks; 5]);
543
544/// Two Fp5 elements are equal iff all five coefficients are equal as field elements
545/// (uses canonical Goldilocks comparison).
546impl PartialEq for Fp5Element {
547    fn eq(&self, other: &Self) -> bool {
548        self.0.iter().zip(other.0.iter()).all(|(a, b)| a == b)
549    }
550}
551impl Eq for Fp5Element {}
552
553impl Zeroize for Fp5Element {
554    fn zeroize(&mut self) {
555        for limb in &mut self.0 {
556            limb.zeroize();
557        }
558    }
559}
560
561impl Fp5Element {
562    /// Returns the zero element of the extension field.
563    pub fn zero() -> Self {
564        Fp5Element([Goldilocks::zero(); 5])
565    }
566    
567    /// Returns the multiplicative identity (one) of the extension field.
568    pub fn one() -> Self {
569        let mut result = [Goldilocks::zero(); 5];
570        result[0] = Goldilocks::one();
571        Fp5Element(result)
572    }
573    
574    /// Checks if this element is zero.
575    pub fn is_zero(&self) -> bool {
576        self.0.iter().all(|&x| x.is_zero())
577    }
578    
579    /// Adds two extension field elements.
580    ///
581    /// Addition is performed component-wise on the polynomial coefficients.
582    pub fn add(&self, other: &Fp5Element) -> Fp5Element {
583        let mut result = [Goldilocks::zero(); 5];
584        for i in 0..5 {
585            result[i] = self.0[i].add(&other.0[i]);
586        }
587        Fp5Element(result)
588    }
589    
590    /// Subtracts two extension field elements.
591    ///
592    /// Subtraction is performed component-wise on the polynomial coefficients.
593    pub fn sub(&self, other: &Fp5Element) -> Fp5Element {
594        let mut result = [Goldilocks::zero(); 5];
595        for i in 0..5 {
596            result[i] = self.0[i].sub(&other.0[i]);
597        }
598        Fp5Element(result)
599    }
600    
601    /// Multiplies two extension field elements.
602    ///
603    /// Uses the irreducible polynomial x^5 = w (where w = 3) to reduce the result.
604    ///
605    /// # Example
606    ///
607    /// ```rust
608    /// use poseidon_hash::{Fp5Element, Goldilocks};
609    ///
610    /// let a = Fp5Element::from_uint64_array([1, 0, 0, 0, 0]);
611    /// let b = Fp5Element::from_uint64_array([2, 0, 0, 0, 0]);
612    /// let product = a.mul(&b);
613    /// ```
614    pub fn mul(&self, other: &Fp5Element) -> Fp5Element {
615        // Multiplication in quintic extension field
616        // Uses irreducible polynomial x^5 = w where w = 3
617        const W: Goldilocks = Goldilocks(3);
618        
619        // c0 = a0*b0 + w*(a1*b4 + a2*b3 + a3*b2 + a4*b1)
620        let a0b0 = self.0[0].mul(&other.0[0]);
621        let a1b4 = self.0[1].mul(&other.0[4]);
622        let a2b3 = self.0[2].mul(&other.0[3]);
623        let a3b2 = self.0[3].mul(&other.0[2]);
624        let a4b1 = self.0[4].mul(&other.0[1]);
625        let added = a1b4.add(&a2b3).add(&a3b2).add(&a4b1);
626        let muld = W.mul(&added);
627        let c0 = a0b0.add(&muld);
628        
629        // c1 = a0*b1 + a1*b0 + w*(a2*b4 + a3*b3 + a4*b2)
630        let a0b1 = self.0[0].mul(&other.0[1]);
631        let a1b0 = self.0[1].mul(&other.0[0]);
632        let a2b4 = self.0[2].mul(&other.0[4]);
633        let a3b3 = self.0[3].mul(&other.0[3]);
634        let a4b2 = self.0[4].mul(&other.0[2]);
635        let added = a2b4.add(&a3b3).add(&a4b2);
636        let muld = W.mul(&added);
637        let c1 = a0b1.add(&a1b0).add(&muld);
638        
639        // c2 = a0*b2 + a1*b1 + a2*b0 + w*(a3*b4 + a4*b3)
640        let a0b2 = self.0[0].mul(&other.0[2]);
641        let a1b1 = self.0[1].mul(&other.0[1]);
642        let a2b0 = self.0[2].mul(&other.0[0]);
643        let a3b4 = self.0[3].mul(&other.0[4]);
644        let a4b3 = self.0[4].mul(&other.0[3]);
645        let added = a3b4.add(&a4b3);
646        let muld = W.mul(&added);
647        let c2 = a0b2.add(&a1b1).add(&a2b0).add(&muld);
648        
649        // c3 = a0*b3 + a1*b2 + a2*b1 + a3*b0 + w*a4*b4
650        let a0b3 = self.0[0].mul(&other.0[3]);
651        let a1b2 = self.0[1].mul(&other.0[2]);
652        let a2b1 = self.0[2].mul(&other.0[1]);
653        let a3b0 = self.0[3].mul(&other.0[0]);
654        let a4b4 = self.0[4].mul(&other.0[4]);
655        let muld = W.mul(&a4b4);
656        let c3 = a0b3.add(&a1b2).add(&a2b1).add(&a3b0).add(&muld);
657        
658        // c4 = a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0
659        let a0b4 = self.0[0].mul(&other.0[4]);
660        let a1b3 = self.0[1].mul(&other.0[3]);
661        let a2b2 = self.0[2].mul(&other.0[2]);
662        let a3b1 = self.0[3].mul(&other.0[1]);
663        let a4b0 = self.0[4].mul(&other.0[0]);
664        let c4 = a0b4.add(&a1b3).add(&a2b2).add(&a3b1).add(&a4b0);
665        
666        Fp5Element([c0, c1, c2, c3, c4])
667    }
668    
669    /// Computes the multiplicative inverse of this element.
670    ///
671    /// Returns zero if this element is zero (which has no inverse).
672    pub fn inverse(&self) -> Fp5Element {
673        self.inverse_or_zero()
674    }
675    
676    /// Computes the multiplicative inverse, returning zero if the element is zero.
677    ///
678    /// This is a safe version of `inverse()` that handles zero elements gracefully.
679    pub fn inverse_or_zero(&self) -> Fp5Element {
680        if self.is_zero() {
681            return Fp5Element::zero();
682        }
683        
684        // Use Frobenius automorphism for efficient inversion
685        let d = self.frobenius();
686        let e = d.mul(&d.frobenius());
687        let f = e.mul(&e.repeated_frobenius(2));
688        
689        // Compute g = a[0]*f[0] + w*(a[1]*f[4] + a[2]*f[3] + a[3]*f[2] + a[4]*f[1])
690        let w = Goldilocks(3);
691        let a0b0 = self.0[0].mul(&f.0[0]);
692        let a1b4 = self.0[1].mul(&f.0[4]);
693        let a2b3 = self.0[2].mul(&f.0[3]);
694        let a3b2 = self.0[3].mul(&f.0[2]);
695        let a4b1 = self.0[4].mul(&f.0[1]);
696        let added = a1b4.add(&a2b3).add(&a3b2).add(&a4b1);
697        let muld = w.mul(&added);
698        let g = a0b0.add(&muld);
699        
700        // Return f * g.inverse()
701        let g_inv = g.inverse();
702        f.scalar_mul(&g_inv)
703    }
704    
705    /// Applies the Frobenius automorphism once.
706    ///
707    /// The Frobenius automorphism raises each coefficient to the p-th power.
708    pub fn frobenius(&self) -> Fp5Element {
709        self.repeated_frobenius(1)
710    }
711    
712    /// Applies the Frobenius automorphism `count` times.
713    ///
714    /// Since we're in GF(p^5), applying it 5 times returns the original element.
715    pub fn repeated_frobenius(&self, count: usize) -> Fp5Element {
716        if count == 0 {
717            return *self;
718        }
719        
720        let d = 5;
721        let count = count % d;
722        
723        if count == 0 {
724            return *self;
725        }
726        
727        // FP5_DTH_ROOT = 1041288259238279555
728        let dth_root = Goldilocks(1041288259238279555);
729        
730        // Compute z0 = dth_root^count
731        let mut z0 = dth_root;
732        for _ in 1..count {
733            z0 = z0.mul(&dth_root);
734        }
735        
736        // Compute powers of z0: [1, z0, z0^2, z0^3, z0^4]
737        let mut z_powers = [Goldilocks::zero(); 5];
738        z_powers[0] = Goldilocks::one();
739        for i in 1..5 {
740            z_powers[i] = z_powers[i-1].mul(&z0);
741        }
742        
743        // Multiply each coordinate by corresponding power
744        let mut result = [Goldilocks::zero(); 5];
745        for i in 0..5 {
746            result[i] = self.0[i].mul(&z_powers[i]);
747        }
748        
749        Fp5Element(result)
750    }
751    
752    /// Multiplies this element by a scalar (base field element).
753    ///
754    /// This is more efficient than full extension field multiplication when one operand
755    /// is in the base field.
756    pub fn scalar_mul(&self, scalar: &Goldilocks) -> Fp5Element {
757        Fp5Element([
758            self.0[0].mul(scalar),
759            self.0[1].mul(scalar),
760            self.0[2].mul(scalar),
761            self.0[3].mul(scalar),
762            self.0[4].mul(scalar),
763        ])
764    }
765    
766    /// Computes the square of this element.
767    ///
768    /// Optimized implementation that uses fewer operations than general multiplication.
769    pub fn square(&self) -> Fp5Element {
770        // Optimized squaring for quintic extension field
771        const W: Goldilocks = Goldilocks(3);
772        let double_w = W.add(&W); // 2*w = 6
773        
774        // c0 = a0^2 + 2*w*(a1*a4 + a2*a3)
775        let a0s = self.0[0].mul(&self.0[0]);
776        let a1a4 = self.0[1].mul(&self.0[4]);
777        let a2a3 = self.0[2].mul(&self.0[3]);
778        let added = a1a4.add(&a2a3);
779        let muld = double_w.mul(&added);
780        let c0 = a0s.add(&muld);
781        
782        // c1 = 2*a0*a1 + 2*w*a2*a4 + w*a3^2
783        let a0_double = self.0[0].add(&self.0[0]);
784        let a0_double_a1 = a0_double.mul(&self.0[1]);
785        let a2a4_double_w = double_w.mul(&self.0[2].mul(&self.0[4]));
786        let a3a3w = W.mul(&self.0[3].mul(&self.0[3]));
787        let c1 = a0_double_a1.add(&a2a4_double_w).add(&a3a3w);
788        
789        // c2 = 2*a0*a2 + a1^2 + 2*w*a4*a3
790        let a0_double_a2 = a0_double.mul(&self.0[2]);
791        let a1_square = self.0[1].mul(&self.0[1]);
792        let a4a3_double_w = double_w.mul(&self.0[4].mul(&self.0[3]));
793        let c2 = a0_double_a2.add(&a1_square).add(&a4a3_double_w);
794        
795        // c3 = 2*a0*a3 + 2*a1*a2 + w*a4^2
796        let a1_double = self.0[1].add(&self.0[1]);
797        let a0_double_a3 = a0_double.mul(&self.0[3]);
798        let a1_double_a2 = a1_double.mul(&self.0[2]);
799        let a4_square_w = W.mul(&self.0[4].mul(&self.0[4]));
800        let c3 = a0_double_a3.add(&a1_double_a2).add(&a4_square_w);
801        
802        // c4 = 2*a0*a4 + 2*a1*a3 + a2^2
803        let a0_double_a4 = a0_double.mul(&self.0[4]);
804        let a1_double_a3 = a1_double.mul(&self.0[3]);
805        let a2_square = self.0[2].mul(&self.0[2]);
806        let c4 = a0_double_a4.add(&a1_double_a3).add(&a2_square);
807        
808        Fp5Element([c0, c1, c2, c3, c4])
809    }
810    
811    /// Doubles this element (multiplies by 2).
812    pub fn double(&self) -> Fp5Element {
813        self.add(self)
814    }
815    
816    /// Creates an Fp5Element from an array of 5 u64 values.
817    ///
818    /// Each u64 value is interpreted as a Goldilocks field element.
819    ///
820    /// # Example
821    ///
822    /// ```rust
823    /// use poseidon_hash::Fp5Element;
824    ///
825    /// let elem = Fp5Element::from_uint64_array([1, 2, 3, 4, 5]);
826    /// ```
827    pub fn from_uint64_array(arr: [u64; 5]) -> Fp5Element {
828        let mut result = [Goldilocks::zero(); 5];
829        for i in 0..5 {
830            result[i] = Goldilocks(arr[i]);
831        }
832        Fp5Element(result)
833    }
834    
835    /// Converts this element to a 40-byte little-endian representation.
836    ///
837    /// Each of the 5 Goldilocks field elements contributes 8 bytes (little-endian).
838    ///
839    /// # Example
840    ///
841    /// ```rust
842    /// use poseidon_hash::Fp5Element;
843    ///
844    /// let elem = Fp5Element::one();
845    /// let bytes = elem.to_bytes_le();
846    /// assert_eq!(bytes.len(), 40);
847    /// ```
848    pub fn to_bytes_le(&self) -> [u8; 40] {
849        let mut result = [0u8; 40];
850        for i in 0..5 {
851            result[i*8..(i+1)*8].copy_from_slice(&self.0[i].0.to_le_bytes());
852        }
853        result
854    }
855    
856    /// Creates an Fp5Element from a 40-byte little-endian representation.
857    ///
858    /// Each of the 5 Goldilocks field elements is read as 8 bytes (little-endian).
859    ///
860    /// # Example
861    ///
862    /// ```rust
863    /// use poseidon_hash::Fp5Element;
864    ///
865    /// let bytes = [0u8; 40];
866    /// let elem = Fp5Element::from_bytes_le(&bytes);
867    /// ```
868    pub fn from_bytes_le(bytes: &[u8]) -> Result<Self, String> {
869        if bytes.len() != 40 {
870            return Err(format!("Invalid length: expected 40 bytes, got {}", bytes.len()));
871        }
872        
873        let mut result = [Goldilocks::zero(); 5];
874        for i in 0..5 {
875            let mut limb_bytes = [0u8; 8];
876            limb_bytes.copy_from_slice(&bytes[i*8..(i+1)*8]);
877            result[i] = Goldilocks(u64::from_le_bytes(limb_bytes));
878        }
879        Ok(Fp5Element(result))
880    }
881    
882    /// Computes the additive inverse (negation) of this element.
883    pub fn neg(&self) -> Fp5Element {
884        Fp5Element::zero().sub(self)
885    }
886    
887    /// Raises this element to the power of 2^n by repeated squaring.
888    ///
889    /// Equivalent to Go's ExpPowerOf2 function.
890    pub fn exp_power_of_2(&self, n: usize) -> Fp5Element {
891        let mut result = *self;
892        for _ in 0..n {
893            result = result.square();
894        }
895        result
896    }
897    
898    /// Computes the sign function Sgn0(x) for this element.
899    ///
900    /// Returns true if the sign bit (LSB of the first non-zero limb) is 0.
901    /// Equivalent to Go's Sgn0 function.
902    pub fn sgn0(&self) -> bool {
903        let mut sign = false;
904        let mut zero = true;
905        
906        for limb in &self.0 {
907            let sign_i = (limb.0 & 1) == 0;
908            let zero_i = limb.is_zero();
909            sign = sign || (zero && sign_i);
910            zero = zero && zero_i;
911        }
912        
913        sign
914    }
915    
916    /// Computes the square root of this element.
917    ///
918    /// Returns `Some(sqrt)` if the square root exists, `None` otherwise.
919    /// Equivalent to Go's Sqrt function.
920    pub fn sqrt(&self) -> Option<Fp5Element> {
921        // Step 1: v = x^(2^31)
922        let v = self.exp_power_of_2(31);
923        
924        // Step 2: d = x * v^(2^32) * v^(-1)
925        let v_32 = v.exp_power_of_2(32);
926        let v_inv = v.inverse_or_zero();
927        let d = self.mul(&v_32).mul(&v_inv);
928        
929        // Step 3: e = Frobenius(d * RepeatedFrobenius(d, 2))
930        let d_repeated = d.repeated_frobenius(2);
931        let d_mul = d.mul(&d_repeated);
932        let e = d_mul.frobenius();
933        
934        // Step 4: f = e^2
935        let f = e.square();
936        
937        // Step 5: Compute g = x[0]*f[0] + 3*(x[1]*f[4] + x[2]*f[3] + x[3]*f[2] + x[4]*f[1])
938        let w = Goldilocks(3);
939        let x0f0 = self.0[0].mul(&f.0[0]);
940        let x1f4 = self.0[1].mul(&f.0[4]);
941        let x2f3 = self.0[2].mul(&f.0[3]);
942        let x3f2 = self.0[3].mul(&f.0[2]);
943        let x4f1 = self.0[4].mul(&f.0[1]);
944        let added = x1f4.add(&x2f3).add(&x3f2).add(&x4f1);
945        let muld = w.mul(&added);
946        let g_goldi = x0f0.add(&muld);
947        
948        // Step 6: Compute sqrt of g in base field
949        let s_opt = g_goldi.sqrt();
950        let s = match s_opt {
951            Some(s_val) => s_val,
952            None => return None,
953        };
954        
955        // Step 7: Convert s to Fp5 and multiply by e^(-1)
956        let e_inv = e.inverse_or_zero();
957        let s_fp5 = Fp5Element::from_uint64_array([s.0, 0, 0, 0, 0]);
958        
959        Some(s_fp5.mul(&e_inv))
960    }
961    
962    /// Computes the canonical square root of this element.
963    ///
964    /// Returns `(canonical_sqrt, success)` where success indicates if the square root exists.
965    /// The canonical square root is chosen such that Sgn0(sqrt) == false.
966    /// Equivalent to Go's CanonicalSqrt function.
967    pub fn canonical_sqrt(&self) -> (Fp5Element, bool) {
968        match self.sqrt() {
969            Some(sqrt_x) => {
970                // If Sgn0(sqrt_x) is true, return -sqrt_x, else return sqrt_x
971                if sqrt_x.sgn0() {
972                    (sqrt_x.neg(), true)
973                } else {
974                    (sqrt_x, true)
975                }
976            }
977            None => (Fp5Element::zero(), false),
978        }
979    }
980    
981    /// Computes the Legendre symbol of this element.
982    ///
983    /// Returns a Goldilocks element:
984    /// - 0 if x is zero
985    /// - 1 if x is a quadratic residue (square)
986    /// - -1 (p-1) if x is a quadratic non-residue
987    /// 
988    /// Equivalent to Go's Legendre function.
989    pub fn legendre(&self) -> Goldilocks {
990        // Step 1: Compute Frobenius automorphisms
991        let frob1 = self.frobenius();
992        let frob2 = frob1.frobenius();
993        
994        // Step 2: Compute products
995        let frob1_times_frob2 = frob1.mul(&frob2);
996        let frob2_frob1_times_frob2 = frob1_times_frob2.repeated_frobenius(2);
997        
998        // Step 3: Compute xrExt = x * frob1_times_frob2 * frob2_frob1_times_frob2
999        let xr_ext = self.mul(&frob1_times_frob2).mul(&frob2_frob1_times_frob2);
1000        
1001        // Step 4: Extract base field element (first coordinate)
1002        let xr = xr_ext.0[0];
1003        
1004        // Step 5: Compute xr^31, then xr^63
1005        let xr_31 = xr.exp(1u64 << 31);
1006        let xr_31_inv = xr_31.inverse();
1007        let xr_63 = xr_31.exp(1u64 << 32);
1008        
1009        // Step 6: Return xr_63 * xr_31^(-1)
1010        xr_63.mul(&xr_31_inv)
1011    }
1012    
1013    /// Checks if two Fp5Element values are equal.
1014    pub fn equals(&self, other: &Fp5Element) -> bool {
1015        self.0.iter().zip(other.0.iter()).all(|(a, b)| a.equals(b))
1016    }
1017}
1018
1019// Poseidon2 hash implementation constants
1020const WIDTH: usize = 12;
1021const RATE: usize = 8;
1022const ROUNDS_F_HALF: usize = 4;
1023const ROUNDS_P: usize = 22;
1024
1025// External round constants (8 rounds × 12 elements).
1026// Source: poseidon_crypto/hash/poseidon2_goldilocks/config.go (elliottech/poseidon_crypto)
1027const EXTERNAL_CONSTANTS: [[u64; WIDTH]; 8] = [
1028    [
1029        15492826721047263190, 11728330187201910315, 8836021247773420868, 16777404051263952451,
1030        5510875212538051896, 6173089941271892285, 2927757366422211339, 10340958981325008808,
1031        8541987352684552425, 9739599543776434497, 15073950188101532019, 12084856431752384512,
1032    ],
1033    [
1034        4584713381960671270, 8807052963476652830, 54136601502601741, 4872702333905478703,
1035        5551030319979516287, 12889366755535460989, 16329242193178844328, 412018088475211848,
1036        10505784623379650541, 9758812378619434837, 7421979329386275117, 375240370024755551,
1037    ],
1038    [
1039        3331431125640721931, 15684937309956309981, 578521833432107983, 14379242000670861838,
1040        17922409828154900976, 8153494278429192257, 15904673920630731971, 11217863998460634216,
1041        3301540195510742136, 9937973023749922003, 3059102938155026419, 1895288289490976132,
1042    ],
1043    [
1044        5580912693628927540, 10064804080494788323, 9582481583369602410, 10186259561546797986,
1045        247426333829703916, 13193193905461376067, 6386232593701758044, 17954717245501896472,
1046        1531720443376282699, 2455761864255501970, 11234429217864304495, 4746959618548874102,
1047    ],
1048    [
1049        13571697342473846203, 17477857865056504753, 15963032953523553760, 16033593225279635898,
1050        14252634232868282405, 8219748254835277737, 7459165569491914711, 15855939513193752003,
1051        16788866461340278896, 7102224659693946577, 3024718005636976471, 13695468978618890430,
1052    ],
1053    [
1054        8214202050877825436, 2670727992739346204, 16259532062589659211, 11869922396257088411,
1055        3179482916972760137, 13525476046633427808, 3217337278042947412, 14494689598654046340,
1056        15837379330312175383, 8029037639801151344, 2153456285263517937, 8301106462311849241,
1057    ],
1058    [
1059        13294194396455217955, 17394768489610594315, 12847609130464867455, 14015739446356528640,
1060        5879251655839607853, 9747000124977436185, 8950393546890284269, 10765765936405694368,
1061        14695323910334139959, 16366254691123000864, 15292774414889043182, 10910394433429313384,
1062    ],
1063    [
1064        17253424460214596184, 3442854447664030446, 3005570425335613727, 10859158614900201063,
1065        9763230642109343539, 6647722546511515039, 909012944955815706, 18101204076790399111,
1066        11588128829349125809, 15863878496612806566, 5201119062417750399, 176665553780565743,
1067    ],
1068];
1069
1070// Internal round constants (22 partial rounds).
1071// Source: poseidon_crypto/hash/poseidon2_goldilocks/config.go (elliottech/poseidon_crypto)
1072const INTERNAL_CONSTANTS: [u64; ROUNDS_P] = [
1073    11921381764981422944, 10318423381711320787, 8291411502347000766, 229948027109387563,
1074    9152521390190983261, 7129306032690285515, 15395989607365232011, 8641397269074305925,
1075    17256848792241043600, 6046475228902245682, 12041608676381094092, 12785542378683951657,
1076    14546032085337914034, 3304199118235116851, 16499627707072547655, 10386478025625759321,
1077    13475579315436919170, 16042710511297532028, 1411266850385657080, 9024840976168649958,
1078    14047056970978379368, 838728605080212101,
1079];
1080
1081// Matrix diagonal constants for Poseidon2.
1082// Source: Plonky3 — https://github.com/Plonky3/Plonky3/blob/eeb4e37b/goldilocks/src/poseidon2.rs#L28
1083const MATRIX_DIAG_12_U64: [u64; WIDTH] = [
1084    0xc3b6c08e23ba9300, 0xd84b5de94a324fb6, 0x0d0c371c5b35b84f, 0x7964f570e7188037,
1085    0x5daf18bbd996604b, 0x6743bc47b9595257, 0x5528b9362c59bb70, 0xac45e25b7127b68b,
1086    0xa2077d7dfbb606b5, 0xf3faac6faee378ae, 0x0c6388b51545e883, 0xd27dbb6944917b60,
1087];
1088
1089/// Hashes a slice of Goldilocks field elements to a single Fp5Element.
1090///
1091/// This is the main Poseidon2 hash function. It takes an arbitrary number of
1092/// Goldilocks field elements and produces a 40-byte hash (Fp5Element).
1093///
1094/// # Example
1095///
1096/// ```rust
1097/// use poseidon_hash::{Goldilocks, hash_to_quintic_extension};
1098///
1099/// let elements = vec![
1100///     Goldilocks::from_canonical_u64(1),
1101///     Goldilocks::from_canonical_u64(2),
1102///     Goldilocks::from_canonical_u64(3),
1103/// ];
1104/// let hash = hash_to_quintic_extension(&elements);
1105/// ```
1106pub fn hash_to_quintic_extension(input: &[Goldilocks]) -> Fp5Element {
1107    hash_n_to_m_no_pad(input, 5)
1108}
1109
1110fn hash_n_to_m_no_pad(input: &[Goldilocks], num_outputs: usize) -> Fp5Element {
1111    let mut perm = [Goldilocks::zero(); WIDTH];
1112    
1113    // Process input in chunks of RATE
1114    for chunk in input.chunks(RATE) {
1115        for (j, &val) in chunk.iter().enumerate() {
1116            perm[j] = val;
1117        }
1118        permute(&mut perm);
1119    }
1120    
1121    // Extract outputs (num_outputs is always 5 for our use case)
1122    let mut output_idx = 0;
1123    let mut outputs = [Goldilocks::zero(); 5];
1124    loop {
1125        for i in 0..RATE {
1126            if output_idx < num_outputs {
1127                outputs[output_idx] = perm[i];
1128                output_idx += 1;
1129            }
1130            if output_idx == num_outputs {
1131                return Fp5Element(outputs);
1132            }
1133        }
1134        permute(&mut perm);
1135    }
1136}
1137
1138/// Hash output type: 4 Goldilocks elements (32 bytes)
1139/// Equivalent to Go's HashOut type
1140pub type HashOut = [Goldilocks; 4];
1141
1142/// Serialises a `HashOut` to 32 little-endian bytes.
1143///
1144/// Each of the 4 Goldilocks elements contributes 8 bytes in canonical (reduced) form.
1145///
1146/// # Example
1147///
1148/// ```rust
1149/// use poseidon_hash::{Goldilocks, hash_no_pad, hash_out_to_bytes_le};
1150///
1151/// let h = hash_no_pad(&[Goldilocks::from_canonical_u64(1)]);
1152/// let bytes = hash_out_to_bytes_le(h);
1153/// assert_eq!(bytes.len(), 32);
1154/// ```
1155pub fn hash_out_to_bytes_le(h: HashOut) -> [u8; 32] {
1156    let mut out = [0u8; 32];
1157    for (i, elem) in h.iter().enumerate() {
1158        out[i * 8..(i + 1) * 8].copy_from_slice(&elem.to_bytes_le());
1159    }
1160    out
1161}
1162
1163/// Deserialises a `HashOut` from 32 little-endian bytes.
1164///
1165/// Returns `Err` if `bytes` is not exactly 32 bytes or if any 8-byte chunk
1166/// encodes a non-canonical Goldilocks element (value >= MODULUS).
1167///
1168/// # Example
1169///
1170/// ```rust
1171/// use poseidon_hash::{Goldilocks, hash_no_pad, hash_out_to_bytes_le, hash_out_from_bytes_le};
1172///
1173/// let original = hash_no_pad(&[Goldilocks::from_canonical_u64(42)]);
1174/// let bytes = hash_out_to_bytes_le(original);
1175/// let restored = hash_out_from_bytes_le(&bytes).unwrap();
1176/// assert_eq!(original, restored);
1177/// ```
1178pub fn hash_out_from_bytes_le(bytes: &[u8]) -> Result<HashOut, String> {
1179    if bytes.len() != 32 {
1180        return Err(format!("expected 32 bytes, got {}", bytes.len()));
1181    }
1182    let mut out = [Goldilocks::zero(); 4];
1183    for i in 0..4 {
1184        out[i] = Goldilocks::from_bytes_le(&bytes[i * 8..(i + 1) * 8])?;
1185    }
1186    Ok(out)
1187}
1188
1189/// Hashes a slice of Goldilocks field elements, producing exactly 4 output elements.
1190/// Equivalent to Go's HashNoPad function.
1191/// 
1192/// # Arguments
1193/// * `input` - Slice of Goldilocks field elements to hash
1194/// 
1195/// # Returns
1196/// Array of 4 Goldilocks elements (HashOut)
1197/// 
1198/// # Example
1199/// ```
1200/// use poseidon_hash::{Goldilocks, hash_no_pad};
1201/// 
1202/// let elements = vec![
1203///     Goldilocks::from_canonical_u64(1),
1204///     Goldilocks::from_canonical_u64(2),
1205///     // ... more elements
1206/// ];
1207/// let hash = hash_no_pad(&elements);
1208/// assert_eq!(hash.len(), 4);
1209/// ```
1210pub fn hash_no_pad(input: &[Goldilocks]) -> HashOut {
1211    hash_n_to_hash_no_pad(input)
1212}
1213
1214/// Internal function to hash to exactly 4 elements
1215fn hash_n_to_hash_no_pad(input: &[Goldilocks]) -> HashOut {
1216    let mut perm = [Goldilocks::zero(); WIDTH];
1217    
1218    // Process input in chunks of RATE
1219    for chunk in input.chunks(RATE) {
1220        for (j, &val) in chunk.iter().enumerate() {
1221            perm[j] = val;
1222        }
1223        permute(&mut perm);
1224    }
1225    
1226    // Extract exactly 4 outputs
1227    let mut outputs = [Goldilocks::zero(); 4];
1228    let mut output_idx = 0;
1229    loop {
1230        for i in 0..RATE {
1231            if output_idx < 4 {
1232                outputs[output_idx] = perm[i];
1233                output_idx += 1;
1234            }
1235            if output_idx == 4 {
1236                return outputs;
1237            }
1238        }
1239        permute(&mut perm);
1240    }
1241}
1242
1243/// Combines multiple hash outputs into a single hash output.
1244/// Equivalent to Go's HashNToOne function.
1245/// 
1246/// # Arguments
1247/// * `hashes` - Slice of HashOut values to combine
1248/// 
1249/// # Returns
1250/// Single HashOut combining all inputs
1251/// 
1252/// # Example
1253/// ```
1254/// use poseidon_hash::{Goldilocks, hash_no_pad, hash_n_to_one};
1255/// 
1256/// let hash1 = hash_no_pad(&[Goldilocks::from_canonical_u64(1)]);
1257/// let hash2 = hash_no_pad(&[Goldilocks::from_canonical_u64(2)]);
1258/// let combined = hash_n_to_one(&[hash1, hash2]);
1259/// ```
1260pub fn hash_n_to_one(hashes: &[HashOut]) -> HashOut {
1261    if hashes.is_empty() {
1262        return [Goldilocks::zero(); 4];
1263    }
1264    if hashes.len() == 1 {
1265        return hashes[0];
1266    }
1267    
1268    // Combine hashes pairwise using HashTwoToOne
1269    let mut result = hash_two_to_one(hashes[0], hashes[1]);
1270    for i in 2..hashes.len() {
1271        result = hash_two_to_one(result, hashes[i]);
1272    }
1273    result
1274}
1275
1276/// Combines two hash outputs into one.
1277/// Equivalent to Go's HashTwoToOne function.
1278fn hash_two_to_one(input1: HashOut, input2: HashOut) -> HashOut {
1279    // Combine 8 elements (4 from each hash) into 4 elements
1280    let combined = [
1281        input1[0], input1[1], input1[2], input1[3],
1282        input2[0], input2[1], input2[2], input2[3],
1283    ];
1284    hash_n_to_hash_no_pad(&combined)
1285}
1286
1287/// Returns an empty hash output (all zeros).
1288/// Equivalent to Go's EmptyHashOut function.
1289pub fn empty_hash_out() -> HashOut {
1290    [Goldilocks::zero(); 4]
1291}
1292
1293/// Applies the Poseidon2 permutation to a 12-element state array.
1294///
1295/// This is the core permutation function used by the hash. It applies:
1296/// - External linear layer
1297/// - Full rounds (first half)
1298/// - Partial rounds
1299/// - Full rounds (second half)
1300pub fn permute(input: &mut [Goldilocks; WIDTH]) {
1301    external_linear_layer(input);
1302    full_rounds(input, 0);
1303    partial_rounds(input);
1304    full_rounds(input, ROUNDS_F_HALF);
1305}
1306
1307fn full_rounds(state: &mut [Goldilocks; WIDTH], start: usize) {
1308    for r in start..start + ROUNDS_F_HALF {
1309        add_rc(state, r);
1310        sbox(state);
1311        external_linear_layer(state);
1312    }
1313}
1314
1315fn partial_rounds(state: &mut [Goldilocks; WIDTH]) {
1316    for r in 0..ROUNDS_P {
1317        add_rci(state, r);
1318        sbox_p(0, state);
1319        internal_linear_layer(state);
1320    }
1321}
1322
1323fn external_linear_layer(s: &mut [Goldilocks; WIDTH]) {
1324    // Process in 4-element windows for efficiency
1325    for i in 0..3 { // 3 windows of 4 elements each
1326        let t0 = s[4*i].add(&s[4*i+1]);     // s0+s1
1327        let t1 = s[4*i+2].add(&s[4*i+3]);   // s2+s3
1328        let t2 = t0.add(&t1);               // t0+t1 = s0+s1+s2+s3
1329        let t3 = t2.add(&s[4*i+1]);         // t2+s1 = s0+2s1+s2+s3
1330        let t4 = t2.add(&s[4*i+3]);         // t2+s3 = s0+s1+s2+2s3
1331        let t5 = s[4*i].double();           // 2s0
1332        let t6 = s[4*i+2].double();         // 2s2
1333        
1334        s[4*i] = t3.add(&t0);
1335        s[4*i+1] = t6.add(&t3);
1336        s[4*i+2] = t1.add(&t4);
1337        s[4*i+3] = t5.add(&t4);
1338    }
1339    
1340    // Add sums to each element
1341    // Unroll loops for better performance (WIDTH is constant 12, so we have 3 groups of 4)
1342    let sum0 = s[0].add(&s[4]).add(&s[8]);
1343    let sum1 = s[1].add(&s[5]).add(&s[9]);
1344    let sum2 = s[2].add(&s[6]).add(&s[10]);
1345    let sum3 = s[3].add(&s[7]).add(&s[11]);
1346    
1347    for i in 0..WIDTH {
1348        s[i] = s[i].add(match i % 4 {
1349            0 => &sum0,
1350            1 => &sum1,
1351            2 => &sum2,
1352            3 => &sum3,
1353            _ => unreachable!(),
1354        });
1355    }
1356}
1357
1358fn internal_linear_layer(state: &mut [Goldilocks; WIDTH]) {
1359    let mut sum = state[0];
1360    for i in 1..WIDTH {
1361        sum = sum.add(&state[i]);
1362    }
1363    for i in 0..WIDTH {
1364        state[i] = state[i].mul(&Goldilocks(MATRIX_DIAG_12_U64[i])).add(&sum);
1365    }
1366}
1367
1368fn add_rc(state: &mut [Goldilocks; WIDTH], external_round: usize) {
1369    for i in 0..WIDTH {
1370        state[i] = state[i].add(&Goldilocks(EXTERNAL_CONSTANTS[external_round][i]));
1371    }
1372}
1373
1374fn add_rci(state: &mut [Goldilocks; WIDTH], round: usize) {
1375    state[0] = state[0].add(&Goldilocks(INTERNAL_CONSTANTS[round]));
1376}
1377
1378fn sbox(state: &mut [Goldilocks; WIDTH]) {
1379    for i in 0..WIDTH {
1380        sbox_p(i, state);
1381    }
1382}
1383
1384fn sbox_p(index: usize, state: &mut [Goldilocks; WIDTH]) {
1385    // Poseidon2 S-box: x^7
1386    // Computed as: x^7 = (x^2 * x)^2 * x
1387    let tmp = state[index];
1388    let tmp_square = tmp.square();
1389    let tmp_sixth = tmp_square.mul(&tmp).square();
1390    state[index] = tmp_sixth.mul(&tmp);
1391}
1392
1393#[cfg(test)]
1394mod tests;
1395
1396#[cfg(test)]
1397mod poseidon2_tests;
1398
1399pub mod merkle;