ed25519_to_curve25519 0.2.1

#![allow(clippy::all)]
use core::ops::{Add, AddAssign};
use core::ops::{Mul, MulAssign};
use core::ops::{Sub, SubAssign};

#[derive(Copy, Clone)]
pub struct FieldElement2625([u32; 10]);

impl<'b> AddAssign<&'b FieldElement2625> for FieldElement2625 {
    fn add_assign(&mut self, _rhs: &'b FieldElement2625) {
        for i in 0..10 {
            self.0[i] += _rhs.0[i];
        }
    }
}

impl<'a, 'b> Add<&'b FieldElement2625> for &'a FieldElement2625 {
    type Output = FieldElement2625;
    fn add(self, _rhs: &'b FieldElement2625) -> FieldElement2625 {
        let mut output = *self;
        output += _rhs;
        output
    }
}

impl<'b> SubAssign<&'b FieldElement2625> for FieldElement2625 {
    fn sub_assign(&mut self, _rhs: &'b FieldElement2625) {
        // See comment in FieldElement51::Sub
        //
        // Compute a - b as ((a + 2^4 * p) - b) to avoid underflow.
        let b = &_rhs.0;
        self.0 = FieldElement2625::reduce([
            ((self.0[0] + (0x3ffffed << 4)) - b[0]) as u64,
            ((self.0[1] + (0x1ffffff << 4)) - b[1]) as u64,
            ((self.0[2] + (0x3ffffff << 4)) - b[2]) as u64,
            ((self.0[3] + (0x1ffffff << 4)) - b[3]) as u64,
            ((self.0[4] + (0x3ffffff << 4)) - b[4]) as u64,
            ((self.0[5] + (0x1ffffff << 4)) - b[5]) as u64,
            ((self.0[6] + (0x3ffffff << 4)) - b[6]) as u64,
            ((self.0[7] + (0x1ffffff << 4)) - b[7]) as u64,
            ((self.0[8] + (0x3ffffff << 4)) - b[8]) as u64,
            ((self.0[9] + (0x1ffffff << 4)) - b[9]) as u64,
        ])
        .0;
    }
}

impl<'a, 'b> Sub<&'b FieldElement2625> for &'a FieldElement2625 {
    type Output = FieldElement2625;
    fn sub(self, _rhs: &'b FieldElement2625) -> FieldElement2625 {
        let mut output = *self;
        output -= _rhs;
        output
    }
}

impl<'b> MulAssign<&'b FieldElement2625> for FieldElement2625 {
    fn mul_assign(&mut self, _rhs: &'b FieldElement2625) {
        let result = (self as &FieldElement2625) * _rhs;
        self.0 = result.0;
    }
}

impl<'a, 'b> Mul<&'b FieldElement2625> for &'a FieldElement2625 {
    type Output = FieldElement2625;
    fn mul(self, _rhs: &'b FieldElement2625) -> FieldElement2625 {
        /// Helper function to multiply two 32-bit integers with 64 bits
        /// of output.
        #[inline(always)]
        fn m(x: u32, y: u32) -> u64 {
            (x as u64) * (y as u64)
        }

        // Alias self, _rhs for more readable formulas
        let x: &[u32; 10] = &self.0;
        let y: &[u32; 10] = &_rhs.0;

        // We assume that the input limbs x[i], y[i] are bounded by:
        //
        // x[i], y[i] < 2^(26 + b) if i even
        // x[i], y[i] < 2^(25 + b) if i odd
        //
        // where b is a (real) parameter representing the excess bits of
        // the limbs.  We track the bitsizes of all variables through
        // the computation and solve at the end for the allowable
        // headroom bitsize b (which determines how many additions we
        // can perform between reductions or multiplications).

        let y1_19 = 19 * y[1]; // This fits in a u32
        let y2_19 = 19 * y[2]; // iff 26 + b + lg(19) < 32
        let y3_19 = 19 * y[3]; // if  b < 32 - 26 - 4.248 = 1.752
        let y4_19 = 19 * y[4];
        let y5_19 = 19 * y[5]; // below, b<2.5: this is a bottleneck,
        let y6_19 = 19 * y[6]; // could be avoided by promoting to
        let y7_19 = 19 * y[7]; // u64 here instead of in m()
        let y8_19 = 19 * y[8];
        let y9_19 = 19 * y[9];

        // What happens when we multiply x[i] with y[j] and place the
        // result into the (i+j)-th limb?
        //
        // x[i]      represents the value x[i]*2^ceil(i*51/2)
        // y[j]      represents the value y[j]*2^ceil(j*51/2)
        // z[i+j]    represents the value z[i+j]*2^ceil((i+j)*51/2)
        // x[i]*y[j] represents the value x[i]*y[i]*2^(ceil(i*51/2)+ceil(j*51/2))
        //
        // Since the radix is already accounted for, the result placed
        // into the (i+j)-th limb should be
        //
        // x[i]*y[i]*2^(ceil(i*51/2)+ceil(j*51/2) - ceil((i+j)*51/2)).
        //
        // The value of ceil(i*51/2)+ceil(j*51/2) - ceil((i+j)*51/2) is
        // 1 when both i and j are odd, and 0 otherwise.  So we add
        //
        //   x[i]*y[j] if either i or j is even
        // 2*x[i]*y[j] if i and j are both odd
        //
        // by using precomputed multiples of x[i] for odd i:

        let x1_2 = 2 * x[1]; // This fits in a u32 iff 25 + b + 1 < 32
        let x3_2 = 2 * x[3]; //                    iff b < 6
        let x5_2 = 2 * x[5];
        let x7_2 = 2 * x[7];
        let x9_2 = 2 * x[9];

        let z0 = m(x[0], y[0])
            + m(x1_2, y9_19)
            + m(x[2], y8_19)
            + m(x3_2, y7_19)
            + m(x[4], y6_19)
            + m(x5_2, y5_19)
            + m(x[6], y4_19)
            + m(x7_2, y3_19)
            + m(x[8], y2_19)
            + m(x9_2, y1_19);
        let z1 = m(x[0], y[1])
            + m(x[1], y[0])
            + m(x[2], y9_19)
            + m(x[3], y8_19)
            + m(x[4], y7_19)
            + m(x[5], y6_19)
            + m(x[6], y5_19)
            + m(x[7], y4_19)
            + m(x[8], y3_19)
            + m(x[9], y2_19);
        let z2 = m(x[0], y[2])
            + m(x1_2, y[1])
            + m(x[2], y[0])
            + m(x3_2, y9_19)
            + m(x[4], y8_19)
            + m(x5_2, y7_19)
            + m(x[6], y6_19)
            + m(x7_2, y5_19)
            + m(x[8], y4_19)
            + m(x9_2, y3_19);
        let z3 = m(x[0], y[3])
            + m(x[1], y[2])
            + m(x[2], y[1])
            + m(x[3], y[0])
            + m(x[4], y9_19)
            + m(x[5], y8_19)
            + m(x[6], y7_19)
            + m(x[7], y6_19)
            + m(x[8], y5_19)
            + m(x[9], y4_19);
        let z4 = m(x[0], y[4])
            + m(x1_2, y[3])
            + m(x[2], y[2])
            + m(x3_2, y[1])
            + m(x[4], y[0])
            + m(x5_2, y9_19)
            + m(x[6], y8_19)
            + m(x7_2, y7_19)
            + m(x[8], y6_19)
            + m(x9_2, y5_19);
        let z5 = m(x[0], y[5])
            + m(x[1], y[4])
            + m(x[2], y[3])
            + m(x[3], y[2])
            + m(x[4], y[1])
            + m(x[5], y[0])
            + m(x[6], y9_19)
            + m(x[7], y8_19)
            + m(x[8], y7_19)
            + m(x[9], y6_19);
        let z6 = m(x[0], y[6])
            + m(x1_2, y[5])
            + m(x[2], y[4])
            + m(x3_2, y[3])
            + m(x[4], y[2])
            + m(x5_2, y[1])
            + m(x[6], y[0])
            + m(x7_2, y9_19)
            + m(x[8], y8_19)
            + m(x9_2, y7_19);
        let z7 = m(x[0], y[7])
            + m(x[1], y[6])
            + m(x[2], y[5])
            + m(x[3], y[4])
            + m(x[4], y[3])
            + m(x[5], y[2])
            + m(x[6], y[1])
            + m(x[7], y[0])
            + m(x[8], y9_19)
            + m(x[9], y8_19);
        let z8 = m(x[0], y[8])
            + m(x1_2, y[7])
            + m(x[2], y[6])
            + m(x3_2, y[5])
            + m(x[4], y[4])
            + m(x5_2, y[3])
            + m(x[6], y[2])
            + m(x7_2, y[1])
            + m(x[8], y[0])
            + m(x9_2, y9_19);
        let z9 = m(x[0], y[9])
            + m(x[1], y[8])
            + m(x[2], y[7])
            + m(x[3], y[6])
            + m(x[4], y[5])
            + m(x[5], y[4])
            + m(x[6], y[3])
            + m(x[7], y[2])
            + m(x[8], y[1])
            + m(x[9], y[0]);

        // How big is the contribution to z[i+j] from x[i], y[j]?
        //
        // Using the bounds above, we get:
        //
        // i even, j even:   x[i]*y[j] <   2^(26+b)*2^(26+b) = 2*2^(51+2*b)
        // i  odd, j even:   x[i]*y[j] <   2^(25+b)*2^(26+b) = 1*2^(51+2*b)
        // i even, j  odd:   x[i]*y[j] <   2^(26+b)*2^(25+b) = 1*2^(51+2*b)
        // i  odd, j  odd: 2*x[i]*y[j] < 2*2^(25+b)*2^(25+b) = 1*2^(51+2*b)
        //
        // We perform inline reduction mod p by replacing 2^255 by 19
        // (since 2^255 - 19 = 0 mod p).  This adds a factor of 19, so
        // we get the bounds (z0 is the biggest one, but calculated for
        // posterity here in case finer estimation is needed later):
        //
        //  z0 < ( 2 + 1*19 + 2*19 + 1*19 + 2*19 + 1*19 + 2*19 + 1*19 + 2*19 + 1*19 )*2^(51 + 2b) = 249*2^(51 + 2*b)
        //  z1 < ( 1 +  1   + 1*19 + 1*19 + 1*19 + 1*19 + 1*19 + 1*19 + 1*19 + 1*19 )*2^(51 + 2b) = 154*2^(51 + 2*b)
        //  z2 < ( 2 +  1   +  2   + 1*19 + 2*19 + 1*19 + 2*19 + 1*19 + 2*19 + 1*19 )*2^(51 + 2b) = 195*2^(51 + 2*b)
        //  z3 < ( 1 +  1   +  1   +  1   + 1*19 + 1*19 + 1*19 + 1*19 + 1*19 + 1*19 )*2^(51 + 2b) = 118*2^(51 + 2*b)
        //  z4 < ( 2 +  1   +  2   +  1   +  2   + 1*19 + 2*19 + 1*19 + 2*19 + 1*19 )*2^(51 + 2b) = 141*2^(51 + 2*b)
        //  z5 < ( 1 +  1   +  1   +  1   +  1   +  1   + 1*19 + 1*19 + 1*19 + 1*19 )*2^(51 + 2b) =  82*2^(51 + 2*b)
        //  z6 < ( 2 +  1   +  2   +  1   +  2   +  1   +  2   + 1*19 + 2*19 + 1*19 )*2^(51 + 2b) =  87*2^(51 + 2*b)
        //  z7 < ( 1 +  1   +  1   +  1   +  1   +  1   +  1   +  1   + 1*19 + 1*19 )*2^(51 + 2b) =  46*2^(51 + 2*b)
        //  z6 < ( 2 +  1   +  2   +  1   +  2   +  1   +  2   +  1   +  2   + 1*19 )*2^(51 + 2b) =  33*2^(51 + 2*b)
        //  z7 < ( 1 +  1   +  1   +  1   +  1   +  1   +  1   +  1   +  1   +  1   )*2^(51 + 2b) =  10*2^(51 + 2*b)
        //
        // So z[0] fits into a u64 if 51 + 2*b + lg(249) < 64
        //                         if b < 2.5.
        FieldElement2625::reduce([z0, z1, z2, z3, z4, z5, z6, z7, z8, z9])
    }
}

impl FieldElement2625 {
    /// Construct zero.
    pub fn zero() -> FieldElement2625 {
        FieldElement2625([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    }

    /// Construct one.
    pub fn one() -> FieldElement2625 {
        FieldElement2625([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    }

    /// Load a `FieldElement51` from the low 255 bits of a 256-bit
    /// input.
    ///
    /// # Warning
    ///
    /// This function does not check that the input used the canonical
    /// representative.  It masks the high bit, but it will happily
    /// decode 2^255 - 18 to 1.  Applications that require a canonical
    /// encoding of every field element should decode, re-encode to
    /// the canonical encoding, and check that the input was
    /// canonical.
    pub fn from_bytes(data: &[u8; 32]) -> FieldElement2625 {
        //FeFromBytes
        #[inline]
        fn load3(b: &[u8]) -> u64 {
            (b[0] as u64) | ((b[1] as u64) << 8) | ((b[2] as u64) << 16)
        }

        #[inline]
        fn load4(b: &[u8]) -> u64 {
            (b[0] as u64) | ((b[1] as u64) << 8) | ((b[2] as u64) << 16) | ((b[3] as u64) << 24)
        }

        let mut h = [0u64; 10];
        const LOW_23_BITS: u64 = (1 << 23) - 1;
        h[0] = load4(&data[0..]);
        h[1] = load3(&data[4..]) << 6;
        h[2] = load3(&data[7..]) << 5;
        h[3] = load3(&data[10..]) << 3;
        h[4] = load3(&data[13..]) << 2;
        h[5] = load4(&data[16..]);
        h[6] = load3(&data[20..]) << 7;
        h[7] = load3(&data[23..]) << 5;
        h[8] = load3(&data[26..]) << 4;
        h[9] = (load3(&data[29..]) & LOW_23_BITS) << 2;

        FieldElement2625::reduce(h)
    }

    /// Serialize this `FieldElement51` to a 32-byte array.  The
    /// encoding is canonical.
    pub fn to_bytes(&self) -> [u8; 32] {
        let inp = &self.0;
        // Reduce the value represented by `in` to the range [0,2*p)
        let mut h: [u32; 10] = FieldElement2625::reduce([
            // XXX this cast is annoying
            inp[0] as u64,
            inp[1] as u64,
            inp[2] as u64,
            inp[3] as u64,
            inp[4] as u64,
            inp[5] as u64,
            inp[6] as u64,
            inp[7] as u64,
            inp[8] as u64,
            inp[9] as u64,
        ])
        .0;

        // Let h be the value to encode.
        //
        // Write h = pq + r with 0 <= r < p.  We want to compute r = h mod p.
        //
        // Since h < 2*p, q = 0 or 1, with q = 0 when h < p and q = 1 when h >= p.
        //
        // Notice that h >= p <==> h + 19 >= p + 19 <==> h + 19 >= 2^255.
        // Therefore q can be computed as the carry bit of h + 19.

        let mut q: u32 = (h[0] + 19) >> 26;
        q = (h[1] + q) >> 25;
        q = (h[2] + q) >> 26;
        q = (h[3] + q) >> 25;
        q = (h[4] + q) >> 26;
        q = (h[5] + q) >> 25;
        q = (h[6] + q) >> 26;
        q = (h[7] + q) >> 25;
        q = (h[8] + q) >> 26;
        q = (h[9] + q) >> 25;

        debug_assert!(q == 0 || q == 1);

        // Now we can compute r as r = h - pq = r - (2^255-19)q = r + 19q - 2^255q

        const LOW_25_BITS: u32 = (1 << 25) - 1;
        const LOW_26_BITS: u32 = (1 << 26) - 1;

        h[0] += 19 * q;

        // Now carry the result to compute r + 19q...
        h[1] += h[0] >> 26;
        h[0] = h[0] & LOW_26_BITS;
        h[2] += h[1] >> 25;
        h[1] = h[1] & LOW_25_BITS;
        h[3] += h[2] >> 26;
        h[2] = h[2] & LOW_26_BITS;
        h[4] += h[3] >> 25;
        h[3] = h[3] & LOW_25_BITS;
        h[5] += h[4] >> 26;
        h[4] = h[4] & LOW_26_BITS;
        h[6] += h[5] >> 25;
        h[5] = h[5] & LOW_25_BITS;
        h[7] += h[6] >> 26;
        h[6] = h[6] & LOW_26_BITS;
        h[8] += h[7] >> 25;
        h[7] = h[7] & LOW_25_BITS;
        h[9] += h[8] >> 26;
        h[8] = h[8] & LOW_26_BITS;

        // ... but instead of carrying the value
        // (h[9] >> 25) = q*2^255 into another limb,
        // discard it, subtracting the value from h.
        debug_assert!((h[9] >> 25) == 0 || (h[9] >> 25) == 1);
        h[9] = h[9] & LOW_25_BITS;

        let mut s = [0u8; 32];
        s[0] = (h[0] >> 0) as u8;
        s[1] = (h[0] >> 8) as u8;
        s[2] = (h[0] >> 16) as u8;
        s[3] = ((h[0] >> 24) | (h[1] << 2)) as u8;
        s[4] = (h[1] >> 6) as u8;
        s[5] = (h[1] >> 14) as u8;
        s[6] = ((h[1] >> 22) | (h[2] << 3)) as u8;
        s[7] = (h[2] >> 5) as u8;
        s[8] = (h[2] >> 13) as u8;
        s[9] = ((h[2] >> 21) | (h[3] << 5)) as u8;
        s[10] = (h[3] >> 3) as u8;
        s[11] = (h[3] >> 11) as u8;
        s[12] = ((h[3] >> 19) | (h[4] << 6)) as u8;
        s[13] = (h[4] >> 2) as u8;
        s[14] = (h[4] >> 10) as u8;
        s[15] = (h[4] >> 18) as u8;
        s[16] = (h[5] >> 0) as u8;
        s[17] = (h[5] >> 8) as u8;
        s[18] = (h[5] >> 16) as u8;
        s[19] = ((h[5] >> 24) | (h[6] << 1)) as u8;
        s[20] = (h[6] >> 7) as u8;
        s[21] = (h[6] >> 15) as u8;
        s[22] = ((h[6] >> 23) | (h[7] << 3)) as u8;
        s[23] = (h[7] >> 5) as u8;
        s[24] = (h[7] >> 13) as u8;
        s[25] = ((h[7] >> 21) | (h[8] << 4)) as u8;
        s[26] = (h[8] >> 4) as u8;
        s[27] = (h[8] >> 12) as u8;
        s[28] = ((h[8] >> 20) | (h[9] << 6)) as u8;
        s[29] = (h[9] >> 2) as u8;
        s[30] = (h[9] >> 10) as u8;
        s[31] = (h[9] >> 18) as u8;

        // Check that high bit is cleared
        debug_assert!((s[31] & 0b1000_0000u8) == 0u8);

        s
    }

    /// Given unreduced coefficients `z[0], ..., z[9]` of any size,
    /// carry and reduce them mod p to obtain a `FieldElement2625`
    /// whose coefficients have excess `b < 0.007`.
    ///
    /// In other words, each coefficient of the result is bounded by
    /// either `2^(25 + 0.007)` or `2^(26 + 0.007)`, as appropriate.
    fn reduce(mut z: [u64; 10]) -> FieldElement2625 {
        const LOW_25_BITS: u64 = (1 << 25) - 1;
        const LOW_26_BITS: u64 = (1 << 26) - 1;

        /// Carry the value from limb i = 0..8 to limb i+1
        #[inline(always)]
        fn carry(z: &mut [u64; 10], i: usize) {
            debug_assert!(i < 9);
            if i % 2 == 0 {
                // Even limbs have 26 bits
                z[i + 1] += z[i] >> 26;
                z[i] &= LOW_26_BITS;
            } else {
                // Odd limbs have 25 bits
                z[i + 1] += z[i] >> 25;
                z[i] &= LOW_25_BITS;
            }
        }

        // Perform two halves of the carry chain in parallel.
        carry(&mut z, 0);
        carry(&mut z, 4);
        carry(&mut z, 1);
        carry(&mut z, 5);
        carry(&mut z, 2);
        carry(&mut z, 6);
        carry(&mut z, 3);
        carry(&mut z, 7);
        // Since z[3] < 2^64, c < 2^(64-25) = 2^39,
        // so    z[4] < 2^26 + 2^39 < 2^39.0002
        carry(&mut z, 4);
        carry(&mut z, 8);
        // Now z[4] < 2^26
        // and z[5] < 2^25 + 2^13.0002 < 2^25.0004 (good enough)

        // Last carry has a multiplication by 19:
        z[0] += 19 * (z[9] >> 25);
        z[9] &= LOW_25_BITS;

        // Since z[9] < 2^64, c < 2^(64-25) = 2^39,
        //    so z[0] + 19*c < 2^26 + 2^43.248 < 2^43.249.
        carry(&mut z, 0);
        // Now z[1] < 2^25 - 2^(43.249 - 26)
        //          < 2^25.007 (good enough)
        // and we're done.

        FieldElement2625([
            z[0] as u32,
            z[1] as u32,
            z[2] as u32,
            z[3] as u32,
            z[4] as u32,
            z[5] as u32,
            z[6] as u32,
            z[7] as u32,
            z[8] as u32,
            z[9] as u32,
        ])
    }

    /// Given `k > 0`, return `self^(2^k)`.
    pub fn pow2k(&self, k: u32) -> FieldElement2625 {
        debug_assert!(k > 0);
        let mut z = self.square();
        for _ in 1..k {
            z = z.square();
        }
        z
    }

    /// Compute `self^2`.
    pub fn square(&self) -> FieldElement2625 {
        FieldElement2625::reduce(self.square_inner())
    }

    fn square_inner(&self) -> [u64; 10] {
        // Optimized version of multiplication for the case of squaring.
        // Pre- and post- conditions identical to multiplication function.
        let x = &self.0;
        let x0_2 = 2 * x[0];
        let x1_2 = 2 * x[1];
        let x2_2 = 2 * x[2];
        let x3_2 = 2 * x[3];
        let x4_2 = 2 * x[4];
        let x5_2 = 2 * x[5];
        let x6_2 = 2 * x[6];
        let x7_2 = 2 * x[7];
        let x5_19 = 19 * x[5];
        let x6_19 = 19 * x[6];
        let x7_19 = 19 * x[7];
        let x8_19 = 19 * x[8];
        let x9_19 = 19 * x[9];

        /// Helper function to multiply two 32-bit integers with 64 bits
        /// of output.
        #[inline(always)]
        fn m(x: u32, y: u32) -> u64 {
            (x as u64) * (y as u64)
        }

        // This block is rearranged so that instead of doing a 32-bit multiplication by 38, we do a
        // 64-bit multiplication by 2 on the results.  This is because lg(38) is too big: we would
        // have less than 1 bit of headroom left, which is too little.
        let mut z = [0u64; 10];
        z[0] = m(x[0], x[0])
            + m(x2_2, x8_19)
            + m(x4_2, x6_19)
            + (m(x1_2, x9_19) + m(x3_2, x7_19) + m(x[5], x5_19)) * 2;
        z[1] =
            m(x0_2, x[1]) + m(x3_2, x8_19) + m(x5_2, x6_19) + (m(x[2], x9_19) + m(x[4], x7_19)) * 2;
        z[2] = m(x0_2, x[2])
            + m(x1_2, x[1])
            + m(x4_2, x8_19)
            + m(x[6], x6_19)
            + (m(x3_2, x9_19) + m(x5_2, x7_19)) * 2;
        z[3] =
            m(x0_2, x[3]) + m(x1_2, x[2]) + m(x5_2, x8_19) + (m(x[4], x9_19) + m(x[6], x7_19)) * 2;
        z[4] = m(x0_2, x[4])
            + m(x1_2, x3_2)
            + m(x[2], x[2])
            + m(x6_2, x8_19)
            + (m(x5_2, x9_19) + m(x[7], x7_19)) * 2;
        z[5] = m(x0_2, x[5]) + m(x1_2, x[4]) + m(x2_2, x[3]) + m(x7_2, x8_19) + m(x[6], x9_19) * 2;
        z[6] = m(x0_2, x[6])
            + m(x1_2, x5_2)
            + m(x2_2, x[4])
            + m(x3_2, x[3])
            + m(x[8], x8_19)
            + m(x7_2, x9_19) * 2;
        z[7] = m(x0_2, x[7]) + m(x1_2, x[6]) + m(x2_2, x[5]) + m(x3_2, x[4]) + m(x[8], x9_19) * 2;
        z[8] = m(x0_2, x[8])
            + m(x1_2, x7_2)
            + m(x2_2, x[6])
            + m(x3_2, x5_2)
            + m(x[4], x[4])
            + m(x[9], x9_19) * 2;
        z[9] = m(x0_2, x[9]) + m(x1_2, x[8]) + m(x2_2, x[7]) + m(x3_2, x[6]) + m(x4_2, x[5]);

        z
    }
}