p256_cm4/
lib.rs

1#![no_std]
2#![allow(clippy::missing_safety_doc)]
3
4use crate::asm::{
5    P256_add_mod_n, P256_check_range_n, P256_check_range_p, P256_decompress_point,
6    P256_divsteps2_31, P256_double_j, P256_from_montgomery, P256_matrix_mul_fg_9, P256_mul_mod_n,
7    P256_negate_mod_n_if, P256_negate_mod_p_if, P256_point_is_on_curve, P256_reduce_mod_n_32bytes,
8    P256_to_montgomery, P256_verify_last_step,
9    jacobian::{P256_add_sub_j, P256_jacobian_to_affine},
10};
11
12#[cfg(target_arch = "arm")]
13mod asm;
14
15const ONE_MONTGOMERY: [u32; 8] = [1, 0, 0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0];
16
17// This table contains 1G, 3G, 5G, ... 15G in affine coordinates in montgomery form
18#[rustfmt::skip]
19static P256_BASEPOINT_PRECOMP: [[[u32; 8]; 2]; 8]= [
20[[0x18a9143c, 0x79e730d4, 0x5fedb601, 0x75ba95fc, 0x77622510, 0x79fb732b, 0xa53755c6, 0x18905f76],
21[0xce95560a, 0xddf25357, 0xba19e45c, 0x8b4ab8e4, 0xdd21f325, 0xd2e88688, 0x25885d85, 0x8571ff18]],
22[[0x4eebc127, 0xffac3f90, 0x87d81fb, 0xb027f84a, 0x87cbbc98, 0x66ad77dd, 0xb6ff747e, 0x26936a3f],
23[0xc983a7eb, 0xb04c5c1f, 0x861fe1a, 0x583e47ad, 0x1a2ee98e, 0x78820831, 0xe587cc07, 0xd5f06a29]],
24[[0xc45c61f5, 0xbe1b8aae, 0x94b9537d, 0x90ec649a, 0xd076c20c, 0x941cb5aa, 0x890523c8, 0xc9079605],
25[0xe7ba4f10, 0xeb309b4a, 0xe5eb882b, 0x73c568ef, 0x7e7a1f68, 0x3540a987, 0x2dd1e916, 0x73a076bb]],
26[[0xa0173b4f, 0x746354e, 0xd23c00f7, 0x2bd20213, 0xc23bb08, 0xf43eaab5, 0xc3123e03, 0x13ba5119],
27[0x3f5b9d4d, 0x2847d030, 0x5da67bdd, 0x6742f2f2, 0x77c94195, 0xef933bdc, 0x6e240867, 0xeaedd915]],
28[[0x264e20e8, 0x75c96e8f, 0x59a7a841, 0xabe6bfed, 0x44c8eb00, 0x2cc09c04, 0xf0c4e16b, 0xe05b3080],
29[0xa45f3314, 0x1eb7777a, 0xce5d45e3, 0x56af7bed, 0x88b12f1a, 0x2b6e019a, 0xfd835f9b, 0x86659cd]],
30[[0x6245e404, 0xea7d260a, 0x6e7fdfe0, 0x9de40795, 0x8dac1ab5, 0x1ff3a415, 0x649c9073, 0x3e7090f1],
31[0x2b944e88, 0x1a768561, 0xe57f61c8, 0x250f939e, 0x1ead643d, 0xc0daa89, 0xe125b88e, 0x68930023]],
32[[0x4b2ed709, 0xccc42563, 0x856fd30d, 0xe356769, 0x559e9811, 0xbcbcd43f, 0x5395b759, 0x738477ac],
33[0xc00ee17f, 0x35752b90, 0x742ed2e3, 0x68748390, 0xbd1f5bc1, 0x7cd06422, 0xc9e7b797, 0xfbc08769]],
34[[0xbc60055b, 0x72bcd8b7, 0x56e27e4b, 0x3cc23ee, 0xe4819370, 0xee337424, 0xad3da09, 0xe2aa0e43],
35[0x6383c45d, 0x40b8524f, 0x42a41b25, 0xd7663554, 0x778a4797, 0x64efa6de, 0x7079adf4, 0x2042170a]]
36];
37
38// This contains two tables, 8 points each in affine coordinates in montgomery form
39// The first table contains these points:
40// (2^192 - 2^128 - 2^64 - 1)G
41// (2^192 - 2^128 - 2^64 + 1)G
42// (2^192 - 2^128 + 2^64 - 1)G
43// (2^192 - 2^128 + 2^64 + 1)G
44// (2^192 + 2^128 - 2^64 - 1)G
45// (2^192 + 2^128 - 2^64 + 1)G
46// (2^192 + 2^128 + 2^64 - 1)G
47// (2^192 + 2^128 + 2^64 + 1)G
48// The second table contains the same points multiplied by 2^32
49#[rustfmt::skip]
50const P256_BASEPOINT_PRECOMP2: [[[[u32; 8]; 2]; 8]; 2] =
51[
52[
53[[0x670844e0, 0x52d8a7c9, 0xef68a29d, 0xe33bdc, 0x4bdb7361, 0xf3d2848, 0x91c5304d, 0x5222c821],
54[0xdf73fc25, 0xea6d2944, 0x255c81b, 0xa04c0f55, 0xefe488a8, 0x29acdc97, 0x80a560de, 0xbe2e158f]],
55[[0x2b13e673, 0xfc8511ee, 0xd103ed24, 0xffc58dee, 0xea7e99b8, 0x1022523a, 0x4afc8a17, 0x8f43ea39],
56[0xc5f33d0b, 0x8f4e2dbc, 0xd0aa1681, 0x3bc099fa, 0x79ff9df1, 0xffbb7b41, 0xd58b57c4, 0x180de09d]],
57[[0x8bd1cda5, 0x56430752, 0x8e05eda5, 0x1807577f, 0x956896e9, 0x99c699b, 0xf1f0efb5, 0x83d6093d],
58[0xed97061c, 0xef5af17e, 0x30d4c3c, 0x35b977b8, 0x49229439, 0x81fa75a2, 0xa0b6d35d, 0xf5a22070]],
59[[0x74f81cf1, 0x814c5365, 0x120065b, 0xe30baff7, 0x15132621, 0x80ae1256, 0x36a80788, 0x16d2b8cb],
60[0xecc50bca, 0x33d14697, 0x17aedd21, 0x19a9dfb0, 0xedc3f766, 0x523fbcc7, 0xb2cf5afd, 0x9c4de6dd]],
61[[0xcf0d9f6d, 0x5305a9e6, 0x81a9b021, 0x5839172f, 0x75c687cf, 0xcca7a4dd, 0x844be22f, 0x36d59b3e],
62[0x111a53e9, 0xcace7e62, 0xf063f3a1, 0x91c843d4, 0xda812da, 0xbf77e5f0, 0x437f3176, 0xe64af9c]],
63[[0xcf07517d, 0xdbd568bb, 0xba6830b9, 0x2f1afba2, 0xe6c4c2a6, 0x15b6807c, 0xe4966aef, 0x91c7eabc],
64[0xd6b2b6e6, 0x716dea1b, 0x19f85b4b, 0x248c43d1, 0x4a315e2a, 0x16dcfd60, 0xc72b3d0b, 0x15fdd303]],
65[[0x42b7dfd5, 0xe40bf9f4, 0x2d934f2a, 0x673689f3, 0x30a6f50b, 0x8314beb4, 0x976ec64e, 0xd17af2bc],
66[0x1ee7ddf1, 0x39f66c4f, 0x68ea373c, 0x7f68e18b, 0x53d0b186, 0x5166c1f2, 0x7be58f14, 0x95dda601]],
67[[0x42913074, 0xd5ae356, 0x48a542b1, 0x55491b27, 0xb310732a, 0x469ca665, 0x5f1a4cc1, 0x29591d52],
68[0xb84f983f, 0xe76f5b6b, 0x9f5f84e1, 0xbe7eef41, 0x80baa189, 0x1200d496, 0x18ef332c, 0x6376551f]]
69],
70[
71[[0x7c4e54f5, 0xb9e5cbc0, 0xe1410e34, 0xc53a1a17, 0xec454425, 0x3e199130, 0x1700902e, 0xb029c97e],
72[0x786423b6, 0x2de66e11, 0xb41a95be, 0x262dc914, 0x451b683, 0x51766abd, 0x85bb6fb1, 0x55ad5f34]],
73[[0x9066cb79, 0x74f4f1c, 0x30c8b94e, 0x1ab31bd6, 0xd74275b3, 0x6d3f012f, 0x9ddcce40, 0xa214d0b1],
74[0xd165050a, 0x24aedf74, 0xe0e5dc3e, 0x95f17ece, 0xd9224456, 0x6ada9cda, 0x2dd60eea, 0x1fadb2d1]],
75[[0xe20cfb9b, 0xa3d83091, 0xba76e0cb, 0xae79c975, 0xc8858a6e, 0xa5f2a588, 0x874a3168, 0xe897a5f4],
76[0x7d48f096, 0xf6c1ef40, 0xc35b132c, 0x1f9c516b, 0x53c479fd, 0xe1040f91, 0x9df06743, 0x60e881f]],
77[[0x52a90e51, 0x9e0ad72, 0x38c50a96, 0xb7e66ea3, 0x7d997770, 0xab32ad05, 0x445671cb, 0xceaffe2],
78[0x5d37cc99, 0xdfbe753c, 0xe0fea2d5, 0x95d068cc, 0x4dd77cb6, 0x1e37cdda, 0x55530688, 0x88c5a4bb]],
79[[0xc7744f1, 0x3413f033, 0xbc816702, 0x23c05c89, 0x1192b5ac, 0x2322ee9a, 0x373180bb, 0xc1636a0],
80[0xbdde0207, 0xfe2f3d4, 0xc23578d8, 0xe1a093a, 0xc888ead, 0x6e5f0d1, 0x52a2b660, 0x9ca285a5]],
81[[0xce923964, 0xdae76995, 0xa34c7993, 0xcc96493a, 0xea73d9e7, 0xd19b5144, 0x311e6e34, 0x4a5c263],
82[0xd9a2a443, 0x7db5b32b, 0x2cfd960c, 0x3754bd33, 0xa430f15, 0xc5bcc98, 0xd9a94574, 0x5651201f]],
83[[0xfc0418fe, 0xebdd8921, 0x34e20036, 0x37015b39, 0xdf03a353, 0xcf4fcd8f, 0xf12cab16, 0xdc2de6e1],
84[0xd071df14, 0x9c17cc1a, 0x63415530, 0xd7c5e6a3, 0x68f3fb1e, 0xb5301660, 0x18269301, 0xb5f70bc9]],
85[[0x79ec1a0f, 0x2d8daefd, 0xceb39c97, 0x3bbcd6fd, 0x58f61a95, 0xf5575ffc, 0xadf7b420, 0xdbd986c4],
86[0x15f39eb7, 0x81aa8814, 0xb98d976c, 0x6ee2fcf5, 0xcf2f717d, 0x5465475d, 0x6860bbd0, 0x8e24d3c4]]
87]
88];
89
90// Constant time abs
91// but not really abs, only works for +/-15
92#[inline(always)]
93fn abs_int(a: i8) -> u32 {
94    let a_u: u32 = a as i32 as u32;
95    let mut mask: u32 = a_u >> 31;
96    mask |= mask << 1;
97    mask |= mask << 2;
98    let mut result: u32 = ((-a) as u32) & mask;
99    result |= (a as u32) & (mask ^ 0xF);
100    result
101}
102
103/// Checks that the argument, as little-endian integer,
104/// is a reduced non-zero element of the scalar field.
105///
106/// In other words, that it is in the range `1..=n-1`,
107/// where `n = 2^256 - 2^224 + 2^192 - 0x4319055258e8617b0c46353d039cdaaf`.
108#[inline]
109#[must_use]
110pub fn check_range_n(a: &[u32; 8]) -> bool {
111    unsafe { P256_check_range_n(a) }
112}
113
114/// Checks that the argument, as little-endian integer,
115/// is a reduced element of the base field.
116///
117/// In other words, that it is in the range `0..=p-1`,
118/// where `p = 2^256 - 2^224 + 2^192 + 2^96 - 1`.
119#[inline]
120#[must_use]
121pub fn check_range_p(a: &[u32; 8]) -> bool {
122    unsafe { P256_check_range_p(a) }
123}
124
125/// Converts endianness by reversing the input value.
126///
127/// The output and input pointers may NOT refer to the same location
128/// and have no alignment requirements.
129pub fn convert_endianness(output: &mut [u8; 32], input: &[u8; 32]) {
130    (0..16).for_each(|i| {
131        let t: u8 = input[31 - i];
132        output[31 - i] = input[i];
133        output[i] = t;
134    });
135}
136
137fn u32x8_to_u8x32(input: &[u32; 8]) -> &[u8; 32] {
138    unsafe { core::mem::transmute::<&[u32; 8], &[u8; 32]>(input) }
139}
140
141fn u32x8_to_u8x32_mut(input: &mut [u32; 8]) -> &mut [u8; 32] {
142    unsafe { core::mem::transmute::<&mut [u32; 8], &mut [u8; 32]>(input) }
143}
144
145/// Uncompressed encoding
146///
147/// `04 || Px || Py`.
148pub fn point_to_octet_string_uncompressed(out: &mut [u8; 65], x: &[u32; 8], y: &[u32; 8]) {
149    out[0] = 4;
150    convert_endianness((&mut out[1..33]).try_into().unwrap(), u32x8_to_u8x32(x));
151    convert_endianness((&mut out[33..65]).try_into().unwrap(), u32x8_to_u8x32(y));
152}
153
154/// Compressed encoding
155///
156/// `02 || Px` if Py is even and `03 || Px` if Py is odd.
157pub fn point_to_octet_string_compressed(out: &mut [u8; 33], x: &[u32; 8], y: &[u32; 8]) {
158    out[0] = 2_u8.wrapping_add((y[0] & 1) as u8);
159    convert_endianness((&mut out[1..]).try_into().unwrap(), u32x8_to_u8x32(x))
160}
161
162/// Hybrid encoding
163///
164/// `06 || Px || Py` if Py is even and `07 || Px || Py` if Py is odd
165/// (a pretty useless encoding).
166pub fn point_to_octet_string_hybrid(out: &mut [u8; 65], x: &[u32; 8], y: &[u32; 8]) {
167    out[0] = 6_u8.wrapping_add((y[0] & 1) as u8);
168    convert_endianness((&mut out[1..33]).try_into().unwrap(), u32x8_to_u8x32(x));
169    convert_endianness((&mut out[33..65]).try_into().unwrap(), u32x8_to_u8x32(y));
170}
171
172/// Decodes a point according to the three encodings above.
173///
174/// include_p256_decode_point: first byte is "04", "06" or "07" and input length is 65 bytes
175/// include_p256_decompress_point: first byte is "02" or "03" and input length is 33 bytes
176///
177/// Returns true if the input string confirms to a valid encoding and the point lies on the curve,
178/// otherwise false.
179///
180/// NOTE: The return value MUST be checked in case the point is not guaranteed to lie on the curve (e.g. if it
181/// is received from an untrusted party).
182#[must_use]
183pub fn octet_string_to_point(x: &mut [u32; 8], y: &mut [u32; 8], input: &[u8]) -> bool {
184    if let Ok(slice) = input[1..33].try_into() {
185        convert_endianness(u32x8_to_u8x32_mut(x), slice);
186        if unsafe { !P256_check_range_p(x) } {
187            return false;
188        }
189
190        if (input[0] == 4 || ((input[0] >> 1) == 3)) && input.len() == 65 {
191            convert_endianness(u32x8_to_u8x32_mut(y), input[33..65].try_into().unwrap());
192            if unsafe { !P256_check_range_p(y) } {
193                return false;
194            }
195
196            if (input[0] >> 1) == 3 && u32::from(input[0] & 1) != (y[0] & 1) {
197                return false;
198            }
199
200            let mut x_mont: [u32; 8] = [0; 8];
201            let mut y_mont: [u32; 8] = [0; 8];
202
203            unsafe { P256_to_montgomery(&raw mut x_mont as _, x) };
204            unsafe { P256_to_montgomery(&raw mut y_mont as _, y) };
205            unsafe { P256_point_is_on_curve(&raw const x_mont as _, &raw const y_mont as _) }
206        } else if (input[0] >> 1) == 1 && input.len() == 33 {
207            unsafe {
208                P256_decompress_point(
209                    &raw mut *y as _,
210                    &raw const *x as _,
211                    u32::from(input[0] & 1),
212                )
213            }
214        } else {
215            false
216        }
217    } else {
218        false
219    }
220}
221
222// Calculates scalar*P in constant time (except for the scalars 2 and n-2, for which the results take a few extra cycles to compute)
223fn scalarmult_variable_base(
224    output_mont_x: &mut [u32; 8],
225    output_mont_y: &mut [u32; 8],
226    scalar: &[u32; 8],
227) {
228    // Based on https://eprint.iacr.org/2014/130.pdf, Algorithm 1.
229
230    // The algorithm used requires the scalar to be odd. If even, negate the scalar modulo p to make it odd, and later negate the end result.
231    let mut scalar2: [u32; 8] = [0; 8];
232    let even: u32 = ((scalar[0]) & 1) ^ 1;
233    unsafe { P256_negate_mod_n_if(&mut scalar2, scalar, even) };
234
235    // Rewrite the scalar as e[0] + 2^4*e[1] + 2^8*e[2] + ... + 2^252*e[63], where each e[i] is an odd number and -15 <= e[i] <= 15.
236    let mut e: [i8; 64] = [0; 64];
237    e[0] = (scalar2[0] & 0xf) as i8;
238    (1..64).for_each(|i| {
239        // Extract 4 bits
240        e[i] = ((scalar2[i / 8] >> ((i % 8) * 4)) & 0xf) as u8 as i8;
241        // If even, subtract 2^4 from e[i - 1] and add 1 to e[i]
242        e[i - 1] -= ((e[i] & 1) ^ 1) << 4;
243        e[i] |= 1;
244    });
245
246    // Create a table of P, 3P, 5P, ... 15P.
247    let mut table: [[[u32; 8]; 3]; 8] = [[[0; 8]; 3]; 8];
248    table[0][0].copy_from_slice(output_mont_x);
249    table[0][1].copy_from_slice(output_mont_y);
250    table[0][2].copy_from_slice(&ONE_MONTGOMERY);
251    unsafe { P256_double_j(&raw mut table[7] as _, &raw const table[0] as _) };
252    (1..8).for_each(|i| {
253        table.copy_within(7..8, i);
254        unsafe {
255            P256_add_sub_j(
256                &raw mut table[i] as _,
257                &raw const table[i - 1] as _,
258                false,
259                false,
260            )
261        };
262    });
263
264    // Calculate the result as (((((((((e[63]*G)*2^4)+e[62])*2^4)+e[61])*2^4)...)+e[1])*2^4)+e[0] = (2^252*e[63] + 2^248*e[62] + ... + e[0])*G.
265
266    let mut current_point: [[u32; 8]; 3] = [[0; 8]; 3];
267
268    // e[63] is never negative
269    current_point.copy_from_slice(&table[(e[63] >> 1) as u8 as usize]);
270
271    (0..63).rev().for_each(|i| {
272        (0..4).for_each(|_| unsafe {
273            P256_double_j(&raw mut current_point as _, &raw const current_point as _)
274        });
275
276        let mut selected_point: [[u32; 8]; 3] = [[0; 8]; 3];
277        selected_point.copy_from_slice(&table[(abs_int(e[i]) >> 1) as u8 as usize]);
278
279        unsafe {
280            P256_negate_mod_p_if(
281                &mut selected_point[1],
282                &selected_point[1],
283                ((e[i] as u8) >> 7) as u32,
284            )
285        };
286
287        // There is (only) one odd input scalar that leads to an exception when i == 0: n-2,
288        // in that case current_point will be equal to selected_point and hence a doubling
289        // will occur instead. We don't bother fixing the same constant time for that case since
290        // the probability of that random value to be generated is around 1/2^255 and an
291        // attacker could easily test this case anyway.
292        unsafe {
293            P256_add_sub_j(
294                &raw mut current_point as _,
295                &raw const selected_point as _,
296                false,
297                false,
298            )
299        };
300    });
301    unsafe {
302        P256_jacobian_to_affine(
303            &raw mut *output_mont_x as _,
304            &raw mut *output_mont_y as _,
305            &raw const current_point as _,
306        )
307    };
308
309    // If the scalar was initially even, we now negate the result to get the correct result, since -(scalar*G) = (-scalar*G).
310    // This is done by negating y, since -(x,y) = (x,-y).
311    unsafe { P256_negate_mod_p_if(output_mont_y, output_mont_y, even) };
312}
313
314#[must_use]
315fn scalarmult_generic_no_scalar_check(
316    output_mont_x: &mut [u32; 8],
317    output_mont_y: &mut [u32; 8],
318    scalar: &[u32; 8],
319    in_x: &[u32; 8],
320    in_y: &[u32; 8],
321) -> bool {
322    if unsafe { !P256_check_range_p(in_x) || !P256_check_range_p(in_y) } {
323        false
324    } else {
325        unsafe { P256_to_montgomery(&raw mut *output_mont_x as _, in_x) };
326        unsafe { P256_to_montgomery(&raw mut *output_mont_y as _, in_y) };
327
328        if unsafe {
329            !P256_point_is_on_curve(
330                &raw const *output_mont_x as _,
331                &raw const *output_mont_y as _,
332            )
333        } {
334            false
335        } else {
336            scalarmult_variable_base(output_mont_x, output_mont_y, scalar);
337            true
338        }
339    }
340}
341
342/// Raw scalar multiplication by any point on the elliptic curve.
343///
344/// This function can be used to implement custom algorithms using the P-256 curve.
345///
346/// This function validates all inputs and proceeds only if the scalar is within the range 1 to n-1, where n
347/// is the order of the elliptic curve, and the input point's coordinates are each less than the order of
348/// the prime field. If validation succeeds, true is returned. Otherwise false is returned.
349#[must_use]
350pub fn scalarmult_generic(
351    result_x: &mut [u32; 8],
352    result_y: &mut [u32; 8],
353    scalar: &[u32; 8],
354    in_x: &[u32; 8],
355    in_y: &[u32; 8],
356) -> bool {
357    if unsafe {
358        !P256_check_range_n(scalar)
359            || !scalarmult_generic_no_scalar_check(result_x, result_y, scalar, in_x, in_y)
360    } {
361        false
362    } else {
363        unsafe { P256_from_montgomery(result_x, &raw const *result_x as _) };
364        unsafe { P256_from_montgomery(result_y, &raw const *result_y as _) };
365        true
366    }
367}
368
369/// Generates the shared secret according to the ECDH standard.
370///
371/// The shared secret parameter will contain the big endian encoding for the x coordinate of the scalar
372/// multiplication of the private key and the input point (other's public key), if the function succeeds.
373///
374/// If the other's public key point does not lie on the curve, this function fails and false is returned.
375/// Otherwise, shared secret is calculated and true is returned.
376///
377/// NOTE: The return value MUST be checked since the other's public key point cannot generally be trusted.
378#[must_use]
379pub fn ecdh_calc_shared_secret(
380    shared_secret: &mut [u8; 32],
381    private_key: &[u32; 8],
382    others_public_key_x: &[u32; 8],
383    others_public_key_y: &[u32; 8],
384) -> bool {
385    let mut result_x: [u32; 8] = [0; 8];
386    let mut result_y: [u32; 8] = [0; 8];
387    if !scalarmult_generic_no_scalar_check(
388        &mut result_x,
389        &mut result_y,
390        private_key,
391        others_public_key_x,
392        others_public_key_y,
393    ) {
394        false
395    } else {
396        unsafe { P256_from_montgomery(&raw mut result_x as _, &raw const result_x as _) };
397        convert_endianness(shared_secret, u32x8_to_u8x32(&result_x));
398        true
399    }
400}
401
402/// Calculates the public key from a given private key for use by either ECDSA or ECDH.
403///
404/// The private key shall be taken from a random value that MUST have been generated by a cryptographically
405/// secure random number generator that generates 256 random bits. This function validates that the private key
406/// lies in the accepted range 1 to n-1, where n is the order of the elliptic curve, and returns true only if
407/// this validation succeeds. If random value is out of that range, false is returned and in this case a new
408/// random value needs to be generated and this function MUST be called again until true is returned.
409///
410/// The public key is created by performing a scalar multiplication of the private key and the base point of
411/// the curve.
412///
413/// Only use a keypair for either ECDSA or ECDH, not both, and don't use the private key for any other purposes.
414#[must_use]
415pub fn keygen(
416    public_key_x: &mut [u32; 8],
417    public_key_y: &mut [u32; 8],
418    private_key: &[u32; 8],
419) -> bool {
420    scalarmult_base(public_key_x, public_key_y, private_key)
421}
422
423macro_rules! get_bit {
424    ($arr:ident, $i:expr) => {
425        (($arr[$i / 32] >> ($i % 32)) & 1)
426    };
427}
428
429// Calculates scalar*G in constant time
430fn scalarmult_fixed_base(
431    output_mont_x: &mut [u32; 8],
432    output_mont_y: &mut [u32; 8],
433    scalar: &[u32; 8],
434) {
435    let mut scalar2: [u32; 8] = [0; 8];
436
437    // Just as with the algorithm used in variable base scalar multiplication, this algorithm requires the scalar to be odd.
438    let even: u32 = ((scalar[0]) & 1) ^ 1;
439    unsafe { P256_negate_mod_n_if(&mut scalar2, scalar, even) };
440
441    // This algorithm conceptually rewrites the odd scalar as s[0] + 2^1*s[1] + 2^2*s[2] + ... + 2^255*s[255], where each s[i] is -1 or 1.
442    // By initially setting s[i] to the corresponding bit S[i] in the original odd scalar S, we go from lsb to msb, and whenever a value s[i] is 0,
443    // increase s[i] by 1 and decrease s[i-1] by 2.
444    // This will result in that s[i] = S[i+1] == 1 ? 1 : -1 for i < 255, and s[255] = 1.
445
446    // We then form the scalars abs(s[j] + s[j+64]*2^64 + s[j+128]*2^128 + s[j+192]*2^192)*(2^32 * floor(j / 32)) for different 0 <= j < 64.
447    // Each scalar times G has already been precomputed in p256_basepoint_precomp2.
448    // That way we only need 31 point doublings and 63 point additions.
449
450    let mut current_point: [[u32; 8]; 3] = [[0; 8]; 3];
451    let mut selected_point: [[u32; 8]; 2] = [[0; 8]; 2];
452
453    for i in (0..32).rev() {
454        {
455            let mut mask: u32 = get_bit!(scalar2, i + 32 + 1)
456                | (get_bit!(scalar2, i + 64 + 32 + 1) << 1)
457                | (get_bit!(scalar2, i + 2 * 64 + 32 + 1) << 2);
458            if i == 31 {
459                current_point[..2].copy_from_slice(&P256_BASEPOINT_PRECOMP2[1][mask as usize]);
460                current_point[2].copy_from_slice(&ONE_MONTGOMERY);
461            } else {
462                unsafe { P256_double_j(&raw mut current_point as _, &raw mut current_point as _) };
463
464                let sign: u32 = get_bit!(scalar2, i + 3 * 64 + 32 + 1).wrapping_sub(1); // positive: 0, negative: -1
465                mask = (mask ^ sign) & 7;
466                selected_point.copy_from_slice(&P256_BASEPOINT_PRECOMP2[1][mask as usize]);
467                unsafe {
468                    P256_negate_mod_p_if(&mut selected_point[1], &selected_point[1], sign & 1)
469                };
470                unsafe {
471                    P256_add_sub_j(
472                        &raw mut current_point as _,
473                        &raw const selected_point as _,
474                        false,
475                        true,
476                    )
477                };
478            }
479        }
480        {
481            let mut mask: u32 = get_bit!(scalar2, i + 1)
482                | (get_bit!(scalar2, i + 64 + 1) << 1)
483                | (get_bit!(scalar2, i + 2 * 64 + 1) << 2);
484            let sign: u32 = get_bit!(scalar2, i + 3 * 64 + 1).wrapping_sub(1); // positive: 0, negative: -1
485            mask = (mask ^ sign) & 7;
486            selected_point.copy_from_slice(&P256_BASEPOINT_PRECOMP2[0][mask as usize]);
487            unsafe { P256_negate_mod_p_if(&mut selected_point[1], &selected_point[1], sign & 1) };
488            unsafe {
489                P256_add_sub_j(
490                    &raw mut current_point as _,
491                    &raw const selected_point as _,
492                    false,
493                    true,
494                )
495            };
496        }
497    }
498
499    unsafe {
500        P256_jacobian_to_affine(
501            &raw mut *output_mont_x as _,
502            &raw mut *output_mont_y as _,
503            &raw const current_point as _,
504        )
505    };
506
507    // Negate final result if the scalar was initially even.
508    unsafe { P256_negate_mod_p_if(output_mont_y, output_mont_y, even) };
509}
510
511/// Raw scalar multiplication by the base point of the elliptic curve.
512///
513/// This function can be used to implement custom algorithms using the P-256 curve.
514///
515/// This function validates that the scalar lies in the accepted range 1 to n-1, where n is the order of the
516/// elliptic curve, and returns true only if this validation succeeds. Otherwise false is returned.
517#[must_use]
518pub fn scalarmult_base(
519    result_x: &mut [u32; 8],
520    result_y: &mut [u32; 8],
521    scalar: &[u32; 8],
522) -> bool {
523    if unsafe { !P256_check_range_n(scalar) } {
524        false
525    } else {
526        scalarmult_fixed_base(result_x, result_y, scalar);
527        unsafe { P256_from_montgomery(result_x, &raw const *result_x as _) };
528        unsafe { P256_from_montgomery(result_y, &raw const *result_y as _) };
529        true
530    }
531}
532
533/// Sign precomputation state.
534///
535/// The content shall be treated as opaque to the API user and shall not be inspected or modified.
536#[repr(C)]
537#[derive(Default, Debug, Copy, Clone)]
538pub struct SignPrecomp {
539    pub r: [u32; 8],
540    pub k_inv: [u32; 8],
541}
542
543/// Creates an ECDSA signature.
544///
545/// The parameter "k" shall consist of a 256-bit random integer value. This random value MUST be generated from
546/// a cryptographically secure random number generator, and MUST be unique for every pair of message hash and
547/// private key.
548///
549/// With a small probability (~ 2^-32), this function will fail and return false for the given "k" and this
550/// function MUST in that case be called again with a new random "k", until true is returned. This is in line
551/// with the ECDSA standard.
552///
553/// As an alternative to using a random "k", "k" might be derived deterministically from the input, using a
554/// sophisticated hash construction such as RFC 6979, or e.g. by hashing the private key, message hash and a
555/// retry counter, using a secure hash function such as SHA-256.
556#[must_use]
557pub fn sign(
558    r: &mut [u32; 8],
559    s: &mut [u32; 8],
560    hash: &[u8],
561    private_key: &[u32; 8],
562    k: &[u32; 8],
563) -> bool {
564    let mut t: SignPrecomp = Default::default();
565    if !sign_step1(&mut t, k) {
566        r.fill(0);
567        s.fill(0);
568        false
569    } else {
570        sign_step2(r, s, hash, private_key, &mut t)
571    }
572}
573
574/// Creates an ECDSA signature, using a two-step procedure.
575///
576/// This function performs the first of two steps, and accounts for 99% of the time spent for generating an
577/// ECDSA signature.
578///
579/// By splitting up into two steps, most of the work could be spent before deciding what message to sign, or
580/// which private key to use.
581///
582/// The parameter "k" shall consist of a 256-bit random integer value. This random value MUST be generated from
583/// a cryptographically secure random number generator, and MUST be unique for every pair of message hash and
584/// private key.
585///
586/// With a small probability (~ 2^-32), this function will fail and return false for the given "k" and this
587/// function MUST in that case be called again with a new random "k", until true is returned. This is in line
588/// with the ECDSA standard.
589///
590/// As an alternative to using a random "k", "k" might be derived deterministically from the input, using a
591/// sophisticated hash construction such as RFC 6979, or e.g. by hashing the private key, message hash and a
592/// retry counter, using a secure hash function such as SHA-256.
593///
594/// The "result" parameter will contain the computed state, that is later to be passed to p256_sign_step2.
595/// A result state MUST NOT be reused for generating multiple signatures.
596#[must_use]
597pub fn sign_step1(result: &mut SignPrecomp, k: &[u32; 8]) -> bool {
598    #[allow(clippy::never_loop)]
599    loop {
600        if unsafe { !P256_check_range_n(k) } {
601            break;
602        }
603        let mut output_x: [u32; 8] = [0; 8];
604        let mut output_y: [u32; 8] = [0; 8];
605        scalarmult_fixed_base(&mut output_x, &mut output_y, k);
606        mod_n_inv(&mut result.k_inv, k);
607        unsafe { P256_from_montgomery(&raw mut result.r, &raw const output_x as _) };
608        unsafe { P256_reduce_mod_n_32bytes(&raw mut result.r, &raw const result.r) };
609
610        let r_sum: u32 = (0..8).fold(0, |r_sum, i| r_sum | result.r[i]);
611        if r_sum == 0 {
612            break;
613        }
614        return true;
615    }
616
617    result.r.fill(0);
618    result.k_inv.fill(0);
619    false
620}
621
622// Takes the leftmost 256 bits in hash (treated as big endian),
623// and converts to little endian integer z.
624fn hash_to_z(z: &mut [u8], hash: &[u8]) {
625    let hashlen: usize = core::cmp::min(hash.len(), 32);
626    (0..hashlen).for_each(|i| {
627        z[i] = hash[hashlen - 1 - i];
628    });
629    z[hashlen..].fill(0);
630}
631
632/// Second step of creating an ECDSA signature, using a two-step procedure.
633///
634/// This function performs the second of two steps, and accounts for the last 1% of the time spent for generating
635/// an ECDSA signature.
636///
637/// The "sign_precomp" parameter shall contain a pointer to a state generated by p256_sign_step1.
638///
639/// With a small probability (~ 2^-256), this function will fail, due to the given "k" from the first step is
640/// not compatible with the rest of the input, and return false. In this case, the procedure MUST be started
641/// over from step 1 with a new random "k".  This is in line with the ECDSA standard. Otherwise true is returned
642/// and the signature is placed in "r" and "s".
643///
644/// When this function returns, "sign_precomp" is also zeroed out and may hence not be reused.
645#[must_use]
646pub fn sign_step2(
647    r: &mut [u32; 8],
648    s: &mut [u32; 8],
649    hash: &[u8],
650    private_key: &[u32; 8],
651    sign_precomp: &mut SignPrecomp,
652) -> bool {
653    #[allow(clippy::never_loop)]
654    loop {
655        // just make sure user did not input an obviously invalid precomp
656        if unsafe {
657            !P256_check_range_n(&sign_precomp.k_inv) || !P256_check_range_n(&sign_precomp.r)
658        } {
659            break;
660        }
661        hash_to_z(u32x8_to_u8x32_mut(r), hash);
662        unsafe { P256_mul_mod_n(s, &sign_precomp.r, private_key) };
663        unsafe { P256_add_mod_n(s, r, s) };
664        unsafe { P256_mul_mod_n(s, &sign_precomp.k_inv, s) };
665
666        r.copy_from_slice(&sign_precomp.r);
667
668        let s_sum: u32 = s.iter().fold(0, |s_sum, s| s_sum | s);
669        if s_sum == 0 {
670            break;
671        }
672        sign_precomp.r.fill(0);
673        sign_precomp.k_inv.fill(0);
674        return true;
675    }
676
677    r.fill(0);
678    s.fill(0);
679    false
680}
681
682// Creates a representation of a (little endian integer),
683// so that r[0] + 2*r[1] + 2^2*r[2] + 2^3*r[3] + ... = a,
684// where each r[i] is -15, -13, ..., 11, 13, 15 or 0.
685// Only around 1/5.5 of the r[i] will be non-zero.
686fn slide_257(a: &[u8; 32]) -> [i8; 257] {
687    let mut r: [i8; 257] = [0; 257];
688    (0..256).for_each(|i| {
689        r[i] = (1 & (a[i >> 3] >> (i & 7))) as i8;
690    });
691    r[256] = 0;
692
693    (0..256).for_each(|i| {
694        if r[i] != 0 {
695            let mut b: usize = 1;
696            while b <= 4 && i + b < 256 {
697                if r[i + b] != 0 {
698                    if r[i] + (r[i + b] << b) <= 15 {
699                        r[i] += r[i + b] << b;
700                        r[i + b] = 0;
701                    } else if r[i] - (r[i + b] << b) >= -15 {
702                        r[i] -= r[i + b] << b;
703                        loop {
704                            r[i + b] = 0;
705                            b += 1;
706                            if r[i + b] == 0 {
707                                r[i + b] = 1;
708                                b -= 1; // Will be added back after loop footer b++
709                                break;
710                            }
711                        }
712                    } else {
713                        break;
714                    }
715                }
716                b += 1;
717            }
718        }
719    });
720
721    r
722}
723
724/// Verifies an ECDSA signature.
725///
726/// Returns true if the signature is valid for the given input, otherwise false.
727#[must_use = "The return value indicates if the message is authentic"]
728pub fn verify(
729    public_key_x: &[u32; 8],
730    public_key_y: &[u32; 8],
731    hash: &[u8],
732    r: &[u32; 8],
733    s: &[u32; 8],
734) -> bool {
735    if unsafe { !P256_check_range_n(r) || !P256_check_range_n(s) } {
736        return false;
737    }
738
739    if unsafe { !P256_check_range_p(public_key_x) || !P256_check_range_p(public_key_y) } {
740        return false;
741    }
742
743    let mut pk_table: [[[u32; 8]; 3]; 8] = [[[0; 8]; 3]; 8];
744    unsafe {
745        P256_to_montgomery(&raw mut pk_table[0][0] as _, public_key_x);
746        P256_to_montgomery(&raw mut pk_table[0][1] as _, public_key_y);
747    }
748    pk_table[0][2].copy_from_slice(&ONE_MONTGOMERY);
749
750    if unsafe {
751        !P256_point_is_on_curve(
752            &raw const pk_table[0][0] as _,
753            &raw const pk_table[0][1] as _,
754        )
755    } {
756        return false;
757    }
758
759    // Create a table of P, 3P, 5P, ..., 15P, where P is the public key.
760    unsafe { P256_double_j(&raw mut pk_table[7] as _, &raw const pk_table[0] as _) };
761    (1..8).for_each(|i| {
762        pk_table.copy_within(7..8, i);
763        unsafe {
764            P256_add_sub_j(
765                &raw mut pk_table[i] as _,
766                &raw const pk_table[i - 1] as _,
767                false,
768                false,
769            )
770        };
771    });
772
773    let mut z: [u32; 8] = [0; 8];
774    hash_to_z(u32x8_to_u8x32_mut(&mut z), hash);
775
776    let mut w: [u32; 8] = [0; 8];
777    mod_n_inv(&mut w, s);
778
779    let mut u1: [u32; 8] = [0; 8];
780    unsafe { P256_mul_mod_n(&mut u1, &z, &w) };
781    let mut u2: [u32; 8] = [0; 8];
782    unsafe { P256_mul_mod_n(&mut u2, r, &w) };
783
784    // Each value in these arrays will be an odd integer v, so that -15 <= v <= 15.
785    // Around 1/5.5 of them will be non-zero.
786
787    let slide_bp: [i8; 257] = slide_257(u32x8_to_u8x32(&u1));
788    let slide_pk: [i8; 257] = slide_257(u32x8_to_u8x32(&u2));
789
790    let mut cp: [[u32; 8]; 3] = [[0; 8]; 3];
791
792    slide_bp
793        .iter()
794        .rev()
795        .zip(slide_pk.iter().rev())
796        .for_each(|(&bp, &pk)| {
797            unsafe { P256_double_j(&raw mut cp as _, &raw mut cp as _) };
798
799            let bp_op = if bp > 0 {
800                Some((bp / 2, false))
801            } else if bp < 0 {
802                Some((-bp / 2, true))
803            } else {
804                None
805            };
806
807            if let Some((precomp, is_sub)) = bp_op {
808                let precomp = &raw const P256_BASEPOINT_PRECOMP[precomp as usize];
809                unsafe {
810                    P256_add_sub_j(&raw mut cp as _, precomp as _, is_sub, true);
811                }
812            }
813
814            let pk_op = if pk > 0 {
815                Some((pk / 2, false))
816            } else if pk < 0 {
817                Some((-pk / 2, true))
818            } else {
819                None
820            };
821
822            if let Some((pk_idx, is_sub)) = pk_op {
823                let pk_table = &raw const pk_table[pk_idx as usize];
824                unsafe {
825                    P256_add_sub_j(&raw mut cp as _, pk_table as _, is_sub, false);
826                }
827            }
828        });
829
830    unsafe { P256_verify_last_step(&raw const *r, &raw const cp as _) }
831}
832
833#[repr(C)]
834#[derive(Default)]
835struct FGInteger {
836    // To get the value this struct represents,
837    // interpret signed_value as a two's complement 288-bit little endian integer,
838    // and negate if flip_sign is -1
839    flip_sign: i32, // 0 or -1
840    // of 288 bits, 257 are useful (top 31 bits are sign-extended from bit 256)
841    signed_value: [u32; 9],
842}
843
844#[repr(C)]
845#[derive(Default)]
846struct XYInteger {
847    // To get the value this struct represents,
848    // interpret signed_value as an unsigned 288-bit little endian integer,
849    // and negate if flip_sign is -1
850    flip_sign: i32,  // 0 or -1
851    value: [u32; 8], // unsigned value, 0 <= value < P256_order
852}
853
854#[repr(C)]
855#[derive(Default)]
856struct State {
857    fg: [FGInteger; 2],
858    xy: [XYInteger; 2],
859}
860
861fn mod_n_inv(res: &mut [u32; 8], a: &[u32; 8]) {
862    // This function follows the algorithm in section 12.1 of https://gcd.cr.yp.to/safegcd-20190413.pdf.
863    // It has been altered in the following ways:
864    //   1. Due to 32-bit cpu, we use 24 * 31 iterations instead of 12 * 62.
865    //   2. P-256 modulus instead of 2^255-19.
866    //      744 iterations are still enough and slightly more than the required 741 (floor((49*256+57)/17)).
867    //   3. Step 5 has been corrected to go back to step 2 instead of step 3.
868    //   4. The order of the matrix multiplications in step 6 has been changed to (T24*(T23*(T22*(...*(T1*[0, 1]))))),
869    //      where [0, 1] is a column vector to make it possible to be able to extract the "top-right corner", v, of T24*T23*...*T1.
870    //      The result v will then be contained in the first element of the resulting column vector.
871
872    let mut state: [State; 2] = Default::default();
873
874    state[0].fg[0].flip_sign = 0; // non-negative f
875    state[0].fg[0]
876        .signed_value
877        .copy_from_slice(&asm::P256_ORDER); // f
878    state[0].fg[1].flip_sign = 0; // non-negative g
879    state[0].fg[1].signed_value[..8].copy_from_slice(a); // g
880    state[0].fg[1].signed_value[8] = 0; // upper bits of g are 0
881
882    // We later need a factor 2^-744. The montgomery multiplication gives 2^(24*-32)=2^-768, so multiply the init value (1) by 2^24 here.
883    state[0].xy[1].value[0] = 1 << 24;
884
885    let mut delta: i32 = 1;
886    (0..24).for_each(|i| {
887        // Scaled translation matrix Ti
888        let mut matrix: [u32; 4] = [0; 4]; // element range: [-2^30, 2^31] (negative numbers are stored in two's complement form)
889
890        // Decode f and g into two's complement representation and use the lowest 32 bits in the P256_divsteps2_31 calculation
891        let negate_f: u32 = state[i % 2].fg[0].flip_sign as u32;
892        let negate_g: u32 = state[i % 2].fg[1].flip_sign as u32;
893        delta = unsafe {
894            P256_divsteps2_31(
895                delta,
896                (state[i % 2].fg[0].signed_value[0] ^ negate_f).wrapping_sub(negate_f),
897                (state[i % 2].fg[1].signed_value[0] ^ negate_g).wrapping_sub(negate_g),
898                &raw mut matrix,
899            )
900        };
901
902        // "Jump step", calculates the new f and g values that applies after 31 divstep2 iterations
903        unsafe {
904            P256_matrix_mul_fg_9(
905                matrix[0],
906                matrix[1],
907                &state[i % 2].fg,
908                &mut state[(i + 1) % 2].fg[0],
909            )
910        };
911        unsafe {
912            P256_matrix_mul_fg_9(
913                matrix[2],
914                matrix[3],
915                &state[i % 2].fg,
916                &mut state[(i + 1) % 2].fg[1],
917            )
918        };
919
920        // Iterate the result vector
921        // Due to montgomery multiplication inside this function, each step also adds a 2^-32 factor
922        unsafe {
923            crate::asm::P256_matrix_mul_mod_n(
924                matrix[0],
925                matrix[1],
926                &state[i % 2].xy,
927                &mut state[(i + 1) % 2].xy[0],
928            )
929        };
930        unsafe {
931            crate::asm::P256_matrix_mul_mod_n(
932                matrix[2],
933                matrix[3],
934                &state[i % 2].xy,
935                &mut state[(i + 1) % 2].xy[1],
936            )
937        };
938    });
939
940    // Calculates val^-1 = sgn(f) * v * 2^-744, where v is the "top-right corner" of the resulting T24*T23*...*T1 matrix.
941    // In this implementation, at this point x contains v * 2^-744.
942    unsafe {
943        P256_negate_mod_n_if(
944            res,
945            &state[0].xy[0].value,
946            ((state[0].xy[0].flip_sign
947                ^ state[0].fg[0].flip_sign
948                ^ (state[0].fg[0].signed_value[8] as i32))
949                & 1) as u32,
950        )
951    };
952}