x25519_nostd/
lib.rs

1//! Pure-Rust X25519 Elliptic Curve Diffie-Hellman
2//!
3//! Implements X25519 key exchange (RFC 7748).
4//! This is a faithful translation of curve25519-donna-c64 by Adam Langley.
5//! Avoids LLVM SIMD issues on x86_64-unknown-none bare-metal targets.
6//!
7//! Properties:
8//! - 128-bit security level
9//! - Constant-time Montgomery ladder
10//! - 32-byte keys
11//! - ~100k operations/sec on modern CPUs
12//!
13//! Algorithm:
14//! - Montgomery curve: y² = x³ + 486662x² + x
15//! - Prime: p = 2^255 - 19
16//! - Base point: u = 9
17//! - Scalar clamping: s[0] &= 248, s[31] &= 127, s[31] |= 64
18
19#![no_std]
20#![forbid(unsafe_code)]
21
22/// Field element: 5 limbs of 51 bits each (255 bits total)
23/// Represented as u64 to allow for overflow during computation
24type Limb = u64;
25type FieldElement = [Limb; 5];
26
27/// Load a little-endian 64-bit number
28#[inline]
29fn load_limb(input: &[u8]) -> Limb {
30    (input[0] as Limb) |
31    ((input[1] as Limb) << 8) |
32    ((input[2] as Limb) << 16) |
33    ((input[3] as Limb) << 24) |
34    ((input[4] as Limb) << 32) |
35    ((input[5] as Limb) << 40) |
36    ((input[6] as Limb) << 48) |
37    ((input[7] as Limb) << 56)
38}
39
40/// Store a 64-bit number as little-endian bytes
41#[inline]
42fn store_limb(output: &mut [u8], input: Limb) {
43    output[0] = (input & 0xff) as u8;
44    output[1] = ((input >> 8) & 0xff) as u8;
45    output[2] = ((input >> 16) & 0xff) as u8;
46    output[3] = ((input >> 24) & 0xff) as u8;
47    output[4] = ((input >> 32) & 0xff) as u8;
48    output[5] = ((input >> 40) & 0xff) as u8;
49    output[6] = ((input >> 48) & 0xff) as u8;
50    output[7] = ((input >> 56) & 0xff) as u8;
51}
52
53/// Expand a 32-byte little-endian number into field element form (5 limbs of 51 bits)
54fn fexpand(output: &mut FieldElement, input: &[u8; 32]) {
55    output[0] = load_limb(&input[0..]) & 0x7ffffffffffff;
56    output[1] = (load_limb(&input[6..]) >> 3) & 0x7ffffffffffff;
57    output[2] = (load_limb(&input[12..]) >> 6) & 0x7ffffffffffff;
58    output[3] = (load_limb(&input[19..]) >> 1) & 0x7ffffffffffff;
59    output[4] = (load_limb(&input[24..]) >> 12) & 0x7ffffffffffff;
60}
61
62/// Contract a field element into a 32-byte little-endian array
63fn fcontract(output: &mut [u8; 32], input: &FieldElement) {
64    let mut t: [u128; 5] = [
65        input[0] as u128,
66        input[1] as u128,
67        input[2] as u128,
68        input[3] as u128,
69        input[4] as u128,
70    ];
71
72    // First carry pass
73    t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
74    t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
75    t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
76    t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
77    t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
78
79    // Second carry pass
80    t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
81    t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
82    t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
83    t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
84    t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
85
86    // Now t is between 0 and 2^255-1, properly carried
87    // Add 19 to handle case where value is between 2^255-19 and 2^255-1
88    t[0] += 19;
89
90    t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
91    t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
92    t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
93    t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
94    t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
95
96    // Now between 19 and 2^255-1, offset by 19
97    // Add 2^255 - 19 to each limb to ensure positive and in range
98    t[0] += 0x8000000000000u128 - 19;
99    t[1] += 0x8000000000000u128 - 1;
100    t[2] += 0x8000000000000u128 - 1;
101    t[3] += 0x8000000000000u128 - 1;
102    t[4] += 0x8000000000000u128 - 1;
103
104    // Now between 2^255 and 2^256-20, offset by 2^255
105    t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
106    t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
107    t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
108    t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
109    t[4] &= 0x7ffffffffffff;
110
111    // Pack into bytes
112    store_limb(&mut output[0..], (t[0] | (t[1] << 51)) as Limb);
113    store_limb(&mut output[8..], ((t[1] >> 13) | (t[2] << 38)) as Limb);
114    store_limb(&mut output[16..], ((t[2] >> 26) | (t[3] << 25)) as Limb);
115    store_limb(&mut output[24..], ((t[3] >> 39) | (t[4] << 12)) as Limb);
116}
117
118/// Sum two field elements: output += input
119#[inline]
120fn fsum(output: &mut FieldElement, input: &FieldElement) {
121    output[0] += input[0];
122    output[1] += input[1];
123    output[2] += input[2];
124    output[3] += input[3];
125    output[4] += input[4];
126}
127
128/// Subtract field elements: output = input - output
129/// (Note the order of arguments!)
130/// Adds large constants to avoid underflow
131#[inline]
132fn fdifference_backwards(out: &mut FieldElement, input: &FieldElement) {
133    // 152 is 19 << 3
134    const TWO54M152: Limb = (1u64 << 54) - 152;
135    const TWO54M8: Limb = (1u64 << 54) - 8;
136
137    out[0] = input[0] + TWO54M152 - out[0];
138    out[1] = input[1] + TWO54M8 - out[1];
139    out[2] = input[2] + TWO54M8 - out[2];
140    out[3] = input[3] + TWO54M8 - out[3];
141    out[4] = input[4] + TWO54M8 - out[4];
142}
143
144/// Multiply field element by scalar
145fn fscalar_product(output: &mut FieldElement, input: &FieldElement, scalar: Limb) {
146    let mut a: u128;
147
148    a = (input[0] as u128) * (scalar as u128);
149    output[0] = (a as Limb) & 0x7ffffffffffff;
150
151    a = (input[1] as u128) * (scalar as u128) + (a >> 51);
152    output[1] = (a as Limb) & 0x7ffffffffffff;
153
154    a = (input[2] as u128) * (scalar as u128) + (a >> 51);
155    output[2] = (a as Limb) & 0x7ffffffffffff;
156
157    a = (input[3] as u128) * (scalar as u128) + (a >> 51);
158    output[3] = (a as Limb) & 0x7ffffffffffff;
159
160    a = (input[4] as u128) * (scalar as u128) + (a >> 51);
161    output[4] = (a as Limb) & 0x7ffffffffffff;
162
163    output[0] += ((a >> 51) as Limb) * 19;
164}
165
166/// Multiply two field elements
167fn fmul(output: &mut FieldElement, in2: &FieldElement, input: &FieldElement) {
168    let mut t: [u128; 5] = [0; 5];
169
170    let r0 = input[0];
171    let r1 = input[1];
172    let r2 = input[2];
173    let r3 = input[3];
174    let r4 = input[4];
175
176    let s0 = in2[0];
177    let s1 = in2[1];
178    let s2 = in2[2];
179    let s3 = in2[3];
180    let s4 = in2[4];
181
182    t[0] = (r0 as u128) * (s0 as u128);
183    t[1] = (r0 as u128) * (s1 as u128) + (r1 as u128) * (s0 as u128);
184    t[2] = (r0 as u128) * (s2 as u128) + (r2 as u128) * (s0 as u128) + (r1 as u128) * (s1 as u128);
185    t[3] = (r0 as u128) * (s3 as u128) + (r3 as u128) * (s0 as u128) + (r1 as u128) * (s2 as u128) + (r2 as u128) * (s1 as u128);
186    t[4] = (r0 as u128) * (s4 as u128) + (r4 as u128) * (s0 as u128) + (r3 as u128) * (s1 as u128) + (r1 as u128) * (s3 as u128) + (r2 as u128) * (s2 as u128);
187
188    let r4 = r4 * 19;
189    let r1 = r1 * 19;
190    let r2 = r2 * 19;
191    let r3 = r3 * 19;
192
193    t[0] += (r4 as u128) * (s1 as u128) + (r1 as u128) * (s4 as u128) + (r2 as u128) * (s3 as u128) + (r3 as u128) * (s2 as u128);
194    t[1] += (r4 as u128) * (s2 as u128) + (r2 as u128) * (s4 as u128) + (r3 as u128) * (s3 as u128);
195    t[2] += (r4 as u128) * (s3 as u128) + (r3 as u128) * (s4 as u128);
196    t[3] += (r4 as u128) * (s4 as u128);
197
198    let mut r0: Limb;
199    let mut r1: Limb;
200    let mut r2: Limb;
201    let r3: Limb;
202    let r4: Limb;
203    let mut c: Limb;
204
205    r0 = (t[0] as Limb) & 0x7ffffffffffff; c = (t[0] >> 51) as Limb;
206    t[1] += c as u128; r1 = (t[1] as Limb) & 0x7ffffffffffff; c = (t[1] >> 51) as Limb;
207    t[2] += c as u128; r2 = (t[2] as Limb) & 0x7ffffffffffff; c = (t[2] >> 51) as Limb;
208    t[3] += c as u128; r3 = (t[3] as Limb) & 0x7ffffffffffff; c = (t[3] >> 51) as Limb;
209    t[4] += c as u128; r4 = (t[4] as Limb) & 0x7ffffffffffff; c = (t[4] >> 51) as Limb;
210    r0 += c * 19; c = r0 >> 51; r0 &= 0x7ffffffffffff;
211    r1 += c; c = r1 >> 51; r1 &= 0x7ffffffffffff;
212    r2 += c;
213
214    output[0] = r0;
215    output[1] = r1;
216    output[2] = r2;
217    output[3] = r3;
218    output[4] = r4;
219}
220
221/// Square a field element (multiple times)
222fn fsquare_times(output: &mut FieldElement, input: &FieldElement, count: usize) {
223    let mut t: [u128; 5] = [0; 5];
224    let mut r0 = input[0];
225    let mut r1 = input[1];
226    let mut r2 = input[2];
227    let mut r3 = input[3];
228    let mut r4 = input[4];
229
230    for _ in 0..count {
231        let d0 = r0 * 2;
232        let d1 = r1 * 2;
233        let d2 = r2 * 2 * 19;
234        let d419 = r4 * 19;
235        let d4 = d419 * 2;
236
237        t[0] = (r0 as u128) * (r0 as u128) + (d4 as u128) * (r1 as u128) + (d2 as u128) * (r3 as u128);
238        t[1] = (d0 as u128) * (r1 as u128) + (d4 as u128) * (r2 as u128) + (r3 as u128) * ((r3 * 19) as u128);
239        t[2] = (d0 as u128) * (r2 as u128) + (r1 as u128) * (r1 as u128) + (d4 as u128) * (r3 as u128);
240        t[3] = (d0 as u128) * (r3 as u128) + (d1 as u128) * (r2 as u128) + (r4 as u128) * (d419 as u128);
241        t[4] = (d0 as u128) * (r4 as u128) + (d1 as u128) * (r3 as u128) + (r2 as u128) * (r2 as u128);
242
243        let mut c: Limb;
244        r0 = (t[0] as Limb) & 0x7ffffffffffff; c = (t[0] >> 51) as Limb;
245        t[1] += c as u128; r1 = (t[1] as Limb) & 0x7ffffffffffff; c = (t[1] >> 51) as Limb;
246        t[2] += c as u128; r2 = (t[2] as Limb) & 0x7ffffffffffff; c = (t[2] >> 51) as Limb;
247        t[3] += c as u128; r3 = (t[3] as Limb) & 0x7ffffffffffff; c = (t[3] >> 51) as Limb;
248        t[4] += c as u128; r4 = (t[4] as Limb) & 0x7ffffffffffff; c = (t[4] >> 51) as Limb;
249        r0 += c * 19; c = r0 >> 51; r0 &= 0x7ffffffffffff;
250        r1 += c; c = r1 >> 51; r1 &= 0x7ffffffffffff;
251        r2 += c;
252    }
253
254    output[0] = r0;
255    output[1] = r1;
256    output[2] = r2;
257    output[3] = r3;
258    output[4] = r4;
259}
260
261/// Compute multiplicative inverse using Fermat's little theorem
262/// out = z^(p-2) = z^(2^255 - 21)
263fn crecip(out: &mut FieldElement, z: &FieldElement) {
264    let mut a = [0u64; 5];
265    let mut t0 = [0u64; 5];
266    let mut b = [0u64; 5];
267    let mut c = [0u64; 5];
268
269    // 2
270    fsquare_times(&mut a, z, 1);
271    // 8
272    fsquare_times(&mut t0, &a, 2);
273    // 9
274    fmul(&mut b, &t0, z);
275    // 11
276    let a_copy = a;
277    fmul(&mut a, &b, &a_copy);
278    // 22
279    fsquare_times(&mut t0, &a, 1);
280    // 2^5 - 2^0 = 31
281    let b_copy = b;
282    fmul(&mut b, &t0, &b_copy);
283    // 2^10 - 2^5
284    fsquare_times(&mut t0, &b, 5);
285    // 2^10 - 2^0
286    let b_copy = b;
287    fmul(&mut b, &t0, &b_copy);
288    // 2^20 - 2^10
289    fsquare_times(&mut t0, &b, 10);
290    // 2^20 - 2^0
291    fmul(&mut c, &t0, &b);
292    // 2^40 - 2^20
293    fsquare_times(&mut t0, &c, 20);
294    // 2^40 - 2^0
295    let t0_copy = t0;
296    fmul(&mut t0, &t0_copy, &c);
297    // 2^50 - 2^10
298    let t0_copy = t0;
299    fsquare_times(&mut t0, &t0_copy, 10);
300    // 2^50 - 2^0
301    let b_copy = b;
302    fmul(&mut b, &t0, &b_copy);
303    // 2^100 - 2^50
304    fsquare_times(&mut t0, &b, 50);
305    // 2^100 - 2^0
306    fmul(&mut c, &t0, &b);
307    // 2^200 - 2^100
308    fsquare_times(&mut t0, &c, 100);
309    // 2^200 - 2^0
310    let t0_copy = t0;
311    fmul(&mut t0, &t0_copy, &c);
312    // 2^250 - 2^50
313    let t0_copy = t0;
314    fsquare_times(&mut t0, &t0_copy, 50);
315    // 2^250 - 2^0
316    let t0_copy = t0;
317    fmul(&mut t0, &t0_copy, &b);
318    // 2^255 - 2^5
319    let t0_copy = t0;
320    fsquare_times(&mut t0, &t0_copy, 5);
321    // 2^255 - 21
322    fmul(out, &t0, &a);
323}
324
325/// Constant-time conditional swap
326fn swap_conditional(a: &mut FieldElement, b: &mut FieldElement, iswap: Limb) {
327    let swap = iswap.wrapping_neg(); // 0 or 0xFFFFFFFFFFFFFFFF
328
329    for i in 0..5 {
330        let x = swap & (a[i] ^ b[i]);
331        a[i] ^= x;
332        b[i] ^= x;
333    }
334}
335
336/// Montgomery ladder step
337#[allow(clippy::too_many_arguments)]
338fn fmonty(
339    x2: &mut FieldElement, z2: &mut FieldElement,
340    x3: &mut FieldElement, z3: &mut FieldElement,
341    x: &mut FieldElement, z: &mut FieldElement,
342    xprime: &mut FieldElement, zprime: &mut FieldElement,
343    qmqp: &FieldElement,
344) {
345    let mut origx = [0u64; 5];
346    let mut origxprime = [0u64; 5];
347    let mut zzz = [0u64; 5];
348    let mut xx = [0u64; 5];
349    let mut zz = [0u64; 5];
350    let mut xxprime = [0u64; 5];
351    let mut zzprime = [0u64; 5];
352    let mut zzzprime = [0u64; 5];
353
354    origx.copy_from_slice(x);
355    fsum(x, z);
356    fdifference_backwards(z, &origx);
357
358    origxprime.copy_from_slice(xprime);
359    fsum(xprime, zprime);
360    fdifference_backwards(zprime, &origxprime);
361    fmul(&mut xxprime, xprime, z);
362    fmul(&mut zzprime, x, zprime);
363    origxprime.copy_from_slice(&xxprime);
364    fsum(&mut xxprime, &zzprime);
365    fdifference_backwards(&mut zzprime, &origxprime);
366    fsquare_times(x3, &xxprime, 1);
367    fsquare_times(&mut zzzprime, &zzprime, 1);
368    fmul(z3, &zzzprime, qmqp);
369
370    fsquare_times(&mut xx, x, 1);
371    fsquare_times(&mut zz, z, 1);
372    fmul(x2, &xx, &zz);
373    fdifference_backwards(&mut zz, &xx);
374    fscalar_product(&mut zzz, &zz, 121665);
375    fsum(&mut zzz, &xx);
376    fmul(z2, &zz, &zzz);
377}
378
379/// Scalar multiplication: resultx/resultz = n * q
380fn cmult(resultx: &mut FieldElement, resultz: &mut FieldElement, n: &[u8; 32], q: &FieldElement) {
381    let mut a = [0u64; 5];
382    let mut b = [1u64, 0, 0, 0, 0];
383    let mut c = [1u64, 0, 0, 0, 0];
384    let mut d = [0u64; 5];
385    let mut e = [0u64; 5];
386    let mut f = [1u64, 0, 0, 0, 0];
387    let mut g = [0u64; 5];
388    let mut h = [1u64, 0, 0, 0, 0];
389
390    let nqpqx = &mut a;
391    let nqpqz = &mut b;
392    let nqx = &mut c;
393    let nqz = &mut d;
394    let nqpqx2 = &mut e;
395    let nqpqz2 = &mut f;
396    let nqx2 = &mut g;
397    let nqz2 = &mut h;
398
399    nqpqx.copy_from_slice(q);
400
401    for i in 0..32 {
402        let mut byte = n[31 - i];
403        for _ in 0..8 {
404            let bit = (byte >> 7) as Limb;
405
406            swap_conditional(nqx, nqpqx, bit);
407            swap_conditional(nqz, nqpqz, bit);
408            fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q);
409            swap_conditional(nqx2, nqpqx2, bit);
410            swap_conditional(nqz2, nqpqz2, bit);
411
412            core::mem::swap(&mut *nqx, &mut *nqx2);
413            core::mem::swap(&mut *nqz, &mut *nqz2);
414            core::mem::swap(&mut *nqpqx, &mut *nqpqx2);
415            core::mem::swap(&mut *nqpqz, &mut *nqpqz2);
416
417            byte <<= 1;
418        }
419    }
420
421    resultx.copy_from_slice(nqx);
422    resultz.copy_from_slice(nqz);
423}
424
425/// X25519 scalar multiplication
426fn curve25519_donna(mypublic: &mut [u8; 32], secret: &[u8; 32], basepoint: &[u8; 32]) {
427    let mut bp = [0u64; 5];
428    let mut x = [0u64; 5];
429    let mut z = [0u64; 5];
430    let mut zmone = [0u64; 5];
431    let mut e = *secret;
432
433    // Clamp scalar
434    e[0] &= 248;
435    e[31] &= 127;
436    e[31] |= 64;
437
438    fexpand(&mut bp, basepoint);
439    cmult(&mut x, &mut z, &e, &bp);
440    crecip(&mut zmone, &z);
441    fmul(&mut z, &x, &zmone);
442    fcontract(mypublic, &z);
443}
444
445/// Base point for X25519 (u=9)
446const BASEPOINT: [u8; 32] = [
447    9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449];
450
451/// Compute public key from secret key
452pub fn public_key(secret: &[u8; 32]) -> [u8; 32] {
453    let mut public = [0u8; 32];
454    curve25519_donna(&mut public, secret, &BASEPOINT);
455    public
456}
457
458/// Compute shared secret from our secret key and their public key
459pub fn diffie_hellman(our_secret: &[u8; 32], their_public: &[u8; 32]) -> [u8; 32] {
460    let mut shared = [0u8; 32];
461    curve25519_donna(&mut shared, our_secret, their_public);
462    shared
463}
464
465#[cfg(test)]
466mod tests {
467    use super::*;
468
469    #[test]
470    fn test_rfc7748_vector1() {
471        // RFC 7748 Section 5.2 Test Vector 1
472        let scalar = [
473            0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
474            0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
475            0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
476            0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4,
477        ];
478
479        let basepoint = [
480            0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
481            0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
482            0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
483            0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c,
484        ];
485
486        let expected = [
487            0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
488            0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
489            0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
490            0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52,
491        ];
492
493        let result = diffie_hellman(&scalar, &basepoint);
494        assert_eq!(result, expected);
495    }
496
497    #[test]
498    fn test_basepoint_mult() {
499        let secret = [
500            1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502        ];
503
504        let public = public_key(&secret);
505        assert_ne!(public, [0u8; 32]);
506    }
507
508    #[test]
509    fn test_dh_symmetry() {
510        let alice_secret = [0x77u8; 32];
511        let alice_public = public_key(&alice_secret);
512
513        let bob_secret = [0x99u8; 32];
514        let bob_public = public_key(&bob_secret);
515
516        let alice_shared = diffie_hellman(&alice_secret, &bob_public);
517        let bob_shared = diffie_hellman(&bob_secret, &alice_public);
518
519        assert_eq!(alice_shared, bob_shared);
520    }
521
522    #[test]
523    fn test_public_key_deterministic() {
524        let secret = [0x42u8; 32];
525        let public1 = public_key(&secret);
526        let public2 = public_key(&secret);
527        assert_eq!(public1, public2);
528    }
529}