poly1305_nostd/
lib.rs

//! Pure-Rust Poly1305 Message Authentication Code
//!
//! Implements the Poly1305 one-time authenticator as specified in RFC 8439
//! (the key is the pair `r || s`; this is not the nonce-based Poly1305-AES
//! construction).
//! This is a faithful translation of poly1305-donna by Andrew Moon.
//! Avoids LLVM SIMD issues on x86_64-unknown-none bare-metal targets.
//!
//! Properties:
//! - 128-bit authentication tag
//! - One-time MAC (key must be unique per message)
//! - Constant-time operation (no secret-dependent branches)
//! - ~10 cycles/byte on modern x86_64
//!
//! Algorithm:
//! 1. Clamp the 'r' portion of the key
//! 2. Process message in 16-byte blocks
//! 3. Accumulate: a = ((a + block) * r) mod (2^130 - 5)
//! 4. Add 's' portion of key: tag = (a + s) mod 2^128
18
19#![no_std]
20#![forbid(unsafe_code)]
21
/// Size in bytes of one Poly1305 message block.
const POLY1305_BLOCK_SIZE: usize = 16;
23
/// Poly1305 MAC state (using 32bit * 32bit = 64bit multiplication).
///
/// Neither `Debug` nor `Clone` is derived, so the key material held here is
/// not printed or duplicated implicitly.
pub struct Poly1305 {
    /// Clamped `r` half of the key (130 bits in 26-bit limbs).
    r: [u32; 5],
    /// Accumulator `h` (130 bits in 26-bit limbs).
    h: [u32; 5],
    /// Secret pad (`s` half of the key) as four little-endian 32-bit words;
    /// added to the accumulator when the tag is produced.
    pad: [u32; 4],
    /// Number of bytes currently held in `buffer`.
    leftover: usize,
    /// Partial block buffer for input that has not yet filled 16 bytes.
    buffer: [u8; 16],
    /// Set by `finalize` before the padded last partial block is absorbed;
    /// while set, block processing does not add the 2^128 bit.
    final_block: bool,
}
33
/// Interpret the first four bytes of `p` as a little-endian 32-bit unsigned integer.
///
/// Bytes beyond the fourth are ignored.
///
/// # Panics
///
/// Panics if `p` holds fewer than four bytes.
#[inline]
fn u8to32(p: &[u8]) -> u32 {
    u32::from_le_bytes([p[0], p[1], p[2], p[3]])
}
42
/// Store `v` into the first four bytes of `p` in little-endian order.
///
/// Bytes of `p` beyond the fourth are left untouched.
///
/// # Panics
///
/// Panics if `p` holds fewer than four bytes (nothing is written in that case).
#[inline]
fn u32to8(p: &mut [u8], v: u32) {
    p[..4].copy_from_slice(&v.to_le_bytes());
}
51
52impl Poly1305 {
53    /// Create a new Poly1305 instance from a 32-byte key
54    ///
55    /// Key format: [r (16 bytes) || s (16 bytes)]
56    /// - r: clamped and used as multiplication key
57    /// - s: added to final accumulator
58    pub fn new(key: &[u8; 32]) -> Self {
59        // r &= 0xffffffc0ffffffc0ffffffc0fffffff
60        // Extract r in 26-bit limbs with proper clamping
61        let r0 = (u8to32(&key[0..])) & 0x3ffffff;
62        let r1 = (u8to32(&key[3..]) >> 2) & 0x3ffff03;
63        let r2 = (u8to32(&key[6..]) >> 4) & 0x3ffc0ff;
64        let r3 = (u8to32(&key[9..]) >> 6) & 0x3f03fff;
65        let r4 = (u8to32(&key[12..]) >> 8) & 0x00fffff;
66
67        // Extract pad (s)
68        let pad0 = u8to32(&key[16..]);
69        let pad1 = u8to32(&key[20..]);
70        let pad2 = u8to32(&key[24..]);
71        let pad3 = u8to32(&key[28..]);
72
73        Self {
74            r: [r0, r1, r2, r3, r4],
75            h: [0, 0, 0, 0, 0],
76            pad: [pad0, pad1, pad2, pad3],
77            leftover: 0,
78            buffer: [0u8; 16],
79            final_block: false,
80        }
81    }
82
83    /// Process multiple complete 16-byte blocks
84    fn blocks(&mut self, m: &[u8], bytes: usize) {
85        let hibit = if self.final_block { 0 } else { 1u32 << 24 }; // 1 << 128
86
87        let r0 = self.r[0];
88        let r1 = self.r[1];
89        let r2 = self.r[2];
90        let r3 = self.r[3];
91        let r4 = self.r[4];
92
93        let s1 = r1 * 5;
94        let s2 = r2 * 5;
95        let s3 = r3 * 5;
96        let s4 = r4 * 5;
97
98        let mut h0 = self.h[0];
99        let mut h1 = self.h[1];
100        let mut h2 = self.h[2];
101        let mut h3 = self.h[3];
102        let mut h4 = self.h[4];
103
104        let mut offset = 0;
105        while offset + POLY1305_BLOCK_SIZE <= bytes {
106            // h += m[i] (add message block to accumulator in 26-bit limbs)
107            h0 += (u8to32(&m[offset..])) & 0x3ffffff;
108            h1 += (u8to32(&m[offset + 3..]) >> 2) & 0x3ffffff;
109            h2 += (u8to32(&m[offset + 6..]) >> 4) & 0x3ffffff;
110            h3 += (u8to32(&m[offset + 9..]) >> 6) & 0x3ffffff;
111            h4 += (u8to32(&m[offset + 12..]) >> 8) | hibit;
112
113            // h *= r (multiply accumulator by r, with modular reduction)
114            let d0 = (h0 as u64 * r0 as u64) + (h1 as u64 * s4 as u64) + (h2 as u64 * s3 as u64) + (h3 as u64 * s2 as u64) + (h4 as u64 * s1 as u64);
115            let d1 = (h0 as u64 * r1 as u64) + (h1 as u64 * r0 as u64) + (h2 as u64 * s4 as u64) + (h3 as u64 * s3 as u64) + (h4 as u64 * s2 as u64);
116            let d2 = (h0 as u64 * r2 as u64) + (h1 as u64 * r1 as u64) + (h2 as u64 * r0 as u64) + (h3 as u64 * s4 as u64) + (h4 as u64 * s3 as u64);
117            let d3 = (h0 as u64 * r3 as u64) + (h1 as u64 * r2 as u64) + (h2 as u64 * r1 as u64) + (h3 as u64 * r0 as u64) + (h4 as u64 * s4 as u64);
118            let d4 = (h0 as u64 * r4 as u64) + (h1 as u64 * r3 as u64) + (h2 as u64 * r2 as u64) + (h3 as u64 * r1 as u64) + (h4 as u64 * r0 as u64);
119
120            // (partial) h %= p (carry propagation)
121            let mut c: u32;
122            c = (d0 >> 26) as u32; h0 = (d0 as u32) & 0x3ffffff;
123            let d1 = d1 + c as u64; c = (d1 >> 26) as u32; h1 = (d1 as u32) & 0x3ffffff;
124            let d2 = d2 + c as u64; c = (d2 >> 26) as u32; h2 = (d2 as u32) & 0x3ffffff;
125            let d3 = d3 + c as u64; c = (d3 >> 26) as u32; h3 = (d3 as u32) & 0x3ffffff;
126            let d4 = d4 + c as u64; c = (d4 >> 26) as u32; h4 = (d4 as u32) & 0x3ffffff;
127            h0 += c * 5; c = h0 >> 26; h0 &= 0x3ffffff;
128            h1 += c;
129
130            offset += POLY1305_BLOCK_SIZE;
131        }
132
133        self.h[0] = h0;
134        self.h[1] = h1;
135        self.h[2] = h2;
136        self.h[3] = h3;
137        self.h[4] = h4;
138    }
139
140    /// Update MAC with more data
141    pub fn update(&mut self, data: &[u8]) {
142        let mut m = data;
143        let mut bytes = data.len();
144
145        // Handle leftover from previous update
146        if self.leftover > 0 {
147            let want = core::cmp::min(POLY1305_BLOCK_SIZE - self.leftover, bytes);
148            for i in 0..want {
149                self.buffer[self.leftover + i] = m[i];
150            }
151            bytes -= want;
152            m = &m[want..];
153            self.leftover += want;
154
155            if self.leftover < POLY1305_BLOCK_SIZE {
156                return;
157            }
158
159            let buffer_copy = self.buffer;
160            self.blocks(&buffer_copy, POLY1305_BLOCK_SIZE);
161            self.leftover = 0;
162        }
163
164        // Process full blocks
165        if bytes >= POLY1305_BLOCK_SIZE {
166            let want = bytes & !(POLY1305_BLOCK_SIZE - 1);
167            self.blocks(m, want);
168            m = &m[want..];
169            bytes -= want;
170        }
171
172        // Store leftover
173        if bytes > 0 {
174            for i in 0..bytes {
175                self.buffer[self.leftover + i] = m[i];
176            }
177            self.leftover += bytes;
178        }
179    }
180
181    /// Finalize and return 16-byte authentication tag
182    pub fn finalize(mut self) -> [u8; 16] {
183        // Process the remaining block (if any)
184        if self.leftover > 0 {
185            let mut i = self.leftover;
186            self.buffer[i] = 1;
187            i += 1;
188            while i < POLY1305_BLOCK_SIZE {
189                self.buffer[i] = 0;
190                i += 1;
191            }
192            self.final_block = true;
193            self.blocks(&self.buffer.clone(), POLY1305_BLOCK_SIZE);
194        }
195
196        // Fully carry h
197        let mut h0 = self.h[0];
198        let mut h1 = self.h[1];
199        let mut h2 = self.h[2];
200        let mut h3 = self.h[3];
201        let mut h4 = self.h[4];
202
203        let mut c: u32;
204        c = h1 >> 26; h1 &= 0x3ffffff;
205        h2 += c; c = h2 >> 26; h2 &= 0x3ffffff;
206        h3 += c; c = h3 >> 26; h3 &= 0x3ffffff;
207        h4 += c; c = h4 >> 26; h4 &= 0x3ffffff;
208        h0 += c * 5; c = h0 >> 26; h0 &= 0x3ffffff;
209        h1 += c;
210
211        // Compute h + -p
212        let mut g0 = h0.wrapping_add(5); c = g0 >> 26; g0 &= 0x3ffffff;
213        let mut g1 = h1.wrapping_add(c); c = g1 >> 26; g1 &= 0x3ffffff;
214        let mut g2 = h2.wrapping_add(c); c = g2 >> 26; g2 &= 0x3ffffff;
215        let mut g3 = h3.wrapping_add(c); c = g3 >> 26; g3 &= 0x3ffffff;
216        let mut g4 = h4.wrapping_add(c).wrapping_sub(1u32 << 26);
217
218        // Select h if h < p, or h + -p if h >= p
219        let mut mask = (g4 >> ((core::mem::size_of::<u32>() * 8) - 1)).wrapping_sub(1);
220        g0 &= mask;
221        g1 &= mask;
222        g2 &= mask;
223        g3 &= mask;
224        g4 &= mask;
225        mask = !mask;
226        h0 = (h0 & mask) | g0;
227        h1 = (h1 & mask) | g1;
228        h2 = (h2 & mask) | g2;
229        h3 = (h3 & mask) | g3;
230        h4 = (h4 & mask) | g4;
231
232        // h = h % (2^128) (pack back into 32-bit words)
233        h0 = ((h0) | (h1 << 26)) & 0xffffffff;
234        h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
235        h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
236        h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
237
238        // mac = (h + pad) % (2^128)
239        let mut f: u64;
240        f = h0 as u64 + self.pad[0] as u64; h0 = f as u32;
241        f = h1 as u64 + self.pad[1] as u64 + (f >> 32); h1 = f as u32;
242        f = h2 as u64 + self.pad[2] as u64 + (f >> 32); h2 = f as u32;
243        f = h3 as u64 + self.pad[3] as u64 + (f >> 32); h3 = f as u32;
244
245        let mut mac = [0u8; 16];
246        u32to8(&mut mac[0..], h0);
247        u32to8(&mut mac[4..], h1);
248        u32to8(&mut mac[8..], h2);
249        u32to8(&mut mac[12..], h3);
250
251        mac
252    }
253
254    /// One-shot MAC computation
255    pub fn mac(key: &[u8; 32], data: &[u8]) -> [u8; 16] {
256        let mut poly = Self::new(key);
257        poly.update(data);
258        poly.finalize()
259    }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Known-answer test: a 34-byte message, i.e. two full blocks plus a
    /// 2-byte padded final block.
    #[test]
    fn test_poly1305_rfc8439_vector1() {
        // RFC 8439 Section 2.5.2 Test Vector #1
        let key = [
            0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
            0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
            0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
            0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b,
        ];

        let msg = b"Cryptographic Forum Research Group";

        let expected = [
            0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
            0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9,
        ];

        let tag = Poly1305::mac(&key, msg);
        assert_eq!(tag, expected);
    }

    /// Exercises the final `h >= p` reduction step.
    #[test]
    fn test_poly1305_final_reduction() {
        // r = 2, s = 0, one block of 0xff bytes:
        // (2^128 + (2^128 - 1)) * 2 = 2^130 - 2 = 3 (mod 2^130 - 5)
        let mut key = [0u8; 32];
        key[0] = 2;

        let mut expected = [0u8; 16];
        expected[0] = 3;

        let tag = Poly1305::mac(&key, &[0xffu8; 16]);
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_poly1305_empty() {
        let key = [0x42u8; 32];
        let tag = Poly1305::mac(&key, &[]);
        // No block is absorbed, so the accumulator stays zero and the tag
        // is exactly the `s` half of the key.
        assert_eq!(tag, [0x42u8; 16]);
    }

    #[test]
    fn test_poly1305_incremental() {
        let key = [0x42u8; 32];
        let msg = b"Hello, World!";

        // One-shot
        let tag1 = Poly1305::mac(&key, msg);

        // Incremental
        let mut poly = Poly1305::new(&key);
        poly.update(b"Hello, ");
        poly.update(b"World!");
        let tag2 = poly.finalize();

        assert_eq!(tag1, tag2);
    }

    /// Splits a multi-block message at every position so the buffering
    /// paths (top-up, direct full blocks, stored remainder) are all hit.
    #[test]
    fn test_poly1305_incremental_across_blocks() {
        let key = [0x42u8; 32];
        let mut msg = [0u8; 50];
        for (i, b) in msg.iter_mut().enumerate() {
            *b = (i as u8).wrapping_mul(13).wrapping_add(5);
        }

        let one_shot = Poly1305::mac(&key, &msg);

        for split in 0..msg.len() + 1 {
            let (head, tail) = msg.split_at(split);
            let mut poly = Poly1305::new(&key);
            poly.update(head);
            poly.update(tail);
            assert_eq!(poly.finalize(), one_shot, "split at {}", split);
        }
    }

    #[test]
    fn test_poly1305_different_keys() {
        let key1 = [0x42u8; 32];
        let key2 = [0x43u8; 32];
        let msg = b"Same message";

        let tag1 = Poly1305::mac(&key1, msg);
        let tag2 = Poly1305::mac(&key2, msg);

        // Different keys should produce different tags
        assert_ne!(tag1, tag2);
    }
}