Skip to main content

gmcrypto_core/
pem.rs

1//! Hand-rolled PEM (RFC 7468) codec.
2//!
3//! Wraps and unwraps `-----BEGIN <LABEL>-----` ... `-----END <LABEL>-----`
4//! armor around an arbitrary DER blob. Used by [`crate::pkcs8`],
5//! [`crate::spki`], and [`crate::sec1`] for on-disk format support.
6//!
7//! # Posture
8//!
9//! - **Liberal decoder, conservative encoder.** [`decode`] accepts the
10//!   relaxed RFC 7468 production: arbitrary whitespace (including CR,
11//!   LF, tab, space) anywhere inside the body, and either CRLF or LF
12//!   line terminators around the boundary lines. [`encode`] emits the
13//!   strict RFC 1421 production: 64 base-64 characters per line, LF
14//!   terminator, no trailing whitespace.
15//! - **No external dependencies.** The base64 codec is embedded below
16//!   (~80 LOC) per the v0.3 scope's zero-runtime-deps stance (Q7.1).
17//! - **`no_std` + `alloc`.** No file-loading helpers in this module.
18//!
19//! # Failure-mode invariant
20//!
21//! [`decode`] returns `Result<Vec<u8>, Error>` with a single
22//! [`Error::Failed`] variant. Distinguishing "wrong label" from "bad
23//! base64" from "missing END line" is forbidden — see `CLAUDE.md`.
24
25use alloc::vec::Vec;
26
/// PEM codec failure — alias for the workspace-wide [`crate::Error`].
///
/// Single uninformative variant per the project's failure-mode
/// invariant. Prior to v0.5 this was a distinct `pem::Error` enum;
/// v0.5 W5 unifies it with the workspace-wide type via this alias,
/// so existing import paths and `match` callsites against
/// `pem::Error::Failed` that already carry a wildcard arm continue
/// to work unchanged.
///
/// **One caveat:** the workspace-wide type is `#[non_exhaustive]`, so
/// downstream `match` expressions that listed every variant without a
/// wildcard must now add `_ => ...` (a non-exhaustive enum cannot be
/// matched exhaustively from outside its defining crate, even when it
/// has a single variant).
pub type Error = crate::Error;
39
40/// Strict line length emitted by [`encode`]. RFC 1421 §4.3.2.4 fixes
41/// 64 base-64 characters per line; RFC 7468 §3 keeps the same.
42const LINE_LEN: usize = 64;
43
44/// Encode `der` as a PEM block with the given `label`. Output is the
45/// strict RFC 1421 form: 64 chars per line, LF terminators, no
46/// trailing whitespace.
47///
48/// `label` must be ASCII per RFC 7468 §2 — non-ASCII labels would
49/// round-trip but reject under strict-conformant decoders. The
50/// callers in this crate use fixed labels (`"PRIVATE KEY"`,
51/// `"PUBLIC KEY"`, `"ENCRYPTED PRIVATE KEY"`, `"EC PRIVATE KEY"`),
52/// all ASCII.
53///
54/// # Panics
55///
56/// Never (encoded length is bounded by `4 · der.len() / 3 + small`,
57/// well below the `Vec` allocation ceiling on any realistic input).
58#[must_use]
59pub fn encode(label: &str, der: &[u8]) -> alloc::string::String {
60    use core::fmt::Write;
61    let body = base64_encode(der);
62    // 4-line preamble + (body chunked into 64-char lines) + 4-line postamble.
63    let line_count = body.len().div_ceil(LINE_LEN);
64    let mut out =
65        alloc::string::String::with_capacity(body.len() + line_count + 2 * (label.len() + 16));
66    let _ = writeln!(out, "-----BEGIN {label}-----");
67    let mut start = 0;
68    while start < body.len() {
69        let end = (start + LINE_LEN).min(body.len());
70        out.push_str(&body[start..end]);
71        out.push('\n');
72        start = end;
73    }
74    let _ = writeln!(out, "-----END {label}-----");
75    out
76}
77
78/// Decode a PEM block, returning the raw DER bytes. The block's label
79/// must equal `expected_label` exactly.
80///
81/// Liberal on whitespace (RFC 7468 §3): tabs, spaces, CR, and LF are
82/// all stripped inside the body. The label must match exactly — case
83/// sensitive, no fuzzy-match.
84///
85/// # Errors
86///
87/// Returns [`Error::Failed`] for any malformed input. Single
88/// uninformative variant per the project's failure-mode invariant.
89pub fn decode(input: &str, expected_label: &str) -> Result<Vec<u8>, Error> {
90    let begin = alloc::format!("-----BEGIN {expected_label}-----");
91    let end = alloc::format!("-----END {expected_label}-----");
92
93    let begin_idx = input.find(&begin).ok_or(Error::Failed)?;
94    let after_begin = &input[begin_idx + begin.len()..];
95    let end_rel = after_begin.find(&end).ok_or(Error::Failed)?;
96    let body = &after_begin[..end_rel];
97
98    // Strip whitespace from the body. Anything else (printable
99    // non-base64, non-ASCII) gets fed through to base64_decode, which
100    // rejects it.
101    let mut stripped = alloc::string::String::with_capacity(body.len());
102    for ch in body.chars() {
103        if !ch.is_ascii_whitespace() {
104            stripped.push(ch);
105        }
106    }
107
108    base64_decode(&stripped).ok_or(Error::Failed)
109}
110
111// --- base64 codec (RFC 4648 §4, "standard alphabet") ---
112
/// The 64 characters of the RFC 4648 §4 standard alphabet, indexed by
/// sextet value.
const BASE64_ALPHABET: &[u8; 64] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Produce the standard, `=`-padded base64 text for `input`.
///
/// The returned string is pure ASCII and contains no line breaks; line
/// wrapping is the job of [`encode`]. Empty input yields an empty string.
#[must_use]
fn base64_encode(input: &[u8]) -> alloc::string::String {
    // Map a 6-bit value to its alphabet character. The mask keeps only
    // the low six bits, so the index is always in range.
    let sextet = |value: u8| char::from(BASE64_ALPHABET[usize::from(value & 0x3F)]);

    let mut encoded = alloc::string::String::with_capacity(input.len().div_ceil(3) * 4);

    // Full 3-byte groups each become four characters.
    let mut triples = input.chunks_exact(3);
    for triple in triples.by_ref() {
        let (b0, b1, b2) = (triple[0], triple[1], triple[2]);
        encoded.push(sextet(b0 >> 2));
        encoded.push(sextet(((b0 & 0x03) << 4) | (b1 >> 4)));
        encoded.push(sextet(((b1 & 0x0F) << 2) | (b2 >> 6)));
        encoded.push(sextet(b2));
    }

    // One or two leftover bytes: the unused low bits of the last sextet
    // are zero and the group is padded out to four characters with '='.
    match triples.remainder() {
        &[b0] => {
            encoded.push(sextet(b0 >> 2));
            encoded.push(sextet((b0 & 0x03) << 4));
            encoded.push_str("==");
        }
        &[b0, b1] => {
            encoded.push(sextet(b0 >> 2));
            encoded.push(sextet(((b0 & 0x03) << 4) | (b1 >> 4)));
            encoded.push(sextet((b1 & 0x0F) << 2));
            encoded.push('=');
        }
        _ => {}
    }

    encoded
}
149
150/// Decode a base64 string (no whitespace; caller pre-stripped).
151/// Returns `None` for any malformed input.
152#[must_use]
153fn base64_decode(input: &str) -> Option<Vec<u8>> {
154    let bytes = input.as_bytes();
155    if bytes.len() % 4 != 0 {
156        return None;
157    }
158    if bytes.is_empty() {
159        return Some(Vec::new());
160    }
161
162    // Determine pad count from the suffix.
163    let pad = if bytes.ends_with(b"==") {
164        2usize
165    } else {
166        usize::from(bytes.ends_with(b"="))
167    };
168    let body_chars = bytes.len() - pad;
169
170    let mut out = Vec::with_capacity(bytes.len() / 4 * 3);
171    let mut i = 0;
172    while i + 4 <= bytes.len() {
173        // Decode four input characters → three output bytes (minus
174        // pad-driven trim on the final group).
175        let last_group = i + 4 == bytes.len();
176        let v0 = base64_lookup(bytes[i])?;
177        let v1 = base64_lookup(bytes[i + 1])?;
178        let (v2, v3) = if last_group {
179            (
180                if i + 2 < body_chars {
181                    base64_lookup(bytes[i + 2])?
182                } else {
183                    if bytes[i + 2] != b'=' {
184                        return None;
185                    }
186                    0
187                },
188                if i + 3 < body_chars {
189                    base64_lookup(bytes[i + 3])?
190                } else {
191                    if bytes[i + 3] != b'=' {
192                        return None;
193                    }
194                    0
195                },
196            )
197        } else {
198            (base64_lookup(bytes[i + 2])?, base64_lookup(bytes[i + 3])?)
199        };
200
201        let b0 = (v0 << 2) | (v1 >> 4);
202        let b1 = (v1 << 4) | (v2 >> 2);
203        let b2 = (v2 << 6) | v3;
204
205        // Strict-canonical: the bits of the final-group sextets that
206        // would have encoded the dropped output bytes must be zero.
207        // pad=2: low 4 bits of v1 encode part of `b1` (which we drop)
208        // and must be zero. pad=1: low 2 bits of v2 encode part of
209        // `b2` (which we drop) and must be zero.
210        if last_group {
211            if pad == 2 && (v1 & 0x0F) != 0 {
212                return None;
213            }
214            if pad == 1 && (v2 & 0x03) != 0 {
215                return None;
216            }
217        }
218
219        out.push(b0);
220        if !last_group || pad <= 1 {
221            out.push(b1);
222        }
223        if !last_group || pad == 0 {
224            out.push(b2);
225        }
226        i += 4;
227    }
228    Some(out)
229}
230
/// Map one character of the standard base64 alphabet to its 6-bit value.
///
/// Yields `None` for every byte outside the alphabet. That includes `=`:
/// padding is recognised by the caller, not here.
const fn base64_lookup(c: u8) -> Option<u8> {
    // Each contiguous run of the alphabet is described by its first
    // character and the sextet value that character maps to.
    let (run_start, first_value) = match c {
        b'A'..=b'Z' => (b'A', 0),
        b'a'..=b'z' => (b'a', 26),
        b'0'..=b'9' => (b'0', 52),
        b'+' => (b'+', 62),
        b'/' => (b'/', 63),
        _ => return None,
    };
    Some(c - run_start + first_value)
}
244
/// Unit tests for the embedded base64 codec and the PEM armor built on it.
#[cfg(test)]
mod tests {
    use super::*;

    // ---------- base64 codec ----------

    #[test]
    fn base64_round_trip_empty() {
        let bytes: &[u8] = &[];
        assert_eq!(base64_encode(bytes), "");
        assert_eq!(base64_decode("").as_deref(), Some(bytes));
    }

    #[test]
    fn base64_round_trip_one_byte() {
        // Single byte → two sextets plus "==" padding
        // (RFC 4648 example: "f" = "Zg==").
        assert_eq!(base64_encode(b"f"), "Zg==");
        assert_eq!(base64_decode("Zg==").as_deref(), Some(b"f".as_slice()));
    }

    #[test]
    fn base64_round_trip_two_bytes() {
        assert_eq!(base64_encode(b"fo"), "Zm8=");
        assert_eq!(base64_decode("Zm8=").as_deref(), Some(b"fo".as_slice()));
    }

    #[test]
    fn base64_round_trip_three_bytes() {
        assert_eq!(base64_encode(b"foo"), "Zm9v");
        assert_eq!(base64_decode("Zm9v").as_deref(), Some(b"foo".as_slice()));
    }

    #[test]
    fn base64_rfc4648_test_vectors() {
        // RFC 4648 §10.
        for (raw, encoded) in [
            ("", ""),
            ("f", "Zg=="),
            ("fo", "Zm8="),
            ("foo", "Zm9v"),
            ("foob", "Zm9vYg=="),
            ("fooba", "Zm9vYmE="),
            ("foobar", "Zm9vYmFy"),
        ] {
            assert_eq!(base64_encode(raw.as_bytes()), encoded);
            assert_eq!(
                base64_decode(encoded).as_deref(),
                Some(raw.as_bytes()),
                "decode {encoded:?}"
            );
        }
    }

    #[test]
    fn base64_decode_rejects_bad_chars() {
        assert!(base64_decode("Zm9*").is_none()); // '*' not in alphabet
        assert!(base64_decode("Zm9").is_none()); // length not multiple of 4
        assert!(base64_decode("Z===").is_none()); // 3 pads invalid
        assert!(base64_decode("====").is_none()); // all-pad invalid
    }

    /// Strict canonical: non-zero pad bits in the final quantum reject.
    /// `Zg==` is the canonical encoding of `[0x66]`. `Zh==` carries
    /// sextet 33 (`0b100001`) in v1, whose low 4 bits are non-zero —
    /// non-canonical because the decoded byte would still be `0x66`
    /// and the extra bits would be silently dropped.
    #[test]
    fn base64_decode_rejects_non_canonical_pad_bits() {
        // 'Z' = 25, 'h' = 33. v1 = 33 = 0b100001. Low 4 bits = 0b0001 ≠ 0.
        assert!(base64_decode("Zh==").is_none());
        // 'Z' = 25, 'g' = 32. v1 = 32 = 0b100000. Low 4 bits = 0 — accept.
        assert!(base64_decode("Zg==").is_some());
        // 'Z' = 25, 'm' = 38, '8' = 60. v2 = 60 = 0b111100. Low 2 bits = 0 — accept.
        assert!(base64_decode("Zm8=").is_some());
        // Mutate to v2 with non-zero low 2 bits: '9' = 61 = 0b111101. Low 2 = 0b01 ≠ 0.
        assert!(base64_decode("Zm9=").is_none());
    }

    // ---------- PEM ----------

    #[test]
    fn pem_round_trip_short() {
        let der: &[u8] = &[0x30, 0x03, 0x02, 0x01, 0x05];
        let pem = encode("EC PRIVATE KEY", der);
        let recovered = decode(&pem, "EC PRIVATE KEY").expect("decode");
        assert_eq!(recovered, der);
    }

    #[test]
    fn pem_round_trip_long_wraps_at_64() {
        // 100 bytes of DER → 34 four-character groups = 136 chars of
        // base64 → 3 lines of 64/64/8.
        let der: alloc::vec::Vec<u8> = (0..100u8).collect();
        let pem = encode("PRIVATE KEY", &der);
        // Pin the exact body line lengths (boundary lines start with
        // dashes, which never begin a base64 line), so a short non-final
        // line is caught as well as an over-long one.
        let body_lens: alloc::vec::Vec<usize> = pem
            .lines()
            .filter(|line| !line.starts_with("---"))
            .map(str::len)
            .collect();
        assert_eq!(body_lens, [LINE_LEN, LINE_LEN, 8]);
        let recovered = decode(&pem, "PRIVATE KEY").expect("decode");
        assert_eq!(recovered, der);
    }

    #[test]
    fn pem_label_must_match() {
        let pem = encode("PRIVATE KEY", b"\x30\x00");
        assert!(matches!(decode(&pem, "PUBLIC KEY"), Err(Error::Failed)));
    }

    #[test]
    fn pem_decode_rejects_missing_begin() {
        assert!(matches!(
            decode("garbage", "PRIVATE KEY"),
            Err(Error::Failed)
        ));
    }

    #[test]
    fn pem_decode_rejects_missing_end() {
        let bad = "-----BEGIN PRIVATE KEY-----\nABCD\n";
        assert!(matches!(decode(bad, "PRIVATE KEY"), Err(Error::Failed)));
    }

    #[test]
    fn pem_decode_tolerates_crlf_and_extra_whitespace() {
        // CRLF terminators, body split across short lines. The `\` line
        // continuations strip the source indentation, so this literal
        // contains no spaces or tabs.
        let pem = "-----BEGIN PRIVATE KEY-----\r\n\
                   MAMC\r\n\
                   AQU=\r\n\
                   -----END PRIVATE KEY-----\r\n";
        let recovered = decode(pem, "PRIVATE KEY").expect("decode");
        assert_eq!(recovered, [0x30, 0x03, 0x02, 0x01, 0x05]);

        // Same payload with spaces, tabs and a blank line inside the body.
        let spaced = "-----BEGIN PRIVATE KEY-----\r\n \tMA MC\t\r\n\r\n  AQU= \r\n-----END PRIVATE KEY-----\r\n";
        let recovered = decode(spaced, "PRIVATE KEY").expect("decode spaced");
        assert_eq!(recovered, [0x30, 0x03, 0x02, 0x01, 0x05]);
    }

    #[test]
    fn pem_encoded_form_is_strict() {
        let der: alloc::vec::Vec<u8> = (0..200u8).collect();
        let pem = encode("PRIVATE KEY", &der);
        // Strict form: trailing newline; no \r; preamble + body + postamble.
        assert!(pem.ends_with('\n'));
        assert!(!pem.contains('\r'));
        assert!(pem.starts_with("-----BEGIN PRIVATE KEY-----\n"));
        assert!(pem.contains("\n-----END PRIVATE KEY-----\n"));
    }
}
390}