pem_rfc7468/
decoder.rs

1//! Decoder for PEM encapsulated data.
2//!
3//! From RFC 7468 Section 2:
4//!
5//! > Textual encoding begins with a line comprising "-----BEGIN ", a
6//! > label, and "-----", and ends with a line comprising "-----END ", a
7//! > label, and "-----".  Between these lines, or "encapsulation
8//! > boundaries", are base64-encoded data according to Section 4 of
9//! > [RFC 4648].
10//!
11//! [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648
12
13use crate::{
14    BASE64_WRAP_WIDTH, Base64Decoder, Error, POST_ENCAPSULATION_BOUNDARY,
15    PRE_ENCAPSULATION_BOUNDARY, Result, grammar,
16};
17use core::str;
18
19#[cfg(feature = "alloc")]
20use alloc::vec::Vec;
21
22#[cfg(feature = "std")]
23use std::io;
24
25/// Decode a PEM document according to RFC 7468's "Strict" grammar.
26///
27/// On success, writes the decoded document into the provided buffer, returning
28/// the decoded label and the portion of the provided buffer containing the
29/// decoded message.
30pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
31    let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
32    let type_label = decoder.type_label();
33    let buf = buf
34        .get_mut(..decoder.remaining_len())
35        .ok_or(Error::Length)?;
36    let decoded = decoder.decode(buf).map_err(|e| check_for_headers(pem, e))?;
37
38    if decoder.base64.is_finished() {
39        Ok((type_label, decoded))
40    } else {
41        Err(Error::Length)
42    }
43}
44
/// Decode a PEM document using the "Strict" grammar of RFC 7468, collecting
/// the decoded message into a freshly allocated [`Vec`].
///
/// On success, returns the type label from the encapsulation boundaries and
/// the decoded message.
///
/// # Errors
///
/// Returns any error raised while parsing the document or decoding its
/// Base64 body, after being passed through `check_for_headers`.
#[cfg(feature = "alloc")]
pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
    // Both fallible decoder steps remap their errors the same way.
    let remap = |err: Error| check_for_headers(pem, err);

    let mut decoder = Decoder::new(pem).map_err(remap)?;
    let label = decoder.type_label();

    let mut message = Vec::new();
    decoder.decode_to_end(&mut message).map_err(remap)?;

    Ok((label, message))
}
57
58/// Decode the encapsulation boundaries of a PEM document according to RFC 7468's "Strict" grammar.
59///
60/// On success, returning the decoded label.
61pub fn decode_label(pem: &[u8]) -> Result<&str> {
62    Ok(Encapsulation::try_from(pem)?.label())
63}
64
65/// Attempt to detect the Base64 line width for the given PEM document.
66///
67/// NOTE: not constant time with respect to the input.
68pub fn detect_base64_line_width(pem: &[u8]) -> Result<usize> {
69    Ok(Encapsulation::try_from(pem)?.encapsulated_text_line_width())
70}
71
72/// Buffered PEM decoder.
73///
74/// Stateful buffered decoder type which decodes an input PEM document according
75/// to RFC 7468's "Strict" grammar.
76#[derive(Clone)]
77pub struct Decoder<'i> {
78    /// PEM type label.
79    type_label: &'i str,
80
81    /// Buffered Base64 decoder.
82    base64: Base64Decoder<'i>,
83}
84
85impl<'i> Decoder<'i> {
86    /// Create a new PEM [`Decoder`] with the default options.
87    ///
88    /// Uses the default 64-character line wrapping.
89    pub fn new(pem: &'i [u8]) -> Result<Self> {
90        Self::new_wrapped(pem, BASE64_WRAP_WIDTH)
91    }
92
93    /// Create a new PEM [`Decoder`] which wraps at the given line width.
94    pub fn new_wrapped(pem: &'i [u8], line_width: usize) -> Result<Self> {
95        let encapsulation = Encapsulation::try_from(pem)?;
96        let type_label = encapsulation.label();
97        let base64 = Base64Decoder::new_wrapped(encapsulation.encapsulated_text, line_width)?;
98
99        Ok(Self { type_label, base64 })
100    }
101
102    /// Create a new PEM [`Decoder`] which automatically detects the line width the input is wrapped
103    /// at and flexibly handles widths other than the default 64-characters.
104    ///
105    /// Note: unlike `new` and `new_wrapped`, this method is not constant-time.
106    pub fn new_detect_wrap(pem: &'i [u8]) -> Result<Self> {
107        let line_width = detect_base64_line_width(pem)?;
108        Self::new_wrapped(pem, line_width)
109    }
110
111    /// Get the PEM type label for the input document.
112    pub fn type_label(&self) -> &'i str {
113        self.type_label
114    }
115
116    /// Decode data into the provided output buffer.
117    ///
118    /// There must be at least as much remaining Base64 input to be decoded
119    /// in order to completely fill `buf`.
120    pub fn decode<'o>(&mut self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
121        Ok(self.base64.decode(buf)?)
122    }
123
124    /// Decode all of the remaining data in the input buffer into `buf`.
125    #[cfg(feature = "alloc")]
126    pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec<u8>) -> Result<&'o [u8]> {
127        Ok(self.base64.decode_to_end(buf)?)
128    }
129
130    /// Get the decoded length of the remaining PEM data after Base64 decoding.
131    pub fn remaining_len(&self) -> usize {
132        self.base64.remaining_len()
133    }
134
135    /// Are we finished decoding the PEM input?
136    pub fn is_finished(&self) -> bool {
137        self.base64.is_finished()
138    }
139}
140
141impl<'i> From<Decoder<'i>> for Base64Decoder<'i> {
142    fn from(decoder: Decoder<'i>) -> Base64Decoder<'i> {
143        decoder.base64
144    }
145}
146
/// Every `io::Read` method is forwarded unchanged to the same method on the
/// inner Base64 decoder.
#[cfg(feature = "std")]
impl io::Read for Decoder<'_> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        io::Read::read(&mut self.base64, buf)
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        io::Read::read_to_end(&mut self.base64, buf)
    }

    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        io::Read::read_exact(&mut self.base64, buf)
    }
}
161
/// PEM encapsulation parser.
///
/// This parser makes an initial pass over the data to locate the
/// pre-encapsulation (`-----BEGIN [...]-----`) and post-encapsulation
/// (`-----END [...]-----`) boundaries, while attempting to avoid branching
/// on the potentially secret Base64-encoded data encapsulated between
/// the two boundaries.
///
/// Only a single encapsulated message is supported at present. Future work
/// could potentially include extending it to provide an iterator over a
/// series of encapsulated messages.
#[derive(Clone, Copy, Debug)]
struct Encapsulation<'a> {
    /// Type label extracted from the pre/post-encapsulation boundaries.
    ///
    /// From RFC 7468 Section 2:
    ///
    /// > The type of data encoded is labeled depending on the type label in
    /// > the "-----BEGIN " line (pre-encapsulation boundary).  For example,
    /// > the line may be "-----BEGIN CERTIFICATE-----" to indicate that the
    /// > content is a PKIX certificate (see further below).  Generators MUST
    /// > put the same label on the "-----END " line (post-encapsulation
    /// > boundary) as the corresponding "-----BEGIN " line.  Labels are
    /// > formally case-sensitive, uppercase, and comprised of zero or more
    /// > characters; they do not contain consecutive spaces or hyphen-minuses,
    /// > nor do they contain spaces or hyphen-minuses at either end.  Parsers
    /// > MAY disregard the label in the post-encapsulation boundary instead of
    /// > signaling an error if there is a label mismatch: some extant
    /// > implementations require the labels to match; others do not.
    label: &'a str,

    /// Encapsulated text found between the two boundaries.
    ///
    /// This is expected to be Base64, but it is deliberately left unvalidated
    /// by this type so that it can be handled in constant-time.
    encapsulated_text: &'a [u8],
}
199
200impl<'a> Encapsulation<'a> {
201    /// Parse the type label and encapsulated text from between the
202    /// pre/post-encapsulation boundaries.
203    pub fn parse(data: &'a [u8]) -> Result<Self> {
204        // Strip the "preamble": optional text occurring before the pre-encapsulation boundary
205        let data = grammar::strip_preamble(data)?;
206
207        // Parse pre-encapsulation boundary (including label)
208        let data = data
209            .strip_prefix(PRE_ENCAPSULATION_BOUNDARY)
210            .ok_or(Error::PreEncapsulationBoundary)?;
211
212        let (label, body) = grammar::split_label(data).ok_or(Error::Label)?;
213
214        let mut body = match grammar::strip_trailing_eol(body).unwrap_or(body) {
215            [head @ .., b'-', b'-', b'-', b'-', b'-'] => head,
216            _ => return Err(Error::PreEncapsulationBoundary),
217        };
218
219        // Ensure body ends with a properly labeled post-encapsulation boundary
220        for &slice in [POST_ENCAPSULATION_BOUNDARY, label.as_bytes()].iter().rev() {
221            // Ensure the input ends with the post encapsulation boundary as
222            // well as a matching label
223            if !body.ends_with(slice) {
224                return Err(Error::PostEncapsulationBoundary);
225            }
226
227            let len = body.len().checked_sub(slice.len()).ok_or(Error::Length)?;
228            body = body.get(..len).ok_or(Error::PostEncapsulationBoundary)?;
229        }
230
231        let encapsulated_text =
232            grammar::strip_trailing_eol(body).ok_or(Error::PostEncapsulationBoundary)?;
233
234        Ok(Self {
235            label,
236            encapsulated_text,
237        })
238    }
239
240    /// Get the label parsed from the encapsulation boundaries.
241    pub fn label(self) -> &'a str {
242        self.label
243    }
244
245    /// Detect the line width of the encapsulated text by looking for the position of the first EOL.
246    pub fn encapsulated_text_line_width(self) -> usize {
247        // TODO(tarcieri): handle empty space between the pre-encapsulation boundary and Base64
248        self.encapsulated_text
249            .iter()
250            .copied()
251            .position(|c| matches!(c, grammar::CHAR_CR | grammar::CHAR_LF))
252            .unwrap_or(self.encapsulated_text.len())
253    }
254}
255
256impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
257    type Error = Error;
258
259    fn try_from(bytes: &'a [u8]) -> Result<Self> {
260        Self::parse(bytes)
261    }
262}
263
264/// Check for PEM headers in the input, as they are disallowed by RFC7468.
265///
266/// Returns `Error::HeaderDisallowed` if headers are encountered.
267fn check_for_headers(pem: &[u8], err: Error) -> Error {
268    if err == Error::Base64(base64ct::Error::InvalidEncoding) && pem.contains(&grammar::CHAR_COLON)
269    {
270        Error::HeaderDisallowed
271    } else {
272        err
273    }
274}
275
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::Encapsulation;

    /// Parsing the PKCS#8 example must yield its label and the raw
    /// (still Base64-encoded) encapsulated text.
    #[test]
    fn pkcs8_example() {
        let pem = include_bytes!("../tests/examples/pkcs8.pem");
        let Encapsulation {
            label,
            encapsulated_text,
        } = Encapsulation::parse(pem).unwrap();

        assert_eq!(label, "PRIVATE KEY");

        // The expected Base64 body, written as the ASCII text it is.
        assert_eq!(
            encapsulated_text,
            b"MC4CAQAwBQYDK2VwBCIEIBftnHPp22SewYmmEoMcX8VwI4IHwaqd+9LFPj/15eqF"
        );
    }
}