mda/
decode.rs

1// Copyright 2019 Alexandros Frantzis
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at https://mozilla.org/MPL/2.0/.
6//
7// SPDX-License-Identifier: MPL-2.0
8
9//! Base64 and quoted-printable decoding.
10
11use crate::Result;
12
/// The pseudo-index stored in the table for the pad character (`=`).
/// Real base64 indices are always strictly smaller than this value.
const PAD: u8 = 64;
/// The marker stored in the table for bytes that are not part of the
/// base64 alphabet. It is larger than `PAD`, so `index < PAD` selects
/// exactly the valid alphabet characters.
const INV: u8 = 99;

/// Maps every possible byte value to its base64 index, to `PAD` for the
/// pad character, or to `INV` for bytes outside the base64 alphabet.
///
/// The table has exactly one entry per byte value (the length is part of
/// the type), so indexing it with any `u8` converted to `usize` is always
/// in bounds.
static BASE64_INDICES: [u8; 256] = [
     //   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
/* 0 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* 1 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* 2 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,  62, INV, INV, INV,  63,
/* 3 */  52,  53,  54,  55,  56,  57,  58,  59,  60,  61, INV, INV, INV, PAD, INV, INV,
/* 4 */ INV,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
/* 5 */  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, INV, INV, INV, INV, INV,
/* 6 */ INV,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
/* 7 */  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, INV, INV, INV, INV, INV,
/* 8 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* 9 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* A */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* B */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* C */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* D */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* E */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
/* F */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
];
35
/// The classification of a single base64 input character.
///
/// The derives make values cheap to copy and allow them to be compared
/// and printed, e.g. in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Base64Value {
    /// A valid base64 numeric value.
    Some(u8),
    /// The pad symbol.
    Pad,
    /// No value.
    None,
}
45
46/// Returns the value of the next base64 character. Skips invalid
47/// characters (rfc2045: All line breaks or other characters not
48/// found in Table 1 must be ignored by decoding software).
49fn next_valid_base64_value(iter: &mut dyn Iterator<Item=&u8>) -> Base64Value {
50    while let Some(c) = iter.next() {
51        let b = BASE64_INDICES[*c as usize];
52        if b < PAD {
53            return Base64Value::Some(b);
54        }
55        if b == PAD {
56            return Base64Value::Pad;
57        }
58    }
59    return Base64Value::None;
60}
61
62/// Decodes base64 encoded data, appending the decoded data to a Vec<u8>.
63///
64/// During decoding all line breaks and invalid characters are ignored.
65/// Decoding is finished at the first pad character or end of input.  If an
66/// error is encountered during decoding, the already decoded data in the output
67/// buffer is left intact. It's up to the caller to deal with the partial
68/// decoded data in case of failure
69pub fn base64_decode_into_buf(input: &[u8], output: &mut Vec<u8>) -> Result<()> {
70    let mut iter = input.iter();
71
72    let expected_paddings =
73        loop {
74            let c0 = match next_valid_base64_value(&mut iter) {
75                Base64Value::Some(c) => c,
76                Base64Value::Pad => return Err("Invalid base64 padding".into()),
77                Base64Value::None => return Ok(()),
78            };
79
80            let c1 = match next_valid_base64_value(&mut iter) {
81                Base64Value::Some(c) => { output.push((c0 << 2) | ((c & 0x3f) >> 4)); c }
82                Base64Value::Pad => return Err("Invalid base64 padding".into()),
83                Base64Value::None => return Err("Invalid base64 encoding".into()),
84            };
85
86            let c2 = match next_valid_base64_value(&mut iter) {
87                Base64Value::Some(c) => { output.push((c1 << 4) | ((c & 0x3f) >> 2)); c }
88                Base64Value::Pad => break 1,
89                Base64Value::None => return Err("Invalid base64 padding".into()),
90            };
91
92            match next_valid_base64_value(&mut iter) {
93                Base64Value::Some(c) => { output.push((c2 << 6) | ((c & 0x3f))); }
94                Base64Value::Pad => break 0,
95                Base64Value::None => return Err("Invalid base64 padding".into()),
96            };
97        };
98
99    let mut found_paddings = 0;
100
101    while let Some(c) = iter.next() {
102        if *c == b'=' {
103            found_paddings += 1;
104            continue;
105        }
106        let b = BASE64_INDICES[*c as usize];
107        if b < PAD {
108            return Err("Unexpected characters after base64 padding".into());
109        }
110    }
111
112    if found_paddings != expected_paddings {
113        return Err("Invalid base64 padding".into());
114    }
115
116    Ok(())
117}
118
/// Converts an ASCII byte representing a hex digit to its numerical value.
///
/// Both lowercase and uppercase letters are accepted. Returns `None` for
/// any byte that is not a hex digit.
fn hexdigit_to_num(a: u8) -> Option<u8> {
    match a {
        b'0'..=b'9' => Some(a - b'0'),
        b'a'..=b'f' => Some(a - b'a' + 10),
        b'A'..=b'F' => Some(a - b'A' + 10),
        _ => None,
    }
}
133
134/// Decodes quoted-printable encoded data, appending the decoding data to a
135/// Vec<u8>.
136///
137/// During decoding all line breaks and invalid characters are ignored.
138/// If an error is encountered during decoding, the already decoded data in the
139/// output buffer is left intact. It's up to the caller to deal with the partial
140/// decoded data in case of failure.
141pub fn qp_decode_into_buf(input: &[u8], output: &mut Vec<u8>) -> Result<()> {
142    let mut iter = input.iter().peekable();
143
144    'outer: loop {
145        loop {
146            match iter.next() {
147                Some(b'=') => break,
148                Some(c) => output.push(*c),
149                None => break 'outer,
150            }
151        }
152
153        // At this point we have encountered a '=', so check
154        // to see what follows.
155        if let Some(&first) = iter.next() {
156            // A CRLF/LF after '=' marks a line continuation, and
157            // is effectively dropped.
158            if first == b'\r' {
159                if iter.peek() == Some(&&b'\n') {
160                    iter.next();
161                    continue;
162                }
163            } else if first == b'\n' {
164                continue;
165            } else if let Some(first_num) = hexdigit_to_num(first) {
166                // A valid pair of hexdigits represent the raw byte value.
167                if let Some(&&second) = iter.peek() {
168                    if let Some(second_num) = hexdigit_to_num(second) {
169                        output.push(first_num * 16 + second_num);
170                        iter.next();
171                        continue;
172                    }
173                }
174            }
175
176            // Emit the raw sequence if it's not one of the special
177            // special cases checked above.
178            output.extend(&[b'=', first]);
179        } else {
180            // Last character in the input was an '=', just emit it.
181            output.push(b'=');
182        }
183    }
184
185
186    Ok(())
187}
188
#[cfg(test)]
mod test_base64 {
    use crate::decode::base64_decode_into_buf;

    /// Runs the decoder on `input` with a fresh output buffer. Yields the
    /// decoded bytes on success and `None` if the decoder returned an error.
    fn decode(input: &[u8]) -> Option<Vec<u8>> {
        let mut buf = Vec::new();
        match base64_decode_into_buf(input, &mut buf) {
            Ok(()) => Some(buf),
            Err(_) => None,
        }
    }

    #[test]
    fn decodes_full_length() {
        assert_eq!(decode(b"YWJj"), Some(b"abc".to_vec()));
    }

    #[test]
    fn decodes_with_two_padding() {
        assert_eq!(decode(b"YWJjZA=="), Some(b"abcd".to_vec()));
    }

    #[test]
    fn decodes_with_one_padding() {
        assert_eq!(decode(b"YWJjZGU="), Some(b"abcde".to_vec()));
    }

    #[test]
    fn decodes_with_ignored_characters() {
        assert_eq!(decode(b" Y\t WJ\njZA=\r\n = "), Some(b"abcd".to_vec()));
    }

    #[test]
    fn error_with_invalid_paddings() {
        assert_eq!(decode(b"YWJj===="), None);
        assert_eq!(decode(b"YWJjZ==="), None);
        assert_eq!(decode(b"===="), None);
    }

    #[test]
    fn error_with_unpadded_input() {
        assert_eq!(decode(b"YWJjZA="), None);
    }

    #[test]
    fn error_with_characters_after_padding() {
        assert_eq!(decode(b"YWJjZA=a"), None);
        assert_eq!(decode(b"YWJjZA==b="), None);
    }
}
242
#[cfg(test)]
mod test_qp {
    use crate::decode::qp_decode_into_buf;

    /// Runs the decoder on `input` with a fresh output buffer. Yields the
    /// decoded bytes on success and `None` if the decoder returned an error.
    fn decode(input: &[u8]) -> Option<Vec<u8>> {
        let mut buf = Vec::new();
        match qp_decode_into_buf(input, &mut buf) {
            Ok(()) => Some(buf),
            Err(_) => None,
        }
    }

    #[test]
    fn decodes_byte() {
        assert_eq!(decode(b"a=62c=64"), Some(b"abcd".to_vec()));
    }

    #[test]
    fn decodes_soft_break() {
        assert_eq!(decode(b"a=\r\nb=\nc"), Some(b"abc".to_vec()));
    }

    #[test]
    fn invalid_sequences_are_untouched() {
        let invalid_sequence = b"a=6t= c=";
        assert_eq!(decode(invalid_sequence), Some(invalid_sequence.to_vec()));
    }
}
268}