simple_base64/
decode.rs

1use crate::engine::{general_purpose::STANDARD, DecodeEstimate, Engine};
2#[cfg(any(feature = "alloc", test))]
3use alloc::vec::Vec;
4use core::fmt;
5#[cfg(any(feature = "std", test))]
6use std::error;
7
/// Failure modes reported when base64 input cannot be decoded.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum DecodeError {
    /// The input contained a byte that is not a valid symbol.
    ///
    /// The fields are, in order, the offset of the byte within the input and the byte itself.
    /// Padding characters (`=`) interspersed in the encoded form are reported through this
    /// variant as well.
    InvalidByte(usize, u8),
    /// The input has an invalid length.
    ///
    /// Stray trailing whitespace or other separator bytes are a typical cause. When such excess
    /// trailing bytes make the length invalid *and* the final byte is itself not a valid base64
    /// symbol (whitespace, for instance), `InvalidByte` is emitted instead of `InvalidLength`
    /// because it makes the problem easier to track down.
    InvalidLength,
    /// The 6 bits encoded by the last non-padding symbol contain nonzero bits that would be
    /// discarded.
    ///
    /// This points at corrupted or truncated Base64. In contrast to `InvalidByte`, which covers
    /// symbols outside the alphabet, this variant covers symbols that belong to the alphabet but
    /// describe a nonsensical encoding. The fields are the symbol's offset and the symbol itself.
    InvalidLastSymbol(usize, u8),
    /// The padding did not match the configuration: missing or incorrect where canonical padding
    /// is required, present where it must be absent, and so on.
    InvalidPadding,
}
29
30impl fmt::Display for DecodeError {
31    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
32        match *self {
33            Self::InvalidByte(index, byte) => write!(f, "Invalid byte {}, offset {}.", byte, index),
34            Self::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."),
35            Self::InvalidLastSymbol(index, byte) => {
36                write!(f, "Invalid last symbol {}, offset {}.", byte, index)
37            }
38            Self::InvalidPadding => write!(f, "Invalid padding"),
39        }
40    }
41}
42
// `DecodeError` wraps no other error, so the trait's default methods are used unchanged.
// Only compiled when the `std` feature is enabled or when building tests.
#[cfg(any(test, feature = "std"))]
impl error::Error for DecodeError {}
45
46/// Errors that can occur while decoding into a slice.
47#[derive(Clone, Debug, PartialEq, Eq)]
48pub enum DecodeSliceError {
49    /// A [DecodeError] occurred
50    DecodeError(DecodeError),
51    /// The provided slice _may_ be too small.
52    ///
53    /// The check is conservative (assumes the last triplet of output bytes will all be needed).
54    OutputSliceTooSmall,
55}
56
57impl fmt::Display for DecodeSliceError {
58    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59        match self {
60            Self::DecodeError(e) => write!(f, "DecodeError: {}", e),
61            Self::OutputSliceTooSmall => write!(f, "Output slice too small"),
62        }
63    }
64}
65
// Only compiled when the `std` feature is enabled or when building tests.
#[cfg(any(test, feature = "std"))]
impl error::Error for DecodeSliceError {
    /// Exposes the wrapped [`DecodeError`] as the underlying cause; the size-check failure has
    /// no underlying cause and yields `None`.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match self {
            Self::OutputSliceTooSmall => None,
            Self::DecodeError(cause) => Some(cause),
        }
    }
}
75
76impl From<DecodeError> for DecodeSliceError {
77    fn from(e: DecodeError) -> Self {
78        DecodeSliceError::DecodeError(e)
79    }
80}
81
/// Decode base64 `input` with the [`STANDARD` engine](STANDARD), returning the decoded bytes in
/// a freshly created `Vec<u8>`.
///
/// This is a convenience wrapper; see [Engine::decode] for the details.
#[cfg(any(test, feature = "alloc"))]
pub fn decode<T>(input: T) -> Result<Vec<u8>, DecodeError>
where
    T: AsRef<[u8]>,
{
    Engine::decode(&STANDARD, input)
}
89
/// Decode `input` with the supplied [Engine], returning a `Result` that contains the decoded
/// bytes as a `Vec<u8>`.
///
/// This is a convenience wrapper; see [Engine::decode] for the details.
#[cfg(any(test, feature = "alloc"))]
pub fn decode_engine<E, T>(input: T, engine: &E) -> Result<Vec<u8>, DecodeError>
where
    E: Engine,
    T: AsRef<[u8]>,
{
    E::decode(engine, input)
}
101
/// Decode `input` with the supplied [Engine], placing the decoded bytes in `buffer`.
///
/// This is a convenience wrapper; see [Engine::decode_vec] for the details.
#[cfg(any(test, feature = "alloc"))]
pub fn decode_engine_vec<E, T>(
    input: T,
    buffer: &mut Vec<u8>,
    engine: &E,
) -> Result<(), DecodeError>
where
    E: Engine,
    T: AsRef<[u8]>,
{
    E::decode_vec(engine, input, buffer)
}
113
114/// Decode the input into the provided output slice.
115///
116/// See [Engine::decode_slice].
117pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
118    input: T,
119    output: &mut [u8],
120    engine: &E,
121) -> Result<usize, DecodeSliceError> {
122    engine.decode_slice(input, output)
123}
124
125/// Returns a conservative estimate of the decoded size of `encoded_len` base64 symbols (rounded up
126/// to the next group of 3 decoded bytes).
127///
128/// The resulting length will be a safe choice for the size of a decode buffer, but may have up to
129/// 2 trailing bytes that won't end up being needed.
130///
131/// # Examples
132///
133/// ```
134/// use simple_base64::decoded_len_estimate;
135///
136/// assert_eq!(3, decoded_len_estimate(1));
137/// assert_eq!(3, decoded_len_estimate(2));
138/// assert_eq!(3, decoded_len_estimate(3));
139/// assert_eq!(3, decoded_len_estimate(4));
140/// // start of the next quad of encoded symbols
141/// assert_eq!(6, decoded_len_estimate(5));
142/// ```
143pub fn decoded_len_estimate(encoded_len: usize) -> usize {
144    STANDARD
145        .internal_decoded_len_estimate(encoded_len)
146        .decoded_len_estimate()
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        alphabet,
        engine::{general_purpose, Config, GeneralPurpose},
        tests::{assert_encode_sanity, random_engine},
    };
    use rand::{
        distributions::{Distribution, Uniform},
        Rng, SeedableRng,
    };

    /// Number of randomized rounds each randomized test performs.
    const ROUNDS: usize = 10_000;

    /// Appends `len` randomly generated bytes to `buf`, drawing them one at a time from `rng`.
    fn push_random_bytes<R: Rng>(rng: &mut R, buf: &mut Vec<u8>, len: usize) {
        buf.extend((0..len).map(|_| rng.gen::<u8>()));
    }

    /// Decoding into a vec that already holds data must leave that data in place and put the
    /// decoded bytes after it.
    #[test]
    fn decode_into_nonempty_vec_doesnt_clobber_existing_prefix() {
        let mut rng = rand::rngs::SmallRng::from_entropy();

        let input_len_dist = Uniform::new(0, 1000);
        let prefix_len_dist = Uniform::new(0, 1000);

        // buffers are reused across rounds
        let mut plain: Vec<u8> = Vec::new();
        let mut encoded = String::new();
        let mut prefix: Vec<u8> = Vec::new();
        let mut with_prefix: Vec<u8> = Vec::new();
        let mut without_prefix: Vec<u8> = Vec::new();

        for _ in 0..ROUNDS {
            plain.clear();
            encoded.clear();
            prefix.clear();
            with_prefix.clear();
            without_prefix.clear();

            let input_len: usize = input_len_dist.sample(&mut rng);
            push_random_bytes(&mut rng, &mut plain, input_len);

            let engine = random_engine(&mut rng);
            engine.encode_string(&plain, &mut encoded);
            assert_encode_sanity(&encoded, engine.config().encode_padding(), input_len);

            let prefix_len: usize = prefix_len_dist.sample(&mut rng);
            push_random_bytes(&mut rng, &mut prefix, prefix_len);

            // seed the destination with the prefix
            with_prefix.extend_from_slice(&prefix);

            // decode once behind the prefix and once into an empty vec
            engine.decode_vec(&encoded, &mut with_prefix).unwrap();
            engine.decode_vec(&encoded, &mut without_prefix).unwrap();

            assert_eq!(prefix_len + without_prefix.len(), with_prefix.len());
            assert_eq!(plain, without_prefix);

            // prefix followed by the plain decode must equal the prefixed decode
            prefix.append(&mut without_prefix);
            assert_eq!(prefix, with_prefix);
        }
    }

    #[test]
    fn decode_slice_doesnt_clobber_existing_prefix_or_suffix() {
        do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|engine, encoded, out| {
            engine.decode_slice(encoded, out).unwrap()
        })
    }

    #[test]
    fn decode_slice_unchecked_doesnt_clobber_existing_prefix_or_suffix() {
        do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|engine, encoded, out| {
            engine.decode_slice_unchecked(encoded, out).unwrap()
        })
    }

    #[test]
    fn decode_engine_estimation_works_for_various_lengths() {
        let engine = GeneralPurpose::new(&alphabet::STANDARD, general_purpose::NO_PAD);
        for quad_count in 0..100 {
            for tail in ["AA", "AAA", "AAAA"].iter() {
                let mut input = "AAAA".repeat(quad_count);
                input.push_str(tail);
                // make sure no overflow (and thus a panic) occurs
                assert!(engine.decode(input).is_ok());
            }
        }
    }

    #[test]
    fn decode_slice_output_length_errors() {
        for num_quads in 1..100 {
            let input = "AAAA".repeat(num_quads);
            let needed = num_quads * 3;
            let mut out = vec![0u8; needed - 3];

            // each of the three lengths short of `needed` must be rejected
            while out.len() < needed {
                assert_eq!(
                    DecodeSliceError::OutputSliceTooSmall,
                    STANDARD.decode_slice(&input, &mut out).unwrap_err()
                );
                out.push(0);
            }

            // now it works
            assert_eq!(needed, STANDARD.decode_slice(&input, &mut out).unwrap());
        }
    }

    /// Shared body of the slice-decoding tests: decodes into the middle of a buffer filled with
    /// random bytes and checks that only the decoded region changed. `call_decode` performs the
    /// actual decode and returns the number of bytes it wrote.
    fn do_decode_slice_doesnt_clobber_existing_prefix_or_suffix<F>(call_decode: F)
    where
        F: Fn(&GeneralPurpose, &[u8], &mut [u8]) -> usize,
    {
        // long enough to have some room before and after the decoded region
        const BUF_LEN: usize = 5000;
        const OFFSET: usize = 1000;

        let mut rng = rand::rngs::SmallRng::from_entropy();
        let input_len_dist = Uniform::new(0, 1000);

        // buffers are reused across rounds
        let mut plain: Vec<u8> = Vec::new();
        let mut encoded = String::new();
        let mut scratch: Vec<u8> = Vec::new();
        let mut snapshot: Vec<u8> = Vec::new();

        for _ in 0..ROUNDS {
            plain.clear();
            encoded.clear();
            scratch.clear();
            snapshot.clear();

            let input_len: usize = input_len_dist.sample(&mut rng);
            push_random_bytes(&mut rng, &mut plain, input_len);

            let engine = random_engine(&mut rng);
            engine.encode_string(&plain, &mut encoded);
            assert_encode_sanity(&encoded, engine.config().encode_padding(), input_len);

            // fill the buffer with random garbage and keep a copy for later comparison
            push_random_bytes(&mut rng, &mut scratch, BUF_LEN);
            snapshot.extend_from_slice(&scratch);

            // decode into the non-empty buf
            let written = call_decode(&engine, encoded.as_bytes(), &mut scratch[OFFSET..]);
            let end = OFFSET + written;

            assert_eq!(plain.len(), written);
            assert_eq!(plain, &scratch[OFFSET..end]);
            // bytes before and after the decoded region must be untouched
            assert_eq!(&snapshot[..OFFSET], &scratch[..OFFSET]);
            assert_eq!(&snapshot[end..], &scratch[end..]);
        }
    }
}