simple_base64/
encode.rs

1#[cfg(any(feature = "alloc", test))]
2use alloc::string::String;
3use core::fmt;
4#[cfg(any(feature = "std", test))]
5use std::error;
6
7#[cfg(any(feature = "alloc", test))]
8use crate::engine::general_purpose::STANDARD;
9use crate::engine::{Config, Engine};
10use crate::PAD_BYTE;
11
/// Base64-encode `input` with the [`STANDARD` engine](STANDARD) and return the result as a
/// `String`.
///
/// Convenience shorthand for [Engine::encode] on the `STANDARD` engine.
#[allow(unused)]
#[cfg(any(feature = "alloc", test))]
pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
    Engine::encode(&STANDARD, input)
}
20
/// Base64-encode `input` with the supplied `Engine` and return the result as a new `String`.
///
/// See [Engine::encode].
#[allow(unused)]
#[cfg(any(feature = "alloc", test))]
pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String {
    E::encode(engine, input)
}
29
/// Base64-encode `input` with the supplied `Engine`, writing the result into the caller's
/// `String`.
///
/// The encoded text is added after whatever `output_buf` already holds; existing contents are
/// left in place.
///
/// See [Engine::encode_string].
#[allow(unused)]
#[cfg(any(feature = "alloc", test))]
pub fn encode_engine_string<E: Engine, T: AsRef<[u8]>>(
    input: T,
    output_buf: &mut String,
    engine: &E,
) {
    E::encode_string(engine, input, output_buf);
}
42
43/// Encode arbitrary octets as base64 into a supplied slice.
44///
45/// See [Engine::encode_slice].
46#[allow(unused)]
47pub fn encode_engine_slice<E: Engine, T: AsRef<[u8]>>(
48    input: T,
49    output_buf: &mut [u8],
50    engine: &E,
51) -> Result<usize, EncodeSliceError> {
52    engine.encode_slice(input, output_buf)
53}
54
55/// B64-encode and pad (if configured).
56///
57/// This helper exists to avoid recalculating encoded_size, which is relatively expensive on short
58/// inputs.
59///
60/// `encoded_size` is the encoded size calculated for `input`.
61///
62/// `output` must be of size `encoded_size`.
63///
64/// All bytes in `output` will be written to since it is exactly the size of the output.
65pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
66    input: &[u8],
67    output: &mut [u8],
68    engine: &E,
69    expected_encoded_size: usize,
70) {
71    debug_assert_eq!(expected_encoded_size, output.len());
72
73    let b64_bytes_written = engine.internal_encode(input, output);
74
75    let padding_bytes = if engine.config().encode_padding() {
76        add_padding(b64_bytes_written, &mut output[b64_bytes_written..])
77    } else {
78        0
79    };
80
81    let encoded_bytes = b64_bytes_written
82        .checked_add(padding_bytes)
83        .expect("usize overflow when calculating b64 length");
84
85    debug_assert_eq!(expected_encoded_size, encoded_bytes);
86}
87
/// Compute how many bytes the base64 encoding of `bytes_len` input bytes occupies.
///
/// Every complete 3-byte group of input becomes 4 output bytes. A trailing partial group of 1 or
/// 2 bytes becomes a full 4 bytes when `padding` is `true`, and 2 or 3 bytes respectively when it
/// is `false`.
///
/// Returns `None` if the encoded length can't be represented in `usize`. This will happen for
/// input lengths in approximately the top quarter of the range of `usize`.
pub const fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
    // Spelled out as a `match` with an early return: `let Some(_) = _ else` requires 1.65.0,
    // whereas this form works on 1.48.
    let full_chunks_len = match (bytes_len / 3).checked_mul(4) {
        Some(len) => len,
        None => return None,
    };

    let tail_len = match (bytes_len % 3, padding) {
        // input is a whole number of 3-byte groups: nothing to add
        (0, _) => return Some(full_chunks_len),
        // a padded partial group always fills a complete quad
        (_, true) => 4,
        (1, false) => 2,
        // the only other possible remainder is 2; a catch-all is used because `unreachable!()`
        // is not available in const fns in ancient rust versions
        (_, false) => 3,
    };

    full_chunks_len.checked_add(tail_len)
}
121
122/// Write padding characters.
123/// `unpadded_output_len` is the size of the unpadded but base64 encoded data.
124/// `output` is the slice where padding should be written, of length at least 2.
125///
126/// Returns the number of padding bytes written.
127pub(crate) fn add_padding(unpadded_output_len: usize, output: &mut [u8]) -> usize {
128    let pad_bytes = (4 - (unpadded_output_len % 4)) % 4;
129    // for just a couple bytes, this has better performance than using
130    // .fill(), or iterating over mutable refs, which call memset()
131    #[allow(clippy::needless_range_loop)]
132    for i in 0..pad_bytes {
133        output[i] = PAD_BYTE;
134    }
135
136    pad_bytes
137}
138
/// The ways in which encoding into a caller-provided slice can fail.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum EncodeSliceError {
    /// The output slice handed to the encoder is too small.
    OutputSliceTooSmall,
}

impl fmt::Display for EncodeSliceError {
    /// Writes a short, fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match *self {
            EncodeSliceError::OutputSliceTooSmall => "Output slice too small",
        };
        f.write_str(message)
    }
}

// `std::error::Error` only exists with std, hence the gate.
#[cfg(any(feature = "std", test))]
impl error::Error for EncodeSliceError {}
156
#[cfg(test)]
mod tests {
    use super::*;

    use crate::{
        alphabet,
        engine::general_purpose::{GeneralPurpose, NO_PAD, STANDARD},
        tests::{assert_encode_sanity, random_config, random_engine},
    };
    use rand::{
        distributions::{Distribution, Uniform},
        Rng, SeedableRng,
    };
    use std::str;

    const URL_SAFE_NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);

    /// Padded lengths: every started 3-byte group costs a full 4 output bytes.
    #[test]
    fn encoded_size_correct_standard() {
        // (input length, expected encoded length)
        let cases: &[(usize, usize)] = &[
            (0, 0),
            (1, 4),
            (2, 4),
            (3, 4),
            (4, 8),
            (5, 8),
            (6, 8),
            (7, 12),
            (8, 12),
            (9, 12),
            (54, 72),
            (55, 76),
            (56, 76),
            (57, 76),
            (58, 80),
        ];

        for &(input_len, enc_len) in cases.iter() {
            assert_encoded_length(input_len, enc_len, &STANDARD, true);
        }
    }

    /// Unpadded lengths: a trailing group of 1 or 2 bytes costs 2 or 3 output bytes.
    #[test]
    fn encoded_size_correct_no_pad() {
        // (input length, expected encoded length)
        let cases: &[(usize, usize)] = &[
            (0, 0),
            (1, 2),
            (2, 3),
            (3, 4),
            (4, 6),
            (5, 7),
            (6, 8),
            (7, 10),
            (8, 11),
            (9, 12),
            (54, 72),
            (55, 74),
            (56, 75),
            (57, 76),
            (58, 78),
        ];

        for &(input_len, enc_len) in cases.iter() {
            assert_encoded_length(input_len, enc_len, &URL_SAFE_NO_PAD_ENGINE, false);
        }
    }

    #[test]
    fn encoded_size_overflow() {
        assert!(encoded_len(usize::MAX, true).is_none());
    }

    /// Encoding into a `String` that already has content must leave that content alone and
    /// produce the same encoding as encoding into an empty `String`.
    #[test]
    fn encode_engine_string_into_nonempty_buffer_doesnt_clobber_prefix() {
        let mut rng = rand::rngs::SmallRng::from_entropy();

        let prefix_len_dist = Uniform::new(0, 1000);
        let input_len_dist = Uniform::new(0, 1000);

        // buffers are reused across iterations
        let mut input = Vec::new();
        let mut prefix = String::new();
        let mut bare = String::new();
        let mut prefixed = String::new();
        let mut decoded = Vec::new();

        for _ in 0..10_000 {
            input.clear();
            prefix.clear();
            bare.clear();
            prefixed.clear();
            decoded.clear();

            let input_len = input_len_dist.sample(&mut rng);
            input.extend((0..input_len).map(|_| rng.gen::<u8>()));

            // getting convenient random single-byte printable chars that aren't base64 is
            // annoying, so the prefix is just a run of '#'
            let prefix_len = prefix_len_dist.sample(&mut rng);
            prefix.extend(std::iter::repeat('#').take(prefix_len));
            prefixed.push_str(&prefix);

            let engine = random_engine(&mut rng);
            engine.encode_string(&input, &mut bare);
            engine.encode_string(&input, &mut prefixed);

            let padded = engine.config().encode_padding();

            assert_eq!(bare.len() + prefix_len, prefixed.len());
            assert_encode_sanity(&bare, padded, input_len);
            assert_encode_sanity(&prefixed[prefix_len..], padded, input_len);

            // prefix + plain encoding must be exactly what the prefixed buffer holds
            prefix.push_str(&bare);
            assert_eq!(prefix, prefixed);

            engine.decode_vec(&bare, &mut decoded).unwrap();
            assert_eq!(input, decoded);
        }
    }

    /// Encoding into a slice larger than needed must not touch the bytes after the encoding.
    #[test]
    fn encode_engine_slice_into_nonempty_buffer_doesnt_clobber_suffix() {
        let mut rng = rand::rngs::SmallRng::from_entropy();

        let input_len_dist = Uniform::new(0, 1000);

        // buffers are reused across iterations
        let mut input = Vec::new();
        let mut buffer = Vec::new();
        let mut snapshot = Vec::new();
        let mut decoded = Vec::new();

        for _ in 0..10_000 {
            input.clear();
            buffer.clear();
            snapshot.clear();
            decoded.clear();

            let input_len = input_len_dist.sample(&mut rng);
            input.extend((0..input_len).map(|_| rng.gen::<u8>()));

            // plenty of existing garbage in the encoded buffer
            buffer.extend((0..10 * input_len).map(|_| rng.gen::<u8>()));
            snapshot.extend_from_slice(&buffer);

            let engine = random_engine(&mut rng);
            let padded = engine.config().encode_padding();

            let encoded_size = encoded_len(input_len, padded).unwrap();
            let written = engine.encode_slice(&input, &mut buffer).unwrap();
            assert_eq!(encoded_size, written);

            let (encoded, untouched) = buffer.split_at(encoded_size);

            assert_encode_sanity(str::from_utf8(encoded).unwrap(), padded, input_len);
            assert_eq!(untouched, &snapshot[encoded_size..]);

            engine.decode_vec(encoded, &mut decoded).unwrap();
            assert_eq!(input, decoded);
        }
    }

    /// `internal_encode` must emit valid UTF-8 and leave everything past its output untouched.
    #[test]
    fn encode_to_slice_random_valid_utf8() {
        let mut rng = rand::rngs::SmallRng::from_entropy();

        let input_len_dist = Uniform::new(0, 1000);

        let mut input = Vec::new();
        let mut output = Vec::new();

        for _ in 0..10_000 {
            input.clear();
            output.clear();

            let input_len = input_len_dist.sample(&mut rng);
            input.extend((0..input_len).map(|_| rng.gen::<u8>()));

            let config = random_config(&mut rng);
            let engine = random_engine(&mut rng);

            // fill up the output buffer with garbage
            let encoded_size = encoded_len(input_len, config.encode_padding()).unwrap();
            output.extend((0..encoded_size).map(|_| rng.gen::<u8>()));

            let before = output.clone();

            let bytes_written = engine.internal_encode(&input, &mut output);

            // make sure the part beyond bytes_written is the same garbage it was before
            assert_eq!(&before[bytes_written..], &output[bytes_written..]);

            // make sure the encoded bytes are UTF-8
            str::from_utf8(&output[..bytes_written]).unwrap();
        }
    }

    /// `encode_with_padding` must emit valid UTF-8 and stay inside the slice it was handed.
    #[test]
    fn encode_with_padding_random_valid_utf8() {
        let mut rng = rand::rngs::SmallRng::from_entropy();

        let input_len_dist = Uniform::new(0, 1000);

        let mut input = Vec::new();
        let mut output = Vec::new();

        for _ in 0..10_000 {
            input.clear();
            output.clear();

            let input_len = input_len_dist.sample(&mut rng);
            input.extend((0..input_len).map(|_| rng.gen::<u8>()));

            let engine = random_engine(&mut rng);

            // fill up the output buffer with garbage, with lots of slack after the encoding
            let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();
            output.extend((0..encoded_size + 1000).map(|_| rng.gen::<u8>()));

            let before = output.clone();

            encode_with_padding(&input, &mut output[..encoded_size], &engine, encoded_size);

            // make sure the part beyond b64 is the same garbage it was before
            assert_eq!(&before[encoded_size..], &output[encoded_size..]);

            // make sure the encoded bytes are UTF-8
            str::from_utf8(&output[..encoded_size]).unwrap();
        }
    }

    /// `add_padding` must only write the pad bytes it reports, and those must be valid UTF-8.
    #[test]
    fn add_padding_random_valid_utf8() {
        let mut rng = rand::rngs::SmallRng::from_entropy();

        let mut output = Vec::new();

        // cover our bases for length % 4
        for unpadded_output_len in 0..20 {
            output.clear();

            // fill output with random
            output.extend((0..100).map(|_| rng.gen::<u8>()));

            let before = output.clone();

            let bytes_written = add_padding(unpadded_output_len, &mut output);

            // make sure the part beyond bytes_written is the same garbage it was before
            assert_eq!(&before[bytes_written..], &output[bytes_written..]);

            // make sure the encoded bytes are UTF-8
            str::from_utf8(&output[..bytes_written]).unwrap();
        }
    }

    /// Check `encoded_len` against `enc_len`, then check that actually encoding `input_len`
    /// random bytes with `engine` yields sane output of that same length.
    fn assert_encoded_length<E: Engine>(
        input_len: usize,
        enc_len: usize,
        engine: &E,
        padded: bool,
    ) {
        assert_eq!(enc_len, encoded_len(input_len, padded).unwrap());

        let mut rng = rand::rngs::SmallRng::from_entropy();
        let bytes: Vec<u8> = (0..input_len).map(|_| rng.gen()).collect();

        let encoded = engine.encode(&bytes);
        assert_encode_sanity(&encoded, padded, input_len);

        assert_eq!(enc_len, encoded.len());
    }

    /// For this input, the IMAP alphabet's output equals the standard output with '/' swapped
    /// for ','.
    #[test]
    fn encode_imap() {
        let imap = GeneralPurpose::new(&alphabet::IMAP_MUTF7, NO_PAD).encode(b"\xFB\xFF");
        let standard = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD).encode(b"\xFB\xFF");

        assert_eq!(imap, standard.replace('/', ","));
    }
}
487}