e_utils/algorithm/base64/
encode.rs

1use core::convert::TryInto;
2use super::{STANDARD, Config, chunked_encoder, PAD_BYTE};
3
4///Encode arbitrary octets as base64.
5///Returns a String.
6///Convenience for `encode_config(input, base64::STANDARD);`.
7///
8///# Example
9///
10///```rust
11///extern crate base64;
12///
13///fn main() {
14///    let b64 = base64::encode(b"hello world");
15///    println!("{}", b64);
16///}
17///```
18pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
19    encode_config(input, STANDARD)
20}
21
22///Encode arbitrary octets as base64.
23///Returns a String.
24///
25///# Example
26///
27///```rust
28///extern crate base64;
29///
30///fn main() {
31///    let b64 = base64::encode_config(b"hello world~", base64::STANDARD);
32///    println!("{}", b64);
33///
34///    let b64_url = base64::encode_config(b"hello internet~", base64::URL_SAFE);
35///    println!("{}", b64_url);
36///}
37///```
38pub fn encode_config<T: AsRef<[u8]>>(input: T, config: Config) -> String {
39    let mut buf = match encoded_size(input.as_ref().len(), config) {
40        Some(n) => vec![0; n],
41        None => panic!("integer overflow when calculating buffer size"),
42    };
43
44    encode_with_padding(input.as_ref(), config, buf.len(), &mut buf[..]);
45
46    String::from_utf8(buf).expect("Invalid UTF8")
47}
48
49///Encode arbitrary octets as base64.
50///Writes into the supplied output buffer, which will grow the buffer if needed.
51///
52///# Example
53///
54///```rust
55///extern crate base64;
56///
57///fn main() {
58///    let mut buf = String::new();
59///    base64::encode_config_buf(b"hello world~", base64::STANDARD, &mut buf);
60///    println!("{}", buf);
61///
62///    buf.clear();
63///    base64::encode_config_buf(b"hello internet~", base64::URL_SAFE, &mut buf);
64///    println!("{}", buf);
65///}
66///```
67pub fn encode_config_buf<T: AsRef<[u8]>>(input: T, config: Config, buf: &mut String) {
68    let input_bytes = input.as_ref();
69
70    {
71        let mut sink = chunked_encoder::StringSink::new(buf);
72        let encoder = chunked_encoder::ChunkedEncoder::new(config);
73
74        encoder
75            .encode(input_bytes, &mut sink)
76            .expect("Writing to a String shouldn't fail")
77    }
78}
79
80/// Encode arbitrary octets as base64.
81/// Writes into the supplied output buffer.
82///
83/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
84/// or statically-allocated buffer).
85///
86/// # Panics
87///
88/// If `output` is too small to hold the encoded version of `input`, a panic will result.
89///
90/// # Example
91///
92/// ```rust
93/// extern crate base64;
94///
95/// fn main() {
96///     let s = b"hello internet!";
97///     let mut buf = Vec::new();
98///     // make sure we'll have a slice big enough for base64 + padding
99///     buf.resize(s.len() * 4 / 3 + 4, 0);
100///
101///     let bytes_written = base64::encode_config_slice(s,
102///                             base64::STANDARD, &mut buf);
103///
104///     // shorten our vec down to just what was written
105///     buf.resize(bytes_written, 0);
106///
107///     assert_eq!(s, base64::decode(&buf).unwrap().as_slice());
108/// }
109/// ```
110pub fn encode_config_slice<T: AsRef<[u8]>>(input: T, config: Config, output: &mut [u8]) -> usize {
111    let input_bytes = input.as_ref();
112
113    let encoded_size = encoded_size(input_bytes.len(), config)
114        .expect("usize overflow when calculating buffer size");
115
116    let mut b64_output = &mut output[0..encoded_size];
117
118    encode_with_padding(&input_bytes, config, encoded_size, &mut b64_output);
119
120    encoded_size
121}
122
123/// B64-encode and pad (if configured).
124///
125/// This helper exists to avoid recalculating encoded_size, which is relatively expensive on short
126/// inputs.
127///
128/// `encoded_size` is the encoded size calculated for `input`.
129///
130/// `output` must be of size `encoded_size`.
131///
132/// All bytes in `output` will be written to since it is exactly the size of the output.
133fn encode_with_padding(input: &[u8], config: Config, encoded_size: usize, output: &mut [u8]) {
134    debug_assert_eq!(encoded_size, output.len());
135
136    let b64_bytes_written = encode_to_slice(input, output, config.char_set.encode_table());
137
138    let padding_bytes = if config.pad {
139        add_padding(input.len(), &mut output[b64_bytes_written..])
140    } else {
141        0
142    };
143
144    let encoded_bytes = b64_bytes_written
145        .checked_add(padding_bytes)
146        .expect("usize overflow when calculating b64 length");
147
148    debug_assert_eq!(encoded_size, encoded_bytes);
149}
150
/// Reads the first eight bytes of `s` as a big-endian `u64`.
///
/// # Panics
///
/// Panics if `s` holds fewer than eight bytes.
#[inline]
fn read_u64(s: &[u8]) -> u64 {
    // The slice is exactly eight bytes long, so the array conversion cannot fail.
    let head: [u8; 8] = s[..8].try_into().unwrap();
    u64::from_be_bytes(head)
}
155
156/// Encode input bytes to utf8 base64 bytes. Does not pad.
157/// `output` must be long enough to hold the encoded `input` without padding.
158/// Returns the number of bytes written.
159#[inline]
160pub fn encode_to_slice(input: &[u8], output: &mut [u8], encode_table: &[u8; 64]) -> usize {
161    let mut input_index: usize = 0;
162
163    const BLOCKS_PER_FAST_LOOP: usize = 4;
164    const LOW_SIX_BITS: u64 = 0x3F;
165
166    // we read 8 bytes at a time (u64) but only actually consume 6 of those bytes. Thus, we need
167    // 2 trailing bytes to be available to read..
168    let last_fast_index = input.len().saturating_sub(BLOCKS_PER_FAST_LOOP * 6 + 2);
169    let mut output_index = 0;
170
171    if last_fast_index > 0 {
172        while input_index <= last_fast_index {
173            // Major performance wins from letting the optimizer do the bounds check once, mostly
174            // on the output side
175            let input_chunk = &input[input_index..(input_index + (BLOCKS_PER_FAST_LOOP * 6 + 2))];
176            let output_chunk = &mut output[output_index..(output_index + BLOCKS_PER_FAST_LOOP * 8)];
177
178            // Hand-unrolling for 32 vs 16 or 8 bytes produces yields performance about equivalent
179            // to unsafe pointer code on a Xeon E5-1650v3. 64 byte unrolling was slightly better for
180            // large inputs but significantly worse for 50-byte input, unsurprisingly. I suspect
181            // that it's a not uncommon use case to encode smallish chunks of data (e.g. a 64-byte
182            // SHA-512 digest), so it would be nice if that fit in the unrolled loop at least once.
183            // Plus, single-digit percentage performance differences might well be quite different
184            // on different hardware.
185
186            let input_u64 = read_u64(&input_chunk[0..]);
187
188            output_chunk[0] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
189            output_chunk[1] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
190            output_chunk[2] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
191            output_chunk[3] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
192            output_chunk[4] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
193            output_chunk[5] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
194            output_chunk[6] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
195            output_chunk[7] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
196
197            let input_u64 = read_u64(&input_chunk[6..]);
198
199            output_chunk[8] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
200            output_chunk[9] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
201            output_chunk[10] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
202            output_chunk[11] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
203            output_chunk[12] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
204            output_chunk[13] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
205            output_chunk[14] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
206            output_chunk[15] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
207
208            let input_u64 = read_u64(&input_chunk[12..]);
209
210            output_chunk[16] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
211            output_chunk[17] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
212            output_chunk[18] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
213            output_chunk[19] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
214            output_chunk[20] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
215            output_chunk[21] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
216            output_chunk[22] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
217            output_chunk[23] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
218
219            let input_u64 = read_u64(&input_chunk[18..]);
220
221            output_chunk[24] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
222            output_chunk[25] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
223            output_chunk[26] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
224            output_chunk[27] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
225            output_chunk[28] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
226            output_chunk[29] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
227            output_chunk[30] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
228            output_chunk[31] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
229
230            output_index += BLOCKS_PER_FAST_LOOP * 8;
231            input_index += BLOCKS_PER_FAST_LOOP * 6;
232        }
233    }
234
235    // Encode what's left after the fast loop.
236
237    const LOW_SIX_BITS_U8: u8 = 0x3F;
238
239    let rem = input.len() % 3;
240    let start_of_rem = input.len() - rem;
241
242    // start at the first index not handled by fast loop, which may be 0.
243
244    while input_index < start_of_rem {
245        let input_chunk = &input[input_index..(input_index + 3)];
246        let output_chunk = &mut output[output_index..(output_index + 4)];
247
248        output_chunk[0] = encode_table[(input_chunk[0] >> 2) as usize];
249        output_chunk[1] =
250            encode_table[((input_chunk[0] << 4 | input_chunk[1] >> 4) & LOW_SIX_BITS_U8) as usize];
251        output_chunk[2] =
252            encode_table[((input_chunk[1] << 2 | input_chunk[2] >> 6) & LOW_SIX_BITS_U8) as usize];
253        output_chunk[3] = encode_table[(input_chunk[2] & LOW_SIX_BITS_U8) as usize];
254
255        input_index += 3;
256        output_index += 4;
257    }
258
259    if rem == 2 {
260        output[output_index] = encode_table[(input[start_of_rem] >> 2) as usize];
261        output[output_index + 1] = encode_table[((input[start_of_rem] << 4
262            | input[start_of_rem + 1] >> 4)
263            & LOW_SIX_BITS_U8) as usize];
264        output[output_index + 2] =
265            encode_table[((input[start_of_rem + 1] << 2) & LOW_SIX_BITS_U8) as usize];
266        output_index += 3;
267    } else if rem == 1 {
268        output[output_index] = encode_table[(input[start_of_rem] >> 2) as usize];
269        output[output_index + 1] =
270            encode_table[((input[start_of_rem] << 4) & LOW_SIX_BITS_U8) as usize];
271        output_index += 2;
272    }
273
274    output_index
275}
276
277/// calculate the base64 encoded string size, including padding if appropriate
278pub fn encoded_size(bytes_len: usize, config: Config) -> Option<usize> {
279    let rem = bytes_len % 3;
280
281    let complete_input_chunks = bytes_len / 3;
282    let complete_chunk_output = complete_input_chunks.checked_mul(4);
283
284    if rem > 0 {
285        if config.pad {
286            complete_chunk_output.and_then(|c| c.checked_add(4))
287        } else {
288            let encoded_rem = match rem {
289                1 => 2,
290                2 => 3,
291                _ => unreachable!("Impossible remainder"),
292            };
293            complete_chunk_output.and_then(|c| c.checked_add(encoded_rem))
294        }
295    } else {
296        complete_chunk_output
297    }
298}
299
300/// Write padding characters.
301/// `output` is the slice where padding should be written, of length at least 2.
302///
303/// Returns the number of padding bytes written.
304pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
305    let rem = input_len % 3;
306    let mut bytes_written = 0;
307    for _ in 0..((3 - rem) % 3) {
308        output[bytes_written] = PAD_BYTE;
309        bytes_written += 1;
310    }
311
312    bytes_written
313}