e_utils/algorithm/base64/
mod.rs

1mod decode;
2mod encode;
3mod tables;
4pub use decode::*;
5pub use encode::*;
6
7const PAD_BYTE: u8 = b'=';
8/// Standard character set with padding.
9pub const STANDARD: Config = Config {
10  char_set: CharacterSet::Standard,
11  pad: true,
12  decode_allow_trailing_bits: false,
13};
14/// Contains configuration parameters for base64 encoding
15#[derive(Clone, Copy, Debug)]
16pub struct Config {
17  /// Character set to use
18  char_set: CharacterSet,
19  /// True to pad output with `=` characters
20  pad: bool,
21  /// True to ignore excess nonzero bits in the last few symbols, otherwise an error is returned.
22  decode_allow_trailing_bits: bool,
23}
24
25impl Config {
26  /// Create a new `Config`.
27  pub const fn new(char_set: CharacterSet, pad: bool) -> Config {
28    Config {
29      char_set,
30      pad,
31      decode_allow_trailing_bits: false,
32    }
33  }
34
35  /// Sets whether to pad output with `=` characters.
36  pub const fn pad(self, pad: bool) -> Config {
37    Config { pad, ..self }
38  }
39
40  /// Sets whether to emit errors for nonzero trailing bits.
41  ///
42  /// This is useful when implementing
43  /// [forgiving-base64 decode](https://infra.spec.whatwg.org/#forgiving-base64-decode).
44  pub const fn decode_allow_trailing_bits(self, allow: bool) -> Config {
45    Config {
46      decode_allow_trailing_bits: allow,
47      ..self
48    }
49  }
50}
51
/// Available encoding character sets.
///
/// The enum is a plain, field-less tag: it is `Copy`, and it can be compared for equality
/// and used as a hash-map / hash-set key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CharacterSet {
  /// The standard character set (uses `+` and `/`).
  ///
  /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
  Standard,
  /// The URL safe character set (uses `-` and `_`).
  ///
  /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
  UrlSafe,
  /// The `crypt(3)` character set (uses `./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`).
  ///
  /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
  Crypt,
  /// The bcrypt character set (uses `./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`).
  Bcrypt,
  /// The character set used in IMAP-modified UTF-7 (uses `+` and `,`).
  ///
  /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3).
  ImapMutf7,
  /// The character set used in BinHex 4.0 files.
  ///
  /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt).
  BinHex,
}
78
79impl CharacterSet {
80  fn encode_table(self) -> &'static [u8; 64] {
81    match self {
82      CharacterSet::Standard => tables::STANDARD_ENCODE,
83      CharacterSet::UrlSafe => tables::URL_SAFE_ENCODE,
84      CharacterSet::Crypt => tables::CRYPT_ENCODE,
85      CharacterSet::Bcrypt => tables::BCRYPT_ENCODE,
86      CharacterSet::ImapMutf7 => tables::IMAP_MUTF7_ENCODE,
87      CharacterSet::BinHex => tables::BINHEX_ENCODE,
88    }
89  }
90
91  fn decode_table(self) -> &'static [u8; 256] {
92    match self {
93      CharacterSet::Standard => tables::STANDARD_DECODE,
94      CharacterSet::UrlSafe => tables::URL_SAFE_DECODE,
95      CharacterSet::Crypt => tables::CRYPT_DECODE,
96      CharacterSet::Bcrypt => tables::BCRYPT_DECODE,
97      CharacterSet::ImapMutf7 => tables::IMAP_MUTF7_DECODE,
98      CharacterSet::BinHex => tables::BINHEX_DECODE,
99    }
100  }
101}
102
103mod chunked_encoder {
104  use super::{add_padding, encode_to_slice, Config};
105  use core::cmp;
106
107  /// The output mechanism for ChunkedEncoder's encoded bytes.
108  pub(crate) trait Sink {
109    type Error;
110
111    /// Handle a chunk of encoded base64 data (as UTF-8 bytes)
112    fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
113  }
114
115  const BUF_SIZE: usize = 1024;
116
117  /// A base64 encoder that emits encoded bytes in chunks without heap allocation.
118  pub(crate) struct ChunkedEncoder {
119    config: Config,
120    max_input_chunk_len: usize,
121  }
122
123  impl ChunkedEncoder {
124    pub(crate) fn new(config: Config) -> ChunkedEncoder {
125      ChunkedEncoder {
126        config,
127        max_input_chunk_len: max_input_length(BUF_SIZE, config),
128      }
129    }
130
131    pub(crate) fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
132      let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
133      let encode_table = self.config.char_set.encode_table();
134
135      let mut input_index = 0;
136
137      while input_index < bytes.len() {
138        // either the full input chunk size, or it's the last iteration
139        let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index);
140
141        let chunk = &bytes[input_index..(input_index + input_chunk_len)];
142
143        let mut b64_bytes_written = encode_to_slice(chunk, &mut encode_buf, encode_table);
144
145        input_index += input_chunk_len;
146        let more_input_left = input_index < bytes.len();
147
148        if self.config.pad && !more_input_left {
149          // no more input, add padding if needed. Buffer will have room because
150          // max_input_length leaves room for it.
151          b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]);
152        }
153
154        sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
155      }
156
157      Ok(())
158    }
159  }
160
161  /// Calculate the longest input that can be encoded for the given output buffer size.
162  ///
163  /// If the config requires padding, two bytes of buffer space will be set aside so that the last
164  /// chunk of input can be encoded safely.
165  ///
166  /// The input length will always be a multiple of 3 so that no encoding state has to be carried over
167  /// between chunks.
168  fn max_input_length(encoded_buf_len: usize, config: Config) -> usize {
169    let effective_buf_len = if config.pad {
170      // make room for padding
171      encoded_buf_len
172        .checked_sub(2)
173        .expect("Don't use a tiny buffer")
174    } else {
175      encoded_buf_len
176    };
177
178    // No padding, so just normal base64 expansion.
179    (effective_buf_len / 4) * 3
180  }
181
182  // A really simple sink that just appends to a string
183  #[cfg(any(feature = "alloc", feature = "std", test))]
184  pub(crate) struct StringSink<'a> {
185    string: &'a mut String,
186  }
187
188  #[cfg(any(feature = "alloc", feature = "std", test))]
189  impl<'a> StringSink<'a> {
190    pub(crate) fn new(s: &mut String) -> StringSink<'_> {
191      StringSink { string: s }
192    }
193  }
194
195  #[cfg(any(feature = "alloc", feature = "std", test))]
196  impl<'a> Sink for StringSink<'a> {
197    type Error = ();
198
199    fn write_encoded_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> {
200      self.string.push_str(std::str::from_utf8(s).unwrap());
201
202      Ok(())
203    }
204  }
205}