irox_tools/util/
base64.rs

1// SPDX-License-Identifier: MIT
2// Copyright 2023 IROX Contributors
3//
4
5//!
6//! RFC-4648 Compliant Base64, Base32, and Base16 encoders and decoders
7//!
8extern crate alloc;
9use crate::codec::Codec;
10use alloc::collections::BTreeMap;
11use irox_bits::{Bits, Error, ErrorKind, MutBits};
12
/// Standard Base64 alphabet: `A-Z`, `a-z`, `0-9`, `+`, `/`.
///
/// Not filesystem or URL-safe, because it contains `+` and `/`.
pub static BASE64_ALPHABET: [u8; 64] =
    *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// URL-flavoured Base64 alphabet: `A-Z`, `a-z`, `0-9`, `-`, `_`.
///
/// Filesystem and URL-safe.
pub static BASE64URL_ALPHABET: [u8; 64] =
    *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
/// Base32 alphabet: `A-Z`, `2-7`.
///
/// Filesystem and URL-safe.
pub static BASE32_ALPHABET: [u8; 32] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
/// "Extended Hex" Base32 alphabet: `0-9`, `A-V`.
///
/// Filesystem and URL-safe.
pub static BASE32HEX_ALPHABET: [u8; 32] = *b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
/// Standard (upper-case) hexadecimal alphabet: `0-9`, `A-F`.
pub static BASE16_ALPHABET: [u8; 16] = *b"0123456789ABCDEF";
41
// Looks up the symbol at index `$idx` in the alphabet `$alpha`.
//
// Uses a checked `get` so an out-of-range index can never panic; in that case the
// element type's default value (`0` for `u8`) is produced instead.
macro_rules! getalpha {
    ($alpha:ident,$idx:tt) => {
        $alpha.get($idx).copied().unwrap_or_default()
    };
}
///
/// A Six Bit Codec encodes 3x eight-bit bytes (24 bits) into 4x six-bit symbols.  Likewise, it
/// reads 4x six-bit symbols and decodes them into 3x eight-bit bytes.
///
/// The alphabet size is `2^6 = 64` symbols.
#[derive(Debug, Clone)]
pub struct SixBitCodec {
    /// Output symbols, indexed by six-bit value.
    alphabet: &'static [u8; 64],
    /// Reverse lookup table: symbol byte to its six-bit value.
    reverse: BTreeMap<u8, u8>,
    /// When `true`, decoding fails on bytes that are not in the alphabet instead of skipping them.
    fail_on_invalid_decode: bool,
    /// Byte used to pad the final encoded group; skipped while decoding.
    pad: u8,
}
58impl SixBitCodec {
59    /// Creates a new codec, using the provided alphabet.
60    pub fn new(alphabet: &'static [u8; 64]) -> Self {
61        let reverse: BTreeMap<u8, u8> = alphabet
62            .iter()
63            .enumerate()
64            .map(|(idx, v)| (*v, idx as u8))
65            .collect::<BTreeMap<_, _>>();
66        SixBitCodec {
67            alphabet,
68            reverse,
69            fail_on_invalid_decode: false,
70            pad: b'=',
71        }
72    }
73    /// sets the end padding character (defaults to `'='`)
74    pub fn set_pad(&mut self, pad: u8) -> &mut Self {
75        self.pad = pad;
76        self
77    }
78    /// if set, decoding will return an error on invalid character - otherwise will just skip it.
79    pub fn set_fail_on_invalid_character(&mut self) -> &mut Self {
80        self.fail_on_invalid_decode = true;
81        self
82    }
83}
84impl Codec for SixBitCodec {
85    fn encode<I: Bits, O: MutBits>(&self, mut input: I, output: &mut O) -> Result<usize, Error> {
86        let mut buf: u32 = 0;
87        let mut ctr = 0;
88        let mut written = 0;
89        let alpha = self.alphabet;
90        loop {
91            let Some(v) = input.next_u8()? else {
92                break;
93            };
94
95            buf <<= 8;
96            buf |= v as u32;
97            ctr += 1;
98            if ctr == 3 {
99                let a = ((buf & 0xFC_0000) >> 18) as usize;
100                let b = ((buf & 0x03_F000) >> 12) as usize;
101                let c = ((buf & 0x00_0FC0) >> 6) as usize;
102                let d = (buf & 0x00_003F) as usize;
103                output.write_all_bytes(&[
104                    getalpha!(alpha, a),
105                    getalpha!(alpha, b),
106                    getalpha!(alpha, c),
107                    getalpha!(alpha, d),
108                ])?;
109                ctr = 0;
110                buf = 0;
111                written += 4;
112            }
113        }
114        if ctr == 2 {
115            buf <<= 2;
116            let a = ((buf & 0x03_F000) >> 12) as usize;
117            let b = ((buf & 0x00_0FC0) >> 6) as usize;
118            let c = (buf & 0x00_003F) as usize;
119            output.write_all_bytes(&[
120                getalpha!(alpha, a),
121                getalpha!(alpha, b),
122                getalpha!(alpha, c),
123                self.pad,
124            ])?;
125            written += 4;
126        } else if ctr == 1 {
127            buf <<= 4;
128            let a = ((buf & 0xFC0) >> 6) as usize;
129            let b = (buf & 0x030) as usize;
130            output.write_all_bytes(&[
131                getalpha!(alpha, a),
132                getalpha!(alpha, b),
133                self.pad,
134                self.pad,
135            ])?;
136            written += 4;
137        }
138        Ok(written)
139    }
140
141    fn decode<I: Bits, O: MutBits>(&self, mut input: I, output: &mut O) -> Result<usize, Error> {
142        let mut buf: u32 = 0;
143        let mut ctr = 0;
144        let mut written = 0;
145        loop {
146            let Some(var) = input.next_u8()? else {
147                break;
148            };
149            if var == self.pad {
150                continue;
151            }
152            let Some(dec) = self.reverse.get(&var) else {
153                if self.fail_on_invalid_decode {
154                    return Err(ErrorKind::InvalidData.into());
155                }
156                continue;
157            };
158            buf <<= 6;
159            buf |= *dec as u32;
160            ctr += 1;
161            if ctr == 4 {
162                let [_, a, b, c] = buf.to_be_bytes();
163                output.write_all_bytes(&[a, b, c])?;
164                ctr = 0;
165                buf = 0;
166                written += 3;
167            }
168        }
169        if ctr == 3 {
170            buf >>= 2;
171            output.write_be_u16((buf & 0xFFFF) as u16)?;
172            written += 2;
173        } else if ctr == 2 {
174            // write 1
175            buf >>= 4;
176            output.write_u8((buf & 0xFF) as u8)?;
177            written += 1;
178        } else if ctr == 1 {
179            // invalid!
180        }
181
182        Ok(written)
183    }
184}
185/// Creates and returns a [`SixBitCodec`] compliant with the RFC4648 "Base64" standard alphabet
186/// ([`BASE64_ALPHABET`]) - this alphabet contains characters inconsistent with URLs and Filenames.
187pub fn new_base64_codec() -> SixBitCodec {
188    SixBitCodec::new(&BASE64_ALPHABET)
189}
190/// Creates and returns a [`SixBitCodec`] compliant with the RFC4648 "Base64 URL" standard alphabet
191/// ([`BASE64URL_ALPHABET`]) - this alphabet contains characters compatible with URLs and Filenames.
192pub fn new_base64_safe_codec() -> SixBitCodec {
193    SixBitCodec::new(&BASE64URL_ALPHABET)
194}
195
196/// Encodes the provided the input, writing the encoding to output, using the standard RFC-4648
197/// [`BASE64_ALPHABET`], upon success, returns the number of bytes written out
198pub fn base64_encode<I: Bits, O: MutBits>(input: I, output: &mut O) -> Result<usize, Error> {
199    new_base64_codec().encode(input, output)
200}
201/// Decodes the provided the input, writing the decoded data to output, using the standard RFC-4648
202/// [`BASE64_ALPHABET`], upon success, returns the number of bytes written out
203pub fn base64_decode<I: Bits, O: MutBits>(input: I, output: &mut O) -> Result<usize, Error> {
204    new_base64_codec().decode(input, output)
205}
206crate::cfg_feature_alloc! {
207    /// Encodes the provided input to a string, using the standard RFC-4648 [`BASE64_ALPHABET`]
208    pub fn base64_encode_to_str<I: Bits>(input: I) -> Result<alloc::string::String, Error> {
209        new_base64_codec().encode_to_str(input)
210    }
211}
212crate::cfg_feature_alloc! {
213    /// Decodes the provided input to a string, using the standard RFC-4648 [`BASE64_ALPHABET`], dropping
214    /// any characters that aren't UTF-8.
215    pub fn base64_decode_to_str_lossy<I: Bits>(input: I) -> Result<alloc::string::String, Error> {
216        new_base64_codec().decode_to_str_lossy(input)
217    }
218}
219
220/// Encodes the provided the input, writing the encoding to output, using the filesystem and
221/// URL-safe RFC-4648 [`BASE64URL_ALPHABET`], , upon success, returns the number of bytes written out
222pub fn base64_encode_safe<I: Bits, O: MutBits>(input: I, output: &mut O) -> Result<usize, Error> {
223    new_base64_safe_codec().encode(input, output)
224}
225/// Decodes the provided the input, writing the decoded data to output, using the filesystem and
226/// URL-safe RFC-4648 [`BASE64URL_ALPHABET`], upon success, returns the number of bytes written out
227pub fn base64_decode_safe<I: Bits, O: MutBits>(input: I, output: &mut O) -> Result<usize, Error> {
228    new_base64_safe_codec().decode(input, output)
229}
230crate::cfg_feature_alloc! {
231    /// Encodes the provided input to a string, using the using the filesystem and
232    /// URL-safe RFC-4648 [`BASE64URL_ALPHABET`]
233    pub fn base64_encode_safe_to_str<I: Bits>(input: I) -> Result<alloc::string::String, Error> {
234        new_base64_safe_codec().encode_to_str(input)
235    }
236}
237crate::cfg_feature_alloc! {
238    /// Decodes the provided the input, to a string, using the filesystem and URL-safe RFC-4648
239    /// [`BASE64URL_ALPHABET`], any characters not valid UTF-8 are dropped.
240    pub fn base64_decode_safe_to_str_lossy<I: Bits>(input: I) -> Result<alloc::string::String, Error> {
241        new_base64_safe_codec().decode_to_str_lossy(input)
242    }
243}
244
#[cfg(test)]
#[cfg(feature = "std")]
mod tests {
    use crate::base64::new_base64_codec;
    use crate::codec::Codec;

    /// RFC 4648 section 10 test vectors as `(plain text, base64 text)` pairs.
    const VECTORS: [(&str, &str); 7] = [
        ("", ""),
        ("f", "Zg=="),
        ("fo", "Zm8="),
        ("foo", "Zm9v"),
        ("foob", "Zm9vYg=="),
        ("fooba", "Zm9vYmE="),
        ("foobar", "Zm9vYmFy"),
    ];

    /// Round-trips every vector: encoding the plain text must give the base64 text, and
    /// decoding the base64 text must give the plain text back.
    #[allow(clippy::panic_in_result_fn)]
    #[test]
    pub fn base64_tests() -> Result<(), std::io::Error> {
        let codec = new_base64_codec();
        for (plain, encoded) in VECTORS {
            let actual_encoded = codec.encode_to_str(plain.as_bytes())?;
            assert_eq!(encoded, actual_encoded);
            let actual_plain = codec.decode_to_str_lossy(encoded.as_bytes())?;
            assert_eq!(plain, actual_plain);
        }

        Ok(())
    }
}