simple_base64/
alphabet.rs

1//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2
3use crate::PAD_BYTE;
4use core::{convert, fmt};
5#[cfg(any(feature = "std", test))]
6use std::error;
7
/// Number of symbols in a base64 alphabet: one for every possible 6-bit value.
const ALPHABET_SIZE: usize = 1 << 6;
9
10/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
11///
12/// Common alphabets are provided as constants, and custom alphabets
13/// can be made via `from_str` or the `TryFrom<str>` implementation.
14///
15/// # Examples
16///
17/// Building and using a custom Alphabet:
18///
19/// ```
20/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
21///
22/// let engine = base64::engine::GeneralPurpose::new(
23///     &custom,
24///     base64::engine::general_purpose::PAD);
25/// ```
26///
27/// Building a const:
28///
29/// ```
30/// use simple_base64::alphabet::Alphabet;
31///
32/// static CUSTOM: Alphabet = {
33///     // Result::unwrap() isn't const yet, but panic!() is OK
34///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
35///         Ok(x) => x,
36///         Err(_) => panic!("creation of alphabet failed"),
37///     }
38/// };
39/// ```
40///
41/// Building a lazy_static:
42///
43/// ```
44/// use simple_base64::{
45///     alphabet::Alphabet,
46///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
47/// };
48///
49/// lazy_static::lazy_static! {
50///     static ref CUSTOM: Alphabet = Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
51/// }
52/// ```
53#[derive(Clone, Debug, Eq, PartialEq)]
54pub struct Alphabet {
55    pub(crate) symbols: [u8; ALPHABET_SIZE],
56}
57
58impl Alphabet {
59    /// Performs no checks so that it can be const.
60    /// Used only for known-valid strings.
61    const fn from_str_unchecked(alphabet: &str) -> Self {
62        let mut symbols = [0_u8; ALPHABET_SIZE];
63        let source_bytes = alphabet.as_bytes();
64
65        // a way to copy that's allowed in const fn
66        let mut index = 0;
67        while index < ALPHABET_SIZE {
68            symbols[index] = source_bytes[index];
69            index += 1;
70        }
71
72        Self { symbols }
73    }
74
75    /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
76    ///
77    /// The `=` byte is not allowed as it is used for padding.
78    pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
79        let bytes = alphabet.as_bytes();
80        if bytes.len() != ALPHABET_SIZE {
81            return Err(ParseAlphabetError::InvalidLength);
82        }
83
84        {
85            let mut index = 0;
86            while index < ALPHABET_SIZE {
87                let byte = bytes[index];
88
89                // must be ascii printable. 127 (DEL) is commonly considered printable
90                // for some reason but clearly unsuitable for base64.
91                if !(byte >= 32_u8 && byte <= 126_u8) {
92                    return Err(ParseAlphabetError::UnprintableByte(byte));
93                }
94                // = is assumed to be padding, so cannot be used as a symbol
95                if byte == PAD_BYTE {
96                    return Err(ParseAlphabetError::ReservedByte(byte));
97                }
98
99                // Check for duplicates while staying within what const allows.
100                // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
101                // microsecond range.
102
103                let mut probe_index = 0;
104                while probe_index < ALPHABET_SIZE {
105                    if probe_index == index {
106                        probe_index += 1;
107                        continue;
108                    }
109
110                    let probe_byte = bytes[probe_index];
111
112                    if byte == probe_byte {
113                        return Err(ParseAlphabetError::DuplicatedByte(byte));
114                    }
115
116                    probe_index += 1;
117                }
118
119                index += 1;
120            }
121        }
122
123        Ok(Self::from_str_unchecked(alphabet))
124    }
125}
126
127impl convert::TryFrom<&str> for Alphabet {
128    type Error = ParseAlphabetError;
129
130    fn try_from(value: &str) -> Result<Self, Self::Error> {
131        Self::new(value)
132    }
133}
134
/// Reasons a `str` can be rejected when constructing an [Alphabet] from it.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseAlphabetError {
    /// The input was not exactly 64 bytes long.
    InvalidLength,
    /// The contained byte appears more than once; every symbol must be unique.
    DuplicatedByte(u8),
    /// The contained byte is not printable, i.e. lies outside the range `[32, 126]`.
    UnprintableByte(u8),
    /// The contained byte is `=`, which cannot be used as a symbol.
    ReservedByte(u8),
}
147
148impl fmt::Display for ParseAlphabetError {
149    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
150        match self {
151            Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
152            Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
153            Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
154            Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
155        }
156    }
157}
158
159#[cfg(any(feature = "std", test))]
160impl error::Error for ParseAlphabetError {}
161
162/// The standard alphabet (uses `+` and `/`).
163///
164/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
165pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
166    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
167);
168
169/// The URL safe alphabet (uses `-` and `_`).
170///
171/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
172pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
173    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
174);
175
176/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
177///
178/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
179pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
180    "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
181);
182
183/// The bcrypt alphabet.
184pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
185    "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
186);
187
188/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
189///
190/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
191pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
192    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
193);
194
195/// The alphabet used in BinHex 4.0 files.
196///
197/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
198pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
199    "!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
200);
201
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    // Inputs shorter than 64 bytes (including the empty string) must be rejected too.
    #[test]
    fn detects_length_too_short() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("").unwrap_err()
        );
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // DEL (127) sits just past the accepted range `[32, 126]`.
    #[test]
    fn detects_del_as_unprintable() {
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // A two-byte UTF-8 character keeps the input at 64 bytes, but its first byte
    // (0xc3) is not ASCII and must be reported.
    #[test]
    fn detects_non_ascii() {
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc3),
            Alphabet::new("\u{e9}CDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    // The predefined constants are built without validation, so make sure each one
    // would also be accepted by the checked constructor.
    #[test]
    fn predefined_alphabets_pass_validation() {
        for predefined in &[STANDARD, URL_SAFE, CRYPT, BCRYPT, IMAP_MUTF7, BIN_HEX] {
            let symbols = core::str::from_utf8(&predefined.symbols).unwrap();
            assert_eq!(Ok(predefined.clone()), Alphabet::new(symbols));
        }
    }
}