Skip to main content

base64_ng/
alphabet.rs

1//! Base64 alphabets and custom alphabet validation.
2
3use crate::{ct_mask_eq_u8, ct_mask_lt_u8};
4
/// Reason a 64-byte alphabet table was rejected by validation.
///
/// Every variant carries the table position(s) involved so callers can point
/// at the offending entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlphabetError {
    /// A table entry is not a visible ASCII byte.
    InvalidByte {
        /// Position of the offending entry in the alphabet table.
        index: usize,
        /// The byte value found at that position.
        byte: u8,
    },
    /// A table entry is the padding byte `=`.
    PaddingByte {
        /// Position of the padding byte in the alphabet table.
        index: usize,
    },
    /// The same byte appears at two different table positions.
    DuplicateByte {
        /// Position of the first occurrence.
        first: usize,
        /// Position of the repeated occurrence.
        second: usize,
        /// The byte value that appears twice.
        byte: u8,
    },
}
30
31impl core::fmt::Display for AlphabetError {
32    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
33        match self {
34            Self::InvalidByte { index, byte } => {
35                write!(
36                    f,
37                    "invalid base64 alphabet byte 0x{byte:02x} at index {index}"
38                )
39            }
40            Self::PaddingByte { index } => {
41                write!(f, "base64 alphabet contains padding byte at index {index}")
42            }
43            Self::DuplicateByte {
44                first,
45                second,
46                byte,
47            } => write!(
48                f,
49                "base64 alphabet byte 0x{byte:02x} is duplicated at indexes {first} and {second}"
50            ),
51        }
52    }
53}
54
// `std::error::Error` is only implemented when the `std` feature is enabled.
// The impl body is empty, so the trait's default methods are used unchanged.
#[cfg(feature = "std")]
impl std::error::Error for AlphabetError {}
57
/// Declares a unit struct implementing [`Alphabet`] from a 64-byte string
/// literal.
///
/// The table is checked with [`validate_alphabet`] while the crate using the
/// macro is compiled. A table containing an invalid byte, a duplicate byte, or
/// the padding byte stops the build rather than producing a malformed runtime
/// profile.
///
/// The generated type keeps the default [`Alphabet::encode`]: each emitted
/// Base64 byte costs one fixed 64-entry scan, which avoids secret-indexed
/// table lookups. The built-in `Standard` and `UrlSafe` alphabets use
/// arithmetic mappers instead.
///
/// The generated [`Alphabet::decode`] forwards to [`decode_alphabet_byte`].
/// The constant-time-oriented [`ct`](crate::ct) module scans the generated
/// `ENCODE` table directly and does not call the generated `decode` method.
///
/// # Examples
///
/// ```
/// base64_ng::define_alphabet! {
///     struct DotSlash = b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/// }
///
/// let engine = base64_ng::Engine::<DotSlash, false>::new();
/// let mut encoded = [0u8; 4];
/// let written = engine.encode_slice(&[0xff, 0xff, 0xff], &mut encoded).unwrap();
/// assert_eq!(&encoded[..written], b"9999");
/// ```
///
/// Invalid alphabets fail during compilation:
///
/// ```compile_fail
/// base64_ng::define_alphabet! {
///     struct Bad = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
/// }
/// ```
#[macro_export]
macro_rules! define_alphabet {
    ($(#[$attr:meta])* $visibility:vis struct $alphabet:ident = $table:expr;) => {
        $(#[$attr])*
        #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
        $visibility struct $alphabet;

        impl $crate::Alphabet for $alphabet {
            // `$table` is expected to be a `&[u8; 64]` (e.g. a byte-string
            // literal); dereferencing copies it into the associated constant.
            const ENCODE: [u8; 64] = *$table;

            #[inline]
            fn decode(byte: u8) -> Option<u8> {
                $crate::decode_alphabet_byte(byte, &<Self as $crate::Alphabet>::ENCODE)
            }
        }

        // Compile-time guard: the array length evaluates to 1 for a valid
        // table and to 0 otherwise, and a zero-length array does not match the
        // declared `[(); 1]` type, so an invalid alphabet is a type error.
        const _: [(); 1] = [(); match $crate::validate_alphabet(
            &<$alphabet as $crate::Alphabet>::ENCODE,
        ) {
            Err(_) => 0,
            Ok(()) => 1,
        }];
    };
}
118
119/// Validates a 64-byte Base64 alphabet table.
120///
121/// A valid alphabet must contain exactly 64 unique visible ASCII bytes and must
122/// not contain the padding byte `=`.
123///
124/// # Examples
125///
126/// ```
127/// use base64_ng::{Alphabet, Standard, validate_alphabet};
128///
129/// validate_alphabet(&Standard::ENCODE).unwrap();
130/// ```
131pub const fn validate_alphabet(encode: &[u8; 64]) -> Result<(), AlphabetError> {
132    let mut index = 0;
133    while index < encode.len() {
134        let byte = encode[index];
135        if !is_visible_ascii(byte) {
136            return Err(AlphabetError::InvalidByte { index, byte });
137        }
138        if byte == b'=' {
139            return Err(AlphabetError::PaddingByte { index });
140        }
141
142        let mut duplicate = index + 1;
143        while duplicate < encode.len() {
144            if encode[duplicate] == byte {
145                return Err(AlphabetError::DuplicateByte {
146                    first: index,
147                    second: duplicate,
148                    byte,
149                });
150            }
151            duplicate += 1;
152        }
153
154        index += 1;
155    }
156
157    Ok(())
158}
159
160/// Decodes one byte by scanning a caller-provided alphabet table.
161///
162/// This helper is intended for custom [`Alphabet`] implementations. Validate
163/// the table with [`validate_alphabet`] before trusting the alphabet in a
164/// protocol or public API. The scan always visits all 64 entries before
165/// returning so the match position does not create an early-return timing
166/// signal in custom alphabet decoders.
167///
168/// # Examples
169///
170/// ```
171/// use base64_ng::{Alphabet, decode_alphabet_byte};
172///
173/// struct DotSlash;
174///
175/// impl Alphabet for DotSlash {
176///     const ENCODE: [u8; 64] =
177///         *b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
178///
179///     fn decode(byte: u8) -> Option<u8> {
180///         decode_alphabet_byte(byte, &Self::ENCODE)
181///     }
182/// }
183///
184/// assert_eq!(DotSlash::decode(b'.'), Some(0));
185/// assert_eq!(DotSlash::decode(b'9'), Some(63));
186/// ```
187#[must_use]
188pub const fn decode_alphabet_byte(byte: u8, encode: &[u8; 64]) -> Option<u8> {
189    let mut index = 0;
190    let mut candidate = 0;
191    let mut decoded = 0;
192    let mut valid = 0;
193    while index < encode.len() {
194        let matches = ct_mask_eq_u8(byte, encode[index]);
195        decoded |= candidate & matches;
196        valid |= matches;
197        index += 1;
198        candidate += 1;
199    }
200
201    if valid == 0 { None } else { Some(decoded) }
202}
203
204/// A Base64 alphabet.
205///
206/// # Security
207///
208/// The default [`Alphabet::encode`] implementation is constant-time-oriented:
209/// it scans all 64 alphabet entries instead of using `ENCODE[value as usize]`.
210/// If an implementation overrides `encode` with a direct table lookup, normal
211/// [`Engine`](crate::Engine) encoding becomes timing-sensitive with respect to
212/// the emitted 6-bit value.
213///
214/// The normal strict decode path calls [`Alphabet::decode`] and is not a
215/// constant-time decoder. The [`ct`](crate::ct) module does not call
216/// [`Alphabet::decode`]; it scans [`Alphabet::ENCODE`] directly with its own
217/// fixed 64-entry mapper. A custom non-constant-time `decode` implementation
218/// therefore affects normal strict decode diagnostics and timing, but not the
219/// `ct` module's symbol-mapping loop.
220pub trait Alphabet {
221    /// Encoding table indexed by 6-bit values.
222    const ENCODE: [u8; 64];
223
224    /// Encode one 6-bit value into an alphabet byte.
225    ///
226    /// The default implementation scans the alphabet table instead of using a
227    /// secret-indexed table lookup. Built-in alphabets override this with the
228    /// branch-minimized ASCII arithmetic mapper. Custom alphabets that keep the
229    /// default method prioritize timing posture over throughput: every emitted
230    /// Base64 byte performs a fixed 64-entry scan. For massive payloads with
231    /// user-defined alphabets, profile this cost and consider an audited custom
232    /// override only if the alphabet has a structure that can be mapped without
233    /// secret-indexed table access.
234    #[must_use]
235    fn encode(value: u8) -> u8 {
236        encode_alphabet_value(value, &Self::ENCODE)
237    }
238
239    /// Decode one byte into a 6-bit value.
240    ///
241    /// Implementations that want conservative custom-alphabet timing posture
242    /// should delegate to [`decode_alphabet_byte`], which scans all 64 entries
243    /// before returning. The `ct` module ignores this method and scans
244    /// [`Self::ENCODE`] directly.
245    fn decode(byte: u8) -> Option<u8>;
246}
247
/// Reports whether `byte` is a printable, non-space ASCII character
/// (`0x21..=0x7e`, i.e. `!` through `~`).
const fn is_visible_ascii(byte: u8) -> bool {
    // `u8::is_ascii_graphic` covers exactly U+0021..=U+007E and is callable
    // in const context.
    byte.is_ascii_graphic()
}
251
252/// The RFC 4648 standard Base64 alphabet.
253#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
254pub struct Standard;
255
256impl Alphabet for Standard {
257    const ENCODE: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
258
259    #[inline]
260    fn encode(value: u8) -> u8 {
261        encode_ascii_base64(value, Self::ENCODE[62], Self::ENCODE[63])
262    }
263
264    #[inline]
265    fn decode(byte: u8) -> Option<u8> {
266        decode_ascii_base64(byte, Self::ENCODE[62], Self::ENCODE[63])
267    }
268}
269
270/// The RFC 4648 URL-safe Base64 alphabet.
271#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
272pub struct UrlSafe;
273
274impl Alphabet for UrlSafe {
275    const ENCODE: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
276
277    #[inline]
278    fn encode(value: u8) -> u8 {
279        encode_ascii_base64(value, Self::ENCODE[62], Self::ENCODE[63])
280    }
281
282    #[inline]
283    fn decode(byte: u8) -> Option<u8> {
284        decode_ascii_base64(byte, Self::ENCODE[62], Self::ENCODE[63])
285    }
286}
287
288/// The bcrypt Base64 alphabet.
289///
290/// This alphabet is commonly used by bcrypt hash strings. It is provided as an
291/// alphabet/profile building block; `base64-ng` does not parse or verify full
292/// bcrypt password-hash records.
293#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
294pub struct Bcrypt;
295
296impl Alphabet for Bcrypt {
297    const ENCODE: [u8; 64] = *b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
298
299    #[inline]
300    fn decode(byte: u8) -> Option<u8> {
301        decode_alphabet_byte(byte, &Self::ENCODE)
302    }
303}
304
305/// The Unix `crypt(3)` Base64 alphabet.
306///
307/// This alphabet is provided as an explicit legacy interoperability profile.
308/// `base64-ng` does not parse or verify complete password-hash records.
309#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
310pub struct Crypt;
311
312impl Alphabet for Crypt {
313    const ENCODE: [u8; 64] = *b"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
314
315    #[inline]
316    fn decode(byte: u8) -> Option<u8> {
317        decode_alphabet_byte(byte, &Self::ENCODE)
318    }
319}
320
321#[inline]
322pub(crate) const fn encode_base64_value<A: Alphabet>(value: u8) -> u8 {
323    encode_alphabet_value(value, &A::ENCODE)
324}
325
326#[inline]
327pub(crate) fn encode_base64_value_runtime<A: Alphabet>(value: u8) -> u8 {
328    A::encode(value)
329}
330
331#[inline]
332const fn encode_alphabet_value(value: u8, encode: &[u8; 64]) -> u8 {
333    let mut output = 0;
334    let mut index = 0;
335    let mut candidate = 0;
336    while index < encode.len() {
337        output |= encode[index] & ct_mask_eq_u8(value, candidate);
338        index += 1;
339        candidate += 1;
340    }
341    output
342}
343
344#[inline]
345const fn encode_ascii_base64(value: u8, value_62_byte: u8, value_63_byte: u8) -> u8 {
346    let upper = ct_mask_lt_u8(value, 26);
347    let lower = ct_mask_lt_u8(value.wrapping_sub(26), 26);
348    let digit = ct_mask_lt_u8(value.wrapping_sub(52), 10);
349    let value_62 = ct_mask_eq_u8(value, 0x3e);
350    let value_63 = ct_mask_eq_u8(value, 0x3f);
351
352    (value.wrapping_add(b'A') & upper)
353        | (value.wrapping_sub(26).wrapping_add(b'a') & lower)
354        | (value.wrapping_sub(52).wrapping_add(b'0') & digit)
355        | (value_62_byte & value_62)
356        | (value_63_byte & value_63)
357}
358
359#[inline]
360fn decode_ascii_base64(byte: u8, value_62_byte: u8, value_63_byte: u8) -> Option<u8> {
361    let upper = ct_mask_lt_u8(byte.wrapping_sub(b'A'), 26);
362    let lower = ct_mask_lt_u8(byte.wrapping_sub(b'a'), 26);
363    let digit = ct_mask_lt_u8(byte.wrapping_sub(b'0'), 10);
364    let value_62 = ct_mask_eq_u8(byte, value_62_byte);
365    let value_63 = ct_mask_eq_u8(byte, value_63_byte);
366    let valid = upper | lower | digit | value_62 | value_63;
367
368    let decoded = (byte.wrapping_sub(b'A') & upper)
369        | (byte.wrapping_sub(b'a').wrapping_add(26) & lower)
370        | (byte.wrapping_sub(b'0').wrapping_add(52) & digit)
371        | (0x3e & value_62)
372        | (0x3f & value_63);
373
374    if valid == 0 { None } else { Some(decoded) }
375}