Skip to main content

base64_ng/
alphabet.rs

1//! Base64 alphabets and custom alphabet validation.
2
3use crate::{ct_mask_eq_u8, ct_mask_lt_u8};
4
/// Reason a 64-byte table was rejected as a Base64 alphabet.
///
/// Every variant records the zero-based table position(s) involved so the
/// offending entry can be reported precisely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlphabetError {
    /// An entry is not a visible ASCII byte.
    InvalidByte {
        /// Position of the rejected entry in the alphabet table.
        index: usize,
        /// The rejected byte value.
        byte: u8,
    },
    /// An entry is the padding byte `=`.
    PaddingByte {
        /// Position of the padding byte in the alphabet table.
        index: usize,
    },
    /// The same byte appears at two different table positions.
    DuplicateByte {
        /// Position of the first occurrence.
        first: usize,
        /// Position of the second occurrence.
        second: usize,
        /// The byte value that appears twice.
        byte: u8,
    },
}
30
31impl core::fmt::Display for AlphabetError {
32    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
33        match self {
34            Self::InvalidByte { index, byte } => {
35                write!(
36                    f,
37                    "invalid base64 alphabet byte 0x{byte:02x} at index {index}"
38                )
39            }
40            Self::PaddingByte { index } => {
41                write!(f, "base64 alphabet contains padding byte at index {index}")
42            }
43            Self::DuplicateByte {
44                first,
45                second,
46                byte,
47            } => write!(
48                f,
49                "base64 alphabet byte 0x{byte:02x} is duplicated at indexes {first} and {second}"
50            ),
51        }
52    }
53}
54
// Marker impl only: the body is empty, so every `Error` method keeps its
// default. It is compiled solely when the `std` feature is enabled because the
// trait is named through `std`.
#[cfg(feature = "std")]
impl std::error::Error for AlphabetError {}
57
/// Defines a custom [`Alphabet`] from a 64-byte string literal.
///
/// The macro emits a unit struct with the requested visibility and attributes,
/// an [`Alphabet`] implementation whose `ENCODE` table is the given literal,
/// and a compile-time check of that table.
///
/// The table is validated at compile time with [`validate_alphabet`]. Invalid,
/// duplicate, or padding bytes fail the build instead of creating a malformed
/// runtime profile; the failure is reported as a constant-evaluation panic
/// whose message names this macro and states the alphabet requirements.
///
/// The generated implementation uses the conservative default
/// [`Alphabet::encode`] behavior: every emitted Base64 byte performs a fixed
/// 64-entry scan to avoid secret-indexed table lookups. Built-in alphabets use
/// optimized arithmetic mappers.
///
/// The generated [`Alphabet::decode`] implementation delegates to
/// [`decode_alphabet_byte`]. The constant-time-oriented [`ct`](crate::ct)
/// module scans the generated `ENCODE` table directly and does not call the
/// generated `decode` method.
///
/// # Examples
///
/// ```
/// base64_ng::define_alphabet! {
///     struct DotSlash = b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/// }
///
/// let engine = base64_ng::Engine::<DotSlash, false>::new();
/// let mut encoded = [0u8; 4];
/// let written = engine.encode_slice(&[0xff, 0xff, 0xff], &mut encoded).unwrap();
/// assert_eq!(&encoded[..written], b"9999");
/// ```
///
/// Invalid alphabets fail during compilation:
///
/// ```compile_fail
/// base64_ng::define_alphabet! {
///     struct Bad = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
/// }
/// ```
#[macro_export]
macro_rules! define_alphabet {
    ($(#[$meta:meta])* $vis:vis struct $name:ident = $encode:expr;) => {
        $(#[$meta])*
        #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
        $vis struct $name;

        impl $crate::Alphabet for $name {
            const ENCODE: [u8; 64] = *$encode;

            #[inline]
            fn decode(byte: u8) -> Option<u8> {
                $crate::decode_alphabet_byte(byte, &Self::ENCODE)
            }
        }

        // Compile-time gate: evaluating this anonymous constant runs the
        // validator on the generated table. A rejected table panics during
        // constant evaluation, which aborts the build with the message below
        // rather than an opaque array-length type mismatch.
        const _: () = match $crate::validate_alphabet(
            &<$name as $crate::Alphabet>::ENCODE,
        ) {
            Ok(()) => (),
            Err(_) => panic!(
                "define_alphabet!: the alphabet must contain 64 unique visible ASCII bytes and must not contain the padding byte `=`"
            ),
        };
    };
}
118
119/// Validates a 64-byte Base64 alphabet table.
120///
121/// A valid alphabet must contain exactly 64 unique visible ASCII bytes and must
122/// not contain the padding byte `=`.
123///
124/// # Examples
125///
126/// ```
127/// use base64_ng::{Alphabet, Standard, validate_alphabet};
128///
129/// validate_alphabet(&Standard::ENCODE).unwrap();
130/// ```
131pub const fn validate_alphabet(encode: &[u8; 64]) -> Result<(), AlphabetError> {
132    let mut index = 0;
133    while index < encode.len() {
134        let byte = encode[index];
135        if !is_visible_ascii(byte) {
136            return Err(AlphabetError::InvalidByte { index, byte });
137        }
138        if byte == b'=' {
139            return Err(AlphabetError::PaddingByte { index });
140        }
141
142        let mut duplicate = index + 1;
143        while duplicate < encode.len() {
144            if encode[duplicate] == byte {
145                return Err(AlphabetError::DuplicateByte {
146                    first: index,
147                    second: duplicate,
148                    byte,
149                });
150            }
151            duplicate += 1;
152        }
153
154        index += 1;
155    }
156
157    Ok(())
158}
159
160/// Decodes one byte by scanning a caller-provided alphabet table.
161///
162/// This helper is intended for custom [`Alphabet`] implementations. Validate
163/// the table with [`validate_alphabet`] before trusting the alphabet in a
164/// protocol or public API. The scan always visits all 64 entries before
165/// returning so the match position does not create an early-return timing
166/// signal in the source-level implementation.
167///
168/// # Security
169///
170/// This helper is part of the normal strict decoder path, not the
171/// constant-time-oriented [`ct`](crate::ct) module. It is a `const fn` so it
172/// does not use the optimizer barriers, volatile accumulator reads, or
173/// generated-code evidence hooks used by the private `ct` scanner. Do not rely
174/// on this helper for military or cryptographic constant-time guarantees under
175/// LTO or future compiler rewrites. For secret-bearing custom alphabets, use
176/// [`Engine::ct_decoder`](crate::Engine::ct_decoder) or the [`ct`](crate::ct)
177/// module, which scans [`Alphabet::ENCODE`] directly and does not call
178/// [`Alphabet::decode`].
179///
180/// # Examples
181///
182/// ```
183/// use base64_ng::{Alphabet, decode_alphabet_byte};
184///
185/// struct DotSlash;
186///
187/// impl Alphabet for DotSlash {
188///     const ENCODE: [u8; 64] =
189///         *b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
190///
191///     fn decode(byte: u8) -> Option<u8> {
192///         decode_alphabet_byte(byte, &Self::ENCODE)
193///     }
194/// }
195///
196/// assert_eq!(DotSlash::decode(b'.'), Some(0));
197/// assert_eq!(DotSlash::decode(b'9'), Some(63));
198/// ```
199#[must_use]
200pub const fn decode_alphabet_byte(byte: u8, encode: &[u8; 64]) -> Option<u8> {
201    let mut index = 0;
202    let mut candidate = 0;
203    let mut decoded = 0;
204    let mut valid = 0;
205    while index < encode.len() {
206        let matches = ct_mask_eq_u8(byte, encode[index]);
207        decoded |= candidate & matches;
208        valid |= matches;
209        index += 1;
210        candidate += 1;
211    }
212
213    if valid == 0 { None } else { Some(decoded) }
214}
215
216/// A Base64 alphabet.
217///
218/// # Security
219///
220/// The default [`Alphabet::encode`] implementation is constant-time-oriented:
221/// it scans all 64 alphabet entries instead of using `ENCODE[value as usize]`.
222/// If an implementation overrides `encode` with a direct table lookup, normal
223/// [`Engine`](crate::Engine) encoding becomes timing-sensitive with respect to
224/// the emitted 6-bit value.
225///
226/// The normal strict decode path calls [`Alphabet::decode`] and is not a
227/// constant-time decoder. The [`ct`](crate::ct) module does not call
228/// [`Alphabet::decode`]; it scans [`Alphabet::ENCODE`] directly with its own
229/// fixed 64-entry mapper. A custom non-constant-time `decode` implementation
230/// therefore affects normal strict decode diagnostics and timing, but not the
231/// `ct` module's symbol-mapping loop.
232pub trait Alphabet {
233    /// Encoding table indexed by 6-bit values.
234    const ENCODE: [u8; 64];
235
236    /// Encode one 6-bit value into an alphabet byte.
237    ///
238    /// The default implementation scans the alphabet table instead of using a
239    /// secret-indexed table lookup. Built-in alphabets override this with the
240    /// branch-minimized ASCII arithmetic mapper. Custom alphabets that keep the
241    /// default method prioritize timing posture over throughput: every emitted
242    /// Base64 byte performs a fixed 64-entry scan. For massive payloads with
243    /// user-defined alphabets, profile this cost and consider an audited custom
244    /// override only if the alphabet has a structure that can be mapped without
245    /// secret-indexed table access.
246    #[must_use]
247    fn encode(value: u8) -> u8 {
248        encode_alphabet_value(value, &Self::ENCODE)
249    }
250
251    /// Decode one byte into a 6-bit value.
252    ///
253    /// Implementations that want conservative custom-alphabet timing posture
254    /// should delegate to [`decode_alphabet_byte`], which scans all 64 entries
255    /// before returning. The `ct` module ignores this method and scans
256    /// [`Self::ENCODE`] directly.
257    fn decode(byte: u8) -> Option<u8>;
258}
259
/// Reports whether `byte` lies in the inclusive range `0x21..=0x7e`, which is
/// the range this module treats as visible ASCII.
const fn is_visible_ascii(byte: u8) -> bool {
    matches!(byte, 0x21..=0x7e)
}
263
264/// The RFC 4648 standard Base64 alphabet.
265#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
266pub struct Standard;
267
268impl Alphabet for Standard {
269    const ENCODE: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
270
271    #[inline]
272    fn encode(value: u8) -> u8 {
273        encode_ascii_base64(value, Self::ENCODE[62], Self::ENCODE[63])
274    }
275
276    #[inline]
277    fn decode(byte: u8) -> Option<u8> {
278        decode_ascii_base64(byte, Self::ENCODE[62], Self::ENCODE[63])
279    }
280}
281
282/// The RFC 4648 URL-safe Base64 alphabet.
283#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
284pub struct UrlSafe;
285
286impl Alphabet for UrlSafe {
287    const ENCODE: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
288
289    #[inline]
290    fn encode(value: u8) -> u8 {
291        encode_ascii_base64(value, Self::ENCODE[62], Self::ENCODE[63])
292    }
293
294    #[inline]
295    fn decode(byte: u8) -> Option<u8> {
296        decode_ascii_base64(byte, Self::ENCODE[62], Self::ENCODE[63])
297    }
298}
299
300/// The bcrypt Base64 alphabet.
301///
302/// This alphabet is commonly used by bcrypt hash strings. It is provided as an
303/// alphabet/profile building block; `base64-ng` does not parse or verify full
304/// bcrypt password-hash records.
305#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
306pub struct Bcrypt;
307
308impl Alphabet for Bcrypt {
309    const ENCODE: [u8; 64] = *b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
310
311    #[inline]
312    fn decode(byte: u8) -> Option<u8> {
313        decode_alphabet_byte(byte, &Self::ENCODE)
314    }
315}
316
317/// The Unix `crypt(3)` Base64 alphabet.
318///
319/// This alphabet is provided as an explicit legacy interoperability profile.
320/// `base64-ng` does not parse or verify complete password-hash records.
321#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
322pub struct Crypt;
323
324impl Alphabet for Crypt {
325    const ENCODE: [u8; 64] = *b"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
326
327    #[inline]
328    fn decode(byte: u8) -> Option<u8> {
329        decode_alphabet_byte(byte, &Self::ENCODE)
330    }
331}
332
333#[inline]
334pub(crate) const fn encode_base64_value<A: Alphabet>(value: u8) -> u8 {
335    encode_alphabet_value(value, &A::ENCODE)
336}
337
338#[inline]
339pub(crate) fn encode_base64_value_runtime<A: Alphabet>(value: u8) -> u8 {
340    A::encode(value)
341}
342
343#[inline]
344const fn encode_alphabet_value(value: u8, encode: &[u8; 64]) -> u8 {
345    let mut output = 0;
346    let mut index = 0;
347    let mut candidate = 0;
348    while index < encode.len() {
349        output |= encode[index] & ct_mask_eq_u8(value, candidate);
350        index += 1;
351        candidate += 1;
352    }
353    output
354}
355
356#[inline]
357const fn encode_ascii_base64(value: u8, value_62_byte: u8, value_63_byte: u8) -> u8 {
358    let upper = ct_mask_lt_u8(value, 26);
359    let lower = ct_mask_lt_u8(value.wrapping_sub(26), 26);
360    let digit = ct_mask_lt_u8(value.wrapping_sub(52), 10);
361    let value_62 = ct_mask_eq_u8(value, 0x3e);
362    let value_63 = ct_mask_eq_u8(value, 0x3f);
363
364    (value.wrapping_add(b'A') & upper)
365        | (value.wrapping_sub(26).wrapping_add(b'a') & lower)
366        | (value.wrapping_sub(52).wrapping_add(b'0') & digit)
367        | (value_62_byte & value_62)
368        | (value_63_byte & value_63)
369}
370
371#[inline]
372fn decode_ascii_base64(byte: u8, value_62_byte: u8, value_63_byte: u8) -> Option<u8> {
373    let upper = ct_mask_lt_u8(byte.wrapping_sub(b'A'), 26);
374    let lower = ct_mask_lt_u8(byte.wrapping_sub(b'a'), 26);
375    let digit = ct_mask_lt_u8(byte.wrapping_sub(b'0'), 10);
376    let value_62 = ct_mask_eq_u8(byte, value_62_byte);
377    let value_63 = ct_mask_eq_u8(byte, value_63_byte);
378    let valid = upper | lower | digit | value_62 | value_63;
379
380    let decoded = (byte.wrapping_sub(b'A') & upper)
381        | (byte.wrapping_sub(b'a').wrapping_add(26) & lower)
382        | (byte.wrapping_sub(b'0').wrapping_add(52) & digit)
383        | (0x3e & value_62)
384        | (0x3f & value_63);
385
386    if valid == 0 { None } else { Some(decoded) }
387}