base64_ng/alphabet.rs
1//! Base64 alphabets and custom alphabet validation.
2
3use crate::{ct_mask_eq_u8, ct_mask_lt_u8};
4
/// Error describing why a 64-entry Base64 alphabet table was rejected.
///
/// [`validate_alphabet`] returns these values. Every variant records the
/// position (0-based index into the table) of the offending entry.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AlphabetError {
    /// The alphabet contains a byte outside the visible ASCII range
    /// (`0x21..=0x7e`).
    InvalidByte {
        /// Byte index in the alphabet table.
        index: usize,
        /// Invalid byte value.
        byte: u8,
    },
    /// The alphabet contains the padding byte `=`.
    PaddingByte {
        /// Byte index in the alphabet table.
        index: usize,
    },
    /// The alphabet maps more than one value to the same byte.
    DuplicateByte {
        /// Index of the earlier occurrence.
        first: usize,
        /// Index of the later occurrence.
        second: usize,
        /// Duplicated byte value.
        byte: u8,
    },
}
30
31impl core::fmt::Display for AlphabetError {
32 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
33 match self {
34 Self::InvalidByte { index, byte } => {
35 write!(
36 f,
37 "invalid base64 alphabet byte 0x{byte:02x} at index {index}"
38 )
39 }
40 Self::PaddingByte { index } => {
41 write!(f, "base64 alphabet contains padding byte at index {index}")
42 }
43 Self::DuplicateByte {
44 first,
45 second,
46 byte,
47 } => write!(
48 f,
49 "base64 alphabet byte 0x{byte:02x} is duplicated at indexes {first} and {second}"
50 ),
51 }
52 }
53}
54
// The `std::error::Error` impl is only compiled when the `std` feature is
// enabled. The trait's default methods are used as-is.
#[cfg(feature = "std")]
impl std::error::Error for AlphabetError {}
57
/// Defines a custom [`Alphabet`] from a 64-byte string literal.
///
/// The macro emits a unit struct with the given visibility and attributes,
/// an [`Alphabet`] implementation whose `ENCODE` table is the dereferenced
/// `$encode` expression, and an anonymous constant that checks the table.
///
/// The generated alphabet is validated at compile time with
/// [`validate_alphabet`]. Invalid, duplicate, or padding bytes fail the build
/// instead of creating a malformed runtime profile.
///
/// The generated implementation uses the conservative default
/// [`Alphabet::encode`] behavior: every emitted Base64 byte performs a fixed
/// 64-entry scan to avoid secret-indexed table lookups. Among the built-in
/// alphabets, `Standard` and `UrlSafe` use optimized arithmetic mappers
/// instead; `Bcrypt` and `Crypt` keep the same default scan.
///
/// The generated [`Alphabet::decode`] implementation delegates to
/// [`decode_alphabet_byte`]. The constant-time-oriented [`ct`](crate::ct)
/// module scans the generated `ENCODE` table directly and does not call the
/// generated `decode` method.
///
/// # Examples
///
/// ```
/// base64_ng::define_alphabet! {
///     struct DotSlash = b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/// }
///
/// let engine = base64_ng::Engine::<DotSlash, false>::new();
/// let mut encoded = [0u8; 4];
/// let written = engine.encode_slice(&[0xff, 0xff, 0xff], &mut encoded).unwrap();
/// assert_eq!(&encoded[..written], b"9999");
/// ```
///
/// Invalid alphabets fail during compilation:
///
/// ```compile_fail
/// base64_ng::define_alphabet! {
///     struct Bad = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
/// }
/// ```
#[macro_export]
macro_rules! define_alphabet {
    ($(#[$meta:meta])* $vis:vis struct $name:ident = $encode:expr;) => {
        $(#[$meta])*
        #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
        $vis struct $name;

        impl $crate::Alphabet for $name {
            // `$encode` is dereferenced, so it must evaluate to something that
            // derefs to `[u8; 64]` (for example a 64-byte `b"..."` literal).
            const ENCODE: [u8; 64] = *$encode;

            #[inline]
            fn decode(byte: u8) -> Option<u8> {
                $crate::decode_alphabet_byte(byte, &Self::ENCODE)
            }
        }

        // Compile-time validation: the array length expression evaluates to 1
        // for a valid table and 0 otherwise. A `[(); 0]` value does not match
        // the declared `[(); 1]` type, so an invalid alphabet fails the build.
        const _: [(); 1] = [(); match $crate::validate_alphabet(
            &<$name as $crate::Alphabet>::ENCODE,
        ) {
            Ok(()) => 1,
            Err(_) => 0,
        }];
    };
}
118
119/// Validates a 64-byte Base64 alphabet table.
120///
121/// A valid alphabet must contain exactly 64 unique visible ASCII bytes and must
122/// not contain the padding byte `=`.
123///
124/// # Examples
125///
126/// ```
127/// use base64_ng::{Alphabet, Standard, validate_alphabet};
128///
129/// validate_alphabet(&Standard::ENCODE).unwrap();
130/// ```
131pub const fn validate_alphabet(encode: &[u8; 64]) -> Result<(), AlphabetError> {
132 let mut index = 0;
133 while index < encode.len() {
134 let byte = encode[index];
135 if !is_visible_ascii(byte) {
136 return Err(AlphabetError::InvalidByte { index, byte });
137 }
138 if byte == b'=' {
139 return Err(AlphabetError::PaddingByte { index });
140 }
141
142 let mut duplicate = index + 1;
143 while duplicate < encode.len() {
144 if encode[duplicate] == byte {
145 return Err(AlphabetError::DuplicateByte {
146 first: index,
147 second: duplicate,
148 byte,
149 });
150 }
151 duplicate += 1;
152 }
153
154 index += 1;
155 }
156
157 Ok(())
158}
159
160/// Decodes one byte by scanning a caller-provided alphabet table.
161///
162/// This helper is intended for custom [`Alphabet`] implementations. Validate
163/// the table with [`validate_alphabet`] before trusting the alphabet in a
164/// protocol or public API. The scan always visits all 64 entries before
165/// returning so the match position does not create an early-return timing
166/// signal in the source-level implementation.
167///
168/// # Security
169///
170/// This helper is part of the normal strict decoder path, not the
171/// constant-time-oriented [`ct`](crate::ct) module. It is a `const fn` so it
172/// does not use the optimizer barriers, volatile accumulator reads, or
173/// generated-code evidence hooks used by the private `ct` scanner. Do not rely
174/// on this helper for military or cryptographic constant-time guarantees under
175/// LTO or future compiler rewrites. For secret-bearing custom alphabets, use
176/// [`Engine::ct_decoder`](crate::Engine::ct_decoder) or the [`ct`](crate::ct)
177/// module, which scans [`Alphabet::ENCODE`] directly and does not call
178/// [`Alphabet::decode`].
179///
180/// # Examples
181///
182/// ```
183/// use base64_ng::{Alphabet, decode_alphabet_byte};
184///
185/// struct DotSlash;
186///
187/// impl Alphabet for DotSlash {
188/// const ENCODE: [u8; 64] =
189/// *b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
190///
191/// fn decode(byte: u8) -> Option<u8> {
192/// decode_alphabet_byte(byte, &Self::ENCODE)
193/// }
194/// }
195///
196/// assert_eq!(DotSlash::decode(b'.'), Some(0));
197/// assert_eq!(DotSlash::decode(b'9'), Some(63));
198/// ```
199#[must_use]
200pub const fn decode_alphabet_byte(byte: u8, encode: &[u8; 64]) -> Option<u8> {
201 let mut index = 0;
202 let mut candidate = 0;
203 let mut decoded = 0;
204 let mut valid = 0;
205 while index < encode.len() {
206 let matches = ct_mask_eq_u8(byte, encode[index]);
207 decoded |= candidate & matches;
208 valid |= matches;
209 index += 1;
210 candidate += 1;
211 }
212
213 if valid == 0 { None } else { Some(decoded) }
214}
215
/// A Base64 alphabet.
///
/// An implementation supplies the 64-entry [`ENCODE`](Alphabet::ENCODE) table
/// and a [`decode`](Alphabet::decode) function; [`encode`](Alphabet::encode)
/// has a default that scans the table.
///
/// # Security
///
/// The default [`Alphabet::encode`] implementation is constant-time-oriented:
/// it scans all 64 alphabet entries instead of using `ENCODE[value as usize]`.
/// If an implementation overrides `encode` with a direct table lookup, normal
/// [`Engine`](crate::Engine) encoding becomes timing-sensitive with respect to
/// the emitted 6-bit value.
///
/// The normal strict decode path calls [`Alphabet::decode`] and is not a
/// constant-time decoder. The [`ct`](crate::ct) module does not call
/// [`Alphabet::decode`]; it scans [`Alphabet::ENCODE`] directly with its own
/// fixed 64-entry mapper. A custom non-constant-time `decode` implementation
/// therefore affects normal strict decode diagnostics and timing, but not the
/// `ct` module's symbol-mapping loop.
pub trait Alphabet {
    /// Encoding table indexed by 6-bit values.
    const ENCODE: [u8; 64];

    /// Encode one 6-bit value into an alphabet byte.
    ///
    /// The default implementation scans the alphabet table instead of using a
    /// secret-indexed table lookup. Of the alphabets defined in this module,
    /// [`Standard`] and [`UrlSafe`] override this with the branch-minimized
    /// ASCII arithmetic mapper, while [`Bcrypt`] and [`Crypt`] keep this
    /// default. Alphabets that keep the default method prioritize timing
    /// posture over throughput: every emitted Base64 byte performs a fixed
    /// 64-entry scan. For massive payloads with user-defined alphabets,
    /// profile this cost and consider an audited custom override only if the
    /// alphabet has a structure that can be mapped without secret-indexed
    /// table access.
    #[must_use]
    fn encode(value: u8) -> u8 {
        encode_alphabet_value(value, &Self::ENCODE)
    }

    /// Decode one byte into a 6-bit value.
    ///
    /// Returns `None` when `byte` is not part of the alphabet.
    ///
    /// Implementations that want conservative custom-alphabet timing posture
    /// should delegate to [`decode_alphabet_byte`], which scans all 64 entries
    /// before returning. The `ct` module ignores this method and scans
    /// [`Self::ENCODE`] directly.
    fn decode(byte: u8) -> Option<u8>;
}
259
/// Reports whether `byte` is a visible (graphic) ASCII character.
///
/// Visible means `0x21..=0x7e` (`!` through `~`): printable ASCII without the
/// space, control characters, or `DEL`. This delegates to
/// `u8::is_ascii_graphic`, which tests exactly that range and is callable
/// from `const fn`, instead of repeating the bounds by hand.
const fn is_visible_ascii(byte: u8) -> bool {
    byte.is_ascii_graphic()
}
263
264/// The RFC 4648 standard Base64 alphabet.
265#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
266pub struct Standard;
267
268impl Alphabet for Standard {
269 const ENCODE: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
270
271 #[inline]
272 fn encode(value: u8) -> u8 {
273 encode_ascii_base64(value, Self::ENCODE[62], Self::ENCODE[63])
274 }
275
276 #[inline]
277 fn decode(byte: u8) -> Option<u8> {
278 decode_ascii_base64(byte, Self::ENCODE[62], Self::ENCODE[63])
279 }
280}
281
282/// The RFC 4648 URL-safe Base64 alphabet.
283#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
284pub struct UrlSafe;
285
286impl Alphabet for UrlSafe {
287 const ENCODE: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
288
289 #[inline]
290 fn encode(value: u8) -> u8 {
291 encode_ascii_base64(value, Self::ENCODE[62], Self::ENCODE[63])
292 }
293
294 #[inline]
295 fn decode(byte: u8) -> Option<u8> {
296 decode_ascii_base64(byte, Self::ENCODE[62], Self::ENCODE[63])
297 }
298}
299
/// The bcrypt Base64 alphabet.
///
/// The table order is `.`, `/`, `A-Z`, `a-z`, `0-9`.
///
/// This alphabet is commonly used by bcrypt hash strings. It is provided as an
/// alphabet/profile building block; `base64-ng` does not parse or verify full
/// bcrypt password-hash records.
///
/// This type does not override [`Alphabet::encode`], so encoding uses the
/// trait's default 64-entry table scan.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct Bcrypt;

impl Alphabet for Bcrypt {
    const ENCODE: [u8; 64] = *b"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

    // Decoding scans the whole table via `decode_alphabet_byte`.
    #[inline]
    fn decode(byte: u8) -> Option<u8> {
        decode_alphabet_byte(byte, &Self::ENCODE)
    }
}
316
/// The Unix `crypt(3)` Base64 alphabet.
///
/// The table order is `.`, `/`, `0-9`, `A-Z`, `a-z`.
///
/// This alphabet is provided as an explicit legacy interoperability profile.
/// `base64-ng` does not parse or verify complete password-hash records.
///
/// This type does not override [`Alphabet::encode`], so encoding uses the
/// trait's default 64-entry table scan.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct Crypt;

impl Alphabet for Crypt {
    const ENCODE: [u8; 64] = *b"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    // Decoding scans the whole table via `decode_alphabet_byte`.
    #[inline]
    fn decode(byte: u8) -> Option<u8> {
        decode_alphabet_byte(byte, &Self::ENCODE)
    }
}
332
/// Maps a 6-bit value to its symbol by scanning `A::ENCODE`.
///
/// This is a `const fn` and always goes through the table scan in
/// `encode_alphabet_value`; it does not call `A::encode`, so any `encode`
/// override on `A` is bypassed.
#[inline]
pub(crate) const fn encode_base64_value<A: Alphabet>(value: u8) -> u8 {
    encode_alphabet_value(value, &A::ENCODE)
}
337
/// Maps a 6-bit value to its symbol through `A::encode`.
///
/// Alphabets that override `encode` use their own mapper here; the others
/// fall back to the trait's default table scan.
#[inline]
pub(crate) fn encode_base64_value_runtime<A: Alphabet>(value: u8) -> u8 {
    A::encode(value)
}
342
343#[inline]
344const fn encode_alphabet_value(value: u8, encode: &[u8; 64]) -> u8 {
345 let mut output = 0;
346 let mut index = 0;
347 let mut candidate = 0;
348 while index < encode.len() {
349 output |= encode[index] & ct_mask_eq_u8(value, candidate);
350 index += 1;
351 candidate += 1;
352 }
353 output
354}
355
356#[inline]
357const fn encode_ascii_base64(value: u8, value_62_byte: u8, value_63_byte: u8) -> u8 {
358 let upper = ct_mask_lt_u8(value, 26);
359 let lower = ct_mask_lt_u8(value.wrapping_sub(26), 26);
360 let digit = ct_mask_lt_u8(value.wrapping_sub(52), 10);
361 let value_62 = ct_mask_eq_u8(value, 0x3e);
362 let value_63 = ct_mask_eq_u8(value, 0x3f);
363
364 (value.wrapping_add(b'A') & upper)
365 | (value.wrapping_sub(26).wrapping_add(b'a') & lower)
366 | (value.wrapping_sub(52).wrapping_add(b'0') & digit)
367 | (value_62_byte & value_62)
368 | (value_63_byte & value_63)
369}
370
371#[inline]
372fn decode_ascii_base64(byte: u8, value_62_byte: u8, value_63_byte: u8) -> Option<u8> {
373 let upper = ct_mask_lt_u8(byte.wrapping_sub(b'A'), 26);
374 let lower = ct_mask_lt_u8(byte.wrapping_sub(b'a'), 26);
375 let digit = ct_mask_lt_u8(byte.wrapping_sub(b'0'), 10);
376 let value_62 = ct_mask_eq_u8(byte, value_62_byte);
377 let value_63 = ct_mask_eq_u8(byte, value_63_byte);
378 let valid = upper | lower | digit | value_62 | value_63;
379
380 let decoded = (byte.wrapping_sub(b'A') & upper)
381 | (byte.wrapping_sub(b'a').wrapping_add(26) & lower)
382 | (byte.wrapping_sub(b'0').wrapping_add(52) & digit)
383 | (0x3e & value_62)
384 | (0x3f & value_63);
385
386 if valid == 0 { None } else { Some(decoded) }
387}