Skip to main content

base64_ng/
ct.rs

1//! Constant-time-oriented scalar decoding APIs.
2//!
3//! This module is separate from the default decoder so callers can opt into a
4//! slower path with a narrower timing target. It avoids lookup tables indexed
5//! by secret input bytes while mapping Base64 symbols and reports malformed
6//! content through one opaque error. It is not documented as a formally
7//! verified cryptographic constant-time API.
8//!
9//! # Security
10//!
11//! Input length, decoded length, selected alphabet, and final success or
12//! failure remain public. The clear-tail methods wipe caller-owned output on
13//! error, but decoded bytes are written during the fixed-shape decode loop
14//! before final validation is reported. In shared-memory, enclave, or HSM-style
15//! threat models where another component can observe the output buffer during
16//! the call, prefer [`crate::ct::CtEngine::decode_slice_staged_clear_tail`]
17//! with a private staging buffer. In those deployments,
18//! [`crate::ct::CtEngine::decode_slice_clear_tail`] is not sufficient by
19//! itself because it wipes caller-owned output only after the internal decode
20//! loop reaches the final error gate. Treat
21//! [`crate::ct::CtEngine::decode_slice_staged_clear_tail`] as the default for
22//! shared-memory, enclave, HSM-adjacent, or multi-principal deployments;
23//! [`crate::ct::CtEngine::decode_slice_clear_tail`] is appropriate only when
24//! the output buffer is not observable during the call.
25//!
26//! # Platform Posture
27//!
28//! The CT result gate uses architecture-specific best-effort barriers where
29//! stable Rust exposes them. On `AArch64`, the emitted CSDB hint is reported as
30//! `hardware-speculation-barrier-unattested` because older cores may treat it
31//! as a no-op; deployments must attest the exact core behavior before relying
32//! on it for high assurance. On RISC-V, `fence rw, rw` is an ordering fence,
33//! not a Spectre-v1 speculation barrier, and the built-in high-assurance
34//! runtime policy intentionally rejects that posture. RISC-V deployments on
35//! speculative cores need platform-level mitigations and startup policy checks
36//! that make the gap explicit.
37//!
38//! The dependency-free comparison helpers on redacted buffers are
39//! constant-time-oriented best effort, not formally audited MAC or token
40//! comparison primitives. Applications that can admit dependencies and need a
41//! reviewed comparison primitive should use one at the protocol boundary.
42//!
43//! The CT decoder exposes only clear-tail and stack-backed decode APIs. The
44//! former non-clear-tail methods were removed before the `1.0` stable boundary
45//! because they could leave decoded plaintext in caller-owned buffers after
46//! malformed input errors.
47//!
48//! ```compile_fail
49//! use base64_ng::ct;
50//!
51//! let mut output = [0u8; 8];
52//! let _ = ct::STANDARD.decode_slice(b"aGk=", &mut output);
53//! ```
54//!
55//! ```compile_fail
56//! use base64_ng::ct;
57//!
58//! let mut buffer = *b"aGk=";
59//! let _ = ct::STANDARD.decode_in_place(&mut buffer);
60//! ```
61#[cfg(feature = "alloc")]
62use crate::SecretBuffer;
63use crate::{
64    Alphabet, DecodeError, DecodedBuffer, Standard, UrlSafe, decoded_capacity, read_quad,
65    wipe_bytes, wipe_tail,
66};
67use core::marker::PhantomData;
68
/// Standard Base64 constant-time-oriented decoder with padding.
///
/// Equivalent to `CtEngine::<Standard, true>::new()`.
pub const STANDARD: CtEngine<Standard, true> = CtEngine::new();

/// Standard Base64 constant-time-oriented decoder without padding.
///
/// Equivalent to `CtEngine::<Standard, false>::new()`.
pub const STANDARD_NO_PAD: CtEngine<Standard, false> = CtEngine::new();

/// URL-safe Base64 constant-time-oriented decoder with padding.
///
/// Equivalent to `CtEngine::<UrlSafe, true>::new()`.
pub const URL_SAFE: CtEngine<UrlSafe, true> = CtEngine::new();

/// URL-safe Base64 constant-time-oriented decoder without padding.
///
/// Equivalent to `CtEngine::<UrlSafe, false>::new()`.
pub const URL_SAFE_NO_PAD: CtEngine<UrlSafe, false> = CtEngine::new();
80
/// A zero-sized constant-time-oriented Base64 decoder.
///
/// The alphabet is selected by the type parameter `A` and the padding mode by
/// the const parameter `PAD`; the value itself stores no runtime state.
///
/// # Security
///
/// For ordinary secret-bearing inputs, prefer
/// [`Self::decode_slice_clear_tail`], [`Self::decode_buffer`], or
/// [`Self::decode_in_place_clear_tail`]. For shared-memory,
/// enclave-adjacent, HSM-style, or multi-principal deployments where
/// another component can observe caller-owned output during the call, use
/// [`Self::decode_slice_staged_clear_tail`] with a private staging buffer
/// so malformed input cannot transiently write decoded bytes into the
/// public output buffer before the final error gate.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CtEngine<A, const PAD: bool> {
    // Marker only: ties the engine to alphabet `A` without storing a value.
    alphabet: PhantomData<A>,
}
97
98impl<A, const PAD: bool> CtEngine<A, PAD>
99where
100    A: Alphabet,
101{
    /// Creates a new constant-time-oriented decoder engine.
    ///
    /// The engine carries no runtime state: its only field is a
    /// [`PhantomData`] marker for the alphabet `A`, and the padding mode is
    /// the `PAD` const parameter. Because this is a `const fn`, it can be
    /// used to initialise constants.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            alphabet: PhantomData,
        }
    }
109
    /// Returns whether this constant-time-oriented decoder expects padded
    /// input.
    ///
    /// The answer is fixed at compile time: this returns the `PAD` const
    /// parameter of the engine type.
    #[must_use]
    pub const fn is_padded(&self) -> bool {
        PAD
    }
116
    /// Validates `input` without writing decoded bytes.
    ///
    /// This uses the same constant-time-oriented symbol mapping and opaque
    /// malformed-input error behavior as
    /// [`Self::decode_slice_clear_tail`]. Input length, padding length, and
    /// final success or failure remain public.
    ///
    /// Empty input is accepted.
    ///
    /// # Errors
    ///
    /// Returns [`DecodeError::InvalidLength`] when the input length cannot be
    /// valid for this padding mode: for padded engines, a length that is not
    /// a multiple of four; for unpadded engines, a length that leaves a
    /// remainder of one when divided by four. Any other malformed content is
    /// reported through the crate's shared constant-time error gate.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// ct::STANDARD.validate_result(b"aGVsbG8=").unwrap();
    /// assert!(ct::STANDARD.validate_result(b"aGVsbG8").is_err());
    /// ```
    pub fn validate_result(&self, input: &[u8]) -> Result<(), DecodeError> {
        ct_validate_decode::<A, PAD>(input)
    }
135
    /// Returns whether `input` is valid for this constant-time-oriented
    /// decoder.
    ///
    /// This is a convenience wrapper around [`Self::validate_result`]. The
    /// error value is discarded, so only success or failure is reported; call
    /// [`Self::validate_result`] when the error itself is needed.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// assert!(ct::URL_SAFE_NO_PAD.validate(b"-_8"));
    /// assert!(!ct::URL_SAFE_NO_PAD.validate(b"+/8"));
    /// ```
    #[must_use]
    pub fn validate(&self, input: &[u8]) -> bool {
        self.validate_result(input).is_ok()
    }
153
    /// Returns the exact decoded length for valid input.
    ///
    /// This uses the same constant-time-oriented validation policy as
    /// [`Self::validate_result`] before returning a length. Input length,
    /// padding length, and final success or failure remain public.
    ///
    /// Empty input yields `Ok(0)`.
    ///
    /// # Errors
    ///
    /// Returns the same error as [`Self::validate_result`] when `input` is
    /// not valid for this engine; no length is reported for rejected input.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// assert_eq!(ct::STANDARD.decoded_len(b"aGVsbG8=").unwrap(), 5);
    /// assert_eq!(ct::URL_SAFE_NO_PAD.decoded_len(b"-_8").unwrap(), 2);
    /// ```
    pub fn decoded_len(&self, input: &[u8]) -> Result<usize, DecodeError> {
        ct_decoded_len::<A, PAD>(input)
    }
162
163    /// Decodes `input` into `output` and clears all bytes after the
164    /// decoded prefix.
165    ///
166    /// If decoding fails, the entire output buffer is cleared before the
167    /// error is returned. Use this variant for sensitive payloads where
168    /// partially decoded bytes from rejected input should not remain in the
169    /// caller-owned output buffer.
170    ///
171    /// # Security: Transient Plaintext Window
172    ///
173    /// Decoded bytes are written to `output` progressively during the
174    /// fixed-shape decode loop before malformed-input detection is
175    /// complete. On error, the entire `output` is wiped before returning,
176    /// but a concurrent same-process observer with access to `output`
177    /// during the call may observe transient partial plaintext from valid
178    /// leading quanta. For shared-memory, enclave-adjacent, HSM-style, or
179    /// multi-principal deployments where even transient writes are
180    /// unacceptable, use [`Self::decode_slice_staged_clear_tail`] with a
181    /// private staging buffer.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use base64_ng::ct;
187    ///
188    /// let mut output = [0xff; 8];
189    /// let written = ct::STANDARD
190    ///     .decode_slice_clear_tail(b"aGk=", &mut output)
191    ///     .unwrap();
192    ///
193    /// assert_eq!(&output[..written], b"hi");
194    /// assert!(output[written..].iter().all(|byte| *byte == 0));
195    /// ```
196    #[must_use = "handle decode errors; use decode_slice_staged_clear_tail for shared-memory or HSM-style threat models"]
197    pub fn decode_slice_clear_tail(
198        &self,
199        input: &[u8],
200        output: &mut [u8],
201    ) -> Result<usize, DecodeError> {
202        let written = match ct_decode_slice::<A, PAD>(input, output) {
203            Ok(written) => written,
204            Err(err) => {
205                crate::wipe_bytes(output);
206                return Err(err);
207            }
208        };
209        crate::wipe_tail(output, written);
210        Ok(written)
211    }
212
    /// Decodes through caller-provided private staging before copying into
    /// `output`.
    ///
    /// This variant is for shared-memory or sandboxed deployments where
    /// the caller-owned `output` buffer must not contain transient decoded
    /// bytes from malformed input. The `staging` buffer must be at least
    /// the decoded length of `input` and must not be shared with
    /// untrusted concurrent observers. On success, decoded bytes are
    /// copied from `staging` into `output`; on error, both buffers are
    /// cleared before returning.
    ///
    /// On success `staging` is also cleared, and the part of `output` after
    /// the decoded prefix is cleared.
    ///
    /// Input length, final success or failure, and decoded length remain
    /// public.
    ///
    /// # Errors
    ///
    /// Returns the validation error for malformed input,
    /// [`DecodeError::OutputTooSmall`] when `output` is shorter than the
    /// decoded length, and [`DecodeError::StagingTooSmall`] when `staging`
    /// is shorter than the decoded length. In every error case both buffers
    /// are cleared first.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// let mut output = [0xff; 8];
    /// let mut staging = [0u8; 8];
    /// let written = ct::STANDARD
    ///     .decode_slice_staged_clear_tail(b"aGk=", &mut output, &mut staging)
    ///     .unwrap();
    ///
    /// assert_eq!(&output[..written], b"hi");
    /// assert!(output[written..].iter().all(|byte| *byte == 0));
    /// ```
    #[must_use = "handle decode errors; staged decode is for shared-memory or HSM-style threat models"]
    pub fn decode_slice_staged_clear_tail(
        &self,
        input: &[u8],
        output: &mut [u8],
        staging: &mut [u8],
    ) -> Result<usize, DecodeError> {
        ct_decode_slice_staged_clear_tail::<A, PAD>(input, output, staging)
    }
235
236    /// Decodes `input` into a stack-backed buffer.
237    ///
238    /// This uses the same constant-time-oriented scalar decoder as
239    /// [`Self::decode_slice_clear_tail`] and clears the internal backing
240    /// array before returning an error.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// use base64_ng::ct;
246    ///
247    /// let decoded = ct::STANDARD.decode_buffer::<5>(b"aGVsbG8=").unwrap();
248    ///
249    /// assert_eq!(decoded.as_bytes(), b"hello");
250    /// ```
251    pub fn decode_buffer<const CAP: usize>(
252        &self,
253        input: &[u8],
254    ) -> Result<DecodedBuffer<CAP>, DecodeError> {
255        let mut output = DecodedBuffer::new();
256        let written = match self.decode_slice_clear_tail(input, output.as_mut_capacity()) {
257            Ok(written) => written,
258            Err(err) => {
259                output.clear();
260                return Err(err);
261            }
262        };
263        output.set_filled(written)?;
264        Ok(output)
265    }
266
267    /// Decodes `input` into an owned byte vector.
268    ///
269    /// This uses the same constant-time-oriented scalar decoder as
270    /// [`Self::decode_slice_clear_tail`]. If decoding fails, the allocated
271    /// output buffer is cleared before the error is returned.
272    ///
273    /// Use [`Self::decode_secret`] for secret-bearing payloads that should stay
274    /// on the crate's redacted, drop-wiping buffer path. Use
275    /// [`Self::decode_secret_staged`] for shared-memory, enclave-adjacent,
276    /// HSM-style, or multi-principal deployments where even transient writes
277    /// into the final heap allocation are unacceptable.
278    #[cfg(feature = "alloc")]
279    #[must_use = "for secret-bearing payloads use decode_secret, which returns a redacted buffer with drop-time cleanup"]
280    pub fn decode_vec(&self, input: &[u8]) -> Result<alloc::vec::Vec<u8>, DecodeError> {
281        let required = self.decoded_len(input)?;
282        let mut output = alloc::vec![0; required];
283        // decode_slice_clear_tail wipes output on error.
284        let written = self.decode_slice_clear_tail(input, &mut output)?;
285        output.truncate(written);
286        Ok(output)
287    }
288
289    /// Decodes `input` into a redacted owned secret buffer.
290    ///
291    /// This is the recommended heap-owning CT decode path for secret-bearing
292    /// payloads. It decodes with [`Self::decode_vec`] and then wraps the result
293    /// in [`SecretBuffer`], which redacts formatting and clears initialized
294    /// bytes plus spare vector capacity on drop.
295    ///
296    /// # Security: Transient Plaintext Window
297    ///
298    /// This function uses the non-staged CT decode path. Decoded bytes are
299    /// written transiently into the heap allocation before the final error
300    /// gate. On error, the allocation is wiped before returning, but a
301    /// concurrent same-process observer with access to that allocation during
302    /// the call may observe transient partial plaintext. For shared-memory,
303    /// enclave-adjacent, HSM-style, or multi-principal deployments where even
304    /// transient writes into the final heap allocation are unacceptable, use
305    /// [`Self::decode_secret_staged`] with a stack-backed private staging
306    /// capacity large enough for the decoded value.
307    ///
308    /// # Examples
309    ///
310    /// ```
311    /// use base64_ng::ct;
312    ///
313    /// let decoded = ct::STANDARD.decode_secret(b"aGVsbG8=").unwrap();
314    /// assert!(decoded.constant_time_eq_public_len(b"hello"));
315    /// ```
316    #[cfg(feature = "alloc")]
317    pub fn decode_secret(&self, input: &[u8]) -> Result<SecretBuffer, DecodeError> {
318        self.decode_vec(input).map(SecretBuffer::from_vec)
319    }
320
321    /// Decodes `input` into a redacted owned secret buffer through private
322    /// stack staging.
323    ///
324    /// `STAGE` must be at least the decoded length of `input`. Decoded bytes
325    /// are written to a stack-backed staging buffer first and copied into the
326    /// returned heap buffer only after the full constant-time-oriented decode
327    /// succeeds. On error, both staging and heap output buffers are wiped before
328    /// returning.
329    ///
330    /// This is the preferred owned decode API for shared-memory,
331    /// enclave-adjacent, HSM-style, or multi-principal deployments where the
332    /// final heap allocation must not contain transient partial plaintext from
333    /// rejected input.
334    ///
335    /// # Examples
336    ///
337    /// ```
338    /// use base64_ng::ct;
339    ///
340    /// let decoded = ct::STANDARD
341    ///     .decode_secret_staged::<5>(b"aGVsbG8=")
342    ///     .unwrap();
343    /// assert!(decoded.constant_time_eq_public_len(b"hello"));
344    /// ```
345    #[cfg(feature = "alloc")]
346    pub fn decode_secret_staged<const STAGE: usize>(
347        &self,
348        input: &[u8],
349    ) -> Result<SecretBuffer, DecodeError> {
350        let required = self.decoded_len(input)?;
351        let mut staging = DecodedBuffer::<STAGE>::new();
352        let mut output = alloc::vec![0; required];
353        let written =
354            self.decode_slice_staged_clear_tail(input, &mut output, staging.as_mut_capacity())?;
355        output.truncate(written);
356        Ok(SecretBuffer::from_vec(output))
357    }
358
359    /// Decodes `buffer` in place and clears all bytes after the decoded
360    /// prefix.
361    ///
362    /// If decoding fails, the entire buffer is cleared before the error is
363    /// returned.
364    ///
365    /// # Security: Transient Plaintext Window
366    ///
367    /// This in-place API writes decoded bytes into `buffer` during the
368    /// fixed-shape decode loop before malformed-input detection is
369    /// complete. On error, the entire buffer is wiped before returning,
370    /// but concurrent same-process observers with access to the same memory
371    /// can observe transient partial plaintext. Use
372    /// [`Self::decode_slice_staged_clear_tail`] with a private staging
373    /// buffer when shared-memory or enclave-adjacent deployments cannot
374    /// tolerate that window.
375    ///
376    /// # Examples
377    ///
378    /// ```
379    /// use base64_ng::ct;
380    ///
381    /// let mut buffer = *b"aGk=";
382    /// let decoded = ct::STANDARD.decode_in_place_clear_tail(&mut buffer).unwrap();
383    ///
384    /// assert_eq!(decoded, b"hi");
385    /// ```
386    pub fn decode_in_place_clear_tail<'a>(
387        &self,
388        buffer: &'a mut [u8],
389    ) -> Result<&'a mut [u8], DecodeError> {
390        let len = match ct_decode_in_place::<A, PAD>(buffer) {
391            Ok(len) => len,
392            Err(err) => {
393                crate::wipe_bytes(buffer);
394                return Err(err);
395            }
396        };
397        crate::wipe_tail(buffer, len);
398        Ok(&mut buffer[..len])
399    }
400}
401
402impl<A, const PAD: bool> core::fmt::Display for CtEngine<A, PAD> {
403    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
404        write!(formatter, "ct padded={PAD}")
405    }
406}
407
/// Expands the lowest bit of `bit` into a full-byte mask.
///
/// Returns `0xff` when bit 0 is set and `0x00` otherwise; all higher bits of
/// `bit` are ignored. The computation is pure arithmetic with no branches.
#[inline]
pub(crate) const fn ct_mask_bit(bit: u8) -> u8 {
    let lowest = bit & 1;
    // Two's-complement negation: 0 stays 0x00, 1 becomes 0xff.
    lowest.wrapping_neg()
}
412
413#[inline]
414pub(crate) const fn ct_mask_nonzero_u8(value: u8) -> u8 {
415    let wide = value as u16;
416    let negative = 0u16.wrapping_sub(wide);
417    let nonzero = ((wide | negative) >> 8) as u8;
418    ct_mask_bit(nonzero)
419}
420
421#[inline]
422pub(crate) const fn ct_mask_eq_u8(left: u8, right: u8) -> u8 {
423    !ct_mask_nonzero_u8(left ^ right)
424}
425
426#[inline]
427pub(crate) const fn ct_mask_lt_u8(left: u8, right: u8) -> u8 {
428    let diff = (left as u16).wrapping_sub(right as u16);
429    ct_mask_bit((diff >> 8) as u8)
430}
431
432#[inline(never)]
433pub(crate) fn constant_time_eq_public_len(left: &[u8], right: &[u8]) -> bool {
434    if left.len() != right.len() {
435        return false;
436    }
437
438    constant_time_eq_same_len(left, right)
439}
440
441#[inline(never)]
442pub(crate) fn constant_time_eq_fixed_width_array<const N: usize>(
443    left: &[u8; N],
444    right: &[u8; N],
445) -> bool {
446    constant_time_eq_same_len(left, right)
447}
448
/// Compares two byte slices by accumulating the XOR of every byte pair.
///
/// Every zipped pair is visited; the loop never exits early on a mismatch.
/// Callers are expected to pass slices of equal length: `zip` stops at the
/// shorter slice, so extra bytes in a longer slice would not be compared.
/// Returns `true` when the accumulated difference is zero.
///
/// The `black_box` wrappers and volatile reads of the local accumulator are
/// optimizer barriers, and `ct_error_gate_barrier` runs before the final
/// comparison that produces the public result.
#[inline(never)]
#[allow(unsafe_code)]
fn constant_time_eq_same_len(left: &[u8], right: &[u8]) -> bool {
    // OR-accumulator: stays zero only if every byte pair matched.
    let mut diff = 0u8;
    for (left, right) in left.iter().zip(right) {
        diff = core::hint::black_box(
            core::hint::black_box(diff) | core::hint::black_box(*left ^ *right),
        );
        // SAFETY: `diff` is an initialized local `u8`; the volatile read is a
        // dependency-free optimizer barrier for the accumulation value and does
        // not access caller memory.
        diff = unsafe { core::ptr::read_volatile(&raw const diff) };
    }
    // Barrier before the accumulated value is turned into a public boolean.
    ct_error_gate_barrier(diff, 0);
    // SAFETY: `diff` is an initialized local `u8`; this final volatile read
    // keeps the public equality comparison dependent on a post-barrier load of
    // the accumulated value.
    let result = unsafe { core::ptr::read_volatile(&raw const diff) };
    result == 0
}
469
470fn ct_decode_slice<A: Alphabet, const PAD: bool>(
471    input: &[u8],
472    output: &mut [u8],
473) -> Result<usize, DecodeError> {
474    if input.is_empty() {
475        return Ok(0);
476    }
477
478    if PAD {
479        ct_decode_padded::<A>(input, output)
480    } else {
481        ct_decode_unpadded::<A>(input, output)
482    }
483}
484
485fn ct_decode_slice_staged_clear_tail<A: Alphabet, const PAD: bool>(
486    input: &[u8],
487    output: &mut [u8],
488    staging: &mut [u8],
489) -> Result<usize, DecodeError> {
490    let required = match ct_decoded_len::<A, PAD>(input) {
491        Ok(required) => required,
492        Err(err) => {
493            wipe_bytes(output);
494            wipe_bytes(staging);
495            return Err(err);
496        }
497    };
498
499    if output.len() < required {
500        wipe_bytes(output);
501        wipe_bytes(staging);
502        return Err(DecodeError::OutputTooSmall {
503            required,
504            available: output.len(),
505        });
506    }
507
508    if staging.len() < required {
509        wipe_bytes(output);
510        wipe_bytes(staging);
511        return Err(DecodeError::StagingTooSmall {
512            required,
513            available: staging.len(),
514        });
515    }
516
517    let written = match ct_decode_slice::<A, PAD>(input, &mut staging[..required]) {
518        Ok(written) => written,
519        Err(err) => {
520            wipe_bytes(output);
521            wipe_bytes(staging);
522            return Err(err);
523        }
524    };
525
526    output[..written].copy_from_slice(&staging[..written]);
527    wipe_bytes(staging);
528    wipe_tail(output, written);
529    Ok(written)
530}
531
532fn ct_decode_in_place<A: Alphabet, const PAD: bool>(
533    buffer: &mut [u8],
534) -> Result<usize, DecodeError> {
535    if buffer.is_empty() {
536        return Ok(0);
537    }
538
539    if PAD {
540        ct_decode_padded_in_place::<A>(buffer)
541    } else {
542        ct_decode_unpadded_in_place::<A>(buffer)
543    }
544}
545
/// Best-effort barrier executed before a public success/failure decision.
///
/// The combined flags are passed through [`core::hint::black_box`] and a
/// sequentially consistent compiler fence, followed by an
/// architecture-specific instruction where one is emitted:
///
/// - x86 / x86_64: `lfence`
/// - AArch64: `isb sy` followed by `hint #20` (CSDB)
/// - 32-bit ARM: `isb sy`
/// - RISC-V: `fence rw, rw` (an ordering fence, not a speculation barrier)
///
/// Under Miri, and on every other architecture, only the `black_box` call
/// and the compiler fence remain. Nothing is returned; callers inspect the
/// flags themselves after this call.
///
/// None of the assembly blocks declares `nomem`. The instructions do not
/// touch memory themselves, but omitting `nomem` makes the compiler treat
/// each block as a potential memory access, so it cannot move loads or
/// stores across the barrier.
#[inline(never)]
// Inline assembly requires `unsafe`; each block carries its own SAFETY note.
#[allow(unsafe_code)]
fn ct_error_gate_barrier(invalid_byte: u8, invalid_padding: u8) {
    core::hint::black_box(invalid_byte | invalid_padding);
    core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst);

    #[cfg(all(not(miri), any(target_arch = "x86", target_arch = "x86_64")))]
    {
        // SAFETY: `lfence` does not access memory and is used as a speculation
        // barrier before the public success/failure branch is observed.
        // `nomem` is deliberately not specified, matching the other
        // architectures, so the compiler keeps memory accesses on their
        // original side of the barrier.
        unsafe {
            core::arch::asm!("lfence", options(nostack, preserves_flags));
        }
    }

    #[cfg(all(not(miri), target_arch = "aarch64"))]
    {
        // Older cores may treat CSDB as a no-op; runtime reporting marks this
        // as unattested until the deployment provides platform evidence.
        // SAFETY: these barriers do not access memory.
        unsafe {
            core::arch::asm!("isb sy", "hint #20", options(nostack, preserves_flags));
        }
    }

    #[cfg(all(not(miri), target_arch = "arm"))]
    {
        // SAFETY: `isb sy` does not access memory and is used as the best
        // available stable ARM speculation boundary for this crate.
        unsafe {
            core::arch::asm!("isb sy", options(nostack, preserves_flags));
        }
    }

    #[cfg(all(not(miri), any(target_arch = "riscv32", target_arch = "riscv64")))]
    {
        // RISC-V base ISA does not provide a canonical speculation barrier.
        // `fence rw, rw` is the available ordering primitive for the CT public
        // result gate and is reported separately as `ordering-fence`; callers
        // on speculative RISC-V cores must use platform mitigations because
        // this does not satisfy `BackendPolicy::HighAssuranceScalarOnly`.
        // SAFETY: the assembly block does not access memory.
        unsafe {
            core::arch::asm!("fence rw, rw", options(nostack, preserves_flags));
        }
    }
}
593
594fn ct_validate_decode<A: Alphabet, const PAD: bool>(input: &[u8]) -> Result<(), DecodeError> {
595    if input.is_empty() {
596        return Ok(());
597    }
598
599    if PAD {
600        ct_validate_padded::<A>(input)
601    } else {
602        ct_validate_unpadded::<A>(input)
603    }
604}
605
606fn ct_decoded_len<A: Alphabet, const PAD: bool>(input: &[u8]) -> Result<usize, DecodeError> {
607    ct_validate_decode::<A, PAD>(input)?;
608    if input.is_empty() {
609        return Ok(0);
610    }
611
612    if PAD {
613        Ok(input.len() / 4 * 3 - ct_padding_len(input))
614    } else {
615        let full_quads = input.len() / 4 * 3;
616        match input.len() % 4 {
617            0 => Ok(full_quads),
618            2 => Ok(full_quads + 1),
619            3 => Ok(full_quads + 2),
620            _ => Err(DecodeError::InvalidLength),
621        }
622    }
623}
624
/// Validates padded Base64 `input` without producing decoded bytes.
///
/// The length must be a multiple of four, otherwise `InvalidLength` is
/// returned immediately. Every quad before the last is checked symbol by
/// symbol, and the last quad is checked by `ct_padded_final_quantum` with the
/// padding length taken from `ct_padding_len`. Findings are OR-ed into two
/// accumulators and reported once, at the end, through `report_ct_error`, so
/// the loop does not exit early on malformed content.
fn ct_validate_padded<A: Alphabet>(input: &[u8]) -> Result<(), DecodeError> {
    if !input.len().is_multiple_of(4) {
        return Err(DecodeError::InvalidLength);
    }

    let padding = ct_padding_len(input);
    // Accumulators: non-zero means "some symbol / some padding was invalid".
    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut read = 0;

    // Strict `<` leaves the final quad for the padding-aware check below.
    while read + 4 < input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        // NOTE(review): the second tuple element is used as an all-ones mask
        // when the symbol is valid — confirm against `ct_decode_alphabet_byte`.
        let (_, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (_, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (_, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (_, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // `=` in the last two positions of a non-final quad is misplaced padding.
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');
        read += 4;
    }

    let final_chunk =
        read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
    // Only the two invalid flags matter here; decoded bytes and count are dropped.
    let (_, final_invalid_byte, final_invalid_padding, _) =
        ct_padded_final_quantum::<A>(final_chunk, padding);
    invalid_byte |= final_invalid_byte;
    invalid_padding |= final_invalid_padding;

    report_ct_error(invalid_byte, invalid_padding)
}
661
/// Validates unpadded Base64 `input` without producing decoded bytes.
///
/// A length that leaves a remainder of one modulo four is rejected
/// immediately with `InvalidLength`. Every full quad is checked symbol by
/// symbol, any `=` is flagged as invalid padding, and a two- or three-symbol
/// tail must have its unused low bits set to zero. Findings are OR-ed into
/// two accumulators and reported once, at the end, through
/// `report_ct_error`.
fn ct_validate_unpadded<A: Alphabet>(input: &[u8]) -> Result<(), DecodeError> {
    if input.len() % 4 == 1 {
        return Err(DecodeError::InvalidLength);
    }

    // Accumulators: non-zero means "some symbol / some padding was invalid".
    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut read = 0;

    // `<=` consumes every full quad; only a 0-, 2- or 3-symbol tail remains.
    while read + 4 <= input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        let (_, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (_, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (_, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (_, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // Unpadded input must not contain `=` in any position.
        invalid_padding |= ct_mask_eq_u8(b0, b'=');
        invalid_padding |= ct_mask_eq_u8(b1, b'=');
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');

        read += 4;
    }

    match read_tail_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding) {
        [] => {}
        [b0, b1] => {
            let (_, valid0) = ct_decode_alphabet_byte::<A>(*b0);
            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
            invalid_byte |= !valid0;
            invalid_byte |= !valid1;
            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
            // Two symbols carry one byte; the low 4 bits of the second are unused.
            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
        }
        [b0, b1, b2] => {
            let (_, valid0) = ct_decode_alphabet_byte::<A>(*b0);
            let (_, valid1) = ct_decode_alphabet_byte::<A>(*b1);
            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
            invalid_byte |= !valid0;
            invalid_byte |= !valid1;
            invalid_byte |= !valid2;
            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
            // Three symbols carry two bytes; the low 2 bits of the third are unused.
            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
        }
        // Any other tail shape is treated as fully invalid.
        _ => {
            invalid_byte = 0xff;
            invalid_padding = 0xff;
        }
    }

    report_ct_error(invalid_byte, invalid_padding)
}
722
/// Decodes and checks the final four-symbol quantum of padded input.
///
/// `padding` is the number of trailing padding symbols and must be 0, 1 or
/// 2; any other value returns zeroed bytes, both invalid flags set to `0xff`
/// and a written count of 0.
///
/// Returns `(bytes, invalid_byte, invalid_padding, written)`:
/// - `bytes`: the three candidate output bytes, always computed;
/// - `invalid_byte`: non-zero if a symbol required by this padding mode is
///   not in the alphabet;
/// - `invalid_padding`: non-zero if `=` appears where a data symbol is
///   required, or if the unused low bits of the last data symbol are set;
/// - `written`: `3 - padding`, the number of leading `bytes` that are
///   meaningful.
///
/// After the initial `match` on `padding`, the checks are combined with
/// masks rather than branches.
pub(crate) fn ct_padded_final_quantum<A: Alphabet>(
    input: [u8; 4],
    padding: usize,
) -> ([u8; 3], u8, u8, usize) {
    let [b0, b1, b2, b3] = input;
    let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
    let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
    let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
    let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);

    // Narrow the public padding count to a byte; reject anything above 2.
    let padding_byte = match padding {
        0 => 0,
        1 => 1,
        2 => 2,
        _ => return ([0; 3], 0xff, 0xff, 0),
    };
    // Exactly one of these three masks is 0xff.
    let no_padding = ct_mask_eq_u8(padding_byte, 0);
    let one_padding = ct_mask_eq_u8(padding_byte, 1);
    let two_padding = ct_mask_eq_u8(padding_byte, 2);
    // The third symbol carries data unless both trailing symbols are padding;
    // the fourth carries data only when there is no padding at all.
    let require_v2 = no_padding | one_padding;
    let require_v3 = no_padding;

    let invalid_byte = !valid0 | !valid1 | (!valid2 & require_v2) | (!valid3 & require_v3);
    // Per padding mode: unused low bits of the last data symbol must be zero,
    // and `=` must not sit in a position that should hold data.
    let invalid_padding = (ct_mask_nonzero_u8(v1 & 0b0000_1111) & two_padding)
        | ((ct_mask_eq_u8(b2, b'=') | ct_mask_nonzero_u8(v2 & 0b0000_0011)) & one_padding)
        | ((ct_mask_eq_u8(b2, b'=') | ct_mask_eq_u8(b3, b'=')) & no_padding);

    (
        [(v0 << 2) | (v1 >> 4), (v1 << 4) | (v2 >> 2), (v2 << 6) | v3],
        invalid_byte,
        invalid_padding,
        3 - padding,
    )
}
757
/// Decodes padded Base64 `input` into `output` with a fixed-shape loop.
///
/// Returns `InvalidLength` when the input length is not a multiple of four,
/// and `OutputTooSmall` when `output` cannot hold the decoded length; both
/// checks depend only on public lengths and happen before any byte is
/// written. After that, every quad is decoded and written to `output`
/// regardless of validity, while findings are OR-ed into two accumulators.
/// The accumulated result is reported once, at the end, through
/// `report_ct_error`; on success the number of bytes written is returned.
///
/// Because bytes are written before the final check, `output` may hold
/// partially decoded data when an error is returned; callers in this file
/// wipe it afterwards.
fn ct_decode_padded<A: Alphabet>(input: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> {
    if !input.len().is_multiple_of(4) {
        return Err(DecodeError::InvalidLength);
    }

    let padding = ct_padding_len(input);
    let required = input.len() / 4 * 3 - padding;
    if output.len() < required {
        return Err(DecodeError::OutputTooSmall {
            required,
            available: output.len(),
        });
    }

    // Accumulators: non-zero means "some symbol / some padding was invalid".
    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut write = 0;
    let mut read = 0;

    // Strict `<` leaves the final quad for the padding-aware step below.
    while read + 4 < input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // `=` in the last two positions of a non-final quad is misplaced padding.
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');
        // Pack four 6-bit values into three bytes; written even if invalid.
        output[write] = (v0 << 2) | (v1 >> 4);
        output[write + 1] = (v1 << 4) | (v2 >> 2);
        output[write + 2] = (v2 << 6) | v3;
        write += 3;
        read += 4;
    }

    let final_chunk =
        read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
    let (final_bytes, final_invalid_byte, final_invalid_padding, final_written) =
        ct_padded_final_quantum::<A>(final_chunk, padding);
    invalid_byte |= final_invalid_byte;
    invalid_padding |= final_invalid_padding;
    // Only the bytes not covered by padding are copied out.
    output[write..write + final_written].copy_from_slice(&final_bytes[..final_written]);
    write += final_written;

    report_ct_error(invalid_byte, invalid_padding)?;
    Ok(write)
}
810
811fn ct_decode_padded_in_place<A: Alphabet>(buffer: &mut [u8]) -> Result<usize, DecodeError> {
812    if !buffer.len().is_multiple_of(4) {
813        return Err(DecodeError::InvalidLength);
814    }
815
816    let padding = ct_padding_len(buffer);
817    let required = buffer.len() / 4 * 3 - padding;
818    if required > buffer.len() {
819        wipe_bytes(buffer);
820        return Err(DecodeError::InvalidInput);
821    }
822
823    let mut invalid_byte = 0u8;
824    let mut invalid_padding = 0u8;
825    let mut write = 0;
826    let mut read = 0;
827
828    while read + 4 < buffer.len() {
829        let [b0, b1, b2, b3] =
830            read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
831        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
832        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
833        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
834        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
835
836        invalid_byte |= !valid0;
837        invalid_byte |= !valid1;
838        invalid_byte |= !valid2;
839        invalid_byte |= !valid3;
840        invalid_padding |= ct_mask_eq_u8(b2, b'=');
841        invalid_padding |= ct_mask_eq_u8(b3, b'=');
842        buffer[write] = (v0 << 2) | (v1 >> 4);
843        buffer[write + 1] = (v1 << 4) | (v2 >> 2);
844        buffer[write + 2] = (v2 << 6) | v3;
845        write += 3;
846        read += 4;
847    }
848
849    let final_chunk =
850        read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
851    let (final_bytes, final_invalid_byte, final_invalid_padding, final_written) =
852        ct_padded_final_quantum::<A>(final_chunk, padding);
853    invalid_byte |= final_invalid_byte;
854    invalid_padding |= final_invalid_padding;
855    buffer[write..write + final_written].copy_from_slice(&final_bytes[..final_written]);
856    write += final_written;
857
858    if write != required {
859        ct_error_gate_barrier(invalid_byte, invalid_padding);
860        wipe_bytes(buffer);
861        return Err(DecodeError::InvalidInput);
862    }
863    if let Err(err) = report_ct_error(invalid_byte, invalid_padding) {
864        wipe_bytes(buffer);
865        return Err(err);
866    }
867    Ok(write)
868}
869
870fn ct_decode_unpadded<A: Alphabet>(input: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> {
871    if input.len() % 4 == 1 {
872        return Err(DecodeError::InvalidLength);
873    }
874
875    let required = decoded_capacity(input.len());
876    if output.len() < required {
877        return Err(DecodeError::OutputTooSmall {
878            required,
879            available: output.len(),
880        });
881    }
882
883    let mut invalid_byte = 0u8;
884    let mut invalid_padding = 0u8;
885    let mut write = 0;
886    let mut read = 0;
887
888    while read + 4 <= input.len() {
889        let [b0, b1, b2, b3] =
890            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
891        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
892        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
893        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
894        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
895
896        invalid_byte |= !valid0;
897        invalid_byte |= !valid1;
898        invalid_byte |= !valid2;
899        invalid_byte |= !valid3;
900        invalid_padding |= ct_mask_eq_u8(b0, b'=');
901        invalid_padding |= ct_mask_eq_u8(b1, b'=');
902        invalid_padding |= ct_mask_eq_u8(b2, b'=');
903        invalid_padding |= ct_mask_eq_u8(b3, b'=');
904
905        output[write] = (v0 << 2) | (v1 >> 4);
906        output[write + 1] = (v1 << 4) | (v2 >> 2);
907        output[write + 2] = (v2 << 6) | v3;
908        read += 4;
909        write += 3;
910    }
911
912    match read_tail_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding) {
913        [] => {}
914        [b0, b1] => {
915            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
916            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
917            invalid_byte |= !valid0;
918            invalid_byte |= !valid1;
919            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
920            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
921            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
922            output[write] = (v0 << 2) | (v1 >> 4);
923            write += 1;
924        }
925        [b0, b1, b2] => {
926            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
927            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
928            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
929            invalid_byte |= !valid0;
930            invalid_byte |= !valid1;
931            invalid_byte |= !valid2;
932            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
933            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
934            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
935            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
936            output[write] = (v0 << 2) | (v1 >> 4);
937            output[write + 1] = (v1 << 4) | (v2 >> 2);
938            write += 2;
939        }
940        _ => {
941            invalid_byte = 0xff;
942            invalid_padding = 0xff;
943        }
944    }
945
946    report_ct_error(invalid_byte, invalid_padding)?;
947    Ok(write)
948}
949
950fn ct_decode_unpadded_in_place<A: Alphabet>(buffer: &mut [u8]) -> Result<usize, DecodeError> {
951    if buffer.len() % 4 == 1 {
952        return Err(DecodeError::InvalidLength);
953    }
954
955    let required = decoded_capacity(buffer.len());
956    if required > buffer.len() {
957        wipe_bytes(buffer);
958        return Err(DecodeError::InvalidInput);
959    }
960
961    let mut invalid_byte = 0u8;
962    let mut invalid_padding = 0u8;
963    let mut write = 0;
964    let mut read = 0;
965
966    while read + 4 <= buffer.len() {
967        let [b0, b1, b2, b3] =
968            read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
969        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
970        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
971        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
972        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
973
974        invalid_byte |= !valid0;
975        invalid_byte |= !valid1;
976        invalid_byte |= !valid2;
977        invalid_byte |= !valid3;
978        invalid_padding |= ct_mask_eq_u8(b0, b'=');
979        invalid_padding |= ct_mask_eq_u8(b1, b'=');
980        invalid_padding |= ct_mask_eq_u8(b2, b'=');
981        invalid_padding |= ct_mask_eq_u8(b3, b'=');
982
983        buffer[write] = (v0 << 2) | (v1 >> 4);
984        buffer[write + 1] = (v1 << 4) | (v2 >> 2);
985        buffer[write + 2] = (v2 << 6) | v3;
986        read += 4;
987        write += 3;
988    }
989
990    let tail = read_tail_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
991    match tail {
992        [] => {}
993        [b0, b1] => {
994            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
995            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
996            invalid_byte |= !valid0;
997            invalid_byte |= !valid1;
998            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
999            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
1000            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
1001            buffer[write] = (v0 << 2) | (v1 >> 4);
1002            write += 1;
1003        }
1004        [b0, b1, b2] => {
1005            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
1006            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
1007            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
1008            invalid_byte |= !valid0;
1009            invalid_byte |= !valid1;
1010            invalid_byte |= !valid2;
1011            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
1012            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
1013            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
1014            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
1015            buffer[write] = (v0 << 2) | (v1 >> 4);
1016            buffer[write + 1] = (v1 << 4) | (v2 >> 2);
1017            write += 2;
1018        }
1019        _ => {
1020            invalid_byte = 0xff;
1021            invalid_padding = 0xff;
1022        }
1023    }
1024
1025    if write != required {
1026        ct_error_gate_barrier(invalid_byte, invalid_padding);
1027        wipe_bytes(buffer);
1028        return Err(DecodeError::InvalidInput);
1029    }
1030    if let Err(err) = report_ct_error(invalid_byte, invalid_padding) {
1031        wipe_bytes(buffer);
1032        return Err(err);
1033    }
1034    Ok(write)
1035}
1036
1037fn read_tail(input: &[u8], offset: usize) -> Result<&[u8], DecodeError> {
1038    input.get(offset..).ok_or(DecodeError::InvalidLength)
1039}
1040
1041fn read_quad_or_mark_invalid(
1042    input: &[u8],
1043    offset: usize,
1044    invalid_byte: &mut u8,
1045    invalid_padding: &mut u8,
1046) -> [u8; 4] {
1047    if let Ok(quad) = read_quad(input, offset) {
1048        quad
1049    } else {
1050        debug_assert!(
1051            false,
1052            "read_quad failed inside length-validated constant-time decode loop"
1053        );
1054        *invalid_byte = 0xff;
1055        *invalid_padding = 0xff;
1056        [0; 4]
1057    }
1058}
1059
1060fn read_tail_or_mark_invalid<'a>(
1061    input: &'a [u8],
1062    offset: usize,
1063    invalid_byte: &mut u8,
1064    invalid_padding: &mut u8,
1065) -> &'a [u8] {
1066    if let Ok(tail) = read_tail(input, offset) {
1067        tail
1068    } else {
1069        debug_assert!(
1070            false,
1071            "read_tail failed inside length-validated constant-time decode loop"
1072        );
1073        *invalid_byte = 0xff;
1074        *invalid_padding = 0xff;
1075        &[]
1076    }
1077}
1078
1079#[inline(never)]
1080#[allow(unsafe_code)]
1081fn ct_decode_alphabet_byte<A: Alphabet>(byte: u8) -> (u8, u8) {
1082    let mut decoded = 0u8;
1083    let mut valid = 0u8;
1084    let mut candidate = 0u8;
1085
1086    while candidate < 64 {
1087        let matches = core::hint::black_box(ct_mask_eq_u8(
1088            core::hint::black_box(byte),
1089            core::hint::black_box(A::ENCODE[candidate as usize]),
1090        ));
1091        decoded = core::hint::black_box(
1092            core::hint::black_box(decoded) | core::hint::black_box(candidate & matches),
1093        );
1094        // SAFETY: `decoded` is an initialized local `u8`; the volatile read is
1095        // an optimizer barrier for the fixed 64-iteration alphabet scan and
1096        // does not access caller memory.
1097        decoded = unsafe { core::ptr::read_volatile(&raw const decoded) };
1098        valid =
1099            core::hint::black_box(core::hint::black_box(valid) | core::hint::black_box(matches));
1100        // SAFETY: `valid` is an initialized local `u8`; the volatile read is an
1101        // optimizer barrier for the fixed 64-iteration alphabet scan and does
1102        // not access caller memory.
1103        valid = unsafe { core::ptr::read_volatile(&raw const valid) };
1104        candidate += 1;
1105    }
1106
1107    (decoded, valid)
1108}
1109
1110fn ct_padding_len(input: &[u8]) -> usize {
1111    let Some((&last, before_last_prefix)) = input.split_last() else {
1112        return 0;
1113    };
1114    let Some(&before_last) = before_last_prefix.last() else {
1115        return 0;
1116    };
1117    usize::from(ct_mask_eq_u8(last, b'=') & 1) + usize::from(ct_mask_eq_u8(before_last, b'=') & 1)
1118}
1119
1120pub(crate) fn report_ct_error(invalid_byte: u8, invalid_padding: u8) -> Result<(), DecodeError> {
1121    ct_error_gate_barrier(invalid_byte, invalid_padding);
1122
1123    if (invalid_byte | invalid_padding) != 0 {
1124        Err(DecodeError::InvalidInput)
1125    } else {
1126        Ok(())
1127    }
1128}