Skip to main content

base64_ng/
ct.rs

1//! Constant-time-oriented scalar decoding APIs.
2//!
3//! This module is separate from the default decoder so callers can opt into a
4//! slower path with a narrower timing target. It avoids lookup tables indexed
5//! by secret input bytes while mapping Base64 symbols and reports malformed
6//! content through one opaque error. It is not documented as a formally
7//! verified cryptographic constant-time API.
8//!
9//! # Security
10//!
11//! Input length, decoded length, selected alphabet, and final success or
12//! failure remain public. The clear-tail methods wipe caller-owned output on
13//! error, but decoded bytes are written during the fixed-shape decode loop
14//! before final validation is reported. In shared-memory, enclave, or HSM-style
15//! threat models where another component can observe the output buffer during
16//! the call, prefer [`crate::ct::CtEngine::decode_slice_staged_clear_tail`]
17//! with a private staging buffer. In those deployments,
18//! [`crate::ct::CtEngine::decode_slice_clear_tail`] is not sufficient by
19//! itself because it wipes caller-owned output only after the internal decode
20//! loop reaches the final error gate. Treat
21//! [`crate::ct::CtEngine::decode_slice_staged_clear_tail`] as the default for
22//! shared-memory, enclave, HSM-adjacent, or multi-principal deployments;
23//! [`crate::ct::CtEngine::decode_slice_clear_tail`] is appropriate only when
24//! the output buffer is not observable during the call.
25//!
26//! # Platform Posture
27//!
28//! The CT result gate uses architecture-specific best-effort barriers where
29//! stable Rust exposes them. On `AArch64`, the emitted CSDB hint is reported as
30//! `hardware-speculation-barrier-unattested` because older cores may treat it
31//! as a no-op; deployments must attest the exact core behavior before relying
32//! on it for high assurance. On RISC-V, `fence rw, rw` is an ordering fence,
33//! not a Spectre-v1 speculation barrier, and the built-in high-assurance
34//! runtime policy intentionally rejects that posture. RISC-V deployments on
35//! speculative cores need platform-level mitigations and startup policy checks
36//! that make the gap explicit.
37//!
38//! The dependency-free comparison helpers on redacted buffers are
39//! constant-time-oriented best effort, not formally audited MAC or token
40//! comparison primitives. Applications that can admit dependencies and need a
41//! reviewed comparison primitive should use one at the protocol boundary.
42//!
//! The CT decoder exposes only clear-tail decode APIs, plus stack-backed and
//! (with the `alloc` feature) heap-owning wrappers built on them. The former
//! non-clear-tail methods were removed before the `1.0` stable boundary
//! because they could leave decoded plaintext in caller-owned buffers after
//! malformed input errors.
47//!
48//! ```compile_fail
49//! use base64_ng::ct;
50//!
51//! let mut output = [0u8; 8];
52//! let _ = ct::STANDARD.decode_slice(b"aGk=", &mut output);
53//! ```
54//!
55//! ```compile_fail
56//! use base64_ng::ct;
57//!
58//! let mut buffer = *b"aGk=";
59//! let _ = ct::STANDARD.decode_in_place(&mut buffer);
60//! ```
61#[cfg(feature = "alloc")]
62use crate::SecretBuffer;
63use crate::{
64    Alphabet, DecodeError, DecodedBuffer, Standard, UrlSafe, decoded_capacity, read_quad,
65    wipe_bytes, wipe_tail,
66};
67use core::marker::PhantomData;
68
/// Standard Base64 constant-time-oriented decoder with padding.
///
/// This is [`CtEngine`] instantiated with the [`Standard`] alphabet and
/// `PAD = true`.
pub const STANDARD: CtEngine<Standard, true> = CtEngine::new();

/// Standard Base64 constant-time-oriented decoder without padding.
///
/// This is [`CtEngine`] instantiated with the [`Standard`] alphabet and
/// `PAD = false`.
pub const STANDARD_NO_PAD: CtEngine<Standard, false> = CtEngine::new();

/// URL-safe Base64 constant-time-oriented decoder with padding.
///
/// This is [`CtEngine`] instantiated with the [`UrlSafe`] alphabet and
/// `PAD = true`.
pub const URL_SAFE: CtEngine<UrlSafe, true> = CtEngine::new();

/// URL-safe Base64 constant-time-oriented decoder without padding.
///
/// This is [`CtEngine`] instantiated with the [`UrlSafe`] alphabet and
/// `PAD = false`.
pub const URL_SAFE_NO_PAD: CtEngine<UrlSafe, false> = CtEngine::new();
80
/// A zero-sized constant-time-oriented Base64 decoder.
///
/// The alphabet is selected by the type parameter `A` and the padding mode by
/// the const parameter `PAD`; the value itself carries no runtime state.
///
/// # Security
///
/// For ordinary secret-bearing inputs, prefer
/// [`Self::decode_slice_clear_tail`], [`Self::decode_buffer`], or
/// [`Self::decode_in_place_clear_tail`]. For shared-memory,
/// enclave-adjacent, HSM-style, or multi-principal deployments where
/// another component can observe caller-owned output during the call, use
/// [`Self::decode_slice_staged_clear_tail`] with a private staging buffer
/// so malformed input cannot transiently write decoded bytes into the
/// public output buffer before the final error gate.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CtEngine<A, const PAD: bool> {
    // Marker that ties the engine to alphabet `A` without storing a value.
    alphabet: PhantomData<A>,
}
97
98impl<A, const PAD: bool> CtEngine<A, PAD>
99where
100    A: Alphabet,
101{
102    /// Creates a new constant-time-oriented decoder engine.
103    #[must_use]
104    pub const fn new() -> Self {
105        Self {
106            alphabet: PhantomData,
107        }
108    }
109
    /// Returns whether this constant-time-oriented decoder expects padded
    /// input.
    ///
    /// The answer is the engine's `PAD` const parameter; no runtime state is
    /// consulted.
    #[must_use]
    pub const fn is_padded(&self) -> bool {
        PAD
    }
116
    /// Validates `input` without writing decoded bytes.
    ///
    /// This uses the same constant-time-oriented symbol mapping and opaque
    /// malformed-input error behavior as
    /// [`Self::decode_slice_clear_tail`]. Input length, padding length, and
    /// final success or failure remain public.
    ///
    /// Empty input is accepted.
    ///
    /// # Errors
    ///
    /// Returns [`DecodeError::InvalidLength`] when the input length cannot be
    /// valid for this engine's padding mode (not a multiple of four for padded
    /// engines; a remainder of one for unpadded engines). Other malformed
    /// content is reported through the decoder's shared error gate.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// ct::STANDARD.validate_result(b"aGVsbG8=").unwrap();
    /// assert!(ct::STANDARD.validate_result(b"aGVsbG8").is_err());
    /// ```
    pub fn validate_result(&self, input: &[u8]) -> Result<(), DecodeError> {
        ct_validate_decode::<A, PAD>(input)
    }
135
136    /// Returns whether `input` is valid for this constant-time-oriented
137    /// decoder.
138    ///
139    /// This is a convenience wrapper around [`Self::validate_result`].
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use base64_ng::ct;
145    ///
146    /// assert!(ct::URL_SAFE_NO_PAD.validate(b"-_8"));
147    /// assert!(!ct::URL_SAFE_NO_PAD.validate(b"+/8"));
148    /// ```
149    #[must_use]
150    pub fn validate(&self, input: &[u8]) -> bool {
151        self.validate_result(input).is_ok()
152    }
153
    /// Returns the exact decoded length for valid input.
    ///
    /// This uses the same constant-time-oriented validation policy as
    /// [`Self::validate_result`] before returning a length. Input length,
    /// padding length, and final success or failure remain public.
    ///
    /// Empty input yields `Ok(0)`.
    ///
    /// # Errors
    ///
    /// Returns the same error as [`Self::validate_result`] when `input` is
    /// rejected; no length is reported for invalid input.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// assert_eq!(ct::STANDARD.decoded_len(b"aGVsbG8=").unwrap(), 5);
    /// assert!(ct::STANDARD.decoded_len(b"aGVsbG8").is_err());
    /// ```
    pub fn decoded_len(&self, input: &[u8]) -> Result<usize, DecodeError> {
        ct_decoded_len::<A, PAD>(input)
    }
162
163    /// Decodes `input` into `output` and clears all bytes after the
164    /// decoded prefix.
165    ///
166    /// If decoding fails, the entire output buffer is cleared before the
167    /// error is returned. Use this variant for sensitive payloads where
168    /// partially decoded bytes from rejected input should not remain in the
169    /// caller-owned output buffer.
170    ///
171    /// # Security: Transient Plaintext Window
172    ///
173    /// Decoded bytes are written to `output` progressively during the
174    /// fixed-shape decode loop before malformed-input detection is
175    /// complete. On error, the entire `output` is wiped before returning,
176    /// but a concurrent same-process observer with access to `output`
177    /// during the call may observe transient partial plaintext from valid
178    /// leading quanta. For shared-memory, enclave-adjacent, HSM-style, or
179    /// multi-principal deployments where even transient writes are
180    /// unacceptable, use [`Self::decode_slice_staged_clear_tail`] with a
181    /// private staging buffer.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use base64_ng::ct;
187    ///
188    /// let mut output = [0xff; 8];
189    /// let written = ct::STANDARD
190    ///     .decode_slice_clear_tail(b"aGk=", &mut output)
191    ///     .unwrap();
192    ///
193    /// assert_eq!(&output[..written], b"hi");
194    /// assert!(output[written..].iter().all(|byte| *byte == 0));
195    /// ```
196    #[must_use = "handle decode errors; use decode_slice_staged_clear_tail for shared-memory or HSM-style threat models"]
197    pub fn decode_slice_clear_tail(
198        &self,
199        input: &[u8],
200        output: &mut [u8],
201    ) -> Result<usize, DecodeError> {
202        let written = match ct_decode_slice::<A, PAD>(input, output) {
203            Ok(written) => written,
204            Err(err) => {
205                crate::wipe_bytes(output);
206                return Err(err);
207            }
208        };
209        crate::wipe_tail(output, written);
210        Ok(written)
211    }
212
    /// Decodes through caller-provided private staging before copying into
    /// `output`.
    ///
    /// This variant is for shared-memory or sandboxed deployments where
    /// the caller-owned `output` buffer must not contain transient decoded
    /// bytes from malformed input. The `staging` buffer must be at least
    /// the decoded length of `input` and must not be shared with
    /// untrusted concurrent observers. On success, decoded bytes are
    /// copied from `staging` into `output`; on error, both buffers are
    /// cleared before returning.
    ///
    /// Input length, final success or failure, and decoded length remain
    /// public.
    ///
    /// # Errors
    ///
    /// Returns the validation error for malformed `input`,
    /// [`DecodeError::OutputTooSmall`] when `output` is shorter than the
    /// decoded length, and [`DecodeError::StagingTooSmall`] when `staging` is
    /// shorter than the decoded length. The output size is checked before the
    /// staging size.
    #[must_use = "handle decode errors; staged decode is for shared-memory or HSM-style threat models"]
    pub fn decode_slice_staged_clear_tail(
        &self,
        input: &[u8],
        output: &mut [u8],
        staging: &mut [u8],
    ) -> Result<usize, DecodeError> {
        ct_decode_slice_staged_clear_tail::<A, PAD>(input, output, staging)
    }
235
236    /// Decodes `input` into a stack-backed buffer.
237    ///
238    /// This uses the same constant-time-oriented scalar decoder as
239    /// [`Self::decode_slice_clear_tail`] and clears the internal backing
240    /// array before returning an error.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// use base64_ng::ct;
246    ///
247    /// let decoded = ct::STANDARD.decode_buffer::<5>(b"aGVsbG8=").unwrap();
248    ///
249    /// assert_eq!(decoded.as_bytes(), b"hello");
250    /// ```
251    pub fn decode_buffer<const CAP: usize>(
252        &self,
253        input: &[u8],
254    ) -> Result<DecodedBuffer<CAP>, DecodeError> {
255        let mut output = DecodedBuffer::new();
256        let written = match self.decode_slice_clear_tail(input, output.as_mut_capacity()) {
257            Ok(written) => written,
258            Err(err) => {
259                output.clear();
260                return Err(err);
261            }
262        };
263        output.set_filled(written)?;
264        Ok(output)
265    }
266
267    /// Decodes `input` into an owned byte vector.
268    ///
269    /// This uses the same constant-time-oriented scalar decoder as
270    /// [`Self::decode_slice_clear_tail`]. If decoding fails, the allocated
271    /// output buffer is cleared before the error is returned.
272    ///
273    /// Use [`Self::decode_secret`] for secret-bearing payloads that should stay
274    /// on the crate's redacted, drop-wiping buffer path. Use
275    /// [`Self::decode_secret_staged`] for shared-memory, enclave-adjacent,
276    /// HSM-style, or multi-principal deployments where even transient writes
277    /// into the final heap allocation are unacceptable.
278    #[cfg(feature = "alloc")]
279    #[must_use = "for secret-bearing payloads use decode_secret, which returns a redacted buffer with drop-time cleanup"]
280    pub fn decode_vec(&self, input: &[u8]) -> Result<alloc::vec::Vec<u8>, DecodeError> {
281        let required = self.decoded_len(input)?;
282        let mut output = alloc::vec![0; required];
283        // decode_slice_clear_tail wipes output on error.
284        let written = self.decode_slice_clear_tail(input, &mut output)?;
285        output.truncate(written);
286        Ok(output)
287    }
288
289    /// Decodes `input` into a redacted owned secret buffer.
290    ///
291    /// This is the recommended heap-owning CT decode path for secret-bearing
292    /// payloads. It decodes with [`Self::decode_vec`] and then wraps the result
293    /// in [`SecretBuffer`], which redacts formatting and clears initialized
294    /// bytes plus spare vector capacity on drop.
295    ///
296    /// # Security: Transient Plaintext Window
297    ///
298    /// This function uses the non-staged CT decode path. Decoded bytes are
299    /// written transiently into the heap allocation before the final error
300    /// gate. On error, the allocation is wiped before returning, but a
301    /// concurrent same-process observer with access to that allocation during
302    /// the call may observe transient partial plaintext. For shared-memory,
303    /// enclave-adjacent, HSM-style, or multi-principal deployments where even
304    /// transient writes into the final heap allocation are unacceptable, use
305    /// [`Self::decode_secret_staged`] with a stack-backed private staging
306    /// capacity large enough for the decoded value.
307    ///
308    /// # Examples
309    ///
310    /// ```
311    /// use base64_ng::ct;
312    ///
313    /// let decoded = ct::STANDARD.decode_secret(b"aGVsbG8=").unwrap();
314    /// assert!(decoded.constant_time_eq_public_len(b"hello"));
315    /// ```
316    #[cfg(feature = "alloc")]
317    pub fn decode_secret(&self, input: &[u8]) -> Result<SecretBuffer, DecodeError> {
318        self.decode_vec(input).map(SecretBuffer::from_vec)
319    }
320
321    /// Decodes `input` into a redacted owned secret buffer through private
322    /// stack staging.
323    ///
324    /// `STAGE` must be at least the decoded length of `input`. Decoded bytes
325    /// are written to a stack-backed staging buffer first and copied into the
326    /// returned heap buffer only after the full constant-time-oriented decode
327    /// succeeds. On error, both staging and heap output buffers are wiped before
328    /// returning.
329    ///
330    /// This is the preferred owned decode API for shared-memory,
331    /// enclave-adjacent, HSM-style, or multi-principal deployments where the
332    /// final heap allocation must not contain transient partial plaintext from
333    /// rejected input.
334    ///
335    /// # Errors
336    ///
337    /// Returns [`DecodeError::StagingTooSmall`] if `STAGE` is smaller than the
338    /// decoded length of `input`. `STAGE` is checked at runtime because the
339    /// encoded input length is not a compile-time value.
340    ///
341    /// # Examples
342    ///
343    /// ```
344    /// use base64_ng::ct;
345    ///
346    /// let decoded = ct::STANDARD
347    ///     .decode_secret_staged::<5>(b"aGVsbG8=")
348    ///     .unwrap();
349    /// assert!(decoded.constant_time_eq_public_len(b"hello"));
350    /// ```
351    #[cfg(feature = "alloc")]
352    pub fn decode_secret_staged<const STAGE: usize>(
353        &self,
354        input: &[u8],
355    ) -> Result<SecretBuffer, DecodeError> {
356        let required = self.decoded_len(input)?;
357        let mut staging = DecodedBuffer::<STAGE>::new();
358        let mut output = alloc::vec![0; required];
359        let written =
360            self.decode_slice_staged_clear_tail(input, &mut output, staging.as_mut_capacity())?;
361        output.truncate(written);
362        Ok(SecretBuffer::from_vec(output))
363    }
364
365    /// Decodes `buffer` in place and clears all bytes after the decoded
366    /// prefix.
367    ///
368    /// If decoding fails, the entire buffer is cleared before the error is
369    /// returned.
370    ///
371    /// # Security: Transient Plaintext Window
372    ///
373    /// This in-place API writes decoded bytes into `buffer` during the
374    /// fixed-shape decode loop before malformed-input detection is
375    /// complete. On error, the entire buffer is wiped before returning,
376    /// but concurrent same-process observers with access to the same memory
377    /// can observe transient partial plaintext. Use
378    /// [`Self::decode_slice_staged_clear_tail`] with a private staging
379    /// buffer when shared-memory or enclave-adjacent deployments cannot
380    /// tolerate that window.
381    ///
382    /// # Examples
383    ///
384    /// ```
385    /// use base64_ng::ct;
386    ///
387    /// let mut buffer = *b"aGk=";
388    /// let decoded = ct::STANDARD.decode_in_place_clear_tail(&mut buffer).unwrap();
389    ///
390    /// assert_eq!(decoded, b"hi");
391    /// ```
392    pub fn decode_in_place_clear_tail<'a>(
393        &self,
394        buffer: &'a mut [u8],
395    ) -> Result<&'a mut [u8], DecodeError> {
396        let len = match ct_decode_in_place::<A, PAD>(buffer) {
397            Ok(len) => len,
398            Err(err) => {
399                crate::wipe_bytes(buffer);
400                return Err(err);
401            }
402        };
403        crate::wipe_tail(buffer, len);
404        Ok(&mut buffer[..len])
405    }
406}
407
408impl<A, const PAD: bool> core::fmt::Display for CtEngine<A, PAD> {
409    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
410        write!(formatter, "ct padded={PAD}")
411    }
412}
413
/// Expands the lowest bit of `bit` into a full-byte mask.
///
/// Returns `0xff` when bit 0 is set and `0x00` otherwise; the upper seven
/// bits of `bit` are ignored. No branch is involved.
#[inline]
pub(crate) const fn ct_mask_bit(bit: u8) -> u8 {
    // Two's-complement negation maps 1 -> 0xff and 0 -> 0x00.
    (bit & 1).wrapping_neg()
}
418
419#[inline]
420pub(crate) const fn ct_mask_nonzero_u8(value: u8) -> u8 {
421    let wide = value as u16;
422    let negative = 0u16.wrapping_sub(wide);
423    let nonzero = ((wide | negative) >> 8) as u8;
424    ct_mask_bit(nonzero)
425}
426
427#[inline]
428pub(crate) const fn ct_mask_eq_u8(left: u8, right: u8) -> u8 {
429    !ct_mask_nonzero_u8(left ^ right)
430}
431
432#[inline]
433pub(crate) const fn ct_mask_lt_u8(left: u8, right: u8) -> u8 {
434    let diff = (left as u16).wrapping_sub(right as u16);
435    ct_mask_bit((diff >> 8) as u8)
436}
437
438#[inline(never)]
439pub(crate) fn constant_time_eq_public_len(left: &[u8], right: &[u8]) -> bool {
440    if left.len() != right.len() {
441        return false;
442    }
443
444    constant_time_eq_same_len(left, right)
445}
446
447#[inline(never)]
448pub(crate) fn constant_time_eq_fixed_width_array<const N: usize>(
449    left: &[u8; N],
450    right: &[u8; N],
451) -> bool {
452    constant_time_eq_same_len(left, right)
453}
454
455#[inline(never)]
456#[allow(unsafe_code)]
457fn constant_time_eq_same_len(left: &[u8], right: &[u8]) -> bool {
458    let mut diff = 0u8;
459    for (left, right) in left.iter().zip(right) {
460        diff = ct_accumulate_u8(diff, *left ^ *right);
461    }
462    ct_error_gate_barrier(diff, 0);
463    // SAFETY: `diff` is an initialized local `u8`; this final volatile read
464    // keeps the public equality comparison dependent on a post-barrier load of
465    // the accumulated value.
466    let result = unsafe { core::ptr::read_volatile(&raw const diff) };
467    result == 0
468}
469
/// ORs `value` into `accumulator` behind optimizer barriers.
///
/// Both operands pass through `core::hint::black_box` before the OR, and the
/// result is re-read with a volatile load before it is returned. The function
/// is marked `#[inline(never)]`.
#[inline(never)]
#[allow(unsafe_code)]
fn ct_accumulate_u8(accumulator: u8, value: u8) -> u8 {
    let result = core::hint::black_box(accumulator) | core::hint::black_box(value);
    // SAFETY: `result` is an initialized local `u8`; the volatile read is a
    // dependency-free optimizer barrier for the accumulation value and does not
    // access caller memory.
    unsafe { core::ptr::read_volatile(&raw const result) }
}
479
480fn ct_decode_slice<A: Alphabet, const PAD: bool>(
481    input: &[u8],
482    output: &mut [u8],
483) -> Result<usize, DecodeError> {
484    if input.is_empty() {
485        return Ok(0);
486    }
487
488    if PAD {
489        ct_decode_padded::<A>(input, output)
490    } else {
491        ct_decode_unpadded::<A>(input, output)
492    }
493}
494
495fn ct_decode_slice_staged_clear_tail<A: Alphabet, const PAD: bool>(
496    input: &[u8],
497    output: &mut [u8],
498    staging: &mut [u8],
499) -> Result<usize, DecodeError> {
500    let required = match ct_decoded_len::<A, PAD>(input) {
501        Ok(required) => required,
502        Err(err) => {
503            wipe_bytes(output);
504            wipe_bytes(staging);
505            return Err(err);
506        }
507    };
508
509    if output.len() < required {
510        wipe_bytes(output);
511        wipe_bytes(staging);
512        return Err(DecodeError::OutputTooSmall {
513            required,
514            available: output.len(),
515        });
516    }
517
518    if staging.len() < required {
519        wipe_bytes(output);
520        wipe_bytes(staging);
521        return Err(DecodeError::StagingTooSmall {
522            required,
523            available: staging.len(),
524        });
525    }
526
527    let written = match ct_decode_slice::<A, PAD>(input, &mut staging[..required]) {
528        Ok(written) => written,
529        Err(err) => {
530            wipe_bytes(output);
531            wipe_bytes(staging);
532            return Err(err);
533        }
534    };
535
536    output[..written].copy_from_slice(&staging[..written]);
537    wipe_bytes(staging);
538    wipe_tail(output, written);
539    Ok(written)
540}
541
542fn ct_decode_in_place<A: Alphabet, const PAD: bool>(
543    buffer: &mut [u8],
544) -> Result<usize, DecodeError> {
545    if buffer.is_empty() {
546        return Ok(0);
547    }
548
549    if PAD {
550        ct_decode_padded_in_place::<A>(buffer)
551    } else {
552        ct_decode_unpadded_in_place::<A>(buffer)
553    }
554}
555
/// Best-effort barrier placed before the public success/failure decision.
///
/// The combined error mask is passed through `core::hint::black_box`, then a
/// `SeqCst` compiler fence is issued, then an architecture-specific
/// instruction sequence is emitted where one is configured:
///
/// - x86 / x86_64: `lfence`
/// - AArch64: `isb sy` followed by `hint #20`
/// - ARM: `isb sy`
/// - RISC-V (32/64): `fence rw, rw`
///
/// The assembly blocks are compiled out under `miri` and `kani`, and on any
/// other architecture only the `black_box` and compiler fence remain. The
/// function returns nothing; callers read their masks again afterwards.
#[inline(never)]
#[allow(unsafe_code)]
fn ct_error_gate_barrier(invalid_byte: u8, invalid_padding: u8) {
    core::hint::black_box(invalid_byte | invalid_padding);
    core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst);

    #[cfg(all(not(miri), not(kani), any(target_arch = "x86", target_arch = "x86_64")))]
    {
        // NOTE(review): this is the only block that sets `nomem`; the other
        // architectures omit it. Confirm the difference is intentional.
        // SAFETY: `lfence` does not access memory and is used as a speculation
        // barrier before the public success/failure branch is observed.
        unsafe {
            core::arch::asm!("lfence", options(nostack, preserves_flags, nomem));
        }
    }

    #[cfg(all(not(miri), not(kani), target_arch = "aarch64"))]
    {
        // Older cores may treat CSDB as a no-op; runtime reporting marks this
        // as unattested until the deployment provides platform evidence.
        // SAFETY: these barriers do not access memory.
        unsafe {
            core::arch::asm!("isb sy", "hint #20", options(nostack, preserves_flags));
        }
    }

    #[cfg(all(not(miri), not(kani), target_arch = "arm"))]
    {
        // SAFETY: `isb sy` does not access memory and is used as the best
        // available stable ARM speculation boundary for this crate.
        unsafe {
            core::arch::asm!("isb sy", options(nostack, preserves_flags));
        }
    }

    #[cfg(all(
        not(miri),
        not(kani),
        any(target_arch = "riscv32", target_arch = "riscv64")
    ))]
    {
        // RISC-V base ISA does not provide a canonical speculation barrier.
        // `fence rw, rw` is the available ordering primitive for the CT public
        // result gate and is reported separately as `ordering-fence`; callers
        // on speculative RISC-V cores must use platform mitigations because
        // this does not satisfy `BackendPolicy::HighAssuranceScalarOnly`.
        // SAFETY: the assembly block does not access memory.
        unsafe {
            core::arch::asm!("fence rw, rw", options(nostack, preserves_flags));
        }
    }
}
607
608fn ct_validate_decode<A: Alphabet, const PAD: bool>(input: &[u8]) -> Result<(), DecodeError> {
609    if input.is_empty() {
610        return Ok(());
611    }
612
613    if PAD {
614        ct_validate_padded::<A>(input)
615    } else {
616        ct_validate_unpadded::<A>(input)
617    }
618}
619
620fn ct_decoded_len<A: Alphabet, const PAD: bool>(input: &[u8]) -> Result<usize, DecodeError> {
621    ct_validate_decode::<A, PAD>(input)?;
622    if input.is_empty() {
623        return Ok(0);
624    }
625
626    if PAD {
627        Ok(input.len() / 4 * 3 - ct_padding_len(input))
628    } else {
629        let full_quads = input.len() / 4 * 3;
630        match input.len() % 4 {
631            0 => Ok(full_quads),
632            2 => Ok(full_quads + 1),
633            3 => Ok(full_quads + 2),
634            _ => Err(DecodeError::InvalidLength),
635        }
636    }
637}
638
/// Validates padded Base64 input without producing decoded bytes.
///
/// The length must be a multiple of four, otherwise `InvalidLength` is
/// returned immediately. Every quad except the last is checked as a full
/// data quad; the last quad is checked by [`ct_padded_final_quantum`] using
/// the padding length. Problems are accumulated into two masks rather than
/// returned at the first occurrence, and the combined result is reported once
/// at the end through `report_ct_error`.
fn ct_validate_padded<A: Alphabet>(input: &[u8]) -> Result<(), DecodeError> {
    if !input.len().is_multiple_of(4) {
        return Err(DecodeError::InvalidLength);
    }

    let padding = ct_padding_len(input);
    // Sticky error masks: once any bit is set it stays set until the report.
    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut read = 0;

    // Strict `<` leaves the last quad for the final-quantum check below.
    while read + 4 < input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        // The decoded values are discarded here; only the validity masks matter.
        let (_, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (_, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (_, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (_, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // `=` in the third or fourth position of a non-final quad is flagged
        // as invalid padding.
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');
        read += 4;
    }

    let final_chunk =
        read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
    // Only the two error masks of the final quantum are needed for validation.
    let (_, final_invalid_byte, final_invalid_padding, _) =
        ct_padded_final_quantum::<A>(final_chunk, padding);
    invalid_byte |= final_invalid_byte;
    invalid_padding |= final_invalid_padding;

    report_ct_error(invalid_byte, invalid_padding)
}
675
/// Validates unpadded Base64 input without producing decoded bytes.
///
/// A length with remainder one modulo four is rejected immediately with
/// `InvalidLength`. Every full quad is checked symbol by symbol, and any `=`
/// is flagged as invalid padding. A two- or three-symbol tail is checked the
/// same way, and its unused low bits must be zero. Problems are accumulated
/// into two masks and reported once at the end through `report_ct_error`.
fn ct_validate_unpadded<A: Alphabet>(input: &[u8]) -> Result<(), DecodeError> {
    if input.len() % 4 == 1 {
        return Err(DecodeError::InvalidLength);
    }

    // Sticky error masks: once any bit is set it stays set until the report.
    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut read = 0;

    // `<=` consumes every complete quad; only a partial tail remains after.
    while read + 4 <= input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        let (_, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (_, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (_, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (_, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // Unpadded input may not contain `=` in any position.
        invalid_padding |= ct_mask_eq_u8(b0, b'=');
        invalid_padding |= ct_mask_eq_u8(b1, b'=');
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');

        read += 4;
    }

    match read_tail_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding) {
        [] => {}
        [b0, b1] => {
            let (_, valid0) = ct_decode_alphabet_byte::<A>(*b0);
            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
            invalid_byte |= !valid0;
            invalid_byte |= !valid1;
            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
            // Two symbols carry one byte; the low four bits of the second
            // symbol are unused and must be zero.
            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
        }
        [b0, b1, b2] => {
            let (_, valid0) = ct_decode_alphabet_byte::<A>(*b0);
            let (_, valid1) = ct_decode_alphabet_byte::<A>(*b1);
            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
            invalid_byte |= !valid0;
            invalid_byte |= !valid1;
            invalid_byte |= !valid2;
            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
            // Three symbols carry two bytes; the low two bits of the third
            // symbol are unused and must be zero.
            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
        }
        // Any other tail shape is treated as fully invalid.
        _ => {
            invalid_byte = 0xff;
            invalid_padding = 0xff;
        }
    }

    report_ct_error(invalid_byte, invalid_padding)
}
736
/// Decodes and checks the last quad of padded input.
///
/// `padding` is the number of trailing `=` symbols the caller counted for the
/// input. The return value is `(bytes, invalid_byte, invalid_padding,
/// written)`:
///
/// - `bytes`: the three candidate output bytes assembled from the quad;
/// - `invalid_byte`: mask set when a symbol that must be a data symbol for
///   this padding length did not map through the alphabet;
/// - `invalid_padding`: mask set when `=` appears where data is required or
///   when the unused low bits of the last data symbol are non-zero;
/// - `written`: `3 - padding`, the number of leading `bytes` that are
///   meaningful.
///
/// A `padding` value above two returns zeroed bytes, both masks fully set,
/// and `written == 0`.
pub(crate) fn ct_padded_final_quantum<A: Alphabet>(
    input: [u8; 4],
    padding: usize,
) -> ([u8; 3], u8, u8, usize) {
    let [b0, b1, b2, b3] = input;
    let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
    let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
    let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
    let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);

    // Narrow the padding count to a byte so it can feed the mask helpers.
    let padding_byte = match padding {
        0 => 0,
        1 => 1,
        2 => 2,
        _ => return ([0; 3], 0xff, 0xff, 0),
    };
    // Exactly one of these three masks is 0xff for a padding of 0, 1, or 2.
    let no_padding = ct_mask_eq_u8(padding_byte, 0);
    let one_padding = ct_mask_eq_u8(padding_byte, 1);
    let two_padding = ct_mask_eq_u8(padding_byte, 2);
    // The third symbol must be data unless two `=` are present; the fourth
    // must be data only when there is no padding at all.
    let require_v2 = no_padding | one_padding;
    let require_v3 = no_padding;

    let invalid_byte = !valid0 | !valid1 | (!valid2 & require_v2) | (!valid3 & require_v3);
    // Per padding length: with two `=`, the low four bits of the second symbol
    // must be zero; with one `=`, the third symbol may not be `=` and its low
    // two bits must be zero; with none, neither trailing symbol may be `=`.
    let invalid_padding = (ct_mask_nonzero_u8(v1 & 0b0000_1111) & two_padding)
        | ((ct_mask_eq_u8(b2, b'=') | ct_mask_nonzero_u8(v2 & 0b0000_0011)) & one_padding)
        | ((ct_mask_eq_u8(b2, b'=') | ct_mask_eq_u8(b3, b'=')) & no_padding);

    (
        [(v0 << 2) | (v1 >> 4), (v1 << 4) | (v2 >> 2), (v2 << 6) | v3],
        invalid_byte,
        invalid_padding,
        3 - padding,
    )
}
771
/// Decodes padded Base64 `input` into `output` and returns the number of
/// bytes written.
///
/// The length must be a multiple of four (`InvalidLength` otherwise), and
/// `output` must hold at least the decoded length (`OutputTooSmall`
/// otherwise). Decoded bytes are written as each quad is processed; symbol
/// and padding problems are accumulated into masks and reported only after
/// the last quad through `report_ct_error`. On such an error `output` may
/// therefore already contain decoded bytes — this function does not wipe it.
fn ct_decode_padded<A: Alphabet>(input: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> {
    if !input.len().is_multiple_of(4) {
        return Err(DecodeError::InvalidLength);
    }

    let padding = ct_padding_len(input);
    // NOTE(review): this subtraction runs before validation and relies on
    // `ct_padding_len` never exceeding `input.len() / 4 * 3`; that helper is
    // not visible here — confirm its range.
    let required = input.len() / 4 * 3 - padding;
    if output.len() < required {
        return Err(DecodeError::OutputTooSmall {
            required,
            available: output.len(),
        });
    }

    // Sticky error masks: once any bit is set it stays set until the report.
    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut write = 0;
    let mut read = 0;

    // Strict `<` leaves the last quad for the final-quantum handling below.
    while read + 4 < input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // `=` in the third or fourth position of a non-final quad is flagged
        // as invalid padding.
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');
        // Bytes are written whether or not the quad was valid; the error is
        // only surfaced after the loop.
        output[write] = (v0 << 2) | (v1 >> 4);
        output[write + 1] = (v1 << 4) | (v2 >> 2);
        output[write + 2] = (v2 << 6) | v3;
        write += 3;
        read += 4;
    }

    let final_chunk =
        read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
    let (final_bytes, final_invalid_byte, final_invalid_padding, final_written) =
        ct_padded_final_quantum::<A>(final_chunk, padding);
    invalid_byte |= final_invalid_byte;
    invalid_padding |= final_invalid_padding;
    // Only the meaningful prefix of the final quantum is copied out.
    output[write..write + final_written].copy_from_slice(&final_bytes[..final_written]);
    write += final_written;

    report_ct_error(invalid_byte, invalid_padding)?;
    Ok(write)
}
824
825fn ct_decode_padded_in_place<A: Alphabet>(buffer: &mut [u8]) -> Result<usize, DecodeError> {
826    if !buffer.len().is_multiple_of(4) {
827        return Err(DecodeError::InvalidLength);
828    }
829
830    let padding = ct_padding_len(buffer);
831    let required = buffer.len() / 4 * 3 - padding;
832    if required > buffer.len() {
833        wipe_bytes(buffer);
834        return Err(DecodeError::InvalidInput);
835    }
836
837    let mut invalid_byte = 0u8;
838    let mut invalid_padding = 0u8;
839    let mut write = 0;
840    let mut read = 0;
841
842    while read + 4 < buffer.len() {
843        let [b0, b1, b2, b3] =
844            read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
845        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
846        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
847        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
848        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
849
850        invalid_byte |= !valid0;
851        invalid_byte |= !valid1;
852        invalid_byte |= !valid2;
853        invalid_byte |= !valid3;
854        invalid_padding |= ct_mask_eq_u8(b2, b'=');
855        invalid_padding |= ct_mask_eq_u8(b3, b'=');
856        buffer[write] = (v0 << 2) | (v1 >> 4);
857        buffer[write + 1] = (v1 << 4) | (v2 >> 2);
858        buffer[write + 2] = (v2 << 6) | v3;
859        write += 3;
860        read += 4;
861    }
862
863    let final_chunk =
864        read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
865    let (final_bytes, final_invalid_byte, final_invalid_padding, final_written) =
866        ct_padded_final_quantum::<A>(final_chunk, padding);
867    invalid_byte |= final_invalid_byte;
868    invalid_padding |= final_invalid_padding;
869    buffer[write..write + final_written].copy_from_slice(&final_bytes[..final_written]);
870    write += final_written;
871
872    if write != required {
873        ct_error_gate_barrier(invalid_byte, invalid_padding);
874        wipe_bytes(buffer);
875        return Err(DecodeError::InvalidInput);
876    }
877    if let Err(err) = report_ct_error(invalid_byte, invalid_padding) {
878        wipe_bytes(buffer);
879        return Err(err);
880    }
881    Ok(write)
882}
883
884fn ct_decode_unpadded<A: Alphabet>(input: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> {
885    if input.len() % 4 == 1 {
886        return Err(DecodeError::InvalidLength);
887    }
888
889    let required = decoded_capacity(input.len());
890    if output.len() < required {
891        return Err(DecodeError::OutputTooSmall {
892            required,
893            available: output.len(),
894        });
895    }
896
897    let mut invalid_byte = 0u8;
898    let mut invalid_padding = 0u8;
899    let mut write = 0;
900    let mut read = 0;
901
902    while read + 4 <= input.len() {
903        let [b0, b1, b2, b3] =
904            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
905        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
906        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
907        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
908        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
909
910        invalid_byte |= !valid0;
911        invalid_byte |= !valid1;
912        invalid_byte |= !valid2;
913        invalid_byte |= !valid3;
914        invalid_padding |= ct_mask_eq_u8(b0, b'=');
915        invalid_padding |= ct_mask_eq_u8(b1, b'=');
916        invalid_padding |= ct_mask_eq_u8(b2, b'=');
917        invalid_padding |= ct_mask_eq_u8(b3, b'=');
918
919        output[write] = (v0 << 2) | (v1 >> 4);
920        output[write + 1] = (v1 << 4) | (v2 >> 2);
921        output[write + 2] = (v2 << 6) | v3;
922        read += 4;
923        write += 3;
924    }
925
926    match read_tail_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding) {
927        [] => {}
928        [b0, b1] => {
929            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
930            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
931            invalid_byte |= !valid0;
932            invalid_byte |= !valid1;
933            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
934            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
935            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
936            output[write] = (v0 << 2) | (v1 >> 4);
937            write += 1;
938        }
939        [b0, b1, b2] => {
940            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
941            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
942            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
943            invalid_byte |= !valid0;
944            invalid_byte |= !valid1;
945            invalid_byte |= !valid2;
946            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
947            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
948            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
949            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
950            output[write] = (v0 << 2) | (v1 >> 4);
951            output[write + 1] = (v1 << 4) | (v2 >> 2);
952            write += 2;
953        }
954        _ => {
955            invalid_byte = 0xff;
956            invalid_padding = 0xff;
957        }
958    }
959
960    report_ct_error(invalid_byte, invalid_padding)?;
961    Ok(write)
962}
963
964fn ct_decode_unpadded_in_place<A: Alphabet>(buffer: &mut [u8]) -> Result<usize, DecodeError> {
965    if buffer.len() % 4 == 1 {
966        return Err(DecodeError::InvalidLength);
967    }
968
969    let required = decoded_capacity(buffer.len());
970    if required > buffer.len() {
971        wipe_bytes(buffer);
972        return Err(DecodeError::InvalidInput);
973    }
974
975    let mut invalid_byte = 0u8;
976    let mut invalid_padding = 0u8;
977    let mut write = 0;
978    let mut read = 0;
979
980    while read + 4 <= buffer.len() {
981        let [b0, b1, b2, b3] =
982            read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
983        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
984        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
985        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
986        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
987
988        invalid_byte |= !valid0;
989        invalid_byte |= !valid1;
990        invalid_byte |= !valid2;
991        invalid_byte |= !valid3;
992        invalid_padding |= ct_mask_eq_u8(b0, b'=');
993        invalid_padding |= ct_mask_eq_u8(b1, b'=');
994        invalid_padding |= ct_mask_eq_u8(b2, b'=');
995        invalid_padding |= ct_mask_eq_u8(b3, b'=');
996
997        buffer[write] = (v0 << 2) | (v1 >> 4);
998        buffer[write + 1] = (v1 << 4) | (v2 >> 2);
999        buffer[write + 2] = (v2 << 6) | v3;
1000        read += 4;
1001        write += 3;
1002    }
1003
1004    let tail = read_tail_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
1005    match tail {
1006        [] => {}
1007        [b0, b1] => {
1008            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
1009            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
1010            invalid_byte |= !valid0;
1011            invalid_byte |= !valid1;
1012            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
1013            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
1014            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
1015            buffer[write] = (v0 << 2) | (v1 >> 4);
1016            write += 1;
1017        }
1018        [b0, b1, b2] => {
1019            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
1020            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
1021            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
1022            invalid_byte |= !valid0;
1023            invalid_byte |= !valid1;
1024            invalid_byte |= !valid2;
1025            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
1026            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
1027            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
1028            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
1029            buffer[write] = (v0 << 2) | (v1 >> 4);
1030            buffer[write + 1] = (v1 << 4) | (v2 >> 2);
1031            write += 2;
1032        }
1033        _ => {
1034            invalid_byte = 0xff;
1035            invalid_padding = 0xff;
1036        }
1037    }
1038
1039    if write != required {
1040        ct_error_gate_barrier(invalid_byte, invalid_padding);
1041        wipe_bytes(buffer);
1042        return Err(DecodeError::InvalidInput);
1043    }
1044    if let Err(err) = report_ct_error(invalid_byte, invalid_padding) {
1045        wipe_bytes(buffer);
1046        return Err(err);
1047    }
1048    Ok(write)
1049}
1050
1051fn read_tail(input: &[u8], offset: usize) -> Result<&[u8], DecodeError> {
1052    input.get(offset..).ok_or(DecodeError::InvalidLength)
1053}
1054
1055fn read_quad_or_mark_invalid(
1056    input: &[u8],
1057    offset: usize,
1058    invalid_byte: &mut u8,
1059    invalid_padding: &mut u8,
1060) -> [u8; 4] {
1061    if let Ok(quad) = read_quad(input, offset) {
1062        quad
1063    } else {
1064        debug_assert!(
1065            false,
1066            "read_quad failed inside length-validated constant-time decode loop"
1067        );
1068        *invalid_byte = 0xff;
1069        *invalid_padding = 0xff;
1070        [0; 4]
1071    }
1072}
1073
1074fn read_tail_or_mark_invalid<'a>(
1075    input: &'a [u8],
1076    offset: usize,
1077    invalid_byte: &mut u8,
1078    invalid_padding: &mut u8,
1079) -> &'a [u8] {
1080    if let Ok(tail) = read_tail(input, offset) {
1081        tail
1082    } else {
1083        debug_assert!(
1084            false,
1085            "read_tail failed inside length-validated constant-time decode loop"
1086        );
1087        *invalid_byte = 0xff;
1088        *invalid_padding = 0xff;
1089        &[]
1090    }
1091}
1092
1093#[inline(never)]
1094#[allow(unsafe_code)]
1095fn ct_decode_alphabet_byte<A: Alphabet>(byte: u8) -> (u8, u8) {
1096    let mut decoded = 0u8;
1097    let mut valid = 0u8;
1098    let mut candidate = 0u8;
1099
1100    while candidate < 64 {
1101        let matches = core::hint::black_box(ct_mask_eq_u8(
1102            core::hint::black_box(byte),
1103            core::hint::black_box(A::ENCODE[candidate as usize]),
1104        ));
1105        decoded = ct_accumulate_u8(decoded, candidate & matches);
1106        valid = ct_accumulate_u8(valid, matches);
1107        candidate += 1;
1108    }
1109
1110    (decoded, valid)
1111}
1112
1113fn ct_padding_len(input: &[u8]) -> usize {
1114    let Some((&last, before_last_prefix)) = input.split_last() else {
1115        return 0;
1116    };
1117    let Some(&before_last) = before_last_prefix.last() else {
1118        return 0;
1119    };
1120    usize::from(ct_mask_eq_u8(last, b'=') & 1) + usize::from(ct_mask_eq_u8(before_last, b'=') & 1)
1121}
1122
1123pub(crate) fn report_ct_error(invalid_byte: u8, invalid_padding: u8) -> Result<(), DecodeError> {
1124    ct_error_gate_barrier(invalid_byte, invalid_padding);
1125
1126    if (invalid_byte | invalid_padding) != 0 {
1127        Err(DecodeError::InvalidInput)
1128    } else {
1129        Ok(())
1130    }
1131}