Skip to main content

base64_ng/
ct.rs

1//! Constant-time-oriented scalar decoding APIs.
2//!
3//! This module is separate from the default decoder so callers can opt into a
4//! slower path with a narrower timing target. It avoids lookup tables indexed
5//! by secret input bytes while mapping Base64 symbols and reports malformed
6//! content through one opaque error. It is not documented as a formally
7//! verified cryptographic constant-time API.
8//!
9//! # Security
10//!
11//! Input length, decoded length, selected alphabet, and final success or
12//! failure remain public. The clear-tail methods wipe caller-owned output on
13//! error, but decoded bytes are written during the fixed-shape decode loop
14//! before final validation is reported. In shared-memory, enclave, or HSM-style
15//! threat models where another component can observe the output buffer during
16//! the call, prefer [`crate::ct::CtEngine::decode_slice_staged_clear_tail`]
17//! with a private staging buffer. In those deployments,
18//! [`crate::ct::CtEngine::decode_slice_clear_tail`] is not sufficient by
19//! itself because it wipes caller-owned output only after the internal decode
20//! loop reaches the final error gate. Treat
21//! [`crate::ct::CtEngine::decode_slice_staged_clear_tail`] as the default for
22//! shared-memory, enclave, HSM-adjacent, or multi-principal deployments;
23//! [`crate::ct::CtEngine::decode_slice_clear_tail`] is appropriate only when
24//! the output buffer is not observable during the call.
25//!
26//! # Platform Posture
27//!
28//! The CT result gate uses architecture-specific best-effort barriers where
29//! stable Rust exposes them. On `AArch64`, the emitted CSDB hint is reported as
30//! `hardware-speculation-barrier-unattested` because older cores may treat it
31//! as a no-op; deployments must attest the exact core behavior before relying
32//! on it for high assurance. On RISC-V, `fence rw, rw` is an ordering fence,
33//! not a Spectre-v1 speculation barrier, and the built-in high-assurance
34//! runtime policy intentionally rejects that posture. RISC-V deployments on
35//! speculative cores need platform-level mitigations and startup policy checks
36//! that make the gap explicit.
37//!
38//! The dependency-free comparison helpers on redacted buffers are
39//! constant-time-oriented best effort, not formally audited MAC or token
40//! comparison primitives. Applications that can admit dependencies and need a
41//! reviewed comparison primitive should use one at the protocol boundary.
42//!
43//! The CT decoder exposes only clear-tail and stack-backed decode APIs. The
44//! former non-clear-tail methods were removed before the `1.0` stable boundary
45//! because they could leave decoded plaintext in caller-owned buffers after
46//! malformed input errors.
47//!
48//! ```compile_fail
49//! use base64_ng::ct;
50//!
51//! let mut output = [0u8; 8];
52//! let _ = ct::STANDARD.decode_slice(b"aGk=", &mut output);
53//! ```
54//!
55//! ```compile_fail
56//! use base64_ng::ct;
57//!
58//! let mut buffer = *b"aGk=";
59//! let _ = ct::STANDARD.decode_in_place(&mut buffer);
60//! ```
61#[cfg(feature = "alloc")]
62use crate::SecretBuffer;
63use crate::{
64    Alphabet, DecodeError, DecodedBuffer, Standard, UrlSafe, decoded_capacity, read_quad,
65    wipe_bytes, wipe_tail,
66};
67use core::marker::PhantomData;
68
/// Standard Base64 constant-time-oriented decoder with padding.
///
/// Equivalent to `CtEngine::<Standard, true>::new()`.
pub const STANDARD: CtEngine<Standard, true> = CtEngine::new();

/// Standard Base64 constant-time-oriented decoder without padding.
///
/// Equivalent to `CtEngine::<Standard, false>::new()`.
pub const STANDARD_NO_PAD: CtEngine<Standard, false> = CtEngine::new();

/// URL-safe Base64 constant-time-oriented decoder with padding.
///
/// Equivalent to `CtEngine::<UrlSafe, true>::new()`.
pub const URL_SAFE: CtEngine<UrlSafe, true> = CtEngine::new();

/// URL-safe Base64 constant-time-oriented decoder without padding.
///
/// Equivalent to `CtEngine::<UrlSafe, false>::new()`.
pub const URL_SAFE_NO_PAD: CtEngine<UrlSafe, false> = CtEngine::new();
80
/// A zero-sized constant-time-oriented Base64 decoder.
///
/// The `A` type parameter selects the alphabet and the `PAD` const parameter
/// selects whether padded input is expected (see [`Self::is_padded`]).
///
/// # Security
///
/// For ordinary secret-bearing inputs, prefer
/// [`Self::decode_slice_clear_tail`], [`Self::decode_buffer`], or
/// [`Self::decode_in_place_clear_tail`]. For shared-memory,
/// enclave-adjacent, HSM-style, or multi-principal deployments where
/// another component can observe caller-owned output during the call, use
/// [`Self::decode_slice_staged_clear_tail`] with a private staging buffer
/// so malformed input cannot transiently write decoded bytes into the
/// public output buffer before the final error gate.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CtEngine<A, const PAD: bool> {
    // Marker only: ties the engine to alphabet `A` without storing a value.
    alphabet: PhantomData<A>,
}
97
98impl<A, const PAD: bool> CtEngine<A, PAD>
99where
100    A: Alphabet,
101{
    /// Creates a new constant-time-oriented decoder engine.
    ///
    /// The engine carries no runtime state; the alphabet and padding mode are
    /// fixed by the `A` and `PAD` type parameters. Being `const`, this can
    /// initialize constants such as [`STANDARD`].
    #[must_use]
    pub const fn new() -> Self {
        Self {
            alphabet: PhantomData,
        }
    }
109
    /// Returns whether this constant-time-oriented decoder expects padded
    /// input.
    ///
    /// The value is the engine's `PAD` const parameter, so it is known at
    /// compile time and does not depend on any input.
    #[must_use]
    pub const fn is_padded(&self) -> bool {
        PAD
    }
116
    /// Validates `input` without writing decoded bytes.
    ///
    /// This uses the same constant-time-oriented symbol mapping and opaque
    /// malformed-input error behavior as
    /// [`Self::decode_slice_clear_tail`]. Input length, padding length, and
    /// final success or failure remain public. Empty input is accepted.
    ///
    /// # Errors
    ///
    /// Returns [`DecodeError::InvalidLength`] when the input length cannot be
    /// valid for this engine: not a multiple of four for padded engines, or
    /// one more than a multiple of four for unpadded engines. Any other
    /// malformed content is reported through the decoder's opaque
    /// malformed-input error.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// ct::STANDARD.validate_result(b"aGVsbG8=").unwrap();
    /// assert!(ct::STANDARD.validate_result(b"aGVsbG8").is_err());
    /// ```
    pub fn validate_result(&self, input: &[u8]) -> Result<(), DecodeError> {
        ct_validate_decode::<A, PAD>(input)
    }
135
136    /// Returns whether `input` is valid for this constant-time-oriented
137    /// decoder.
138    ///
139    /// This is a convenience wrapper around [`Self::validate_result`].
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use base64_ng::ct;
145    ///
146    /// assert!(ct::URL_SAFE_NO_PAD.validate(b"-_8"));
147    /// assert!(!ct::URL_SAFE_NO_PAD.validate(b"+/8"));
148    /// ```
149    #[must_use]
150    pub fn validate(&self, input: &[u8]) -> bool {
151        self.validate_result(input).is_ok()
152    }
153
    /// Returns the exact decoded length for valid input.
    ///
    /// This uses the same constant-time-oriented validation policy as
    /// [`Self::validate_result`] before returning a length. Input length,
    /// padding length, and final success or failure remain public.
    ///
    /// # Errors
    ///
    /// Returns the same error as [`Self::validate_result`] when `input` does
    /// not validate; no length is reported for rejected input.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// assert_eq!(ct::STANDARD.decoded_len(b"aGVsbG8=").unwrap(), 5);
    /// assert_eq!(ct::URL_SAFE_NO_PAD.decoded_len(b"-_8").unwrap(), 2);
    /// assert!(ct::STANDARD.decoded_len(b"aGVsbG8").is_err());
    /// ```
    pub fn decoded_len(&self, input: &[u8]) -> Result<usize, DecodeError> {
        ct_decoded_len::<A, PAD>(input)
    }
162
163    /// Decodes `input` into `output` and clears all bytes after the
164    /// decoded prefix.
165    ///
166    /// If decoding fails, the entire output buffer is cleared before the
167    /// error is returned. Use this variant for sensitive payloads where
168    /// partially decoded bytes from rejected input should not remain in the
169    /// caller-owned output buffer.
170    ///
171    /// # Security: Transient Plaintext Window
172    ///
173    /// Decoded bytes are written to `output` progressively during the
174    /// fixed-shape decode loop before malformed-input detection is
175    /// complete. On error, the entire `output` is wiped before returning,
176    /// but a concurrent same-process observer with access to `output`
177    /// during the call may observe transient partial plaintext from valid
178    /// leading quanta. For shared-memory, enclave-adjacent, HSM-style, or
179    /// multi-principal deployments where even transient writes are
180    /// unacceptable, use [`Self::decode_slice_staged_clear_tail`] with a
181    /// private staging buffer.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use base64_ng::ct;
187    ///
188    /// let mut output = [0xff; 8];
189    /// let written = ct::STANDARD
190    ///     .decode_slice_clear_tail(b"aGk=", &mut output)
191    ///     .unwrap();
192    ///
193    /// assert_eq!(&output[..written], b"hi");
194    /// assert!(output[written..].iter().all(|byte| *byte == 0));
195    /// ```
196    #[must_use = "handle decode errors; use decode_slice_staged_clear_tail for shared-memory or HSM-style threat models"]
197    pub fn decode_slice_clear_tail(
198        &self,
199        input: &[u8],
200        output: &mut [u8],
201    ) -> Result<usize, DecodeError> {
202        let written = match ct_decode_slice::<A, PAD>(input, output) {
203            Ok(written) => written,
204            Err(err) => {
205                crate::wipe_bytes(output);
206                return Err(err);
207            }
208        };
209        crate::wipe_tail(output, written);
210        Ok(written)
211    }
212
    /// Decodes through caller-provided private staging before copying into
    /// `output`.
    ///
    /// This variant is for shared-memory or sandboxed deployments where
    /// the caller-owned `output` buffer must not contain transient decoded
    /// bytes from malformed input. The `staging` buffer must be at least
    /// the decoded length of `input` and must not be shared with
    /// untrusted concurrent observers. On success, decoded bytes are
    /// copied from `staging` into `output`; on error, both buffers are
    /// cleared before returning. `staging` is also cleared after a
    /// successful copy.
    ///
    /// Input length, final success or failure, and decoded length remain
    /// public.
    ///
    /// # Errors
    ///
    /// Returns the validation error when `input` is rejected,
    /// [`DecodeError::OutputTooSmall`] when `output` is shorter than the
    /// decoded length, and [`DecodeError::StagingTooSmall`] when `staging`
    /// is shorter than the decoded length.
    ///
    /// # Examples
    ///
    /// ```
    /// use base64_ng::ct;
    ///
    /// let mut output = [0xff; 8];
    /// let mut staging = [0u8; 8];
    /// let written = ct::STANDARD
    ///     .decode_slice_staged_clear_tail(b"aGk=", &mut output, &mut staging)
    ///     .unwrap();
    ///
    /// assert_eq!(&output[..written], b"hi");
    /// assert!(output[written..].iter().all(|byte| *byte == 0));
    /// ```
    #[must_use = "handle decode errors; staged decode is for shared-memory or HSM-style threat models"]
    pub fn decode_slice_staged_clear_tail(
        &self,
        input: &[u8],
        output: &mut [u8],
        staging: &mut [u8],
    ) -> Result<usize, DecodeError> {
        ct_decode_slice_staged_clear_tail::<A, PAD>(input, output, staging)
    }
235
236    /// Decodes `input` into a stack-backed buffer.
237    ///
238    /// This uses the same constant-time-oriented scalar decoder as
239    /// [`Self::decode_slice_clear_tail`] and clears the internal backing
240    /// array before returning an error.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// use base64_ng::ct;
246    ///
247    /// let decoded = ct::STANDARD.decode_buffer::<5>(b"aGVsbG8=").unwrap();
248    ///
249    /// assert_eq!(decoded.as_bytes(), b"hello");
250    /// ```
251    pub fn decode_buffer<const CAP: usize>(
252        &self,
253        input: &[u8],
254    ) -> Result<DecodedBuffer<CAP>, DecodeError> {
255        let mut output = DecodedBuffer::new();
256        let written = match self.decode_slice_clear_tail(input, output.as_mut_capacity()) {
257            Ok(written) => written,
258            Err(err) => {
259                output.clear();
260                return Err(err);
261            }
262        };
263        output.set_filled(written)?;
264        Ok(output)
265    }
266
267    /// Decodes `input` into an owned byte vector.
268    ///
269    /// This uses the same constant-time-oriented scalar decoder as
270    /// [`Self::decode_slice_clear_tail`]. If decoding fails, the allocated
271    /// output buffer is cleared before the error is returned.
272    ///
273    /// Use [`Self::decode_secret`] for secret-bearing payloads that should stay
274    /// on the crate's redacted, drop-wiping buffer path. Use
275    /// [`Self::decode_secret_staged`] for shared-memory, enclave-adjacent,
276    /// HSM-style, or multi-principal deployments where even transient writes
277    /// into the final heap allocation are unacceptable.
278    #[cfg(feature = "alloc")]
279    #[must_use = "for secret-bearing payloads use decode_secret, which returns a redacted buffer with drop-time cleanup"]
280    pub fn decode_vec(&self, input: &[u8]) -> Result<alloc::vec::Vec<u8>, DecodeError> {
281        let required = self.decoded_len(input)?;
282        let mut output = alloc::vec![0; required];
283        // decode_slice_clear_tail wipes output on error.
284        let written = self.decode_slice_clear_tail(input, &mut output)?;
285        output.truncate(written);
286        Ok(output)
287    }
288
289    /// Decodes `input` into a redacted owned secret buffer.
290    ///
291    /// This is the recommended heap-owning CT decode path for secret-bearing
292    /// payloads. It decodes with [`Self::decode_vec`] and then wraps the result
293    /// in [`SecretBuffer`], which redacts formatting and clears initialized
294    /// bytes plus spare vector capacity on drop.
295    ///
296    /// # Security: Transient Plaintext Window
297    ///
298    /// This function uses the non-staged CT decode path. Decoded bytes are
299    /// written transiently into the heap allocation before the final error
300    /// gate. On error, the allocation is wiped before returning, but a
301    /// concurrent same-process observer with access to that allocation during
302    /// the call may observe transient partial plaintext. For shared-memory,
303    /// enclave-adjacent, HSM-style, or multi-principal deployments where even
304    /// transient writes into the final heap allocation are unacceptable, use
305    /// [`Self::decode_secret_staged`] with a stack-backed private staging
306    /// capacity large enough for the decoded value.
307    ///
308    /// # Examples
309    ///
310    /// ```
311    /// use base64_ng::ct;
312    ///
313    /// let decoded = ct::STANDARD.decode_secret(b"aGVsbG8=").unwrap();
314    /// assert!(decoded.constant_time_eq_public_len(b"hello"));
315    /// ```
316    #[cfg(feature = "alloc")]
317    pub fn decode_secret(&self, input: &[u8]) -> Result<SecretBuffer, DecodeError> {
318        self.decode_vec(input).map(SecretBuffer::from_vec)
319    }
320
321    /// Decodes `input` into a redacted owned secret buffer through private
322    /// stack staging.
323    ///
324    /// `STAGE` must be at least the decoded length of `input`. Decoded bytes
325    /// are written to a stack-backed staging buffer first and copied into the
326    /// returned heap buffer only after the full constant-time-oriented decode
327    /// succeeds. On error, both staging and heap output buffers are wiped before
328    /// returning.
329    ///
330    /// This is the preferred owned decode API for shared-memory,
331    /// enclave-adjacent, HSM-style, or multi-principal deployments where the
332    /// final heap allocation must not contain transient partial plaintext from
333    /// rejected input.
334    ///
335    /// # Examples
336    ///
337    /// ```
338    /// use base64_ng::ct;
339    ///
340    /// let decoded = ct::STANDARD
341    ///     .decode_secret_staged::<5>(b"aGVsbG8=")
342    ///     .unwrap();
343    /// assert!(decoded.constant_time_eq_public_len(b"hello"));
344    /// ```
345    #[cfg(feature = "alloc")]
346    pub fn decode_secret_staged<const STAGE: usize>(
347        &self,
348        input: &[u8],
349    ) -> Result<SecretBuffer, DecodeError> {
350        let required = self.decoded_len(input)?;
351        let mut staging = DecodedBuffer::<STAGE>::new();
352        let mut output = alloc::vec![0; required];
353        let written =
354            self.decode_slice_staged_clear_tail(input, &mut output, staging.as_mut_capacity())?;
355        output.truncate(written);
356        Ok(SecretBuffer::from_vec(output))
357    }
358
359    /// Decodes `buffer` in place and clears all bytes after the decoded
360    /// prefix.
361    ///
362    /// If decoding fails, the entire buffer is cleared before the error is
363    /// returned.
364    ///
365    /// # Security: Transient Plaintext Window
366    ///
367    /// This in-place API writes decoded bytes into `buffer` during the
368    /// fixed-shape decode loop before malformed-input detection is
369    /// complete. On error, the entire buffer is wiped before returning,
370    /// but concurrent same-process observers with access to the same memory
371    /// can observe transient partial plaintext. Use
372    /// [`Self::decode_slice_staged_clear_tail`] with a private staging
373    /// buffer when shared-memory or enclave-adjacent deployments cannot
374    /// tolerate that window.
375    ///
376    /// # Examples
377    ///
378    /// ```
379    /// use base64_ng::ct;
380    ///
381    /// let mut buffer = *b"aGk=";
382    /// let decoded = ct::STANDARD.decode_in_place_clear_tail(&mut buffer).unwrap();
383    ///
384    /// assert_eq!(decoded, b"hi");
385    /// ```
386    pub fn decode_in_place_clear_tail<'a>(
387        &self,
388        buffer: &'a mut [u8],
389    ) -> Result<&'a mut [u8], DecodeError> {
390        let len = match ct_decode_in_place::<A, PAD>(buffer) {
391            Ok(len) => len,
392            Err(err) => {
393                crate::wipe_bytes(buffer);
394                return Err(err);
395            }
396        };
397        crate::wipe_tail(buffer, len);
398        Ok(&mut buffer[..len])
399    }
400}
401
402impl<A, const PAD: bool> core::fmt::Display for CtEngine<A, PAD> {
403    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
404        write!(formatter, "ct padded={PAD}")
405    }
406}
407
/// Expands the lowest bit of `bit` into a full-byte mask without branching:
/// `0xff` when that bit is set, `0x00` otherwise. Higher bits are ignored.
#[inline]
pub(crate) const fn ct_mask_bit(bit: u8) -> u8 {
    // Two's-complement negation turns 1 into 0xff and leaves 0 as 0.
    (bit & 1).wrapping_neg()
}
412
413#[inline]
414pub(crate) const fn ct_mask_nonzero_u8(value: u8) -> u8 {
415    let wide = value as u16;
416    let negative = 0u16.wrapping_sub(wide);
417    let nonzero = ((wide | negative) >> 8) as u8;
418    ct_mask_bit(nonzero)
419}
420
421#[inline]
422pub(crate) const fn ct_mask_eq_u8(left: u8, right: u8) -> u8 {
423    !ct_mask_nonzero_u8(left ^ right)
424}
425
426#[inline]
427pub(crate) const fn ct_mask_lt_u8(left: u8, right: u8) -> u8 {
428    let diff = (left as u16).wrapping_sub(right as u16);
429    ct_mask_bit((diff >> 8) as u8)
430}
431
432#[inline(never)]
433pub(crate) fn constant_time_eq_public_len(left: &[u8], right: &[u8]) -> bool {
434    if left.len() != right.len() {
435        return false;
436    }
437
438    constant_time_eq_same_len(left, right)
439}
440
441#[inline(never)]
442pub(crate) fn constant_time_eq_fixed_width_array<const N: usize>(
443    left: &[u8; N],
444    right: &[u8; N],
445) -> bool {
446    constant_time_eq_same_len(left, right)
447}
448
/// Compares two byte slices that the caller has already established to be the
/// same length, returning `true` when every byte pair matches.
///
/// Every zipped byte pair is XORed and folded into a single accumulator, so
/// the loop has no data-dependent early exit. Note that `zip` stops at the
/// shorter slice: if the lengths differed, the trailing bytes of the longer
/// slice would be ignored, which is why callers check or guarantee the length
/// first.
#[inline(never)]
#[allow(unsafe_code)]
fn constant_time_eq_same_len(left: &[u8], right: &[u8]) -> bool {
    let mut diff = 0u8;
    for (left, right) in left.iter().zip(right) {
        // Any differing bit in any pair sticks in `diff`.
        diff = ct_accumulate_u8(diff, *left ^ *right);
    }
    // Run the result-gate barrier before the accumulated value is turned into
    // the public boolean.
    ct_error_gate_barrier(diff, 0);
    // SAFETY: `diff` is an initialized local `u8`; this final volatile read
    // keeps the public equality comparison dependent on a post-barrier load of
    // the accumulated value.
    let result = unsafe { core::ptr::read_volatile(&raw const diff) };
    result == 0
}
463
/// ORs `value` into `accumulator` and returns the combined byte.
///
/// Both operands pass through `core::hint::black_box` and the result is read
/// back with a volatile load. The function is also `#[inline(never)]`; all of
/// this is there to discourage the optimizer from reshaping the accumulation,
/// not to change the computed value.
#[inline(never)]
#[allow(unsafe_code)]
fn ct_accumulate_u8(accumulator: u8, value: u8) -> u8 {
    let result = core::hint::black_box(accumulator) | core::hint::black_box(value);
    // SAFETY: `result` is an initialized local `u8`; the volatile read is a
    // dependency-free optimizer barrier for the accumulation value and does not
    // access caller memory.
    unsafe { core::ptr::read_volatile(&raw const result) }
}
473
474fn ct_decode_slice<A: Alphabet, const PAD: bool>(
475    input: &[u8],
476    output: &mut [u8],
477) -> Result<usize, DecodeError> {
478    if input.is_empty() {
479        return Ok(0);
480    }
481
482    if PAD {
483        ct_decode_padded::<A>(input, output)
484    } else {
485        ct_decode_unpadded::<A>(input, output)
486    }
487}
488
489fn ct_decode_slice_staged_clear_tail<A: Alphabet, const PAD: bool>(
490    input: &[u8],
491    output: &mut [u8],
492    staging: &mut [u8],
493) -> Result<usize, DecodeError> {
494    let required = match ct_decoded_len::<A, PAD>(input) {
495        Ok(required) => required,
496        Err(err) => {
497            wipe_bytes(output);
498            wipe_bytes(staging);
499            return Err(err);
500        }
501    };
502
503    if output.len() < required {
504        wipe_bytes(output);
505        wipe_bytes(staging);
506        return Err(DecodeError::OutputTooSmall {
507            required,
508            available: output.len(),
509        });
510    }
511
512    if staging.len() < required {
513        wipe_bytes(output);
514        wipe_bytes(staging);
515        return Err(DecodeError::StagingTooSmall {
516            required,
517            available: staging.len(),
518        });
519    }
520
521    let written = match ct_decode_slice::<A, PAD>(input, &mut staging[..required]) {
522        Ok(written) => written,
523        Err(err) => {
524            wipe_bytes(output);
525            wipe_bytes(staging);
526            return Err(err);
527        }
528    };
529
530    output[..written].copy_from_slice(&staging[..written]);
531    wipe_bytes(staging);
532    wipe_tail(output, written);
533    Ok(written)
534}
535
536fn ct_decode_in_place<A: Alphabet, const PAD: bool>(
537    buffer: &mut [u8],
538) -> Result<usize, DecodeError> {
539    if buffer.is_empty() {
540        return Ok(0);
541    }
542
543    if PAD {
544        ct_decode_padded_in_place::<A>(buffer)
545    } else {
546        ct_decode_unpadded_in_place::<A>(buffer)
547    }
548}
549
/// Best-effort barrier executed before the accumulated validity flags are
/// turned into a public success/failure result.
///
/// On every target this passes the OR of both flags through
/// `core::hint::black_box` and issues a `SeqCst` compiler fence. It then emits
/// one architecture-specific instruction sequence where a `cfg` block below
/// matches (x86/x86_64, AArch64, 32-bit ARM, RISC-V); on any other
/// architecture, and under Miri or Kani, only the portable part runs. The
/// function returns nothing and does not modify its arguments.
// NOTE(review): the x86 block passes `nomem` while the other architectures do
// not; confirm that difference is intentional.
#[inline(never)]
#[allow(unsafe_code)]
fn ct_error_gate_barrier(invalid_byte: u8, invalid_padding: u8) {
    core::hint::black_box(invalid_byte | invalid_padding);
    core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst);

    #[cfg(all(not(miri), not(kani), any(target_arch = "x86", target_arch = "x86_64")))]
    {
        // SAFETY: `lfence` does not access memory and is used as a speculation
        // barrier before the public success/failure branch is observed.
        unsafe {
            core::arch::asm!("lfence", options(nostack, preserves_flags, nomem));
        }
    }

    #[cfg(all(not(miri), not(kani), target_arch = "aarch64"))]
    {
        // `hint #20` is the CSDB hint referred to in the module docs.
        // Older cores may treat CSDB as a no-op; runtime reporting marks this
        // as unattested until the deployment provides platform evidence.
        // SAFETY: these barriers do not access memory.
        unsafe {
            core::arch::asm!("isb sy", "hint #20", options(nostack, preserves_flags));
        }
    }

    #[cfg(all(not(miri), not(kani), target_arch = "arm"))]
    {
        // SAFETY: `isb sy` does not access memory and is used as the best
        // available stable ARM speculation boundary for this crate.
        unsafe {
            core::arch::asm!("isb sy", options(nostack, preserves_flags));
        }
    }

    #[cfg(all(
        not(miri),
        not(kani),
        any(target_arch = "riscv32", target_arch = "riscv64")
    ))]
    {
        // RISC-V base ISA does not provide a canonical speculation barrier.
        // `fence rw, rw` is the available ordering primitive for the CT public
        // result gate and is reported separately as `ordering-fence`; callers
        // on speculative RISC-V cores must use platform mitigations because
        // this does not satisfy `BackendPolicy::HighAssuranceScalarOnly`.
        // SAFETY: the assembly block does not access memory.
        unsafe {
            core::arch::asm!("fence rw, rw", options(nostack, preserves_flags));
        }
    }
}
601
602fn ct_validate_decode<A: Alphabet, const PAD: bool>(input: &[u8]) -> Result<(), DecodeError> {
603    if input.is_empty() {
604        return Ok(());
605    }
606
607    if PAD {
608        ct_validate_padded::<A>(input)
609    } else {
610        ct_validate_unpadded::<A>(input)
611    }
612}
613
614fn ct_decoded_len<A: Alphabet, const PAD: bool>(input: &[u8]) -> Result<usize, DecodeError> {
615    ct_validate_decode::<A, PAD>(input)?;
616    if input.is_empty() {
617        return Ok(0);
618    }
619
620    if PAD {
621        Ok(input.len() / 4 * 3 - ct_padding_len(input))
622    } else {
623        let full_quads = input.len() / 4 * 3;
624        match input.len() % 4 {
625            0 => Ok(full_quads),
626            2 => Ok(full_quads + 1),
627            3 => Ok(full_quads + 2),
628            _ => Err(DecodeError::InvalidLength),
629        }
630    }
631}
632
/// Validates padded Base64 `input` without producing decoded bytes.
///
/// The length check is the only early return. After it, every quantum is
/// examined and problems are OR-accumulated into two flag bytes
/// (`invalid_byte`, `invalid_padding`) that are reported once at the end via
/// `report_ct_error`, so the loop has no early exit on malformed content.
fn ct_validate_padded<A: Alphabet>(input: &[u8]) -> Result<(), DecodeError> {
    // Length is public, so rejecting a non-multiple-of-four early is fine.
    if !input.len().is_multiple_of(4) {
        return Err(DecodeError::InvalidLength);
    }

    let padding = ct_padding_len(input);
    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut read = 0;

    // Strict `<`: this loop covers every quantum except the last one, which
    // is the only place padding is allowed.
    while read + 4 < input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        // Only the validity masks matter here; the decoded values are dropped.
        let (_, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (_, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (_, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (_, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // `=` in the last two positions of a non-final quantum is misplaced
        // padding. NOTE(review): `=` in positions 0/1 is not compared here —
        // presumably it is rejected as a non-alphabet byte; confirm against
        // `ct_decode_alphabet_byte`.
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');
        read += 4;
    }

    // The final quantum is checked against the padding length computed above.
    let final_chunk =
        read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
    let (_, final_invalid_byte, final_invalid_padding, _) =
        ct_padded_final_quantum::<A>(final_chunk, padding);
    invalid_byte |= final_invalid_byte;
    invalid_padding |= final_invalid_padding;

    report_ct_error(invalid_byte, invalid_padding)
}
669
/// Validates unpadded Base64 `input` without producing decoded bytes.
///
/// A length of `4k + 1` is rejected up front. Otherwise every full quantum
/// and the optional two- or three-symbol tail are examined, with problems
/// OR-accumulated into `invalid_byte` / `invalid_padding` and reported once at
/// the end via `report_ct_error`.
fn ct_validate_unpadded<A: Alphabet>(input: &[u8]) -> Result<(), DecodeError> {
    // A single leftover symbol cannot encode a whole byte; length is public.
    if input.len() % 4 == 1 {
        return Err(DecodeError::InvalidLength);
    }

    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut read = 0;

    // `<=`: unlike the padded validator, every full quantum is handled here.
    while read + 4 <= input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        let (_, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (_, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (_, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (_, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // Unpadded input must not contain `=` in any position.
        invalid_padding |= ct_mask_eq_u8(b0, b'=');
        invalid_padding |= ct_mask_eq_u8(b1, b'=');
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');

        read += 4;
    }

    // Remaining symbols after the last full quantum: none, two, or three.
    match read_tail_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding) {
        [] => {}
        [b0, b1] => {
            let (_, valid0) = ct_decode_alphabet_byte::<A>(*b0);
            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
            invalid_byte |= !valid0;
            invalid_byte |= !valid1;
            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
            // Two symbols encode one byte; the low four bits of the second
            // symbol are unused and must be zero.
            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
        }
        [b0, b1, b2] => {
            let (_, valid0) = ct_decode_alphabet_byte::<A>(*b0);
            let (_, valid1) = ct_decode_alphabet_byte::<A>(*b1);
            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
            invalid_byte |= !valid0;
            invalid_byte |= !valid1;
            invalid_byte |= !valid2;
            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
            // Three symbols encode two bytes; the low two bits of the third
            // symbol are unused and must be zero.
            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
        }
        // Any other tail shape is treated as fully invalid.
        _ => {
            invalid_byte = 0xff;
            invalid_padding = 0xff;
        }
    }

    report_ct_error(invalid_byte, invalid_padding)
}
730
/// Decodes and checks the final four-symbol quantum of padded input.
///
/// `padding` is the number of trailing `=` symbols the caller determined for
/// the input (0, 1, or 2).
///
/// Returns `(bytes, invalid_byte, invalid_padding, written)`:
/// - `bytes`: the three candidate output bytes assembled from the decoded
///   symbol values; only the first `written` are meaningful,
/// - `invalid_byte`: mask set when a symbol that is required for this padding
///   length is not in the alphabet,
/// - `invalid_padding`: mask set when `=` is misplaced or unused trailing bits
///   are non-zero,
/// - `written`: `3 - padding`.
///
/// A `padding` value above 2 short-circuits to an all-invalid result with
/// zero bytes written. Apart from that match on the (public) padding length,
/// the checks are combined with masks rather than branches.
pub(crate) fn ct_padded_final_quantum<A: Alphabet>(
    input: [u8; 4],
    padding: usize,
) -> ([u8; 3], u8, u8, usize) {
    let [b0, b1, b2, b3] = input;
    let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
    let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
    let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
    let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);

    // Narrow the padding length to a byte so it can feed the mask helpers.
    let padding_byte = match padding {
        0 => 0,
        1 => 1,
        2 => 2,
        _ => return ([0; 3], 0xff, 0xff, 0),
    };
    // Exactly one of these three masks is 0xff.
    let no_padding = ct_mask_eq_u8(padding_byte, 0);
    let one_padding = ct_mask_eq_u8(padding_byte, 1);
    let two_padding = ct_mask_eq_u8(padding_byte, 2);
    // The third symbol must be a real symbol unless two `=` are present; the
    // fourth only when there is no padding at all.
    let require_v2 = no_padding | one_padding;
    let require_v3 = no_padding;

    let invalid_byte = !valid0 | !valid1 | (!valid2 & require_v2) | (!valid3 & require_v3);
    // Per padding length:
    // - two `=`: the low four bits of the second symbol must be zero,
    // - one `=`: the third symbol must not be `=` and its low two bits must
    //   be zero,
    // - none: neither of the last two symbols may be `=`.
    let invalid_padding = (ct_mask_nonzero_u8(v1 & 0b0000_1111) & two_padding)
        | ((ct_mask_eq_u8(b2, b'=') | ct_mask_nonzero_u8(v2 & 0b0000_0011)) & one_padding)
        | ((ct_mask_eq_u8(b2, b'=') | ct_mask_eq_u8(b3, b'=')) & no_padding);

    (
        [(v0 << 2) | (v1 >> 4), (v1 << 4) | (v2 >> 2), (v2 << 6) | v3],
        invalid_byte,
        invalid_padding,
        3 - padding,
    )
}
765
/// Decodes padded Base64 `input` into `output`, returning the number of bytes
/// written.
///
/// The length and output-capacity checks are the only early returns, and both
/// happen before anything is written. After that, every quantum is decoded
/// and written to `output` regardless of validity; problems are
/// OR-accumulated into `invalid_byte` / `invalid_padding` and reported once at
/// the end. On a malformed-input error `output` may therefore already hold
/// decoded bytes — this function does not wipe it; callers are responsible
/// for clearing.
fn ct_decode_padded<A: Alphabet>(input: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> {
    if !input.len().is_multiple_of(4) {
        return Err(DecodeError::InvalidLength);
    }

    let padding = ct_padding_len(input);
    // NOTE(review): this subtraction assumes `ct_padding_len` never exceeds
    // `input.len() / 4 * 3` (e.g. at most 2 for a single quantum) — confirm
    // against that helper.
    let required = input.len() / 4 * 3 - padding;
    if output.len() < required {
        return Err(DecodeError::OutputTooSmall {
            required,
            available: output.len(),
        });
    }

    let mut invalid_byte = 0u8;
    let mut invalid_padding = 0u8;
    let mut write = 0;
    let mut read = 0;

    // Strict `<`: every quantum except the last, which may carry padding.
    while read + 4 < input.len() {
        let [b0, b1, b2, b3] =
            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);

        invalid_byte |= !valid0;
        invalid_byte |= !valid1;
        invalid_byte |= !valid2;
        invalid_byte |= !valid3;
        // `=` in the last two positions of a non-final quantum is misplaced
        // padding.
        invalid_padding |= ct_mask_eq_u8(b2, b'=');
        invalid_padding |= ct_mask_eq_u8(b3, b'=');
        // Pack four 6-bit values into three bytes; written unconditionally.
        output[write] = (v0 << 2) | (v1 >> 4);
        output[write + 1] = (v1 << 4) | (v2 >> 2);
        output[write + 2] = (v2 << 6) | v3;
        write += 3;
        read += 4;
    }

    // Final quantum: yields `3 - padding` bytes plus its own validity flags.
    let final_chunk =
        read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
    let (final_bytes, final_invalid_byte, final_invalid_padding, final_written) =
        ct_padded_final_quantum::<A>(final_chunk, padding);
    invalid_byte |= final_invalid_byte;
    invalid_padding |= final_invalid_padding;
    output[write..write + final_written].copy_from_slice(&final_bytes[..final_written]);
    write += final_written;

    // Single error gate for everything accumulated above.
    report_ct_error(invalid_byte, invalid_padding)?;
    Ok(write)
}
818
819fn ct_decode_padded_in_place<A: Alphabet>(buffer: &mut [u8]) -> Result<usize, DecodeError> {
820    if !buffer.len().is_multiple_of(4) {
821        return Err(DecodeError::InvalidLength);
822    }
823
824    let padding = ct_padding_len(buffer);
825    let required = buffer.len() / 4 * 3 - padding;
826    if required > buffer.len() {
827        wipe_bytes(buffer);
828        return Err(DecodeError::InvalidInput);
829    }
830
831    let mut invalid_byte = 0u8;
832    let mut invalid_padding = 0u8;
833    let mut write = 0;
834    let mut read = 0;
835
836    while read + 4 < buffer.len() {
837        let [b0, b1, b2, b3] =
838            read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
839        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
840        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
841        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
842        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
843
844        invalid_byte |= !valid0;
845        invalid_byte |= !valid1;
846        invalid_byte |= !valid2;
847        invalid_byte |= !valid3;
848        invalid_padding |= ct_mask_eq_u8(b2, b'=');
849        invalid_padding |= ct_mask_eq_u8(b3, b'=');
850        buffer[write] = (v0 << 2) | (v1 >> 4);
851        buffer[write + 1] = (v1 << 4) | (v2 >> 2);
852        buffer[write + 2] = (v2 << 6) | v3;
853        write += 3;
854        read += 4;
855    }
856
857    let final_chunk =
858        read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
859    let (final_bytes, final_invalid_byte, final_invalid_padding, final_written) =
860        ct_padded_final_quantum::<A>(final_chunk, padding);
861    invalid_byte |= final_invalid_byte;
862    invalid_padding |= final_invalid_padding;
863    buffer[write..write + final_written].copy_from_slice(&final_bytes[..final_written]);
864    write += final_written;
865
866    if write != required {
867        ct_error_gate_barrier(invalid_byte, invalid_padding);
868        wipe_bytes(buffer);
869        return Err(DecodeError::InvalidInput);
870    }
871    if let Err(err) = report_ct_error(invalid_byte, invalid_padding) {
872        wipe_bytes(buffer);
873        return Err(err);
874    }
875    Ok(write)
876}
877
878fn ct_decode_unpadded<A: Alphabet>(input: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> {
879    if input.len() % 4 == 1 {
880        return Err(DecodeError::InvalidLength);
881    }
882
883    let required = decoded_capacity(input.len());
884    if output.len() < required {
885        return Err(DecodeError::OutputTooSmall {
886            required,
887            available: output.len(),
888        });
889    }
890
891    let mut invalid_byte = 0u8;
892    let mut invalid_padding = 0u8;
893    let mut write = 0;
894    let mut read = 0;
895
896    while read + 4 <= input.len() {
897        let [b0, b1, b2, b3] =
898            read_quad_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding);
899        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
900        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
901        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
902        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
903
904        invalid_byte |= !valid0;
905        invalid_byte |= !valid1;
906        invalid_byte |= !valid2;
907        invalid_byte |= !valid3;
908        invalid_padding |= ct_mask_eq_u8(b0, b'=');
909        invalid_padding |= ct_mask_eq_u8(b1, b'=');
910        invalid_padding |= ct_mask_eq_u8(b2, b'=');
911        invalid_padding |= ct_mask_eq_u8(b3, b'=');
912
913        output[write] = (v0 << 2) | (v1 >> 4);
914        output[write + 1] = (v1 << 4) | (v2 >> 2);
915        output[write + 2] = (v2 << 6) | v3;
916        read += 4;
917        write += 3;
918    }
919
920    match read_tail_or_mark_invalid(input, read, &mut invalid_byte, &mut invalid_padding) {
921        [] => {}
922        [b0, b1] => {
923            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
924            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
925            invalid_byte |= !valid0;
926            invalid_byte |= !valid1;
927            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
928            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
929            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
930            output[write] = (v0 << 2) | (v1 >> 4);
931            write += 1;
932        }
933        [b0, b1, b2] => {
934            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
935            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
936            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
937            invalid_byte |= !valid0;
938            invalid_byte |= !valid1;
939            invalid_byte |= !valid2;
940            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
941            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
942            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
943            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
944            output[write] = (v0 << 2) | (v1 >> 4);
945            output[write + 1] = (v1 << 4) | (v2 >> 2);
946            write += 2;
947        }
948        _ => {
949            invalid_byte = 0xff;
950            invalid_padding = 0xff;
951        }
952    }
953
954    report_ct_error(invalid_byte, invalid_padding)?;
955    Ok(write)
956}
957
958fn ct_decode_unpadded_in_place<A: Alphabet>(buffer: &mut [u8]) -> Result<usize, DecodeError> {
959    if buffer.len() % 4 == 1 {
960        return Err(DecodeError::InvalidLength);
961    }
962
963    let required = decoded_capacity(buffer.len());
964    if required > buffer.len() {
965        wipe_bytes(buffer);
966        return Err(DecodeError::InvalidInput);
967    }
968
969    let mut invalid_byte = 0u8;
970    let mut invalid_padding = 0u8;
971    let mut write = 0;
972    let mut read = 0;
973
974    while read + 4 <= buffer.len() {
975        let [b0, b1, b2, b3] =
976            read_quad_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
977        let (v0, valid0) = ct_decode_alphabet_byte::<A>(b0);
978        let (v1, valid1) = ct_decode_alphabet_byte::<A>(b1);
979        let (v2, valid2) = ct_decode_alphabet_byte::<A>(b2);
980        let (v3, valid3) = ct_decode_alphabet_byte::<A>(b3);
981
982        invalid_byte |= !valid0;
983        invalid_byte |= !valid1;
984        invalid_byte |= !valid2;
985        invalid_byte |= !valid3;
986        invalid_padding |= ct_mask_eq_u8(b0, b'=');
987        invalid_padding |= ct_mask_eq_u8(b1, b'=');
988        invalid_padding |= ct_mask_eq_u8(b2, b'=');
989        invalid_padding |= ct_mask_eq_u8(b3, b'=');
990
991        buffer[write] = (v0 << 2) | (v1 >> 4);
992        buffer[write + 1] = (v1 << 4) | (v2 >> 2);
993        buffer[write + 2] = (v2 << 6) | v3;
994        read += 4;
995        write += 3;
996    }
997
998    let tail = read_tail_or_mark_invalid(buffer, read, &mut invalid_byte, &mut invalid_padding);
999    match tail {
1000        [] => {}
1001        [b0, b1] => {
1002            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
1003            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
1004            invalid_byte |= !valid0;
1005            invalid_byte |= !valid1;
1006            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
1007            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
1008            invalid_padding |= ct_mask_nonzero_u8(v1 & 0b0000_1111);
1009            buffer[write] = (v0 << 2) | (v1 >> 4);
1010            write += 1;
1011        }
1012        [b0, b1, b2] => {
1013            let (v0, valid0) = ct_decode_alphabet_byte::<A>(*b0);
1014            let (v1, valid1) = ct_decode_alphabet_byte::<A>(*b1);
1015            let (v2, valid2) = ct_decode_alphabet_byte::<A>(*b2);
1016            invalid_byte |= !valid0;
1017            invalid_byte |= !valid1;
1018            invalid_byte |= !valid2;
1019            invalid_padding |= ct_mask_eq_u8(*b0, b'=');
1020            invalid_padding |= ct_mask_eq_u8(*b1, b'=');
1021            invalid_padding |= ct_mask_eq_u8(*b2, b'=');
1022            invalid_padding |= ct_mask_nonzero_u8(v2 & 0b0000_0011);
1023            buffer[write] = (v0 << 2) | (v1 >> 4);
1024            buffer[write + 1] = (v1 << 4) | (v2 >> 2);
1025            write += 2;
1026        }
1027        _ => {
1028            invalid_byte = 0xff;
1029            invalid_padding = 0xff;
1030        }
1031    }
1032
1033    if write != required {
1034        ct_error_gate_barrier(invalid_byte, invalid_padding);
1035        wipe_bytes(buffer);
1036        return Err(DecodeError::InvalidInput);
1037    }
1038    if let Err(err) = report_ct_error(invalid_byte, invalid_padding) {
1039        wipe_bytes(buffer);
1040        return Err(err);
1041    }
1042    Ok(write)
1043}
1044
1045fn read_tail(input: &[u8], offset: usize) -> Result<&[u8], DecodeError> {
1046    input.get(offset..).ok_or(DecodeError::InvalidLength)
1047}
1048
1049fn read_quad_or_mark_invalid(
1050    input: &[u8],
1051    offset: usize,
1052    invalid_byte: &mut u8,
1053    invalid_padding: &mut u8,
1054) -> [u8; 4] {
1055    if let Ok(quad) = read_quad(input, offset) {
1056        quad
1057    } else {
1058        debug_assert!(
1059            false,
1060            "read_quad failed inside length-validated constant-time decode loop"
1061        );
1062        *invalid_byte = 0xff;
1063        *invalid_padding = 0xff;
1064        [0; 4]
1065    }
1066}
1067
1068fn read_tail_or_mark_invalid<'a>(
1069    input: &'a [u8],
1070    offset: usize,
1071    invalid_byte: &mut u8,
1072    invalid_padding: &mut u8,
1073) -> &'a [u8] {
1074    if let Ok(tail) = read_tail(input, offset) {
1075        tail
1076    } else {
1077        debug_assert!(
1078            false,
1079            "read_tail failed inside length-validated constant-time decode loop"
1080        );
1081        *invalid_byte = 0xff;
1082        *invalid_padding = 0xff;
1083        &[]
1084    }
1085}
1086
1087#[inline(never)]
1088#[allow(unsafe_code)]
1089fn ct_decode_alphabet_byte<A: Alphabet>(byte: u8) -> (u8, u8) {
1090    let mut decoded = 0u8;
1091    let mut valid = 0u8;
1092    let mut candidate = 0u8;
1093
1094    while candidate < 64 {
1095        let matches = core::hint::black_box(ct_mask_eq_u8(
1096            core::hint::black_box(byte),
1097            core::hint::black_box(A::ENCODE[candidate as usize]),
1098        ));
1099        decoded = ct_accumulate_u8(decoded, candidate & matches);
1100        valid = ct_accumulate_u8(valid, matches);
1101        candidate += 1;
1102    }
1103
1104    (decoded, valid)
1105}
1106
1107fn ct_padding_len(input: &[u8]) -> usize {
1108    let Some((&last, before_last_prefix)) = input.split_last() else {
1109        return 0;
1110    };
1111    let Some(&before_last) = before_last_prefix.last() else {
1112        return 0;
1113    };
1114    usize::from(ct_mask_eq_u8(last, b'=') & 1) + usize::from(ct_mask_eq_u8(before_last, b'=') & 1)
1115}
1116
1117pub(crate) fn report_ct_error(invalid_byte: u8, invalid_padding: u8) -> Result<(), DecodeError> {
1118    ct_error_gate_barrier(invalid_byte, invalid_padding);
1119
1120    if (invalid_byte | invalid_padding) != 0 {
1121        Err(DecodeError::InvalidInput)
1122    } else {
1123        Ok(())
1124    }
1125}