Skip to main content

wafrift_encoding/
auth_header_smuggle.rs

1//! `Authorization` / `Proxy-Authorization` header parser-differential
2//! smuggling.
3//!
//! RFC 7235 §2.1 defines the Authorization header value (`credentials`) as
//! `auth-scheme [ 1*SP ( token68 / #auth-param ) ]`. Real-world parsers are
//! inconsistent about:
7//!
8//! - **Case sensitivity** of the scheme — RFC says case-insensitive
9//!   (`Bearer` ≡ `bearer` ≡ `BEARER`) but some WAFs match `Bearer`
10//!   literally and miss lowercase.
11//! - **Linear whitespace** between scheme and token — RFC says
12//!   `1*SP` (one or more spaces) but some parsers accept tabs,
13//!   multiple spaces, or no space at all (`Bearereyj…`).
14//! - **Multiple Authorization headers** — RFC 7230 §3.2.2 forbids
15//!   most header duplication; Authorization is single-valued. Real
16//!   stacks: nginx keeps first, Apache keeps last, some join with
17//!   commas. Privilege-escalation surface when WAF and origin
18//!   disagree on which header wins.
19//! - **Quoted scheme** (`"Bearer" eyj…`) — strict RFC rejects; lax
20//!   parsers strip quotes.
21//! - **Trailing junk** after the token — many origin parsers stop
22//!   at the first whitespace and ignore the rest; WAFs that scan
23//!   the entire header value see the trailing payload.
//! - **Control bytes in the token** — the strict RFC 7235 §2.1
//!   `token68` alphabet contains no CTLs; lax parsers silently strip them.
26//!
27//! The same matrix applies to `Proxy-Authorization` (RFC 7235
28//! §4.4). Caller passes the header name; the same variant generators
29//! work for both.
30//!
31//! ## Wire shape
32//!
//! Every probe carries its wire form as a `Vec<(name, value)>` of header
//! lines. Most variants hold exactly one pair, which the caller attaches
//! to a `Request` under either `Authorization` or `Proxy-Authorization`;
//! the duplicate-header variant holds two pairs with the same name — see
//! [`AuthSmuggleProbe::header_lines`].
38
39use rand::Rng;
40use wafrift_types::canary::Canary;
41use wafrift_types::pick::pick_from;
42use wafrift_types::probe::{SmuggleArtifact, SmuggleProbe};
43
/// Maximum length, in bytes, wafrift will emit for a single
/// Authorization header *value* (4 KiB). Most stacks have a 4-8 KiB
/// header-line cap; the limit keeps probes from being dropped at the
/// framing layer before reaching the parser-differential surface we
/// care about.
///
/// The cap is applied to each header value on its own; the header name
/// is not counted towards it.
pub const MAX_AUTH_HEADER_BYTES: usize = 4 * 1024;
49
/// Authorization-header smuggle variants.
///
/// Each variant names one way of bending the `credentials` syntax so
/// that two parsers may disagree about what the header says. The
/// matching builder lives on [`AuthSmuggleProbe`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AuthHeaderVariant {
    /// `bearer <token>` — lowercase scheme. RFC 7235 §2.1 says
    /// case-insensitive; some WAFs match literally and miss it.
    LowercaseScheme,
    /// `Bearer<token>` — no whitespace between scheme and token.
    /// RFC says `1*SP`; some lenient parsers join them.
    NoWhitespaceAfterScheme,
    /// `Bearer\t<token>` — TAB instead of SP between scheme and
    /// token. RFC 7235 §2.1 requires `1*SP` (SP only); lax parsers
    /// accept any LWS.
    TabBetweenSchemeAndToken,
    /// `Bearer   <token>` — multiple spaces (3-7 chosen randomly)
    /// instead of a single SP. Some strict parsers reject; most accept.
    MultipleSpacesAfterScheme,
    /// Two header lines with the same name and different tokens:
    /// benign first, smuggled second. `header_lines` holds both. A
    /// first-wins WAF sees the benign token; a last-wins origin sees
    /// the smuggled one → privilege escalation differential.
    DuplicateHeaderFirstWinsBenign,
    /// `"Bearer" <token>` — scheme wrapped in double quotes. Strict
    /// RFC rejects; some lax parsers strip.
    QuotedScheme,
    /// `Bearer <token> trailing junk` — extra bytes after the
    /// token. Most parsers stop at whitespace; WAFs scanning the
    /// whole value see the trailing payload.
    TrailingJunkAfterToken,
    /// `Bearer <token-with-ctl-byte>` — control byte inserted into
    /// the token. Strict parsers reject; lax parsers strip.
    ControlByteInToken,
}
81
/// An Authorization-header smuggle probe.
///
/// Built through the associated constructor functions (one per
/// [`AuthHeaderVariant`]); each call generates a fresh [`Canary`].
#[derive(Debug, Clone)]
pub struct AuthSmuggleProbe {
    /// Which smuggle shape this probe implements.
    pub variant: AuthHeaderVariant,
    /// Header lines to attach to the request, as `(name, value)`
    /// pairs in wire order. Most variants emit exactly one pair; the
    /// duplicate-header variant emits two with the same name.
    pub header_lines: Vec<(String, String)>,
    /// Human-readable telemetry description of the probe.
    pub description: String,
    /// Per-probe correlation token.
    pub canary: Canary,
}
96
97impl AuthSmuggleProbe {
98    fn finalise(
99        variant: AuthHeaderVariant,
100        mut header_lines: Vec<(String, String)>,
101        description: String,
102    ) -> Self {
103        for (_, v) in header_lines.iter_mut() {
104            if v.len() > MAX_AUTH_HEADER_BYTES {
105                // §15 panic fix: `String::truncate` panics off a char boundary.
106                // Auth values can be multibyte (operator `--credential`), so cap
107                // at a UTF-8 boundary via the shared helper (matches cookie/range).
108                let cut = crate::floor_char_boundary(v, MAX_AUTH_HEADER_BYTES);
109                v.truncate(cut);
110            }
111        }
112        Self {
113            variant,
114            header_lines,
115            description,
116            canary: Canary::generate(),
117        }
118    }
119
120    /// `bearer <token>` — lowercase scheme.
121    #[must_use]
122    pub fn lowercase_scheme(header_name: &str, scheme: &str, token: &str) -> Self {
123        let value = format!("{} {}", scheme.to_lowercase(), sanitise_token(token));
124        Self::finalise(
125            AuthHeaderVariant::LowercaseScheme,
126            vec![(header_name.to_string(), value)],
127            format!(
128                "Lowercase auth scheme {:?} — RFC 7235 §2.1 case-insensitive but some WAFs match literal",
129                scheme.to_lowercase()
130            ),
131        )
132    }
133
134    /// `Bearer<token>` — no whitespace between scheme and token.
135    #[must_use]
136    pub fn no_whitespace_after_scheme(header_name: &str, scheme: &str, token: &str) -> Self {
137        let value = format!("{}{}", scheme, sanitise_token(token));
138        Self::finalise(
139            AuthHeaderVariant::NoWhitespaceAfterScheme,
140            vec![(header_name.to_string(), value)],
141            "No SP between scheme and token — RFC 7235 §2.1 violation, lenient parsers join".into(),
142        )
143    }
144
145    /// `Bearer\t<token>` — TAB instead of SP between scheme and
146    /// token.
147    #[must_use]
148    pub fn tab_between_scheme_and_token(header_name: &str, scheme: &str, token: &str) -> Self {
149        let value = format!("{}\t{}", scheme, sanitise_token(token));
150        Self::finalise(
151            AuthHeaderVariant::TabBetweenSchemeAndToken,
152            vec![(header_name.to_string(), value)],
153            "TAB between scheme and token — RFC requires SP, some accept any LWS".into(),
154        )
155    }
156
157    /// `Bearer   <token>` — 3-7 spaces between scheme and token.
158    #[must_use]
159    pub fn multiple_spaces_after_scheme(header_name: &str, scheme: &str, token: &str) -> Self {
160        let mut rng = rand::thread_rng();
161        let n = rng.gen_range(3..=7);
162        let value = format!("{}{}{}", scheme, " ".repeat(n), sanitise_token(token));
163        Self::finalise(
164            AuthHeaderVariant::MultipleSpacesAfterScheme,
165            vec![(header_name.to_string(), value)],
166            format!("{n} spaces between scheme and token — boundary stretch of `1*SP`"),
167        )
168    }
169
170    /// Two header lines with the same name; first benign, second
171    /// the real smuggled token. nginx-style "first wins" parsers
172    /// see benign; Apache-style "last wins" parsers see smuggle.
173    #[must_use]
174    pub fn duplicate_header_first_wins_benign(
175        header_name: &str,
176        scheme: &str,
177        benign_token: &str,
178        smuggle_token: &str,
179    ) -> Self {
180        let v1 = format!("{} {}", scheme, sanitise_token(benign_token));
181        let v2 = format!("{} {}", scheme, sanitise_token(smuggle_token));
182        Self::finalise(
183            AuthHeaderVariant::DuplicateHeaderFirstWinsBenign,
184            vec![(header_name.to_string(), v1), (header_name.to_string(), v2)],
185            "Duplicate Authorization headers — nginx-vs-Apache first/last-wins differential".into(),
186        )
187    }
188
189    /// `"Bearer" <token>` — scheme wrapped in double quotes.
190    #[must_use]
191    pub fn quoted_scheme(header_name: &str, scheme: &str, token: &str) -> Self {
192        // Strip any inner quotes so the wrapping pair isn't ambiguous.
193        let clean_scheme = scheme.replace('"', "");
194        let value = format!("\"{}\" {}", clean_scheme, sanitise_token(token));
195        Self::finalise(
196            AuthHeaderVariant::QuotedScheme,
197            vec![(header_name.to_string(), value)],
198            "Quoted scheme — strict RFC rejects, lax parsers strip quotes".into(),
199        )
200    }
201
202    /// `Bearer <token> <junk>` — extra bytes after the token.
203    #[must_use]
204    pub fn trailing_junk_after_token(
205        header_name: &str,
206        scheme: &str,
207        token: &str,
208        junk: &str,
209    ) -> Self {
210        let value = format!(
211            "{} {} {}",
212            scheme,
213            sanitise_token(token),
214            sanitise_token(junk)
215        );
216        Self::finalise(
217            AuthHeaderVariant::TrailingJunkAfterToken,
218            vec![(header_name.to_string(), value)],
219            "Trailing bytes after token — parser stops at SP vs WAF scans whole value".into(),
220        )
221    }
222
223    /// `Bearer <token-with-ctl>` — control byte injected at the
224    /// token midpoint. CTL pool randomised per call.
225    #[must_use]
226    pub fn control_byte_in_token(header_name: &str, scheme: &str, token: &str) -> Self {
227        let clean = sanitise_token(token);
228        let ctl = pick_from(CONTROL_BYTE_POOL, b'\t');
229        // §15 panic fix (sibling of cookie_smuggle::control_byte_in_value):
230        // sanitise_token keeps multibyte UTF-8, so a raw `len/2` split would
231        // panic when a codepoint straddles the midpoint (token "éa" → mid=1 =
232        // middle of `é`). Snap to a char boundary via the shared helper.
233        let mid = crate::floor_char_boundary(&clean, clean.len() / 2);
234        let value = format!(
235            "{} {}{}{}",
236            scheme,
237            &clean[..mid],
238            ctl as char,
239            &clean[mid..]
240        );
241        Self::finalise(
242            AuthHeaderVariant::ControlByteInToken,
243            vec![(header_name.to_string(), value)],
244            format!("Control byte 0x{ctl:02x} inside token — strict reject vs lax strip"),
245        )
246    }
247}
248
249impl SmuggleProbe for AuthSmuggleProbe {
250    fn canary(&self) -> &Canary {
251        &self.canary
252    }
253
254    fn technique(&self) -> String {
255        let suffix = match self.variant {
256            AuthHeaderVariant::LowercaseScheme => "lowercase-scheme",
257            AuthHeaderVariant::NoWhitespaceAfterScheme => "no-whitespace-after-scheme",
258            AuthHeaderVariant::TabBetweenSchemeAndToken => "tab-between-scheme-and-token",
259            AuthHeaderVariant::MultipleSpacesAfterScheme => "multiple-spaces-after-scheme",
260            AuthHeaderVariant::DuplicateHeaderFirstWinsBenign => {
261                "duplicate-header-first-wins-benign"
262            }
263            AuthHeaderVariant::QuotedScheme => "quoted-scheme",
264            AuthHeaderVariant::TrailingJunkAfterToken => "trailing-junk-after-token",
265            AuthHeaderVariant::ControlByteInToken => "control-byte-in-token",
266        };
267        format!("auth.{suffix}")
268    }
269
270    fn description(&self) -> &str {
271        &self.description
272    }
273
274    fn artifact(&self) -> SmuggleArtifact {
275        SmuggleArtifact::Headers(self.header_lines.clone())
276    }
277}
278
/// Control bytes the
/// [`ControlByteInToken`](AuthHeaderVariant::ControlByteInToken) probe
/// may inject. None of them is part of the `token68` alphabet
/// (RFC 7235 §2.1), so strict parsers reject them, while various lax
/// parsers silently strip them.
///
/// CR, LF and NUL are deliberately absent: they would break header
/// framing rather than exercise token parsing.
pub(crate) const CONTROL_BYTE_POOL: &[u8] = &[
    0x09, // HTAB
    0x0B, // VT
    0x0C, // FF
    0x1F, // US
    0x7F, // DEL
];
290
/// Return a copy of `s` with every CR, LF and NUL character removed.
///
/// Those three bytes terminate or corrupt an HTTP header line on every
/// stack, so even probes that deliberately explore lax parsing must not
/// carry them. All other characters — including other control bytes
/// and multibyte UTF-8 — pass through unchanged.
fn sanitise_token(s: &str) -> String {
    let mut kept = String::with_capacity(s.len());
    for ch in s.chars() {
        if !matches!(ch, '\r' | '\n' | '\0') {
            kept.push(ch);
        }
    }
    kept
}
299
300/// Enumerate one probe per variant, seeded with `scheme` and
301/// `token`. Defaults to the `Authorization` header. Pass
302/// `"Proxy-Authorization"` as `header_name` to target the proxy
303/// auth surface (RFC 7235 §4.4).
304#[must_use]
305pub fn all_variants(header_name: &str, scheme: &str, token: &str) -> Vec<AuthSmuggleProbe> {
306    vec![
307        AuthSmuggleProbe::lowercase_scheme(header_name, scheme, token),
308        AuthSmuggleProbe::no_whitespace_after_scheme(header_name, scheme, token),
309        AuthSmuggleProbe::tab_between_scheme_and_token(header_name, scheme, token),
310        AuthSmuggleProbe::multiple_spaces_after_scheme(header_name, scheme, token),
311        AuthSmuggleProbe::duplicate_header_first_wins_benign(
312            header_name,
313            scheme,
314            "benign-token-aaaa",
315            token,
316        ),
317        AuthSmuggleProbe::quoted_scheme(header_name, scheme, token),
318        AuthSmuggleProbe::trailing_junk_after_token(header_name, scheme, token, "junk-tail"),
319        AuthSmuggleProbe::control_byte_in_token(header_name, scheme, token),
320    ]
321}
322
#[cfg(test)]
mod tests {
    // Unit tests for the probe builders: one shape test per variant,
    // plus the cross-cutting invariants (sanitising, length cap,
    // canary uniqueness, no panics on empty or multibyte input).
    use super::*;
    use std::collections::HashSet;

    // The sweep must cover every variant exactly once.
    #[test]
    fn sweep_emits_eight_distinct_variants() {
        let v = all_variants("Authorization", "Bearer", "eyJhbGciOiJ");
        assert_eq!(v.len(), 8);
        let kinds: HashSet<_> = v.iter().map(|p| p.variant).collect();
        assert_eq!(kinds.len(), 8);
    }

    #[test]
    fn lowercase_scheme_probe_actually_lowercases_the_scheme() {
        let p = AuthSmuggleProbe::lowercase_scheme("Authorization", "Bearer", "X");
        let (_, v) = &p.header_lines[0];
        assert!(v.starts_with("bearer "), "expected lowercase scheme: {v:?}");
        assert!(
            !v.starts_with("Bearer "),
            "must NOT preserve original case: {v:?}"
        );
    }

    #[test]
    fn no_whitespace_probe_has_no_sp_between_scheme_and_token() {
        let p = AuthSmuggleProbe::no_whitespace_after_scheme("Authorization", "Bearer", "Token");
        let (_, v) = &p.header_lines[0];
        // First SP would split into scheme + token; the probe MUST
        // not have one in the wire form.
        assert!(
            !v.contains(' '),
            "no-whitespace probe must contain zero SPs, got: {v:?}"
        );
        assert!(v.starts_with("BearerToken"));
    }

    #[test]
    fn tab_probe_uses_tab_not_space() {
        let p = AuthSmuggleProbe::tab_between_scheme_and_token("Authorization", "Bearer", "T");
        let (_, v) = &p.header_lines[0];
        assert!(v.contains('\t'), "expected TAB in header value: {v:?}");
        assert!(
            !v.contains(' '),
            "TAB probe must not also carry a space (would defeat the test)"
        );
    }

    // The space count is random per call, so only the range is checked.
    #[test]
    fn multiple_spaces_probe_has_three_to_seven_spaces() {
        let p = AuthSmuggleProbe::multiple_spaces_after_scheme("Authorization", "Bearer", "T");
        let (_, v) = &p.header_lines[0];
        // Count consecutive spaces between "Bearer" and "T".
        let after_bearer = v.trim_start_matches("Bearer");
        let space_count = after_bearer.chars().take_while(|&c| c == ' ').count();
        assert!(
            (3..=7).contains(&space_count),
            "expected 3..=7 spaces, got {space_count}"
        );
    }

    #[test]
    fn duplicate_header_probe_emits_two_header_lines_same_name() {
        let p = AuthSmuggleProbe::duplicate_header_first_wins_benign(
            "Authorization",
            "Bearer",
            "benign",
            "smuggle",
        );
        assert_eq!(p.header_lines.len(), 2);
        assert_eq!(p.header_lines[0].0, "Authorization");
        assert_eq!(p.header_lines[1].0, "Authorization");
        // First is benign, second is smuggle.
        assert!(p.header_lines[0].1.contains("benign"));
        assert!(p.header_lines[1].1.contains("smuggle"));
    }

    #[test]
    fn quoted_scheme_probe_wraps_scheme_in_double_quotes() {
        let p = AuthSmuggleProbe::quoted_scheme("Authorization", "Bearer", "T");
        let (_, v) = &p.header_lines[0];
        assert!(v.starts_with("\"Bearer\""), "got: {v:?}");
    }

    #[test]
    fn quoted_scheme_strips_inner_quotes_from_scheme() {
        // Anti-rig: nested quotes would render the probe ambiguous.
        let p = AuthSmuggleProbe::quoted_scheme("Authorization", "Be\"a\"rer", "T");
        let (_, v) = &p.header_lines[0];
        // Exactly two quotes (the wrappers).
        assert_eq!(
            v.matches('"').count(),
            2,
            "expected exactly 2 quotes (the wrappers), got: {v:?}"
        );
    }

    #[test]
    fn trailing_junk_probe_appends_extra_bytes_after_token() {
        let p =
            AuthSmuggleProbe::trailing_junk_after_token("Authorization", "Bearer", "TOK", "EXTRA");
        let (_, v) = &p.header_lines[0];
        // Format is "Bearer <token> <junk>" so two SPs split into 3
        // segments.
        let parts: Vec<&str> = v.splitn(3, ' ').collect();
        assert_eq!(parts.len(), 3);
        assert_eq!(parts[0], "Bearer");
        assert_eq!(parts[1], "TOK");
        assert_eq!(parts[2], "EXTRA");
    }

    // The injected byte is chosen per call, so the test only checks
    // that some byte from the pool is present.
    #[test]
    fn ctl_probe_injects_a_byte_from_the_pool() {
        let p = AuthSmuggleProbe::control_byte_in_token("Authorization", "Bearer", "ABCDEF");
        let (_, v) = &p.header_lines[0];
        let bytes = v.as_bytes();
        assert!(
            bytes.iter().any(|b| CONTROL_BYTE_POOL.contains(b)),
            "no CTL byte found in header: {v:?}"
        );
    }

    #[test]
    fn sanitise_strips_cr_lf_nul_from_token() {
        // Anti-rig: CR/LF/NUL must NEVER reach the wire even when the
        // probe explores lax parsers — they break framing universally.
        let p = AuthSmuggleProbe::lowercase_scheme("Authorization", "Bearer", "to\rke\nn\0X");
        let (_, v) = &p.header_lines[0];
        assert!(!v.contains('\r'));
        assert!(!v.contains('\n'));
        assert!(!v.contains('\0'));
    }

    // Identical inputs must still yield different canary tokens.
    #[test]
    fn every_probe_carries_a_distinct_canary() {
        let a = AuthSmuggleProbe::lowercase_scheme("Authorization", "Bearer", "x");
        let b = AuthSmuggleProbe::lowercase_scheme("Authorization", "Bearer", "x");
        assert_ne!(a.canary.token, b.canary.token);
        assert_eq!(a.canary.token.len(), 16);
    }

    // A token four times the cap must come back truncated to the cap.
    #[test]
    fn header_value_capped_at_max() {
        let huge = "x".repeat(MAX_AUTH_HEADER_BYTES * 4);
        let p = AuthSmuggleProbe::lowercase_scheme("Authorization", "Bearer", &huge);
        let (_, v) = &p.header_lines[0];
        assert!(
            v.len() <= MAX_AUTH_HEADER_BYTES,
            "header value exceeded cap: {}",
            v.len()
        );
    }

    #[test]
    fn proxy_authorization_header_name_also_supported() {
        let p = AuthSmuggleProbe::lowercase_scheme("Proxy-Authorization", "Bearer", "T");
        assert_eq!(p.header_lines[0].0, "Proxy-Authorization");
    }

    // Smoke test: reaching the end without a panic is the assertion.
    #[test]
    fn empty_inputs_do_not_panic_in_any_builder() {
        let _ = AuthSmuggleProbe::lowercase_scheme("Authorization", "", "");
        let _ = AuthSmuggleProbe::no_whitespace_after_scheme("Authorization", "", "");
        let _ = AuthSmuggleProbe::tab_between_scheme_and_token("Authorization", "", "");
        let _ = AuthSmuggleProbe::multiple_spaces_after_scheme("Authorization", "", "");
        let _ = AuthSmuggleProbe::duplicate_header_first_wins_benign("Authorization", "", "", "");
        let _ = AuthSmuggleProbe::quoted_scheme("Authorization", "", "");
        let _ = AuthSmuggleProbe::trailing_junk_after_token("Authorization", "", "", "");
        // control_byte_in_token's `mid = clean.len() / 2 = 0`; slice
        // [..0] is empty, [0..] is empty. Verify no panic.
        let _ = AuthSmuggleProbe::control_byte_in_token("Authorization", "", "");
    }

    #[test]
    fn control_byte_in_token_multibyte_does_not_panic() {
        // §15 regression (sibling of cookie_smuggle): sanitise_token keeps
        // multibyte UTF-8, so the pre-fix `&clean[..clean.len()/2]` split
        // panicked when a codepoint straddled the midpoint. "éa" is 3 bytes;
        // len/2 = 1 is the middle of `é`. Now floor_char_boundary snaps it.
        for tok in ["éa", "aé", "日本語", "🦀x", "Bearer-café-日"] {
            let p = AuthSmuggleProbe::control_byte_in_token("Authorization", "Bearer", tok);
            // The single header pair's value must be valid UTF-8 + non-empty;
            // reaching here without a panic is the assertion.
            assert!(
                p.header_lines.iter().any(|(_, v)| !v.is_empty()),
                "control-byte-in-token must not panic on multibyte token {tok:?}"
            );
        }
    }
}
512}