// Source: wafrift_encoding/encoding/strategy.rs

//! Strategy enum and main encode() dispatcher.
2
3use super::keyword::{
4    between_obfuscate, case_alternate, mysql_versioned_comment, percentage_prefix,
5    random_case_alternate, space_to_comment, space_to_dash, space_to_hash, space_to_plus,
6    space_to_random_blank, sql_comment_insert, unmagic_quotes, whitespace_insert,
7};
8use super::structural::{
9    base64_encode, base64_url_encode, chunked_split, deflate_encode, gzip_encode, hex_encode,
10    null_byte_inject, overlong_utf8, overlong_utf8_more, parameter_pollute, utf7_encode,
11};
12use super::unicode::{
13    fullwidth_encode, homoglyph_encode, html_entity_decimal_encode, html_entity_encode,
14    iis_unicode_encode, json_string_encode, unicode_encode,
15};
16use super::url::{double_url_encode, triple_url_encode, url_encode, url_encode_lower};
17use crate::error::EncodeError;
18
/// Maximum accepted input payload size in bytes (8 MiB).
///
/// Inputs longer than this are rejected up front to prevent OOM on
/// adversarial input.
pub const MAX_PAYLOAD_SIZE: usize = 8 * 1024 * 1024;
21
22/// Available encoding strategies.
23///
24/// # Context hints
25/// Many strategies are only semantically correct in specific parser contexts.
26/// Use [`Strategy::contexts`] to query the applicable contexts for a strategy.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
28#[non_exhaustive]
29pub enum Strategy {
30    /// Standard URL encoding (%XX) — preserves unreserved chars per RFC 3986.
31    /// Safe for: query strings, paths, form data.
32    UrlEncode,
33    /// Lowercase hex URL encoding (%xx) — same semantics as UrlEncode.
34    /// Safe for: query strings, paths, form data.
35    UrlEncodeLower,
36    /// Double URL encoding (%25XX) — bypasses WAFs that decode once.
37    /// Safe for: query strings, paths, form data.
38    DoubleUrlEncode,
39    /// Triple URL encoding (%2525XX) — bypasses WAFs that decode twice.
40    /// Safe for: query strings, paths, form data.
41    TripleUrlEncode,
42    /// Unicode escape (\uXXXX) — ONLY safe when target parses JSON/JavaScript.
43    /// Unsafe for: raw HTTP parameters, headers, most server frameworks.
44    UnicodeEncode,
45    /// IIS/ASP percent Unicode (%uXXXX) — ONLY safe on IIS/ASP classic parsers.
46    /// Unsafe for: modern servers (nginx, Apache, Node.js, etc.).
47    IisUnicodeEncode,
48    /// JSON string encoding with Unicode escapes — ONLY safe in JSON contexts.
49    /// Unsafe for: raw HTTP parameters.
50    JsonEncode,
51    /// HTML entity encoding (&#xXX;) — ONLY safe in HTML contexts.
52    /// Unsafe for: raw HTTP parameters, JSON bodies.
53    HtmlEntityEncode,
54    /// HTML decimal entity encoding (<) — ONLY safe in HTML contexts.
55    /// Unsafe for: raw HTTP parameters, JSON bodies.
56    HtmlEntityDecimalEncode,
57    /// Alternating case (`SeLeCt`) — bypasses case-sensitive keyword filters.
58    /// Safe for: any text context where case is preserved.
59    CaseAlternation,
60    /// Random alternating case — non-deterministic variant of CaseAlternation.
61    /// Safe for: any text context where case is preserved.
62    RandomCase,
63    /// Tab insertion BETWEEN tokens — preserves keyword integrity.
64    /// Safe for: SQL contexts where whitespace separates tokens.
65    WhitespaceInsertion,
66    /// SQL comment insertion BETWEEN tokens — preserves keyword integrity.
67    /// Safe for: SQL contexts where comments are treated as whitespace.
68    SqlCommentInsertion,
69    /// MySQL versioned comment (`/*!50000SELECT*/`) — executed by MySQL, ignored by WAFs.
70    /// Safe for: MySQL backends.
71    MysqlVersionedComment,
72    /// Null byte injection (%00) — ONLY semantically correct for C-style string parsers.
73    /// Context: php, some CGI implementations.
74    NullByte,
75    /// Overlong UTF-8 encoding (2-byte) — ONLY works against legacy WAFs that normalize.
76    /// Context: iis-6, very old frontends.
77    OverlongUtf8,
78    /// Extended overlong UTF-8 encoding (3-byte) — broader coverage than OverlongUtf8.
79    /// Context: iis-6, very old frontends.
80    OverlongUtf8More,
81    /// Chunked transfer-encoding split — ONLY valid with `Transfer-Encoding: chunked`.
82    /// Context: http-request-body.
83    ChunkedSplit,
84    /// HTTP parameter pollution — duplicate parameter with benign first value.
85    /// Safe for: query strings, form data.
86    ParameterPollution,
87    /// Base64 encoding (standard alphabet).
88    /// Safe for: headers, bodies, query strings (may need URL encoding after).
89    Base64Encode,
90    /// Base64 URL-safe encoding (-_ no padding).
91    /// Safe for: URL contexts where +/ would be mangled.
92    Base64UrlEncode,
93    /// Hex encoding.
94    /// Safe for: any byte context.
95    HexEncode,
96    /// UTF-7 encoding per RFC 2152.
97    /// Context: legacy IIS/.NET parsers that decode UTF-7.
98    Utf7Encode,
99    /// Gzip compression — ONLY valid with `Content-Encoding: gzip`.
100    /// Context: http-request-body.
101    GzipEncode,
102    /// Deflate compression — ONLY valid with `Content-Encoding: deflate`.
103    /// Context: http-request-body.
104    DeflateEncode,
105    /// Replace spaces with SQL comments (`/**/`).
106    /// Safe for: SQL contexts.
107    SpaceToComment,
108    /// Replace spaces with dash comments (`--`).
109    /// Safe for: SQL contexts.
110    SpaceToDash,
111    /// Replace spaces with hash comments (`#`).
112    /// Safe for: MySQL contexts.
113    SpaceToHash,
114    /// Replace spaces with plus signs (`+`).
115    /// Safe for: URL-encoded form data.
116    SpaceToPlus,
117    /// Replace spaces with random blank characters.
118    /// Safe for: SQL contexts.
119    SpaceToRandomBlank,
120    /// Prefix each character with `%` — lightweight bypass.
121    /// Safe for: contexts that strip `%` before parsing.
122    PercentagePrefix,
123    /// Between obfuscation (`=` → `BETWEEN # AND #`).
124    /// Safe for: SQL contexts.
125    BetweenObfuscation,
126    /// Unmagic quotes (`%bf%27`) — multi-byte charset quote escape.
127    /// Context: PHP with GBK/Big5/Shift-JIS connections.
128    UnmagicQuotes,
129    /// Fullwidth Unicode (SELECTuntouched) — bypasses ASCII keyword regex.
130    /// Context: backends that perform NFKC normalization (Java, .NET, Python 3, PostgreSQL).
131    FullwidthEncode,
132    /// Homoglyph substitution — visually identical Unicode chars for `'`, `"`, `<`, `>`, `=`.
133    /// Context: byte-level WAFs with Unicode-tolerant backends.
134    HomoglyphEncode,
135}
136
137impl Strategy {
138    /// Returns the string identifier for this encoding strategy.
139    #[must_use]
140    pub const fn as_str(&self) -> &'static str {
141        match self {
142            Self::UrlEncode => "UrlEncode",
143            Self::UrlEncodeLower => "UrlEncodeLower",
144            Self::DoubleUrlEncode => "DoubleUrlEncode",
145            Self::TripleUrlEncode => "TripleUrlEncode",
146            Self::UnicodeEncode => "UnicodeEncode",
147            Self::IisUnicodeEncode => "IisUnicodeEncode",
148            Self::JsonEncode => "JsonEncode",
149            Self::HtmlEntityEncode => "HtmlEntityEncode",
150            Self::HtmlEntityDecimalEncode => "HtmlEntityDecimalEncode",
151            Self::CaseAlternation => "CaseAlternation",
152            Self::RandomCase => "RandomCase",
153            Self::WhitespaceInsertion => "WhitespaceInsertion",
154            Self::SqlCommentInsertion => "SqlCommentInsertion",
155            Self::MysqlVersionedComment => "MysqlVersionedComment",
156            Self::NullByte => "NullByte",
157            Self::OverlongUtf8 => "OverlongUtf8",
158            Self::OverlongUtf8More => "OverlongUtf8More",
159            Self::ChunkedSplit => "ChunkedSplit",
160            Self::ParameterPollution => "ParameterPollution",
161            Self::Base64Encode => "Base64Encode",
162            Self::Base64UrlEncode => "Base64UrlEncode",
163            Self::HexEncode => "HexEncode",
164            Self::Utf7Encode => "Utf7Encode",
165            Self::GzipEncode => "GzipEncode",
166            Self::DeflateEncode => "DeflateEncode",
167            Self::SpaceToComment => "SpaceToComment",
168            Self::SpaceToDash => "SpaceToDash",
169            Self::SpaceToHash => "SpaceToHash",
170            Self::SpaceToPlus => "SpaceToPlus",
171            Self::SpaceToRandomBlank => "SpaceToRandomBlank",
172            Self::PercentagePrefix => "PercentagePrefix",
173            Self::BetweenObfuscation => "BetweenObfuscation",
174            Self::UnmagicQuotes => "UnmagicQuotes",
175            Self::FullwidthEncode => "FullwidthEncode",
176            Self::HomoglyphEncode => "HomoglyphEncode",
177        }
178    }
179
180    /// Returns the parser contexts where this strategy is semantically safe.
181    ///
182    /// An empty slice means the strategy is generally applicable.
183    /// Callers should gate strategy application by matching these contexts
184    /// against the target type (e.g., `json`, `html`, `sql`, `php`, `iis-6`).
185    #[must_use]
186    pub const fn contexts(&self) -> &'static [&'static str] {
187        match self {
188            Self::UrlEncode
189            | Self::UrlEncodeLower
190            | Self::DoubleUrlEncode
191            | Self::TripleUrlEncode
192            | Self::ParameterPollution => &[],
193            Self::UnicodeEncode => &["json", "javascript"],
194            Self::IisUnicodeEncode => &["iis", "asp"],
195            Self::JsonEncode => &["json"],
196            Self::HtmlEntityEncode | Self::HtmlEntityDecimalEncode => &["html"],
197            Self::CaseAlternation | Self::RandomCase | Self::WhitespaceInsertion => &[],
198            Self::SqlCommentInsertion
199            | Self::MysqlVersionedComment
200            | Self::SpaceToComment
201            | Self::SpaceToDash
202            | Self::SpaceToRandomBlank
203            | Self::BetweenObfuscation => &["sql"],
204            Self::SpaceToHash => &["sql", "mysql"],
205            Self::SpaceToPlus => &["url-encoded"],
206            Self::NullByte => &["php", "cgi"],
207            Self::OverlongUtf8 | Self::OverlongUtf8More => &["iis-6"],
208            Self::ChunkedSplit => &["http-request-body"],
209            Self::Base64Encode | Self::Base64UrlEncode | Self::HexEncode => &[],
210            Self::Utf7Encode => &["iis", "legacy-dotnet"],
211            Self::GzipEncode | Self::DeflateEncode => &["http-request-body"],
212            Self::PercentagePrefix => &[],
213            Self::UnmagicQuotes => &["php", "gbk", "big5", "shift-jis"],
214            Self::FullwidthEncode => &["nfkc", "java", "dotnet", "python3", "postgresql"],
215            Self::HomoglyphEncode => &[],
216        }
217    }
218}
219
220fn check_size(payload: &[u8]) -> Result<(), EncodeError> {
221    if payload.len() > MAX_PAYLOAD_SIZE {
222        Err(EncodeError::PayloadTooLarge {
223            max: MAX_PAYLOAD_SIZE,
224            actual: payload.len(),
225        })
226    } else {
227        Ok(())
228    }
229}
230
231/// Encode a payload using the selected strategy.
232///
233/// # Errors
234/// Returns `EncodeError::PayloadTooLarge` if the input exceeds [`MAX_PAYLOAD_SIZE`].
235/// Returns `EncodeError::InvalidUtf8` for text-oriented strategies when the input
236/// contains invalid UTF-8.
237pub fn encode(payload: impl AsRef<[u8]>, strategy: Strategy) -> Result<String, EncodeError> {
238    let payload = payload.as_ref();
239    check_size(payload)?;
240
241    match strategy {
242        Strategy::UrlEncode => Ok(url_encode(payload)),
243        Strategy::UrlEncodeLower => Ok(url_encode_lower(payload)),
244        Strategy::DoubleUrlEncode => Ok(double_url_encode(payload)),
245        Strategy::TripleUrlEncode => Ok(triple_url_encode(payload)),
246        Strategy::UnicodeEncode => {
247            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
248            Ok(unicode_encode(text))
249        }
250        Strategy::IisUnicodeEncode => {
251            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
252            Ok(iis_unicode_encode(text))
253        }
254        Strategy::JsonEncode => {
255            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
256            Ok(json_string_encode(text))
257        }
258        Strategy::HtmlEntityEncode => {
259            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
260            Ok(html_entity_encode(text))
261        }
262        Strategy::HtmlEntityDecimalEncode => {
263            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
264            Ok(html_entity_decimal_encode(text))
265        }
266        Strategy::CaseAlternation => {
267            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
268            Ok(case_alternate(text))
269        }
270        Strategy::RandomCase => {
271            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
272            Ok(random_case_alternate(text))
273        }
274        Strategy::WhitespaceInsertion => {
275            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
276            Ok(whitespace_insert(text))
277        }
278        Strategy::SqlCommentInsertion => {
279            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
280            Ok(sql_comment_insert(text))
281        }
282        Strategy::MysqlVersionedComment => {
283            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
284            Ok(mysql_versioned_comment(text, 50_000))
285        }
286        Strategy::NullByte => Ok(null_byte_inject(payload)),
287        Strategy::OverlongUtf8 => Ok(overlong_utf8(payload)),
288        Strategy::OverlongUtf8More => Ok(overlong_utf8_more(payload)),
289        Strategy::ChunkedSplit => {
290            let body = chunked_split(payload, 1024)?.body;
291            Ok(String::from_utf8_lossy(&body).into_owned())
292        }
293        Strategy::ParameterPollution => Ok(parameter_pollute(payload)),
294        Strategy::Base64Encode => Ok(base64_encode(payload)),
295        Strategy::Base64UrlEncode => Ok(base64_url_encode(payload)),
296        Strategy::HexEncode => Ok(hex_encode(payload)),
297        Strategy::Utf7Encode => {
298            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
299            Ok(utf7_encode(text))
300        }
301        Strategy::GzipEncode => Ok(gzip_encode(payload)?),
302        Strategy::DeflateEncode => Ok(deflate_encode(payload)?),
303        Strategy::SpaceToComment => {
304            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
305            Ok(space_to_comment(text))
306        }
307        Strategy::SpaceToDash => {
308            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
309            Ok(space_to_dash(text))
310        }
311        Strategy::SpaceToHash => {
312            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
313            Ok(space_to_hash(text))
314        }
315        Strategy::SpaceToPlus => {
316            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
317            Ok(space_to_plus(text))
318        }
319        Strategy::SpaceToRandomBlank => {
320            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
321            Ok(space_to_random_blank(text))
322        }
323        Strategy::PercentagePrefix => {
324            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
325            Ok(percentage_prefix(text))
326        }
327        Strategy::BetweenObfuscation => {
328            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
329            Ok(between_obfuscate(text))
330        }
331        Strategy::UnmagicQuotes => Ok(unmagic_quotes(payload)),
332        Strategy::FullwidthEncode => {
333            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
334            Ok(fullwidth_encode(text))
335        }
336        Strategy::HomoglyphEncode => {
337            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
338            Ok(homoglyph_encode(text))
339        }
340    }
341}
342
343/// All available strategies in escalation order (least aggressive → most aggressive).
344#[must_use]
345pub fn all_strategies() -> Vec<Strategy> {
346    let mut strategies = vec![
347        Strategy::CaseAlternation,
348        Strategy::RandomCase,
349        Strategy::WhitespaceInsertion,
350        Strategy::SqlCommentInsertion,
351        Strategy::SpaceToPlus,
352        Strategy::SpaceToRandomBlank,
353        Strategy::SpaceToComment,
354        Strategy::SpaceToDash,
355        Strategy::SpaceToHash,
356        Strategy::UrlEncode,
357        Strategy::UrlEncodeLower,
358        Strategy::DoubleUrlEncode,
359        Strategy::UnicodeEncode,
360        Strategy::IisUnicodeEncode,
361        Strategy::JsonEncode,
362        Strategy::HtmlEntityEncode,
363        Strategy::HtmlEntityDecimalEncode,
364        Strategy::NullByte,
365        Strategy::PercentagePrefix,
366        Strategy::TripleUrlEncode,
367        Strategy::ChunkedSplit,
368        Strategy::ParameterPollution,
369        Strategy::MysqlVersionedComment,
370        Strategy::Base64Encode,
371        Strategy::Base64UrlEncode,
372        Strategy::OverlongUtf8,
373        Strategy::OverlongUtf8More,
374        Strategy::HexEncode,
375        Strategy::Utf7Encode,
376        Strategy::BetweenObfuscation,
377        Strategy::UnmagicQuotes,
378        Strategy::FullwidthEncode,
379        Strategy::HomoglyphEncode,
380        Strategy::GzipEncode,
381        Strategy::DeflateEncode,
382    ];
383    strategies.sort_by(|a, b| {
384        super::layered::aggressiveness(*a)
385            .partial_cmp(&super::layered::aggressiveness(*b))
386            .unwrap_or(std::cmp::Ordering::Equal)
387    });
388    strategies
389}