Skip to main content

wafrift_encoding/encoding/
strategy.rs

1//! Strategy enum and main encode() dispatcher.
2
3use super::keyword::{
4    between_obfuscate, case_alternate, mysql_versioned_comment, percentage_prefix,
5    random_case_alternate, space_to_comment, space_to_dash, space_to_hash, space_to_plus,
6    space_to_random_blank, sql_comment_insert, unmagic_quotes, whitespace_insert,
7};
8use super::structural::{
9    base64_encode, base64_url_encode, chunked_split, deflate_encode, gzip_encode, hex_encode,
10    null_byte_inject, overlong_utf8, overlong_utf8_more, parameter_pollute, utf7_encode,
11};
12use super::unicode::{
13    fullwidth_encode, homoglyph_encode, html_entity_decimal_encode, html_entity_encode,
14    iis_unicode_encode, json_string_encode, unicode_encode,
15};
16use super::url::{double_url_encode, triple_url_encode, url_encode, url_encode_lower};
17use crate::error::EncodeError;
18
/// Upper bound, in bytes, on the payload accepted for encoding (8 MiB).
///
/// Inputs longer than this are rejected up front to prevent OOM on adversarial input.
pub const MAX_PAYLOAD_SIZE: usize = 8 << 20;
21
22/// Available encoding strategies.
23///
24/// # Context hints
25/// Many strategies are only semantically correct in specific parser contexts.
26/// Use [`Strategy::contexts`] to query the applicable contexts for a strategy.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
28#[non_exhaustive]
29pub enum Strategy {
30    /// Standard URL encoding (%XX) — preserves unreserved chars per RFC 3986.
31    /// Safe for: query strings, paths, form data.
32    UrlEncode,
33    /// Lowercase hex URL encoding (%xx) — same semantics as `UrlEncode`.
34    /// Safe for: query strings, paths, form data.
35    UrlEncodeLower,
36    /// Double URL encoding (%25XX) — bypasses WAFs that decode once.
37    /// Safe for: query strings, paths, form data.
38    DoubleUrlEncode,
39    /// Triple URL encoding (%2525XX) — bypasses WAFs that decode twice.
40    /// Safe for: query strings, paths, form data.
41    TripleUrlEncode,
42    /// Unicode escape (\uXXXX) — ONLY safe when target parses JSON/JavaScript.
43    /// Unsafe for: raw HTTP parameters, headers, most server frameworks.
44    UnicodeEncode,
45    /// IIS/ASP percent Unicode (%uXXXX) — ONLY safe on IIS/ASP classic parsers.
46    /// Unsafe for: modern servers (nginx, Apache, Node.js, etc.).
47    IisUnicodeEncode,
48    /// JSON string encoding with Unicode escapes — ONLY safe in JSON contexts.
49    /// Unsafe for: raw HTTP parameters.
50    JsonEncode,
51    /// HTML entity encoding (&#xXX;) — ONLY safe in HTML contexts.
52    /// Unsafe for: raw HTTP parameters, JSON bodies.
53    HtmlEntityEncode,
54    /// HTML decimal entity encoding (<) — ONLY safe in HTML contexts.
55    /// Unsafe for: raw HTTP parameters, JSON bodies.
56    HtmlEntityDecimalEncode,
57    /// Alternating case (`SeLeCt`) — bypasses case-sensitive keyword filters.
58    /// Safe for: any text context where case is preserved.
59    CaseAlternation,
60    /// Random alternating case — non-deterministic variant of `CaseAlternation`.
61    /// Safe for: any text context where case is preserved.
62    RandomCase,
63    /// Tab insertion BETWEEN tokens — preserves keyword integrity.
64    /// Safe for: SQL contexts where whitespace separates tokens.
65    WhitespaceInsertion,
66    /// SQL comment insertion BETWEEN tokens — preserves keyword integrity.
67    /// Safe for: SQL contexts where comments are treated as whitespace.
68    SqlCommentInsertion,
69    /// `MySQL` versioned comment (`/*!50000SELECT*/`) — executed by `MySQL`, ignored by WAFs.
70    /// Safe for: `MySQL` backends.
71    MysqlVersionedComment,
72    /// Null byte injection (%00) — ONLY semantically correct for C-style string parsers.
73    /// Context: php, some CGI implementations.
74    NullByte,
75    /// Overlong UTF-8 encoding (2-byte) — ONLY works against legacy WAFs that normalize.
76    /// Context: iis-6, very old frontends.
77    OverlongUtf8,
78    /// Extended overlong UTF-8 encoding (3-byte) — broader coverage than `OverlongUtf8`.
79    /// Context: iis-6, very old frontends.
80    OverlongUtf8More,
81    /// Chunked transfer-encoding split — ONLY valid with `Transfer-Encoding: chunked`.
82    /// Context: http-request-body.
83    ChunkedSplit,
84    /// HTTP parameter pollution — duplicate parameter with benign first value.
85    /// Safe for: query strings, form data.
86    ParameterPollution,
87    /// Base64 encoding (standard alphabet).
88    /// Safe for: headers, bodies, query strings (may need URL encoding after).
89    Base64Encode,
90    /// Base64 URL-safe encoding (-_ no padding).
91    /// Safe for: URL contexts where +/ would be mangled.
92    Base64UrlEncode,
93    /// Hex encoding.
94    /// Safe for: any byte context.
95    HexEncode,
96    /// UTF-7 encoding per RFC 2152.
97    /// Context: legacy IIS/.NET parsers that decode UTF-7.
98    Utf7Encode,
99    /// Gzip compression — ONLY valid with `Content-Encoding: gzip`.
100    /// Context: http-request-body.
101    GzipEncode,
102    /// Deflate compression — ONLY valid with `Content-Encoding: deflate`.
103    /// Context: http-request-body.
104    DeflateEncode,
105    /// Replace spaces with SQL comments (`/**/`).
106    /// Safe for: SQL contexts.
107    SpaceToComment,
108    /// Replace spaces with dash comments (`--`).
109    /// Safe for: SQL contexts.
110    SpaceToDash,
111    /// Replace spaces with hash comments (`#`).
112    /// Safe for: `MySQL` contexts.
113    SpaceToHash,
114    /// Replace spaces with plus signs (`+`).
115    /// Safe for: URL-encoded form data.
116    SpaceToPlus,
117    /// Replace spaces with random blank characters.
118    /// Safe for: SQL contexts.
119    SpaceToRandomBlank,
120    /// Prefix each character with `%` — lightweight bypass.
121    /// Safe for: contexts that strip `%` before parsing.
122    PercentagePrefix,
123    /// Between obfuscation (`=` → `BETWEEN # AND #`).
124    /// Safe for: SQL contexts.
125    BetweenObfuscation,
126    /// Unmagic quotes (`%bf%27`) — multi-byte charset quote escape.
127    /// Context: PHP with GBK/Big5/Shift-JIS connections.
128    UnmagicQuotes,
129    /// Fullwidth Unicode (`SELECTuntouched`) — bypasses ASCII keyword regex.
130    /// Context: backends that perform NFKC normalization (Java, .NET, Python 3, `PostgreSQL`).
131    FullwidthEncode,
132    /// Homoglyph substitution — visually identical Unicode chars for `'`, `"`, `<`, `>`, `=`.
133    /// Context: byte-level WAFs with Unicode-tolerant backends.
134    HomoglyphEncode,
135}
136
137impl Strategy {
138    /// Returns the string identifier for this encoding strategy.
139    #[must_use]
140    pub const fn as_str(&self) -> &'static str {
141        match self {
142            Self::UrlEncode => "UrlEncode",
143            Self::UrlEncodeLower => "UrlEncodeLower",
144            Self::DoubleUrlEncode => "DoubleUrlEncode",
145            Self::TripleUrlEncode => "TripleUrlEncode",
146            Self::UnicodeEncode => "UnicodeEncode",
147            Self::IisUnicodeEncode => "IisUnicodeEncode",
148            Self::JsonEncode => "JsonEncode",
149            Self::HtmlEntityEncode => "HtmlEntityEncode",
150            Self::HtmlEntityDecimalEncode => "HtmlEntityDecimalEncode",
151            Self::CaseAlternation => "CaseAlternation",
152            Self::RandomCase => "RandomCase",
153            Self::WhitespaceInsertion => "WhitespaceInsertion",
154            Self::SqlCommentInsertion => "SqlCommentInsertion",
155            Self::MysqlVersionedComment => "MysqlVersionedComment",
156            Self::NullByte => "NullByte",
157            Self::OverlongUtf8 => "OverlongUtf8",
158            Self::OverlongUtf8More => "OverlongUtf8More",
159            Self::ChunkedSplit => "ChunkedSplit",
160            Self::ParameterPollution => "ParameterPollution",
161            Self::Base64Encode => "Base64Encode",
162            Self::Base64UrlEncode => "Base64UrlEncode",
163            Self::HexEncode => "HexEncode",
164            Self::Utf7Encode => "Utf7Encode",
165            Self::GzipEncode => "GzipEncode",
166            Self::DeflateEncode => "DeflateEncode",
167            Self::SpaceToComment => "SpaceToComment",
168            Self::SpaceToDash => "SpaceToDash",
169            Self::SpaceToHash => "SpaceToHash",
170            Self::SpaceToPlus => "SpaceToPlus",
171            Self::SpaceToRandomBlank => "SpaceToRandomBlank",
172            Self::PercentagePrefix => "PercentagePrefix",
173            Self::BetweenObfuscation => "BetweenObfuscation",
174            Self::UnmagicQuotes => "UnmagicQuotes",
175            Self::FullwidthEncode => "FullwidthEncode",
176            Self::HomoglyphEncode => "HomoglyphEncode",
177        }
178    }
179
180    /// Returns the parser contexts where this strategy is semantically safe.
181    ///
182    /// An empty slice means the strategy is generally applicable.
183    /// Callers should gate strategy application by matching these contexts
184    /// against the target type (e.g., `json`, `html`, `sql`, `php`, `iis-6`).
185    #[must_use]
186    pub const fn contexts(&self) -> &'static [&'static str] {
187        match self {
188            Self::UrlEncode
189            | Self::UrlEncodeLower
190            | Self::DoubleUrlEncode
191            | Self::TripleUrlEncode
192            | Self::ParameterPollution => &[],
193            Self::UnicodeEncode => &["json", "javascript"],
194            Self::IisUnicodeEncode => &["iis", "asp"],
195            Self::JsonEncode => &["json"],
196            Self::HtmlEntityEncode | Self::HtmlEntityDecimalEncode => &["html"],
197            Self::CaseAlternation | Self::RandomCase | Self::WhitespaceInsertion => &[],
198            Self::SqlCommentInsertion
199            | Self::MysqlVersionedComment
200            | Self::SpaceToComment
201            | Self::SpaceToDash
202            | Self::SpaceToRandomBlank
203            | Self::BetweenObfuscation => &["sql"],
204            Self::SpaceToHash => &["sql", "mysql"],
205            Self::SpaceToPlus => &["url-encoded"],
206            Self::NullByte => &["php", "cgi"],
207            Self::OverlongUtf8 | Self::OverlongUtf8More => &["iis-6"],
208            Self::ChunkedSplit => &["http-request-body"],
209            Self::Base64Encode | Self::Base64UrlEncode | Self::HexEncode => &[],
210            Self::Utf7Encode => &["iis", "legacy-dotnet"],
211            Self::GzipEncode | Self::DeflateEncode => &["http-request-body"],
212            Self::PercentagePrefix => &[],
213            Self::UnmagicQuotes => &["php", "gbk", "big5", "shift-jis"],
214            Self::FullwidthEncode => &["nfkc", "java", "dotnet", "python3", "postgresql"],
215            Self::HomoglyphEncode => &[],
216        }
217    }
218}
219
220fn check_size(payload: &[u8]) -> Result<(), EncodeError> {
221    if payload.len() > MAX_PAYLOAD_SIZE {
222        Err(EncodeError::PayloadTooLarge {
223            max: MAX_PAYLOAD_SIZE,
224            actual: payload.len(),
225        })
226    } else {
227        Ok(())
228    }
229}
230
231/// Encode a payload using the selected strategy.
232///
233/// # Errors
234/// Returns `EncodeError::PayloadTooLarge` if the input exceeds [`MAX_PAYLOAD_SIZE`].
235/// Returns `EncodeError::InvalidUtf8` for text-oriented strategies when the input
236/// contains invalid UTF-8.
237///
238/// # UTF-8 safety
239/// Text-oriented strategies validate UTF-8 via `std::str::from_utf8` and return
240/// `InvalidUtf8` on failure. No unsafe UTF-8 conversions (`from_utf8_unchecked`,
241/// lossy casts, etc.) are used in the encoding pipeline.
242pub fn encode(payload: impl AsRef<[u8]>, strategy: Strategy) -> Result<String, EncodeError> {
243    let payload = payload.as_ref();
244    check_size(payload)?;
245
246    match strategy {
247        Strategy::UrlEncode => Ok(url_encode(payload)),
248        Strategy::UrlEncodeLower => Ok(url_encode_lower(payload)),
249        Strategy::DoubleUrlEncode => Ok(double_url_encode(payload)),
250        Strategy::TripleUrlEncode => Ok(triple_url_encode(payload)),
251        Strategy::UnicodeEncode => {
252            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
253            Ok(unicode_encode(text))
254        }
255        Strategy::IisUnicodeEncode => {
256            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
257            Ok(iis_unicode_encode(text))
258        }
259        Strategy::JsonEncode => {
260            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
261            Ok(json_string_encode(text))
262        }
263        Strategy::HtmlEntityEncode => {
264            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
265            Ok(html_entity_encode(text))
266        }
267        Strategy::HtmlEntityDecimalEncode => {
268            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
269            Ok(html_entity_decimal_encode(text))
270        }
271        Strategy::CaseAlternation => {
272            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
273            Ok(case_alternate(text))
274        }
275        Strategy::RandomCase => {
276            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
277            Ok(random_case_alternate(text))
278        }
279        Strategy::WhitespaceInsertion => {
280            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
281            Ok(whitespace_insert(text))
282        }
283        Strategy::SqlCommentInsertion => {
284            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
285            Ok(sql_comment_insert(text))
286        }
287        Strategy::MysqlVersionedComment => {
288            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
289            Ok(mysql_versioned_comment(text, 50_000))
290        }
291        Strategy::NullByte => Ok(null_byte_inject(payload)?),
292        Strategy::OverlongUtf8 => Ok(overlong_utf8(payload)?),
293        Strategy::OverlongUtf8More => Ok(overlong_utf8_more(payload)?),
294        Strategy::ChunkedSplit => {
295            let body = chunked_split(payload, 1024)?.body;
296            String::from_utf8(body).map_err(|_| EncodeError::InvalidUtf8)
297        }
298        Strategy::ParameterPollution => Ok(parameter_pollute(payload)?),
299        Strategy::Base64Encode => Ok(base64_encode(payload)),
300        Strategy::Base64UrlEncode => Ok(base64_url_encode(payload)),
301        Strategy::HexEncode => Ok(hex_encode(payload)),
302        Strategy::Utf7Encode => {
303            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
304            Ok(utf7_encode(text))
305        }
306        Strategy::GzipEncode => Ok(gzip_encode(payload)?),
307        Strategy::DeflateEncode => Ok(deflate_encode(payload)?),
308        Strategy::SpaceToComment => {
309            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
310            Ok(space_to_comment(text))
311        }
312        Strategy::SpaceToDash => {
313            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
314            Ok(space_to_dash(text))
315        }
316        Strategy::SpaceToHash => {
317            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
318            Ok(space_to_hash(text))
319        }
320        Strategy::SpaceToPlus => {
321            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
322            Ok(space_to_plus(text))
323        }
324        Strategy::SpaceToRandomBlank => {
325            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
326            Ok(space_to_random_blank(text))
327        }
328        Strategy::PercentagePrefix => {
329            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
330            Ok(percentage_prefix(text))
331        }
332        Strategy::BetweenObfuscation => {
333            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
334            Ok(between_obfuscate(text))
335        }
336        Strategy::UnmagicQuotes => Ok(unmagic_quotes(payload)?),
337        Strategy::FullwidthEncode => {
338            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
339            Ok(fullwidth_encode(text))
340        }
341        Strategy::HomoglyphEncode => {
342            let text = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
343            Ok(homoglyph_encode(text))
344        }
345    }
346}
347
348/// All available strategies in escalation order (least aggressive → most aggressive).
349static ALL_STRATEGIES: std::sync::LazyLock<Vec<Strategy>> = std::sync::LazyLock::new(|| {
350    let mut strategies = vec![
351        Strategy::CaseAlternation,
352        Strategy::RandomCase,
353        Strategy::WhitespaceInsertion,
354        Strategy::SqlCommentInsertion,
355        Strategy::SpaceToPlus,
356        Strategy::SpaceToRandomBlank,
357        Strategy::SpaceToComment,
358        Strategy::SpaceToDash,
359        Strategy::SpaceToHash,
360        Strategy::UrlEncode,
361        Strategy::UrlEncodeLower,
362        Strategy::DoubleUrlEncode,
363        Strategy::UnicodeEncode,
364        Strategy::IisUnicodeEncode,
365        Strategy::JsonEncode,
366        Strategy::HtmlEntityEncode,
367        Strategy::HtmlEntityDecimalEncode,
368        Strategy::NullByte,
369        Strategy::PercentagePrefix,
370        Strategy::TripleUrlEncode,
371        Strategy::ChunkedSplit,
372        Strategy::ParameterPollution,
373        Strategy::MysqlVersionedComment,
374        Strategy::Base64Encode,
375        Strategy::Base64UrlEncode,
376        Strategy::OverlongUtf8,
377        Strategy::OverlongUtf8More,
378        Strategy::HexEncode,
379        Strategy::Utf7Encode,
380        Strategy::BetweenObfuscation,
381        Strategy::UnmagicQuotes,
382        Strategy::FullwidthEncode,
383        Strategy::HomoglyphEncode,
384        Strategy::GzipEncode,
385        Strategy::DeflateEncode,
386    ];
387    strategies.sort_by(|a, b| {
388        super::layered::aggressiveness(*a)
389            .partial_cmp(&super::layered::aggressiveness(*b))
390            .unwrap_or(std::cmp::Ordering::Equal)
391    });
392    strategies
393});
394
395#[must_use]
396pub fn all_strategies() -> &'static [Strategy] {
397    &ALL_STRATEGIES
398}
399
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes a text payload and unwraps the result, panicking at the caller's line.
    #[track_caller]
    fn encoded(input: &str, strategy: Strategy) -> String {
        encode(input, strategy).unwrap()
    }

    #[test]
    fn encode_url_encode_basic() {
        let out = encoded("A<", Strategy::UrlEncode);
        assert_eq!(out, "A%3C");
    }

    #[test]
    fn encode_url_encode_lower() {
        let out = encoded("A<", Strategy::UrlEncodeLower);
        assert_eq!(out, "A%3c");
    }

    #[test]
    fn encode_double_url_encode() {
        let out = encoded("A<", Strategy::DoubleUrlEncode);
        assert_eq!(out, "%2541%253C");
    }

    #[test]
    fn encode_case_alternation() {
        // Either starting case is accepted.
        let out = encoded("SELECT", Strategy::CaseAlternation);
        let upper_first = out.contains("SeL");
        let lower_first = out.contains("sEl");
        assert!(upper_first || lower_first);
    }

    #[test]
    fn encode_null_byte() {
        // The null byte may appear raw or percent-encoded.
        let out = encoded("file.php", Strategy::NullByte);
        let has_raw_nul = out.contains('\x00');
        let has_encoded_nul = out.contains("%00");
        assert!(has_raw_nul || has_encoded_nul);
    }

    #[test]
    fn encode_base64() {
        let out = encoded("hello", Strategy::Base64Encode);
        assert_eq!(out, "aGVsbG8=");
    }

    #[test]
    fn encode_hex() {
        let out = encoded("ABC", Strategy::HexEncode);
        assert_eq!(out, "414243");
    }

    #[test]
    fn encode_json() {
        let out = encoded("A<", Strategy::JsonEncode);
        assert_eq!(out, "\"A<\"");
    }

    #[test]
    fn encode_html_entity() {
        let out = encoded("A<", Strategy::HtmlEntityEncode);
        assert_eq!(out, "&#x41;&#x3C;");
    }

    #[test]
    fn encode_invalid_utf8_fails() {
        // Lone continuation bytes are never valid UTF-8.
        let bad_bytes: [u8; 3] = [0x80, 0x81, 0x82];
        let outcome = encode(bad_bytes, Strategy::CaseAlternation);
        assert!(matches!(outcome, Err(EncodeError::InvalidUtf8)));
    }

    #[test]
    fn encode_payload_too_large_fails() {
        // One byte over the limit.
        let oversized = vec![b'X'; MAX_PAYLOAD_SIZE + 1];
        let outcome = encode(&oversized, Strategy::UrlEncode);
        assert!(matches!(outcome, Err(EncodeError::PayloadTooLarge { .. })));
    }

    #[test]
    fn all_strategies_non_empty() {
        let listed = all_strategies();
        assert!(!listed.is_empty());
        assert!(listed.contains(&Strategy::UrlEncode));
    }

    #[test]
    fn strategy_as_str_roundtrip() {
        for strategy in all_strategies() {
            let name = strategy.as_str();
            assert!(!name.is_empty());
        }
    }

    #[test]
    fn strategy_contexts_returns_slice() {
        let url_contexts = Strategy::UrlEncode.contexts();
        assert!(url_contexts.is_empty());
        assert_eq!(Strategy::JsonEncode.contexts(), &["json"]);
        assert_eq!(Strategy::SpaceToComment.contexts(), &["sql"]);
    }

    #[test]
    fn encode_empty_payload() {
        let out = encoded("", Strategy::UrlEncode);
        assert_eq!(out, "");
    }

    #[test]
    fn encode_unicode() {
        let out = encoded("A<", Strategy::UnicodeEncode);
        assert!(out.contains("\\u"));
    }

    #[test]
    fn encode_chunked_split() {
        let out = encoded("hello", Strategy::ChunkedSplit);
        assert!(out.contains("\r\n"));
        // The terminating zero-length chunk must close the body.
        assert!(out.ends_with("0\r\n\r\n"));
    }

    #[test]
    fn encode_parameter_pollution() {
        let out = encoded("key=value", Strategy::ParameterPollution);
        assert!(out.contains("key="));
    }

    #[test]
    fn encode_gzip_produces_base64() {
        // Gzip output is expected to be base64 text; only non-emptiness is checked here.
        let out = encoded("hello", Strategy::GzipEncode);
        assert!(!out.is_empty());
    }

    #[test]
    fn encode_iis_unicode() {
        let out = encoded("A<", Strategy::IisUnicodeEncode);
        assert!(out.contains("%u"));
    }
}