Skip to main content

wafrift_encoding/encoding/
layered.rs

1//! Multi-strategy encoding chains and aggressiveness scoring.
2
3use super::strategy::{MAX_PAYLOAD_SIZE, Strategy, all_strategies, encode};
4use crate::error::EncodeError;
5
/// Upper bound, in bytes, on the output of any single layer produced by
/// layered encoding.
///
/// Defined as an alias of [`MAX_PAYLOAD_SIZE`] rather than as a second
/// literal, so the input limit and the layered-output limit cannot drift
/// apart when one of them is edited. (8 MiB at the time of writing; the
/// definition of `MAX_PAYLOAD_SIZE` is the authoritative value.)
pub const MAX_LAYERED_OUTPUT_SIZE: usize = MAX_PAYLOAD_SIZE;
11
12/// Apply multiple encoding strategies in sequence (layered encoding).
13///
14/// # Errors
15/// Returns `EncodeError::PayloadTooLarge` if the input exceeds [`super::strategy::MAX_PAYLOAD_SIZE`].
16/// Returns `EncodeError::LayeredOutputTooLarge` if any intermediate output
17/// exceeds [`MAX_LAYERED_OUTPUT_SIZE`].
18pub fn encode_layered(
19    payload: impl AsRef<[u8]>,
20    strategies: &[Strategy],
21) -> Result<String, EncodeError> {
22    let payload = payload.as_ref();
23    // F135: empty strategies = no-op. Pre-fix used
24    // `unwrap_or(Strategy::UrlEncode)` which silently URL-encoded the
25    // payload when callers passed `&[]`. The existing
26    // `encode_layered_empty_strategies` test passed only because
27    // `"hello"` happens to be all-unreserved (`[A-Za-z0-9-_.~]`) and
28    // therefore a fixed point under url_encode — any non-unreserved
29    // byte (e.g. `!` → `%21`, space → `%20`) would have caught the
30    // divergence. Returning the payload as a lossy UTF-8 string
31    // matches the documented contract and what the test asserts.
32    if strategies.is_empty() {
33        return Ok(String::from_utf8_lossy(payload).into_owned());
34    }
35    let mut result = encode(payload, strategies[0])?;
36    // Check size IMMEDIATELY after the first encoding too — the
37    // pre-fix guard only ran before the SECOND layer, so a single
38    // strategy that expands dramatically (HexEncode 2×,
39    // TripleUrlEncode up to 3×, GzipEncode + base64 ~1.33×) could
40    // produce up to expansion_factor × MAX_PAYLOAD_SIZE bytes
41    // (potentially 24 MiB from an 8 MiB input) before any guard
42    // fired.
43    if result.len() > MAX_LAYERED_OUTPUT_SIZE {
44        return Err(EncodeError::LayeredOutputTooLarge {
45            max: MAX_LAYERED_OUTPUT_SIZE,
46            actual: result.len(),
47        });
48    }
49
50    for strategy in strategies.iter().skip(1) {
51        result = encode(&result, *strategy)?;
52        if result.len() > MAX_LAYERED_OUTPUT_SIZE {
53            return Err(EncodeError::LayeredOutputTooLarge {
54                max: MAX_LAYERED_OUTPUT_SIZE,
55                actual: result.len(),
56            });
57        }
58    }
59
60    Ok(result)
61}
62
63/// Generate programmatic combinations up to a depth limit.
64///
65/// Filters out redundant pairings (same strategy twice, or pairings that
66/// produce semantically equivalent outputs).
67pub fn layered_combinations(depth: usize) -> Vec<Vec<Strategy>> {
68    let base = all_strategies();
69    let mut results: Vec<Vec<Strategy>> = Vec::new();
70
71    fn backtrack(
72        base: &[Strategy],
73        current: &mut Vec<Strategy>,
74        results: &mut Vec<Vec<Strategy>>,
75        depth: usize,
76    ) {
77        if current.len() >= 2 && current.len() <= depth {
78            results.push(current.clone());
79        }
80        if current.len() >= depth {
81            return;
82        }
83        for s in base {
84            // Skip redundant consecutive duplicates
85            if current.last() == Some(s) {
86                continue;
87            }
88            // Skip some known-redundant pairings
89            if let Some(last) = current.last()
90                && redundant_pair(*last, *s)
91            {
92                continue;
93            }
94            current.push(*s);
95            backtrack(base, current, results, depth);
96            current.pop();
97        }
98    }
99
100    let mut current = Vec::new();
101    backtrack(base, &mut current, &mut results, depth);
102    results
103}
104
105fn redundant_pair(a: Strategy, b: Strategy) -> bool {
106    // URL + URL variants are redundant with existing single strategies
107    matches!(
108        (a, b),
109        (
110            Strategy::UrlEncode
111                | Strategy::UrlEncodeLower
112                | Strategy::DoubleUrlEncode
113                | Strategy::TripleUrlEncode,
114            Strategy::UrlEncode
115        ) | (
116            Strategy::UrlEncode | Strategy::UrlEncodeLower,
117            Strategy::UrlEncodeLower
118        ) | (Strategy::CaseAlternation, Strategy::RandomCase)
119            | (Strategy::RandomCase, Strategy::CaseAlternation)
120    )
121}
122
123/// Estimate how aggressive an encoding strategy is (0.0 = mild, 1.0 = extreme).
124///
125/// Used by the strategy engine to decide escalation order.
126#[must_use]
127pub fn aggressiveness(strategy: Strategy) -> f64 {
128    match strategy {
129        Strategy::CaseAlternation => 0.05,
130        Strategy::RandomCase => 0.08,
131        Strategy::UrlEncode => 0.1,
132        Strategy::UrlEncodeLower => 0.1,
133        Strategy::WhitespaceInsertion => 0.12,
134        Strategy::SqlCommentInsertion => 0.12,
135        Strategy::SpaceToPlus => 0.13,
136        Strategy::SpaceToRandomBlank => 0.14,
137        Strategy::SpaceToComment => 0.15,
138        Strategy::SpaceToDash => 0.15,
139        Strategy::SpaceToHash => 0.15,
140        Strategy::HtmlEntityEncode => 0.2,
141        Strategy::HtmlEntityDecimalEncode => 0.2,
142        Strategy::DoubleUrlEncode => 0.25,
143        Strategy::UnicodeEncode => 0.3,
144        Strategy::IisUnicodeEncode => 0.3,
145        Strategy::JsonEncode => 0.3,
146        Strategy::NullByte => 0.35,
147        Strategy::FullwidthEncode => 0.36,
148        Strategy::HomoglyphEncode => 0.37,
149        Strategy::PercentagePrefix => 0.4,
150        Strategy::ParameterPollution => 0.45,
151        Strategy::TripleUrlEncode => 0.5,
152        Strategy::MysqlVersionedComment => 0.55,
153        Strategy::Base64Encode => 0.6,
154        Strategy::Base64UrlEncode => 0.6,
155        Strategy::OverlongUtf8 => 0.7,
156        Strategy::OverlongUtf8More => 0.75,
157        Strategy::HexEncode => 0.8,
158        Strategy::Utf7Encode => 0.85,
159        Strategy::BetweenObfuscation => 0.88,
160        Strategy::UnmagicQuotes => 0.9,
161        Strategy::ChunkedSplit => 0.92,
162        Strategy::GzipEncode => 0.95,
163        Strategy::DeflateEncode => 0.95,
164        // Invisible-character strategies — moderate to aggressive.
165        // They are highly evasive against ASCII-keyword WAFs but
166        // may break backends that don't perform Unicode normalization,
167        // so they sit in the 0.40–0.85 band.
168        Strategy::SoftHyphenInject => 0.40,
169        Strategy::WordJoinerWrap => 0.42,
170        Strategy::VariationSelectorPad => 0.50,
171        Strategy::VariationSelectorSupplementaryPad => 0.55,
172        Strategy::TagCharEncode => 0.70,
173        Strategy::LigatureEncode => 0.72,
174        Strategy::CircledLetterEncode => 0.74,
175        Strategy::ParenthesizedLetterEncode => 0.76,
176    }
177}
178
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encoding::strategy::all_strategies;

    /// A two-layer chain produces output containing `%`.
    #[test]
    fn encode_layered_basic() {
        let chain = [Strategy::UrlEncode, Strategy::DoubleUrlEncode];
        let encoded = encode_layered("A", &chain).unwrap();
        assert!(encoded.contains('%'));
    }

    /// Stacked URL encoding of a large escapable payload must trip the
    /// layered-output limit.
    #[test]
    fn encode_layered_size_limit() {
        // `!` is not an unreserved character, so every URL-encoding pass
        // makes the payload several times larger.
        let oversized = "!".repeat(5 * 1024 * 1024);
        let chain = [Strategy::UrlEncode; 3];
        let outcome = encode_layered(&oversized, &chain);
        assert!(matches!(
            outcome,
            Err(EncodeError::LayeredOutputTooLarge { .. })
        ));
    }

    /// With a depth limit of 2, only pairs are generated.
    #[test]
    fn layered_combinations_depth_2() {
        let pairs = layered_combinations(2);
        assert!(!pairs.is_empty());
        assert!(pairs.iter().all(|chain| chain.len() == 2));
    }

    /// No generated chain may apply the same strategy twice in a row.
    #[test]
    fn layered_combinations_no_consecutive_duplicates() {
        for combo in &layered_combinations(3) {
            for neighbours in combo.windows(2) {
                assert_ne!(
                    neighbours[0], neighbours[1],
                    "no consecutive duplicates: {combo:?}"
                );
            }
        }
    }

    /// `all_strategies()` is expected to list strategies from mildest to
    /// most aggressive.
    #[test]
    fn aggressiveness_ordering() {
        for neighbours in all_strategies().windows(2) {
            let (milder, harsher) = (neighbours[0], neighbours[1]);
            assert!(
                aggressiveness(milder) <= aggressiveness(harsher),
                "aggressiveness should be non-decreasing"
            );
        }
    }

    /// An empty strategy list returns an all-unreserved payload unchanged.
    #[test]
    fn encode_layered_empty_strategies() {
        assert_eq!(encode_layered("hello", &[]).unwrap(), "hello");
    }

    /// Regression guard: the empty-strategy path must not URL-encode.
    ///
    /// `"hello"` alone cannot detect that, because it contains only
    /// unreserved characters and URL encoding leaves it untouched. The `!`
    /// and space here would come back as `%21` / `%20` if any URL encoding
    /// were applied.
    #[test]
    fn encode_layered_empty_strategies_preserves_non_unreserved_chars() {
        let untouched = encode_layered("hello! world!", &[]).unwrap();
        assert_eq!(
            untouched, "hello! world!",
            "empty strategies must be a true no-op, not silently UrlEncode"
        );
    }

    /// The no-op path converts lossily: invalid UTF-8 neither panics nor
    /// errors, it becomes U+FFFD. Callers needing a byte-preserving no-op
    /// should not call with an empty strategy list.
    #[test]
    fn encode_layered_empty_strategies_with_invalid_utf8_is_lossy() {
        // 0xC3 followed by 0x28 is not a valid UTF-8 sequence.
        let invalid = [0xC3_u8, 0x28, b'!'];
        let converted = encode_layered(invalid.as_slice(), &[]).unwrap();
        assert!(converted.contains('\u{FFFD}'));
        assert!(converted.ends_with('!'));
    }

    /// A single strategy behaves like one plain encoding pass.
    #[test]
    fn encode_layered_single_strategy() {
        let encoded = encode_layered("A<", &[Strategy::UrlEncode]).unwrap();
        assert_eq!(encoded, "A%3C");
    }

    /// Chains need at least two layers, so depth 1 yields nothing.
    #[test]
    fn layered_combinations_depth_1_returns_empty() {
        assert!(layered_combinations(1).is_empty());
    }

    /// Every strategy's score lies within the documented 0.0..=1.0 range.
    #[test]
    fn aggressiveness_in_valid_range() {
        let valid = 0.0..=1.0;
        for &s in all_strategies() {
            let a = aggressiveness(s);
            assert!(
                valid.contains(&a),
                "aggressiveness for {s:?} out of range: {a}"
            );
        }
    }
}