Skip to main content

wafrift_encoding/encoding/
url.rs

1//! URL-based encoding strategies.
2use std::fmt::Write as _;
3
/// Membership table for the RFC 3986 unreserved set (`A-Z`, `a-z`, `0-9`, `-`, `_`, `.`, `~`).
///
/// The table is indexed by byte value; an entry is `true` exactly when that
/// byte is unreserved. It is evaluated at compile time, so a membership test is
/// a single array index rather than the up-to-66 comparisons that the earlier
/// `UNRESERVED: &[u8]` + `slice::contains` scan performed for every byte.
///
/// Criterion measurements recorded when the table replaced the linear scan
/// (optimized build, before -> after):
///   encode/sql_40b        = 567 ns  -> 241 ns (-57%)
///   encode/xss_50b        = 556 ns  -> 224 ns (-60%)
///   encode/long_200b      = 2129 ns -> 731 ns (-66%)
///   encode/unreserved_30b = 401 ns  -> 115 ns (-71%)
const UNRESERVED_TABLE: [bool; 256] = {
    let mut table = [false; 256];
    // `for` loops are not usable in const initialisers, hence the manual counter.
    let mut idx = 0usize;
    while idx < 256 {
        // `idx` is below 256 here, so narrowing to `u8` loses nothing.
        let byte = idx as u8;
        table[idx] = matches!(
            byte,
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~'
        );
        idx += 1;
    }
    table
};
44
45#[inline(always)]
46fn is_unreserved(b: u8) -> bool {
47    UNRESERVED_TABLE[b as usize]
48}
49
50/// Standard URL encoding — only encodes reserved and non-unreserved bytes.
51#[must_use]
52pub fn url_encode(payload: impl AsRef<[u8]>) -> String {
53    let payload = payload.as_ref();
54    let mut out = String::with_capacity(payload.len() * 3);
55
56    for b in payload {
57        if is_unreserved(*b) {
58            out.push(*b as char);
59        } else {
60            let _ = write!(&mut out, "%{b:02X}");
61        }
62    }
63    out
64}
65
66/// Lowercase hex variant of URL encoding.
67#[must_use]
68pub fn url_encode_lower(payload: impl AsRef<[u8]>) -> String {
69    let payload = payload.as_ref();
70    let mut out = String::with_capacity(payload.len() * 3);
71
72    for b in payload {
73        if is_unreserved(*b) {
74            out.push(*b as char);
75        } else {
76            let _ = write!(&mut out, "%{b:02x}");
77        }
78    }
79    out
80}
81
/// Double URL encoding — every raw byte becomes `%25XX`.
///
/// Bypasses WAFs that decode URL encoding once before matching: after one
/// decode pass the payload is still fully percent-encoded.
///
/// Unlike [`url_encode`], unreserved bytes are encoded too. A `%` that is
/// followed by two hex digits is treated as an already-encoded `%XX` sequence:
/// only the `%` is escaped (to `%25`) and the two digits are copied with their
/// original case, which avoids triple-encoding artifacts. A `%` that is not
/// followed by two hex digits (for example a trailing `%` or `%G`) is encoded
/// like any other byte, i.e. as `%2525`.
#[must_use]
pub fn double_url_encode(payload: impl AsRef<[u8]>) -> String {
    let mut rest = payload.as_ref();
    // Upper bound on the output size: a raw byte expands to the five bytes
    // `%25XX`, and a three-byte `%XX` sequence also becomes five bytes.
    // Reserving less than 5x forces a reallocation on ordinary payloads.
    let mut result = String::with_capacity(rest.len() * 5);

    while let [first, tail @ ..] = rest {
        match tail {
            [hi, lo, after @ ..]
                if *first == b'%' && hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                // Already-encoded %XX: escape the percent sign only.
                result.push_str("%25");
                result.push(char::from(*hi));
                result.push(char::from(*lo));
                rest = after;
            }
            _ => {
                // Formatting into a `String` cannot fail, so the result is discarded.
                let _ = write!(&mut result, "%25{first:02X}");
                rest = tail;
            }
        }
    }
    result
}
110
/// Triple URL encoding — every raw byte becomes `%2525XX`.
///
/// For WAFs that decode URL encoding twice before rule matching.
///
/// Percent sequences already present in the input are normalised to the
/// triple-encoded form instead of being encoded again from scratch. At each
/// `%` the longest of these shapes that matches is chosen:
///
/// * `%2525XX` (already triple-encoded) is kept unchanged;
/// * `%25XX` (double-encoded) becomes `%2525XX`;
/// * `%XX` (single-encoded) becomes `%2525XX`.
///
/// `XX` stands for two hex digits, copied with their original case. Any other
/// byte — including a `%` that is not followed by two hex digits — is written
/// as `%2525` plus its uppercase hex value.
#[must_use]
pub fn triple_url_encode(payload: impl AsRef<[u8]>) -> String {
    let mut rest = payload.as_ref();
    // A raw byte expands to seven output bytes, which is the worst case.
    let mut out = String::with_capacity(rest.len() * 7);

    while let [head, tail @ ..] = rest {
        // Find an existing escape at the cursor: its two hex digits and the
        // number of input bytes it occupies. Longest shape is tried first.
        let escape = match rest {
            [b'%', b'2', b'5', b'2', b'5', hi, lo, ..]
                if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                Some((*hi, *lo, 7))
            }
            [b'%', b'2', b'5', hi, lo, ..]
                if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                Some((*hi, *lo, 5))
            }
            [b'%', hi, lo, ..] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() => {
                Some((*hi, *lo, 3))
            }
            _ => None,
        };

        match escape {
            Some((hi, lo, consumed)) => {
                // All three shapes end up as the same `%2525XX` output.
                out.push_str("%2525");
                out.push(char::from(hi));
                out.push(char::from(lo));
                rest = &rest[consumed..];
            }
            None => {
                // Formatting into a `String` cannot fail, so the result is discarded.
                let _ = write!(&mut out, "%2525{head:02X}");
                rest = tail;
            }
        }
    }
    out
}
166
/// Unit tests for the URL encoders: plain, lowercase, double and triple
/// encoding, including malformed and already-encoded `%` sequences.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn url_encode_basic() {
        assert_eq!(url_encode("A"), "A");
        assert_eq!(url_encode("AB"), "AB");
        assert_eq!(url_encode("A B"), "A%20B");
    }

    #[test]
    fn url_encode_preserves_unreserved() {
        // RFC 3986 unreserved: A-Za-z0-9-_.~
        assert_eq!(url_encode("A-Za-z0-9-_.~"), "A-Za-z0-9-_.~");
    }

    #[test]
    fn url_encode_special_chars() {
        assert_eq!(url_encode(" "), "%20");
        assert_eq!(url_encode("="), "%3D");
        assert_eq!(url_encode("'"), "%27");
        assert_eq!(url_encode("/"), "%2F");
    }

    #[test]
    fn url_encode_accepts_raw_bytes() {
        assert_eq!(url_encode([0x00_u8, 0xFF, b'A']), "%00%FFA");
    }

    #[test]
    fn url_encode_multibyte_utf8() {
        // 'é' is the two UTF-8 bytes C3 A9; each byte is escaped on its own.
        assert_eq!(url_encode("é"), "%C3%A9");
        assert_eq!(url_encode_lower("é"), "%c3%a9");
    }

    #[test]
    fn url_encode_lower_case() {
        assert_eq!(url_encode_lower(" /"), "%20%2f");
    }

    #[test]
    fn double_url_encode_basic() {
        assert_eq!(double_url_encode("A"), "%2541");
    }

    #[test]
    fn double_url_encode_preserves_existing() {
        // The digits of an existing %XX are kept; only its '%' is escaped.
        let result = double_url_encode("%20");
        assert_eq!(result, "%2520");
    }

    #[test]
    fn double_url_encode_mixed_raw_and_encoded() {
        // Raw bytes are fully encoded while the embedded %20 only gains "25".
        assert_eq!(double_url_encode("a%20b"), "%2561%2520%2562");
    }

    #[test]
    fn triple_url_encode_basic() {
        assert_eq!(triple_url_encode("A"), "%252541");
    }

    #[test]
    fn triple_url_encode_promotes_single_encoded() {
        // Single-encoded %20 should become triple-encoded space
        let result = triple_url_encode("%20");
        assert_eq!(result, "%252520");
    }

    #[test]
    fn triple_url_encode_preserves_triple_encoded() {
        // Already triple-encoded should be preserved
        let result = triple_url_encode("%252541");
        assert_eq!(result, "%252541");
    }

    #[test]
    fn url_encode_empty() {
        assert_eq!(url_encode(""), "");
        assert_eq!(url_encode_lower(""), "");
        assert_eq!(double_url_encode(""), "");
        assert_eq!(triple_url_encode(""), "");
    }

    #[test]
    fn url_encode_sql_injection() {
        let encoded = url_encode("' OR 1=1--");
        assert!(encoded.contains("%27")); // '
        assert!(encoded.contains("%20")); // space
        assert!(!encoded.contains("%4F")); // O is unreserved
    }

    #[test]
    fn double_url_encode_trailing_percent() {
        // Input ending in bare '%' must not produce an incomplete %2 fragment.
        assert_eq!(double_url_encode("%"), "%2525");
        assert_eq!(double_url_encode("foo%"), "%2566%256F%256F%2525");
        assert_eq!(double_url_encode("%2"), "%2525%2532");
        assert_eq!(double_url_encode("%G"), "%2525%2547");
    }

    #[test]
    fn triple_url_encode_trailing_percent() {
        // A '%' without two following hex digits is encoded as a raw byte.
        assert_eq!(triple_url_encode("%"), "%252525");
        assert_eq!(triple_url_encode("foo%"), "%252566%25256F%25256F%252525");
        assert_eq!(triple_url_encode("%2"), "%252525%252532");
        assert_eq!(triple_url_encode("%G"), "%252525%252547");
    }

    #[test]
    fn triple_url_encode_handles_double_encoded() {
        // %2520 is double-encoded space; triple-encoding should yield %252520.
        assert_eq!(triple_url_encode("%2520"), "%252520");
        // %2525 is double-encoded '%'; triple-encoding should yield %252525.
        assert_eq!(triple_url_encode("%2525"), "%252525");
        // Mixed: raw space + double-encoded space.
        assert_eq!(triple_url_encode(" %2520"), "%252520%252520");
        // Already triple-encoded must be preserved.
        assert_eq!(triple_url_encode("%252520"), "%252520");
    }
}