Skip to main content

wafrift_encoding/encoding/
url.rs

1//! URL-based encoding strategies.
2use std::fmt::Write as _;
3
/// Returns `true` when `b` is an RFC 3986 "unreserved" character, i.e. one
/// that should NOT be percent-encoded.
///
/// The unreserved set is exactly `A-Z`, `a-z`, `0-9`, `-`, `_`, `.` and `~`.
/// It is expressed as a single range match so that classifying a byte does
/// not require scanning a lookup table.
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~'
    )
}
10
11/// Standard URL encoding — only encodes reserved and non-unreserved bytes.
12#[must_use]
13pub fn url_encode(payload: impl AsRef<[u8]>) -> String {
14    let payload = payload.as_ref();
15    let mut out = String::with_capacity(payload.len() * 3);
16
17    for b in payload {
18        if is_unreserved(*b) {
19            out.push(*b as char);
20        } else {
21            let _ = write!(&mut out, "%{b:02X}");
22        }
23    }
24    out
25}
26
27/// Lowercase hex variant of URL encoding.
28#[must_use]
29pub fn url_encode_lower(payload: impl AsRef<[u8]>) -> String {
30    let payload = payload.as_ref();
31    let mut out = String::with_capacity(payload.len() * 3);
32
33    for b in payload {
34        if is_unreserved(*b) {
35            out.push(*b as char);
36        } else {
37            let _ = write!(&mut out, "%{b:02x}");
38        }
39    }
40    out
41}
42
/// Double URL encoding — every literal byte becomes `%25XX`.
///
/// Bypasses WAFs that decode URL encoding once before matching.
///
/// A pre-encoded `%XX` sequence (a `%` followed by two hex digits) is
/// treated as one already-encoded unit: only its `%` is re-encoded, giving
/// `%25XX`, which avoids triple-encoding artifacts. The two hex digits are
/// copied verbatim, so their original letter case is preserved. A `%` that is
/// not followed by two hex digits is encoded like any other literal byte.
#[must_use]
pub fn double_url_encode(payload: impl AsRef<[u8]>) -> String {
    let mut rest = payload.as_ref();
    // Worst case: each input byte expands to the five characters `%25XX`.
    let mut encoded = String::with_capacity(rest.len() * 5);

    while let Some(&byte) = rest.first() {
        let consumed = match rest {
            // Already-encoded %XX → double-encode the percent sign only.
            [b'%', hi, lo, ..] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() => {
                encoded.push_str("%25");
                encoded.push(char::from(*hi));
                encoded.push(char::from(*lo));
                3
            }
            _ => {
                // Writing into a `String` cannot fail, so the result is discarded.
                let _ = write!(&mut encoded, "%25{byte:02X}");
                1
            }
        };
        rest = &rest[consumed..];
    }
    encoded
}
71
/// Triple URL encoding — every literal byte becomes `%2525XX`.
///
/// For WAFs that decode URL encoding twice before rule matching.
///
/// Existing escapes are normalised to the triple-encoded form rather than
/// being encoded again byte by byte. At each position the longest matching
/// form wins:
///
/// * `%2525XX` (already triple-encoded) is preserved as-is, avoiding
///   quadruple-encoding;
/// * `%25XX` (double-encoded) becomes `%2525XX`;
/// * `%XX` (single-encoded) becomes `%2525XX`;
/// * any other byte, including a `%` not followed by two hex digits,
///   becomes `%2525` followed by its uppercase hex value.
///
/// Hex digits taken from an existing escape are copied verbatim, so their
/// original letter case is preserved.
#[must_use]
pub fn triple_url_encode(payload: impl AsRef<[u8]>) -> String {
    let mut rest = payload.as_ref();
    // Worst case: each input byte expands to the seven characters `%2525XX`.
    let mut encoded = String::with_capacity(rest.len() * 7);

    while let Some(&byte) = rest.first() {
        // Arms are ordered longest-first so `%2525XX` is never mistaken for
        // the shorter `%25XX` or `%XX` forms.
        let consumed = match rest {
            [b'%', b'2', b'5', b'2', b'5', hi, lo, ..]
                if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                encoded.push_str("%2525");
                encoded.push(char::from(*hi));
                encoded.push(char::from(*lo));
                7
            }
            [b'%', b'2', b'5', hi, lo, ..]
                if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                encoded.push_str("%2525");
                encoded.push(char::from(*hi));
                encoded.push(char::from(*lo));
                5
            }
            [b'%', hi, lo, ..] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() => {
                encoded.push_str("%2525");
                encoded.push(char::from(*hi));
                encoded.push(char::from(*lo));
                3
            }
            _ => {
                // Writing into a `String` cannot fail, so the result is discarded.
                let _ = write!(&mut encoded, "%2525{byte:02X}");
                1
            }
        };
        rest = &rest[consumed..];
    }
    encoded
}
127
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn url_encode_basic() {
        assert_eq!(url_encode("A"), "A");
        assert_eq!(url_encode("AB"), "AB");
        assert_eq!(url_encode("A B"), "A%20B");
    }

    #[test]
    fn url_encode_preserves_unreserved() {
        // RFC 3986 unreserved: A-Za-z0-9-_.~
        assert_eq!(url_encode("A-Za-z0-9-_.~"), "A-Za-z0-9-_.~");
    }

    #[test]
    fn url_encode_special_chars() {
        assert_eq!(url_encode(" "), "%20");
        assert_eq!(url_encode("="), "%3D");
        assert_eq!(url_encode("'"), "%27");
        assert_eq!(url_encode("/"), "%2F");
    }

    #[test]
    fn url_encode_accepts_raw_bytes() {
        assert_eq!(url_encode([0x00_u8, 0xFF, b'A']), "%00%FFA");
    }

    #[test]
    fn url_encode_lower_case() {
        assert_eq!(url_encode_lower(" /"), "%20%2f");
    }

    #[test]
    fn url_encode_lower_accepts_raw_bytes() {
        // Only the escape digits are lowercased; unreserved letters keep their case.
        assert_eq!(url_encode_lower([0xAB_u8, 0xFF, b'Z']), "%ab%ffZ");
    }

    #[test]
    fn double_url_encode_basic() {
        assert_eq!(double_url_encode("A"), "%2541");
    }

    #[test]
    fn double_url_encode_preserves_existing() {
        let result = double_url_encode("%20");
        assert_eq!(result, "%2520");
    }

    #[test]
    fn double_url_encode_keeps_hex_case_of_existing_escapes() {
        // Digits of a pre-encoded %XX are copied verbatim, not normalised.
        assert_eq!(double_url_encode("%2f"), "%252f");
        assert_eq!(double_url_encode("%2F"), "%252F");
    }

    #[test]
    fn double_url_encode_mixed_literal_and_escape() {
        assert_eq!(double_url_encode("a%20b"), "%2561%2520%2562");
    }

    #[test]
    fn double_url_encode_accepts_raw_bytes() {
        assert_eq!(double_url_encode([0x00_u8, 0xFF]), "%2500%25FF");
    }

    #[test]
    fn triple_url_encode_basic() {
        assert_eq!(triple_url_encode("A"), "%252541");
    }

    #[test]
    fn triple_url_encode_upgrades_single_encoded() {
        // Single-encoded %20 should become triple-encoded space
        let result = triple_url_encode("%20");
        assert_eq!(result, "%252520");
    }

    #[test]
    fn triple_url_encode_preserves_triple_encoded() {
        // Already triple-encoded should be preserved
        let result = triple_url_encode("%252541");
        assert_eq!(result, "%252541");
    }

    #[test]
    fn triple_url_encode_accepts_raw_bytes() {
        assert_eq!(triple_url_encode([0x00_u8, 0xFF]), "%252500%2525FF");
    }

    #[test]
    fn url_encode_empty() {
        assert_eq!(url_encode(""), "");
        assert_eq!(url_encode_lower(""), "");
        assert_eq!(double_url_encode(""), "");
        assert_eq!(triple_url_encode(""), "");
    }

    #[test]
    fn url_encode_sql_injection() {
        let encoded = url_encode("' OR 1=1--");
        assert!(encoded.contains("%27")); // '
        assert!(encoded.contains("%20")); // space
        assert!(!encoded.contains("%4F")); // O is unreserved
    }

    #[test]
    fn double_url_encode_trailing_percent() {
        // Input ending in bare '%' must not produce an incomplete %2 fragment.
        assert_eq!(double_url_encode("%"), "%2525");
        assert_eq!(double_url_encode("foo%"), "%2566%256F%256F%2525");
        assert_eq!(double_url_encode("%2"), "%2525%2532");
        assert_eq!(double_url_encode("%G"), "%2525%2547");
    }

    #[test]
    fn triple_url_encode_trailing_percent() {
        assert_eq!(triple_url_encode("%"), "%252525");
        assert_eq!(triple_url_encode("foo%"), "%252566%25256F%25256F%252525");
        assert_eq!(triple_url_encode("%2"), "%252525%252532");
        assert_eq!(triple_url_encode("%G"), "%252525%252547");
    }

    #[test]
    fn triple_url_encode_handles_double_encoded() {
        // %2520 is double-encoded space; triple-encoding should yield %252520.
        assert_eq!(triple_url_encode("%2520"), "%252520");
        // %2525 is double-encoded '%'; triple-encoding should yield %252525.
        assert_eq!(triple_url_encode("%2525"), "%252525");
        // Mixed: raw space + double-encoded space.
        assert_eq!(triple_url_encode(" %2520"), "%252520%252520");
        // Already triple-encoded must be preserved.
        assert_eq!(triple_url_encode("%252520"), "%252520");
    }
}