Skip to main content

wafrift_encoding/encoding/
url.rs

1//! URL-based encoding strategies.
2use std::fmt::Write as _;
3
/// The RFC 3986 "unreserved" set: ASCII letters, digits, and `-`, `_`, `.`, `~`.
/// Bytes listed here are never percent-encoded by the encoders in this module.
const UNRESERVED: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~";

/// Returns `true` when `b` is one of the bytes listed in [`UNRESERVED`].
fn is_unreserved(b: u8) -> bool {
    UNRESERVED.iter().any(|&allowed| allowed == b)
}
10
11/// Standard URL encoding — only encodes reserved and non-unreserved bytes.
12#[must_use]
13pub fn url_encode(payload: impl AsRef<[u8]>) -> String {
14    let payload = payload.as_ref();
15    let mut out = String::with_capacity(payload.len() * 3);
16
17    for b in payload {
18        if is_unreserved(*b) {
19            out.push(*b as char);
20        } else {
21            let _ = write!(&mut out, "%{b:02X}");
22        }
23    }
24    out
25}
26
27/// Lowercase hex variant of URL encoding.
28#[must_use]
29pub fn url_encode_lower(payload: impl AsRef<[u8]>) -> String {
30    let payload = payload.as_ref();
31    let mut out = String::with_capacity(payload.len() * 3);
32
33    for b in payload {
34        if is_unreserved(*b) {
35            out.push(*b as char);
36        } else {
37            let _ = write!(&mut out, "%{b:02x}");
38        }
39    }
40    out
41}
42
/// Double URL encoding — raw bytes become `%25XX`.
///
/// Bypasses WAFs that decode URL encoding once before matching.
///
/// Every byte, including unreserved ones, is hex-encoded (uppercase) and
/// its `%` is itself encoded as `%25`. A pre-encoded `%XX` sequence (a `%`
/// followed by two ASCII hex digits) is detected and only its `%` is
/// encoded, so `%20` becomes `%2520`; the two hex digits keep their
/// original case. A `%` that is not followed by two hex digits is treated
/// as a raw byte and becomes `%2525`.
#[must_use]
pub fn double_url_encode(payload: impl AsRef<[u8]>) -> String {
    let mut rest = payload.as_ref();
    // Each raw byte expands to five characters (`%25XX`); a pre-encoded
    // triplet also yields five, so `len * 5` is the true upper bound.
    let mut result = String::with_capacity(rest.len() * 5);

    loop {
        rest = match rest {
            [] => break,
            // Already-encoded %XX → double-encode the percent only.
            [b'%', hi, lo, tail @ ..] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() => {
                result.push_str("%25");
                result.push(char::from(*hi));
                result.push(char::from(*lo));
                tail
            }
            // Any other byte (including a stray `%`) is encoded from scratch.
            [byte, tail @ ..] => {
                // The formatting result is deliberately ignored.
                let _ = write!(result, "%25{byte:02X}");
                tail
            }
        };
    }
    result
}
71
/// Triple URL encoding — raw bytes become `%2525XX`.
///
/// For WAFs that decode URL encoding twice before rule matching.
///
/// Input that is already percent-encoded is normalised to the
/// triple-encoded form instead of being encoded again from scratch:
///
/// * `%2525XX` (already triple-encoded) is preserved as-is,
/// * `%25XX` (double-encoded) becomes `%2525XX`,
/// * `%XX` (single-encoded) becomes `%2525XX`,
///
/// where `XX` is two ASCII hex digits whose case is kept. The longest
/// matching form wins, so `%2541` is read as a double-encoded `A` rather
/// than an encoded `%` followed by the literal text `41`. Every other byte,
/// including a `%` not followed by two hex digits, is written as
/// `%2525XX` with uppercase hex.
#[must_use]
pub fn triple_url_encode(payload: impl AsRef<[u8]>) -> String {
    let mut rest = payload.as_ref();
    // Worst case: every input byte expands to the seven characters `%2525XX`.
    let mut out = String::with_capacity(rest.len() * 7);

    loop {
        // Recognise an existing encoded sequence, longest form first, and
        // extract its hex pair plus the number of input bytes it spans.
        let (hi, lo, consumed) = match rest {
            [] => break,
            [b'%', b'2', b'5', b'2', b'5', hi, lo, ..]
                if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                (*hi, *lo, 7)
            }
            [b'%', b'2', b'5', hi, lo, ..]
                if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                (*hi, *lo, 5)
            }
            [b'%', hi, lo, ..] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() => {
                (*hi, *lo, 3)
            }
            [byte, ..] => {
                // Raw byte: encode from scratch. The formatting result is
                // deliberately ignored.
                let _ = write!(out, "%2525{byte:02X}");
                rest = &rest[1..];
                continue;
            }
        };

        // All recognised forms collapse to the same triple-encoded output.
        out.push_str("%2525");
        out.push(char::from(hi));
        out.push(char::from(lo));
        rest = &rest[consumed..];
    }
    out
}
115
/// Unit tests for the URL encoders defined in the parent module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn url_encode_basic() {
        assert_eq!(url_encode("A"), "A");
        assert_eq!(url_encode("AB"), "AB");
        assert_eq!(url_encode("A B"), "A%20B");
    }

    #[test]
    fn url_encode_preserves_unreserved() {
        // RFC 3986 unreserved: A-Za-z0-9-_.~
        assert_eq!(url_encode("A-Za-z0-9-_.~"), "A-Za-z0-9-_.~");
    }

    #[test]
    fn url_encode_special_chars() {
        assert_eq!(url_encode(" "), "%20");
        assert_eq!(url_encode("="), "%3D");
        assert_eq!(url_encode("'"), "%27");
        assert_eq!(url_encode("/"), "%2F");
    }

    #[test]
    fn url_encode_accepts_raw_bytes() {
        assert_eq!(url_encode([0x00_u8, 0xFF, b'A']), "%00%FFA");
    }

    #[test]
    fn url_encode_lower_case() {
        assert_eq!(url_encode_lower(" /"), "%20%2f");
    }

    #[test]
    fn url_encode_lower_preserves_unreserved() {
        assert_eq!(url_encode_lower("A-Za-z0-9-_.~"), "A-Za-z0-9-_.~");
    }

    #[test]
    fn double_url_encode_basic() {
        assert_eq!(double_url_encode("A"), "%2541");
    }

    #[test]
    fn double_url_encode_preserves_existing() {
        let result = double_url_encode("%20");
        assert_eq!(result, "%2520");
    }

    #[test]
    fn double_url_encode_mixed_input() {
        // Raw bytes are encoded from scratch; the embedded %2F only gets
        // its percent sign encoded.
        assert_eq!(double_url_encode("a%2Fb"), "%2561%252F%2562");
    }

    #[test]
    fn double_url_encode_stray_percent() {
        // A '%' without two trailing hex digits is treated as a raw byte.
        assert_eq!(double_url_encode("%"), "%2525");
        assert_eq!(double_url_encode("%2"), "%2525%2532");
        assert_eq!(double_url_encode("%zz"), "%2525%257A%257A");
    }

    #[test]
    fn triple_url_encode_basic() {
        assert_eq!(triple_url_encode("A"), "%252541");
    }

    #[test]
    fn triple_url_encode_upgrades_single_encoded() {
        // Single-encoded %20 should become triple-encoded space
        let result = triple_url_encode("%20");
        assert_eq!(result, "%252520");
    }

    #[test]
    fn triple_url_encode_preserves_triple_encoded() {
        // Already triple-encoded should be preserved
        let result = triple_url_encode("%252541");
        assert_eq!(result, "%252541");
    }

    #[test]
    fn triple_url_encode_stray_percent() {
        // A '%' without two trailing hex digits is treated as a raw byte.
        assert_eq!(triple_url_encode("%"), "%252525");
        assert_eq!(triple_url_encode("%2"), "%252525%252532");
    }

    #[test]
    fn url_encode_empty() {
        assert_eq!(url_encode(""), "");
        assert_eq!(url_encode_lower(""), "");
        assert_eq!(double_url_encode(""), "");
        assert_eq!(triple_url_encode(""), "");
    }

    #[test]
    fn url_encode_sql_injection() {
        let encoded = url_encode("' OR 1=1--");
        assert!(encoded.contains("%27")); // '
        assert!(encoded.contains("%20")); // space
        assert!(!encoded.contains("%4F")); // O is unreserved
    }
}