Skip to main content

wafrift_encoding/encoding/
structural.rs

1//! Structural encoding strategies — byte-level and framing manipulations.
2
3use base64::{Engine as _, engine::general_purpose};
4use std::io::Write as _;
5
6use crate::error::EncodeError;
7
/// Output of [`chunked_split`]: a chunk-framed body plus the headers it needs.
///
/// The framing is only meaningful when `body` is sent as the body of an HTTP
/// request that also carries `Transfer-Encoding: chunked`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ChunkedBody {
    /// Raw bytes of the chunked-encoded body.
    pub body: Vec<u8>,
    /// `(name, value)` header pairs that must be sent together with `body`.
    pub required_headers: Vec<(String, String)>,
}
19
/// Null byte injection — terminate the payload with a URL-encoded NUL (`%00`).
///
/// The payload is decoded lossily as UTF-8 (invalid sequences become U+FFFD).
/// If it contains a `.` byte anywhere, a decoy `.jpg` extension is placed after
/// the `%00`; otherwise only `%00` is appended.
///
/// **Context**: `php`, `cgi` — only semantically correct for backends using
/// C-style null-terminated string handling.
pub fn null_byte_inject(payload: impl AsRef<[u8]>) -> String {
    let raw = payload.as_ref();
    let suffix = if raw.iter().any(|&b| b == b'.') {
        "%00.jpg"
    } else {
        "%00"
    };

    let mut injected = String::from_utf8_lossy(raw).into_owned();
    injected.push_str(suffix);
    injected
}
33
/// Overlong UTF-8 encoding (2-byte) — percent-encode ASCII non-alphanumerics
/// as an overlong two-byte sequence.
///
/// The payload is decoded lossily as UTF-8. ASCII letters and digits, and all
/// non-ASCII characters, are copied through unchanged. Every other ASCII
/// character becomes `%XX%YY`, where `XX = 0xC0 | (b >> 6)` and
/// `YY = 0x80 | (b & 0x3F)` (so `/` becomes `%C0%AF`).
///
/// **Context**: `iis-6` — only works against specific legacy WAFs/frontends that
/// normalize overlong sequences rather than rejecting them.
pub fn overlong_utf8(payload: impl AsRef<[u8]>) -> String {
    let text = String::from_utf8_lossy(payload.as_ref());
    let mut encoded = String::with_capacity(text.len());

    for ch in text.chars() {
        if ch.is_ascii() && !ch.is_ascii_alphanumeric() {
            let code = ch as u8;
            let lead = 0xC0 | (code >> 6);
            let cont = 0x80 | (code & 0x3F);
            encoded.push_str(&format!("%{:02X}%{:02X}", lead, cont));
        } else {
            encoded.push(ch);
        }
    }

    encoded
}
53
/// Extended overlong UTF-8 encoding (3-byte) — percent-encode ASCII
/// non-alphanumerics as an overlong three-byte sequence.
///
/// The payload is decoded lossily as UTF-8. ASCII letters and digits, and all
/// non-ASCII characters, are copied through unchanged. Every other ASCII
/// character `b` becomes `%E0%XX%YY`, following the three-byte layout
/// `1110_0000 10_0000hh 10_llllll`:
///
/// * `XX = 0x80 | (b >> 6)` carries the single high bit of the code point,
/// * `YY = 0x80 | (b & 0x3F)` carries the low six bits.
///
/// Both trailing bytes therefore always fall in the continuation-byte range
/// `0x80..=0xBF`, including for code points `>= 0x40` such as `\`, `_` or `~`
/// (`\` becomes `%E0%81%9C`, `/` becomes `%E0%80%AF`).
///
/// **Context**: `iis-6` — some WAFs reject 2-byte overlongs but accept 3-byte overlongs.
pub fn overlong_utf8_more(payload: impl AsRef<[u8]>) -> String {
    String::from_utf8_lossy(payload.as_ref())
        .chars()
        .map(|ch| {
            if ch.is_ascii() && !ch.is_ascii_alphanumeric() {
                let byte = ch as u8;
                // The high bit (bit 6) must go into the middle byte; OR-ing the
                // whole value into the last byte would produce 0xC0..=0xFF,
                // which is not a continuation byte.
                format!(
                    "%{:02X}%{:02X}%{:02X}",
                    0xE0,
                    0x80 | (byte >> 6),
                    0x80 | (byte & 0x3F)
                )
            } else {
                ch.to_string()
            }
        })
        .collect()
}
72
73/// Chunked transfer-encoding split — break payload across HTTP chunks.
74///
75/// **Context**: `http-request-body` — ONLY valid when sent with
76/// `Transfer-Encoding: chunked`.
77pub fn chunked_split(
78    payload: impl AsRef<[u8]>,
79    chunk_size: usize,
80) -> Result<ChunkedBody, EncodeError> {
81    let payload = payload.as_ref();
82    if payload.is_empty() {
83        return Ok(ChunkedBody {
84            body: Vec::new(),
85            required_headers: vec![("Transfer-Encoding".to_string(), "chunked".to_string())],
86        });
87    }
88    let chunk_size = chunk_size.max(1);
89    let mut result: Vec<u8> = Vec::with_capacity(payload.len() + 64);
90
91    for chunk in payload.chunks(chunk_size) {
92        let _ = write!(&mut result, "{:x}\r\n", chunk.len());
93        result.extend_from_slice(chunk);
94        result.extend_from_slice(b"\r\n");
95    }
96    result.extend_from_slice(b"0\r\n\r\n");
97
98    Ok(ChunkedBody {
99        body: result,
100        required_headers: vec![("Transfer-Encoding".to_string(), "chunked".to_string())],
101    })
102}
103
104/// HTTP parameter pollution — duplicate parameter with a benign first value.
105///
106/// Depending on the server framework, the last value wins (PHP, ASP.NET)
107/// while many WAFs only inspect the first parameter occurrence.
108pub fn parameter_pollute(payload: impl AsRef<[u8]>) -> String {
109    let payload = payload.as_ref();
110    let payload_str = String::from_utf8_lossy(payload);
111    if let Some(eq_pos) = payload.iter().position(|byte| *byte == b'=') {
112        let key = String::from_utf8_lossy(&payload[..eq_pos]);
113        format!("{key}=safe&{payload_str}")
114    } else {
115        let decoy: String = (0..8)
116            .map(|_| rand::random::<u8>() % 26 + b'a')
117            .map(|b| b as char)
118            .collect();
119        format!("{decoy}=1&{payload_str}")
120    }
121}
122
123/// Base64 encoding — standard alphabet.
124pub fn base64_encode(payload: impl AsRef<[u8]>) -> String {
125    general_purpose::STANDARD.encode(payload)
126}
127
128/// Base64 URL-safe encoding — `-_` alphabet, no padding.
129pub fn base64_url_encode(payload: impl AsRef<[u8]>) -> String {
130    general_purpose::URL_SAFE_NO_PAD.encode(payload)
131}
132
133/// Hex encoding.
134pub fn hex_encode(payload: impl AsRef<[u8]>) -> String {
135    hex::encode(payload)
136}
137
/// Encode one Unicode scalar value as UTF-16 big-endian bytes.
///
/// Returns two bytes for a character that fits in a single UTF-16 code unit
/// and four bytes (a surrogate pair) otherwise.
fn char_to_utf16be(c: char) -> Vec<u8> {
    let mut units = [0u16; 2];
    c.encode_utf16(&mut units)
        .iter()
        .flat_map(|unit| unit.to_be_bytes())
        .collect()
}
149
150/// Modified Base64 for UTF-7 (RFC 2152) — standard alphabet without padding.
151fn modified_base64(bytes: &[u8]) -> String {
152    let mut b64 = general_purpose::STANDARD.encode(bytes);
153    b64.retain(|c| c != '=');
154    b64
155}
156
/// Whether `ch` is emitted verbatim by the UTF-7 encoder.
///
/// Accepts ASCII letters, ASCII digits and the nine punctuation characters
/// `' ( ) , - . / : ?` (the RFC 2152 direct characters). Everything else,
/// including space and `+`, is rejected.
fn is_utf7_direct(ch: char) -> bool {
    const DIRECT_PUNCTUATION: &str = "'(),-./:?";
    ch.is_ascii_alphanumeric() || DIRECT_PUNCTUATION.contains(ch)
}
175
176/// UTF-7 encoding per RFC 2152.
177///
178/// **Context**: `iis`, `legacy-dotnet` — only safe where the target actually
179/// decodes UTF-7.
180pub fn utf7_encode(payload: &str) -> String {
181    let mut out = String::new();
182    let mut shift_buf: Vec<u8> = Vec::new();
183
184    fn flush_shift(out: &mut String, buf: &mut Vec<u8>) {
185        if !buf.is_empty() {
186            out.push('+');
187            out.push_str(&modified_base64(buf));
188            out.push('-');
189            buf.clear();
190        }
191    }
192
193    for ch in payload.chars() {
194        if ch == '+' {
195            flush_shift(&mut out, &mut shift_buf);
196            out.push_str("+-");
197        } else if is_utf7_direct(ch) {
198            flush_shift(&mut out, &mut shift_buf);
199            out.push(ch);
200        } else {
201            shift_buf.extend_from_slice(&char_to_utf16be(ch));
202        }
203    }
204    flush_shift(&mut out, &mut shift_buf);
205    out
206}
207
208/// Gzip compression.
209///
210/// **Context**: `http-request-body` — ONLY valid with `Content-Encoding: gzip`.
211pub fn gzip_encode(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
212    let payload = payload.as_ref();
213    let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
214    encoder
215        .write_all(payload)
216        .map_err(|e| EncodeError::InvalidConfig(format!("gzip failed: {e}")))?;
217    let bytes = encoder
218        .finish()
219        .map_err(|e| EncodeError::InvalidConfig(format!("gzip failed: {e}")))?;
220    Ok(general_purpose::STANDARD.encode(bytes))
221}
222
223/// Deflate compression.
224///
225/// **Context**: `http-request-body` — ONLY valid with `Content-Encoding: deflate`.
226pub fn deflate_encode(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
227    let payload = payload.as_ref();
228    let mut encoder =
229        flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
230    encoder
231        .write_all(payload)
232        .map_err(|e| EncodeError::InvalidConfig(format!("deflate failed: {e}")))?;
233    let bytes = encoder
234        .finish()
235        .map_err(|e| EncodeError::InvalidConfig(format!("deflate failed: {e}")))?;
236    Ok(general_purpose::STANDARD.encode(bytes))
237}
238
/// Unit tests for the structural encoders in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn null_byte_with_extension() {
        assert_eq!(null_byte_inject("file.php"), "file.php%00.jpg");
    }

    #[test]
    fn null_byte_without_extension() {
        assert_eq!(null_byte_inject("payload"), "payload%00");
    }

    #[test]
    fn overlong_utf8_slash() {
        let result = overlong_utf8("/");
        assert_eq!(result, "%C0%AF");
    }

    #[test]
    fn overlong_utf8_more_slash() {
        let result = overlong_utf8_more("/");
        assert_eq!(result, "%E0%80%AF");
    }

    #[test]
    fn chunked_split_produces_valid_chunks() {
        let result = chunked_split("SELECT * FROM users", 3).unwrap();
        let body = String::from_utf8_lossy(&result.body);
        assert!(body.contains("\r\n"));
        assert!(body.ends_with("0\r\n\r\n"));
        assert_eq!(
            result.required_headers,
            vec![("Transfer-Encoding".to_string(), "chunked".to_string())]
        );
    }

    #[test]
    fn chunked_split_byte_lengths_correct() {
        // Includes non-UTF-8 bytes to prove lengths are counted in bytes.
        let payload = b"abc\x80\x81defgh";
        let result = chunked_split(payload, 3).unwrap();
        // Parse the raw bytes: each chunk is "size\r\ndata\r\n"
        let mut i = 0;
        let mut chunk_count = 0;
        let expected_chunk_sizes = [3_usize, 3, 3, 1];
        while i < result.body.len() {
            // Find the \r\n after the size
            let size_end = result.body[i..]
                .windows(2)
                .position(|w| w == b"\r\n")
                .unwrap_or(result.body.len() - i)
                + i;
            let size_str = std::str::from_utf8(&result.body[i..size_end]).unwrap();
            if size_str == "0" {
                // Terminating chunk
                break;
            }
            let size = usize::from_str_radix(size_str, 16).unwrap();
            assert_eq!(size, expected_chunk_sizes[chunk_count]);
            // Data starts after \r\n and ends after size bytes
            let data_start = size_end + 2;
            let data_end = data_start + size;
            assert_eq!(
                &result.body[data_start..data_end],
                &payload[chunk_count * 3..chunk_count * 3 + size]
            );
            // Skip the trailing \r\n
            i = data_end + 2;
            chunk_count += 1;
        }
        assert_eq!(chunk_count, 4);
    }

    #[test]
    fn chunked_split_empty() {
        let result = chunked_split("", 3).unwrap();
        assert!(result.body.is_empty());
    }

    #[test]
    fn parameter_pollution_with_key_value() {
        let result = parameter_pollute("user=' OR 1=1--");
        assert!(result.starts_with("user=safe&"));
        assert!(result.contains("user=' OR 1=1--"));
    }

    #[test]
    fn parameter_pollution_without_equals() {
        let result = parameter_pollute("payload");
        assert!(result.ends_with("&payload"));
        assert!(!result.contains("_wafrift_decoy"));
        // The decoy name is random, so pin its shape rather than its value:
        // eight lowercase ASCII letters followed by `=1&<payload>`.
        let (decoy, rest) = result
            .split_once('=')
            .expect("decoy parameter must contain '='");
        assert_eq!(decoy.len(), 8);
        assert!(decoy.bytes().all(|b| b.is_ascii_lowercase()));
        assert_eq!(rest, "1&payload");
    }

    #[test]
    fn base64_standard() {
        assert_eq!(base64_encode("hello"), "aGVsbG8=");
    }

    #[test]
    fn base64_url_safe() {
        assert_eq!(base64_url_encode("hello+++"), "aGVsbG8rKys");
    }

    #[test]
    fn hex_encode_basic() {
        assert_eq!(hex_encode("ABC"), "414243");
    }

    #[test]
    fn utf7_rfc2152_basic() {
        // Direct chars pass through
        assert_eq!(utf7_encode("Hello"), "Hello");
        // Plus sign escaped
        assert_eq!(utf7_encode("A+B"), "A+-B");
        // Non-ASCII encoded
        assert!(utf7_encode("日本語").starts_with('+'));
    }

    #[test]
    fn utf7_rfc2152_decodeable() {
        // 日本語 is U+65E5 U+672C U+8A9E; its UTF-16BE bytes
        // 65 E5 67 2C 8A 9E encode to the Base64 text "ZeVnLIqe",
        // so the UTF-7 form is "+ZeVnLIqe-".
        let encoded = utf7_encode("日本語");
        assert!(encoded.contains('+'));
        assert!(encoded.contains('-'));
        assert_eq!(encoded, "+ZeVnLIqe-");
    }

    #[test]
    fn gzip_roundtrip() {
        let original = b"SELECT * FROM users";
        let encoded = gzip_encode(original).unwrap();
        assert!(!encoded.is_empty());
        // Verify it's valid base64
        let decoded = general_purpose::STANDARD.decode(&encoded).unwrap();
        let mut decoder = flate2::read::GzDecoder::new(&decoded[..]);
        let mut decompressed = Vec::new();
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();
        assert_eq!(decompressed, original);
    }

    #[test]
    fn deflate_roundtrip() {
        let original = b"SELECT * FROM users";
        let encoded = deflate_encode(original).unwrap();
        assert!(!encoded.is_empty());
        let decoded = general_purpose::STANDARD.decode(&encoded).unwrap();
        let mut decoder = flate2::read::DeflateDecoder::new(&decoded[..]);
        let mut decompressed = Vec::new();
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();
        assert_eq!(decompressed, original);
    }
}