wafrift_encoding/encoding/
structural.rs

1//! Structural encoding strategies — byte-level and framing manipulations.
2
3use base64::{Engine as _, engine::general_purpose};
4use std::io::Write as _;
5
6use crate::error::EncodeError;
7
/// Result of chunked transfer-encoding split.
///
/// This strategy is ONLY semantically correct when the body is sent as the body
/// of an HTTP request with `Transfer-Encoding: chunked`.
///
/// Values of this type are produced by `chunked_split`, which pairs the framed
/// bytes with the header the receiver needs in order to parse them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkedBody {
    /// The chunked-encoded body as raw bytes.
    ///
    /// As built by `chunked_split`: each chunk is `<hex length>\r\n<data>\r\n`,
    /// followed by the terminating `0\r\n\r\n`. It is empty when the input
    /// payload was empty.
    pub body: Vec<u8>,
    /// Required headers that must accompany this body, as `(name, value)` pairs.
    ///
    /// `chunked_split` always sets this to the single pair
    /// `("Transfer-Encoding", "chunked")`.
    pub required_headers: Vec<(String, String)>,
}
19
20/// Null byte injection — append `%00` to truncate strings in C-style parsers.
21///
22/// **Context**: `php`, `cgi` — only semantically correct for backends using
23/// C-style null-terminated string handling.
24pub fn null_byte_inject(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
25    let payload = payload.as_ref();
26    let payload_str = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
27    if payload.contains(&b'.') {
28        Ok(format!("{payload_str}%00.jpg"))
29    } else {
30        Ok(format!("{payload_str}%00"))
31    }
32}
33
34/// Overlong UTF-8 encoding (2-byte) — represent ASCII non-alphanumeric as 2-byte sequences.
35///
36/// **Context**: `iis-6` — only works against specific legacy WAFs/frontends that
37/// normalize overlong sequences rather than rejecting them.
38pub fn overlong_utf8(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
39    let text = std::str::from_utf8(payload.as_ref()).map_err(|_| EncodeError::InvalidUtf8)?;
40    Ok(text
41        .chars()
42        .map(|ch| {
43            if ch.is_ascii_alphanumeric() {
44                ch.to_string()
45            } else if ch.is_ascii() {
46                let byte = ch as u8;
47                format!("%{:02X}%{:02X}", 0xC0 | (byte >> 6), 0x80 | (byte & 0x3F))
48            } else {
49                ch.to_string()
50            }
51        })
52        .collect())
53}
54
55/// Extended overlong UTF-8 encoding (3-byte) — broader coverage with 3-byte sequences.
56///
57/// **Context**: `iis-6` — some WAFs reject 2-byte overlongs but accept 3-byte overlongs.
58pub fn overlong_utf8_more(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
59    let text = std::str::from_utf8(payload.as_ref()).map_err(|_| EncodeError::InvalidUtf8)?;
60    Ok(text
61        .chars()
62        .map(|ch| {
63            if ch.is_ascii_alphanumeric() {
64                ch.to_string()
65            } else if ch.is_ascii() {
66                let byte = ch as u8;
67                format!("%{:02X}%{:02X}%{:02X}", 0xE0, 0x80, 0x80 | byte)
68            } else {
69                ch.to_string()
70            }
71        })
72        .collect())
73}
74
75/// Chunked transfer-encoding split — break payload across HTTP chunks.
76///
77/// **Context**: `http-request-body` — ONLY valid when sent with
78/// `Transfer-Encoding: chunked`.
79pub fn chunked_split(
80    payload: impl AsRef<[u8]>,
81    chunk_size: usize,
82) -> Result<ChunkedBody, EncodeError> {
83    let payload = payload.as_ref();
84    if payload.is_empty() {
85        return Ok(ChunkedBody {
86            body: Vec::new(),
87            required_headers: vec![("Transfer-Encoding".to_string(), "chunked".to_string())],
88        });
89    }
90    let chunk_size = chunk_size.max(1);
91    let mut result: Vec<u8> = Vec::with_capacity(payload.len() + 64);
92
93    for chunk in payload.chunks(chunk_size) {
94        let _ = write!(&mut result, "{:x}\r\n", chunk.len());
95        result.extend_from_slice(chunk);
96        result.extend_from_slice(b"\r\n");
97    }
98    result.extend_from_slice(b"0\r\n\r\n");
99
100    Ok(ChunkedBody {
101        body: result,
102        required_headers: vec![("Transfer-Encoding".to_string(), "chunked".to_string())],
103    })
104}
105
106/// HTTP parameter pollution — duplicate parameter with a benign first value.
107///
108/// Depending on the server framework, the last value wins (PHP, ASP.NET)
109/// while many WAFs only inspect the first parameter occurrence.
110pub fn parameter_pollute(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
111    let payload = payload.as_ref();
112    let payload_str = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
113    if let Some(eq_pos) = payload.iter().position(|byte| *byte == b'=') {
114        let key = std::str::from_utf8(&payload[..eq_pos]).map_err(|_| EncodeError::InvalidUtf8)?;
115        Ok(format!("{key}=safe&{payload_str}"))
116    } else {
117        let decoy: String = (0..8)
118            .map(|_| rand::random::<u8>() % 26 + b'a')
119            .map(|b| b as char)
120            .collect();
121        Ok(format!("{decoy}=1&{payload_str}"))
122    }
123}
124
125/// Base64 encoding — standard alphabet.
126pub fn base64_encode(payload: impl AsRef<[u8]>) -> String {
127    general_purpose::STANDARD.encode(payload)
128}
129
130/// Base64 URL-safe encoding — `-_` alphabet, no padding.
131pub fn base64_url_encode(payload: impl AsRef<[u8]>) -> String {
132    general_purpose::URL_SAFE_NO_PAD.encode(payload)
133}
134
135/// Hex encoding.
136pub fn hex_encode(payload: impl AsRef<[u8]>) -> String {
137    hex::encode(payload)
138}
139
/// Encode one Unicode scalar value as UTF-16 big-endian bytes.
///
/// Yields 2 bytes for characters in the Basic Multilingual Plane and 4 bytes
/// (a surrogate pair) for everything above it.
fn char_to_utf16be(c: char) -> Vec<u8> {
    let mut units = [0u16; 2];
    let encoded = c.encode_utf16(&mut units);
    // High byte first for every 16-bit code unit.
    encoded
        .iter()
        .flat_map(|unit| unit.to_be_bytes())
        .collect()
}
151
152/// Modified Base64 for UTF-7 (RFC 2152) — standard alphabet without padding.
153fn modified_base64(bytes: &[u8]) -> String {
154    let mut b64 = general_purpose::STANDARD.encode(bytes);
155    b64.retain(|c| c != '=');
156    b64
157}
158
/// Whether `ch` is emitted literally by the UTF-7 encoder (RFC 2152 direct
/// characters): ASCII letters, ASCII digits, and `' ( ) , - . / : ?`.
///
/// Everything else — including space and `+` — returns `false`.
fn is_utf7_direct(ch: char) -> bool {
    const DIRECT_PUNCTUATION: &str = "'(),-./:?";
    ch.is_ascii_alphanumeric() || DIRECT_PUNCTUATION.contains(ch)
}
177
178/// UTF-7 encoding per RFC 2152.
179///
180/// **Context**: `iis`, `legacy-dotnet` — only safe where the target actually
181/// decodes UTF-7.
182pub fn utf7_encode(payload: &str) -> String {
183    let mut out = String::new();
184    let mut shift_buf: Vec<u8> = Vec::new();
185
186    fn flush_shift(out: &mut String, buf: &mut Vec<u8>) {
187        if !buf.is_empty() {
188            out.push('+');
189            out.push_str(&modified_base64(buf));
190            out.push('-');
191            buf.clear();
192        }
193    }
194
195    for ch in payload.chars() {
196        if ch == '+' {
197            flush_shift(&mut out, &mut shift_buf);
198            out.push_str("+-");
199        } else if is_utf7_direct(ch) {
200            flush_shift(&mut out, &mut shift_buf);
201            out.push(ch);
202        } else {
203            shift_buf.extend_from_slice(&char_to_utf16be(ch));
204        }
205    }
206    flush_shift(&mut out, &mut shift_buf);
207    out
208}
209
210/// Gzip compression.
211///
212/// **Context**: `http-request-body` — ONLY valid with `Content-Encoding: gzip`.
213pub fn gzip_encode(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
214    let payload = payload.as_ref();
215    let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
216    encoder
217        .write_all(payload)
218        .map_err(|e| EncodeError::InvalidConfig(format!("gzip failed: {e}")))?;
219    let bytes = encoder
220        .finish()
221        .map_err(|e| EncodeError::InvalidConfig(format!("gzip failed: {e}")))?;
222    Ok(general_purpose::STANDARD.encode(bytes))
223}
224
225/// Deflate compression.
226///
227/// **Context**: `http-request-body` — ONLY valid with `Content-Encoding: deflate`.
228pub fn deflate_encode(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
229    let payload = payload.as_ref();
230    let mut encoder =
231        flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
232    encoder
233        .write_all(payload)
234        .map_err(|e| EncodeError::InvalidConfig(format!("deflate failed: {e}")))?;
235    let bytes = encoder
236        .finish()
237        .map_err(|e| EncodeError::InvalidConfig(format!("deflate failed: {e}")))?;
238    Ok(general_purpose::STANDARD.encode(bytes))
239}
240
/// Unit tests for the structural encoders in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn null_byte_with_extension() {
        assert_eq!(null_byte_inject("file.php").unwrap(), "file.php%00.jpg");
    }

    #[test]
    fn null_byte_without_extension() {
        assert_eq!(null_byte_inject("payload").unwrap(), "payload%00");
    }

    #[test]
    fn overlong_utf8_slash() {
        let result = overlong_utf8("/").unwrap();
        assert_eq!(result, "%C0%AF");
    }

    #[test]
    fn overlong_utf8_more_slash() {
        let result = overlong_utf8_more("/").unwrap();
        assert_eq!(result, "%E0%80%AF");
    }

    #[test]
    fn chunked_split_produces_valid_chunks() {
        let result = chunked_split("SELECT * FROM users", 3).unwrap();
        let body = String::from_utf8(result.body.clone()).unwrap();
        assert!(body.contains("\r\n"));
        assert!(body.ends_with("0\r\n\r\n"));
        assert_eq!(
            result.required_headers,
            vec![("Transfer-Encoding".to_string(), "chunked".to_string())]
        );

        // Pin the exact framing for a small input: size line, data, CRLF,
        // then the terminating zero-length chunk.
        let small = chunked_split("abcd", 3).unwrap();
        assert_eq!(small.body, b"3\r\nabc\r\n1\r\nd\r\n0\r\n\r\n".to_vec());
    }

    #[test]
    fn chunked_split_byte_lengths_correct() {
        // Includes non-UTF-8 bytes: splitting must be byte-based.
        let payload = b"abc\x80\x81defgh";
        let result = chunked_split(payload, 3).unwrap();
        // Parse the raw bytes: each chunk is "size\r\ndata\r\n"
        let mut i = 0;
        let mut chunk_count = 0;
        let expected_chunk_sizes = [3_usize, 3, 3, 1];
        while i < result.body.len() {
            // Find the \r\n after the size
            let size_end = result.body[i..]
                .windows(2)
                .position(|w| w == b"\r\n")
                .unwrap_or(result.body.len() - i)
                + i;
            let size_str = std::str::from_utf8(&result.body[i..size_end]).unwrap();
            if size_str == "0" {
                // Terminating chunk
                break;
            }
            let size = usize::from_str_radix(size_str, 16).unwrap();
            assert_eq!(size, expected_chunk_sizes[chunk_count]);
            // Data starts after \r\n and ends after size bytes
            let data_start = size_end + 2;
            let data_end = data_start + size;
            assert_eq!(
                &result.body[data_start..data_end],
                &payload[chunk_count * 3..chunk_count * 3 + size]
            );
            // Skip the trailing \r\n
            i = data_end + 2;
            chunk_count += 1;
        }
        assert_eq!(chunk_count, 4);
    }

    #[test]
    fn chunked_split_empty() {
        let result = chunked_split("", 3).unwrap();
        assert!(result.body.is_empty());
    }

    #[test]
    fn parameter_pollution_with_key_value() {
        let result = parameter_pollute("user=' OR 1=1--").unwrap();
        assert!(result.starts_with("user=safe&"));
        assert!(result.contains("user=' OR 1=1--"));
    }

    #[test]
    fn parameter_pollution_without_equals() {
        let result = parameter_pollute("payload").unwrap();
        assert!(result.ends_with("&payload"));
        assert!(!result.contains("_wafrift_decoy"));

        // The generated decoy key is random, but its shape is fixed:
        // eight lowercase ASCII letters followed by `=1`.
        let decoy = result
            .strip_suffix("=1&payload")
            .expect("decoy parameter must have the value `1`");
        assert_eq!(decoy.len(), 8);
        assert!(decoy.bytes().all(|b| b.is_ascii_lowercase()));
    }

    #[test]
    fn base64_standard() {
        assert_eq!(base64_encode("hello"), "aGVsbG8=");
    }

    #[test]
    fn base64_url_safe() {
        assert_eq!(base64_url_encode("hello+++"), "aGVsbG8rKys");
    }

    #[test]
    fn hex_encode_basic() {
        assert_eq!(hex_encode("ABC"), "414243");
    }

    #[test]
    fn utf7_rfc2152_basic() {
        // Direct chars pass through
        assert_eq!(utf7_encode("Hello"), "Hello");
        // Plus sign escaped
        assert_eq!(utf7_encode("A+B"), "A+-B");
        // Non-ASCII encoded
        assert!(utf7_encode("日本語").starts_with('+'));
    }

    #[test]
    fn utf7_rfc2152_decodeable() {
        // 日本語 is U+65E5 U+672C U+8A9E. Its UTF-16BE bytes
        // (65 E5 67 2C 8A 9E) are "ZeVnLIqe" in modified Base64, and the run
        // is wrapped in the `+` … `-` shift markers.
        assert_eq!(utf7_encode("日本語"), "+ZeVnLIqe-");
    }

    #[test]
    fn gzip_roundtrip() {
        let original = b"SELECT * FROM users";
        let encoded = gzip_encode(original).unwrap();
        assert!(!encoded.is_empty());
        // Verify it's valid base64
        let decoded = general_purpose::STANDARD.decode(&encoded).unwrap();
        let mut decoder = flate2::read::GzDecoder::new(&decoded[..]);
        let mut decompressed = Vec::new();
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();
        assert_eq!(decompressed, original);
    }

    #[test]
    fn deflate_roundtrip() {
        let original = b"SELECT * FROM users";
        let encoded = deflate_encode(original).unwrap();
        assert!(!encoded.is_empty());
        let decoded = general_purpose::STANDARD.decode(&encoded).unwrap();
        let mut decoder = flate2::read::DeflateDecoder::new(&decoded[..]);
        let mut decompressed = Vec::new();
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();
        assert_eq!(decompressed, original);
    }
}
wafrift_encoding/encoding/structural.rs

wafrift_encoding/encoding/
structural.rs