wafrift_encoding/encoding/
structural.rs1use base64::{Engine as _, engine::general_purpose};
4use std::io::Write as _;
5
6use crate::error::EncodeError;
7use wafrift_types::hash::{FNV_OFFSET_64, FNV_PRIME_64};
8
/// A payload after HTTP chunked framing has been applied to it.
///
/// Returned by [`chunked_split`]; it pairs the framed bytes with the headers
/// the caller is expected to send so the framing is interpreted as intended.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ChunkedBody {
    /// Body bytes, already laid out as size-prefixed chunks.
    pub body: Vec<u8>,
    /// `(name, value)` header pairs to send together with `body`.
    pub required_headers: Vec<(String, String)>,
}
20
21pub fn null_byte_inject(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
26 let payload = payload.as_ref();
27 let payload_str = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
28 if payload.contains(&b'.') {
29 Ok(format!("{payload_str}%00.jpg"))
30 } else {
31 Ok(format!("{payload_str}%00"))
32 }
33}
34
35pub fn overlong_utf8(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
40 let text = std::str::from_utf8(payload.as_ref()).map_err(|_| EncodeError::InvalidUtf8)?;
41 Ok(text
42 .chars()
43 .map(|ch| {
44 if ch.is_ascii_alphanumeric() {
45 ch.to_string()
46 } else if ch.is_ascii() {
47 let byte = ch as u8;
48 format!("%{:02X}%{:02X}", 0xC0 | (byte >> 6), 0x80 | (byte & 0x3F))
49 } else {
50 ch.to_string()
51 }
52 })
53 .collect())
54}
55
56pub fn overlong_utf8_more(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
77 let text = std::str::from_utf8(payload.as_ref()).map_err(|_| EncodeError::InvalidUtf8)?;
78 Ok(text
79 .chars()
80 .map(|ch| {
81 if ch.is_ascii_alphanumeric() {
82 ch.to_string()
83 } else if ch.is_ascii() {
84 let byte = ch as u8;
85 let cont1 = 0x80 | (byte >> 6);
86 let cont2 = 0x80 | (byte & 0x3F);
87 format!("%E0%{cont1:02X}%{cont2:02X}")
88 } else {
89 ch.to_string()
90 }
91 })
92 .collect())
93}
94
95pub fn chunked_split(
100 payload: impl AsRef<[u8]>,
101 chunk_size: usize,
102) -> Result<ChunkedBody, EncodeError> {
103 let payload = payload.as_ref();
104 if payload.is_empty() {
105 return Ok(ChunkedBody {
106 body: Vec::new(),
107 required_headers: vec![("Transfer-Encoding".to_string(), "chunked".to_string())],
108 });
109 }
110 let chunk_size = chunk_size.max(1);
111 let mut result: Vec<u8> = Vec::with_capacity(payload.len() + 64);
112
113 for chunk in payload.chunks(chunk_size) {
114 let _ = write!(&mut result, "{:x}\r\n", chunk.len());
115 result.extend_from_slice(chunk);
116 result.extend_from_slice(b"\r\n");
117 }
118 result.extend_from_slice(b"0\r\n\r\n");
119
120 Ok(ChunkedBody {
121 body: result,
122 required_headers: vec![("Transfer-Encoding".to_string(), "chunked".to_string())],
123 })
124}
125
126pub fn parameter_pollute(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
131 let payload = payload.as_ref();
132 let payload_str = std::str::from_utf8(payload).map_err(|_| EncodeError::InvalidUtf8)?;
133 if let Some(eq_pos) = payload.iter().position(|byte| *byte == b'=') {
134 let key = std::str::from_utf8(&payload[..eq_pos]).map_err(|_| EncodeError::InvalidUtf8)?;
135 Ok(format!("{key}=safe&{payload_str}"))
136 } else {
137 let mut h: u64 = FNV_OFFSET_64;
144 for &b in payload {
145 h ^= u64::from(b);
146 h = h.wrapping_mul(FNV_PRIME_64);
147 }
148 let decoy: String = (0..8)
149 .map(|i| (b'a' + (((h >> (i * 8)) as u8) % 26)) as char)
150 .collect();
151 Ok(format!("{decoy}=1&{payload_str}"))
152 }
153}
154
155pub fn base64_encode(payload: impl AsRef<[u8]>) -> String {
157 general_purpose::STANDARD.encode(payload)
158}
159
160pub fn base64_url_encode(payload: impl AsRef<[u8]>) -> String {
162 general_purpose::URL_SAFE_NO_PAD.encode(payload)
163}
164
165pub fn hex_encode(payload: impl AsRef<[u8]>) -> String {
167 hex::encode(payload)
168}
169
170pub use wafrift_types::utf7::{utf7_decode, utf7_encode};
176
177pub fn gzip_encode(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
181 let payload = payload.as_ref();
182 let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
183 encoder
184 .write_all(payload)
185 .map_err(|e| EncodeError::InvalidConfig(format!("gzip failed: {e}")))?;
186 let bytes = encoder
187 .finish()
188 .map_err(|e| EncodeError::InvalidConfig(format!("gzip failed: {e}")))?;
189 Ok(general_purpose::STANDARD.encode(bytes))
190}
191
192pub fn deflate_encode(payload: impl AsRef<[u8]>) -> Result<String, EncodeError> {
196 let payload = payload.as_ref();
197 let mut encoder =
198 flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
199 encoder
200 .write_all(payload)
201 .map_err(|e| EncodeError::InvalidConfig(format!("deflate failed: {e}")))?;
202 let bytes = encoder
203 .finish()
204 .map_err(|e| EncodeError::InvalidConfig(format!("deflate failed: {e}")))?;
205 Ok(general_purpose::STANDARD.encode(bytes))
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Header list every `chunked_split` result is expected to report.
    fn chunked_headers() -> Vec<(String, String)> {
        vec![("Transfer-Encoding".to_string(), "chunked".to_string())]
    }

    /// Turns a `%XX%YY…` string into the bytes it spells out.
    fn percent_bytes(encoded: &str) -> Vec<u8> {
        encoded
            .split('%')
            .filter(|part| !part.is_empty())
            .map(|part| u8::from_str_radix(part, 16).unwrap())
            .collect()
    }

    /// Reads `reader` to the end and returns everything it produced.
    fn drain(mut reader: impl std::io::Read) -> Vec<u8> {
        let mut out = Vec::new();
        std::io::Read::read_to_end(&mut reader, &mut out).unwrap();
        out
    }

    #[test]
    fn null_byte_with_extension() {
        let injected = null_byte_inject("file.php").unwrap();
        assert_eq!(injected, "file.php%00.jpg");
    }

    #[test]
    fn null_byte_without_extension() {
        let injected = null_byte_inject("payload").unwrap();
        assert_eq!(injected, "payload%00");
    }

    #[test]
    fn overlong_utf8_slash() {
        assert_eq!(overlong_utf8("/").unwrap(), "%C0%AF");
    }

    #[test]
    fn overlong_utf8_more_slash() {
        assert_eq!(overlong_utf8_more("/").unwrap(), "%E0%80%AF");
    }

    #[test]
    fn overlong_utf8_more_punctuation_above_0x40_uses_valid_continuation_bytes() {
        // All of these are ASCII punctuation at or above 0x40, i.e. characters
        // whose top payload bits are non-zero.
        for ch in "@[\\]^_`{|}~".chars() {
            let encoded = overlong_utf8_more(ch.to_string()).unwrap();
            assert!(
                encoded.starts_with("%E0%"),
                "{ch:?} should use 3-byte form, got: {encoded}"
            );

            let bytes = percent_bytes(&encoded);
            assert_eq!(bytes.len(), 3, "expected 3 bytes for {ch:?}");
            assert_eq!(bytes[0], 0xE0, "lead byte wrong for {ch:?}");
            assert!(
                (0x80..=0xBF).contains(&bytes[1]),
                "{ch:?} 2nd byte 0x{:02X} outside valid continuation range",
                bytes[1]
            );
            assert!(
                (0x80..=0xBF).contains(&bytes[2]),
                "{ch:?} 3rd byte 0x{:02X} outside valid continuation range",
                bytes[2]
            );

            // Reassemble the code point from the two continuation payloads.
            let high = u32::from(bytes[1] & 0x3F);
            let low = u32::from(bytes[2] & 0x3F);
            let codepoint = (high << 6) | low;
            assert_eq!(
                codepoint, ch as u32,
                "decoded codepoint 0x{codepoint:X} != original 0x{:X}",
                ch as u32
            );
        }
    }

    #[test]
    fn overlong_utf8_more_preserves_alphanumerics_verbatim() {
        assert_eq!(overlong_utf8_more("abc123").unwrap(), "abc123");
    }

    #[test]
    fn chunked_split_produces_valid_chunks() {
        let split = chunked_split("SELECT * FROM users", 3).unwrap();
        let text = std::str::from_utf8(&split.body).unwrap();
        assert!(text.contains("\r\n"));
        assert!(text.ends_with("0\r\n\r\n"));
        assert_eq!(split.required_headers, chunked_headers());
    }

    #[test]
    fn chunked_split_byte_lengths_correct() {
        // Includes non-UTF-8 bytes to prove the split works on raw bytes.
        let payload = b"abc\x80\x81defgh";
        let expected_sizes = [3_usize, 3, 3, 1];
        let framed = chunked_split(payload, 3).unwrap().body;

        let mut cursor = 0;
        let mut seen = 0;
        while cursor < framed.len() {
            // The size line runs up to the next CRLF (or the end of the body).
            let line_end = framed[cursor..]
                .windows(2)
                .position(|pair| pair == b"\r\n")
                .map_or(framed.len(), |offset| cursor + offset);
            let size_field = std::str::from_utf8(&framed[cursor..line_end]).unwrap();
            if size_field == "0" {
                break;
            }
            let size = usize::from_str_radix(size_field, 16).unwrap();
            assert_eq!(size, expected_sizes[seen]);

            // Chunk data follows the CRLF that ends the size line.
            let data_start = line_end + 2;
            let data_end = data_start + size;
            assert_eq!(
                &framed[data_start..data_end],
                &payload[seen * 3..seen * 3 + size]
            );

            // Skip the CRLF that closes the chunk data.
            cursor = data_end + 2;
            seen += 1;
        }
        assert_eq!(seen, 4);
    }

    #[test]
    fn chunked_split_empty() {
        let split = chunked_split("", 3).unwrap();
        assert!(split.body.is_empty());
    }

    #[test]
    fn parameter_pollution_with_key_value() {
        let polluted = parameter_pollute("user=' OR 1=1--").unwrap();
        assert!(polluted.starts_with("user=safe&"));
        assert!(polluted.contains("user=' OR 1=1--"));
    }

    #[test]
    fn parameter_pollution_without_equals() {
        let polluted = parameter_pollute("payload").unwrap();
        assert!(polluted.ends_with("&payload"));
        assert!(!polluted.contains("_wafrift_decoy"));

        let decoy = polluted
            .strip_suffix("=1&payload")
            .expect("decoy=1&payload shape");
        assert_eq!(decoy.len(), 8, "decoy must be 8 chars: {polluted}");
        assert!(
            decoy.chars().all(|c| c.is_ascii_lowercase()),
            "decoy must be [a-z]{{8}}: {polluted}"
        );

        // Same input gives the same decoy; a different input gives another one.
        assert_eq!(polluted, parameter_pollute("payload").unwrap());
        assert_ne!(polluted, parameter_pollute("payloae").unwrap());
    }

    #[test]
    fn base64_standard() {
        assert_eq!(base64_encode("hello"), "aGVsbG8=");
    }

    #[test]
    fn base64_url_safe() {
        assert_eq!(base64_url_encode("hello+++"), "aGVsbG8rKys");
    }

    #[test]
    fn hex_encode_basic() {
        assert_eq!(hex_encode("ABC"), "414243");
    }

    #[test]
    fn utf7_rfc2152_basic() {
        assert_eq!(utf7_encode("Hello"), "Hello");
        assert_eq!(utf7_encode("A+B"), "A+-B");
        assert!(utf7_encode("日本語").starts_with('+'));
    }

    #[test]
    fn utf7_rfc2152_decodeable() {
        let encoded = utf7_encode("日本語");
        assert!(encoded.contains('+'));
        assert!(encoded.contains('-'));
    }

    #[test]
    fn gzip_roundtrip() {
        let original = b"SELECT * FROM users";
        let encoded = gzip_encode(original).unwrap();
        assert!(!encoded.is_empty());

        let compressed = general_purpose::STANDARD.decode(&encoded).unwrap();
        let restored = drain(flate2::read::GzDecoder::new(compressed.as_slice()));
        assert_eq!(restored, original);
    }

    #[test]
    fn deflate_roundtrip() {
        let original = b"SELECT * FROM users";
        let encoded = deflate_encode(original).unwrap();
        assert!(!encoded.is_empty());

        let compressed = general_purpose::STANDARD.decode(&encoded).unwrap();
        let restored = drain(flate2::read::DeflateDecoder::new(compressed.as_slice()));
        assert_eq!(restored, original);
    }
}