Skip to main content

wafrift_encoding/
lib.rs

1//! wafrift-encoding — Payload encoding strategies and header obfuscation.
2//!
3//! See [`cookie_smuggle`] for RFC 6265-vs-6265bis Cookie-header
4//! parser-differential probes (prefix bypass, duplicate-name pairs,
5//! quoted-semicolon values, empty-name pairs, control-byte injection,
6//! whitespace around `=`).
7//!
8//! Transforms attack payloads using various encoding strategies
9//! (URL, Unicode, HTML entity, SQL comments, etc.) and applies
10//! header-level obfuscation techniques for WAF bypass.
11//!
12//! # Examples
13//!
14//! Single-pass encoding with one strategy:
15//!
16//! ```
17//! use wafrift_encoding::{Strategy, encode};
18//!
19//! let payload = "' OR 1=1--";
20//! let url_encoded = encode(payload, Strategy::UrlEncode).unwrap();
21//! assert!(url_encoded.contains("%27"));    // single quote
22//! assert!(url_encoded.contains("%20"));    // space
23//! assert!(url_encoded.contains("%3D"));    // equals
24//!
25//! // Same payload, double-encoded — bypasses single-decode WAFs.
26//! let double = encode(payload, Strategy::DoubleUrlEncode).unwrap();
27//! assert!(double.contains("%2527"));
28//! ```
29//!
30//! Layered encoding for stronger evasion (HTML-entity-encode the
31//! Unicode-escaped form):
32//!
33//! ```
34//! use wafrift_encoding::{Strategy, encode_layered};
35//!
36//! let result = encode_layered(
37//!     "<script>",
38//!     &[Strategy::UnicodeEncode, Strategy::HtmlEntityEncode],
39//! ).unwrap();
40//! assert!(result.contains('&'));   // HTML entity encoded
41//! ```
42
43#![forbid(unsafe_code)]
44
45pub mod auth_bypass;
46pub mod auth_header_smuggle;
47pub mod compression;
48pub mod cookie_smuggle;
49pub mod encoding;
50pub mod error;
51pub mod header;
52pub mod host_header_smuggle;
53pub mod jwt_smuggle;
54pub mod path_normalize_smuggle;
55pub mod path_prefix;
56pub mod range_header_smuggle;
57pub mod tamper;
58pub mod url_mutate;
59
60// Re-export the encoding submodule's public API at crate root for ergonomics.
61pub use encoding::{
62    Strategy, aggressiveness, all_strategies, encode, encode_layered, layered_combinations,
63};
64
65// Re-export error types.
66pub use error::EncodeError;
67
68// Re-export tamper module for convenient access.
69pub use tamper::{
70    TamperConfig, TamperError, TamperRegistry, TamperStrategy, all_tamper_names, default_registry,
71    tamper,
72};
73
74pub mod contextual;
75
/// Returns the largest UTF-8 char-boundary byte index that is `<= idx` in `s`.
///
/// The result never exceeds `s.len()`: an `idx` past the end of the string is
/// clamped to `s.len()` (which is always a boundary) instead of panicking.
///
/// §7 canonical home for the "snap a byte offset down to a char boundary"
/// primitive used across the header/cookie/range smuggle builders. These
/// builders cap header values with `String::truncate(N)` and split values at
/// computed byte offsets; their inputs (operator `--credential`, payload
/// seeds) pass through sanitisers that strip only CR/LF/NUL, so multibyte
/// UTF-8 survives and a raw byte index can land mid-codepoint — where
/// `String::truncate` / `&s[..idx]` PANIC. Routing every such site through
/// this one helper keeps them boundary-safe and prevents the three copies
/// (was: `header::char_boundary_near`, `cookie_smuggle`'s local copy, and the
/// open-coded walks) from drifting.
#[must_use]
pub(crate) fn floor_char_boundary(s: &str, idx: usize) -> usize {
    // Clamp first so a request past the end resolves to `s.len()`.
    let upper = idx.min(s.len());
    // Walk downwards from the clamped index to the first boundary. Index 0 is
    // always a char boundary, so the search always succeeds; `unwrap_or(0)`
    // merely avoids introducing a panic path.
    (0..=upper)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
96
#[cfg(test)]
mod floor_char_boundary_tests {
    use super::floor_char_boundary;

    /// Checks that `floor_char_boundary` snaps mid-codepoint indices down to
    /// the start of the char, leaves boundary indices unchanged, and clamps
    /// out-of-range indices to the string length.
    #[test]
    fn snaps_down_to_boundary_and_clamps_to_len() {
        // Each row is (input, requested byte index, expected boundary).
        let cases: [(&str, usize, usize); 9] = [
            // "é" is 2 bytes (0xC3 0xA9). Index 1 is mid-codepoint → snap to 0.
            ("éa", 1, 0),
            // A boundary index is returned unchanged.
            ("éa", 2, 2), // after `é`
            ("éa", 3, 3), // after `a` (== len)
            // Past the end clamps to len (never panics, never exceeds).
            ("éa", 99, 3),
            // ASCII: every index is a boundary.
            ("abcd", 2, 2),
            // Empty string clamps to 0.
            ("", 5, 0),
            // 4-byte char (🦀) — every interior index snaps to the start.
            ("🦀", 1, 0),
            ("🦀", 3, 0),
            ("🦀", 4, 4),
        ];

        for &(text, idx, expected) in &cases {
            assert_eq!(
                floor_char_boundary(text, idx),
                expected,
                "floor_char_boundary({:?}, {})",
                text,
                idx
            );
        }
    }
}