wafrift_encoding/encoding/
unicode.rs1use std::fmt::Write as _;
3
/// Encodes every character of `payload` as a `\uXXXX` escape sequence.
///
/// One escape is produced per UTF-16 code unit, using uppercase hex digits
/// zero-padded to four places. A character above U+FFFF therefore becomes a
/// surrogate pair, i.e. two consecutive escapes.
///
/// Returns an empty string for empty input.
#[must_use]
pub fn unicode_encode(payload: &str) -> String {
    // ASCII has the largest expansion ratio: one input byte becomes six.
    let mut encoded = String::with_capacity(payload.len() * 6);
    // `encode_utf16` yields a single unit for BMP characters and a high/low
    // surrogate pair for everything beyond U+FFFF.
    for unit in payload.encode_utf16() {
        // Formatting into a `String` cannot fail, so the result is discarded.
        let _ = write!(&mut encoded, "\\u{unit:04X}");
    }
    encoded
}
25
/// Encodes every character of `payload` in the non-standard `%uXXXX` form.
///
/// Each escape carries exactly four uppercase hex digits holding one UTF-16
/// code unit. A character above U+FFFF is written as a surrogate pair
/// (for example U+1F600 becomes `%uD83D%uDE00`), the same way
/// `unicode_encode` handles it; emitting the raw code point would produce a
/// five- or six-digit escape that a four-digit `%u` decoder misreads.
///
/// Returns an empty string for empty input.
#[must_use]
pub fn iis_unicode_encode(payload: &str) -> String {
    // ASCII has the largest expansion ratio: one input byte becomes six.
    let mut out = String::with_capacity(payload.len() * 6);
    // Iterate over UTF-16 code units so every escape is exactly four digits.
    for unit in payload.encode_utf16() {
        // Formatting into a `String` cannot fail, so the result is discarded.
        let _ = write!(&mut out, "%u{unit:04X}");
    }
    out
}
37
/// Wraps `payload` in double quotes and escapes it as a JSON string literal.
///
/// Backslash, double quote, backspace, form feed, newline, carriage return
/// and tab use their two-character escapes. Any other character below U+0020
/// is written as `\uXXXX` with uppercase hex digits. Everything else is
/// copied through unchanged.
#[must_use]
pub fn json_string_encode(payload: &str) -> String {
    let mut quoted = String::with_capacity(payload.len() * 2 + 2);
    quoted.push('"');
    for ch in payload.chars() {
        // Characters that have a dedicated short escape.
        let short_escape = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\u{0008}' => Some("\\b"),
            '\u{000C}' => Some("\\f"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            quoted.push_str(escape);
        } else if u32::from(ch) < 0x20 {
            // Remaining control characters need the generic numeric escape.
            let _ = write!(&mut quoted, "\\u{:04X}", u32::from(ch));
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('"');
    quoted
}
63
/// Encodes every character of `payload` as a hexadecimal HTML numeric
/// character reference (`&#xHH;`).
///
/// The code point is written in uppercase hex without zero padding.
/// Returns an empty string for empty input.
#[must_use]
pub fn html_entity_encode(payload: &str) -> String {
    let buffer = String::with_capacity(payload.len() * 6);
    payload.chars().map(u32::from).fold(buffer, |mut acc, code_point| {
        // Formatting into a `String` cannot fail, so the result is discarded.
        let _ = write!(acc, "&#x{code_point:X};");
        acc
    })
}
75
/// Encodes every character of `payload` as a decimal HTML numeric character
/// reference (`&#NN;`).
///
/// Returns an empty string for empty input.
#[must_use]
pub fn html_entity_decimal_encode(payload: &str) -> String {
    let mut entities = String::with_capacity(payload.len() * 6);
    payload.chars().map(u32::from).for_each(|code_point| {
        // Formatting into a `String` cannot fail, so the result is discarded.
        let _ = write!(entities, "&#{code_point};");
    });
    entities
}
87
/// Replaces printable ASCII in `payload` with its fullwidth counterpart.
///
/// * A space becomes U+3000 (ideographic space).
/// * Characters U+0021..=U+007E are shifted by `0xFEE0` into the fullwidth
///   forms block (U+FF01..=U+FF5E).
/// * Every other character is copied through unchanged.
#[must_use]
pub fn fullwidth_encode(payload: &str) -> String {
    /// Distance between an ASCII character and its fullwidth form.
    const FULLWIDTH_OFFSET: u32 = 0xFEE0;

    let mut widened = String::with_capacity(payload.len() * 3);
    widened.extend(payload.chars().map(|ch| {
        if ch == ' ' {
            '\u{3000}'
        } else if matches!(ch, '\x21'..='\x7e') {
            // Fall back to the original character if the shifted value is
            // somehow not a valid scalar.
            char::from_u32(u32::from(ch) + FULLWIDTH_OFFSET).unwrap_or(ch)
        } else {
            ch
        }
    }));
    widened
}
117
/// Replaces selected ASCII punctuation in `payload` with visually similar
/// Unicode characters.
///
/// Only the ten characters listed in the lookup table are substituted;
/// letters, digits, whitespace and all other characters pass through
/// unchanged.
#[must_use]
pub fn homoglyph_encode(payload: &str) -> String {
    /// ASCII character paired with its look-alike replacement.
    const LOOKALIKES: [(char, char); 10] = [
        ('\'', '\u{2019}'), // RIGHT SINGLE QUOTATION MARK
        ('"', '\u{201D}'),  // RIGHT DOUBLE QUOTATION MARK
        ('<', '\u{FF1C}'),  // FULLWIDTH LESS-THAN SIGN
        ('>', '\u{FF1E}'),  // FULLWIDTH GREATER-THAN SIGN
        ('=', '\u{FF1D}'),  // FULLWIDTH EQUALS SIGN
        ('(', '\u{FF08}'),  // FULLWIDTH LEFT PARENTHESIS
        (')', '\u{FF09}'),  // FULLWIDTH RIGHT PARENTHESIS
        (';', '\u{FF1B}'),  // FULLWIDTH SEMICOLON
        ('-', '\u{2010}'),  // HYPHEN
        ('/', '\u{2215}'),  // DIVISION SLASH
    ];

    let mut disguised = String::with_capacity(payload.len() * 4);
    for ch in payload.chars() {
        let replacement = LOOKALIKES
            .iter()
            .find(|&&(ascii, _)| ascii == ch)
            .map_or(ch, |&(_, lookalike)| lookalike);
        disguised.push(replacement);
    }
    disguised
}
153
#[cfg(test)]
mod tests {
    //! Unit tests for the encoders in this module.
    //!
    //! Expected values containing non-ASCII characters or HTML entities are
    //! spelled with `\u{…}` escapes / explicit entity text so they survive
    //! tools that normalise or decode such characters.

    use super::*;

    #[test]
    fn unicode_encode_basic() {
        assert_eq!(unicode_encode("A"), "\\u0041");
        assert_eq!(unicode_encode("AB"), "\\u0041\\u0042");
    }

    #[test]
    fn unicode_encode_special_chars() {
        let encoded = unicode_encode("' OR 1=1--");
        assert!(encoded.contains("\\u0027"));
        assert!(encoded.contains("\\u003D"));
    }

    #[test]
    fn unicode_encode_unicode() {
        let encoded = unicode_encode("\u{65E5}\u{672C}\u{8A9E}");
        assert_eq!(encoded, "\\u65E5\\u672C\\u8A9E");
    }

    #[test]
    fn unicode_encode_supplementary_plane() {
        // U+1F600 lies above the BMP and must become a surrogate pair.
        assert_eq!(unicode_encode("\u{1F600}"), "\\uD83D\\uDE00");
    }

    #[test]
    fn iis_unicode_encode_basic() {
        assert_eq!(iis_unicode_encode("A"), "%u0041");
        assert_eq!(iis_unicode_encode("AB"), "%u0041%u0042");
    }

    #[test]
    fn json_encode_basic() {
        assert_eq!(json_string_encode("A"), "\"A\"");
        assert_eq!(json_string_encode("A\\B"), "\"A\\\\B\"");
        assert_eq!(json_string_encode("A\"B"), "\"A\\\"B\"");
        assert_eq!(json_string_encode("A\nB"), "\"A\\nB\"");
    }

    #[test]
    fn json_encode_control_chars() {
        assert_eq!(json_string_encode("\x01"), "\"\\u0001\"");
    }

    #[test]
    fn html_entity_encode_basic() {
        assert_eq!(html_entity_encode("A"), "&#x41;");
        assert_eq!(html_entity_encode("AB"), "&#x41;&#x42;");
    }

    #[test]
    fn html_entity_encode_special_chars() {
        let encoded = html_entity_encode("<script>");
        assert_eq!(encoded, "&#x3C;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3E;");
    }

    #[test]
    fn html_entity_decimal_encode_basic() {
        assert_eq!(html_entity_decimal_encode("A"), "&#65;");
        assert_eq!(html_entity_decimal_encode("<"), "&#60;");
    }

    #[test]
    fn html_entity_encode_empty() {
        assert_eq!(html_entity_encode(""), "");
    }

    #[test]
    fn unicode_encode_empty() {
        assert_eq!(unicode_encode(""), "");
    }

    #[test]
    fn fullwidth_encode_sql_keywords() {
        let encoded = fullwidth_encode("SELECT");
        // Fullwidth forms of S, E, L, E, C, T.
        assert_eq!(encoded, "\u{FF33}\u{FF25}\u{FF2C}\u{FF25}\u{FF23}\u{FF34}");
        for ch in encoded.chars() {
            assert!(
                ch as u32 >= 0xFF01,
                "expected fullwidth char, got {ch} (U+{:04X})",
                ch as u32
            );
        }
    }

    #[test]
    fn fullwidth_encode_spaces() {
        let encoded = fullwidth_encode("A B");
        assert!(
            encoded.contains('\u{3000}'),
            "space should become ideographic space"
        );
    }

    #[test]
    fn fullwidth_encode_preserves_non_ascii() {
        let encoded = fullwidth_encode("\u{65E5}\u{672C}\u{8A9E}");
        assert_eq!(
            encoded, "\u{65E5}\u{672C}\u{8A9E}",
            "non-ASCII should pass through unchanged"
        );
    }

    #[test]
    fn fullwidth_encode_operators() {
        let encoded = fullwidth_encode("1=1");
        // Fullwidth forms of 1, =, 1.
        assert_eq!(encoded, "\u{FF11}\u{FF1D}\u{FF11}");
    }

    #[test]
    fn fullwidth_encode_sqli_payload() {
        let encoded = fullwidth_encode("' OR 1=1--");
        assert!(!encoded.contains("OR"), "should not contain ASCII 'OR'");
        assert!(
            encoded.contains("\u{FF2F}\u{FF32}"),
            "should contain fullwidth 'OR'"
        );
    }

    #[test]
    fn fullwidth_encode_empty() {
        assert_eq!(fullwidth_encode(""), "");
    }

    #[test]
    fn homoglyph_replaces_quotes() {
        let encoded = homoglyph_encode("' OR '1'='1");
        assert!(
            !encoded.contains('\''),
            "ASCII single quote should be replaced"
        );
        assert!(
            encoded.contains('\u{2019}'),
            "should contain RIGHT SINGLE QUOTATION MARK"
        );
    }

    #[test]
    fn homoglyph_replaces_angle_brackets() {
        let encoded = homoglyph_encode("<script>");
        assert!(!encoded.contains('<'), "ASCII < should be replaced");
        assert!(!encoded.contains('>'), "ASCII > should be replaced");
        assert!(encoded.contains('\u{FF1C}'), "should contain fullwidth <");
        assert!(encoded.contains('\u{FF1E}'), "should contain fullwidth >");
    }

    #[test]
    fn homoglyph_replaces_equals() {
        let encoded = homoglyph_encode("1=1");
        assert!(!encoded.contains('='), "ASCII = should be replaced");
        assert!(encoded.contains('\u{FF1D}'), "should contain fullwidth =");
    }

    #[test]
    fn homoglyph_preserves_letters() {
        let encoded = homoglyph_encode("SELECT");
        assert_eq!(encoded, "SELECT", "letters should be preserved");
    }

    #[test]
    fn homoglyph_encode_empty() {
        assert_eq!(homoglyph_encode(""), "");
    }

    #[test]
    fn homoglyph_replaces_parens() {
        let encoded = homoglyph_encode("fn()");
        assert!(encoded.contains('\u{FF08}'), "should contain fullwidth (");
        assert!(encoded.contains('\u{FF09}'), "should contain fullwidth )");
    }
}