wafrift_encoding/encoding/
unicode.rs1use std::fmt::Write as _;
3
/// Encodes `payload` as a sequence of `\uXXXX` escapes.
///
/// Every UTF-16 code unit of the input becomes a backslash, a `u`, and exactly
/// four uppercase hexadecimal digits. Characters outside the Basic Multilingual
/// Plane (above U+FFFF) are therefore written as a surrogate pair, e.g. U+1F600
/// becomes `\uD83D\uDE00`, instead of a five-digit escape that a `\uXXXX`
/// decoder would split after the fourth digit.
///
/// An empty input yields an empty string.
#[must_use]
pub fn unicode_encode(payload: &str) -> String {
    // Each input byte expands to at most six output bytes.
    let mut out = String::with_capacity(payload.len() * 6);
    for unit in payload.encode_utf16() {
        // Writing into a `String` cannot fail, so the result is discarded.
        let _ = write!(out, "\\u{unit:04X}");
    }
    out
}
16
/// Encodes `payload` as a sequence of `%uXXXX` escapes.
///
/// Every UTF-16 code unit of the input becomes `%u` followed by exactly four
/// uppercase hexadecimal digits. Characters above U+FFFF are written as a
/// surrogate pair (U+1F600 becomes `%uD83D%uDE00`) because the `%u` form only
/// carries 16 bits per escape; a five-digit escape would be read as a
/// four-digit escape followed by a stray literal digit.
///
/// An empty input yields an empty string.
#[must_use]
pub fn iis_unicode_encode(payload: &str) -> String {
    // Each input byte expands to at most six output bytes.
    let mut out = String::with_capacity(payload.len() * 6);
    for unit in payload.encode_utf16() {
        // Writing into a `String` cannot fail, so the result is discarded.
        let _ = write!(out, "%u{unit:04X}");
    }
    out
}
28
/// Renders `payload` as a double-quoted JSON string literal.
///
/// Backslash, double quote, backspace, form feed, newline, carriage return and
/// tab use their two-character escapes. Any other character below U+0020 is
/// written as `\u` plus four uppercase hex digits. Everything else, including
/// non-ASCII text, is copied through unchanged.
#[must_use]
pub fn json_string_encode(payload: &str) -> String {
    let mut quoted = String::with_capacity(payload.len() * 2 + 2);
    quoted.push('"');
    for c in payload.chars() {
        // Characters that have a dedicated short escape.
        let short_escape = match c {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\u{0008}' => Some("\\b"),
            '\t' => Some("\\t"),
            '\n' => Some("\\n"),
            '\u{000C}' => Some("\\f"),
            '\r' => Some("\\r"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            quoted.push_str(escape);
        } else if u32::from(c) < 0x20 {
            // Remaining control characters get the generic numeric escape.
            let _ = write!(quoted, "\\u{:04X}", u32::from(c));
        } else {
            quoted.push(c);
        }
    }
    quoted.push('"');
    quoted
}
54
/// Rewrites every character of `payload` as a hexadecimal HTML numeric
/// character reference: `&#x`, the code point in uppercase hex without
/// padding, then `;`.
///
/// An empty input yields an empty string.
#[must_use]
pub fn html_entity_encode(payload: &str) -> String {
    let buffer = String::with_capacity(payload.len() * 6);
    payload
        .chars()
        .map(u32::from)
        .fold(buffer, |mut acc, code_point| {
            // Writing into a `String` cannot fail, so the result is discarded.
            let _ = write!(acc, "&#x{code_point:X};");
            acc
        })
}
66
/// Rewrites every character of `payload` as a decimal HTML numeric character
/// reference: `&#`, the code point in base 10, then `;`.
///
/// An empty input yields an empty string.
#[must_use]
pub fn html_entity_decimal_encode(payload: &str) -> String {
    let mut encoded = String::with_capacity(payload.len() * 6);
    payload.chars().map(u32::from).for_each(|code_point| {
        // Writing into a `String` cannot fail, so the result is discarded.
        let _ = write!(encoded, "&#{code_point};");
    });
    encoded
}
78
/// Replaces printable ASCII in `payload` with its fullwidth counterpart.
///
/// * A space (U+0020) becomes U+3000.
/// * Characters from `!` (U+0021) through `~` (U+007E) are shifted up by
///   `0xFEE0`, landing in U+FF01..=U+FF5E.
/// * Every other character (control characters, DEL, non-ASCII) is kept as is.
#[must_use]
pub fn fullwidth_encode(payload: &str) -> String {
    let widened = payload.chars().map(|c| {
        if c == ' ' {
            '\u{3000}'
        } else if matches!(c, '!'..='~') {
            // Fall back to the original character if the shifted value were
            // ever not a valid scalar value.
            char::from_u32(u32::from(c) + 0xFEE0).unwrap_or(c)
        } else {
            c
        }
    });

    let mut out = String::with_capacity(payload.len() * 3);
    out.extend(widened);
    out
}
108
/// Swaps selected ASCII punctuation in `payload` for look-alike Unicode
/// characters, leaving every other character untouched.
///
/// The substitutions are listed in the table inside the function; letters,
/// digits, spaces and any punctuation not in the table pass through unchanged.
#[must_use]
pub fn homoglyph_encode(payload: &str) -> String {
    // (ASCII character, replacement) pairs.
    const SUBSTITUTIONS: [(char, char); 10] = [
        ('\'', '\u{2019}'),
        ('"', '\u{201D}'),
        ('<', '\u{FF1C}'),
        ('>', '\u{FF1E}'),
        ('=', '\u{FF1D}'),
        ('(', '\u{FF08}'),
        (')', '\u{FF09}'),
        (';', '\u{FF1B}'),
        ('-', '\u{2010}'),
        ('/', '\u{2215}'),
    ];

    let mut encoded = String::with_capacity(payload.len() * 4);
    for original in payload.chars() {
        let replacement = SUBSTITUTIONS
            .iter()
            .find(|(ascii, _)| *ascii == original)
            .map_or(original, |&(_, lookalike)| lookalike);
        encoded.push(replacement);
    }
    encoded
}
144
#[cfg(test)]
mod tests {
    use super::*;

    // Expected fullwidth text is spelled with `\u{..}` escapes so the literals
    // cannot be silently normalised back to their ASCII look-alikes.

    #[test]
    fn unicode_encode_basic() {
        assert_eq!(unicode_encode("A"), "\\u0041");
        assert_eq!(unicode_encode("AB"), "\\u0041\\u0042");
    }

    #[test]
    fn unicode_encode_special_chars() {
        let encoded = unicode_encode("' OR 1=1--");
        assert!(encoded.contains("\\u0027"));
        assert!(encoded.contains("\\u003D"));
    }

    #[test]
    fn unicode_encode_unicode() {
        let encoded = unicode_encode("日本語");
        assert_eq!(encoded, "\\u65E5\\u672C\\u8A9E");
    }

    #[test]
    fn iis_unicode_encode_basic() {
        assert_eq!(iis_unicode_encode("A"), "%u0041");
        assert_eq!(iis_unicode_encode("AB"), "%u0041%u0042");
    }

    #[test]
    fn json_encode_basic() {
        assert_eq!(json_string_encode("A"), "\"A\"");
        assert_eq!(json_string_encode("A\\B"), "\"A\\\\B\"");
        assert_eq!(json_string_encode("A\"B"), "\"A\\\"B\"");
        assert_eq!(json_string_encode("A\nB"), "\"A\\nB\"");
    }

    #[test]
    fn json_encode_control_chars() {
        assert_eq!(json_string_encode("\x01"), "\"\\u0001\"");
    }

    #[test]
    fn html_entity_encode_basic() {
        // The encoder emits numeric character references, not the raw text.
        assert_eq!(html_entity_encode("A"), "&#x41;");
        assert_eq!(html_entity_encode("AB"), "&#x41;&#x42;");
    }

    #[test]
    fn html_entity_encode_special_chars() {
        let encoded = html_entity_encode("<script>");
        assert_eq!(
            encoded,
            "&#x3C;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3E;"
        );
    }

    #[test]
    fn html_entity_decimal_encode_basic() {
        assert_eq!(html_entity_decimal_encode("A"), "&#65;");
        assert_eq!(html_entity_decimal_encode("<"), "&#60;");
    }

    #[test]
    fn html_entity_encode_empty() {
        assert_eq!(html_entity_encode(""), "");
    }

    #[test]
    fn unicode_encode_empty() {
        assert_eq!(unicode_encode(""), "");
    }

    #[test]
    fn fullwidth_encode_sql_keywords() {
        let encoded = fullwidth_encode("SELECT");
        // Fullwidth S, E, L, E, C, T.
        assert_eq!(encoded, "\u{FF33}\u{FF25}\u{FF2C}\u{FF25}\u{FF23}\u{FF34}");
        for ch in encoded.chars() {
            assert!(
                ch as u32 >= 0xFF01,
                "expected fullwidth char, got {ch} (U+{:04X})",
                ch as u32
            );
        }
    }

    #[test]
    fn fullwidth_encode_spaces() {
        let encoded = fullwidth_encode("A B");
        assert!(
            encoded.contains('\u{3000}'),
            "space should become ideographic space"
        );
    }

    #[test]
    fn fullwidth_encode_preserves_non_ascii() {
        let encoded = fullwidth_encode("日本語");
        assert_eq!(encoded, "日本語", "non-ASCII should pass through unchanged");
    }

    #[test]
    fn fullwidth_encode_operators() {
        let encoded = fullwidth_encode("1=1");
        // Fullwidth 1, =, 1.
        assert_eq!(encoded, "\u{FF11}\u{FF1D}\u{FF11}");
    }

    #[test]
    fn fullwidth_encode_sqli_payload() {
        let encoded = fullwidth_encode("' OR 1=1--");
        assert!(!encoded.contains("OR"), "should not contain ASCII 'OR'");
        assert!(
            encoded.contains("\u{FF2F}\u{FF32}"),
            "should contain fullwidth 'OR'"
        );
    }

    #[test]
    fn fullwidth_encode_empty() {
        assert_eq!(fullwidth_encode(""), "");
    }

    #[test]
    fn homoglyph_replaces_quotes() {
        let encoded = homoglyph_encode("' OR '1'='1");
        assert!(
            !encoded.contains('\''),
            "ASCII single quote should be replaced"
        );
        assert!(
            encoded.contains('\u{2019}'),
            "should contain RIGHT SINGLE QUOTATION MARK"
        );
    }

    #[test]
    fn homoglyph_replaces_angle_brackets() {
        let encoded = homoglyph_encode("<script>");
        assert!(!encoded.contains('<'), "ASCII < should be replaced");
        assert!(!encoded.contains('>'), "ASCII > should be replaced");
        assert!(encoded.contains('\u{FF1C}'), "should contain fullwidth <");
        assert!(encoded.contains('\u{FF1E}'), "should contain fullwidth >");
    }

    #[test]
    fn homoglyph_replaces_equals() {
        let encoded = homoglyph_encode("1=1");
        assert!(!encoded.contains('='), "ASCII = should be replaced");
        assert!(encoded.contains('\u{FF1D}'), "should contain fullwidth =");
    }

    #[test]
    fn homoglyph_preserves_letters() {
        let encoded = homoglyph_encode("SELECT");
        assert_eq!(encoded, "SELECT", "letters should be preserved");
    }

    #[test]
    fn homoglyph_encode_empty() {
        assert_eq!(homoglyph_encode(""), "");
    }

    #[test]
    fn homoglyph_replaces_parens() {
        let encoded = homoglyph_encode("fn()");
        assert!(encoded.contains('\u{FF08}'), "should contain fullwidth (");
        assert!(encoded.contains('\u{FF09}'), "should contain fullwidth )");
    }
}