Skip to main content

redstr/transformations/
encoding.rs

1use crate::rng::SimpleRng;
2
3/// Encodes characters using mixed encoding formats (HTML entities, Unicode escapes).
4///
5/// Randomly encodes each character using one of four formats: plain text,
6/// hexadecimal HTML entity (`&#x...;`), decimal HTML entity (`&#...;`),
7/// or Unicode escape (`\u{...}`). This mixed approach can bypass filters
8/// that only detect specific encoding formats.
9///
10/// # Use Cases
11///
12/// - **XSS Testing**: Bypass filters that don't handle all encoding formats
13/// - **Red Team**: Evade detection systems with mixed encoding
14/// - **Blue Team**: Test encoding normalization and parser robustness
15///
16/// # Examples
17///
18/// ```
19/// use redstr::mixed_encoding;
20///
21/// let result = mixed_encoding("test");
22/// // Example output: "tes\u{0074}" (varies each run)
23/// assert!(result.contains("&#") || result.contains("\\u"));
24///
25/// // XSS payload with mixed encoding
26/// let xss = mixed_encoding("<script>");
27/// // Example: "&#x3c;s&#99;r\u{0069}pt&#x3e;"
28/// ```
29pub fn mixed_encoding(input: &str) -> String {
30    let mut rng = SimpleRng::new();
31    let mut result = String::with_capacity(input.len() * 8); // Encoded chars are longer
32
33    for c in input.chars() {
34        match rng.next_u64() % 4 {
35            0 => result.push(c),
36            1 => result.push_str(&format!("&#x{:x};", c as u32)),
37            2 => result.push_str(&format!("&#{};", c as u32)),
38            _ => result.push_str(&format!("\\u{{{:04x}}}", c as u32)),
39        }
40    }
41    result
42}
43
/// Encodes text to Base64.
///
/// The UTF-8 bytes of `input` are processed three at a time; each group is
/// split into four 6-bit values that index the standard RFC 4648 alphabet
/// (`A-Z`, `a-z`, `0-9`, `+`, `/`). A final group of one or two bytes is
/// padded with `=` so the output length is always a multiple of four.
/// The encoding is lossless and grows the text by roughly 33%.
///
/// # Use Cases
///
/// - **Red Team**: Obfuscate command payloads to evade detection
/// - **Data Transmission**: Safely encode binary data in text formats
/// - **Blue Team**: Test if security systems properly decode Base64
/// - **API Testing**: Encode credentials or tokens
///
/// # Examples
///
/// ```
/// use redstr::base64_encode;
///
/// assert_eq!(base64_encode("hello"), "aGVsbG8=");
/// assert_eq!(base64_encode("test"), "dGVzdA==");
///
/// // Obfuscate shell commands
/// let cmd = base64_encode("rm -rf /tmp/*");
/// assert_eq!(cmd, "cm0gLXJmIC90bXAvKg==");
///
/// // Encode credentials
/// let auth = base64_encode("username:password");
/// // Use in Authorization: Basic header
/// ```
pub fn base64_encode(input: &str) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Maps a 6-bit value to its alphabet character.
    let symbol = |sextet: u8| char::from(ALPHABET[usize::from(sextet)]);

    let data = input.as_bytes();
    // Four output characters for every (possibly partial) group of three bytes.
    let mut encoded = String::with_capacity(data.len().div_ceil(3) * 4);

    for group in data.chunks(3) {
        // `chunks` never yields an empty slice; absent trailing bytes count as zero.
        let first = group[0];
        let second = group.get(1).copied().unwrap_or(0);
        let third = group.get(2).copied().unwrap_or(0);

        encoded.push(symbol(first >> 2));
        encoded.push(symbol(((first & 0x03) << 4) | (second >> 4)));

        match group.len() {
            1 => encoded.push_str("=="),
            2 => {
                encoded.push(symbol(((second & 0x0F) << 2) | (third >> 6)));
                encoded.push('=');
            }
            _ => {
                encoded.push(symbol(((second & 0x0F) << 2) | (third >> 6)));
                encoded.push(symbol(third & 0x3F));
            }
        }
    }

    encoded
}
106
/// Encodes text with URL/percent encoding (RFC 3986).
///
/// Unreserved characters (`A-Z`, `a-z`, `0-9`, `-`, `_`, `.`, `~`) are copied
/// unchanged. Every other byte of the UTF-8 representation of `input` is
/// written as `%XX` with uppercase hexadecimal digits, so a multi-byte
/// character becomes one `%XX` triplet per byte.
///
/// # Use Cases
///
/// - **URL Construction**: Safely encode query parameters and path segments
/// - **Red Team**: Bypass input filters with encoded payloads
/// - **API Testing**: Encode special characters in HTTP requests
/// - **Blue Team**: Test URL parser and decoder implementations
///
/// # Examples
///
/// ```
/// use redstr::url_encode;
///
/// assert_eq!(url_encode("hello world"), "hello%20world");
/// assert_eq!(url_encode("user@example.com"), "user%40example.com");
///
/// // XSS payload encoding
/// let xss = url_encode("<script>alert(1)</script>");
/// assert_eq!(xss, "%3Cscript%3Ealert%281%29%3C%2Fscript%3E");
///
/// // SQL injection encoding
/// let sql = url_encode("' OR '1'='1");
/// assert_eq!(sql, "%27%20OR%20%271%27%3D%271");
///
/// // Multi-byte characters are encoded byte by byte
/// assert_eq!(url_encode("é"), "%C3%A9");
/// ```
pub fn url_encode(input: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    // Every input byte yields at most three output bytes (`%XX`).
    let mut result = String::with_capacity(input.len() * 3);

    // Working on bytes is equivalent to working on chars here: all unreserved
    // characters are single-byte ASCII, and every byte of a multi-byte UTF-8
    // sequence is >= 0x80, so it can never be mistaken for one of them.
    for byte in input.bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
            result.push(char::from(byte));
        } else {
            // Table lookup instead of `format!` avoids one allocation per byte.
            result.push('%');
            result.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            result.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    result
}
152
/// Encodes text to hexadecimal representation (lowercase).
///
/// Each byte of the UTF-8 representation of `input` becomes two lowercase
/// hexadecimal digits, so the output contains only `0-9` and `a-f` and is
/// exactly twice as long as the input in bytes.
///
/// Because the input is a `&str`, non-ASCII characters are encoded as their
/// UTF-8 byte sequence rather than as a single byte.
///
/// # Use Cases
///
/// - **Binary Data**: Display binary content as readable hex
/// - **Debugging**: View exact byte values in strings
/// - **Red Team**: Encode shellcode or binary payloads
/// - **Cryptography**: Display hash values and signatures
///
/// # Examples
///
/// ```
/// use redstr::hex_encode;
///
/// assert_eq!(hex_encode("test"), "74657374");
/// assert_eq!(hex_encode("AB"), "4142");
/// assert_eq!(hex_encode("hello"), "68656c6c6f");
///
/// // Non-ASCII characters contribute every byte of their UTF-8 encoding
/// assert_eq!(hex_encode("é"), "c3a9");
/// assert_eq!(hex_encode("\u{90}"), "c290");
/// ```
pub fn hex_encode(input: &str) -> String {
    use std::fmt::Write;

    // The output length is known exactly: two hex digits per input byte.
    let mut encoded = String::with_capacity(input.len() * 2);
    for byte in input.bytes() {
        write!(encoded, "{byte:02x}").expect("writing to a String cannot fail");
    }
    encoded
}
186
187/// Encodes text with mixed hexadecimal formats (0x, \x, %, &#x).
188///
189/// Randomly encodes each byte using one of four hex formats: C-style escape
190/// (`\x`), URL encoding (`%`), hex literal (`0x`), or HTML entity (`&#x;`).
191/// This format mixing can evade detection systems that pattern-match specific
192/// encoding styles.
193///
194/// # Use Cases
195///
196/// - **Red Team**: Bypass detection with varied encoding formats
197/// - **XSS/SQLi Testing**: Evade filters that only recognize one format
198/// - **Blue Team**: Test encoder/decoder robustness across formats
199///
200/// # Examples
201///
202/// ```
203/// use redstr::hex_encode_mixed;
204///
205/// let result = hex_encode_mixed("AB");
206/// // Example output: "\x41%42" or "0x41&#x42;" (varies each run)
207/// assert!(result.len() >= 2);
208///
209/// // Mixed format payload obfuscation
210/// let payload = hex_encode_mixed("<script>");
211/// // Example: "\x3c%73&#x63;0x72\x69%70&#x74;\x3e"
212/// ```
213pub fn hex_encode_mixed(input: &str) -> String {
214    let mut rng = SimpleRng::new();
215
216    input
217        .bytes()
218        .map(|b| match rng.next_u64() % 4 {
219            0 => format!("\\x{:02x}", b),
220            1 => format!("%{:02x}", b),
221            2 => format!("0x{:02x}", b),
222            _ => format!("&#x{:02x};", b),
223        })
224        .collect()
225}
226
227/// Encodes text using various HTML entity formats.
228///
229/// Randomly encodes characters using plain text, decimal entities (`&#...;`),
230/// hexadecimal entities (`&#x...;`), or named entities (`&lt;`, `&gt;`, etc.).
231/// This mixed approach tests HTML parser robustness and can bypass filters.
232///
233/// # Use Cases
234///
235/// - **XSS Testing**: Bypass HTML sanitizers with entity encoding
236/// - **Red Team**: Evade WAF rules that look for literal characters
237/// - **Blue Team**: Test HTML entity decoder implementations
238/// - **Web Scraping**: Handle various entity encoding formats
239///
240/// # Examples
241///
242/// ```
243/// use redstr::html_entity_encode;
244///
245/// let result = html_entity_encode("<script>");
246/// // Example: "&lt;&#115;&#x63;r&#105;pt&gt;" (varies each run)
247///
248/// // XSS payload with entity encoding
249/// let xss = html_entity_encode("<img src=x onerror=alert(1)>");
250/// // Bypasses filters looking for literal "<" and ">"
251///
252/// // Special character encoding
253/// let special = html_entity_encode("A&B<C>D");
254/// // Example: "A&amp;B&lt;C&gt;D"
255/// ```
256pub fn html_entity_encode(input: &str) -> String {
257    let mut rng = SimpleRng::new();
258    let mut result = String::new();
259
260    for c in input.chars() {
261        match rng.next_u64() % 4 {
262            0 => result.push(c),
263            1 => result.push_str(&format!("&#{};", c as u32)),
264            2 => result.push_str(&format!("&#x{:X};", c as u32)),
265            _ => {
266                // Named entities for common characters
267                match c {
268                    '<' => result.push_str("&lt;"),
269                    '>' => result.push_str("&gt;"),
270                    '&' => result.push_str("&amp;"),
271                    '"' => result.push_str("&quot;"),
272                    '\'' => result.push_str("&apos;"),
273                    ' ' => result.push_str("&nbsp;"),
274                    _ => result.push_str(&format!("&#{};", c as u32)),
275                }
276            }
277        }
278    }
279
280    result
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Helpers
    //
    // The randomized encoders cannot be compared against a fixed string, so
    // their tests decode the output again and compare it with the input.
    // ---------------------------------------------------------------------

    /// Decodes `hex_encode_mixed` output (`\xHH`, `%HH`, `0xHH`, `&#xHH;`)
    /// back into bytes, panicking on any token that is not one of those forms.
    fn decode_mixed_hex(encoded: &str) -> Vec<u8> {
        let mut rest = encoded;
        let mut bytes = Vec::new();

        while !rest.is_empty() {
            let (digits_at, is_entity) = if rest.starts_with("&#x") {
                (3, true)
            } else if rest.starts_with("\\x") || rest.starts_with("0x") {
                (2, false)
            } else if rest.starts_with('%') {
                (1, false)
            } else {
                panic!("unrecognised token at {rest:?} in {encoded:?}");
            };

            let digits = &rest[digits_at..digits_at + 2];
            bytes.push(u8::from_str_radix(digits, 16).expect("two hex digits"));
            rest = &rest[digits_at + 2..];

            if is_entity {
                rest = rest.strip_prefix(';').expect("entity must end with ';'");
            }
        }

        bytes
    }

    /// Asserts that decoding `hex_encode_mixed(input)` yields the input bytes.
    fn assert_mixed_hex_round_trips(input: &str) {
        let encoded = hex_encode_mixed(input);
        assert_eq!(
            decode_mixed_hex(&encoded),
            input.as_bytes(),
            "encoded form was {encoded:?}"
        );
    }

    /// Splits a leading numeric or named entity off `text`, returning the
    /// decoded character and the remaining text.
    fn split_leading_entity(text: &str) -> Option<(char, &str)> {
        const NAMED: [(&str, char); 5] = [
            ("&lt;", '<'),
            ("&gt;", '>'),
            ("&amp;", '&'),
            ("&quot;", '"'),
            ("&apos;", '\''),
        ];

        for &(name, ch) in NAMED.iter() {
            if let Some(tail) = text.strip_prefix(name) {
                return Some((ch, tail));
            }
        }

        let (body, tail) = text.strip_prefix("&#")?.split_once(';')?;
        let code = match body.strip_prefix('x') {
            Some(hex) => u32::from_str_radix(hex, 16).ok()?,
            None => body.parse::<u32>().ok()?,
        };
        Some((char::from_u32(code)?, tail))
    }

    /// Decodes numeric references and the five named entities emitted by
    /// `html_entity_encode`; anything else is copied through unchanged.
    fn decode_html_entities(encoded: &str) -> String {
        let mut decoded = String::with_capacity(encoded.len());
        let mut rest = encoded;

        while let Some(first) = rest.chars().next() {
            match split_leading_entity(rest) {
                Some((ch, tail)) => {
                    decoded.push(ch);
                    rest = tail;
                }
                None => {
                    decoded.push(first);
                    rest = &rest[first.len_utf8()..];
                }
            }
        }

        decoded
    }

    /// Asserts that decoding `html_entity_encode(input)` yields the input.
    /// Only used with inputs that contain no space characters.
    fn assert_html_round_trips(input: &str) {
        let encoded = html_entity_encode(input);
        assert_eq!(
            decode_html_entities(&encoded),
            input,
            "encoded form was {encoded:?}"
        );
    }

    // ---------------------------------------------------------------------
    // base64_encode
    // ---------------------------------------------------------------------

    #[test]
    fn test_base64_encode() {
        assert_eq!(base64_encode("hello"), "aGVsbG8=");
        assert_eq!(base64_encode("test"), "dGVzdA==");
        assert_eq!(base64_encode("a"), "YQ==");
    }

    #[test]
    fn test_base64_encode_empty_string() {
        assert_eq!(base64_encode(""), "");
    }

    #[test]
    fn test_base64_encode_single_char() {
        assert_eq!(base64_encode("A"), "QQ==");
    }

    #[test]
    fn test_base64_encode_numbers() {
        assert_eq!(base64_encode("123"), "MTIz");
    }

    #[test]
    fn test_base64_encode_special_chars() {
        assert_eq!(base64_encode("!@#"), "IUAj");
    }

    #[test]
    fn test_base64_encode_long_string() {
        let input = "The quick brown fox jumps over the lazy dog";
        let result = base64_encode(input);
        assert_eq!(
            result,
            "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZw=="
        );
        assert!(result.len() > input.len());
    }

    #[test]
    fn test_base64_encode_unicode() {
        // Multi-byte characters are encoded from their UTF-8 bytes.
        assert_eq!(base64_encode("hello 世界"), "aGVsbG8g5LiW55WM");
    }

    #[test]
    fn test_base64_encode_binary_data() {
        assert_eq!(base64_encode("\x00\x01\x02\x03"), "AAECAw==");
    }

    #[test]
    fn test_base64_encode_padding() {
        // 1 leftover byte -> "==", 2 leftover bytes -> "=", none -> no padding.
        assert_eq!(base64_encode("a"), "YQ==");
        assert_eq!(base64_encode("ab"), "YWI=");
        assert_eq!(base64_encode("abc"), "YWJj");
    }

    #[test]
    fn test_base64_encode_whitespace() {
        assert_eq!(base64_encode("hello world"), "aGVsbG8gd29ybGQ=");
    }

    #[test]
    fn test_base64_encode_sql_injection() {
        assert_eq!(
            base64_encode("SELECT * FROM users"),
            "U0VMRUNUICogRlJPTSB1c2Vycw=="
        );
    }

    // ---------------------------------------------------------------------
    // url_encode
    // ---------------------------------------------------------------------

    #[test]
    fn test_url_encode() {
        assert_eq!(url_encode("hello world"), "hello%20world");
        assert_eq!(url_encode("test@example.com"), "test%40example.com");
    }

    #[test]
    fn test_url_encode_empty_string() {
        assert_eq!(url_encode(""), "");
    }

    #[test]
    fn test_url_encode_alphanumeric() {
        assert_eq!(url_encode("abc123"), "abc123");
    }

    #[test]
    fn test_url_encode_special_chars() {
        assert_eq!(url_encode("!@#$%^&*()"), "%21%40%23%24%25%5E%26%2A%28%29");
    }

    #[test]
    fn test_url_encode_path() {
        assert_eq!(url_encode("/path/to/file"), "%2Fpath%2Fto%2Ffile");
    }

    #[test]
    fn test_url_encode_query_string() {
        assert_eq!(url_encode("key=value&foo=bar"), "key%3Dvalue%26foo%3Dbar");
    }

    #[test]
    fn test_url_encode_unicode() {
        // Each UTF-8 byte of a multi-byte character gets its own %XX triplet.
        assert_eq!(url_encode("hello 世界"), "hello%20%E4%B8%96%E7%95%8C");
    }

    #[test]
    fn test_url_encode_plus_sign() {
        assert_eq!(url_encode("a+b"), "a%2Bb");
    }

    #[test]
    fn test_url_encode_slash() {
        assert_eq!(url_encode("a/b"), "a%2Fb");
    }

    #[test]
    fn test_url_encode_question_mark() {
        assert_eq!(url_encode("what?"), "what%3F");
    }

    #[test]
    fn test_url_encode_hash() {
        assert_eq!(url_encode("#anchor"), "%23anchor");
    }

    // ---------------------------------------------------------------------
    // hex_encode
    // ---------------------------------------------------------------------

    #[test]
    fn test_hex_encode() {
        assert_eq!(hex_encode("test"), "74657374");
        assert_eq!(hex_encode("ab"), "6162");
    }

    #[test]
    fn test_hex_encode_empty_string() {
        assert_eq!(hex_encode(""), "");
    }

    #[test]
    fn test_hex_encode_single_char() {
        assert_eq!(hex_encode("A"), "41");
    }

    #[test]
    fn test_hex_encode_numbers() {
        assert_eq!(hex_encode("123"), "313233");
    }

    #[test]
    fn test_hex_encode_special_chars() {
        let result = hex_encode("!@#");
        assert_eq!(result, "214023");
        assert_eq!(result.len(), 6); // 3 chars * 2 hex digits
    }

    #[test]
    fn test_hex_encode_whitespace() {
        assert_eq!(hex_encode(" "), "20");
    }

    #[test]
    fn test_hex_encode_lowercase() {
        let result = hex_encode("abc");
        assert_eq!(result, "616263");
        // Hex digits must be lowercase; "\n" exercises a digit above 9.
        assert!(!result.contains('A'));
        assert_eq!(hex_encode("\n"), "0a");
    }

    #[test]
    fn test_hex_encode_uppercase() {
        assert_eq!(hex_encode("ABC"), "414243");
    }

    #[test]
    fn test_hex_encode_mixed_case_input() {
        let result = hex_encode("Hello!");
        assert_eq!(result, "48656c6c6f21");
        assert_eq!(result.len(), 12); // 6 chars * 2 hex digits
    }

    #[test]
    fn test_hex_encode_newline() {
        assert_eq!(hex_encode("\n"), "0a");
    }

    // ---------------------------------------------------------------------
    // hex_encode_mixed
    // ---------------------------------------------------------------------

    #[test]
    fn test_hex_encode_mixed() {
        let result = hex_encode_mixed("ab");
        // Should contain hexadecimal encoding
        assert!(result.len() > 2);
        assert_mixed_hex_round_trips("ab");
    }

    #[test]
    fn test_hex_encode_mixed_empty_string() {
        assert_eq!(hex_encode_mixed(""), "");
    }

    #[test]
    fn test_hex_encode_mixed_single_char() {
        let result = hex_encode_mixed("A");
        assert!(
            ["\\x41", "%41", "0x41", "&#x41;"].contains(&result.as_str()),
            "unexpected encoding {result:?}"
        );
    }

    #[test]
    fn test_hex_encode_mixed_formats() {
        let input = "test";
        let result = hex_encode_mixed(input);
        // Each byte takes between 3 (`%74`) and 6 (`&#x74;`) characters, and
        // every token must be one of the four formats: \x, %, 0x, &#x
        assert!(result.len() >= 3 * input.len());
        assert!(result.len() <= 6 * input.len());
        assert_eq!(decode_mixed_hex(&result), input.as_bytes());
    }

    #[test]
    fn test_hex_encode_mixed_special_chars() {
        assert_mixed_hex_round_trips("!@#");
    }

    #[test]
    fn test_hex_encode_mixed_numbers() {
        assert_mixed_hex_round_trips("123");
    }

    #[test]
    fn test_hex_encode_mixed_whitespace() {
        assert_mixed_hex_round_trips("a b");
    }

    #[test]
    fn test_hex_encode_mixed_long_string() {
        assert_mixed_hex_round_trips("hello world");
    }

    #[test]
    fn test_hex_encode_mixed_xss_payload() {
        let result = hex_encode_mixed("<script>");
        // Every byte is encoded, so the literal angle brackets must be gone.
        assert!(!result.contains('<'));
        assert!(!result.contains('>'));
        assert_eq!(decode_mixed_hex(&result), "<script>".as_bytes());
    }

    #[test]
    fn test_hex_encode_mixed_preserves_content() {
        let result = hex_encode_mixed("abc");
        // Result should be longer due to encoding
        assert!(result.len() >= 3);
        assert_eq!(decode_mixed_hex(&result), "abc".as_bytes());
    }

    // ---------------------------------------------------------------------
    // html_entity_encode
    // ---------------------------------------------------------------------

    #[test]
    fn test_html_entity_encode() {
        let result = html_entity_encode("test");
        assert!(result.len() >= 4);
        assert_html_round_trips("test");
    }

    #[test]
    fn test_html_entity_encode_empty_string() {
        assert_eq!(html_entity_encode(""), "");
    }

    #[test]
    fn test_html_entity_encode_angle_brackets() {
        // Either bracket may be left plain, so only the decoded value is fixed.
        assert_html_round_trips("<>");
    }

    #[test]
    fn test_html_entity_encode_ampersand() {
        let result = html_entity_encode("&");
        // Can be plain "&", "&amp;", "&#38;", or "&#x26;"
        assert!(
            ["&", "&amp;", "&#38;", "&#x26;"].contains(&result.as_str()),
            "unexpected encoding {result:?}"
        );
    }

    #[test]
    fn test_html_entity_encode_quotes() {
        assert_html_round_trips("\"'");
    }

    #[test]
    fn test_html_entity_encode_space() {
        let result = html_entity_encode(" ");
        // Either the space itself or a single entity.
        assert!(
            result == " " || (result.starts_with('&') && result.ends_with(';')),
            "unexpected encoding {result:?}"
        );
    }

    #[test]
    fn test_html_entity_encode_xss_payload() {
        assert_html_round_trips("<script>alert(1)</script>");
    }

    #[test]
    fn test_html_entity_encode_mixed_content() {
        let input = "Hello <world> & \"test\"";
        let result = html_entity_encode(input);
        assert!(!result.is_empty());
        // No form is shorter than the character it replaces.
        assert!(result.len() >= input.len());
    }

    #[test]
    fn test_html_entity_encode_numbers() {
        assert_html_round_trips("123");
    }

    #[test]
    fn test_html_entity_encode_special_chars() {
        assert_html_round_trips("!@#$%");
    }

    #[test]
    fn test_html_entity_encode_unicode() {
        let result = html_entity_encode("♥");
        // U+2665: plain, decimal, or uppercase-hex reference.
        assert!(
            ["♥", "&#9829;", "&#x2665;"].contains(&result.as_str()),
            "unexpected encoding {result:?}"
        );
    }
}