sentinel_modsec/transformations/
normalize.rs

1//! Normalization transformations.
2
3use super::Transformation;
4use std::borrow::Cow;
5
6/// Lowercase transformation.
7pub struct Lowercase;
8
9impl Transformation for Lowercase {
10    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
11        let lower = input.to_lowercase();
12        if lower == input {
13            Cow::Borrowed(input)
14        } else {
15            Cow::Owned(lower)
16        }
17    }
18
19    fn name(&self) -> &'static str {
20        "lowercase"
21    }
22}
23
24/// Uppercase transformation.
25pub struct Uppercase;
26
27impl Transformation for Uppercase {
28    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
29        let upper = input.to_uppercase();
30        if upper == input {
31            Cow::Borrowed(input)
32        } else {
33            Cow::Owned(upper)
34        }
35    }
36
37    fn name(&self) -> &'static str {
38        "uppercase"
39    }
40}
41
42/// Compress whitespace transformation.
43pub struct CompressWhitespace;
44
45impl Transformation for CompressWhitespace {
46    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
47        let mut result = String::new();
48        let mut last_was_space = false;
49        let mut modified = false;
50
51        for c in input.chars() {
52            if c.is_whitespace() {
53                if !last_was_space {
54                    result.push(' ');
55                } else {
56                    modified = true;
57                }
58                last_was_space = true;
59            } else {
60                result.push(c);
61                last_was_space = false;
62            }
63        }
64
65        if modified || result.chars().any(|c| c.is_whitespace() && c != ' ') {
66            Cow::Owned(result)
67        } else if result == input {
68            Cow::Borrowed(input)
69        } else {
70            Cow::Owned(result)
71        }
72    }
73
74    fn name(&self) -> &'static str {
75        "compressWhitespace"
76    }
77}
78
79/// Remove whitespace transformation.
80pub struct RemoveWhitespace;
81
82impl Transformation for RemoveWhitespace {
83    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
84        let result: String = input.chars().filter(|c| !c.is_whitespace()).collect();
85        if result == input {
86            Cow::Borrowed(input)
87        } else {
88            Cow::Owned(result)
89        }
90    }
91
92    fn name(&self) -> &'static str {
93        "removeWhitespace"
94    }
95}
96
97/// Remove null bytes transformation.
98pub struct RemoveNulls;
99
100impl Transformation for RemoveNulls {
101    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
102        if !input.contains('\0') {
103            return Cow::Borrowed(input);
104        }
105        Cow::Owned(input.replace('\0', ""))
106    }
107
108    fn name(&self) -> &'static str {
109        "removeNulls"
110    }
111}
112
113/// Replace null bytes with spaces transformation.
114pub struct ReplaceNulls;
115
116impl Transformation for ReplaceNulls {
117    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
118        if !input.contains('\0') {
119            return Cow::Borrowed(input);
120        }
121        Cow::Owned(input.replace('\0', " "))
122    }
123
124    fn name(&self) -> &'static str {
125        "replaceNulls"
126    }
127}
128
129/// Trim transformation.
130pub struct Trim;
131
132impl Transformation for Trim {
133    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
134        let trimmed = input.trim();
135        if trimmed.len() == input.len() {
136            Cow::Borrowed(input)
137        } else {
138            Cow::Owned(trimmed.to_string())
139        }
140    }
141
142    fn name(&self) -> &'static str {
143        "trim"
144    }
145}
146
147/// Trim left transformation.
148pub struct TrimLeft;
149
150impl Transformation for TrimLeft {
151    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
152        let trimmed = input.trim_start();
153        if trimmed.len() == input.len() {
154            Cow::Borrowed(input)
155        } else {
156            Cow::Owned(trimmed.to_string())
157        }
158    }
159
160    fn name(&self) -> &'static str {
161        "trimLeft"
162    }
163}
164
165/// Trim right transformation.
166pub struct TrimRight;
167
168impl Transformation for TrimRight {
169    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
170        let trimmed = input.trim_end();
171        if trimmed.len() == input.len() {
172            Cow::Borrowed(input)
173        } else {
174            Cow::Owned(trimmed.to_string())
175        }
176    }
177
178    fn name(&self) -> &'static str {
179        "trimRight"
180    }
181}
182
183/// Normalize path transformation (Unix-style).
184pub struct NormalizePath;
185
186impl Transformation for NormalizePath {
187    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
188        let mut result = String::new();
189        let mut modified = false;
190
191        // Replace backslashes with forward slashes
192        let normalized = if input.contains('\\') {
193            modified = true;
194            Cow::Owned(input.replace('\\', "/"))
195        } else {
196            Cow::Borrowed(input)
197        };
198
199        // Collapse multiple slashes
200        let mut last_was_slash = false;
201        for c in normalized.chars() {
202            if c == '/' {
203                if !last_was_slash {
204                    result.push('/');
205                } else {
206                    modified = true;
207                }
208                last_was_slash = true;
209            } else {
210                result.push(c);
211                last_was_slash = false;
212            }
213        }
214
215        // Remove . and .. components
216        let parts: Vec<&str> = result.split('/').collect();
217        let mut stack: Vec<&str> = Vec::new();
218
219        for part in parts {
220            match part {
221                "." => {
222                    modified = true;
223                }
224                ".." => {
225                    modified = true;
226                    stack.pop();
227                }
228                "" if !stack.is_empty() => {
229                    // Keep leading empty string for absolute paths
230                }
231                other => {
232                    stack.push(other);
233                }
234            }
235        }
236
237        if modified {
238            Cow::Owned(stack.join("/"))
239        } else {
240            Cow::Borrowed(input)
241        }
242    }
243
244    fn name(&self) -> &'static str {
245        "normalizePath"
246    }
247}
248
249/// Normalize path transformation (Windows-style).
250pub struct NormalizePathWin;
251
252impl Transformation for NormalizePathWin {
253    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
254        // Same as NormalizePath but preserves backslashes
255        let np = NormalizePath;
256        let result = np.transform(input);
257        // Convert back to backslashes
258        if result.contains('/') {
259            Cow::Owned(result.replace('/', "\\"))
260        } else {
261            result
262        }
263    }
264
265    fn name(&self) -> &'static str {
266        "normalizePathWin"
267    }
268}
269
270/// Remove comments transformation.
271pub struct RemoveComments;
272
273impl Transformation for RemoveComments {
274    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
275        let mut result = String::new();
276        let mut in_comment = false;
277        let mut chars = input.chars().peekable();
278
279        while let Some(c) = chars.next() {
280            if in_comment {
281                if c == '*' && chars.peek() == Some(&'/') {
282                    chars.next();
283                    in_comment = false;
284                }
285            } else if c == '/' && chars.peek() == Some(&'*') {
286                chars.next();
287                in_comment = true;
288            } else {
289                result.push(c);
290            }
291        }
292
293        if result == input {
294            Cow::Borrowed(input)
295        } else {
296            Cow::Owned(result)
297        }
298    }
299
300    fn name(&self) -> &'static str {
301        "removeComments"
302    }
303}
304
305/// Replace comments transformation (replaces /* ... */ with space).
306pub struct ReplaceComments;
307
308impl Transformation for ReplaceComments {
309    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
310        let mut result = String::new();
311        let mut in_comment = false;
312        let mut chars = input.chars().peekable();
313        let mut modified = false;
314
315        while let Some(c) = chars.next() {
316            if in_comment {
317                if c == '*' && chars.peek() == Some(&'/') {
318                    chars.next();
319                    in_comment = false;
320                    result.push(' '); // Replace comment with space
321                }
322            } else if c == '/' && chars.peek() == Some(&'*') {
323                chars.next();
324                in_comment = true;
325                modified = true;
326            } else {
327                result.push(c);
328            }
329        }
330
331        if modified {
332            Cow::Owned(result)
333        } else {
334            Cow::Borrowed(input)
335        }
336    }
337
338    fn name(&self) -> &'static str {
339        "replaceComments"
340    }
341}
342
343/// Remove comment characters transformation.
344pub struct RemoveCommentsChar;
345
346impl Transformation for RemoveCommentsChar {
347    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
348        // Remove /*, */, --, and #
349        let mut result = input.to_string();
350        result = result.replace("/*", "");
351        result = result.replace("*/", "");
352        result = result.replace("--", "");
353        result = result.replace('#', "");
354
355        if result == input {
356            Cow::Borrowed(input)
357        } else {
358            Cow::Owned(result)
359        }
360    }
361
362    fn name(&self) -> &'static str {
363        "removeCommentsChar"
364    }
365}
366
367/// SQL hex decode transformation.
368pub struct SqlHexDecode;
369
370impl Transformation for SqlHexDecode {
371    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
372        // Decode SQL hex strings like 0x41424344 to ABCD
373        let mut result = String::new();
374        let mut chars = input.chars().peekable();
375        let mut modified = false;
376
377        while let Some(c) = chars.next() {
378            if c == '0' && chars.peek() == Some(&'x') {
379                chars.next(); // consume 'x'
380                let mut hex = String::new();
381                while let Some(&next) = chars.peek() {
382                    if next.is_ascii_hexdigit() {
383                        hex.push(chars.next().unwrap());
384                    } else {
385                        break;
386                    }
387                }
388                // Decode hex pairs
389                let mut i = 0;
390                while i + 1 < hex.len() {
391                    if let Ok(byte) = u8::from_str_radix(&hex[i..i+2], 16) {
392                        result.push(byte as char);
393                    }
394                    i += 2;
395                }
396                modified = true;
397            } else {
398                result.push(c);
399            }
400        }
401
402        if modified {
403            Cow::Owned(result)
404        } else {
405            Cow::Borrowed(input)
406        }
407    }
408
409    fn name(&self) -> &'static str {
410        "sqlHexDecode"
411    }
412}
413
414/// UTF-8 to Unicode transformation (pass-through for now).
415pub struct Utf8ToUnicode;
416
417impl Transformation for Utf8ToUnicode {
418    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
419        // In Rust strings are already UTF-8, this is a no-op
420        Cow::Borrowed(input)
421    }
422
423    fn name(&self) -> &'static str {
424        "utf8ToUnicode"
425    }
426}
427
428/// Command line normalization transformation.
429pub struct CmdLine;
430
431impl Transformation for CmdLine {
432    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
433        let mut result = String::new();
434        let mut modified = false;
435
436        for c in input.chars() {
437            match c {
438                // Replace with space
439                ',' | ';' | '\'' | '"' | '`' => {
440                    result.push(' ');
441                    modified = true;
442                }
443                // Remove caret (Windows escape)
444                '^' => {
445                    modified = true;
446                }
447                // Lowercase
448                c if c.is_ascii_uppercase() => {
449                    result.push(c.to_ascii_lowercase());
450                    modified = true;
451                }
452                _ => {
453                    result.push(c);
454                }
455            }
456        }
457
458        // Compress whitespace
459        let compressed: String = result
460            .split_whitespace()
461            .collect::<Vec<_>>()
462            .join(" ");
463
464        if modified || compressed != result {
465            Cow::Owned(compressed)
466        } else {
467            Cow::Borrowed(input)
468        }
469    }
470
471    fn name(&self) -> &'static str {
472        "cmdLine"
473    }
474}
475
476/// Escape sequence decode transformation.
477pub struct EscapeSeqDecode;
478
479impl Transformation for EscapeSeqDecode {
480    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
481        // Decode escape sequences like \n, \r, \t, \xHH, \uHHHH
482        let mut result = String::new();
483        let mut chars = input.chars().peekable();
484        let mut modified = false;
485
486        while let Some(c) = chars.next() {
487            if c == '\\' {
488                if let Some(&next) = chars.peek() {
489                    modified = true;
490                    chars.next();
491                    match next {
492                        'n' => result.push('\n'),
493                        'r' => result.push('\r'),
494                        't' => result.push('\t'),
495                        '\\' => result.push('\\'),
496                        '0' => result.push('\0'),
497                        'x' => {
498                            // Hex escape \xHH
499                            let mut hex = String::new();
500                            for _ in 0..2 {
501                                if let Some(&h) = chars.peek() {
502                                    if h.is_ascii_hexdigit() {
503                                        hex.push(chars.next().unwrap());
504                                    } else {
505                                        break;
506                                    }
507                                }
508                            }
509                            if let Ok(byte) = u8::from_str_radix(&hex, 16) {
510                                result.push(byte as char);
511                            } else {
512                                result.push('x');
513                                result.push_str(&hex);
514                            }
515                        }
516                        'u' => {
517                            // Unicode escape \uHHHH
518                            let mut hex = String::new();
519                            for _ in 0..4 {
520                                if let Some(&h) = chars.peek() {
521                                    if h.is_ascii_hexdigit() {
522                                        hex.push(chars.next().unwrap());
523                                    } else {
524                                        break;
525                                    }
526                                }
527                            }
528                            if let Ok(code) = u32::from_str_radix(&hex, 16) {
529                                if let Some(c) = char::from_u32(code) {
530                                    result.push(c);
531                                } else {
532                                    result.push('u');
533                                    result.push_str(&hex);
534                                }
535                            } else {
536                                result.push('u');
537                                result.push_str(&hex);
538                            }
539                        }
540                        _ => {
541                            result.push('\\');
542                            result.push(next);
543                        }
544                    }
545                } else {
546                    result.push(c);
547                }
548            } else {
549                result.push(c);
550            }
551        }
552
553        if modified {
554            Cow::Owned(result)
555        } else {
556            Cow::Borrowed(input)
557        }
558    }
559
560    fn name(&self) -> &'static str {
561        "escapeSeqDecode"
562    }
563}
564
565/// SHA-256 hash transformation.
566pub struct Sha256;
567
568impl Transformation for Sha256 {
569    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
570        use sha2::{Digest, Sha256 as Sha256Hasher};
571        let mut hasher = Sha256Hasher::new();
572        hasher.update(input.as_bytes());
573        let result = hasher.finalize();
574        Cow::Owned(hex::encode(result))
575    }
576
577    fn name(&self) -> &'static str {
578        "sha256"
579    }
580}
581
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `t` maps every `(input, expected)` pair as stated.
    fn check<T: Transformation>(t: &T, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(t.transform(input), expected, "{}({:?})", t.name(), input);
        }
    }

    #[test]
    fn test_lowercase() {
        check(
            &Lowercase,
            &[
                ("Hello World", "hello world"),
                ("already lower", "already lower"),
            ],
        );
    }

    #[test]
    fn test_compress_whitespace() {
        check(
            &CompressWhitespace,
            &[("hello   world", "hello world"), ("a\t\nb", "a b")],
        );
    }

    #[test]
    fn test_remove_whitespace() {
        check(&RemoveWhitespace, &[("hello world", "helloworld")]);
    }

    #[test]
    fn test_normalize_path() {
        check(
            &NormalizePath,
            &[
                ("/a/b/../c", "/a/c"),
                ("/a//b/./c", "/a/b/c"),
                ("a\\b\\c", "a/b/c"),
            ],
        );
    }

    #[test]
    fn test_cmdline() {
        check(
            &CmdLine,
            &[
                // Semicolon becomes a space and uppercase is folded.
                ("CMD;/C", "cmd /c"),
                // The caret (Windows escape character) is dropped outright.
                ("echo^hello", "echohello"),
                // Several rewrites in one input.
                ("CMD,/C;DIR", "cmd /c dir"),
            ],
        );
    }
}