sentinel_modsec/transformations/
decode.rs

1//! Decoding transformations.
2
3use super::Transformation;
4use std::borrow::Cow;
5
6/// URL decode transformation.
7pub struct UrlDecode;
8
9impl Transformation for UrlDecode {
10    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
11        match percent_encoding::percent_decode_str(input).decode_utf8() {
12            Ok(decoded) => {
13                if decoded == input {
14                    Cow::Borrowed(input)
15                } else {
16                    Cow::Owned(decoded.into_owned())
17                }
18            }
19            Err(_) => Cow::Borrowed(input),
20        }
21    }
22
23    fn name(&self) -> &'static str {
24        "urlDecode"
25    }
26}
27
28/// URL decode with Unicode support.
29pub struct UrlDecodeUni;
30
31impl Transformation for UrlDecodeUni {
32    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
33        // Handle %uXXXX unicode escapes in addition to standard URL encoding
34        let mut result = String::new();
35        let mut chars = input.chars().peekable();
36        let mut modified = false;
37
38        while let Some(c) = chars.next() {
39            if c == '%' {
40                if chars.peek() == Some(&'u') || chars.peek() == Some(&'U') {
41                    chars.next(); // consume 'u'
42                    // Read 4 hex digits
43                    let mut hex = String::new();
44                    for _ in 0..4 {
45                        if let Some(h) = chars.next() {
46                            hex.push(h);
47                        }
48                    }
49                    if let Ok(code) = u32::from_str_radix(&hex, 16) {
50                        if let Some(decoded) = char::from_u32(code) {
51                            result.push(decoded);
52                            modified = true;
53                            continue;
54                        }
55                    }
56                    result.push('%');
57                    result.push('u');
58                    result.push_str(&hex);
59                } else {
60                    // Standard URL encoding
61                    let mut hex = String::new();
62                    for _ in 0..2 {
63                        if let Some(h) = chars.next() {
64                            hex.push(h);
65                        }
66                    }
67                    if let Ok(byte) = u8::from_str_radix(&hex, 16) {
68                        result.push(byte as char);
69                        modified = true;
70                    } else {
71                        result.push('%');
72                        result.push_str(&hex);
73                    }
74                }
75            } else {
76                result.push(c);
77            }
78        }
79
80        if modified {
81            Cow::Owned(result)
82        } else {
83            Cow::Borrowed(input)
84        }
85    }
86
87    fn name(&self) -> &'static str {
88        "urlDecodeUni"
89    }
90}
91
92/// Base64 decode transformation.
93pub struct Base64Decode;
94
95impl Transformation for Base64Decode {
96    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
97        use base64::Engine;
98        match base64::engine::general_purpose::STANDARD.decode(input) {
99            Ok(bytes) => Cow::Owned(String::from_utf8_lossy(&bytes).into_owned()),
100            Err(_) => Cow::Borrowed(input),
101        }
102    }
103
104    fn name(&self) -> &'static str {
105        "base64Decode"
106    }
107}
108
109/// Extended base64 decode (handles URL-safe base64).
110pub struct Base64DecodeExt;
111
112impl Transformation for Base64DecodeExt {
113    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
114        use base64::Engine;
115        // Try URL-safe first, then standard
116        let result = base64::engine::general_purpose::URL_SAFE
117            .decode(input)
118            .or_else(|_| base64::engine::general_purpose::STANDARD.decode(input));
119
120        match result {
121            Ok(bytes) => Cow::Owned(String::from_utf8_lossy(&bytes).into_owned()),
122            Err(_) => Cow::Borrowed(input),
123        }
124    }
125
126    fn name(&self) -> &'static str {
127        "base64DecodeExt"
128    }
129}
130
131/// Hex decode transformation.
132pub struct HexDecode;
133
134impl Transformation for HexDecode {
135    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
136        let mut result = Vec::new();
137        let mut chars = input.chars().peekable();
138
139        while let Some(c1) = chars.next() {
140            if let Some(c2) = chars.next() {
141                let hex = format!("{}{}", c1, c2);
142                if let Ok(byte) = u8::from_str_radix(&hex, 16) {
143                    result.push(byte);
144                } else {
145                    // Invalid hex, keep original
146                    return Cow::Borrowed(input);
147                }
148            } else {
149                // Odd number of chars
150                return Cow::Borrowed(input);
151            }
152        }
153
154        Cow::Owned(String::from_utf8_lossy(&result).into_owned())
155    }
156
157    fn name(&self) -> &'static str {
158        "hexDecode"
159    }
160}
161
162/// HTML entity decode transformation.
163pub struct HtmlEntityDecode;
164
165impl Transformation for HtmlEntityDecode {
166    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
167        let decoded = html_escape::decode_html_entities(input);
168        if decoded == input {
169            Cow::Borrowed(input)
170        } else {
171            Cow::Owned(decoded.into_owned())
172        }
173    }
174
175    fn name(&self) -> &'static str {
176        "htmlEntityDecode"
177    }
178}
179
180/// JavaScript decode transformation.
181pub struct JsDecode;
182
183impl Transformation for JsDecode {
184    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
185        let mut result = String::new();
186        let mut chars = input.chars().peekable();
187        let mut modified = false;
188
189        while let Some(c) = chars.next() {
190            if c == '\\' {
191                modified = true;
192                match chars.next() {
193                    Some('n') => result.push('\n'),
194                    Some('r') => result.push('\r'),
195                    Some('t') => result.push('\t'),
196                    Some('\\') => result.push('\\'),
197                    Some('"') => result.push('"'),
198                    Some('\'') => result.push('\''),
199                    Some('x') => {
200                        // \xHH
201                        let mut hex = String::new();
202                        for _ in 0..2 {
203                            if let Some(h) = chars.next() {
204                                hex.push(h);
205                            }
206                        }
207                        if let Ok(byte) = u8::from_str_radix(&hex, 16) {
208                            result.push(byte as char);
209                        } else {
210                            result.push('\\');
211                            result.push('x');
212                            result.push_str(&hex);
213                        }
214                    }
215                    Some('u') => {
216                        // \uHHHH
217                        let mut hex = String::new();
218                        for _ in 0..4 {
219                            if let Some(h) = chars.next() {
220                                hex.push(h);
221                            }
222                        }
223                        if let Ok(code) = u32::from_str_radix(&hex, 16) {
224                            if let Some(decoded) = char::from_u32(code) {
225                                result.push(decoded);
226                            } else {
227                                result.push('\\');
228                                result.push('u');
229                                result.push_str(&hex);
230                            }
231                        } else {
232                            result.push('\\');
233                            result.push('u');
234                            result.push_str(&hex);
235                        }
236                    }
237                    Some(other) => {
238                        result.push('\\');
239                        result.push(other);
240                    }
241                    None => result.push('\\'),
242                }
243            } else {
244                result.push(c);
245            }
246        }
247
248        if modified {
249            Cow::Owned(result)
250        } else {
251            Cow::Borrowed(input)
252        }
253    }
254
255    fn name(&self) -> &'static str {
256        "jsDecode"
257    }
258}
259
260/// CSS decode transformation.
261pub struct CssDecode;
262
263impl Transformation for CssDecode {
264    fn transform<'a>(&self, input: &'a str) -> Cow<'a, str> {
265        let mut result = String::new();
266        let mut chars = input.chars().peekable();
267        let mut modified = false;
268
269        while let Some(c) = chars.next() {
270            if c == '\\' {
271                modified = true;
272                // CSS escape: \HH or \HHHHHH
273                let mut hex = String::new();
274                while hex.len() < 6 {
275                    match chars.peek() {
276                        Some(h) if h.is_ascii_hexdigit() => {
277                            hex.push(chars.next().unwrap());
278                        }
279                        _ => break,
280                    }
281                }
282                // Skip optional whitespace after hex
283                if let Some(' ') | Some('\t') | Some('\n') = chars.peek() {
284                    chars.next();
285                }
286
287                if !hex.is_empty() {
288                    if let Ok(code) = u32::from_str_radix(&hex, 16) {
289                        if let Some(decoded) = char::from_u32(code) {
290                            result.push(decoded);
291                            continue;
292                        }
293                    }
294                }
295                result.push('\\');
296                result.push_str(&hex);
297            } else {
298                result.push(c);
299            }
300        }
301
302        if modified {
303            Cow::Owned(result)
304        } else {
305            Cow::Borrowed(input)
306        }
307    }
308
309    fn name(&self) -> &'static str {
310        "cssDecode"
311    }
312}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `transformation` over every `(input, expected)` pair and checks
    /// that the output matches.
    fn check<T: Transformation>(transformation: &T, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(transformation.transform(input), expected);
        }
    }

    #[test]
    fn test_url_decode() {
        check(
            &UrlDecode,
            &[("hello%20world", "hello world"), ("test%2Fpath", "test/path")],
        );
    }

    #[test]
    fn test_base64_decode() {
        check(&Base64Decode, &[("aGVsbG8=", "hello")]);
    }

    #[test]
    fn test_html_entity_decode() {
        check(
            &HtmlEntityDecode,
            &[("&lt;script&gt;", "<script>"), ("&#60;", "<")],
        );
    }

    #[test]
    fn test_js_decode() {
        check(
            &JsDecode,
            &[(r"\x3cscript\x3e", "<script>"), (r"\u003c", "<")],
        );
    }
}