// buup/transformers/html_decode.rs
use crate::{Transform, TransformError, TransformerCategory};
2
/// Transformer that turns HTML character references back into the characters
/// they stand for.
///
/// The type carries no state; all behaviour lives in its `Transform` impl.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HtmlDecode;
6
/// Sample input for the HTML decoder: an HTML-escaped paragraph that decodes
/// to `<p>Hello & Welcome!</p>`.
pub const DEFAULT_TEST_INPUT: &str = "&lt;p&gt;Hello &amp; Welcome!&lt;/p&gt;";
9
10impl Transform for HtmlDecode {
11 fn name(&self) -> &'static str {
12 "HTML Decode"
13 }
14
15 fn id(&self) -> &'static str {
16 "htmldecode"
17 }
18
19 fn description(&self) -> &'static str {
20 "Decodes HTML entities (e.g., <) back into characters (<)."
21 }
22
23 fn category(&self) -> TransformerCategory {
24 TransformerCategory::Decoder
25 }
26
27 fn default_test_input(&self) -> &'static str {
28 "<p>Hello & Welcome!</p>"
29 }
30
31 fn transform(&self, input: &str) -> Result<String, TransformError> {
32 if input.is_empty() {
33 return Ok(String::new());
34 }
35
36 let mut result = String::with_capacity(input.len());
38
39 let mut chars = input.chars().peekable();
40 while let Some(c) = chars.next() {
41 if c == '&' {
42 let mut entity = String::with_capacity(10); entity.push(c);
44
45 let mut entity_length = 1; const MAX_ENTITY_LENGTH: usize = 12; while let Some(&next_char) = chars.peek() {
50 if next_char == ';' || entity_length >= MAX_ENTITY_LENGTH {
51 entity.push(next_char);
52 chars.next(); break;
54 }
55 entity.push(next_char);
56 chars.next(); entity_length += 1;
58 }
59
60 if let Some(decoded) = decode_html_entity(&entity) {
62 result.push(decoded);
63 } else {
64 result.push_str(&entity);
66 }
67 } else {
68 result.push(c);
69 }
70 }
71
72 Ok(result)
73 }
74}
75
/// Decodes a single, complete character reference.
///
/// `entity` must include both the leading `&` and the trailing `;`.
/// Supported forms:
///
/// * named: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&sol;`, `&grave;`,
///   `&equals;`
/// * decimal: `&#NNN;`
/// * hexadecimal: `&#xHHH;` or `&#XHHH;`
///
/// Returns `None` when the text is not one of these forms, when the number is
/// malformed or overflows `u32`, or when it is not a valid Unicode scalar
/// value (for example a surrogate).
fn decode_html_entity(entity: &str) -> Option<char> {
    let body = entity.strip_prefix('&')?.strip_suffix(';')?;

    match body {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "sol" => Some('/'),
        "grave" => Some('`'),
        "equals" => Some('='),
        _ => {
            let reference = body.strip_prefix('#')?;
            let (digits, radix) = match reference
                .strip_prefix('x')
                .or_else(|| reference.strip_prefix('X'))
            {
                Some(hex) => (hex, 16),
                None => (reference, 10),
            };

            // The integer parsers accept a leading `+`, which is not valid in
            // a character reference, so check the digits explicitly first.
            if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
                return None;
            }

            u32::from_str_radix(digits, radix)
                .ok()
                .and_then(std::char::from_u32)
        }
    }
}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the decoder and unwraps the result.
    fn decode(input: &str) -> String {
        HtmlDecode.transform(input).unwrap()
    }

    #[test]
    fn test_html_decode() {
        assert_eq!(decode(DEFAULT_TEST_INPUT), "<p>Hello & Welcome!</p>");

        assert_eq!(
            decode("&lt;script&gt;alert(&quot;XSS attack&quot;);&lt;/script&gt;"),
            "<script>alert(\"XSS attack\");</script>"
        );

        assert_eq!(decode("a &lt; b &amp;&amp; c &gt; d"), "a < b && c > d");
    }

    #[test]
    fn test_html_decode_numeric_references() {
        // Punctuation written as hexadecimal references.
        assert_eq!(
            decode(
                "Don&#x27;t use &#x60;eval(input)&#x60; or query&#x3D;&#x27;unsafe&#x27;"
            ),
            "Don't use `eval(input)` or query='unsafe'"
        );

        // The same code point in decimal and hexadecimal form.
        assert_eq!(
            decode("Euro symbol: &#8364; or &#x20AC;"),
            "Euro symbol: € or €"
        );
    }

    #[test]
    fn test_html_decode_passthrough() {
        assert_eq!(
            decode("Normal text with no entities"),
            "Normal text with no entities"
        );

        assert_eq!(decode(""), "");

        // An `&` that is never terminated by `;` is left untouched.
        assert_eq!(
            decode("This is an &incomplete entity"),
            "This is an &incomplete entity"
        );

        // Unknown names and non-numeric `&#...;` are left untouched.
        assert_eq!(
            decode("This is &invalid; and &#invalid;"),
            "This is &invalid; and &#invalid;"
        );
    }
}