tiny_clean/
xml_encoder.rs1use crate::common::{char_bucket, char_mask};
2
/// Selects which XML-significant characters an [`XmlEncoder`] escapes.
///
/// Every mode escapes `&` and `<`; the variants differ in how they treat
/// `>`, `'` and `"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XmlEncoderMode {
    /// Escapes `&`, `<`, `>`, `'` and `"`.
    All,
    /// Escapes `&`, `<` and `>`; quotes are left untouched.
    Content,
    /// Escapes `&`, `<`, `'` and `"`; `>` is left untouched.
    Attribute,
    /// Escapes `&`, `<` and `'`; `>` and `"` are left untouched.
    SingleQuotedAttribute,
    /// Escapes `&`, `<` and `"`; `>` and `'` are left untouched.
    DoubleQuotedAttribute,
}
10
/// Encoder that escapes or replaces characters so text can be embedded in XML.
///
/// Build one with `XmlEncoder::new` and reuse it for any number of
/// `encode` calls; encoding only reads the stored masks.
#[derive(Debug, Clone)]
pub struct XmlEncoder {
    /// One bitmask per bucket returned by `char_bucket`, consulted for
    /// characters below U+007F. A set bit (as computed by `char_mask`) means
    /// the character is copied to the output unchanged.
    valid_masks: [u32; 4],
}
14
15impl XmlEncoder {
16 pub fn new(mode: XmlEncoderMode) -> Self {
17 let base_mask = char_mask('\r') | char_mask('\t') | char_mask('\n');
18 match mode {
19 XmlEncoderMode::All => {
20 let to_be_encoded = ['&', '<', '>', '\'', '"'];
21 let mut to_be_encoded_mask = 0u32;
22 for char in to_be_encoded {
23 to_be_encoded_mask |= char_mask(char);
24 }
25 let valid_masks = [
26 base_mask,
27 u32::MAX & !to_be_encoded_mask,
28 u32::MAX,
29 u32::MAX
30 ];
31 Self {valid_masks}
32 }
33 XmlEncoderMode::Content => {
34 let to_be_encoded = ['&', '<', '>'];
35 let mut to_be_encoded_mask = 0u32;
36 for char in to_be_encoded {
37 to_be_encoded_mask |= char_mask(char);
38 }
39 let valid_masks = [
40 base_mask,
41 u32::MAX & !to_be_encoded_mask,
42 u32::MAX,
43 u32::MAX
44 ];
45 Self {valid_masks}
46 }
47 XmlEncoderMode::Attribute => {
48 let to_be_encoded = ['&', '<', '\'', '"'];
49 let mut to_be_encoded_mask = 0u32;
50 for char in to_be_encoded {
51 to_be_encoded_mask |= char_mask(char);
52 }
53 let valid_masks = [
54 base_mask,
55 u32::MAX & !to_be_encoded_mask,
56 u32::MAX,
57 u32::MAX
58 ];
59 Self {valid_masks}
60 }
61 XmlEncoderMode::SingleQuotedAttribute => {
62 let to_be_encoded = ['&', '<', '\''];
63 let mut to_be_encoded_mask = 0u32;
64 for char in to_be_encoded {
65 to_be_encoded_mask |= char_mask(char);
66 }
67 let valid_masks = [
68 base_mask,
69 u32::MAX & !to_be_encoded_mask,
70 u32::MAX,
71 u32::MAX
72 ];
73 Self {valid_masks}
74 }
75 XmlEncoderMode::DoubleQuotedAttribute => {
76 let to_be_encoded = ['&', '<', '"'];
77 let mut to_be_encoded_mask = 0u32;
78 for char in to_be_encoded {
79 to_be_encoded_mask |= char_mask(char);
80 }
81 let valid_masks = [
82 base_mask,
83 u32::MAX & !to_be_encoded_mask,
84 u32::MAX,
85 u32::MAX
86 ];
87 Self {valid_masks}
88 }
89 }
90 }
91
92 pub fn encode(&self, input: &str) -> String {
93 let max_capacity = (u32::MAX / 2).min((input.len() * 5) as u32) as usize;
94 let mut result = String::with_capacity(max_capacity);
95 for c in input.chars() {
96 if (c as u32) < 127 {
97 let bucket = char_bucket(c);
98 let mask = char_mask(c);
99 if c > '>' || self.valid_masks[bucket] & mask != 0 {
100 result.push(c);
101 } else {
102 match c {
103 '&' => {
104 result.push('&');
105 result.push('a');
106 result.push('m');
107 result.push('p');
108 result.push(';');
109 }
110 '<' => {
111 result.push('&');
112 result.push('l');
113 result.push('t');
114 result.push(';');
115 }
116 '>' => {
117 result.push('&');
118 result.push('g');
119 result.push('t');
120 result.push(';');
121 }
122 '\'' => {
123 result.push('&');
124 result.push('#');
125 result.push('3');
126 result.push('9');
127 result.push(';');
128 }
129 '\"' => {
130 result.push('&');
131 result.push('#');
132 result.push('3');
133 result.push('4');
134 result.push(';');
135 }
136 _ => result.push(' '),
137 }
138 }
139 } else if c > '\u{fffd}' || (c >= '\u{fdd0}' && c <= '\u{fdef}') {
140 result.push(' ');
141 } else {
142 result.push(c);
143 }
144 }
145 result.shrink_to_fit();
146 result
147 }
148}
149
#[cfg(test)]
mod test {
    use crate::xml_encoder::{XmlEncoder, XmlEncoderMode};

    /// Checks shared by every mode: U+FFFD is kept, U+FFFF becomes a space.
    fn generic_tests(encoder: &XmlEncoder) {
        assert_eq!("\u{fffd}", encoder.encode("\u{fffd}"));
        assert_eq!(" ", encoder.encode("\u{ffff}"));
    }

    /// `All` escapes `&`, `>`, `<`, `'` and `"`.
    #[test]
    fn test_all_encode() {
        let encoder = XmlEncoder::new(XmlEncoderMode::All);
        assert_eq!("&amp;", encoder.encode("&"));
        assert_eq!("&gt;", encoder.encode(">"));
        assert_eq!("&lt;", encoder.encode("<"));
        assert_eq!("&#39;", encoder.encode("\'"));
        assert_eq!("&#34;", encoder.encode("\""));
        generic_tests(&encoder);
    }

    /// `Content` escapes `&`, `>` and `<` but leaves both quote characters alone.
    #[test]
    fn test_content_encode() {
        let encoder = XmlEncoder::new(XmlEncoderMode::Content);
        assert_eq!("&amp;", encoder.encode("&"));
        assert_eq!("&gt;", encoder.encode(">"));
        assert_eq!("&lt;", encoder.encode("<"));
        assert_eq!("\'", encoder.encode("\'"));
        assert_eq!("\"", encoder.encode("\""));
        generic_tests(&encoder);
    }

    /// `Attribute` escapes `&`, `<` and both quotes; `>` passes through.
    #[test]
    fn test_attribute_encode() {
        let encoder = XmlEncoder::new(XmlEncoderMode::Attribute);
        assert_eq!("&amp;", encoder.encode("&"));
        assert_eq!(">", encoder.encode(">"));
        assert_eq!("&lt;", encoder.encode("<"));
        assert_eq!("&#39;", encoder.encode("\'"));
        assert_eq!("&#34;", encoder.encode("\""));
        generic_tests(&encoder);
    }

    /// `SingleQuotedAttribute` escapes `&`, `<` and `'`; `>` and `"` pass through.
    #[test]
    fn test_single_quoted_encode() {
        let encoder = XmlEncoder::new(XmlEncoderMode::SingleQuotedAttribute);
        assert_eq!("&amp;", encoder.encode("&"));
        assert_eq!(">", encoder.encode(">"));
        assert_eq!("&lt;", encoder.encode("<"));
        assert_eq!("&#39;", encoder.encode("\'"));

        assert_eq!("\"", encoder.encode("\""));

        generic_tests(&encoder);
    }

    /// `DoubleQuotedAttribute` escapes `&`, `<` and `"`; `>` and `'` pass through.
    #[test]
    fn test_double_quoted_encode() {
        let encoder = XmlEncoder::new(XmlEncoderMode::DoubleQuotedAttribute);
        assert_eq!("&amp;", encoder.encode("&"));
        assert_eq!(">", encoder.encode(">"));
        assert_eq!("&lt;", encoder.encode("<"));
        assert_eq!("&#34;", encoder.encode("\""));
        assert_eq!("\'", encoder.encode("\'"));

        generic_tests(&encoder);
    }
}
215}