1#[inline]
16pub fn decode_named(name: &str) -> Option<&'static str> {
17 ENTITY_MAP.get(name).copied()
18}
19
/// Decodes the digit portion of a numeric character reference.
///
/// `digits` holds only the digits themselves: decimal when `is_hex` is
/// `false`, hexadecimal (either letter case) when it is `true`.
///
/// Returns:
/// * `Some('\u{FFFD}')` when the value is zero;
/// * `Some(c)` when the value is any other Unicode scalar value;
/// * `None` when `digits` is empty, contains anything other than digits of
///   the selected radix, does not fit in a `u32`, or names a surrogate or a
///   value above `U+10FFFF`.
pub fn decode_numeric(digits: &str, is_hex: bool) -> Option<char> {
    let radix = if is_hex { 16 } else { 10 };

    // The standard integer parsers accept a leading `+`, which would let
    // input such as `+60` decode to `<`. Only plain digits are valid here.
    if !digits.bytes().all(|b| char::from(b).is_digit(radix)) {
        return None;
    }

    // An empty string passes the check above and is rejected by the parser,
    // as is any value too large for `u32`.
    let codepoint = u32::from_str_radix(digits, radix).ok()?;

    match codepoint {
        0 => Some('\u{FFFD}'),
        // `char::from_u32` yields `None` for surrogates and out-of-range values.
        other => char::from_u32(other),
    }
}
51
52#[inline]
67pub fn escape_text(input: &str, out: &mut String) {
68 escape_impl::<false>(input, out);
69}
70
71#[inline]
86pub fn escape_attr(input: &str, out: &mut String) {
87 escape_impl::<true>(input, out);
88}
89
/// Shared implementation behind `escape_text` and `escape_attr`.
///
/// Appends `input` to `out`, replacing markup-significant characters with
/// character references:
///
/// | input | output   | when                  |
/// |-------|----------|-----------------------|
/// | `&`   | `&amp;`  | always                |
/// | `<`   | `&lt;`   | always                |
/// | `>`   | `&gt;`   | always                |
/// | `"`   | `&quot;` | `ESCAPE_QUOTES` only  |
/// | `'`   | `&#39;`  | `ESCAPE_QUOTES` only  |
///
/// The apostrophe uses the numeric form because `&apos;` is not defined in
/// HTML 4. `out` is only appended to; existing content is left in place.
#[inline(always)]
fn escape_impl<const ESCAPE_QUOTES: bool>(input: &str, out: &mut String) {
    // Lower bound only: every replacement is longer than the byte it replaces.
    out.reserve(input.len());

    // Start of the pending run of bytes that need no escaping.
    let mut last = 0;

    // Scanning bytes is sound: every escaped character is ASCII, so each
    // match index (and the index after it) lies on a UTF-8 char boundary.
    for (i, &b) in input.as_bytes().iter().enumerate() {
        let replacement = match b {
            b'&' => "&amp;",
            b'<' => "&lt;",
            b'>' => "&gt;",
            b'"' if ESCAPE_QUOTES => "&quot;",
            b'\'' if ESCAPE_QUOTES => "&#39;",
            _ => continue,
        };

        // Flush the untouched run in one copy, then the reference.
        out.push_str(&input[last..i]);
        out.push_str(replacement);
        last = i + 1;
    }

    // Trailing run after the final special character (or the whole input).
    out.push_str(&input[last..]);
}
116
/// Static lookup table from entity name to replacement text, built with
/// `phf::phf_map!`.
///
/// Keys are bare names (no leading `&`, no trailing `;`) and are
/// case-sensitive: `prime`/`Prime`, `dagger`/`Dagger` and the upper/lower
/// Greek and accented Latin letters are separate entries. Every value is a
/// single Unicode character written as a string.
static ENTITY_MAP: phf::Map<&'static str, &'static str> = phf::phf_map! {
    // Markup-significant characters and the no-break space.
    "amp" => "&",
    "lt" => "<",
    "gt" => ">",
    "quot" => "\"",
    "apos" => "'",
    "nbsp" => "\u{00A0}",

    // Latin-1 punctuation and symbols, U+00A1 through U+00BF.
    "iexcl" => "\u{00A1}",
    "cent" => "\u{00A2}",
    "pound" => "\u{00A3}",
    "curren" => "\u{00A4}",
    "yen" => "\u{00A5}",
    "brvbar" => "\u{00A6}",
    "sect" => "\u{00A7}",
    "uml" => "\u{00A8}",
    "copy" => "\u{00A9}",
    "ordf" => "\u{00AA}",
    "laquo" => "\u{00AB}",
    "not" => "\u{00AC}",
    "shy" => "\u{00AD}",
    "reg" => "\u{00AE}",
    "macr" => "\u{00AF}",
    "deg" => "\u{00B0}",
    "plusmn" => "\u{00B1}",
    "sup2" => "\u{00B2}",
    "sup3" => "\u{00B3}",
    "acute" => "\u{00B4}",
    "micro" => "\u{00B5}",
    "para" => "\u{00B6}",
    "middot" => "\u{00B7}",
    "cedil" => "\u{00B8}",
    "sup1" => "\u{00B9}",
    "ordm" => "\u{00BA}",
    "raquo" => "\u{00BB}",
    "frac14" => "\u{00BC}",
    "frac12" => "\u{00BD}",
    "frac34" => "\u{00BE}",
    "iquest" => "\u{00BF}",

    // Latin-1 letters, U+00C0 through U+00FF (including the multiplication
    // and division signs that sit inside that range).
    "Agrave" => "\u{00C0}",
    "Aacute" => "\u{00C1}",
    "Acirc" => "\u{00C2}",
    "Atilde" => "\u{00C3}",
    "Auml" => "\u{00C4}",
    "Aring" => "\u{00C5}",
    "AElig" => "\u{00C6}",
    "Ccedil" => "\u{00C7}",
    "Egrave" => "\u{00C8}",
    "Eacute" => "\u{00C9}",
    "Ecirc" => "\u{00CA}",
    "Euml" => "\u{00CB}",
    "Igrave" => "\u{00CC}",
    "Iacute" => "\u{00CD}",
    "Icirc" => "\u{00CE}",
    "Iuml" => "\u{00CF}",
    "ETH" => "\u{00D0}",
    "Ntilde" => "\u{00D1}",
    "Ograve" => "\u{00D2}",
    "Oacute" => "\u{00D3}",
    "Ocirc" => "\u{00D4}",
    "Otilde" => "\u{00D5}",
    "Ouml" => "\u{00D6}",
    "times" => "\u{00D7}",
    "Oslash" => "\u{00D8}",
    "Ugrave" => "\u{00D9}",
    "Uacute" => "\u{00DA}",
    "Ucirc" => "\u{00DB}",
    "Uuml" => "\u{00DC}",
    "Yacute" => "\u{00DD}",
    "THORN" => "\u{00DE}",
    "szlig" => "\u{00DF}",
    "agrave" => "\u{00E0}",
    "aacute" => "\u{00E1}",
    "acirc" => "\u{00E2}",
    "atilde" => "\u{00E3}",
    "auml" => "\u{00E4}",
    "aring" => "\u{00E5}",
    "aelig" => "\u{00E6}",
    "ccedil" => "\u{00E7}",
    "egrave" => "\u{00E8}",
    "eacute" => "\u{00E9}",
    "ecirc" => "\u{00EA}",
    "euml" => "\u{00EB}",
    "igrave" => "\u{00EC}",
    "iacute" => "\u{00ED}",
    "icirc" => "\u{00EE}",
    "iuml" => "\u{00EF}",
    "eth" => "\u{00F0}",
    "ntilde" => "\u{00F1}",
    "ograve" => "\u{00F2}",
    "oacute" => "\u{00F3}",
    "ocirc" => "\u{00F4}",
    "otilde" => "\u{00F5}",
    "ouml" => "\u{00F6}",
    "divide" => "\u{00F7}",
    "oslash" => "\u{00F8}",
    "ugrave" => "\u{00F9}",
    "uacute" => "\u{00FA}",
    "ucirc" => "\u{00FB}",
    "uuml" => "\u{00FC}",
    "yacute" => "\u{00FD}",
    "thorn" => "\u{00FE}",
    "yuml" => "\u{00FF}",

    // Greek letters: capitals U+0391..U+03A9, then lower case U+03B1..U+03C9
    // (with `sigmaf` for the final-sigma form).
    "Alpha" => "\u{0391}",
    "Beta" => "\u{0392}",
    "Gamma" => "\u{0393}",
    "Delta" => "\u{0394}",
    "Epsilon" => "\u{0395}",
    "Zeta" => "\u{0396}",
    "Eta" => "\u{0397}",
    "Theta" => "\u{0398}",
    "Iota" => "\u{0399}",
    "Kappa" => "\u{039A}",
    "Lambda" => "\u{039B}",
    "Mu" => "\u{039C}",
    "Nu" => "\u{039D}",
    "Xi" => "\u{039E}",
    "Omicron" => "\u{039F}",
    "Pi" => "\u{03A0}",
    "Rho" => "\u{03A1}",
    "Sigma" => "\u{03A3}",
    "Tau" => "\u{03A4}",
    "Upsilon" => "\u{03A5}",
    "Phi" => "\u{03A6}",
    "Chi" => "\u{03A7}",
    "Psi" => "\u{03A8}",
    "Omega" => "\u{03A9}",
    "alpha" => "\u{03B1}",
    "beta" => "\u{03B2}",
    "gamma" => "\u{03B3}",
    "delta" => "\u{03B4}",
    "epsilon" => "\u{03B5}",
    "zeta" => "\u{03B6}",
    "eta" => "\u{03B7}",
    "theta" => "\u{03B8}",
    "iota" => "\u{03B9}",
    "kappa" => "\u{03BA}",
    "lambda" => "\u{03BB}",
    "mu" => "\u{03BC}",
    "nu" => "\u{03BD}",
    "xi" => "\u{03BE}",
    "omicron" => "\u{03BF}",
    "pi" => "\u{03C0}",
    "rho" => "\u{03C1}",
    "sigmaf" => "\u{03C2}",
    "sigma" => "\u{03C3}",
    "tau" => "\u{03C4}",
    "upsilon" => "\u{03C5}",
    "phi" => "\u{03C6}",
    "chi" => "\u{03C7}",
    "psi" => "\u{03C8}",
    "omega" => "\u{03C9}",

    // General punctuation, the trade mark sign, arrows and mathematical
    // operators.
    "bull" => "\u{2022}",
    "hellip" => "\u{2026}",
    "prime" => "\u{2032}",
    "Prime" => "\u{2033}",
    "oline" => "\u{203E}",
    "frasl" => "\u{2044}",
    "trade" => "\u{2122}",
    "larr" => "\u{2190}",
    "uarr" => "\u{2191}",
    "rarr" => "\u{2192}",
    "darr" => "\u{2193}",
    "harr" => "\u{2194}",
    "lArr" => "\u{21D0}",
    "uArr" => "\u{21D1}",
    "rArr" => "\u{21D2}",
    "dArr" => "\u{21D3}",
    "hArr" => "\u{21D4}",
    "forall" => "\u{2200}",
    "part" => "\u{2202}",
    "exist" => "\u{2203}",
    "empty" => "\u{2205}",
    "nabla" => "\u{2207}",
    "isin" => "\u{2208}",
    "notin" => "\u{2209}",
    "ni" => "\u{220B}",
    "prod" => "\u{220F}",
    "sum" => "\u{2211}",
    "minus" => "\u{2212}",
    "lowast" => "\u{2217}",
    "radic" => "\u{221A}",
    "prop" => "\u{221D}",
    "infin" => "\u{221E}",
    "ang" => "\u{2220}",
    "and" => "\u{2227}",
    "or" => "\u{2228}",
    "cap" => "\u{2229}",
    "cup" => "\u{222A}",
    "int" => "\u{222B}",
    "there4" => "\u{2234}",
    "sim" => "\u{223C}",
    "cong" => "\u{2245}",
    "asymp" => "\u{2248}",
    "ne" => "\u{2260}",
    "equiv" => "\u{2261}",
    "le" => "\u{2264}",
    "ge" => "\u{2265}",
    "sub" => "\u{2282}",
    "sup" => "\u{2283}",
    "nsub" => "\u{2284}",
    "sube" => "\u{2286}",
    "supe" => "\u{2287}",
    "oplus" => "\u{2295}",
    "otimes" => "\u{2297}",
    "perp" => "\u{22A5}",
    "sdot" => "\u{22C5}",

    // Spaces, joiners and directional marks, dashes, quotation marks,
    // daggers, per mille and the euro sign.
    "ensp" => "\u{2002}",
    "emsp" => "\u{2003}",
    "thinsp" => "\u{2009}",
    "zwnj" => "\u{200C}",
    "zwj" => "\u{200D}",
    "lrm" => "\u{200E}",
    "rlm" => "\u{200F}",
    "ndash" => "\u{2013}",
    "mdash" => "\u{2014}",
    "lsquo" => "\u{2018}",
    "rsquo" => "\u{2019}",
    "sbquo" => "\u{201A}",
    "ldquo" => "\u{201C}",
    "rdquo" => "\u{201D}",
    "bdquo" => "\u{201E}",
    "dagger" => "\u{2020}",
    "Dagger" => "\u{2021}",
    "permil" => "\u{2030}",
    "lsaquo" => "\u{2039}",
    "rsaquo" => "\u{203A}",
    "euro" => "\u{20AC}",

    // Additional Latin letters and spacing modifier characters.
    "OElig" => "\u{0152}",
    "oelig" => "\u{0153}",
    "Scaron" => "\u{0160}",
    "scaron" => "\u{0161}",
    "Yuml" => "\u{0178}",
    "circ" => "\u{02C6}",
    "tilde" => "\u{02DC}",
    "fnof" => "\u{0192}",

    // Card suits, lozenge, ceiling/floor brackets and angle brackets.
    "spades" => "\u{2660}",
    "clubs" => "\u{2663}",
    "hearts" => "\u{2665}",
    "diams" => "\u{2666}",
    "loz" => "\u{25CA}",
    "lceil" => "\u{2308}",
    "rceil" => "\u{2309}",
    "lfloor" => "\u{230A}",
    "rfloor" => "\u{230B}",
    "lang" => "\u{2329}",
    "rang" => "\u{232A}",
};
383
/// Unit tests for entity decoding (`decode_named`, `decode_numeric`) and for
/// the escaping entry points (`escape_text`, `escape_attr`).
#[cfg(test)]
mod tests {
    use super::*;

    // ---- decode_named -----------------------------------------------------

    #[test]
    fn common_named_entities() {
        assert_eq!(decode_named("amp"), Some("&"));
        assert_eq!(decode_named("lt"), Some("<"));
        assert_eq!(decode_named("gt"), Some(">"));
        assert_eq!(decode_named("quot"), Some("\""));
        assert_eq!(decode_named("apos"), Some("'"));
        assert_eq!(decode_named("nbsp"), Some("\u{00A0}"));
    }

    #[test]
    fn unknown_entity() {
        assert_eq!(decode_named("nonexistent"), None);
        assert_eq!(decode_named(""), None);
    }

    // ---- decode_numeric ---------------------------------------------------

    #[test]
    fn numeric_decimal() {
        assert_eq!(decode_numeric("60", false), Some('<'));
        assert_eq!(decode_numeric("62", false), Some('>'));
        assert_eq!(decode_numeric("38", false), Some('&'));
        assert_eq!(decode_numeric("128512", false), Some('\u{1F600}'));
    }

    #[test]
    fn numeric_hex() {
        assert_eq!(decode_numeric("3C", true), Some('<'));
        assert_eq!(decode_numeric("3e", true), Some('>'));
        assert_eq!(decode_numeric("1F600", true), Some('\u{1F600}'));
    }

    #[test]
    fn numeric_null_replaced() {
        assert_eq!(decode_numeric("0", false), Some('\u{FFFD}'));
        assert_eq!(decode_numeric("0", true), Some('\u{FFFD}'));
    }

    #[test]
    fn numeric_invalid() {
        // Above U+10FFFF.
        assert_eq!(decode_numeric("FFFFFF", true), None);
        // Hex letters are not decimal digits.
        assert_eq!(decode_numeric("abc", false), None);
        // No digits at all.
        assert_eq!(decode_numeric("", false), None);
    }

    // ---- decode_named: further table sections -----------------------------

    #[test]
    fn greek_entities() {
        assert_eq!(decode_named("alpha"), Some("\u{03B1}"));
        assert_eq!(decode_named("omega"), Some("\u{03C9}"));
        assert_eq!(decode_named("Sigma"), Some("\u{03A3}"));
    }

    #[test]
    fn typography_entities() {
        assert_eq!(decode_named("mdash"), Some("\u{2014}"));
        assert_eq!(decode_named("euro"), Some("\u{20AC}"));
        assert_eq!(decode_named("trade"), Some("\u{2122}"));
    }

    // ---- escape_text ------------------------------------------------------

    #[test]
    fn escape_text_special_chars() {
        let mut buf = String::new();
        escape_text("&", &mut buf);
        assert_eq!(buf, "&amp;");

        buf.clear();
        escape_text("<", &mut buf);
        assert_eq!(buf, "&lt;");

        buf.clear();
        escape_text(">", &mut buf);
        assert_eq!(buf, "&gt;");
    }

    #[test]
    fn escape_text_mixed() {
        let mut buf = String::new();
        escape_text("1 < 2 & 3 > 0", &mut buf);
        assert_eq!(buf, "1 &lt; 2 &amp; 3 &gt; 0");
    }

    #[test]
    fn escape_text_plain() {
        let mut buf = String::new();
        escape_text("hello world", &mut buf);
        assert_eq!(buf, "hello world");
    }

    #[test]
    fn escape_text_empty() {
        let mut buf = String::new();
        escape_text("", &mut buf);
        assert_eq!(buf, "");
    }

    #[test]
    fn escape_text_all_special() {
        let mut buf = String::new();
        escape_text("&<>", &mut buf);
        assert_eq!(buf, "&amp;&lt;&gt;");
    }

    // ---- escape_attr ------------------------------------------------------

    #[test]
    fn escape_attr_quote() {
        let mut buf = String::new();
        escape_attr("say \"hello\"", &mut buf);
        assert_eq!(buf, "say &quot;hello&quot;");
    }

    #[test]
    fn escape_attr_mixed() {
        let mut buf = String::new();
        escape_attr("x&y=\"z\"", &mut buf);
        assert_eq!(buf, "x&amp;y=&quot;z&quot;");
    }

    #[test]
    fn escape_attr_plain() {
        let mut buf = String::new();
        escape_attr("plain", &mut buf);
        assert_eq!(buf, "plain");
    }

    #[test]
    fn escape_attr_single_quote() {
        let mut buf = String::new();
        escape_attr("it's", &mut buf);
        // The property that matters is that no raw apostrophe survives, so
        // any of the standard spellings of the reference is accepted.
        assert!(
            matches!(buf.as_str(), "it&#39;s" | "it&#x27;s" | "it&apos;s"),
            "apostrophe must be written as a character reference, got {:?}",
            buf
        );
    }

    #[test]
    fn escape_attr_empty() {
        let mut buf = String::new();
        escape_attr("", &mut buf);
        assert_eq!(buf, "");
    }

    #[test]
    fn escape_text_does_not_escape_quotes() {
        let mut buf = String::new();
        escape_text("say \"hello\" it's", &mut buf);
        assert_eq!(buf, "say \"hello\" it's");
    }
}