1use std::borrow::Cow;
4
5use memchr::memchr2;
6
/// Resolves one entity reference whose leading `&` has already been consumed.
///
/// `text` is the input immediately following the `&`. The accepted forms are
/// the named entities `lt`, `gt`, `amp`, `apos` and `quot`, and numeric
/// character references in decimal (`#NNN`) or hexadecimal (`#xHHH`, with a
/// lowercase `x`). Every form must be terminated by `;`.
///
/// Returns the decoded character and the remainder of `text` after the
/// terminating `;`. Returns `None` when `text` does not begin with a complete
/// reference: unknown name, missing `;`, no digits, a character that is not a
/// digit in the selected radix, a value that does not fit in `u32`, or a value
/// that is not a valid `char` (for example a surrogate).
fn resolve_entity(text: &str) -> Option<(char, &str)> {
    // Named entities, each listed together with its terminating `;`.
    const NAMED: &[(&str, char)] = &[
        ("lt;", '<'),
        ("gt;", '>'),
        ("amp;", '&'),
        ("apos;", '\''),
        ("quot;", '"'),
    ];

    for &(name, chr) in NAMED.iter() {
        if let Some(rest) = text.strip_prefix(name) {
            return Some((chr, rest));
        }
    }

    // Anything else must be a numeric character reference.
    let body = text.strip_prefix('#')?;
    let (radix, body) = match body.strip_prefix('x') {
        Some(hex) => (16, hex),
        None => (10, body),
    };

    let end = body.find(';')?;
    let digits = &body[..end];
    if digits.is_empty() {
        // `&#;` / `&#x;` carry no code point; treating them as NUL would be wrong.
        return None;
    }

    let mut code: u32 = 0;
    for digit in digits.chars() {
        // Checked arithmetic: an over-long digit run must be rejected instead
        // of panicking (debug builds) or wrapping to an unrelated character.
        code = code
            .checked_mul(radix)?
            .checked_add(digit.to_digit(radix)?)?;
    }

    // `;` is a single byte, so `end + 1` is always a valid boundary.
    Some((char::from_u32(code)?, &body[end + 1..]))
}
48
49pub fn unescape(string: &'_ str) -> Cow<'_, str> {
56 let mut replaced = String::new();
57
58 let mut current = string;
59 while let Some(next) = memchr2(b'&', b'\0', current.as_bytes()) {
60 match current.as_bytes()[next] {
61 b'&' => {
62 if let Some((chr, rest)) = resolve_entity(¤t[next + 1..]) {
63 replaced.push_str(¤t[..next]);
64
65 if chr == '\0' {
66 return Cow::Owned(replaced);
67 }
68
69 replaced.push(chr);
70 current = rest;
71 } else {
72 current = ¤t[1..];
73 }
74 }
75 _ => {
76 return if replaced.is_empty() {
77 Cow::Borrowed(string)
78 } else {
79 replaced.push_str(¤t[..next]);
80 Cow::Owned(replaced)
81 };
82 }
83 }
84 }
85
86 if replaced.is_empty() {
87 Cow::Borrowed(string)
88 } else {
89 replaced.push_str(current);
90 Cow::Owned(replaced)
91 }
92}
93
/// Replaces markup-significant characters in `string` with entity references.
///
/// `next` is called with the not-yet-processed tail of the input and must
/// return the byte offset of the next character to escape, or `None` when
/// nothing is left to escape. The characters it may report are `<`, `>`, `&`
/// and `"`, which are written out as `&lt;`, `&gt;`, `&amp;` and `&quot;`.
///
/// Returns `Cow::Borrowed(string)` when `next` never reports a match,
/// otherwise an owned `String`.
///
/// # Panics
///
/// Panics if `next` returns an offset outside the text it was given, or the
/// offset of a byte other than the four listed above.
fn escape(string: &str, next: impl Fn(&str) -> Option<usize>) -> Cow<'_, str> {
    let mut replaced = String::new();

    let mut current = string;
    while let Some(escaped) = next(current) {
        replaced.push_str(&current[..escaped]);
        let entity = match current.as_bytes()[escaped] {
            b'<' => "&lt;",
            b'>' => "&gt;",
            b'&' => "&amp;",
            b'"' => "&quot;",
            _ => unreachable!(),
        };
        replaced.push_str(entity);
        // All escapable characters are single bytes, so `escaped + 1` is a
        // valid char boundary.
        current = &current[escaped + 1..];
    }

    // Every iteration appends an entity, so an empty buffer means no match.
    if replaced.is_empty() {
        Cow::Borrowed(string)
    } else {
        replaced.push_str(current);
        Cow::Owned(replaced)
    }
}
117
118pub fn attribute_value_escape(string: &'_ str) -> Cow<'_, str> {
120 escape(string, |text| {
121 memchr::memchr3(b'<', b'&', b'"', text.as_bytes())
122 })
123}
124
125pub fn content_escape(string: &'_ str) -> Cow<'_, str> {
127 escape(string, |text| memchr::memchr2(b'<', b'&', text.as_bytes()))
128}
129
130pub fn comment_escape(string: &'_ str) -> Cow<'_, str> {
132 escape(string, |text| memchr::memchr(b'>', text.as_bytes()))
133}
134
#[cfg(test)]
mod test {
    use super::{content_escape, unescape};

    /// Unescapes each sample, then re-escapes the result with
    /// `content_escape`, checking both intermediate values.
    #[test]
    fn simple_unescape_escape() {
        // (input, expected after `unescape`, expected after `content_escape`
        // of the unescaped text). `content_escape` only touches `<` and `&`,
        // so quotes, apostrophes and `>` stay literal in the third column.
        const STRINGS: &[(&str, &str, &str)] = &[
            (
                "&quot; hello &amp; world &apos;",
                "\" hello & world '",
                "\" hello &amp; world '",
            ),
            (
                "⭐ &lt;hello world&gt; ⭐",
                "⭐ <hello world> ⭐",
                "⭐ &lt;hello world> ⭐",
            ),
            // Unknown and unterminated references are left untouched.
            ("&haha; &apo", "&haha; &apo", "&amp;haha; &amp;apo"),
        ];

        for (string, expected_unescaped, expected_escaped) in STRINGS {
            let unescaped = unescape(string);
            assert_eq!(&unescaped, expected_unescaped);
            assert_eq!(&content_escape(&unescaped), expected_escaped);
        }
    }
}