1use anyhow::{bail, Result};
4use compact_str::CompactString;
5use std::borrow::Cow;
6
7#[cfg(test)]
8mod test;
9
10#[derive(Debug, Clone)]
11pub struct Escape {
12 escape_char: char,
13 escape: Box<[char]>,
14 tr: Box<[(char, CompactString)]>,
15 generic: Option<fn(char) -> bool>,
16}
17
/// Stateful predicate deciding whether `c` is an unescaped separator.
///
/// `esc` carries the "previous character was an unescaped escape character"
/// flag between calls; callers feed the characters of a string in order.
///
/// Returns `true` only when `c == sep` and it is not preceded by an active
/// escape character.
fn is_sep(esc: &mut bool, escape_char: char, c: char, sep: char) -> bool {
    if c == sep {
        // An escape applies to exactly one character. Consume it here so that
        // a separator directly following an escaped separator (e.g. `\,,`)
        // is not mistaken for being escaped as well.
        !std::mem::replace(esc, false)
    } else {
        // An escape character toggles the flag (so `\\` is a literal escape
        // character); any other character clears it.
        *esc = c == escape_char && !*esc;
        false
    }
}
26
27impl Escape {
28 pub fn get_escape_char(&self) -> char {
30 self.escape_char
31 }
32
33 pub fn get_escaped(&self) -> &[char] {
35 &self.escape
36 }
37
38 pub fn get_tr(&self) -> &[(char, CompactString)] {
40 &self.tr
41 }
42
43 pub fn new(
64 escape_char: char,
65 escape: &[char],
66 tr: &[(char, &str)],
67 generic: Option<fn(char) -> bool>,
68 ) -> Result<Self> {
69 if !escape_char.is_ascii() {
70 bail!("the escape char must be ascii")
71 }
72 if !escape.contains(&escape_char) {
73 bail!("the escape slice must contain the escape character")
74 }
75 for (i, (c, s)) in tr.iter().enumerate() {
76 if *c == escape_char {
77 bail!("you cannot translate the escape char")
78 }
79 if s.len() == 0 {
80 bail!("translation targets may not be empty")
81 }
82 if !s.is_ascii() {
83 bail!("translation targets must be ascii")
84 }
85 if s.starts_with("u") {
86 bail!("translation targets must not start with u")
87 }
88 if s.contains(escape_char) {
89 bail!("translation targets may not contain the escape char")
90 }
91 if !escape.contains(&c) {
92 bail!("the escape array must contain every translation key")
93 }
94 for (j, (c1, s1)) in tr.iter().enumerate() {
95 if i != j {
96 if c == c1 {
97 bail!("duplicate translation key {c}")
98 }
99 if s == s1 {
100 bail!("duplicate translation target {s}")
101 }
102 }
103 }
104 }
105 Ok(Self {
106 escape_char,
107 escape: Box::from(escape),
108 tr: Box::from_iter(tr.iter().map(|(c, s)| (*c, CompactString::new(s)))),
109 generic,
110 })
111 }
112
113 pub fn escape_to<T>(&self, s: &T, buf: &mut String)
115 where
116 T: AsRef<str> + ?Sized,
117 {
118 for c in s.as_ref().chars() {
119 if self.escape.contains(&c) {
120 buf.push(self.escape_char);
121 match self
122 .tr
123 .iter()
124 .find_map(|(s, e)| if c == *s { Some(e) } else { None })
125 {
126 Some(e) => buf.push_str(e),
127 None => buf.push(c),
128 }
129 } else if let Some(generic) = &self.generic
130 && (generic)(c)
131 {
132 use std::fmt::Write;
133 buf.push(self.escape_char);
134 write!(buf, "u{{{:x}}}", c as u32).unwrap();
135 } else {
136 buf.push(c);
137 }
138 }
139 }
140
141 pub fn escape<'a, T>(&self, s: &'a T) -> Cow<'a, str>
144 where
145 T: AsRef<str> + ?Sized,
146 {
147 let s = s.as_ref();
148 let mut to_escape = 0;
149 for c in s.chars() {
150 if self.escape.contains(&c)
151 || self.generic.as_ref().map(|f| (f)(c)).unwrap_or(false)
152 {
153 to_escape += 1
154 }
155 }
156 if to_escape == 0 {
157 Cow::Borrowed(s.as_ref())
158 } else {
159 let mut out = String::with_capacity(s.len() + to_escape);
160 self.escape_to(s, &mut out);
161 Cow::Owned(out)
162 }
163 }
164
165 pub fn unescape_to<T>(&self, s: &T, buf: &mut String)
167 where
168 T: AsRef<str> + ?Sized,
169 {
170 fn parse_unicode_escape_seq(s: &str) -> Option<(usize, char)> {
171 if !s.starts_with("u{") {
172 return None;
173 }
174 let i = s.find('}')?;
175 let n = u32::from_str_radix(&s[2..i], 16).ok()?;
176 let c = char::from_u32(n)?;
177 Some((i + 1, c))
178 }
179 let mut escaped = false;
180 let mut skip_to = 0;
181 let s = s.as_ref();
182 buf.extend(s.char_indices().filter_map(|(i, c)| {
183 if i < skip_to {
184 None
185 } else if c == self.escape_char && !escaped {
186 escaped = true;
187 None
188 } else if escaped {
189 escaped = false;
190 for (v, k) in &self.tr {
191 if s[i..].starts_with(k.as_str()) {
192 skip_to = i + k.len();
193 return Some(*v);
194 }
195 }
196 if let Some((j, c)) = parse_unicode_escape_seq(&s[i..]) {
197 skip_to = i + j;
198 return Some(c);
199 }
200 Some(c)
201 } else {
202 Some(c)
203 }
204 }))
205 }
206
207 pub fn unescape<'a, T>(&self, s: &'a T) -> Cow<'a, str>
210 where
211 T: AsRef<str> + ?Sized,
212 {
213 let s = s.as_ref();
214 if !s.contains(self.escape_char) {
215 Cow::Borrowed(s.as_ref())
216 } else {
217 let mut res = String::with_capacity(s.len());
218 self.unescape_to(s, &mut res);
219 Cow::Owned(res)
220 }
221 }
222
223 pub fn is_escaped<T>(&self, s: &T, i: usize) -> bool
226 where
227 T: AsRef<str> + ?Sized,
228 {
229 is_escaped(s, self.escape_char, i)
230 }
231
232 pub fn splitn<'a, T>(
235 &self,
236 s: &'a T,
237 n: usize,
238 sep: char,
239 ) -> impl Iterator<Item = &'a str> + use<'a, T>
240 where
241 T: AsRef<str> + ?Sized,
242 {
243 splitn(s, self.escape_char, n, sep)
244 }
245
246 pub fn split<'a, T>(
249 &self,
250 s: &'a T,
251 sep: char,
252 ) -> impl Iterator<Item = &'a str> + use<'a, T>
253 where
254 T: AsRef<str> + ?Sized,
255 {
256 split(s, self.escape_char, sep)
257 }
258}
259
/// Report whether the character starting at byte offset `i` of `s` is escaped.
///
/// The character is escaped when it is directly preceded by an odd number of
/// consecutive `escape_char`s (an even run consists of escaped escape
/// characters only).
///
/// Returns `true` as well when `i` is not a char boundary of `s`, which
/// includes every `i` greater than `s.len()`.
pub fn is_escaped<T>(s: &T, escape_char: char, i: usize) -> bool
where
    T: AsRef<str> + ?Sized,
{
    let s = s.as_ref();
    if !s.is_char_boundary(i) {
        return true;
    }
    // Compare whole characters instead of single bytes: casting the escape
    // character to `u8` would truncate non-ASCII escape characters and could
    // make them match an unrelated ASCII byte.
    let preceding_escapes = s[..i]
        .chars()
        .rev()
        .take_while(|&c| c == escape_char)
        .count();
    preceding_escapes % 2 == 1
}
280
281pub fn splitn<'a, T>(
284 s: &'a T,
285 escape_char: char,
286 n: usize,
287 sep: char,
288) -> impl Iterator<Item = &'a str> + use<'a, T>
289where
290 T: AsRef<str> + ?Sized,
291{
292 s.as_ref().splitn(n, {
293 let mut esc = false;
294 move |c| is_sep(&mut esc, escape_char, c, sep)
295 })
296}
297
298pub fn split<'a, T>(
301 s: &'a T,
302 escape_char: char,
303 sep: char,
304) -> impl Iterator<Item = &'a str> + use<'a, T>
305where
306 T: AsRef<str> + ?Sized,
307{
308 s.as_ref().split({
309 let mut esc = false;
310 move |c| is_sep(&mut esc, escape_char, c, sep)
311 })
312}