1#[cfg(test)]
3mod test_unescape;
4
5use std::{borrow::Cow, fmt, iter::Peekable};
6
7use crate::{into_caveat, warning, Caveat, IntoCaveat};
8
9use super::Element;
10
/// The backslash that introduces an escape sequence in the strings decoded by this module.
const ESCAPE_CHAR: char = '\\';
12
/// The problems that can be reported while checking or unescaping a JSON string.
///
/// Every variant carries the index (as produced by `str::char_indices`) at which
/// the problem was detected.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum Warning {
    /// A control character (per `char::is_control`) was found at this index.
    ControlCharacterWhileParsingString(usize),

    /// A pair of `\uXXXX` escapes failed to decode as UTF-16.
    ///
    /// Carries the index and the unpaired surrogate code unit.
    DecodeUtf16(usize, u16),

    /// The escape sequence at this index is not valid.
    InvalidEscape(usize),

    /// The string ended in the middle of an escape sequence.
    ///
    /// Carries the index of the last char that was read.
    UnexpectedEndOfString(usize),
}
28
29impl crate::Warning for Warning {
30 fn id(&self) -> warning::Id {
32 match self {
33 Self::ControlCharacterWhileParsingString(_) => {
34 warning::Id::from_static("control_character_while_parsing_string")
35 }
36 Self::DecodeUtf16(..) => warning::Id::from_static("decode_utf_1_6"),
37 Self::InvalidEscape(_) => warning::Id::from_static("invalid_escape"),
38 Self::UnexpectedEndOfString(_) => warning::Id::from_static("unexpected_end_of_string"),
39 }
40 }
41}
42
43impl fmt::Display for Warning {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 match self {
46 Self::ControlCharacterWhileParsingString(index) => {
47 write!(
48 f,
49 "Control chars were found at index `{index}` while decoding a JSON string."
50 )
51 }
52 Self::DecodeUtf16(index, code) => {
53 write!(
54 f,
55 "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
56 )
57 }
58 Self::InvalidEscape(index) => {
59 write!(
60 f,
61 "String contains an invalid escape char at index: `{index})`."
62 )
63 }
64 Self::UnexpectedEndOfString(index) => {
65 write!(f, "The String ended prematurely at index: `{index}`.")
66 }
67 }
68 }
69}
70
71pub(crate) fn analyze<'buf>(
73 s: &'buf str,
74 elem: &Element<'buf>,
75) -> Caveat<PendingStr<'buf>, Warning> {
76 let mut warnings = warning::Set::new();
77
78 if s.chars().any(|ch| ch == ESCAPE_CHAR) {
81 PendingStr::HasEscapes(EscapeStr(s)).into_caveat(warnings)
82 } else {
83 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
84 warnings.insert(Warning::ControlCharacterWhileParsingString(index), elem);
85 }
86
87 PendingStr::NoEscapes(s).into_caveat(warnings)
88 }
89}
90
/// A string that has been scanned for backslashes but not yet decoded.
pub(crate) enum PendingStr<'buf> {
    /// The string contains no backslash.
    NoEscapes(&'buf str),

    /// The string contains at least one backslash and still has to be decoded.
    HasEscapes(EscapeStr<'buf>),
}

// NOTE(review): presumably provides the `into_caveat` conversion that `analyze`
// calls on `PendingStr`; the macro is defined elsewhere — confirm.
into_caveat!(PendingStr<'buf>);
101
102pub(crate) struct EscapeStr<'buf>(&'buf str);
104
105impl<'buf> EscapeStr<'buf> {
106 pub(crate) fn decode_escapes(&self, elem: &Element<'buf>) -> Caveat<Cow<'buf, str>, Warning> {
107 unescape_str(self.0, elem)
108 }
109
110 pub(crate) fn into_raw(self) -> &'buf str {
112 self.0
113 }
114}
115
116pub(crate) fn unescape_str<'buf>(
122 s: &'buf str,
123 elem: &Element<'buf>,
124) -> Caveat<Cow<'buf, str>, Warning> {
125 let mut warnings = warning::Set::new();
126
127 if !s.chars().any(|ch| ch == ESCAPE_CHAR) {
129 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
130 warnings.insert(Warning::ControlCharacterWhileParsingString(index), elem);
131 }
132 return Cow::Borrowed(s).into_caveat(warnings);
133 }
134
135 let mut chars = Chars::from_str(s);
136 let mut buf = Buffer::with_capacity(s.len());
137
138 loop {
139 let Some((index, ch)) = chars.next() else {
140 return Cow::<'buf, str>::Owned(buf.into_string()).into_caveat(warnings);
141 };
142
143 if ch == ESCAPE_CHAR {
144 if let Err(warn_kind) = parse_escape(&mut chars, &mut buf) {
145 warnings.insert(warn_kind, elem);
146 return Cow::Borrowed(s).into_caveat(warnings);
147 }
148 } else if let Err(warn_kind) = buf.push_char(ch, index) {
149 warnings.insert(warn_kind, elem);
150 return Cow::Borrowed(s).into_caveat(warnings);
151 }
152 }
153}
154
155fn parse_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
160 let (index, ch) = chars.next_or_eof()?;
161
162 let ch = match ch {
163 '"' => '"',
164 '\\' => '\\',
165 '/' => '/',
166 'b' => '\x08',
167 'f' => '\x0c',
168 'n' => '\n',
169 'r' => '\r',
170 't' => '\t',
171 'u' => return parse_unicode_escape(chars, buf),
172 _ => {
173 return Err(Warning::InvalidEscape(index));
174 }
175 };
176
177 buf.push_char(ch, index)?;
178
179 Ok(())
180}
181
182fn parse_unicode_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
189 let n1 = decode_hex_escape(chars)?;
190 let n2 = chars.is_next_escape()?;
191
192 if let Some(n2) = n2 {
193 buf.push_surrogate_pair(n1, n2, chars.index)?;
194 } else {
195 let Some(ch) = char::from_u32(u32::from(n1)) else {
196 return Err(Warning::InvalidEscape(chars.index));
197 };
198
199 buf.push_char(ch, chars.index)?;
200 }
201
202 Ok(())
203}
204
205struct Chars<'buf> {
207 char_indices: Peekable<std::str::CharIndices<'buf>>,
213
214 index: usize,
216}
217
218impl<'buf> Chars<'buf> {
219 fn from_str(s: &'buf str) -> Self {
221 Self {
222 char_indices: s.char_indices().peekable(),
223 index: 0,
224 }
225 }
226
227 fn next_or_eof(&mut self) -> Result<(usize, char), Warning> {
230 if let Some((index, ch)) = self.next() {
231 if ch.is_control() {
232 return Err(Warning::ControlCharacterWhileParsingString(index));
233 }
234
235 Ok((index, ch))
236 } else {
237 Err(Warning::UnexpectedEndOfString(self.index))
238 }
239 }
240
241 fn is_next_escape(&mut self) -> Result<Option<u16>, Warning> {
244 {
245 let escape_char = self.char_indices.next_if(|(_, ch)| *ch == ESCAPE_CHAR);
246
247 if escape_char.is_none() {
248 return Ok(None);
249 }
250 }
251
252 {
253 let escape_unicode = self.char_indices.next_if(|(_, ch)| *ch == 'u');
254
255 if escape_unicode.is_none() {
256 return Ok(None);
257 }
258 }
259
260 let n = decode_hex_escape(self)?;
261 Ok(Some(n))
262 }
263}
264
265impl Iterator for Chars<'_> {
266 type Item = (usize, char);
267
268 fn next(&mut self) -> Option<Self::Item> {
269 if let Some((index, char)) = self.char_indices.next() {
270 self.index = index;
271 Some((index, char))
272 } else {
273 None
274 }
275 }
276}
277
278struct Buffer {
283 buf: String,
285}
286
287impl Buffer {
288 fn with_capacity(capacity: usize) -> Self {
290 Self {
291 buf: String::with_capacity(capacity),
292 }
293 }
294
295 fn push_char(&mut self, ch: char, index: usize) -> Result<(), Warning> {
300 if ch.is_control() {
301 return Err(Warning::ControlCharacterWhileParsingString(index));
302 }
303
304 self.buf.push(ch);
305 Ok(())
306 }
307
308 fn into_string(self) -> String {
310 self.buf
311 }
312
313 fn push_surrogate_pair(&mut self, n1: u16, n2: u16, index: usize) -> Result<char, Warning> {
319 let Some(ch) = char::decode_utf16([n1, n2]).next() else {
320 return Err(Warning::InvalidEscape(index));
321 };
322
323 let ch = match ch {
324 Ok(ch) => ch,
325 Err(err) => {
326 return Err(Warning::DecodeUtf16(index, err.unpaired_surrogate()));
327 }
328 };
329
330 self.push_char(ch, index)?;
331
332 Ok(ch)
333 }
334}
335
336fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, Warning> {
338 const RADIX: u32 = 16;
339
340 let (_, one) = chars.next_or_eof()?;
341 let (_, two) = chars.next_or_eof()?;
342 let (_, three) = chars.next_or_eof()?;
343 let (index, four) = chars.next_or_eof()?;
344
345 let string = [one, two, three, four].into_iter().collect::<String>();
346 let Ok(n) = u16::from_str_radix(&string, RADIX) else {
347 return Err(Warning::InvalidEscape(index));
348 };
349
350 Ok(n)
351}