1#[cfg(test)]
3mod test_unescape;
4
5use std::{borrow::Cow, fmt, iter::Peekable};
6
7use crate::{warning, Caveat, IntoCaveat};
8
9use super::Element;
10
/// The character that introduces an escape sequence inside a JSON string.
const ESCAPE_CHAR: char = '\\';
12
/// The kinds of problem that can be reported while decoding a JSON string.
///
/// The first field of every variant is the index, as produced by
/// `str::char_indices`, that the problem was reported at.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum Warning {
    /// A control character (per `char::is_control`) was found in the string.
    ControlCharacterWhileParsingString(usize),

    /// A UTF-16 code unit taken from a `\u` escape could not be decoded.
    ///
    /// The second field is the unpaired surrogate that caused the failure.
    DecodeUtf16(usize, u16),

    /// An escape sequence was not recognised or could not be turned into a character.
    InvalidEscape(usize),

    /// The string ended while an escape sequence was still being read.
    UnexpectedEndOfString(usize),
}
28
29impl crate::Warning for Warning {
30 fn id(&self) -> warning::Id {
32 match self {
33 Self::ControlCharacterWhileParsingString(_) => {
34 warning::Id::from_static("control_character_while_parsing_string")
35 }
36 Self::DecodeUtf16(..) => warning::Id::from_static("decode_utf_1_6"),
37 Self::InvalidEscape(_) => warning::Id::from_static("invalid_escape"),
38 Self::UnexpectedEndOfString(_) => warning::Id::from_static("unexpected_end_of_string"),
39 }
40 }
41}
42
43impl fmt::Display for Warning {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 match self {
46 Self::ControlCharacterWhileParsingString(index) => {
47 write!(
48 f,
49 "Control chars were found at index `{index}` while decoding a JSON string."
50 )
51 }
52 Self::DecodeUtf16(index, code) => {
53 write!(
54 f,
55 "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
56 )
57 }
58 Self::InvalidEscape(index) => {
59 write!(
60 f,
61 "String contains an invalid escape char at index: `{index})`."
62 )
63 }
64 Self::UnexpectedEndOfString(index) => {
65 write!(f, "The String ended prematurely at index: `{index}`.")
66 }
67 }
68 }
69}
70
71pub(crate) fn analyze<'buf>(
73 s: &'buf str,
74 elem: &Element<'buf>,
75) -> Caveat<PendingStr<'buf>, Warning> {
76 let mut warnings = warning::Set::new();
77
78 if s.chars().any(|ch| ch == ESCAPE_CHAR) {
81 PendingStr::HasEscapes(EscapeStr(s)).into_caveat(warnings)
82 } else {
83 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
84 warnings.insert(Warning::ControlCharacterWhileParsingString(index), elem);
85 }
86
87 PendingStr::NoEscapes(s).into_caveat(warnings)
88 }
89}
90
91pub(crate) enum PendingStr<'buf> {
93 NoEscapes(&'buf str),
95
96 HasEscapes(EscapeStr<'buf>),
98}
99
100pub(crate) struct EscapeStr<'buf>(&'buf str);
102
103impl<'buf> EscapeStr<'buf> {
104 pub(crate) fn decode_escapes(&self, elem: &Element<'buf>) -> Caveat<Cow<'buf, str>, Warning> {
105 unescape_str(self.0, elem)
106 }
107
108 pub(crate) fn into_raw(self) -> &'buf str {
110 self.0
111 }
112}
113
114pub(crate) fn unescape_str<'buf>(
120 s: &'buf str,
121 elem: &Element<'buf>,
122) -> Caveat<Cow<'buf, str>, Warning> {
123 let mut warnings = warning::Set::new();
124
125 if !s.chars().any(|ch| ch == ESCAPE_CHAR) {
128 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
129 warnings.insert(Warning::ControlCharacterWhileParsingString(index), elem);
130 }
131 return Cow::Borrowed(s).into_caveat(warnings);
132 }
133
134 let mut chars = Chars::from_str(s);
135 let mut buf = Buffer::with_capacity(s.len());
136
137 loop {
138 let Some((index, ch)) = chars.next() else {
139 return Cow::<'buf, str>::Owned(buf.into_string()).into_caveat(warnings);
140 };
141
142 if ch == ESCAPE_CHAR {
143 if let Err(warn_kind) = parse_escape(&mut chars, &mut buf) {
144 warnings.insert(warn_kind, elem);
145 return Cow::Borrowed(s).into_caveat(warnings);
146 }
147 } else if let Err(warn_kind) = buf.push_char(ch, index) {
148 warnings.insert(warn_kind, elem);
149 return Cow::Borrowed(s).into_caveat(warnings);
150 }
151 }
152}
153
154fn parse_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
159 let (index, ch) = chars.next_or_eof()?;
160
161 let ch = match ch {
162 '"' => '"',
163 '\\' => '\\',
164 '/' => '/',
165 'b' => '\x08',
166 'f' => '\x0c',
167 'n' => '\n',
168 'r' => '\r',
169 't' => '\t',
170 'u' => return parse_unicode_escape(chars, buf),
171 _ => {
172 return Err(Warning::InvalidEscape(index));
173 }
174 };
175
176 buf.push_char(ch, index)?;
177
178 Ok(())
179}
180
181fn parse_unicode_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
188 let n1 = decode_hex_escape(chars)?;
189 let n2 = chars.is_next_escape()?;
190
191 if let Some(n2) = n2 {
192 buf.push_surrogate_pair(n1, n2, chars.index)?;
193 } else {
194 let Some(ch) = char::from_u32(u32::from(n1)) else {
195 return Err(Warning::InvalidEscape(chars.index));
196 };
197
198 buf.push_char(ch, chars.index)?;
199 }
200
201 Ok(())
202}
203
204struct Chars<'buf> {
206 char_indices: Peekable<std::str::CharIndices<'buf>>,
212
213 index: usize,
215}
216
217impl<'buf> Chars<'buf> {
218 fn from_str(s: &'buf str) -> Self {
220 Self {
221 char_indices: s.char_indices().peekable(),
222 index: 0,
223 }
224 }
225
226 fn next_or_eof(&mut self) -> Result<(usize, char), Warning> {
229 if let Some((index, ch)) = self.next() {
230 if ch.is_control() {
231 return Err(Warning::ControlCharacterWhileParsingString(index));
232 }
233
234 Ok((index, ch))
235 } else {
236 Err(Warning::UnexpectedEndOfString(self.index))
237 }
238 }
239
240 fn is_next_escape(&mut self) -> Result<Option<u16>, Warning> {
243 {
244 let escape_char = self.char_indices.next_if(|(_, ch)| *ch == ESCAPE_CHAR);
245
246 if escape_char.is_none() {
247 return Ok(None);
248 }
249 }
250
251 {
252 let escape_unicode = self.char_indices.next_if(|(_, ch)| *ch == 'u');
253
254 if escape_unicode.is_none() {
255 return Ok(None);
256 }
257 }
258
259 let n = decode_hex_escape(self)?;
260 Ok(Some(n))
261 }
262}
263
264impl Iterator for Chars<'_> {
265 type Item = (usize, char);
266
267 fn next(&mut self) -> Option<Self::Item> {
268 if let Some((index, char)) = self.char_indices.next() {
269 self.index = index;
270 Some((index, char))
271 } else {
272 None
273 }
274 }
275}
276
277struct Buffer {
282 buf: String,
284}
285
286impl Buffer {
287 fn with_capacity(capacity: usize) -> Self {
289 Self {
290 buf: String::with_capacity(capacity),
291 }
292 }
293
294 fn push_char(&mut self, ch: char, index: usize) -> Result<(), Warning> {
299 if ch.is_control() {
300 return Err(Warning::ControlCharacterWhileParsingString(index));
301 }
302
303 self.buf.push(ch);
304 Ok(())
305 }
306
307 fn into_string(self) -> String {
309 self.buf
310 }
311
312 fn push_surrogate_pair(&mut self, n1: u16, n2: u16, index: usize) -> Result<char, Warning> {
318 let Some(ch) = char::decode_utf16([n1, n2]).next() else {
319 return Err(Warning::InvalidEscape(index));
320 };
321
322 let ch = match ch {
323 Ok(ch) => ch,
324 Err(err) => {
325 return Err(Warning::DecodeUtf16(index, err.unpaired_surrogate()));
326 }
327 };
328
329 self.push_char(ch, index)?;
330
331 Ok(ch)
332 }
333}
334
335fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, Warning> {
337 const RADIX: u32 = 16;
338
339 let (_, one) = chars.next_or_eof()?;
340 let (_, two) = chars.next_or_eof()?;
341 let (_, three) = chars.next_or_eof()?;
342 let (index, four) = chars.next_or_eof()?;
343
344 let string = [one, two, three, four].into_iter().collect::<String>();
345 let Ok(n) = u16::from_str_radix(&string, RADIX) else {
346 return Err(Warning::InvalidEscape(index));
347 };
348
349 Ok(n)
350}