1use std::{borrow::Cow, fmt, iter::Peekable};
3
4use crate::{into_caveat, warning, Caveat, IntoCaveat};
5
6use super::Element;
7
/// The character that introduces an escape sequence inside a string (a backslash).
const ESCAPE_CHAR: char = '\\';
9
/// The kinds of warning that can be raised while analyzing or unescaping a string.
///
/// The `usize` carried by each variant is an index into the input `&str` as produced by
/// `str::char_indices`, i.e. a byte offset.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum WarningKind {
    /// A control character was found at the given index.
    ControlCharacterWhileParsingString(usize),

    /// A pair of UTF-16 code units failed to decode. Carries the index and the unpaired
    /// surrogate code unit reported by the decoder.
    DecodeUtf16(usize, u16),

    /// An escape sequence at the given index is not valid.
    InvalidEscape(usize),

    /// The input ended while more characters were still expected.
    UnexpectedEndOfString(usize),
}
25
26impl warning::Kind for WarningKind {
27 fn id(&self) -> Cow<'static, str> {
29 match self {
30 WarningKind::ControlCharacterWhileParsingString(_) => {
31 "control_character_while_parsing_string".into()
32 }
33 WarningKind::DecodeUtf16(..) => "decode_utf_1_6".into(),
34 WarningKind::InvalidEscape(_) => "invalid_escape".into(),
35 WarningKind::UnexpectedEndOfString(_) => "unexpected_end_of_string".into(),
36 }
37 }
38}
39
40impl fmt::Display for WarningKind {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 match self {
43 WarningKind::ControlCharacterWhileParsingString(index) => {
44 write!(
45 f,
46 "Control chars were found at index `{index}` while decoding a JSON string."
47 )
48 }
49 WarningKind::DecodeUtf16(index, code) => {
50 write!(
51 f,
52 "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
53 )
54 }
55 WarningKind::InvalidEscape(index) => {
56 write!(
57 f,
58 "String contains an invalid escape char at index: `{index})`."
59 )
60 }
61 WarningKind::UnexpectedEndOfString(index) => {
62 write!(f, "The String ended prematurely at index: `{index}`.")
63 }
64 }
65 }
66}
67
68pub(crate) fn analyze<'buf>(
70 s: &'buf str,
71 elem: &Element<'buf>,
72) -> Caveat<PendingStr<'buf>, WarningKind> {
73 let mut warnings = warning::Set::new();
74
75 if s.chars().any(|ch| ch == ESCAPE_CHAR) {
78 PendingStr::HasEscapes(EscapeStr(s)).into_caveat(warnings)
79 } else {
80 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
81 warnings.with_elem(WarningKind::ControlCharacterWhileParsingString(index), elem);
82 }
83
84 PendingStr::NoEscapes(s).into_caveat(warnings)
85 }
86}
87
/// The result of analyzing a string slice: either usable as-is or still holding escapes.
pub(crate) enum PendingStr<'buf> {
    /// The slice contains no escape character and can be used directly.
    NoEscapes(&'buf str),

    /// The slice contains at least one escape character and has not been decoded yet.
    HasEscapes(EscapeStr<'buf>),
}

// NOTE(review): presumably implements `IntoCaveat` for `PendingStr` - confirm against the
// macro definition.
into_caveat!(PendingStr<'buf>);
98
/// A borrowed string slice that contains escape sequences which have not been decoded yet.
pub(crate) struct EscapeStr<'buf>(&'buf str);

impl<'buf> EscapeStr<'buf> {
    /// Decodes the escape sequences of the wrapped slice by delegating to [`unescape_str`].
    ///
    /// Any warnings produced while decoding are attached to `elem`.
    pub(crate) fn decode_escapes(
        &self,
        elem: &Element<'buf>,
    ) -> Caveat<Cow<'buf, str>, WarningKind> {
        unescape_str(self.0, elem)
    }

    /// Consumes the wrapper and returns the wrapped slice without decoding anything.
    pub(crate) fn into_raw(self) -> &'buf str {
        self.0
    }
}
115
116pub(crate) fn unescape_str<'buf>(
122 s: &'buf str,
123 elem: &Element<'buf>,
124) -> Caveat<Cow<'buf, str>, WarningKind> {
125 let mut warnings = warning::Set::new();
126
127 if !s.chars().any(|ch| ch == ESCAPE_CHAR) {
129 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
130 warnings.with_elem(WarningKind::ControlCharacterWhileParsingString(index), elem);
131 }
132 return Cow::Borrowed(s).into_caveat(warnings);
133 }
134
135 let mut chars = Chars::from_str(s);
136 let mut buf = Buffer::with_capacity(s.len());
137
138 loop {
139 let Some((index, ch)) = chars.next() else {
140 return Cow::<'buf, str>::Owned(buf.into_string()).into_caveat(warnings);
141 };
142
143 if ch == ESCAPE_CHAR {
144 if let Err(warn_kind) = parse_escape(&mut chars, &mut buf) {
145 warnings.with_elem(warn_kind, elem);
146 return Cow::Borrowed(s).into_caveat(warnings);
147 }
148 } else if let Err(warn_kind) = buf.push_char(ch, index) {
149 warnings.with_elem(warn_kind, elem);
150 return Cow::Borrowed(s).into_caveat(warnings);
151 }
152 }
153}
154
155fn parse_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), WarningKind> {
160 let (index, ch) = chars.next_or_eof()?;
161
162 let ch = match ch {
163 '"' => '"',
164 '\\' => '\\',
165 '/' => '/',
166 'b' => '\x08',
167 'f' => '\x0c',
168 'n' => '\n',
169 'r' => '\r',
170 't' => '\t',
171 'u' => return parse_unicode_escape(chars, buf),
172 _ => {
173 return Err(WarningKind::InvalidEscape(index));
174 }
175 };
176
177 buf.push_char(ch, index)?;
178
179 Ok(())
180}
181
182fn parse_unicode_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), WarningKind> {
189 let n1 = decode_hex_escape(chars)?;
190 let n2 = chars.is_next_escape()?;
191
192 if let Some(n2) = n2 {
193 buf.push_surrogate_pair(n1, n2, chars.index)?;
194 } else {
195 let Some(ch) = char::from_u32(u32::from(n1)) else {
196 return Err(WarningKind::InvalidEscape(chars.index));
197 };
198
199 buf.push_char(ch, chars.index)?;
200 }
201
202 Ok(())
203}
204
/// A cursor over the `(index, char)` pairs of a string that remembers the index of the last
/// character it yielded.
struct Chars<'buf> {
    /// The underlying character iterator; peekable so that upcoming characters can be
    /// inspected before they are consumed.
    char_indices: Peekable<std::str::CharIndices<'buf>>,

    /// The index of the character most recently returned by `next`, or `0` before the first
    /// call. Used to report a position when the input ends unexpectedly.
    index: usize,
}
217
218impl<'buf> Chars<'buf> {
219 fn from_str(s: &'buf str) -> Self {
221 Self {
222 char_indices: s.char_indices().peekable(),
223 index: 0,
224 }
225 }
226
227 fn next_or_eof(&mut self) -> Result<(usize, char), WarningKind> {
230 if let Some((index, ch)) = self.next() {
231 if ch.is_control() {
232 return Err(WarningKind::ControlCharacterWhileParsingString(index));
233 }
234
235 Ok((index, ch))
236 } else {
237 Err(WarningKind::UnexpectedEndOfString(self.index))
238 }
239 }
240
241 fn is_next_escape(&mut self) -> Result<Option<u16>, WarningKind> {
244 {
245 let escape_char = self.char_indices.next_if(|(_, ch)| *ch == ESCAPE_CHAR);
246
247 if escape_char.is_none() {
248 return Ok(None);
249 }
250 }
251
252 {
253 let escape_unicode = self.char_indices.next_if(|(_, ch)| *ch == 'u');
254
255 if escape_unicode.is_none() {
256 return Ok(None);
257 }
258 }
259
260 let n = decode_hex_escape(self)?;
261 Ok(Some(n))
262 }
263}
264
265impl Iterator for Chars<'_> {
266 type Item = (usize, char);
267
268 fn next(&mut self) -> Option<Self::Item> {
269 if let Some((index, char)) = self.char_indices.next() {
270 self.index = index;
271 Some((index, char))
272 } else {
273 None
274 }
275 }
276}
277
/// The output buffer that decoded characters are appended to.
struct Buffer {
    /// The decoded text accumulated so far.
    buf: String,
}
286
287impl Buffer {
288 fn with_capacity(capacity: usize) -> Self {
290 Self {
291 buf: String::with_capacity(capacity),
292 }
293 }
294
295 fn push_char(&mut self, ch: char, index: usize) -> Result<(), WarningKind> {
300 if ch.is_control() {
301 return Err(WarningKind::ControlCharacterWhileParsingString(index));
302 }
303
304 self.buf.push(ch);
305 Ok(())
306 }
307
308 fn into_string(self) -> String {
310 self.buf
311 }
312
313 fn push_surrogate_pair(&mut self, n1: u16, n2: u16, index: usize) -> Result<char, WarningKind> {
319 let Some(ch) = char::decode_utf16([n1, n2]).next() else {
320 return Err(WarningKind::InvalidEscape(index));
321 };
322
323 let ch = match ch {
324 Ok(ch) => ch,
325 Err(err) => {
326 return Err(WarningKind::DecodeUtf16(index, err.unpaired_surrogate()));
327 }
328 };
329
330 self.push_char(ch, index)?;
331
332 Ok(ch)
333 }
334}
335
336fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, WarningKind> {
338 const RADIX: u32 = 16;
339
340 let (_, one) = chars.next_or_eof()?;
341 let (_, two) = chars.next_or_eof()?;
342 let (_, three) = chars.next_or_eof()?;
343 let (index, four) = chars.next_or_eof()?;
344
345 let string = [one, two, three, four].into_iter().collect::<String>();
346 let Ok(n) = u16::from_str_radix(&string, RADIX) else {
347 return Err(WarningKind::InvalidEscape(index));
348 };
349
350 Ok(n)
351}
352
/// Unit tests for `unescape_str`.
#[cfg(test)]
mod test_unescape {
    use std::{borrow::Cow, rc::Rc};

    use assert_matches::assert_matches;

    use crate::json;

    use super::{unescape_str, WarningKind};

    /// Builds a placeholder root element for warnings to be attached to.
    fn test_elem() -> json::Element<'static> {
        json::Element {
            id: 0.into(),
            path_node: Rc::new(json::PathNode::Root),
            span: json::parser::Span::default(),
            value: json::Value::Null,
        }
    }

    /// An empty input is returned borrowed and without warnings.
    #[test]
    fn should_unescape_empty_str() {
        const INPUT: &str = "";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        assert_matches!(string, Cow::Borrowed(""));
        assert_matches!(warnings.as_slice(), []);
    }

    /// An input without escapes is returned borrowed and without warnings.
    #[test]
    fn should_unescape_str_without_escapes() {
        const INPUT: &str = "ab";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        assert_matches!(string, Cow::Borrowed(INPUT));
        assert_matches!(warnings.as_slice(), []);
    }

    /// A `\/` escape decodes to `/` and yields an owned string.
    #[test]
    fn should_unescape_str_with_forward_slash_escape() {
        const INPUT: &str = r"a\/b";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );

        assert_eq!(s, "a/b");
        assert_matches!(warnings.as_slice(), []);
    }

    /// Several escapes in one input are all decoded.
    #[test]
    fn should_unescape_str_with_many_escapes() {
        const INPUT: &str = r#"a\/\"b\""#;

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );

        assert_eq!(s, r#"a/"b""#);
        assert_matches!(warnings.as_slice(), []);
    }

    /// Unknown escapes and a trailing backslash produce a warning and a borrowed result.
    #[test]
    fn should_fail_to_unescape_str_with_invalid_escape() {
        // Invalid escape at the very start of the input.
        {
            const INPUT: &str = r"\a/c";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_kind_vec();

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [WarningKind::InvalidEscape(1)]);
        }

        // Invalid escape at the end of the input.
        {
            const INPUT: &str = r"a\c";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_kind_vec();

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [WarningKind::InvalidEscape(2)]);
        }

        // A backslash with nothing after it.
        {
            const INPUT: &str = r"a/c\";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_kind_vec();

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [WarningKind::UnexpectedEndOfString(3)]);
        }
    }

    /// A literal control character (the Rust `\u{0019}` escape is resolved by the compiler)
    /// in an input without escapes is reported at its index.
    #[test]
    fn should_fail_to_unescape_str_with_control_char() {
        const INPUT: &str = "hello\u{0019}world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_kind_vec();

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [WarningKind::ControlCharacterWhileParsingString(5)]
        );
    }

    /// In a raw string the Rust-style `\u{0019}` reaches the decoder verbatim; the `{` is not
    /// a hex digit, so the escape is invalid.
    #[test]
    fn should_fail_to_unescape_raw_str_with_rust_unicode_literal_control_char() {
        const INPUT: &str = r"hello\u{0019}world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_kind_vec();

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(warnings.as_slice(), [WarningKind::InvalidEscape(10)]);
    }

    /// A `\u0019` escape decodes to a control character, which is rejected.
    #[test]
    fn should_fail_to_unescape_json_control_escape() {
        const INPUT: &str = r"hello\u0019world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_kind_vec();

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [WarningKind::ControlCharacterWhileParsingString(10)]
        );
    }

    /// `\uXXXX` escapes for printable characters are decoded.
    #[test]
    fn should_unescape_unicode_literals() {
        const INPUT: &str = r"hello\u0020world\u0021";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello world!");
        assert_matches!(warnings.as_slice(), []);
    }

    /// Two consecutive `\uXXXX` escapes forming a UTF-16 surrogate pair decode to a single
    /// character (U+1D11E).
    #[test]
    fn should_unescape_utf_16_surrogate_pair() {
        const INPUT: &str = r"hello\uD834\uDD1Eworld";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello\u{1D11E}world");
        assert_matches!(warnings.as_slice(), []);
    }

    /// A `\uXXXX` escape directly followed by a simple escape decodes both.
    #[test]
    fn should_unescape_unicode_literal_followed_by_simple_escape() {
        const INPUT: &str = r"hello\u0020\/world\u0021";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello /world!");
        assert_matches!(warnings.as_slice(), []);
    }
}