1use std::{borrow::Cow, fmt, iter::Peekable};
3
4use crate::{into_caveat, warning, Caveat, IntoCaveat};
5
6use super::Element;
7
/// The character that introduces an escape sequence in a JSON string (a backslash).
const ESCAPE_CHAR: char = '\\';
9
/// The kinds of problems that can be reported while decoding a JSON string.
///
/// Every variant carries the index at which the problem was detected. The
/// indices originate from `str::char_indices`, so they are byte offsets into
/// the string being decoded.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum Warning {
    /// A control character was found at the given index.
    ControlCharacterWhileParsingString(usize),

    /// A pair of `\u` escapes could not be decoded as UTF-16.
    ///
    /// Holds the index and the unpaired surrogate code unit reported by the decoder.
    DecodeUtf16(usize, u16),

    /// The escape sequence ending at (or located at) the given index is not valid.
    InvalidEscape(usize),

    /// The string ended in the middle of an escape sequence.
    ///
    /// Holds the index of the last char that was read before the end was hit.
    UnexpectedEndOfString(usize),
}
25
26impl crate::Warning for Warning {
27 fn id(&self) -> warning::Id {
29 match self {
30 Warning::ControlCharacterWhileParsingString(_) => {
31 warning::Id::from_static("control_character_while_parsing_string")
32 }
33 Warning::DecodeUtf16(..) => warning::Id::from_static("decode_utf_1_6"),
34 Warning::InvalidEscape(_) => warning::Id::from_static("invalid_escape"),
35 Warning::UnexpectedEndOfString(_) => {
36 warning::Id::from_static("unexpected_end_of_string")
37 }
38 }
39 }
40}
41
42impl fmt::Display for Warning {
43 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
44 match self {
45 Warning::ControlCharacterWhileParsingString(index) => {
46 write!(
47 f,
48 "Control chars were found at index `{index}` while decoding a JSON string."
49 )
50 }
51 Warning::DecodeUtf16(index, code) => {
52 write!(
53 f,
54 "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
55 )
56 }
57 Warning::InvalidEscape(index) => {
58 write!(
59 f,
60 "String contains an invalid escape char at index: `{index})`."
61 )
62 }
63 Warning::UnexpectedEndOfString(index) => {
64 write!(f, "The String ended prematurely at index: `{index}`.")
65 }
66 }
67 }
68}
69
70pub(crate) fn analyze<'buf>(
72 s: &'buf str,
73 elem: &Element<'buf>,
74) -> Caveat<PendingStr<'buf>, Warning> {
75 let mut warnings = warning::Set::new();
76
77 if s.chars().any(|ch| ch == ESCAPE_CHAR) {
80 PendingStr::HasEscapes(EscapeStr(s)).into_caveat(warnings)
81 } else {
82 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
83 warnings.with_elem(Warning::ControlCharacterWhileParsingString(index), elem);
84 }
85
86 PendingStr::NoEscapes(s).into_caveat(warnings)
87 }
88}
89
/// The result of `analyze`: a raw string slice tagged with whether it still
/// needs its escape sequences decoded.
pub(crate) enum PendingStr<'buf> {
    /// The slice contains no escape character and can be used as-is.
    NoEscapes(&'buf str),

    /// The slice contains at least one escape character and has not been decoded yet.
    HasEscapes(EscapeStr<'buf>),
}

// NOTE(review): presumably provides the `into_caveat` method that `analyze`
// calls on `PendingStr` — confirm against the macro definition.
into_caveat!(PendingStr<'buf>);
100
101pub(crate) struct EscapeStr<'buf>(&'buf str);
103
104impl<'buf> EscapeStr<'buf> {
105 pub(crate) fn decode_escapes(&self, elem: &Element<'buf>) -> Caveat<Cow<'buf, str>, Warning> {
106 unescape_str(self.0, elem)
107 }
108
109 pub(crate) fn into_raw(self) -> &'buf str {
111 self.0
112 }
113}
114
115pub(crate) fn unescape_str<'buf>(
121 s: &'buf str,
122 elem: &Element<'buf>,
123) -> Caveat<Cow<'buf, str>, Warning> {
124 let mut warnings = warning::Set::new();
125
126 if !s.chars().any(|ch| ch == ESCAPE_CHAR) {
128 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
129 warnings.with_elem(Warning::ControlCharacterWhileParsingString(index), elem);
130 }
131 return Cow::Borrowed(s).into_caveat(warnings);
132 }
133
134 let mut chars = Chars::from_str(s);
135 let mut buf = Buffer::with_capacity(s.len());
136
137 loop {
138 let Some((index, ch)) = chars.next() else {
139 return Cow::<'buf, str>::Owned(buf.into_string()).into_caveat(warnings);
140 };
141
142 if ch == ESCAPE_CHAR {
143 if let Err(warn_kind) = parse_escape(&mut chars, &mut buf) {
144 warnings.with_elem(warn_kind, elem);
145 return Cow::Borrowed(s).into_caveat(warnings);
146 }
147 } else if let Err(warn_kind) = buf.push_char(ch, index) {
148 warnings.with_elem(warn_kind, elem);
149 return Cow::Borrowed(s).into_caveat(warnings);
150 }
151 }
152}
153
154fn parse_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
159 let (index, ch) = chars.next_or_eof()?;
160
161 let ch = match ch {
162 '"' => '"',
163 '\\' => '\\',
164 '/' => '/',
165 'b' => '\x08',
166 'f' => '\x0c',
167 'n' => '\n',
168 'r' => '\r',
169 't' => '\t',
170 'u' => return parse_unicode_escape(chars, buf),
171 _ => {
172 return Err(Warning::InvalidEscape(index));
173 }
174 };
175
176 buf.push_char(ch, index)?;
177
178 Ok(())
179}
180
181fn parse_unicode_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
188 let n1 = decode_hex_escape(chars)?;
189 let n2 = chars.is_next_escape()?;
190
191 if let Some(n2) = n2 {
192 buf.push_surrogate_pair(n1, n2, chars.index)?;
193 } else {
194 let Some(ch) = char::from_u32(u32::from(n1)) else {
195 return Err(Warning::InvalidEscape(chars.index));
196 };
197
198 buf.push_char(ch, chars.index)?;
199 }
200
201 Ok(())
202}
203
204struct Chars<'buf> {
206 char_indices: Peekable<std::str::CharIndices<'buf>>,
212
213 index: usize,
215}
216
217impl<'buf> Chars<'buf> {
218 fn from_str(s: &'buf str) -> Self {
220 Self {
221 char_indices: s.char_indices().peekable(),
222 index: 0,
223 }
224 }
225
226 fn next_or_eof(&mut self) -> Result<(usize, char), Warning> {
229 if let Some((index, ch)) = self.next() {
230 if ch.is_control() {
231 return Err(Warning::ControlCharacterWhileParsingString(index));
232 }
233
234 Ok((index, ch))
235 } else {
236 Err(Warning::UnexpectedEndOfString(self.index))
237 }
238 }
239
240 fn is_next_escape(&mut self) -> Result<Option<u16>, Warning> {
243 {
244 let escape_char = self.char_indices.next_if(|(_, ch)| *ch == ESCAPE_CHAR);
245
246 if escape_char.is_none() {
247 return Ok(None);
248 }
249 }
250
251 {
252 let escape_unicode = self.char_indices.next_if(|(_, ch)| *ch == 'u');
253
254 if escape_unicode.is_none() {
255 return Ok(None);
256 }
257 }
258
259 let n = decode_hex_escape(self)?;
260 Ok(Some(n))
261 }
262}
263
264impl Iterator for Chars<'_> {
265 type Item = (usize, char);
266
267 fn next(&mut self) -> Option<Self::Item> {
268 if let Some((index, char)) = self.char_indices.next() {
269 self.index = index;
270 Some((index, char))
271 } else {
272 None
273 }
274 }
275}
276
277struct Buffer {
282 buf: String,
284}
285
286impl Buffer {
287 fn with_capacity(capacity: usize) -> Self {
289 Self {
290 buf: String::with_capacity(capacity),
291 }
292 }
293
294 fn push_char(&mut self, ch: char, index: usize) -> Result<(), Warning> {
299 if ch.is_control() {
300 return Err(Warning::ControlCharacterWhileParsingString(index));
301 }
302
303 self.buf.push(ch);
304 Ok(())
305 }
306
307 fn into_string(self) -> String {
309 self.buf
310 }
311
312 fn push_surrogate_pair(&mut self, n1: u16, n2: u16, index: usize) -> Result<char, Warning> {
318 let Some(ch) = char::decode_utf16([n1, n2]).next() else {
319 return Err(Warning::InvalidEscape(index));
320 };
321
322 let ch = match ch {
323 Ok(ch) => ch,
324 Err(err) => {
325 return Err(Warning::DecodeUtf16(index, err.unpaired_surrogate()));
326 }
327 };
328
329 self.push_char(ch, index)?;
330
331 Ok(ch)
332 }
333}
334
335fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, Warning> {
337 const RADIX: u32 = 16;
338
339 let (_, one) = chars.next_or_eof()?;
340 let (_, two) = chars.next_or_eof()?;
341 let (_, three) = chars.next_or_eof()?;
342 let (index, four) = chars.next_or_eof()?;
343
344 let string = [one, two, three, four].into_iter().collect::<String>();
345 let Ok(n) = u16::from_str_radix(&string, RADIX) else {
346 return Err(Warning::InvalidEscape(index));
347 };
348
349 Ok(n)
350}
351
/// Unit tests for `unescape_str`.
#[cfg(test)]
mod test_unescape {
    // The tests index the warning map directly (`warnings["$"]`), which is
    // what `clippy::indexing_slicing` would otherwise flag.
    #![allow(
        clippy::indexing_slicing,
        reason = "unwraps are allowed anywhere in tests"
    )]

    use std::{borrow::Cow, sync::Arc};

    use assert_matches::assert_matches;

    use crate::json;

    use super::{unescape_str, Warning};

    /// Builds a placeholder root element that warnings can be attached to.
    fn test_elem() -> json::Element<'static> {
        json::Element {
            id: 0.into(),
            path_node: Arc::new(json::PathNode::Root),
            span: json::parser::Span::default(),
            value: json::Value::Null,
        }
    }

    /// An empty string has nothing to decode and is returned borrowed.
    #[test]
    fn should_unescape_empty_str() {
        const INPUT: &str = "";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        assert_matches!(string, Cow::Borrowed(""));
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    /// A string without a backslash is returned borrowed, without allocating.
    #[test]
    fn should_unescape_str_without_escapes() {
        const INPUT: &str = "ab";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        assert_matches!(string, Cow::Borrowed(INPUT));
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    /// `\/` decodes to `/` and the result is an owned string.
    #[test]
    fn should_unescape_str_with_forward_slash_escape() {
        const INPUT: &str = r"a\/b";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );

        assert_eq!(s, "a/b");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    /// Several escapes in one string, including one at the very end.
    #[test]
    fn should_unescape_str_with_many_escapes() {
        const INPUT: &str = r#"a\/\"b\""#;

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );

        assert_eq!(s, r#"a/"b""#);
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    /// Unknown escape codes and a trailing backslash produce a warning and
    /// the original input is handed back borrowed.
    #[test]
    fn should_fail_to_unescape_str_with_invalid_escape() {
        // Invalid escape code at the start: `a` sits at index 1.
        {
            const INPUT: &str = r"\a/c";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_path_as_str_map();
            let warnings = &warnings["$"];

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [Warning::InvalidEscape(1)]);
        }

        // Invalid escape code at the end: `c` sits at index 2.
        {
            const INPUT: &str = r"a\c";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_path_as_str_map();
            let warnings = &warnings["$"];

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [Warning::InvalidEscape(2)]);
        }

        // A backslash with nothing after it: the backslash sits at index 3.
        {
            const INPUT: &str = r"a/c\";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_path_as_str_map();
            let warnings = &warnings["$"];

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [Warning::UnexpectedEndOfString(3)]);
        }
    }

    /// A literal control character (U+0019 at index 5) in a string without
    /// escapes is reported.
    #[test]
    fn should_fail_to_unescape_str_with_control_char() {
        const INPUT: &str = "hello\u{0019}world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_path_as_str_map();
        let warnings = &warnings["$"];

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [Warning::ControlCharacterWhileParsingString(5)]
        );
    }

    /// The raw string keeps the Rust-style `\u{0019}` text as-is. The four
    /// chars after `\u` are `{001`, which is not valid hex; the failure is
    /// reported at the fourth of them (index 10).
    #[test]
    fn should_fail_to_unescape_raw_str_with_rust_unicode_literal_control_char() {
        const INPUT: &str = r"hello\u{0019}world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_path_as_str_map();
        let warnings = &warnings["$"];

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(warnings.as_slice(), [Warning::InvalidEscape(10)]);
    }

    /// A well-formed JSON escape that decodes to a control character is
    /// rejected too; index 10 is the last hex digit of the escape.
    #[test]
    fn should_fail_to_unescape_json_control_escape() {
        const INPUT: &str = r"hello\u0019world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_path_as_str_map();
        let warnings = &warnings["$"];

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [Warning::ControlCharacterWhileParsingString(10)]
        );
    }

    /// `\u0020` and `\u0021` decode to a space and an exclamation mark.
    #[test]
    fn should_unescape_unicode_literals() {
        const INPUT: &str = r"hello\u0020world\u0021";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello world!");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    /// The surrogate pair `\uD834\uDD1E` decodes to the single char U+1D11E.
    #[test]
    fn should_unescape_utf_16_surrogate_pair() {
        const INPUT: &str = r"hello\uD834\uDD1Eworld";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello\u{1D11E}world");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    /// A `\u` escape directly followed by a non-unicode escape (`\/`).
    #[test]
    fn should_unescape_unicode_literal_followed_by_simple_escape() {
        const INPUT: &str = r"hello\u0020\/world\u0021";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello /world!");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }
}