1use std::{borrow::Cow, fmt, iter::Peekable};
3
4use crate::{into_caveat, warning, Caveat, IntoCaveat};
5
6use super::Element;
7
/// The backslash character that introduces an escape sequence inside a JSON string.
const ESCAPE_CHAR: char = '\\';
9
/// The kinds of problem that can be reported while analyzing or unescaping a JSON string.
///
/// Every variant carries the byte index into the source string at which the problem was
/// detected.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum Warning {
    /// A control character was found at the given index.
    ControlCharacterWhileParsingString(usize),

    /// A UTF-16 code unit could not be decoded.
    ///
    /// Carries the index and the unpaired surrogate code unit.
    DecodeUtf16(usize, u16),

    /// An invalid escape sequence was found at the given index.
    InvalidEscape(usize),

    /// The string ended in the middle of an escape sequence.
    UnexpectedEndOfString(usize),
}
25
26impl crate::Warning for Warning {
27 fn id(&self) -> crate::SmartString {
29 match self {
30 Warning::ControlCharacterWhileParsingString(_) => {
31 "control_character_while_parsing_string".into()
32 }
33 Warning::DecodeUtf16(..) => "decode_utf_1_6".into(),
34 Warning::InvalidEscape(_) => "invalid_escape".into(),
35 Warning::UnexpectedEndOfString(_) => "unexpected_end_of_string".into(),
36 }
37 }
38}
39
40impl fmt::Display for Warning {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 match self {
43 Warning::ControlCharacterWhileParsingString(index) => {
44 write!(
45 f,
46 "Control chars were found at index `{index}` while decoding a JSON string."
47 )
48 }
49 Warning::DecodeUtf16(index, code) => {
50 write!(
51 f,
52 "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
53 )
54 }
55 Warning::InvalidEscape(index) => {
56 write!(
57 f,
58 "String contains an invalid escape char at index: `{index})`."
59 )
60 }
61 Warning::UnexpectedEndOfString(index) => {
62 write!(f, "The String ended prematurely at index: `{index}`.")
63 }
64 }
65 }
66}
67
68pub(crate) fn analyze<'buf>(
70 s: &'buf str,
71 elem: &Element<'buf>,
72) -> Caveat<PendingStr<'buf>, Warning> {
73 let mut warnings = warning::Set::new();
74
75 if s.chars().any(|ch| ch == ESCAPE_CHAR) {
78 PendingStr::HasEscapes(EscapeStr(s)).into_caveat(warnings)
79 } else {
80 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
81 warnings.with_elem(Warning::ControlCharacterWhileParsingString(index), elem);
82 }
83
84 PendingStr::NoEscapes(s).into_caveat(warnings)
85 }
86}
87
/// The result of `analyze`: a string slice tagged by whether it still needs unescaping.
pub(crate) enum PendingStr<'buf> {
    /// The string contains no backslash and can be used as-is.
    NoEscapes(&'buf str),

    /// The string contains at least one backslash; the escapes have not been decoded yet.
    HasEscapes(EscapeStr<'buf>),
}

// NOTE(review): presumably implements `IntoCaveat` for `PendingStr` — confirm against the
// macro definition.
into_caveat!(PendingStr<'buf>);
98
/// A raw string slice whose escape sequences have not been decoded yet.
///
/// Within this file it is only constructed by `analyze`, for strings that contain a backslash.
pub(crate) struct EscapeStr<'buf>(&'buf str);

impl<'buf> EscapeStr<'buf> {
    /// Decode the escape sequences of the wrapped string by delegating to [`unescape_str`].
    ///
    /// Warnings produced while decoding are attached to `elem`.
    pub(crate) fn decode_escapes(&self, elem: &Element<'buf>) -> Caveat<Cow<'buf, str>, Warning> {
        unescape_str(self.0, elem)
    }

    /// Consume the wrapper and return the raw string, escapes still undecoded.
    pub(crate) fn into_raw(self) -> &'buf str {
        self.0
    }
}
112
113pub(crate) fn unescape_str<'buf>(
119 s: &'buf str,
120 elem: &Element<'buf>,
121) -> Caveat<Cow<'buf, str>, Warning> {
122 let mut warnings = warning::Set::new();
123
124 if !s.chars().any(|ch| ch == ESCAPE_CHAR) {
126 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
127 warnings.with_elem(Warning::ControlCharacterWhileParsingString(index), elem);
128 }
129 return Cow::Borrowed(s).into_caveat(warnings);
130 }
131
132 let mut chars = Chars::from_str(s);
133 let mut buf = Buffer::with_capacity(s.len());
134
135 loop {
136 let Some((index, ch)) = chars.next() else {
137 return Cow::<'buf, str>::Owned(buf.into_string()).into_caveat(warnings);
138 };
139
140 if ch == ESCAPE_CHAR {
141 if let Err(warn_kind) = parse_escape(&mut chars, &mut buf) {
142 warnings.with_elem(warn_kind, elem);
143 return Cow::Borrowed(s).into_caveat(warnings);
144 }
145 } else if let Err(warn_kind) = buf.push_char(ch, index) {
146 warnings.with_elem(warn_kind, elem);
147 return Cow::Borrowed(s).into_caveat(warnings);
148 }
149 }
150}
151
152fn parse_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
157 let (index, ch) = chars.next_or_eof()?;
158
159 let ch = match ch {
160 '"' => '"',
161 '\\' => '\\',
162 '/' => '/',
163 'b' => '\x08',
164 'f' => '\x0c',
165 'n' => '\n',
166 'r' => '\r',
167 't' => '\t',
168 'u' => return parse_unicode_escape(chars, buf),
169 _ => {
170 return Err(Warning::InvalidEscape(index));
171 }
172 };
173
174 buf.push_char(ch, index)?;
175
176 Ok(())
177}
178
179fn parse_unicode_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
186 let n1 = decode_hex_escape(chars)?;
187 let n2 = chars.is_next_escape()?;
188
189 if let Some(n2) = n2 {
190 buf.push_surrogate_pair(n1, n2, chars.index)?;
191 } else {
192 let Some(ch) = char::from_u32(u32::from(n1)) else {
193 return Err(Warning::InvalidEscape(chars.index));
194 };
195
196 buf.push_char(ch, chars.index)?;
197 }
198
199 Ok(())
200}
201
/// A cursor over the chars of a string that remembers the position of the last char consumed.
struct Chars<'buf> {
    /// The underlying iterator; peekable so that the next char can be inspected before it is
    /// consumed.
    char_indices: Peekable<std::str::CharIndices<'buf>>,

    /// Byte index into the source string, recorded as chars are consumed and used when
    /// reporting where a warning occurred.
    index: usize,
}
214
215impl<'buf> Chars<'buf> {
216 fn from_str(s: &'buf str) -> Self {
218 Self {
219 char_indices: s.char_indices().peekable(),
220 index: 0,
221 }
222 }
223
224 fn next_or_eof(&mut self) -> Result<(usize, char), Warning> {
227 if let Some((index, ch)) = self.next() {
228 if ch.is_control() {
229 return Err(Warning::ControlCharacterWhileParsingString(index));
230 }
231
232 Ok((index, ch))
233 } else {
234 Err(Warning::UnexpectedEndOfString(self.index))
235 }
236 }
237
238 fn is_next_escape(&mut self) -> Result<Option<u16>, Warning> {
241 {
242 let escape_char = self.char_indices.next_if(|(_, ch)| *ch == ESCAPE_CHAR);
243
244 if escape_char.is_none() {
245 return Ok(None);
246 }
247 }
248
249 {
250 let escape_unicode = self.char_indices.next_if(|(_, ch)| *ch == 'u');
251
252 if escape_unicode.is_none() {
253 return Ok(None);
254 }
255 }
256
257 let n = decode_hex_escape(self)?;
258 Ok(Some(n))
259 }
260}
261
262impl Iterator for Chars<'_> {
263 type Item = (usize, char);
264
265 fn next(&mut self) -> Option<Self::Item> {
266 if let Some((index, char)) = self.char_indices.next() {
267 self.index = index;
268 Some((index, char))
269 } else {
270 None
271 }
272 }
273}
274
/// The output buffer that decoded chars are appended to.
struct Buffer {
    /// The decoded text accumulated so far.
    buf: String,
}
283
284impl Buffer {
285 fn with_capacity(capacity: usize) -> Self {
287 Self {
288 buf: String::with_capacity(capacity),
289 }
290 }
291
292 fn push_char(&mut self, ch: char, index: usize) -> Result<(), Warning> {
297 if ch.is_control() {
298 return Err(Warning::ControlCharacterWhileParsingString(index));
299 }
300
301 self.buf.push(ch);
302 Ok(())
303 }
304
305 fn into_string(self) -> String {
307 self.buf
308 }
309
310 fn push_surrogate_pair(&mut self, n1: u16, n2: u16, index: usize) -> Result<char, Warning> {
316 let Some(ch) = char::decode_utf16([n1, n2]).next() else {
317 return Err(Warning::InvalidEscape(index));
318 };
319
320 let ch = match ch {
321 Ok(ch) => ch,
322 Err(err) => {
323 return Err(Warning::DecodeUtf16(index, err.unpaired_surrogate()));
324 }
325 };
326
327 self.push_char(ch, index)?;
328
329 Ok(ch)
330 }
331}
332
333fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, Warning> {
335 const RADIX: u32 = 16;
336
337 let (_, one) = chars.next_or_eof()?;
338 let (_, two) = chars.next_or_eof()?;
339 let (_, three) = chars.next_or_eof()?;
340 let (index, four) = chars.next_or_eof()?;
341
342 let string = [one, two, three, four].into_iter().collect::<String>();
343 let Ok(n) = u16::from_str_radix(&string, RADIX) else {
344 return Err(Warning::InvalidEscape(index));
345 };
346
347 Ok(n)
348}
349
/// Tests for `unescape_str`.
#[cfg(test)]
mod test_unescape {
    #![allow(
        clippy::indexing_slicing,
        reason = "unwraps are allowed anywhere in tests"
    )]

    use std::{borrow::Cow, sync::Arc};

    use assert_matches::assert_matches;

    use crate::json;

    use super::{unescape_str, Warning};

    /// Build a root-level `Element` for warnings to be attached to.
    ///
    /// The tests look warnings up under the `"$"` path.
    fn test_elem() -> json::Element<'static> {
        json::Element {
            id: 0.into(),
            path_node: Arc::new(json::PathNode::Root),
            span: json::parser::Span::default(),
            value: json::Value::Null,
        }
    }

    #[test]
    fn should_unescape_empty_str() {
        const INPUT: &str = "";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        // No escapes present, so the input is returned borrowed.
        assert_matches!(string, Cow::Borrowed(""));
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    #[test]
    fn should_unescape_str_without_escapes() {
        const INPUT: &str = "ab";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        // No escapes present, so the input is returned borrowed.
        assert_matches!(string, Cow::Borrowed(INPUT));
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    #[test]
    fn should_unescape_str_with_forward_slash_escape() {
        const INPUT: &str = r"a\/b";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        // Decoding an escape produces an owned string.
        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );

        assert_eq!(s, "a/b");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    #[test]
    fn should_unescape_str_with_many_escapes() {
        const INPUT: &str = r#"a\/\"b\""#;

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );

        assert_eq!(s, r#"a/"b""#);
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    #[test]
    fn should_fail_to_unescape_str_with_invalid_escape() {
        // Invalid escape at the start: the offending `a` is at index 1.
        {
            const INPUT: &str = r"\a/c";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_path_map();
            let warnings = &warnings["$"];

            // On failure the original input is returned borrowed.
            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [Warning::InvalidEscape(1)]);
        }

        // Invalid escape at the end: the offending `c` is at index 2.
        {
            const INPUT: &str = r"a\c";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_path_map();
            let warnings = &warnings["$"];

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [Warning::InvalidEscape(2)]);
        }

        // A trailing backslash with nothing after it: reported at the backslash (index 3).
        {
            const INPUT: &str = r"a/c\";

            let elem = test_elem();
            let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
            let warnings = warnings.into_path_map();
            let warnings = &warnings["$"];

            assert_matches!(string, Cow::Borrowed(_));
            assert_matches!(warnings.as_slice(), [Warning::UnexpectedEndOfString(3)]);
        }
    }

    #[test]
    fn should_fail_to_unescape_str_with_control_char() {
        // A non-raw literal: Rust itself turns `\u{0019}` into the control char U+0019, so the
        // input contains no backslash at all.
        const INPUT: &str = "hello\u{0019}world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_path_map();
        let warnings = &warnings["$"];

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [Warning::ControlCharacterWhileParsingString(5)]
        );
    }

    #[test]
    fn should_fail_to_unescape_raw_str_with_rust_unicode_literal_control_char() {
        // A raw literal: the input really contains `\u{0019}`. The `{` is not a hex digit, so
        // the escape is invalid; the index reported is that of the fourth char after `\u`.
        const INPUT: &str = r"hello\u{0019}world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_path_map();
        let warnings = &warnings["$"];

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(warnings.as_slice(), [Warning::InvalidEscape(10)]);
    }

    #[test]
    fn should_fail_to_unescape_json_control_escape() {
        // A well-formed JSON escape that decodes to the control char U+0019, which is rejected.
        const INPUT: &str = r"hello\u0019world";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();
        let warnings = warnings.into_path_map();
        let warnings = &warnings["$"];

        assert_matches!(string, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [Warning::ControlCharacterWhileParsingString(10)]
        );
    }

    #[test]
    fn should_unescape_unicode_literals() {
        // `\u0020` is a space and `\u0021` is `!`.
        const INPUT: &str = r"hello\u0020world\u0021";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello world!");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    #[test]
    fn should_unescape_utf_16_surrogate_pair() {
        // `\uD834\uDD1E` is the UTF-16 surrogate pair for U+1D11E.
        const INPUT: &str = r"hello\uD834\uDD1Eworld";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello\u{1D11E}world");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }

    #[test]
    fn should_unescape_unicode_literal_followed_by_simple_escape() {
        // A `\uXXXX` escape directly followed by a non-unicode escape (`\/`).
        const INPUT: &str = r"hello\u0020\/world\u0021";

        let elem = test_elem();
        let (string, warnings) = unescape_str(INPUT, &elem).into_parts();

        let s = assert_matches!(
            string,
            Cow::Owned(s) => s
        );
        assert_eq!(s, "hello /world!");
        assert!(warnings.is_empty(), "{warnings:#?}");
    }
}