1use std::{borrow::Cow, fmt, iter::Peekable};
2
3use crate::{warning, Caveat, IntoCaveat, IntoWarning};
4
5use super::Element;
6
/// The char that introduces an escape sequence inside a JSON string.
const ESCAPE_CHAR: char = '\\';
8
/// The kinds of problem that can be reported while decoding a JSON string.
///
/// The first field of every variant is an index into the string being decoded,
/// taken from `str::char_indices` (so it is a byte offset, not a char count).
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum WarningKind {
    /// A control character was encountered at the given index.
    ControlCharacterWhileParsingString(usize),

    /// A UTF-16 code unit could not be decoded.
    ///
    /// Carries the index and the unpaired surrogate code unit.
    DecodeUtf16(usize, u16),

    /// An escape sequence was not valid; carries the index where this was detected.
    InvalidEscape(usize),

    /// The string ended while more chars were still expected.
    ///
    /// Carries the index of the last char that was read.
    UnexpectedEndOfString(usize),
}
24
25impl warning::Kind for WarningKind {
26 fn id(&self) -> Cow<'static, str> {
28 match self {
29 WarningKind::ControlCharacterWhileParsingString(_) => {
30 "control_character_while_parsing_string".into()
31 }
32 WarningKind::DecodeUtf16(..) => "decode_utf_1_6".into(),
33 WarningKind::InvalidEscape(_) => "invalid_escape".into(),
34 WarningKind::UnexpectedEndOfString(_) => "unexpected_end_of_string".into(),
35 }
36 }
37}
38
39impl fmt::Display for WarningKind {
40 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41 match self {
42 WarningKind::ControlCharacterWhileParsingString(index) => {
43 write!(
44 f,
45 "Control chars were found at index `{index}` while decoding a JSON string."
46 )
47 }
48 WarningKind::DecodeUtf16(index, code) => {
49 write!(
50 f,
51 "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
52 )
53 }
54 WarningKind::InvalidEscape(index) => {
55 write!(
56 f,
57 "String contains an invalid escape char at index: `{index})`."
58 )
59 }
60 WarningKind::UnexpectedEndOfString(index) => {
61 write!(f, "The String ended prematurely at index: `{index}`.")
62 }
63 }
64 }
65}
66
67pub(crate) fn analyze<'buf>(
69 s: &'buf str,
70 elem: &Element<'buf>,
71) -> Caveat<PendingStr<'buf>, WarningKind> {
72 let mut warnings = warning::Set::new();
73
74 if s.chars().any(|ch| ch == ESCAPE_CHAR) {
77 PendingStr::HasEscapes(EscapeStr(s)).into_caveat(warnings)
78 } else {
79 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
80 warnings
81 .push(WarningKind::ControlCharacterWhileParsingString(index).into_warning(elem));
82 }
83
84 PendingStr::NoEscapes(s).into_caveat(warnings)
85 }
86}
87
/// A borrowed string that has been checked for escape sequences but not decoded.
pub(crate) enum PendingStr<'buf> {
    /// The string contains no backslash, so there is nothing to decode.
    NoEscapes(&'buf str),

    /// The string contains at least one backslash and still needs decoding.
    #[allow(dead_code, reason = "pending use in `tariff::lint`")]
    HasEscapes(EscapeStr<'buf>),
}
97
98impl IntoCaveat for PendingStr<'_> {
99 fn into_caveat<W: warning::Kind>(self, warnings: warning::Set<W>) -> Caveat<Self, W> {
100 Caveat::new(self, warnings)
101 }
102}
103
104pub(crate) struct EscapeStr<'buf>(&'buf str);
106
107impl<'buf> EscapeStr<'buf> {
108 #[allow(dead_code, reason = "pending use in `tariff::lint`")]
109 pub(crate) fn decode_escapes(
110 &self,
111 elem: &Element<'buf>,
112 ) -> Caveat<Cow<'buf, str>, WarningKind> {
113 unescape_str(self.0, elem)
114 }
115}
116
117pub(crate) fn unescape_str<'buf>(
123 s: &'buf str,
124 elem: &Element<'buf>,
125) -> Caveat<Cow<'buf, str>, WarningKind> {
126 let mut warnings = warning::Set::new();
127
128 if !s.chars().any(|ch| ch == ESCAPE_CHAR) {
130 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
131 warnings
132 .push(WarningKind::ControlCharacterWhileParsingString(index).into_warning(elem));
133 }
134 return Cow::Borrowed(s).into_caveat(warnings);
135 }
136
137 let mut chars = Chars::from_str(s);
138 let mut buf = Buffer::with_capacity(s.len());
139
140 loop {
141 let Some((index, ch)) = chars.next() else {
142 return Cow::<'buf, str>::Owned(buf.into_string()).into_caveat(warnings);
143 };
144
145 if ch == ESCAPE_CHAR {
146 if let Err(warn_kind) = parse_escape(&mut chars, &mut buf) {
147 warnings.push(warn_kind.into_warning(elem));
148 return Cow::Borrowed(s).into_caveat(warnings);
149 }
150 } else if let Err(warn_kind) = buf.push_char(ch, index) {
151 warnings.push(warn_kind.into_warning(elem));
152 return Cow::Borrowed(s).into_caveat(warnings);
153 }
154 }
155}
156
157fn parse_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), WarningKind> {
162 let (index, ch) = chars.next_or_eof()?;
163
164 let ch = match ch {
165 '"' => '"',
166 '\\' => '\\',
167 '/' => '/',
168 'b' => '\x08',
169 'f' => '\x0c',
170 'n' => '\n',
171 'r' => '\r',
172 't' => '\t',
173 'u' => return parse_unicode_escape(chars, buf),
174 _ => {
175 return Err(WarningKind::InvalidEscape(index));
176 }
177 };
178
179 buf.push_char(ch, index)?;
180
181 Ok(())
182}
183
184fn parse_unicode_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), WarningKind> {
191 let n1 = decode_hex_escape(chars)?;
192 let n2 = chars.is_next_escape()?;
193
194 if let Some(n2) = n2 {
195 buf.push_surrogate_pair(n1, n2, chars.index)?;
196 } else {
197 let Some(ch) = char::from_u32(u32::from(n1)) else {
198 return Err(WarningKind::InvalidEscape(chars.index));
199 };
200
201 buf.push_char(ch, chars.index)?;
202 }
203
204 Ok(())
205}
206
207struct Chars<'buf> {
209 char_indices: Peekable<std::str::CharIndices<'buf>>,
215
216 index: usize,
218}
219
220impl<'buf> Chars<'buf> {
221 fn from_str(s: &'buf str) -> Self {
223 Self {
224 char_indices: s.char_indices().peekable(),
225 index: 0,
226 }
227 }
228
229 fn next_or_eof(&mut self) -> Result<(usize, char), WarningKind> {
232 if let Some((index, ch)) = self.next() {
233 if ch.is_control() {
234 return Err(WarningKind::ControlCharacterWhileParsingString(index));
235 }
236
237 Ok((index, ch))
238 } else {
239 Err(WarningKind::UnexpectedEndOfString(self.index))
240 }
241 }
242
243 fn is_next_escape(&mut self) -> Result<Option<u16>, WarningKind> {
246 {
247 let escape_char = self.char_indices.next_if(|(_, ch)| *ch == ESCAPE_CHAR);
248
249 if escape_char.is_none() {
250 return Ok(None);
251 }
252 }
253
254 {
255 let escape_unicode = self.char_indices.next_if(|(_, ch)| *ch == 'u');
256
257 if escape_unicode.is_none() {
258 return Ok(None);
259 }
260 }
261
262 let n = decode_hex_escape(self)?;
263 Ok(Some(n))
264 }
265}
266
267impl Iterator for Chars<'_> {
268 type Item = (usize, char);
269
270 fn next(&mut self) -> Option<Self::Item> {
271 if let Some((index, char)) = self.char_indices.next() {
272 self.index = index;
273 Some((index, char))
274 } else {
275 None
276 }
277 }
278}
279
280struct Buffer {
285 buf: String,
287}
288
289impl Buffer {
290 fn with_capacity(capacity: usize) -> Self {
292 Self {
293 buf: String::with_capacity(capacity),
294 }
295 }
296
297 fn push_char(&mut self, ch: char, index: usize) -> Result<(), WarningKind> {
302 if ch.is_control() {
303 return Err(WarningKind::ControlCharacterWhileParsingString(index));
304 }
305
306 self.buf.push(ch);
307 Ok(())
308 }
309
310 fn into_string(self) -> String {
312 self.buf
313 }
314
315 fn push_surrogate_pair(&mut self, n1: u16, n2: u16, index: usize) -> Result<char, WarningKind> {
321 let Some(ch) = char::decode_utf16([n1, n2]).next() else {
322 return Err(WarningKind::InvalidEscape(index));
323 };
324
325 let ch = match ch {
326 Ok(ch) => ch,
327 Err(err) => {
328 return Err(WarningKind::DecodeUtf16(index, err.unpaired_surrogate()));
329 }
330 };
331
332 self.push_char(ch, index)?;
333
334 Ok(ch)
335 }
336}
337
338fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, WarningKind> {
340 const RADIX: u32 = 16;
341
342 let (_, one) = chars.next_or_eof()?;
343 let (_, two) = chars.next_or_eof()?;
344 let (_, three) = chars.next_or_eof()?;
345 let (index, four) = chars.next_or_eof()?;
346
347 let string = [one, two, three, four].into_iter().collect::<String>();
348 let Ok(n) = u16::from_str_radix(&string, RADIX) else {
349 return Err(WarningKind::InvalidEscape(index));
350 };
351
352 Ok(n)
353}
354
#[cfg(test)]
mod test_unescape {
    use std::{borrow::Cow, rc::Rc};

    use assert_matches::assert_matches;

    use crate::{json, Warning};

    use super::{unescape_str, WarningKind};

    /// Builds a root element with a `Null` value for the warnings to be attached to.
    fn test_elem() -> json::Element<'static> {
        json::Element {
            id: 0.into(),
            path_node: Rc::new(json::PathNode::Root),
            span: json::parser::Span::default(),
            value: json::Value::Null,
        }
    }

    #[test]
    fn should_unescape_empty_str() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str("", &elem).into_parts();

        assert_matches!(decoded, Cow::Borrowed(""));
        assert_matches!(warnings.as_slice(), []);
    }

    #[test]
    fn should_unescape_str_without_escapes() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str("ab", &elem).into_parts();

        assert_matches!(decoded, Cow::Borrowed("ab"));
        assert_matches!(warnings.as_slice(), []);
    }

    #[test]
    fn should_unescape_str_with_forward_slash_escape() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str(r"a\/b", &elem).into_parts();

        assert_matches!(decoded, Cow::Owned(s) => assert_eq!(s, "a/b"));
        assert_matches!(warnings.as_slice(), []);
    }

    #[test]
    fn should_unescape_str_with_many_escapes() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str(r#"a\/\"b\""#, &elem).into_parts();

        assert_matches!(decoded, Cow::Owned(s) => assert_eq!(s, r#"a/"b""#));
        assert_matches!(warnings.as_slice(), []);
    }

    #[test]
    fn should_fail_to_unescape_str_with_invalid_escape() {
        // Each input is paired with the only warning kind it is expected to raise.
        let cases = [
            (r"\a/c", WarningKind::InvalidEscape(1)),
            (r"a\c", WarningKind::InvalidEscape(2)),
            (r"a/c\", WarningKind::UnexpectedEndOfString(3)),
        ];

        for (input, expected) in cases {
            let elem = test_elem();
            let (decoded, warnings) = unescape_str(input, &elem).into_parts();

            assert_matches!(decoded, Cow::Borrowed(_));
            assert_matches!(
                warnings.as_slice(),
                [Warning { kind, .. }] => assert_eq!(*kind, expected)
            );
        }
    }

    #[test]
    fn should_fail_to_unescape_str_with_control_char() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str("hello\u{0019}world", &elem).into_parts();

        assert_matches!(decoded, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [Warning {
                kind: WarningKind::ControlCharacterWhileParsingString(5),
                ..
            }]
        );
    }

    #[test]
    fn should_fail_to_unescape_raw_str_with_rust_unicode_literal_control_char() {
        // In a raw string this is a backslash, a `u` and a brace: not a valid JSON escape.
        let elem = test_elem();
        let (decoded, warnings) = unescape_str(r"hello\u{0019}world", &elem).into_parts();

        assert_matches!(decoded, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [Warning {
                kind: WarningKind::InvalidEscape(10),
                ..
            }]
        );
    }

    #[test]
    fn should_fail_to_unescape_json_control_escape() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str(r"hello\u0019world", &elem).into_parts();

        assert_matches!(decoded, Cow::Borrowed(_));
        assert_matches!(
            warnings.as_slice(),
            [Warning {
                kind: WarningKind::ControlCharacterWhileParsingString(10),
                ..
            }]
        );
    }

    #[test]
    fn should_unescape_unicode_literals() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str(r"hello\u0020world\u0021", &elem).into_parts();

        assert_matches!(decoded, Cow::Owned(s) => assert_eq!(s, "hello world!"));
        assert_matches!(warnings.as_slice(), []);
    }

    #[test]
    fn should_unescape_utf_16_surrogate_pair() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str(r"hello\uD834\uDD1Eworld", &elem).into_parts();

        assert_matches!(decoded, Cow::Owned(s) => assert_eq!(s, "hello\u{1D11E}world"));
        assert_matches!(warnings.as_slice(), []);
    }

    #[test]
    fn should_unescape_unicode_literal_followed_by_simple_escape() {
        let elem = test_elem();
        let (decoded, warnings) = unescape_str(r"hello\u0020\/world\u0021", &elem).into_parts();

        assert_matches!(decoded, Cow::Owned(s) => assert_eq!(s, "hello /world!"));
        assert_matches!(warnings.as_slice(), []);
    }
}