1#[cfg(test)]
16mod test_from_str;
17
18use std::{borrow::Cow, fmt, iter::Peekable, ops::RangeInclusive};
19
20use super::Element;
21use crate::{
22 warning::{self, CaveatDeferred, IntoCaveatDeferred as _},
23 Caveat, IntoCaveat as _,
24};
25
/// The backslash character; in a raw JSON string it introduces an escape sequence.
const ESCAPE_CHAR: char = '\\';
27
/// The kinds of problem reported while analyzing or decoding a raw JSON string.
///
/// The `usize` carried by each variant is an index into the raw string, taken from
/// `str::char_indices` (i.e. a byte offset).
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum Warning {
    /// A control character was found at the given index.
    ControlCharacter(usize),

    /// A `\u` surrogate pair failed to decode. Carries the index and the unpaired surrogate
    /// reported by the UTF-16 decoder.
    DecodeUtf16(usize, u16),

    /// An escape sequence at the given index is not valid.
    InvalidEscape(usize),

    /// The string ended while an escape sequence was still being read.
    UnexpectedEndOfString(usize),
}
43
44impl crate::Warning for Warning {
45 fn id(&self) -> warning::Id {
47 match self {
48 Self::ControlCharacter(_) => {
49 warning::Id::from_static("control_character_while_parsing_string")
50 }
51 Self::DecodeUtf16(..) => warning::Id::from_static("decode_utf_1_6"),
52 Self::InvalidEscape(_) => warning::Id::from_static("invalid_escape"),
53 Self::UnexpectedEndOfString(_) => warning::Id::from_static("unexpected_end_of_string"),
54 }
55 }
56}
57
58impl fmt::Display for Warning {
59 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60 match self {
61 Self::ControlCharacter(index) => {
62 write!(
63 f,
64 "Control chars were found at index `{index}` while decoding a JSON string."
65 )
66 }
67 Self::DecodeUtf16(index, code) => {
68 write!(
69 f,
70 "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
71 )
72 }
73 Self::InvalidEscape(index) => {
74 write!(
75 f,
76 "String contains an invalid escape char at index: `{index})`."
77 )
78 }
79 Self::UnexpectedEndOfString(index) => {
80 write!(f, "The String ended prematurely at index: `{index}`.")
81 }
82 }
83 }
84}
85
86pub(super) fn analyze<'buf>(
88 s: &'buf str,
89 elem: &Element<'buf>,
90) -> Caveat<super::PendingStr<'buf>, Warning> {
91 let mut warnings = warning::Set::new();
92
93 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
96 warnings.insert(elem, Warning::ControlCharacter(index));
97 }
98
99 if s.chars().any(|ch| ch == ESCAPE_CHAR) {
100 super::PendingStr::HasEscapes(super::EscapeStr(s)).into_caveat(warnings)
101 } else {
102 super::PendingStr::NoEscapes(s).into_caveat(warnings)
103 }
104}
105
106pub(super) fn from_raw<'buf>(s: &'buf str) -> CaveatDeferred<Cow<'buf, str>, Warning> {
112 let mut warnings = warning::SetDeferred::new();
113
114 if !s.chars().any(|ch| ch == ESCAPE_CHAR) {
117 if let Some((index, _)) = s.char_indices().find(|(_, ch)| ch.is_control()) {
118 warnings.insert(Warning::ControlCharacter(index));
119 }
120 return Cow::Borrowed(s).into_caveat_deferred(warnings);
121 }
122
123 let mut buf = Buffer::with_capacity(s.len());
124 for decoded in Decoded::from_str(s) {
125 match decoded {
126 Ok(ch) => buf.push(ch),
127 Err(warn_kind) => {
128 warnings.insert(warn_kind);
129 return Cow::Borrowed(s).into_caveat_deferred(warnings);
130 }
131 }
132 }
133
134 Cow::<'buf, str>::Owned(buf.into_string()).into_caveat_deferred(warnings)
135}
136
137pub(super) fn eq(raw: &str, other: &str) -> Result<bool, Warning> {
145 let mut decoded = Decoded::from_str(raw);
146 let mut expected = other.chars();
147
148 loop {
149 match decoded.next() {
150 Some(Err(warn_kind)) => return Err(warn_kind),
151 Some(Ok(actual)) => {
152 if expected.next() != Some(actual) {
153 return Ok(false);
154 }
155 }
156 None => return Ok(expected.next().is_none()),
158 }
159 }
160}
161
162pub(super) fn eq_ignore_ascii_case(raw: &str, other: &str) -> Result<bool, Warning> {
164 let mut decoded = Decoded::from_str(raw);
165 let mut expected = other.chars();
166
167 loop {
168 match decoded.next() {
169 Some(Err(warn_kind)) => return Err(warn_kind),
170 Some(Ok(actual)) => match expected.next() {
171 Some(expected) if expected.eq_ignore_ascii_case(&actual) => {}
172 _ => return Ok(false),
173 },
174 None => return Ok(expected.next().is_none()),
176 }
177 }
178}
179
180fn parse_escape(chars: &mut Chars<'_>) -> Result<char, Warning> {
185 let (index, ch) = chars.next_or_eof()?;
186
187 let ch = match ch {
188 '"' => '"',
189 '\\' => '\\',
190 '/' => '/',
191 'b' => '\x08',
192 'f' => '\x0c',
193 'n' => '\n',
194 'r' => '\r',
195 't' => '\t',
196 'u' => return parse_unicode_escape(chars),
197 _ => {
198 return Err(Warning::InvalidEscape(index));
199 }
200 };
201
202 if ch.is_control() {
203 return Err(Warning::ControlCharacter(index));
204 }
205
206 Ok(ch)
207}
208
209fn parse_unicode_escape(chars: &mut Chars<'_>) -> Result<char, Warning> {
216 const HIGH_SURROGATE: RangeInclusive<u16> = 0xD800..=0xDBFF;
221
222 let n1 = decode_hex_escape(chars)?;
223
224 let ch = if HIGH_SURROGATE.contains(&n1) {
225 let Some(n2) = chars.is_next_escape()? else {
230 return Err(Warning::InvalidEscape(chars.index));
231 };
232 decode_surrogate_pair(n1, n2, chars.index)?
233 } else {
234 let Some(ch) = char::from_u32(u32::from(n1)) else {
235 return Err(Warning::InvalidEscape(chars.index));
236 };
237 ch
238 };
239
240 if ch.is_control() {
241 return Err(Warning::ControlCharacter(chars.index));
242 }
243
244 Ok(ch)
245}
246
247struct Chars<'buf> {
249 char_indices: Peekable<std::str::CharIndices<'buf>>,
255
256 push_back: Option<(usize, char)>,
259
260 index: usize,
262}
263
264impl<'buf> Chars<'buf> {
265 fn from_str(s: &'buf str) -> Self {
267 Self {
268 char_indices: s.char_indices().peekable(),
269 push_back: None,
270 index: 0,
271 }
272 }
273
274 fn next_or_eof(&mut self) -> Result<(usize, char), Warning> {
277 if let Some((index, ch)) = self.next() {
278 if ch.is_control() {
279 return Err(Warning::ControlCharacter(index));
280 }
281
282 Ok((index, ch))
283 } else {
284 Err(Warning::UnexpectedEndOfString(self.index))
285 }
286 }
287
288 fn is_next_escape(&mut self) -> Result<Option<u16>, Warning> {
294 let Some(backslash) = self.char_indices.next_if(|(_, ch)| *ch == ESCAPE_CHAR) else {
295 return Ok(None);
296 };
297
298 if self.char_indices.next_if(|(_, ch)| *ch == 'u').is_none() {
299 self.push_back = Some(backslash);
300 return Ok(None);
301 }
302
303 let n = decode_hex_escape(self)?;
304 Ok(Some(n))
305 }
306}
307
308impl Iterator for Chars<'_> {
309 type Item = (usize, char);
310
311 fn next(&mut self) -> Option<Self::Item> {
312 if let Some(item) = self.push_back.take() {
313 self.index = item.0;
314 return Some(item);
315 }
316 if let Some((index, char)) = self.char_indices.next() {
317 self.index = index;
318 Some((index, char))
319 } else {
320 None
321 }
322 }
323}
324
325struct Decoded<'buf> {
330 chars: Chars<'buf>,
331}
332
333impl<'buf> Decoded<'buf> {
334 fn from_str(s: &'buf str) -> Self {
336 Self {
337 chars: Chars::from_str(s),
338 }
339 }
340}
341
342impl Iterator for Decoded<'_> {
343 type Item = Result<char, Warning>;
344
345 fn next(&mut self) -> Option<Self::Item> {
346 let (index, ch) = self.chars.next()?;
347
348 if ch == ESCAPE_CHAR {
349 Some(parse_escape(&mut self.chars))
350 } else if ch.is_control() {
351 Some(Err(Warning::ControlCharacter(index)))
352 } else {
353 Some(Ok(ch))
354 }
355 }
356}
357
/// A thin wrapper around the `String` that decoded characters are accumulated into.
struct Buffer {
    /// The text collected so far.
    buf: String,
}

impl Buffer {
    /// Creates an empty buffer with room for at least `capacity` bytes.
    fn with_capacity(capacity: usize) -> Self {
        let buf = String::with_capacity(capacity);
        Self { buf }
    }

    /// Appends a single character.
    fn push(&mut self, ch: char) {
        let Self { buf } = self;
        buf.push(ch);
    }

    /// Consumes the buffer and returns the collected text.
    fn into_string(self) -> String {
        let Self { buf } = self;
        buf
    }
}
382
383fn decode_surrogate_pair(n1: u16, n2: u16, index: usize) -> Result<char, Warning> {
387 let Some(ch) = char::decode_utf16([n1, n2]).next() else {
388 return Err(Warning::InvalidEscape(index));
389 };
390
391 match ch {
392 Ok(ch) => Ok(ch),
393 Err(err) => Err(Warning::DecodeUtf16(index, err.unpaired_surrogate())),
394 }
395}
396
397fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, Warning> {
399 const RADIX: u32 = 16;
400
401 let (_, one) = chars.next_or_eof()?;
402 let (_, two) = chars.next_or_eof()?;
403 let (_, three) = chars.next_or_eof()?;
404 let (index, four) = chars.next_or_eof()?;
405
406 let string = [one, two, three, four].into_iter().collect::<String>();
407 let Ok(n) = u16::from_str_radix(&string, RADIX) else {
408 return Err(Warning::InvalidEscape(index));
409 };
410
411 Ok(n)
412}