tinyjson/
parser.rs

1use std::char;
2use std::collections::HashMap;
3use std::fmt;
4use std::iter::Peekable;
5use std::str::FromStr;
6
7use crate::JsonValue;
8
/// Error produced when the input text is not valid JSON.
///
/// The error carries a human-readable message together with the line and column the parser had
/// reached when the problem was detected. Use [`JsonParseError::line`] and
/// [`JsonParseError::column`] to read the position, or the `Display` implementation to get the
/// full message.
///
/// The type is `Clone` and comparable so that errors can be stored, duplicated and asserted on
/// in tests.
///
/// ```
/// use tinyjson::{JsonParser, JsonParseError};
/// let error = JsonParser::new("[1, 2, 3".chars()).parse().unwrap_err();
/// assert!(matches!(error, JsonParseError{..}));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JsonParseError {
    // Description of what went wrong.
    msg: String,
    // Line the parser was on when the error was created.
    line: usize,
    // Column the parser was on when the error was created.
    col: usize,
}
22
23impl JsonParseError {
24    fn new(msg: String, line: usize, col: usize) -> JsonParseError {
25        JsonParseError { msg, line, col }
26    }
27
28    /// Get the line numbr where the parse error happened. This value is 1-based.
29    ///
30    /// ```
31    /// use tinyjson::{JsonParser, JsonParseError};
32    /// let error = JsonParser::new("[1, 2, 3".chars()).parse().unwrap_err();
33    /// assert_eq!(error.line(), 1);
34    /// ```
35    pub fn line(&self) -> usize {
36        self.line
37    }
38
39    /// Get the column numbr where the parse error happened. This value is 1-based.
40    ///
41    /// ```
42    /// use tinyjson::{JsonParser, JsonParseError};
43    /// let error = JsonParser::new("[1, 2, 3".chars()).parse().unwrap_err();
44    /// assert_eq!(error.column(), 8);
45    /// ```
46    pub fn column(&self) -> usize {
47        self.col
48    }
49}
50
51impl fmt::Display for JsonParseError {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        write!(
54            f,
55            "Parse error at line:{}, col:{}: {}",
56            self.line, self.col, &self.msg,
57        )
58    }
59}
60
61impl std::error::Error for JsonParseError {}
62
/// Convenient type alias for parse results: the parsed `JsonValue` on success, or a
/// `JsonParseError` describing where and why parsing failed.
pub type JsonParseResult = Result<JsonValue, JsonParseError>;
65
// Returns true only for the four characters this parser skips as JSON whitespace: space,
// line feed, carriage return and horizontal tab.
//
// Note: char::is_ascii_whitespace cannot be used here because it accepts characters that are not
// whitespace in the JSON spec. For example, U+000C FORM FEED is whitespace in Rust but not in JSON.
fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\n' | '\r' | '\t')
}
75
/// Parser which turns a stream of characters into a `JsonValue`.
///
/// In most cases there is no need to use this struct directly, because `JsonValue` implements
/// the `FromStr` trait and `str::parse` can be used instead.
///
/// ```
/// use tinyjson::{JsonParser, JsonValue};
///
/// let mut parser = JsonParser::new("[1, 2, 3]".chars());
/// let array = parser.parse().unwrap();
///
/// // Equivalent to the above code using `FromStr`
/// let array: JsonValue = "[1, 2, 3]".parse().unwrap();
/// ```
pub struct JsonParser<I: Iterator<Item = char>> {
    // Input characters, with one character of lookahead.
    chars: Peekable<I>,
    // Current line, used for error reporting.
    line: usize,
    // Current column, used for error reporting.
    col: usize,
}
97
98impl<I: Iterator<Item = char>> JsonParser<I> {
99    /// Create a new parser instance from an iterator which iterates characters. The iterator is usually built from
100    /// `str::chars` for parsing `str` or `String` values.
101    pub fn new(it: I) -> Self {
102        JsonParser {
103            chars: it.peekable(),
104            line: 1,
105            col: 0,
106        }
107    }
108
109    fn err<T>(&self, msg: String) -> Result<T, JsonParseError> {
110        Err(JsonParseError::new(msg, self.line, self.col))
111    }
112
113    fn unexpected_eof(&self) -> Result<char, JsonParseError> {
114        Err(JsonParseError::new(
115            String::from("Unexpected EOF"),
116            self.line,
117            self.col,
118        ))
119    }
120
121    fn next_pos(&mut self, c: char) {
122        if c == '\n' {
123            self.col = 0;
124            self.line += 1;
125        } else {
126            self.col += 1;
127        }
128    }
129
130    fn peek(&mut self) -> Result<char, JsonParseError> {
131        while let Some(c) = self.chars.peek().copied() {
132            if !is_whitespace(c) {
133                return Ok(c);
134            }
135            self.next_pos(c);
136            self.chars.next().unwrap();
137        }
138        self.unexpected_eof()
139    }
140
141    fn next(&mut self) -> Option<char> {
142        while let Some(c) = self.chars.next() {
143            self.next_pos(c);
144            if !is_whitespace(c) {
145                return Some(c);
146            }
147        }
148        None
149    }
150
151    fn consume(&mut self) -> Result<char, JsonParseError> {
152        if let Some(c) = self.next() {
153            Ok(c)
154        } else {
155            self.unexpected_eof()
156        }
157    }
158
159    fn consume_no_skip(&mut self) -> Result<char, JsonParseError> {
160        if let Some(c) = self.chars.next() {
161            self.next_pos(c);
162            Ok(c)
163        } else {
164            self.unexpected_eof()
165        }
166    }
167
168    fn parse_object(&mut self) -> JsonParseResult {
169        if self.consume()? != '{' {
170            return self.err(String::from("Object must starts with '{'"));
171        }
172
173        if self.peek()? == '}' {
174            self.consume().unwrap();
175            return Ok(JsonValue::Object(HashMap::new()));
176        }
177
178        let mut m = HashMap::new();
179        loop {
180            let key = match self.parse_any()? {
181                JsonValue::String(s) => s,
182                v => return self.err(format!("Key of object must be string but found {:?}", v)),
183            };
184
185            let c = self.consume()?;
186            if c != ':' {
187                return self.err(format!(
188                    "':' is expected after key of object but actually found '{}'",
189                    c
190                ));
191            }
192
193            m.insert(key, self.parse_any()?);
194
195            match self.consume()? {
196                ',' => {}
197                '}' => return Ok(JsonValue::Object(m)),
198                c => {
199                    return self.err(format!(
200                        "',' or '}}' is expected for object but actually found '{}'",
201                        c.escape_debug(),
202                    ))
203                }
204            }
205        }
206    }
207
208    fn parse_array(&mut self) -> JsonParseResult {
209        if self.consume()? != '[' {
210            return self.err(String::from("Array must starts with '['"));
211        }
212
213        if self.peek()? == ']' {
214            self.consume().unwrap();
215            return Ok(JsonValue::Array(vec![]));
216        }
217
218        let mut v = vec![self.parse_any()?];
219        loop {
220            match self.consume()? {
221                ',' => {}
222                ']' => return Ok(JsonValue::Array(v)),
223                c => {
224                    return self.err(format!(
225                        "',' or ']' is expected for array but actually found '{}'",
226                        c
227                    ))
228                }
229            }
230
231            v.push(self.parse_any()?); // Next element
232        }
233    }
234
235    fn push_utf16(&self, s: &mut String, utf16: &mut Vec<u16>) -> Result<(), JsonParseError> {
236        if utf16.is_empty() {
237            return Ok(());
238        }
239
240        match String::from_utf16(utf16) {
241            Ok(utf8) => s.push_str(&utf8),
242            Err(err) => return self.err(format!("Invalid UTF-16 sequence {:?}: {}", &utf16, err)),
243        }
244        utf16.clear();
245        Ok(())
246    }
247
248    fn parse_string(&mut self) -> JsonParseResult {
249        if self.consume()? != '"' {
250            return self.err(String::from("String must starts with double quote"));
251        }
252
253        let mut utf16 = Vec::new(); // Buffer for parsing \uXXXX UTF-16 characters
254        let mut s = String::new();
255        loop {
256            let c = match self.consume_no_skip()? {
257                '\\' => match self.consume_no_skip()? {
258                    '\\' => '\\',
259                    '/' => '/',
260                    '"' => '"',
261                    'b' => '\u{0008}',
262                    'f' => '\u{000c}',
263                    'n' => '\n',
264                    'r' => '\r',
265                    't' => '\t',
266                    'u' => {
267                        let mut u = 0u16;
268                        for _ in 0..4 {
269                            let c = self.consume()?;
270                            if let Some(h) = c.to_digit(16) {
271                                u = u * 0x10 + h as u16;
272                            } else {
273                                return self.err(format!("Unicode character must be \\uXXXX (X is hex character) format but found character '{}'", c));
274                            }
275                        }
276                        utf16.push(u);
277                        // Additional \uXXXX character may follow. UTF-16 characters must be converted
278                        // into UTF-8 string as sequence because surrogate pairs must be considered
279                        // like "\uDBFF\uDFFF".
280                        continue;
281                    }
282                    c => return self.err(format!("'\\{}' is invalid escaped character", c)),
283                },
284                '"' => {
285                    self.push_utf16(&mut s, &mut utf16)?;
286                    return Ok(JsonValue::String(s));
287                }
288                // Note: c.is_control() is not available here because JSON accepts 0x7f (DEL) in
289                // string literals but 0x7f is control character.
290                // Rough spec of JSON says string literal cannot contain control characters. But it
291                // can actually contain 0x7f.
292                c if (c as u32) < 0x20 => {
293                    return self.err(format!(
294                        "String cannot contain control character {}",
295                        c.escape_debug(),
296                    ));
297                }
298                c => c,
299            };
300
301            self.push_utf16(&mut s, &mut utf16)?;
302
303            s.push(c);
304        }
305    }
306
307    fn parse_constant(&mut self, s: &'static str) -> Option<JsonParseError> {
308        for c in s.chars() {
309            match self.consume_no_skip() {
310                Ok(x) if x != c => {
311                    return Some(JsonParseError::new(
312                        format!("Unexpected character '{}' while parsing '{}'", c, s),
313                        self.line,
314                        self.col,
315                    ));
316                }
317                Ok(_) => {}
318                Err(e) => return Some(e),
319            }
320        }
321        None
322    }
323
324    fn parse_null(&mut self) -> JsonParseResult {
325        match self.parse_constant("null") {
326            Some(err) => Err(err),
327            None => Ok(JsonValue::Null),
328        }
329    }
330
331    fn parse_true(&mut self) -> JsonParseResult {
332        match self.parse_constant("true") {
333            Some(err) => Err(err),
334            None => Ok(JsonValue::Boolean(true)),
335        }
336    }
337
338    fn parse_false(&mut self) -> JsonParseResult {
339        match self.parse_constant("false") {
340            Some(err) => Err(err),
341            None => Ok(JsonValue::Boolean(false)),
342        }
343    }
344
345    fn parse_number(&mut self) -> JsonParseResult {
346        let neg = if self.peek()? == '-' {
347            self.consume_no_skip().unwrap();
348            true
349        } else {
350            false
351        };
352
353        let mut s = String::new();
354        let mut saw_dot = false;
355        let mut saw_exp = false;
356
357        while let Some(d) = self.chars.peek() {
358            match d {
359                '0'..='9' => s.push(*d),
360                '.' => {
361                    saw_dot = true;
362                    break;
363                }
364                'e' | 'E' => {
365                    saw_exp = true;
366                    break;
367                }
368                _ => break,
369            }
370            self.consume_no_skip().unwrap();
371        }
372
373        if s.is_empty() {
374            return self.err("Integer part must not be empty in number literal".to_string());
375        }
376
377        if s.starts_with('0') && s.len() > 1 {
378            return self
379                .err("Integer part of number must not start with 0 except for '0'".to_string());
380        }
381
382        if saw_dot {
383            s.push(self.consume_no_skip().unwrap()); // eat '.'
384            while let Some(d) = self.chars.peek() {
385                match d {
386                    '0'..='9' => s.push(*d),
387                    'e' | 'E' => {
388                        saw_exp = true;
389                        break;
390                    }
391                    _ => break,
392                }
393                self.consume_no_skip().unwrap();
394            }
395            if s.ends_with('.') {
396                return self.err("Fraction part of number must not be empty".to_string());
397            }
398        }
399
400        if saw_exp {
401            s.push(self.consume_no_skip().unwrap()); // eat 'e' or 'E'
402            if let Some('+') | Some('-') = self.chars.peek() {
403                s.push(self.consume_no_skip().unwrap());
404            }
405
406            let mut saw_digit = false;
407            while let Some(d) = self.chars.peek() {
408                match d {
409                    '0'..='9' => s.push(*d),
410                    _ => break,
411                }
412                saw_digit = true;
413                self.consume_no_skip().unwrap();
414            }
415
416            if !saw_digit {
417                return self.err("Exponent part must not be empty in number literal".to_string());
418            }
419        }
420
421        match s.parse::<f64>() {
422            Ok(n) => Ok(JsonValue::Number(if neg { -n } else { n })),
423            Err(err) => self.err(format!("Invalid number literal '{}': {}", s, err)),
424        }
425    }
426
427    fn parse_any(&mut self) -> JsonParseResult {
428        match self.peek()? {
429            '0'..='9' | '-' => self.parse_number(),
430            '"' => self.parse_string(),
431            '[' => self.parse_array(),
432            '{' => self.parse_object(),
433            't' => self.parse_true(),
434            'f' => self.parse_false(),
435            'n' => self.parse_null(),
436            c => self.err(format!("Invalid character: {}", c.escape_debug())),
437        }
438    }
439
440    /// Run the parser to parse one JSON value.
441    pub fn parse(&mut self) -> JsonParseResult {
442        let v = self.parse_any()?;
443
444        if let Some(c) = self.next() {
445            return self.err(format!(
446                "Expected EOF but got character '{}'",
447                c.escape_debug(),
448            ));
449        }
450
451        Ok(v)
452    }
453}
454
455/// Parse given `str` object into `JsonValue` value. This is recommended way to parse strings into JSON value with
456/// this library.
457///
458/// ```
459/// use tinyjson::JsonValue;
460///
461/// let array: JsonValue = "[1, 2, 3]".parse().unwrap();
462/// assert!(array.is_array());
463/// ```
464impl FromStr for JsonValue {
465    type Err = JsonParseError;
466
467    fn from_str(s: &str) -> Result<Self, Self::Err> {
468        JsonParser::new(s.chars()).parse()
469    }
470}