lib0/
json_parser.rs

1/**
2 * the MIT License
3 *
4 * Copyright (c) 2016-2022 rhysd
5 * Copyright (c) 2022 Bartosz Sypytkowski
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 * of the Software, and to permit persons to whom the Software is furnished to do so,
12 * subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in all
15 * copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
18 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
19 * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
22 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24use std::char;
25use std::collections::HashMap;
26use std::fmt;
27use std::iter::Peekable;
28
29use crate::any::Any;
30
/// Error describing why a piece of JSON text could not be parsed.
///
/// Its `Display` output has the form `Parse error at line:<line>, col:<col>: <msg>`.
#[derive(Debug)]
pub struct JsonParseError {
    /// Human-readable description of the problem.
    msg: String,
    /// Line number the parser had reached when the error was created.
    line: usize,
    /// Column number the parser had reached when the error was created.
    col: usize,
}
37
38impl JsonParseError {
39    fn new(msg: String, line: usize, col: usize) -> JsonParseError {
40        JsonParseError { msg, line, col }
41    }
42}
43
44impl fmt::Display for JsonParseError {
45    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46        write!(
47            f,
48            "Parse error at line:{}, col:{}: {}",
49            self.line, self.col, &self.msg,
50        )
51    }
52}
53
// Marks `JsonParseError` as a standard error type; the trait's default methods are used as-is.
impl std::error::Error for JsonParseError {}
55
/// Result of a parse step: the parsed `Any` value on success, a `JsonParseError` on failure.
pub type JsonParseResult = Result<Any, JsonParseError>;
57
// Returns `true` only for the four characters this parser skips as JSON whitespace:
// space, line feed, carriage return and horizontal tab.
//
// `char::is_ascii_whitespace` cannot be used here because it accepts characters that JSON does
// not define as whitespace. For example, U+000C FORM FEED is whitespace in Rust but not in JSON.
fn is_whitespace(c: char) -> bool {
    matches!(c, '\u{0020}' | '\u{000a}' | '\u{000d}' | '\u{0009}')
}
67
/// JSON parser state: a peekable stream of `char`s plus the current line and column.
pub struct JsonParser<I: Iterator<Item = char>> {
    /// Remaining input, with one character of lookahead.
    chars: Peekable<I>,
    /// Current line number.
    line: usize,
    /// Current column number.
    col: usize,
}
76
77impl<I: Iterator<Item = char>> JsonParser<I> {
78    pub fn new(it: I) -> Self {
79        JsonParser {
80            chars: it.peekable(),
81            line: 1,
82            col: 0,
83        }
84    }
85
86    fn err<T>(&self, msg: String) -> Result<T, JsonParseError> {
87        Err(JsonParseError::new(msg, self.line, self.col))
88    }
89
90    fn unexpected_eof(&self) -> Result<char, JsonParseError> {
91        Err(JsonParseError::new(
92            String::from("Unexpected EOF"),
93            self.line,
94            self.col,
95        ))
96    }
97
98    fn next_pos(&mut self, c: char) {
99        if c == '\n' {
100            self.col = 0;
101            self.line += 1;
102        } else {
103            self.col += 1;
104        }
105    }
106
107    fn peek(&mut self) -> Result<char, JsonParseError> {
108        while let Some(c) = self.chars.peek().copied() {
109            if !is_whitespace(c) {
110                return Ok(c);
111            }
112            self.next_pos(c);
113            self.chars.next().unwrap();
114        }
115        self.unexpected_eof()
116    }
117
118    fn next(&mut self) -> Option<char> {
119        while let Some(c) = self.chars.next() {
120            self.next_pos(c);
121            if !is_whitespace(c) {
122                return Some(c);
123            }
124        }
125        None
126    }
127
128    fn consume(&mut self) -> Result<char, JsonParseError> {
129        if let Some(c) = self.next() {
130            Ok(c)
131        } else {
132            self.unexpected_eof()
133        }
134    }
135
136    fn consume_no_skip(&mut self) -> Result<char, JsonParseError> {
137        if let Some(c) = self.chars.next() {
138            self.next_pos(c);
139            Ok(c)
140        } else {
141            self.unexpected_eof()
142        }
143    }
144
145    fn parse_object(&mut self) -> JsonParseResult {
146        if self.consume()? != '{' {
147            return self.err(String::from("Object must starts with '{'"));
148        }
149
150        if self.peek()? == '}' {
151            self.consume().unwrap();
152            return Ok(Any::Map(Box::new(HashMap::new())));
153        }
154
155        let mut m = HashMap::new();
156        loop {
157            let key = match self.parse_any()? {
158                Any::String(s) => s.into_string(),
159                v => return self.err(format!("Key of object must be string but found {:?}", v)),
160            };
161
162            let c = self.consume()?;
163            if c != ':' {
164                return self.err(format!(
165                    "':' is expected after key of object but actually found '{}'",
166                    c
167                ));
168            }
169
170            m.insert(key, self.parse_any()?);
171
172            match self.consume()? {
173                ',' => {}
174                '}' => return Ok(Any::Map(Box::new(m))),
175                c => {
176                    return self.err(format!(
177                        "',' or '}}' is expected for object but actually found '{}'",
178                        c.escape_debug(),
179                    ))
180                }
181            }
182        }
183    }
184
185    fn parse_array(&mut self) -> JsonParseResult {
186        if self.consume()? != '[' {
187            return self.err(String::from("Array must starts with '['"));
188        }
189
190        if self.peek()? == ']' {
191            self.consume().unwrap();
192            return Ok(Any::Array(Box::new([])));
193        }
194
195        let mut v = vec![self.parse_any()?];
196        loop {
197            match self.consume()? {
198                ',' => {}
199                ']' => return Ok(Any::Array(v.into_boxed_slice())),
200                c => {
201                    return self.err(format!(
202                        "',' or ']' is expected for array but actually found '{}'",
203                        c
204                    ))
205                }
206            }
207
208            v.push(self.parse_any()?); // Next element
209        }
210    }
211
212    fn push_utf16(&self, s: &mut String, utf16: &mut Vec<u16>) -> Result<(), JsonParseError> {
213        if utf16.is_empty() {
214            return Ok(());
215        }
216
217        match String::from_utf16(utf16) {
218            Ok(utf8) => s.push_str(&utf8),
219            Err(err) => return self.err(format!("Invalid UTF-16 sequence {:?}: {}", &utf16, err)),
220        }
221        utf16.clear();
222        Ok(())
223    }
224
225    fn parse_string(&mut self) -> JsonParseResult {
226        if self.consume()? != '"' {
227            return self.err(String::from("String must starts with double quote"));
228        }
229
230        let mut utf16 = Vec::new(); // Buffer for parsing \uXXXX UTF-16 characters
231        let mut s = String::new();
232        loop {
233            let c = match self.consume_no_skip()? {
234                '\\' => match self.consume_no_skip()? {
235                    '\\' => '\\',
236                    '/' => '/',
237                    '"' => '"',
238                    'b' => '\u{0008}',
239                    'f' => '\u{000c}',
240                    'n' => '\n',
241                    'r' => '\r',
242                    't' => '\t',
243                    'u' => {
244                        let mut u = 0u16;
245                        for _ in 0..4 {
246                            let c = self.consume()?;
247                            if let Some(h) = c.to_digit(16) {
248                                u = u * 0x10 + h as u16;
249                            } else {
250                                return self.err(format!("Unicode character must be \\uXXXX (X is hex character) format but found character '{}'", c));
251                            }
252                        }
253                        utf16.push(u);
254                        // Additional \uXXXX character may follow. UTF-16 characters must be converted
255                        // into UTF-8 string as sequence because surrogate pairs must be considered
256                        // like "\uDBFF\uDFFF".
257                        continue;
258                    }
259                    c => return self.err(format!("'\\{}' is invalid escaped character", c)),
260                },
261                '"' => {
262                    self.push_utf16(&mut s, &mut utf16)?;
263                    return Ok(Any::String(s.into_boxed_str()));
264                }
265                // Note: c.is_control() is not available here because JSON accepts 0x7f (DEL) in
266                // string literals but 0x7f is control character.
267                // Rough spec of JSON says string literal cannot contain control characters. But it
268                // can actually contain 0x7f.
269                c if (c as u32) < 0x20 => {
270                    return self.err(format!(
271                        "String cannot contain control character {}",
272                        c.escape_debug(),
273                    ));
274                }
275                c => c,
276            };
277
278            self.push_utf16(&mut s, &mut utf16)?;
279
280            s.push(c);
281        }
282    }
283
284    fn parse_constant(&mut self, s: &'static str) -> Option<JsonParseError> {
285        for c in s.chars() {
286            match self.consume_no_skip() {
287                Ok(x) if x != c => {
288                    return Some(JsonParseError::new(
289                        format!("Unexpected character '{}' while parsing '{}'", c, s),
290                        self.line,
291                        self.col,
292                    ));
293                }
294                Ok(_) => {}
295                Err(e) => return Some(e),
296            }
297        }
298        None
299    }
300
301    fn parse_null(&mut self) -> JsonParseResult {
302        match self.parse_constant("null") {
303            Some(err) => Err(err),
304            None => Ok(Any::Null),
305        }
306    }
307
308    fn parse_true(&mut self) -> JsonParseResult {
309        match self.parse_constant("true") {
310            Some(err) => Err(err),
311            None => Ok(Any::Bool(true)),
312        }
313    }
314
315    fn parse_false(&mut self) -> JsonParseResult {
316        match self.parse_constant("false") {
317            Some(err) => Err(err),
318            None => Ok(Any::Bool(false)),
319        }
320    }
321
322    fn parse_number(&mut self) -> JsonParseResult {
323        let neg = if self.peek()? == '-' {
324            self.consume_no_skip().unwrap();
325            true
326        } else {
327            false
328        };
329
330        let mut s = String::new();
331        let mut saw_dot = false;
332        let mut saw_exp = false;
333
334        while let Some(d) = self.chars.peek() {
335            match d {
336                '0'..='9' => s.push(*d),
337                '.' => {
338                    saw_dot = true;
339                    break;
340                }
341                'e' | 'E' => {
342                    saw_exp = true;
343                    break;
344                }
345                _ => break,
346            }
347            self.consume_no_skip().unwrap();
348        }
349
350        if s.is_empty() {
351            return self.err("Integer part must not be empty in number literal".to_string());
352        }
353
354        if s.starts_with('0') && s.len() > 1 {
355            return self
356                .err("Integer part of number must not start with 0 except for '0'".to_string());
357        }
358
359        if saw_dot {
360            s.push(self.consume_no_skip().unwrap()); // eat '.'
361            while let Some(d) = self.chars.peek() {
362                match d {
363                    '0'..='9' => s.push(*d),
364                    'e' | 'E' => {
365                        saw_exp = true;
366                        break;
367                    }
368                    _ => break,
369                }
370                self.consume_no_skip().unwrap();
371            }
372            if s.ends_with('.') {
373                return self.err("Fraction part of number must not be empty".to_string());
374            }
375        }
376
377        if saw_exp {
378            s.push(self.consume_no_skip().unwrap()); // eat 'e' or 'E'
379            if let Some('+') | Some('-') = self.chars.peek() {
380                s.push(self.consume_no_skip().unwrap());
381            }
382
383            let mut saw_digit = false;
384            while let Some(d) = self.chars.peek() {
385                match d {
386                    '0'..='9' => s.push(*d),
387                    _ => break,
388                }
389                saw_digit = true;
390                self.consume_no_skip().unwrap();
391            }
392
393            if !saw_digit {
394                return self.err("Exponent part must not be empty in number literal".to_string());
395            }
396        }
397
398        match s.parse::<f64>() {
399            Ok(n) => Ok(Any::Number(if neg { -n } else { n })),
400            Err(err) => self.err(format!("Invalid number literal '{}': {}", s, err)),
401        }
402    }
403
404    fn parse_any(&mut self) -> JsonParseResult {
405        match self.peek()? {
406            '0'..='9' | '-' => self.parse_number(),
407            '"' => self.parse_string(),
408            '[' => self.parse_array(),
409            '{' => self.parse_object(),
410            't' => self.parse_true(),
411            'f' => self.parse_false(),
412            'n' => self.parse_null(),
413            c => self.err(format!("Invalid character: {}", c.escape_debug())),
414        }
415    }
416
417    pub fn parse(&mut self) -> JsonParseResult {
418        let v = self.parse_any()?;
419
420        if let Some(c) = self.next() {
421            return self.err(format!(
422                "Expected EOF but got character '{}'",
423                c.escape_debug(),
424            ));
425        }
426
427        Ok(v)
428    }
429}