humphrey_json/
parser.rs

1//! Provides the core JSON-parsing functionality.
2
3use crate::error::{ParseError, TracebackError};
4use crate::Value;
5
6use std::borrow::Borrow;
7use std::iter::Peekable;
8use std::str::Chars;
9
10const MAX_DEPTH: usize = 256;
11
12impl Value {
13    /// Parse a string into a JSON value.
14    ///
15    /// If unsuccessful, returns a `TracebackError`, giving information about the location of the syntax error within the JSON string.
16    ///
17    /// ## Usage
18    /// ```
19    /// let value = Value::parse("[1, 2, 3]");
20    /// ```
21    pub fn parse(s: impl AsRef<str>) -> Result<Self, TracebackError> {
22        let chars = s.as_ref().chars();
23        let mut parser = Parser::new(chars, MAX_DEPTH);
24        let value = parser.parse_value()?;
25        parser.expect_eof()?;
26
27        Ok(value)
28    }
29
30    /// Parse a string into a JSON value with the specified maximum recursion depth.
31    ///
32    /// If unsuccessful, returns a `TracebackError`, giving information about the location of the syntax error within the JSON string.
33    ///
34    /// ## Usage
35    /// ```
36    /// let value = Value::parse_max_depth("[1, 2, 3]", 8);
37    /// ```
38    pub fn parse_max_depth(s: impl AsRef<str>, max_depth: usize) -> Result<Self, TracebackError> {
39        let chars = s.as_ref().chars();
40        let mut parser = Parser::new(chars, max_depth);
41        let value = parser.parse_value()?;
42        parser.expect_eof()?;
43
44        Ok(value)
45    }
46}
47
48/// Encapsulates the internal state of the parsing process.
49struct Parser<'a> {
50    chars: Peekable<Chars<'a>>,
51    depth: usize,
52    max_depth: usize,
53    line: usize,
54    column: usize,
55    next_line: usize,
56    next_column: usize,
57}
58
59impl<'a> Parser<'a> {
60    /// Initialise a new parser.
61    fn new(chars: Chars<'a>, max_depth: usize) -> Self {
62        Self {
63            chars: chars.peekable(),
64            depth: 0,
65            max_depth,
66            line: 1,
67            column: 1,
68            next_line: 1,
69            next_column: 1,
70        }
71    }
72
73    /// Get the next character to be parsed.
74    fn next(&mut self) -> Result<char, TracebackError> {
75        if let Some(c) = self.chars.next() {
76            self.line = self.next_line;
77            self.column = self.next_column;
78
79            if c == '\n' {
80                self.next_line += 1;
81                self.next_column = 0;
82            } else if c != '\r' {
83                self.next_column += 1;
84            }
85
86            return Ok(c);
87        }
88
89        Err(self.traceback(ParseError::UnexpectedEOF))
90    }
91
92    /// Convert a regular parsing error into a traceback error containing the location of the error.
93    fn traceback(&self, e: ParseError) -> TracebackError {
94        TracebackError {
95            line: self.line,
96            column: self.column,
97            kind: e,
98        }
99    }
100
101    /// Attempt to parse a value from the character stream.
102    fn parse_value(&mut self) -> Result<Value, TracebackError> {
103        self.flush_whitespace();
104
105        match self.next() {
106            Ok('"') => self.parse_string(),
107            Ok('[') => self.parse_array(),
108            Ok('{') => self.parse_object(),
109            Ok(c) => self.parse_literal(c),
110            Err(e) => Err(e),
111        }
112    }
113
114    /// Attempt to parse a string from the character stream.
115    fn parse_string(&mut self) -> Result<Value, TracebackError> {
116        let mut string = String::with_capacity(256);
117        let mut backslash = false;
118
119        loop {
120            let c = self.next()?;
121
122            if backslash {
123                match c {
124                    '"' => string.push(0x22 as char),
125                    '\\' => string.push(0x5c as char),
126                    '/' => string.push(0x2f as char),
127                    'b' => string.push(0x08 as char),
128                    'f' => string.push(0x0c as char),
129                    'n' => string.push(0x0a as char),
130                    'r' => string.push(0x0d as char),
131                    't' => string.push(0x09 as char),
132                    'u' => {
133                        let hex: String = [self.next()?, self.next()?, self.next()?, self.next()?]
134                            .iter()
135                            .collect();
136                        let code = u16::from_str_radix(&hex, 16)
137                            .map_err(|_| self.traceback(ParseError::InvalidEscapeSequence))?;
138
139                        let new_char = if let Some(new_char) = char::from_u32(code as u32) {
140                            new_char
141                        } else {
142                            quiet_assert(
143                                self.next()? == '\\' && self.next()? == 'u',
144                                self.traceback(ParseError::InvalidEscapeSequence),
145                            )?;
146
147                            let hex: String =
148                                [self.next()?, self.next()?, self.next()?, self.next()?]
149                                    .iter()
150                                    .collect();
151                            let code_2 = u16::from_str_radix(&hex, 16)
152                                .map_err(|_| self.traceback(ParseError::InvalidEscapeSequence))?;
153
154                            char::decode_utf16([code, code_2])
155                                .next()
156                                .ok_or_else(|| self.traceback(ParseError::InvalidEscapeSequence))?
157                                .map_err(|_| self.traceback(ParseError::InvalidEscapeSequence))?
158                        };
159
160                        string.push(new_char);
161                    }
162                    _ => return Err(self.traceback(ParseError::InvalidEscapeSequence)),
163                }
164
165                backslash = false;
166            } else if c == '\\' {
167                backslash = true;
168            } else if c == '"' {
169                break;
170            } else {
171                match c as u32 {
172                    0x20..=0x21 | 0x23..=0x5b | 0x5d..=0x10ffff => string.push(c),
173                    _ => return Err(self.traceback(ParseError::InvalidToken)),
174                }
175            }
176        }
177
178        Ok(Value::String(string))
179    }
180
181    /// Attempt to parse an array from the character stream.
182    fn parse_array(&mut self) -> Result<Value, TracebackError> {
183        self.inc_depth()?;
184
185        let mut array: Vec<Value> = Vec::with_capacity(16);
186
187        loop {
188            self.flush_whitespace();
189
190            match self.chars.peek() {
191                Some(&']') => {
192                    if array.is_empty() {
193                        break;
194                    } else {
195                        return Err(self.traceback(ParseError::TrailingComma));
196                    }
197                }
198                Some(_) => array.push(self.parse_value()?),
199                None => return Err(self.traceback(ParseError::UnexpectedEOF)),
200            }
201
202            self.flush_whitespace();
203
204            match self.chars.peek() {
205                Some(&',') => (),
206                Some(&']') => break,
207                Some(_) => return Err(self.traceback(ParseError::InvalidToken)),
208                None => return Err(self.traceback(ParseError::UnexpectedEOF)),
209            }
210
211            self.next()?;
212        }
213
214        self.next()?;
215        self.dec_depth();
216
217        Ok(Value::Array(array))
218    }
219
220    /// Attempt to parse an object from the character stream.
221    fn parse_object(&mut self) -> Result<Value, TracebackError> {
222        self.inc_depth()?;
223
224        let mut object: Vec<(String, Value)> = Vec::with_capacity(16);
225        let mut trailing_comma = false;
226
227        loop {
228            self.flush_whitespace();
229
230            match self.chars.peek() {
231                Some(&'}') => {
232                    if trailing_comma {
233                        return Err(self.traceback(ParseError::TrailingComma));
234                    } else {
235                        break;
236                    }
237                }
238                Some(&',') => {
239                    if trailing_comma {
240                        return Err(self.traceback(ParseError::InvalidToken));
241                    } else {
242                        trailing_comma = true;
243
244                        if object.is_empty() {
245                            return Err(self.traceback(ParseError::InvalidToken));
246                        }
247
248                        self.next()?;
249                    }
250                }
251                Some(_) => {
252                    trailing_comma = false;
253                    let string_start = self.next()?;
254                    quiet_assert(
255                        string_start == '"',
256                        self.traceback(ParseError::InvalidToken),
257                    )?;
258
259                    let key = self.parse_string()?.as_str().unwrap().to_string();
260                    self.flush_whitespace();
261
262                    let sep = self.next()?;
263                    quiet_assert(sep == ':', self.traceback(ParseError::InvalidToken))?;
264                    self.flush_whitespace();
265
266                    let value = self.parse_value()?;
267
268                    object.push((key, value));
269                }
270                None => return Err(self.traceback(ParseError::UnexpectedEOF)),
271            }
272        }
273
274        self.next()?;
275        self.dec_depth();
276
277        Ok(Value::Object(object))
278    }
279
280    /// Attempt to parse a literal from the character stream.
281    fn parse_literal(&mut self, c: char) -> Result<Value, TracebackError> {
282        let mut string = String::from(c);
283
284        while self.chars.peek().map_or(false, |&c| is_literal(c)) {
285            string.push(self.next().unwrap());
286        }
287
288        match string.as_str() {
289            "null" => Ok(Value::Null),
290            "true" => Ok(Value::Bool(true)),
291            "false" => Ok(Value::Bool(false)),
292            number => Ok(Value::Number(
293                number
294                    .parse()
295                    .map_err(|_| self.traceback(ParseError::InvalidToken))?,
296            )),
297        }
298    }
299
300    /// Assert that there are no more characters to be parsed, or return an error.
301    fn expect_eof(&mut self) -> Result<(), TracebackError> {
302        self.flush_whitespace();
303
304        match self.chars.peek() {
305            Some(_) => Err(self.traceback(ParseError::InvalidToken)),
306            None => Ok(()),
307        }
308    }
309
310    /// Fast-forward the iterator until the next character is not whitespace.
311    fn flush_whitespace(&mut self) {
312        while self.chars.peek().map_or(false, is_whitespace) {
313            self.next().ok();
314        }
315    }
316
317    fn inc_depth(&mut self) -> Result<(), TracebackError> {
318        if self.depth == self.max_depth {
319            Err(self.traceback(ParseError::RecursionDepthExceeded))
320        } else {
321            self.depth += 1;
322            Ok(())
323        }
324    }
325
326    fn dec_depth(&mut self) {
327        self.depth -= 1;
328    }
329}
330
331/// Assert a condition, or return an error.
332fn quiet_assert(condition: bool, error: TracebackError) -> Result<(), TracebackError> {
333    if condition {
334        Ok(())
335    } else {
336        Err(error)
337    }
338}
339
/// Check whether a character is one of the four JSON whitespace characters:
/// space, horizontal tab, line feed or carriage return.
fn is_whitespace(c: impl Borrow<char>) -> bool {
    const JSON_WHITESPACE: [char; 4] = [' ', '\t', '\n', '\r'];

    JSON_WHITESPACE.contains(c.borrow())
}
344
345/// Check whether the character is reserved.
346fn is_literal(c: impl Borrow<char>) -> bool {
347    let c = c.borrow();
348    !is_whitespace(c) && *c != ',' && *c != '}' && *c != ']'
349}