sn/
parser.rs

1use crate::{
2    bytes::Bytes,
3    stream::Stream,
4    util::{
5        constants::{punctuators::*, *},
6        is_numeric_like, is_numeric_or_negative,
7    },
8};
9use std::collections::HashMap;
/// An error returned by the parser
/// in the event that the input JSON is malformed.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed and propagated through `Box<dyn std::error::Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The parser encountered a symbol (character) in a place it wasn't expecting.
    UnexpectedSymbol(char),
    /// The parser reached the end of the input prematurely.
    UnexpectedEndOfInput,
    /// An internal error that is returned if a number somehow fails to parse.
    /// If this is returned, please open an issue.
    NumberParseError,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // `{:?}` keeps control characters such as '\n' readable in the message.
            ParseError::UnexpectedSymbol(symbol) => write!(f, "unexpected symbol {:?}", symbol),
            ParseError::UnexpectedEndOfInput => f.write_str("unexpected end of input"),
            ParseError::NumberParseError => f.write_str("failed to parse number"),
        }
    }
}

impl std::error::Error for ParseError {}
22
23#[derive(Debug, PartialEq)]
24/// A value as represented in parsed JSON.
25pub enum Value<'a> {
26    /// A string, composed of bytes borrowed from the input.
27    String(Bytes<'a>),
28    /// A 64-bit precision floating point number.
29    Number(f64),
30    /// A boolean.
31    Boolean(bool),
32    /// An object, represented as a HashMap of a String to a Value.
33    Object(HashMap<Bytes<'a>, Value<'a>>),
34    /// An array, represented as a Vec of Values.
35    Array(Vec<Value<'a>>),
36    /// Null (No value).
37    Null,
38    /// The supplied JSON is completely empty.
39    Nothing,
40}
41
42/// The parser itself. Create a new parser with the `new` method,
43/// and parse it using the `parse` method.
44pub struct Parser<'a> {
45    stream: Stream<'a, u8>,
46}
47impl<'a> Parser<'a> {
48    /// Create a new parser from raw JSON encoded as a Vec of u8s
49    /// THIS IS SUBJECT TO CHANGE.
50    pub fn new(input: &'a [u8]) -> Parser {
51        Parser {
52            stream: Stream::new(input),
53        }
54    }
55
56    /// Parse a single Value.
57    /// This function DOES NOT consume self as it is called recursively.
58    /// However,this function is only designed to be called externally once.
59    /// This will be fixed in an upcoming release.
60    pub fn parse(&mut self) -> Result<Value<'a>, ParseError> {
61        self.skip_whitespace_no_eof()?;
62
63        let initial = self.stream.current_unchecked();
64
65        match initial {
66            FALSE_IDENT | TRUE_IDENT => self.parse_boolean(),
67            NULL_IDENT => self.parse_null(),
68            STRING_BOUNDARY => Ok(Value::String(self.parse_string()?)),
69            OBJECT_OPEN | ARRAY_OPEN => self.parse_from_punctuator(initial),
70            _ => {
71                if is_numeric_or_negative(initial) {
72                    self.parse_number()
73                } else {
74                    Err(ParseError::UnexpectedSymbol(initial as char))
75                }
76            }
77        }
78    }
79
80    fn parse_from_punctuator(&mut self, punctuator: u8) -> Result<Value<'a>, ParseError> {
81        match punctuator {
82            ARRAY_OPEN => self.parse_array(),
83            OBJECT_OPEN => self.parse_object(),
84            _ => Err(ParseError::UnexpectedSymbol(punctuator as char)),
85        }
86    }
87
88    fn parse_array(&mut self) -> Result<Value<'a>, ParseError> {
89        // at this point the stream is pointing at the opening punctuator for the array.
90        let mut inner: Vec<Value> = vec![];
91        let mut has_read_initial = false;
92
93        while !self.stream.is_eof() {
94            self.stream.skip();
95            self.skip_whitespace_no_eof()?;
96
97            let next = self.stream.current_unchecked();
98
99            match next {
100                ARRAY_DELIMITER => {
101                    let parsed = self.parse_array_punctuator(next)?;
102                    inner.push(parsed);
103                }
104                ARRAY_CLOSE => {
105                    break;
106                }
107                _ => {
108                    // this guard exists to allow the first element to not be delimited (a.k.a [1])
109                    // but disallows subsequent elements from not being delimited (a.k.a [1 1])
110                    if !has_read_initial {
111                        inner.push(self.parse()?);
112                        has_read_initial = true;
113                    } else {
114                        return Err(ParseError::UnexpectedSymbol(next as char));
115                    }
116                }
117            }
118        }
119
120        Ok(Value::Array(inner))
121    }
122
123    fn parse_array_punctuator(&mut self, punctuator: u8) -> Result<Value<'a>, ParseError> {
124        match punctuator {
125            ARRAY_CLOSE => Ok(Value::Nothing),
126            ARRAY_DELIMITER => {
127                // we're on the delimiter, must skip past it to get to the expression to parse
128                self.stream.skip();
129                if self.stream.is_eof() {
130                    return Err(ParseError::UnexpectedEndOfInput);
131                }
132                Ok(self.parse()?)
133            }
134            _ => self.parse_from_punctuator(punctuator),
135        }
136    }
137
138    fn parse_boolean(&mut self) -> Result<Value<'a>, ParseError> {
139        let next_4 = self.stream.slice_len(self.stream.position(), 4);
140        if next_4.eq(TRUE) {
141            self.stream.skip_n(3);
142            Ok(Value::Boolean(true))
143        } else if self.stream.slice_len(self.stream.position(), 5).eq(FALSE) {
144            self.stream.skip_n(4);
145            Ok(Value::Boolean(false))
146        } else {
147            Err(ParseError::UnexpectedSymbol(
148                self.stream.current_unchecked() as char,
149            ))
150        }
151    }
152
153    fn parse_null(&mut self) -> Result<Value<'a>, ParseError> {
154        let next_4 = self.stream.slice_len(self.stream.position(), 4);
155        if next_4.eq(NULL) {
156            self.stream.skip_n(3);
157            Ok(Value::Null)
158        } else {
159            Err(ParseError::UnexpectedSymbol(
160                self.stream.current_unchecked() as char,
161            ))
162        }
163    }
164
165    fn parse_number(&mut self) -> Result<Value<'a>, ParseError> {
166        let start = self.stream.position();
167        let mut is_first_iteration = true;
168        self.skip_whitespace_no_eof()?;
169
170        while !self.stream.is_eof() {
171            let next_char = self.stream.current_unchecked();
172
173            if next_char == NEGATIVE && is_first_iteration {
174                self.stream.skip();
175                continue;
176            }
177
178            if !is_numeric_like(next_char) || self.stream.peek().is_none() {
179                let res = Ok(Value::Number(
180                    std::str::from_utf8(self.stream.slice_unchecked(start, self.stream.position()))
181                        .ok()
182                        .unwrap()
183                        .parse::<f64>()
184                        .ok()
185                        .unwrap(),
186                ));
187                self.stream.unskip();
188                return res;
189            }
190
191            self.stream.skip();
192            is_first_iteration = false;
193        }
194
195        Err(ParseError::NumberParseError)
196    }
197
198    fn parse_object(&mut self) -> Result<Value<'a>, ParseError> {
199        let mut inner: HashMap<Bytes<'a>, Value<'a>> = HashMap::new();
200
201        let mut is_first_entry = true;
202
203        while !self.stream.is_eof() {
204            self.stream.skip();
205
206            self.skip_whitespace_no_eof()?;
207
208            // the value read here should always be a string boundary
209            let mut next = self.stream.current_unchecked();
210
211            let key: Bytes<'a>;
212
213            // checking that the key is a string or if this is an empty object
214            match next {
215                STRING_BOUNDARY => key = self.parse_string()?,
216                OBJECT_CLOSE => {
217                    // this check disallows { "key": "value", }, but permits {}
218                    // by checking if any entries have been parsed yet
219                    // we should never get to this point if the json is
220                    // { "key": "value" } because another check for } is made
221                    // later in this loop.
222                    if !is_first_entry {
223                        return Err(ParseError::UnexpectedSymbol(next as char));
224                    } else {
225                        return Ok(Value::Object(inner));
226                    }
227                }
228                _ => return Err(ParseError::UnexpectedSymbol(next as char)),
229            }
230
231            // still on string closing boundary
232            self.stream.skip();
233
234            self.skip_whitespace_no_eof()?;
235
236            next = self.stream.current_unchecked();
237            if next != OBJECT_KV_DELIMITER {
238                return Err(ParseError::UnexpectedSymbol(next as char));
239            };
240
241            // next entry in the data should be the value itself, but this can be any type so we will just parse it
242            // we are still on the divider at this stage so we will skip to the start of the value
243            self.stream.skip();
244            self.skip_whitespace_no_eof()?;
245
246            let value = self.parse()?;
247            inner.insert(key, value);
248
249            self.stream.skip();
250            self.skip_whitespace_no_eof()?;
251
252            // next thing in the object could either be a delimiter between entries or a closing character
253            // delimiter is not valid if there are no more items, so we need to check for this
254            next = self.stream.current_unchecked();
255
256            match next {
257                OBJECT_ENTRY_DELIMITER => {
258                    is_first_entry = false;
259                    continue;
260                }
261                OBJECT_CLOSE => {
262                    break;
263                }
264                _ => return Err(ParseError::UnexpectedSymbol(next as char)),
265            }
266        }
267
268        Ok(Value::Object(inner))
269    }
270
271    fn parse_string(&mut self) -> Result<Bytes<'a>, ParseError> {
272        let start = self.stream.position() + 1;
273
274        while !self.stream.is_eof() {
275            let next_char = *self.stream.next_unchecked();
276
277            if next_char == ESCAPE {
278                self.stream.skip();
279            } else if next_char == STRING_BOUNDARY {
280                return Ok(Bytes::from(
281                    self.stream.slice_unchecked(start, self.stream.position()),
282                ));
283            }
284        }
285
286        Err(ParseError::UnexpectedEndOfInput)
287    }
288
289    /// Skips whitespace and checks if there is anything left.
290    fn skip_whitespace_no_eof(&mut self) -> Result<(), ParseError> {
291        while !self.stream.is_eof() {
292            let character = self.stream.current_unchecked();
293            if !WHITESPACE.contains(&character) {
294                break;
295            }
296            self.stream.skip();
297        }
298        if self.stream.is_eof() {
299            Err(ParseError::UnexpectedEndOfInput)
300        } else {
301            Ok(())
302        }
303    }
304}