chisel_parsers/json/
sax.rs

//! An event-driven (SAX-style) JSON parser that reports each match to a caller-supplied callback.
2use std::borrow::Cow;
3use std::fs::File;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7use chisel_common::char::coords::Coords;
8use chisel_decoders::{default_decoder, new_decoder, Encoding};
9use chisel_json_pointer::JsonPointer;
10use chisel_lexers::json::lexer::Lexer;
11use chisel_lexers::json::tokens::Token;
12
13use crate::json::events::{Event, Match};
14use crate::parser_error;
15use crate::{ParserError, ParserErrorDetails, ParserResult};
16
/// Construct an `Event` in place and hand a reference to it to the callback.
///
/// Two invocation forms are accepted:
///
/// * `emit_event!(cb, matched, span)` - the event's `pointer` is `None`;
/// * `emit_event!(cb, matched, span, path)` - the event's `pointer` is `Some(&path)`.
///
/// The macro evaluates to whatever the callback returns, so callers can apply `?` to it.
macro_rules! emit_event {
    // Event without a pointer.
    ($callback:expr, $matched:expr, $span:expr) => {
        $callback(&Event {
            matched: $matched,
            span: $span,
            pointer: None,
        })
    };
    // Event carrying a borrowed pointer.
    ($callback:expr, $matched:expr, $span:expr, $pointer:expr) => {
        $callback(&Event {
            matched: $matched,
            span: $span,
            pointer: Some(&$pointer),
        })
    };
}
33
/// Main JSON parser struct.
///
/// The parser is event driven: rather than building a document, its `parse*` methods invoke a
/// caller-supplied callback with an `Event` for every match they make.
pub struct Parser {
    /// Character encoding this parser instance was configured with.
    encoding: Encoding,
}
38
39impl Default for Parser {
40    /// The default encoding is Utf-8
41    fn default() -> Self {
42        Self {
43            encoding: Default::default(),
44        }
45    }
46}
47
48impl Parser {
49    /// Create a new instance of the parser using a specific [Encoding]
50    pub fn with_encoding(encoding: Encoding) -> Self {
51        Self { encoding }
52    }
53
54    pub fn parse_file<PathLike: AsRef<Path>, Callback>(
55        &self,
56        path: PathLike,
57        cb: &mut Callback,
58    ) -> ParserResult<()>
59    where
60        Callback: FnMut(&Event) -> ParserResult<()>,
61    {
62        match File::open(&path) {
63            Ok(f) => {
64                let mut reader = BufReader::new(f);
65                let mut chars = new_decoder(&mut reader, self.encoding);
66                self.parse(&mut chars, cb)
67            }
68            Err(_) => {
69                parser_error!(ParserErrorDetails::InvalidFile)
70            }
71        }
72    }
73
74    pub fn parse_bytes<Callback>(&self, bytes: &[u8], cb: &mut Callback) -> ParserResult<()>
75    where
76        Callback: FnMut(&Event) -> ParserResult<()>,
77    {
78        if bytes.is_empty() {
79            return parser_error!(ParserErrorDetails::ZeroLengthInput, Coords::default());
80        }
81        let mut reader = BufReader::new(bytes);
82        let mut chars = default_decoder(&mut reader);
83        self.parse(&mut chars, cb)
84    }
85
86    pub fn parse_str<Callback>(&self, str: &str, cb: &mut Callback) -> ParserResult<()>
87    where
88        Callback: FnMut(&Event) -> ParserResult<()>,
89    {
90        if str.is_empty() {
91            return parser_error!(ParserErrorDetails::ZeroLengthInput, Coords::default());
92        }
93        let mut reader = BufReader::new(str.as_bytes());
94        let mut chars = default_decoder(&mut reader);
95        self.parse(&mut chars, cb)
96    }
97
98    /// Parse the contents extracted from an instance of [BufRead]
99    pub fn parse_buffer<Callback>(
100        &self,
101        buffer: &mut impl BufRead,
102        cb: &mut Callback,
103    ) -> ParserResult<()>
104    where
105        Callback: FnMut(&Event) -> ParserResult<()>,
106    {
107        let mut chars = default_decoder(buffer);
108        self.parse(&mut chars, cb)
109    }
110
111    pub fn parse<Callback>(
112        &self,
113        chars: &mut impl Iterator<Item = char>,
114        cb: &mut Callback,
115    ) -> ParserResult<()>
116    where
117        Callback: FnMut(&Event) -> ParserResult<()>,
118    {
119        let mut pointer = JsonPointer::root();
120        let mut lexer = Lexer::new(chars);
121        match lexer.consume()? {
122            (Token::StartObject, span) => {
123                emit_event!(cb, Match::StartOfInput, span)?;
124                emit_event!(cb, Match::StartObject, span, pointer)?;
125                self.parse_object(&mut lexer, &mut pointer, cb)
126            }
127            (Token::StartArray, span) => {
128                emit_event!(cb, Match::StartOfInput, span, pointer)?;
129                emit_event!(cb, Match::StartArray, span, pointer)?;
130                self.parse_array(&mut lexer, &mut pointer, cb)
131            }
132            (_, span) => {
133                parser_error!(ParserErrorDetails::InvalidRootObject, span.start)
134            }
135        }
136    }
137
138    #[inline]
139    fn parse_value<Callback>(
140        &self,
141        lexer: &mut Lexer,
142        pointer: &mut JsonPointer,
143        cb: &mut Callback,
144    ) -> ParserResult<()>
145    where
146        Callback: FnMut(&Event) -> ParserResult<()>,
147    {
148        match lexer.consume()? {
149            (Token::StartObject, span) => {
150                emit_event!(cb, Match::StartObject, span, pointer)?;
151                self.parse_object(lexer, pointer, cb)
152            }
153            (Token::StartArray, span) => {
154                emit_event!(cb, Match::StartArray, span, pointer)?;
155                self.parse_array(lexer, pointer, cb)
156            }
157            (Token::Str(str), span) => {
158                emit_event!(cb, Match::String(Cow::Borrowed(&str)), span, pointer)
159            }
160            (Token::LazyNumeric(value), span) => {
161                emit_event!(cb, Match::Numeric(value), span, pointer)
162            }
163            (Token::Float(value), span) => {
164                emit_event!(cb, Match::Float(value), span, pointer)
165            }
166            (Token::Integer(value), span) => {
167                emit_event!(cb, Match::Integer(value), span, pointer)
168            }
169            (Token::Boolean(value), span) => {
170                emit_event!(cb, Match::Boolean(value), span, pointer)
171            }
172            (Token::Null, span) => {
173                emit_event!(cb, Match::Null, span, pointer)
174            }
175            (token, span) => {
176                parser_error!(
177                    ParserErrorDetails::UnexpectedToken(token.to_string()),
178                    span.start
179                )
180            }
181        }
182    }
183
184    /// An object is just a list of comma separated KV pairs
185    fn parse_object<Callback>(
186        &self,
187        lexer: &mut Lexer,
188        pointer: &mut JsonPointer,
189        cb: &mut Callback,
190    ) -> ParserResult<()>
191    where
192        Callback: FnMut(&Event) -> ParserResult<()>,
193    {
194        loop {
195            match lexer.consume()? {
196                (Token::Str(str), span) => {
197                    pointer.push_name(str.replace("\"", ""));
198                    emit_event!(cb, Match::ObjectKey(Cow::Borrowed(&str)), span, pointer)?;
199                    let should_be_colon = lexer.consume()?;
200                    match should_be_colon {
201                        (Token::Colon, _) => {
202                            self.parse_value(lexer, pointer, cb)?;
203                            pointer.pop();
204                        }
205                        (_, _) => {
206                            return parser_error!(
207                                ParserErrorDetails::PairExpected,
208                                should_be_colon.1.start
209                            )
210                        }
211                    }
212                }
213                (Token::Comma, _) => (),
214                (Token::EndObject, span) => {
215                    return emit_event!(cb, Match::EndObject, span, pointer);
216                }
217                (_token, span) => {
218                    return parser_error!(ParserErrorDetails::InvalidArray, span.start)
219                }
220            }
221        }
222    }
223
224    /// An array is just a list of comma separated values
225    fn parse_array<Callback>(
226        &self,
227        lexer: &mut Lexer,
228        pointer: &mut JsonPointer,
229        cb: &mut Callback,
230    ) -> ParserResult<()>
231    where
232        Callback: FnMut(&Event) -> ParserResult<()>,
233    {
234        let mut index = 0;
235        let mut expect_value: bool = true;
236        let mut first_pass = true;
237        loop {
238            pointer.push_index(index);
239            match lexer.consume()? {
240                (Token::StartArray, span) => {
241                    emit_event!(cb, Match::StartArray, span, pointer)?;
242                    self.parse_array(lexer, pointer, cb)?;
243                }
244                (Token::EndArray, span) => {
245                    return if !expect_value || first_pass {
246                        pointer.pop();
247                        emit_event!(cb, Match::EndArray, span, pointer)
248                    } else {
249                        parser_error!(ParserErrorDetails::ValueExpected, span.start)
250                    }
251                }
252                (Token::StartObject, span) => {
253                    emit_event!(cb, Match::StartObject, span, pointer)?;
254                    self.parse_object(lexer, pointer, cb)?;
255                }
256                (Token::Str(str), span) => {
257                    emit_event!(cb, Match::String(Cow::Borrowed(&str)), span, pointer)?;
258                }
259                (Token::LazyNumeric(value), span) => {
260                    emit_event!(cb, Match::Numeric(value), span, pointer)?;
261                }
262                (Token::Float(value), span) => {
263                    emit_event!(cb, Match::Float(value), span, pointer)?;
264                }
265                (Token::Integer(value), span) => {
266                    emit_event!(cb, Match::Integer(value), span, pointer)?;
267                }
268                (Token::Boolean(value), span) => {
269                    emit_event!(cb, Match::Boolean(value), span, pointer)?;
270                }
271                (Token::Null, span) => emit_event!(cb, Match::Null, span, pointer)?,
272                (Token::Comma, span) => {
273                    if !expect_value {
274                        index += 1
275                    } else {
276                        return parser_error!(ParserErrorDetails::ValueExpected, span.start);
277                    }
278                }
279                (_token, span) => {
280                    return parser_error!(ParserErrorDetails::InvalidArray, span.start);
281                }
282            }
283            first_pass = false;
284            expect_value = !expect_value;
285            pointer.pop();
286        }
287    }
288}
289
#[cfg(test)]
mod tests {
    use std::io::BufReader;
    use std::path::PathBuf;
    use std::time::Instant;
    use std::{env, fs};

    use bytesize::ByteSize;
    use chisel_common::char::coords::Coords;

    use chisel_common::relative_file;

    use crate::json::sax::Parser;
    use crate::json::specs;
    use crate::ParserErrorDetails;

    /// An empty string must be rejected with `ZeroLengthInput`.
    #[test]
    fn should_puke_on_empty_input() {
        let input = "";
        let parser = Parser::default();
        let parsed = parser.parse_str(input, &mut |_e| Ok(()));
        assert!(parsed.is_err());
        assert_eq!(
            parsed.err().unwrap().details,
            ParserErrorDetails::ZeroLengthInput
        );
    }

    /// A known-good fixture parses without error.
    #[test]
    fn should_parse_successfully() {
        let mut counter = 0;
        let path = relative_file!("fixtures/json/valid/events.json");
        let parser = Parser::default();
        let parsed = parser.parse_file(&path, &mut |_e| {
            counter += 1;
            Ok(())
        });
        println!("{} SAX events processed", counter);
        assert!(parsed.is_ok());
    }

    /// Every invalid fixture must fail, and fail at the line/column recorded in its spec.
    #[test]
    fn should_successfully_handle_basic_invalid_inputs() {
        for spec in specs::invalid_json_specs() {
            let mut counter = 0;
            let path = relative_file!(spec.filename);
            let parser = Parser::default();
            let parse_result = parser.parse_file(&path, &mut |_e| {
                counter += 1;
                Ok(())
            });
            println!("Parse result = {:?}", parse_result);
            assert!(parse_result.is_err());

            let err = parse_result.err().unwrap();
            let err_coords = Coords::from_coords(&err.coords.unwrap());
            assert_eq!(err_coords.line, spec.expected.coords.line);
            assert_eq!(err_coords.column, spec.expected.coords.column)
        }
    }

    /// Input supplied through a `BufRead` implementation parses successfully.
    #[test]
    fn should_allow_for_parsing_of_a_buffer() {
        let input = "{ \"test\" : 2123232323}".as_bytes();
        let mut buffer = BufReader::new(input);
        let parser = Parser::default();
        let parsed = parser.parse_buffer(&mut buffer, &mut |_e| Ok(()));
        // Check the outcome: without this the test passes even if parsing fails.
        assert!(parsed.is_ok(), "buffer parse failed: {:?}", parsed);
    }

    /// Every file in the valid fixtures directory parses; timing and event counts are printed.
    #[test]
    fn should_parse_basic_test_files() {
        for f in fs::read_dir("fixtures/json/valid").unwrap() {
            let path = f.unwrap().path();
            println!("Parsing {:?}", &path);
            if path.is_file() {
                let mut counter = 0;
                let len = fs::metadata(&path).unwrap().len();
                let start = Instant::now();
                let path = relative_file!(path.to_str().unwrap());
                let parser = Parser::default();
                let parsed = parser.parse_file(&path, &mut |_e| {
                    counter += 1;
                    Ok(())
                });
                if parsed.is_err() {
                    println!("Parse of {:?} failed!", &path);
                    println!("Parse failed with errors: {:?}", &parsed)
                }
                assert!(parsed.is_ok());
                println!(
                    "Parsed {} in {:?} [{:?}], {} SAX events processed",
                    ByteSize(len),
                    start.elapsed(),
                    path,
                    counter
                );
            }
        }
    }
}
389}