chisel_parsers/json/
dom.rs

1//! The DOM parser
2//!
3//!
4use std::borrow::Cow;
5use std::fs::File;
6use std::io::{BufRead, BufReader};
7use std::path::Path;
8
9use chisel_decoders::{default_decoder, new_decoder, Encoding};
10use chisel_lexers::json::lexer::Lexer;
11use chisel_lexers::json::tokens::Token;
12
13use crate::json::{JsonKeyValue, JsonNumeric, JsonValue};
14use crate::{parser_error, ParserError, ParserErrorDetails, ParserResult};
15
16/// Main JSON parser struct
17pub struct Parser {
18    /// The current encoding
19    encoding: Encoding,
20}
21
22impl Default for Parser {
23    /// The default encoding is Utf-8
24    fn default() -> Self {
25        Self {
26            encoding: Default::default(),
27        }
28    }
29}
30
31impl Parser {
32    /// Create a new instance of the parser using a specific [Encoding]
33    pub fn with_encoding(encoding: Encoding) -> Self {
34        Self { encoding }
35    }
36
37    ///
38    pub fn parse_file<PathLike: AsRef<Path>>(&self, path: PathLike) -> ParserResult<JsonValue> {
39        match File::open(&path) {
40            Ok(f) => {
41                let mut reader = BufReader::new(f);
42                let mut chars = new_decoder(&mut reader, self.encoding);
43                self.parse(&mut chars)
44            }
45            Err(_) => {
46                parser_error!(ParserErrorDetails::InvalidFile)
47            }
48        }
49    }
50
51    pub fn parse_bytes(&self, bytes: &[u8]) -> ParserResult<JsonValue> {
52        let mut reader = BufReader::new(bytes);
53        let mut chars = default_decoder(&mut reader);
54        self.parse(&mut chars)
55    }
56
57    pub fn parse_str(&self, str: &str) -> ParserResult<JsonValue> {
58        let mut reader = BufReader::new(str.as_bytes());
59        let mut chars = default_decoder(&mut reader);
60        self.parse(&mut chars)
61    }
62
63    /// Parse the contents of a buffer (e.g. implementation of [BufRead])
64    pub fn parse_buffer(&self, buffer: &mut impl BufRead) -> ParserResult<JsonValue> {
65        let mut chars = default_decoder(buffer);
66        self.parse(&mut chars)
67    }
68
69    pub fn parse(&self, chars: &mut impl Iterator<Item = char>) -> ParserResult<JsonValue> {
70        let mut lexer = Lexer::new(chars);
71        match lexer.consume()? {
72            (Token::StartObject, _) => self.parse_object(&mut lexer),
73            (Token::StartArray, _) => self.parse_array(&mut lexer),
74            (_, span) => {
75                parser_error!(ParserErrorDetails::InvalidRootObject, span.start)
76            }
77        }
78    }
79
80    #[inline]
81    fn parse_value(&self, lexer: &mut Lexer) -> ParserResult<JsonValue> {
82        match lexer.consume()? {
83            (Token::StartObject, _) => self.parse_object(lexer),
84            (Token::StartArray, _) => self.parse_array(lexer),
85            (Token::Str(str), _) => Ok(JsonValue::String(Cow::Owned(str))),
86            (Token::LazyNumeric(value), _) => Ok(JsonValue::Number(JsonNumeric::Lazy(value))),
87            (Token::Float(value), _) => Ok(JsonValue::Number(JsonNumeric::Float(value))),
88            (Token::Integer(value), _) => Ok(JsonValue::Number(JsonNumeric::Integer(value))),
89            (Token::Boolean(value), _) => Ok(JsonValue::Boolean(value)),
90            (Token::Null, _) => Ok(JsonValue::Null),
91            (token, span) => {
92                parser_error!(
93                    ParserErrorDetails::UnexpectedToken(token.to_string()),
94                    span.start
95                )
96            }
97        }
98    }
99
100    /// An object is just a list of comma separated KV pairs
101    fn parse_object(&self, lexer: &mut Lexer) -> ParserResult<JsonValue> {
102        let mut pairs = vec![];
103        loop {
104            match lexer.consume()? {
105                (Token::Str(str), _) => {
106                    let should_be_colon = lexer.consume()?;
107                    match should_be_colon {
108                        (Token::Colon, _) => pairs.push(JsonKeyValue {
109                            key: str,
110                            value: self.parse_value(lexer)?,
111                        }),
112                        (_, _) => {
113                            return parser_error!(
114                                ParserErrorDetails::PairExpected,
115                                should_be_colon.1.start
116                            )
117                        }
118                    }
119                }
120                (Token::Comma, _) => (),
121                (Token::EndObject, _) => return Ok(JsonValue::Object(pairs)),
122                (_token, span) => {
123                    return parser_error!(ParserErrorDetails::InvalidObject, span.start);
124                }
125            }
126        }
127    }
128
129    /// An array is just a list of comma separated values, but we need to do additional checking
130    /// to make sure that we don't have consecutive commas, we do allow for empty arrays etc...
131    fn parse_array(&self, lexer: &mut Lexer) -> ParserResult<JsonValue> {
132        let mut values: Vec<JsonValue> = vec![];
133        let mut expect_value: bool = true;
134        loop {
135            match lexer.consume()? {
136                (Token::StartArray, _) => {
137                    values.push(self.parse_array(lexer)?);
138                }
139                (Token::EndArray, span) => {
140                    return if !expect_value || values.is_empty() {
141                        Ok(JsonValue::Array(values))
142                    } else {
143                        parser_error!(ParserErrorDetails::ValueExpected, span.start)
144                    }
145                }
146                (Token::StartObject, _) => values.push(self.parse_object(lexer)?),
147                (Token::Str(str), _) => values.push(JsonValue::String(Cow::Owned(str))),
148                (Token::LazyNumeric(value), _) => {
149                    values.push(JsonValue::Number(JsonNumeric::Lazy(value)))
150                }
151                (Token::Float(value), _) => {
152                    values.push(JsonValue::Number(JsonNumeric::Float(value)))
153                }
154                (Token::Integer(value), _) => {
155                    values.push(JsonValue::Number(JsonNumeric::Integer(value)))
156                }
157                (Token::Boolean(value), _) => values.push(JsonValue::Boolean(value)),
158                (Token::Null, _) => values.push(JsonValue::Null),
159                (Token::Comma, span) => {
160                    if expect_value {
161                        return parser_error!(ParserErrorDetails::ValueExpected, span.start);
162                    }
163                }
164                (_token, span) => {
165                    return parser_error!(ParserErrorDetails::InvalidArray, span.start);
166                }
167            }
168            expect_value = !expect_value
169        }
170    }
171}
172
173#[cfg(test)]
174mod tests {
175    #![allow(unused_macros)]
176    use crate::json::dom::Parser;
177    use crate::json::specs;
178    use bytesize::ByteSize;
179    use chisel_common::char::coords::Coords;
180    use chisel_common::relative_file;
181    use std::path::PathBuf;
182    use std::time::Instant;
183    use std::{env, fs};
184
185    #[test]
186    fn should_parse_char_iterators_directly() {
187        let source = r#"{
188            "test" : 1232.0,
189            "some other" : "thasdasd",
190            "a bool" : true,
191            "an array" : [1,2,3,4,5.8,6,7.2,7,8,10]
192        }"#;
193        let parser = Parser::default();
194        let parsed = parser.parse(&mut source.chars());
195        println!("{parsed:?}");
196        assert!(parsed.is_ok())
197    }
198
199    #[test]
200    fn should_parse_lengthy_arrays() {
201        let path = relative_file!("fixtures/json/valid/bc_block.json");
202        let parser = Parser::default();
203        let parsed = parser.parse_file(&path);
204        println!("{parsed:?}");
205        assert!(parsed.is_ok());
206    }
207
208    #[test]
209    fn should_parse_simple_schema() {
210        let path = relative_file!("fixtures/json/valid/simple_schema.json");
211        let parser = Parser::default();
212        let parsed = parser.parse_file(&path);
213        println!("{parsed:?}");
214        assert!(parsed.is_ok());
215    }
216    #[test]
217    fn should_successfully_handle_basic_invalid_inputs() {
218        for spec in specs::invalid_json_specs() {
219            let path = relative_file!(spec.filename);
220            let parser = Parser::default();
221            let parse_result = parser.parse_file(&path);
222            println!("Parse result = {:?}", parse_result);
223            assert!(&parse_result.is_err());
224            let err = parse_result.err().unwrap();
225            let err_coords = Coords::from_coords(&err.coords.unwrap());
226            assert_eq!(err_coords.line, spec.expected.coords.line);
227            assert_eq!(err_coords.column, spec.expected.coords.column)
228        }
229    }
230
231    #[test]
232    fn should_parse_basic_test_files() {
233        for f in fs::read_dir("fixtures/json/valid").unwrap() {
234            let path = f.unwrap().path();
235            println!("Parsing {:?}", &path);
236            if path.is_file() {
237                let len = fs::metadata(&path).unwrap().len();
238                let start = Instant::now();
239                let path = relative_file!(path.to_str().unwrap());
240                let parser = Parser::default();
241                let parsed = parser.parse_file(&path);
242                if parsed.is_err() {
243                    println!("Parse of {:?} failed!", &path);
244                    println!("Parse failed with errors: {:?}", &parsed)
245                }
246                assert!(parsed.is_ok());
247                println!(
248                    "Parsed {} in {:?} [{:?}]",
249                    ByteSize(len),
250                    start.elapsed(),
251                    path,
252                );
253            }
254        }
255    }
256}