chisel_json/parsers/
dom.rs

//! The DOM parser.
//!
//! Reads a complete JSON document and builds an in-memory [`JsonValue`] tree from it.
4use std::borrow::Cow;
5use std::fmt::Debug;
6use std::fs::File;
7use std::io::{BufRead, BufReader, Read};
8use std::path::Path;
9
10use crate::lexer::decoders::{DecoderSelector, Encoding};
11use crate::lexer::lexer_core::{Lexer, Token};
12use crate::results::{ParserError, ParserErrorDetails, ParserErrorSource, ParserResult};
13use crate::JsonValue;
14use crate::{dom_parser_error, JsonKeyValue};
15
16/// Main JSON parser struct
17pub struct Parser {
18    decoders: DecoderSelector,
19    encoding: Encoding,
20}
21
22impl Default for Parser {
23    /// The default encoding is Utf-8
24    fn default() -> Self {
25        Self {
26            decoders: Default::default(),
27            encoding: Default::default(),
28        }
29    }
30}
31
32impl Parser {
33    /// Create a new instance of the parser using a specific [Encoding]
34    pub fn with_encoding(encoding: Encoding) -> Self {
35        Self {
36            decoders: Default::default(),
37            encoding,
38        }
39    }
40
41    ///
42    pub fn parse_file<PathLike: AsRef<Path>>(&self, path: PathLike) -> ParserResult<JsonValue> {
43        match File::open(&path) {
44            Ok(f) => {
45                let mut reader = BufReader::new(f);
46                let mut chars = self.decoders.new_decoder(&mut reader, self.encoding);
47                self.parse(&mut chars)
48            }
49            Err(_) => {
50                dom_parser_error!(ParserErrorDetails::InvalidFile)
51            }
52        }
53    }
54
55    pub fn parse_bytes(&self, bytes: &[u8]) -> ParserResult<JsonValue> {
56        let mut reader = BufReader::new(bytes);
57        let mut chars = self.decoders.default_decoder(&mut reader);
58        self.parse(&mut chars)
59    }
60
61    pub fn parse_str(&self, str: &str) -> ParserResult<JsonValue> {
62        let mut reader = BufReader::new(str.as_bytes());
63        let mut chars = self.decoders.default_decoder(&mut reader);
64        self.parse(&mut chars)
65    }
66
67    /// Parse the contents of a buffer (e.g. implementation of [BufRead])
68    pub fn parse_buffer(&self, buffer: &mut impl BufRead) -> ParserResult<JsonValue> {
69        let mut chars = self.decoders.default_decoder(buffer);
70        self.parse(&mut chars)
71    }
72
73    pub fn parse(&self, chars: &mut impl Iterator<Item = char>) -> ParserResult<JsonValue> {
74        let mut lexer = Lexer::new(chars);
75        match lexer.consume()? {
76            (Token::StartObject, _) => self.parse_object(&mut lexer),
77            (Token::StartArray, _) => self.parse_array(&mut lexer),
78            (_, span) => {
79                dom_parser_error!(ParserErrorDetails::InvalidRootObject, span.start)
80            }
81        }
82    }
83
84    fn parse_value(&self, lexer: &mut Lexer) -> ParserResult<JsonValue> {
85        match lexer.consume()? {
86            (Token::StartObject, _) => self.parse_object(lexer),
87            (Token::StartArray, _) => self.parse_array(lexer),
88            (Token::Str(str), _) => Ok(JsonValue::String(Cow::Owned(str))),
89            (Token::Float(value), _) => Ok(JsonValue::Float(value)),
90            (Token::Integer(value), _) => Ok(JsonValue::Integer(value)),
91            (Token::Boolean(value), _) => Ok(JsonValue::Boolean(value)),
92            (Token::Null, _) => Ok(JsonValue::Null),
93            (token, span) => {
94                dom_parser_error!(ParserErrorDetails::UnexpectedToken(token), span.start)
95            }
96        }
97    }
98
99    /// An object is just a list of comma separated KV pairs
100    fn parse_object(&self, lexer: &mut Lexer) -> ParserResult<JsonValue> {
101        let mut pairs = vec![];
102        loop {
103            match lexer.consume()? {
104                (Token::Str(str), _) => {
105                    let should_be_colon = lexer.consume()?;
106                    match should_be_colon {
107                        (Token::Colon, _) => pairs.push(JsonKeyValue {
108                            key: str,
109                            value: self.parse_value(lexer)?,
110                        }),
111                        (_, _) => {
112                            return dom_parser_error!(
113                                ParserErrorDetails::PairExpected,
114                                should_be_colon.1.start
115                            )
116                        }
117                    }
118                }
119                (Token::Comma, _) => (),
120                (Token::EndObject, _) => return Ok(JsonValue::Object(pairs)),
121                (_token, span) => {
122                    return dom_parser_error!(ParserErrorDetails::InvalidObject, span.start);
123                }
124            }
125        }
126    }
127
128    /// An array is just a list of comma separated values
129    fn parse_array(&self, lexer: &mut Lexer) -> ParserResult<JsonValue> {
130        let mut values: Vec<JsonValue> = vec![];
131        loop {
132            match lexer.consume()? {
133                (Token::StartArray, _) => values.push(self.parse_array(lexer)?),
134                (Token::EndArray, _) => return Ok(JsonValue::Array(values)),
135                (Token::StartObject, _) => values.push(self.parse_object(lexer)?),
136                (Token::Str(str), _) => values.push(JsonValue::String(Cow::Owned(str))),
137                (Token::Float(value), _) => values.push(JsonValue::Float(value)),
138                (Token::Integer(value), _) => values.push(JsonValue::Integer(value)),
139                (Token::Boolean(value), _) => values.push(JsonValue::Boolean(value)),
140                (Token::Null, _) => values.push(JsonValue::Null),
141                (Token::Comma, _) => (),
142                (_token, span) => {
143                    return dom_parser_error!(ParserErrorDetails::InvalidArray, span.start);
144                }
145            }
146        }
147    }
148}
149
#[cfg(test)]
mod tests {
    #![allow(unused_macros)]

    use std::path::PathBuf;
    use std::time::Instant;
    use std::{env, fs};

    use bytesize::ByteSize;

    use crate::parsers::dom::Parser;
    use crate::relative_file;
    use crate::results::ParserErrorDetails;

    /// The parser accepts a plain `char` iterator, with no decoder involved.
    #[test]
    fn should_parse_char_iterators_directly() {
        let document = r#"{
            "test" : 1232.0,
            "some other" : "thasdasd",
            "a bool" : true,
            "an array" : [1,2,3,4,5.8,6,7.2,7,8,10]
        }"#;
        let mut characters = document.chars();
        let parser = Parser::default();
        let outcome = parser.parse(&mut characters);
        println!("{outcome:?}");
        assert!(outcome.is_ok())
    }

    /// A fixture containing long arrays parses without error.
    #[test]
    fn should_parse_lengthy_arrays() {
        let fixture = relative_file!("fixtures/json/valid/bc_block.json");
        let parser = Parser::default();
        let outcome = parser.parse_file(&fixture);
        println!("{outcome:?}");
        assert!(outcome.is_ok());
    }

    /// A small schema fixture parses without error.
    #[test]
    fn should_parse_simple_schema() {
        let fixture = relative_file!("fixtures/json/valid/simple_schema.json");
        let parser = Parser::default();
        let outcome = parser.parse_file(&fixture);
        println!("{outcome:?}");
        assert!(outcome.is_ok());
    }

    /// An invalid fixture is rejected with `InvalidRootObject`.
    #[test]
    fn should_successfully_bail() {
        let fixture = relative_file!("fixtures/json/invalid/invalid_1.json");
        let parser = Parser::default();
        let outcome = parser.parse_file(&fixture);
        println!("Parse result = {:?}", outcome);
        assert!(outcome.is_err());
        let failure = outcome.err().unwrap();
        assert_eq!(failure.details, ParserErrorDetails::InvalidRootObject);
    }

    /// Every regular file in the valid fixture directory parses; timings are printed.
    #[test]
    fn should_parse_basic_test_files() {
        for entry in fs::read_dir("fixtures/json/valid").unwrap() {
            let candidate = entry.unwrap().path();
            println!("Parsing {:?}", candidate);
            // Skip sub-directories and anything else that is not a file.
            if !candidate.is_file() {
                continue;
            }
            let size = fs::metadata(&candidate).unwrap().len();
            let started = Instant::now();
            let fixture = relative_file!(candidate.to_str().unwrap());
            let parser = Parser::default();
            let outcome = parser.parse_file(&fixture);
            if outcome.is_err() {
                println!("Parse of {:?} failed!", fixture);
                println!("Parse failed with errors: {:?}", &outcome)
            }
            assert!(outcome.is_ok());
            let elapsed = started.elapsed();
            println!("Parsed {} in {:?} [{:?}]", ByteSize(size), elapsed, fixture);
        }
    }
}