1use std::borrow::Cow;
3use std::fs::File;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7use chisel_common::char::coords::Coords;
8use chisel_decoders::{default_decoder, new_decoder, Encoding};
9use chisel_json_pointer::JsonPointer;
10use chisel_lexers::json::lexer::Lexer;
11use chisel_lexers::json::tokens::Token;
12
13use crate::json::events::{Event, Match};
14use crate::parser_error;
15use crate::{ParserError, ParserErrorDetails, ParserResult};
16
// Builds an `Event` in place and hands a reference to it to the callback; the macro
// evaluates to whatever the callback returns.
//
// * `emit_event!(callback, match, span)`       - the event carries no pointer (`None`).
// * `emit_event!(callback, match, span, path)` - the event borrows `path` as its pointer.
macro_rules! emit_event {
    ($callback:expr, $matched:expr, $span:expr) => {
        $callback(&Event {
            matched: $matched,
            span: $span,
            pointer: None,
        })
    };
    ($callback:expr, $matched:expr, $span:expr, $path:expr) => {
        $callback(&Event {
            matched: $matched,
            span: $span,
            pointer: Some(&$path),
        })
    };
}
33
34pub struct Parser {
36 encoding: Encoding,
37}
38
39impl Default for Parser {
40 fn default() -> Self {
42 Self {
43 encoding: Default::default(),
44 }
45 }
46}
47
48impl Parser {
49 pub fn with_encoding(encoding: Encoding) -> Self {
51 Self { encoding }
52 }
53
54 pub fn parse_file<PathLike: AsRef<Path>, Callback>(
55 &self,
56 path: PathLike,
57 cb: &mut Callback,
58 ) -> ParserResult<()>
59 where
60 Callback: FnMut(&Event) -> ParserResult<()>,
61 {
62 match File::open(&path) {
63 Ok(f) => {
64 let mut reader = BufReader::new(f);
65 let mut chars = new_decoder(&mut reader, self.encoding);
66 self.parse(&mut chars, cb)
67 }
68 Err(_) => {
69 parser_error!(ParserErrorDetails::InvalidFile)
70 }
71 }
72 }
73
74 pub fn parse_bytes<Callback>(&self, bytes: &[u8], cb: &mut Callback) -> ParserResult<()>
75 where
76 Callback: FnMut(&Event) -> ParserResult<()>,
77 {
78 if bytes.is_empty() {
79 return parser_error!(ParserErrorDetails::ZeroLengthInput, Coords::default());
80 }
81 let mut reader = BufReader::new(bytes);
82 let mut chars = default_decoder(&mut reader);
83 self.parse(&mut chars, cb)
84 }
85
86 pub fn parse_str<Callback>(&self, str: &str, cb: &mut Callback) -> ParserResult<()>
87 where
88 Callback: FnMut(&Event) -> ParserResult<()>,
89 {
90 if str.is_empty() {
91 return parser_error!(ParserErrorDetails::ZeroLengthInput, Coords::default());
92 }
93 let mut reader = BufReader::new(str.as_bytes());
94 let mut chars = default_decoder(&mut reader);
95 self.parse(&mut chars, cb)
96 }
97
98 pub fn parse_buffer<Callback>(
100 &self,
101 buffer: &mut impl BufRead,
102 cb: &mut Callback,
103 ) -> ParserResult<()>
104 where
105 Callback: FnMut(&Event) -> ParserResult<()>,
106 {
107 let mut chars = default_decoder(buffer);
108 self.parse(&mut chars, cb)
109 }
110
111 pub fn parse<Callback>(
112 &self,
113 chars: &mut impl Iterator<Item = char>,
114 cb: &mut Callback,
115 ) -> ParserResult<()>
116 where
117 Callback: FnMut(&Event) -> ParserResult<()>,
118 {
119 let mut pointer = JsonPointer::root();
120 let mut lexer = Lexer::new(chars);
121 match lexer.consume()? {
122 (Token::StartObject, span) => {
123 emit_event!(cb, Match::StartOfInput, span)?;
124 emit_event!(cb, Match::StartObject, span, pointer)?;
125 self.parse_object(&mut lexer, &mut pointer, cb)
126 }
127 (Token::StartArray, span) => {
128 emit_event!(cb, Match::StartOfInput, span, pointer)?;
129 emit_event!(cb, Match::StartArray, span, pointer)?;
130 self.parse_array(&mut lexer, &mut pointer, cb)
131 }
132 (_, span) => {
133 parser_error!(ParserErrorDetails::InvalidRootObject, span.start)
134 }
135 }
136 }
137
138 #[inline]
139 fn parse_value<Callback>(
140 &self,
141 lexer: &mut Lexer,
142 pointer: &mut JsonPointer,
143 cb: &mut Callback,
144 ) -> ParserResult<()>
145 where
146 Callback: FnMut(&Event) -> ParserResult<()>,
147 {
148 match lexer.consume()? {
149 (Token::StartObject, span) => {
150 emit_event!(cb, Match::StartObject, span, pointer)?;
151 self.parse_object(lexer, pointer, cb)
152 }
153 (Token::StartArray, span) => {
154 emit_event!(cb, Match::StartArray, span, pointer)?;
155 self.parse_array(lexer, pointer, cb)
156 }
157 (Token::Str(str), span) => {
158 emit_event!(cb, Match::String(Cow::Borrowed(&str)), span, pointer)
159 }
160 (Token::LazyNumeric(value), span) => {
161 emit_event!(cb, Match::Numeric(value), span, pointer)
162 }
163 (Token::Float(value), span) => {
164 emit_event!(cb, Match::Float(value), span, pointer)
165 }
166 (Token::Integer(value), span) => {
167 emit_event!(cb, Match::Integer(value), span, pointer)
168 }
169 (Token::Boolean(value), span) => {
170 emit_event!(cb, Match::Boolean(value), span, pointer)
171 }
172 (Token::Null, span) => {
173 emit_event!(cb, Match::Null, span, pointer)
174 }
175 (token, span) => {
176 parser_error!(
177 ParserErrorDetails::UnexpectedToken(token.to_string()),
178 span.start
179 )
180 }
181 }
182 }
183
184 fn parse_object<Callback>(
186 &self,
187 lexer: &mut Lexer,
188 pointer: &mut JsonPointer,
189 cb: &mut Callback,
190 ) -> ParserResult<()>
191 where
192 Callback: FnMut(&Event) -> ParserResult<()>,
193 {
194 loop {
195 match lexer.consume()? {
196 (Token::Str(str), span) => {
197 pointer.push_name(str.replace("\"", ""));
198 emit_event!(cb, Match::ObjectKey(Cow::Borrowed(&str)), span, pointer)?;
199 let should_be_colon = lexer.consume()?;
200 match should_be_colon {
201 (Token::Colon, _) => {
202 self.parse_value(lexer, pointer, cb)?;
203 pointer.pop();
204 }
205 (_, _) => {
206 return parser_error!(
207 ParserErrorDetails::PairExpected,
208 should_be_colon.1.start
209 )
210 }
211 }
212 }
213 (Token::Comma, _) => (),
214 (Token::EndObject, span) => {
215 return emit_event!(cb, Match::EndObject, span, pointer);
216 }
217 (_token, span) => {
218 return parser_error!(ParserErrorDetails::InvalidArray, span.start)
219 }
220 }
221 }
222 }
223
224 fn parse_array<Callback>(
226 &self,
227 lexer: &mut Lexer,
228 pointer: &mut JsonPointer,
229 cb: &mut Callback,
230 ) -> ParserResult<()>
231 where
232 Callback: FnMut(&Event) -> ParserResult<()>,
233 {
234 let mut index = 0;
235 let mut expect_value: bool = true;
236 let mut first_pass = true;
237 loop {
238 pointer.push_index(index);
239 match lexer.consume()? {
240 (Token::StartArray, span) => {
241 emit_event!(cb, Match::StartArray, span, pointer)?;
242 self.parse_array(lexer, pointer, cb)?;
243 }
244 (Token::EndArray, span) => {
245 return if !expect_value || first_pass {
246 pointer.pop();
247 emit_event!(cb, Match::EndArray, span, pointer)
248 } else {
249 parser_error!(ParserErrorDetails::ValueExpected, span.start)
250 }
251 }
252 (Token::StartObject, span) => {
253 emit_event!(cb, Match::StartObject, span, pointer)?;
254 self.parse_object(lexer, pointer, cb)?;
255 }
256 (Token::Str(str), span) => {
257 emit_event!(cb, Match::String(Cow::Borrowed(&str)), span, pointer)?;
258 }
259 (Token::LazyNumeric(value), span) => {
260 emit_event!(cb, Match::Numeric(value), span, pointer)?;
261 }
262 (Token::Float(value), span) => {
263 emit_event!(cb, Match::Float(value), span, pointer)?;
264 }
265 (Token::Integer(value), span) => {
266 emit_event!(cb, Match::Integer(value), span, pointer)?;
267 }
268 (Token::Boolean(value), span) => {
269 emit_event!(cb, Match::Boolean(value), span, pointer)?;
270 }
271 (Token::Null, span) => emit_event!(cb, Match::Null, span, pointer)?,
272 (Token::Comma, span) => {
273 if !expect_value {
274 index += 1
275 } else {
276 return parser_error!(ParserErrorDetails::ValueExpected, span.start);
277 }
278 }
279 (_token, span) => {
280 return parser_error!(ParserErrorDetails::InvalidArray, span.start);
281 }
282 }
283 first_pass = false;
284 expect_value = !expect_value;
285 pointer.pop();
286 }
287 }
288}
289
290#[cfg(test)]
291mod tests {
292 use std::io::BufReader;
293 use std::path::PathBuf;
294 use std::time::Instant;
295 use std::{env, fs};
296
297 use bytesize::ByteSize;
298 use chisel_common::char::coords::Coords;
299
300 use chisel_common::relative_file;
301
302 use crate::json::sax::Parser;
303 use crate::json::specs;
304 use crate::ParserErrorDetails;
305
/// An empty string must be rejected with `ZeroLengthInput`.
#[test]
fn should_puke_on_empty_input() {
    let outcome = Parser::default().parse_str("", &mut |_event| Ok(()));
    assert!(outcome.is_err());
    let failure = outcome.err().unwrap();
    assert_eq!(failure.details, ParserErrorDetails::ZeroLengthInput);
}
317
/// A known-good fixture parses without error; the number of events seen is printed.
#[test]
fn should_parse_successfully() {
    let fixture = relative_file!("fixtures/json/valid/events.json");
    let mut events_seen = 0;
    let outcome = Parser::default().parse_file(&fixture, &mut |_event| {
        events_seen += 1;
        Ok(())
    });
    println!("{} SAX events processed", events_seen);
    assert!(outcome.is_ok());
}
330
/// Every invalid-input spec must fail, and fail at the line/column the spec expects.
#[test]
fn should_successfully_handle_basic_invalid_inputs() {
    for spec in specs::invalid_json_specs() {
        let fixture = relative_file!(spec.filename);
        let outcome = Parser::default().parse_file(&fixture, &mut |_event| Ok(()));
        println!("Parse result = {:?}", outcome);
        assert!(outcome.is_err());

        let failure = outcome.err().unwrap();
        let reported = Coords::from_coords(&failure.coords.unwrap());
        assert_eq!(reported.line, spec.expected.coords.line);
        assert_eq!(reported.column, spec.expected.coords.column)
    }
}
350
/// A small, valid document supplied through a `BufRead` source must parse successfully.
#[test]
fn should_allow_for_parsing_of_a_buffer() {
    let input = "{ \"test\" : 2123232323}".as_bytes();
    let mut buffer = BufReader::new(input);
    let parser = Parser::default();
    let parsed = parser.parse_buffer(&mut buffer, &mut |_e| Ok(()));
    // Assert on the outcome: a discarded result would let this test pass even if parsing fails.
    assert!(
        parsed.is_ok(),
        "parsing a valid in-memory buffer failed: {:?}",
        parsed
    );
}
358
/// Every regular file under `fixtures/json/valid` must parse; size, elapsed time and the
/// number of events are printed for each one.
#[test]
fn should_parse_basic_test_files() {
    for entry in fs::read_dir("fixtures/json/valid").unwrap() {
        let candidate = entry.unwrap().path();
        println!("Parsing {:?}", &candidate);
        // Directories and other non-file entries are listed above but not parsed.
        if !candidate.is_file() {
            continue;
        }

        let size = fs::metadata(&candidate).unwrap().len();
        let started = Instant::now();
        let fixture = relative_file!(candidate.to_str().unwrap());
        let mut events_seen = 0;
        let outcome = Parser::default().parse_file(&fixture, &mut |_event| {
            events_seen += 1;
            Ok(())
        });

        if outcome.is_err() {
            println!("Parse of {:?} failed!", &fixture);
            println!("Parse failed with errors: {:?}", &outcome)
        }
        assert!(outcome.is_ok());

        println!(
            "Parsed {} in {:?} [{:?}], {} SAX events processed",
            ByteSize(size),
            started.elapsed(),
            fixture,
            events_seen
        );
    }
}
389}