1use std::borrow::Cow;
3use std::fs::File;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7use crate::coords::Coords;
8use crate::lexer::decoders::{DecoderSelector, Encoding};
9use crate::lexer::lexer_core::{Lexer, Token};
10use crate::parsers::sax_events::{Event, Match};
11use crate::pointers::pointer::JsonPointer;
12use crate::results::{ParserError, ParserErrorDetails, ParserErrorSource, ParserResult};
13use crate::sax_parser_error;
14
/// Builds an `Event` and passes a reference to it to the given callback.
///
/// Two call shapes are accepted:
///
/// * `emit_event!(cb, matched, span)` - the event's `pointer` is `None`.
/// * `emit_event!(cb, matched, span, path)` - the event's `pointer` is
///   `Some(&path)`.
///
/// The macro expands to the callback invocation itself, so it evaluates to
/// whatever the callback returns.
macro_rules! emit_event {
    ($callback:expr, $matched:expr, $span:expr) => {
        $callback(&Event {
            matched: $matched,
            span: $span,
            pointer: None,
        })
    };
    ($callback:expr, $matched:expr, $span:expr, $pointer:expr) => {
        $callback(&Event {
            matched: $matched,
            span: $span,
            pointer: Some(&$pointer),
        })
    };
}
31
32pub struct Parser {
34 decoders: DecoderSelector,
35 encoding: Encoding,
36}
37
38impl Default for Parser {
39 fn default() -> Self {
41 Self {
42 decoders: Default::default(),
43 encoding: Default::default(),
44 }
45 }
46}
47
48impl Parser {
49 pub fn with_encoding(encoding: Encoding) -> Self {
51 Self {
52 decoders: Default::default(),
53 encoding,
54 }
55 }
56
57 pub fn parse_file<PathLike: AsRef<Path>, Callback>(
58 &self,
59 path: PathLike,
60 cb: &mut Callback,
61 ) -> ParserResult<()>
62 where
63 Callback: FnMut(&Event) -> ParserResult<()>,
64 {
65 match File::open(&path) {
66 Ok(f) => {
67 let mut reader = BufReader::new(f);
68 let mut chars = self.decoders.new_decoder(&mut reader, self.encoding);
69 self.parse(&mut chars, cb)
70 }
71 Err(_) => {
72 sax_parser_error!(ParserErrorDetails::InvalidFile)
73 }
74 }
75 }
76
77 pub fn parse_bytes<Callback>(&self, bytes: &[u8], cb: &mut Callback) -> ParserResult<()>
78 where
79 Callback: FnMut(&Event) -> ParserResult<()>,
80 {
81 if bytes.is_empty() {
82 return sax_parser_error!(ParserErrorDetails::ZeroLengthInput, Coords::default());
83 }
84 let mut reader = BufReader::new(bytes);
85 let mut chars = self.decoders.default_decoder(&mut reader);
86 self.parse(&mut chars, cb)
87 }
88
89 pub fn parse_str<Callback>(&self, str: &str, cb: &mut Callback) -> ParserResult<()>
90 where
91 Callback: FnMut(&Event) -> ParserResult<()>,
92 {
93 if str.is_empty() {
94 return sax_parser_error!(ParserErrorDetails::ZeroLengthInput, Coords::default());
95 }
96 let mut reader = BufReader::new(str.as_bytes());
97 let mut chars = self.decoders.default_decoder(&mut reader);
98 self.parse(&mut chars, cb)
99 }
100
101 pub fn parse_buffer<Callback>(
103 &self,
104 buffer: &mut impl BufRead,
105 cb: &mut Callback,
106 ) -> ParserResult<()>
107 where
108 Callback: FnMut(&Event) -> ParserResult<()>,
109 {
110 let mut chars = self.decoders.default_decoder(buffer);
111 self.parse(&mut chars, cb)
112 }
113
114 pub fn parse<Callback>(
115 &self,
116 chars: &mut impl Iterator<Item = char>,
117 cb: &mut Callback,
118 ) -> ParserResult<()>
119 where
120 Callback: FnMut(&Event) -> ParserResult<()>,
121 {
122 let mut pointer = JsonPointer::root();
123 let mut lexer = Lexer::new(chars);
124 match lexer.consume()? {
125 (Token::StartObject, span) => {
126 emit_event!(cb, Match::StartOfInput, span)?;
127 emit_event!(cb, Match::StartObject, span, pointer)?;
128 self.parse_object(&mut lexer, &mut pointer, cb)
129 }
130 (Token::StartArray, span) => {
131 emit_event!(cb, Match::StartOfInput, span, pointer)?;
132 emit_event!(cb, Match::StartArray, span, pointer)?;
133 self.parse_array(&mut lexer, &mut pointer, cb)
134 }
135 (_, span) => {
136 sax_parser_error!(ParserErrorDetails::InvalidRootObject, span.start)
137 }
138 }
139 }
140
141 fn parse_value<Callback>(
142 &self,
143 lexer: &mut Lexer,
144 pointer: &mut JsonPointer,
145 cb: &mut Callback,
146 ) -> ParserResult<()>
147 where
148 Callback: FnMut(&Event) -> ParserResult<()>,
149 {
150 match lexer.consume()? {
151 (Token::StartObject, span) => {
152 emit_event!(cb, Match::StartObject, span, pointer)?;
153 self.parse_object(lexer, pointer, cb)
154 }
155 (Token::StartArray, span) => {
156 emit_event!(cb, Match::StartArray, span, pointer)?;
157 self.parse_array(lexer, pointer, cb)
158 }
159 (Token::Str(str), span) => {
160 emit_event!(cb, Match::String(Cow::Borrowed(&str)), span, pointer)
161 }
162 (Token::Float(value), span) => {
163 emit_event!(cb, Match::Float(value), span, pointer)
164 }
165 (Token::Integer(value), span) => {
166 emit_event!(cb, Match::Integer(value), span, pointer)
167 }
168 (Token::Boolean(value), span) => {
169 emit_event!(cb, Match::Boolean(value), span, pointer)
170 }
171 (Token::Null, span) => {
172 emit_event!(cb, Match::Null, span, pointer)
173 }
174 (token, span) => {
175 sax_parser_error!(ParserErrorDetails::UnexpectedToken(token), span.start)
176 }
177 }
178 }
179
180 fn parse_object<Callback>(
182 &self,
183 lexer: &mut Lexer,
184 pointer: &mut JsonPointer,
185 cb: &mut Callback,
186 ) -> ParserResult<()>
187 where
188 Callback: FnMut(&Event) -> ParserResult<()>,
189 {
190 loop {
191 match lexer.consume()? {
192 (Token::Str(str), span) => {
193 pointer.push_name(str.replace("\"", ""));
194 emit_event!(cb, Match::ObjectKey(Cow::Borrowed(&str)), span, pointer)?;
195 let should_be_colon = lexer.consume()?;
196 match should_be_colon {
197 (Token::Colon, _) => {
198 self.parse_value(lexer, pointer, cb)?;
199 pointer.pop();
200 }
201 (_, _) => {
202 return sax_parser_error!(
203 ParserErrorDetails::PairExpected,
204 should_be_colon.1.start
205 )
206 }
207 }
208 }
209 (Token::Comma, _) => (),
210 (Token::EndObject, span) => {
211 return emit_event!(cb, Match::EndObject, span, pointer);
212 }
213 (_token, span) => {
214 return sax_parser_error!(ParserErrorDetails::InvalidArray, span.start)
215 }
216 }
217 }
218 }
219
220 fn parse_array<Callback>(
222 &self,
223 lexer: &mut Lexer,
224 pointer: &mut JsonPointer,
225 cb: &mut Callback,
226 ) -> ParserResult<()>
227 where
228 Callback: FnMut(&Event) -> ParserResult<()>,
229 {
230 let mut index = 0;
231 loop {
232 pointer.push_index(index);
233 match lexer.consume()? {
234 (Token::StartArray, span) => {
235 emit_event!(cb, Match::StartArray, span, pointer)?;
236 self.parse_array(lexer, pointer, cb)?;
237 }
238 (Token::EndArray, span) => {
239 pointer.pop();
240 return emit_event!(cb, Match::EndArray, span, pointer);
241 }
242 (Token::StartObject, span) => {
243 emit_event!(cb, Match::StartObject, span, pointer)?;
244 self.parse_object(lexer, pointer, cb)?;
245 }
246 (Token::Str(str), span) => {
247 emit_event!(cb, Match::String(Cow::Borrowed(&str)), span, pointer)?;
248 }
249 (Token::Float(value), span) => {
250 emit_event!(cb, Match::Float(value), span, pointer)?;
251 }
252 (Token::Integer(value), span) => {
253 emit_event!(cb, Match::Integer(value), span, pointer)?;
254 }
255 (Token::Boolean(value), span) => {
256 emit_event!(cb, Match::Boolean(value), span, pointer)?;
257 }
258 (Token::Null, span) => emit_event!(cb, Match::Null, span, pointer)?,
259 (Token::Comma, _) => index += 1,
260 (_token, span) => {
261 return sax_parser_error!(ParserErrorDetails::InvalidArray, span.start);
262 }
263 }
264 pointer.pop();
265 }
266 }
267}
268
269#[cfg(test)]
270mod tests {
271 use std::io::BufReader;
272 use std::path::PathBuf;
273 use std::time::Instant;
274 use std::{env, fs};
275
276 use bytesize::ByteSize;
277
278 use crate::parsers::sax::Parser;
279 use crate::relative_file;
280 use crate::results::ParserErrorDetails;
281
282 #[test]
283 fn should_puke_on_empty_input() {
284 let input = "";
285 let parser = Parser::default();
286 let parsed = parser.parse_str(input, &mut |_e| Ok(()));
287 assert!(parsed.is_err());
288 assert_eq!(
289 parsed.err().unwrap().details,
290 ParserErrorDetails::ZeroLengthInput
291 );
292 }
293
294 #[test]
295 fn should_parse_successfully() {
296 let mut counter = 0;
297 let path = relative_file!("fixtures/json/valid/events.json");
298 let parser = Parser::default();
299 let parsed = parser.parse_file(&path, &mut |_e| {
300 counter += 1;
301 Ok(())
302 });
303 println!("{} SAX events processed", counter);
304 assert!(parsed.is_ok());
305 }
306
307 #[test]
308 fn should_successfully_bail() {
309 let path = relative_file!("fixtures/json/invalid/invalid_1.json");
310 let parser = Parser::default();
311 let parsed = parser.parse_file(&path, &mut |_e| Ok(()));
312 println!("Parse result = {:?}", parsed);
313 assert!(parsed.is_err());
314 assert_eq!(
315 parsed.err().unwrap().details,
316 ParserErrorDetails::InvalidRootObject
317 );
318 }
319
320 #[test]
321 fn should_allow_for_parsing_of_a_buffer() {
322 let input = "{ \"test\" : 2123232323}".as_bytes();
323 let mut buffer = BufReader::new(input);
324 let parser = Parser::default();
325 let _parsed = parser.parse_buffer(&mut buffer, &mut |_e| Ok(()));
326 }
327
328 #[test]
329 fn should_parse_basic_test_files() {
330 for f in fs::read_dir("fixtures/json/valid").unwrap() {
331 let path = f.unwrap().path();
332 println!("Parsing {:?}", &path);
333 if path.is_file() {
334 let mut counter = 0;
335 let len = fs::metadata(&path).unwrap().len();
336 let start = Instant::now();
337 let path = relative_file!(path.to_str().unwrap());
338 let parser = Parser::default();
339 let parsed = parser.parse_file(&path, &mut |_e| {
340 counter += 1;
341 Ok(())
342 });
343 if parsed.is_err() {
344 println!("Parse of {:?} failed!", &path);
345 println!("Parse failed with errors: {:?}", &parsed)
346 }
347 assert!(parsed.is_ok());
348 println!(
349 "Parsed {} in {:?} [{:?}], {} SAX events processed",
350 ByteSize(len),
351 start.elapsed(),
352 path,
353 counter
354 );
355 }
356 }
357 }
358}