1#![allow(dead_code)]
2
3use crate::event::JsonEvent;
4use anyhow::{anyhow, Result};
5use std::borrow::Cow;
6use std::collections::VecDeque;
7use std::io::{BufRead, Error, ErrorKind, Seek, SeekFrom};
8use std::str;
9
10#[allow(dead_code)]
11pub struct JsonReader<R> {
12 reader: R,
13 state_stack: Vec<JsonState>,
14 element_read: bool,
15 remain_events: VecDeque<JsonEvent<'static>>,
16 max_stack_size: Option<usize>,
17}
18
19impl<R: BufRead + Seek> JsonReader<R> {
20 pub fn from_reader(reader: R) -> Self {
21 Self {
22 reader,
23 state_stack: Vec::new(),
24 element_read: false,
25 max_stack_size: None,
26 remain_events: VecDeque::new(),
27 }
28 }
29
30 pub fn max_stack_size(&mut self, size: usize) -> &mut Self {
32 self.max_stack_size = Some(size);
33 self
34 }
35
36 pub fn read_event<'a>(&mut self, buffer: &'a mut Vec<u8>) -> Result<JsonEvent<'a>> {
37 if let Some(ev) = self.remain_events.pop_front() {
38 return Ok(ev);
39 }
40
41 match self.lookup_front_segment_whitespaces(buffer)? {
42 SkipWhitespace::EmptyBuffer => {
43 if self.state_stack.is_empty() && self.element_read {
44 Ok(JsonEvent::Eof)
45 } else {
46 Err(anyhow!(Error::from(ErrorKind::UnexpectedEof)))
47 }
48 }
49 SkipWhitespace::Skip(whitespaces, _) => Ok(JsonEvent::WhiteSpace(whitespaces)),
50 SkipWhitespace::NoSkip(front) => match front {
51 b'{' => {
52 self.reader.consume(1);
53 self.check_stack_size()?;
54 self.state_stack.push(JsonState::FirstObjectKey);
55 Ok(JsonEvent::StartObject)
56 }
57 b'}' => {
58 self.reader.consume(1);
59 if matches!(
60 self.state_stack.pop(),
61 Some(JsonState::FirstObjectKey) | Some(JsonState::LastObjectKey)
62 ) {
63 self.read_after_value(JsonEvent::EndObject, buffer)
64 } else {
65 Err(anyhow!(Error::new(
66 ErrorKind::InvalidData,
67 "Closing a not opened object",
68 )))
69 }
70 }
71 b'[' => {
72 self.reader.consume(1);
73 self.check_stack_size()?;
74 self.state_stack.push(JsonState::FirstArray);
75 Ok(JsonEvent::StartArray)
76 }
77 b']' => {
78 self.reader.consume(1);
79 if matches!(
80 self.state_stack.pop(),
81 Some(JsonState::FirstArray) | Some(JsonState::LastArray)
82 ) {
83 self.read_after_value(JsonEvent::EndArray, buffer)
84 } else {
85 Err(anyhow!(Error::new(
86 ErrorKind::InvalidData,
87 "Closing a not opened array",
88 )))
89 }
90 }
91 b'"' => self.parse_string(buffer),
92 b't' => self.parse_constant::<4>("true", JsonEvent::Boolean(true), buffer),
93 b'f' => self.parse_constant::<5>("false", JsonEvent::Boolean(false), buffer),
94 b'n' => self.parse_constant::<4>("null", JsonEvent::Null, buffer),
95 b'-' | b'0'..=b'9' => self.parse_number(front, buffer),
96 c => {
97 self.reader.consume(1);
98 Err(anyhow!(Error::new(
99 ErrorKind::InvalidData,
100 format!("Unexpected char: {}", char::from(c)),
101 )))
102 }
103 },
104 }
105 }
106
107 fn parse_string<'a>(&mut self, output: &'a mut Vec<u8>) -> Result<JsonEvent<'a>> {
108 output.clear();
109 self.reader.consume(1);
110
111 #[derive(Eq, PartialEq, Copy, Clone)]
112 #[allow(dead_code)]
113 enum StringState {
114 Default,
115 Escape,
116 }
117
118 let mut state = StringState::Default;
119 loop {
120 match state {
121 StringState::Default => {
122 let buffer = match self.reader.fill_buf() {
123 Ok(buf) => {
124 if buf.is_empty() {
125 return Err(anyhow!(Error::from(ErrorKind::UnexpectedEof)));
126 } else {
127 buf
128 }
129 }
130 Err(e) => {
131 if e.kind() == ErrorKind::Interrupted {
132 continue;
133 } else {
134 return Err(anyhow!(e));
135 }
136 }
137 };
138 let mut i = 0;
139 for c in buffer {
140 i += 1;
141 match *c {
142 b'"' => {
143 self.reader.consume(i);
144 return self.read_after_value(
145 JsonEvent::String(Cow::Owned(
146 String::from_utf8(output.clone())
147 .map_err(|e| Error::new(ErrorKind::InvalidData, e))?,
148 )),
149 output,
150 );
151 }
152 b'\\' => {
153 state = StringState::Escape;
154 break;
155 }
156 0..=0x1F => {
157 self.reader.consume(i);
158 return Err(anyhow!(Error::new(
159 ErrorKind::InvalidData,
160 "Control characters are not allowed in JSON",
161 )));
162 }
163 c => output.push(c),
164 }
165 }
166 self.reader.consume(i);
167 }
168 StringState::Escape => {
169 let c = self.lookup_mandatory_front()?;
170 self.reader.consume(1);
171 match c {
172 b'"' => {
173 output.push(b'"');
174 }
175 b'\\' => {
176 output.push(b'\\');
177 }
178 b'/' => {
179 output.push(b'/');
180 }
181 b'b' => {
182 output.push(8);
183 }
184 b'f' => {
185 output.push(12);
186 }
187 b'n' => {
188 output.push(b'\n');
189 }
190 b'r' => {
191 output.push(b'\r');
192 }
193 b't' => {
194 output.push(b'\t');
195 }
196 b'u' => {
197 let mut buf = [0u8; 4];
198 self.reader.read_exact(&mut buf)?;
199 let code_point = read_hexa_char(&buf)?;
200 if let Some(c) = char::from_u32(code_point) {
201 output.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
202 } else {
203 let high_surrogate = code_point;
204 let mut buf = [0u8; 6];
205 self.reader.read_exact(&mut buf)?;
206 if !buf.starts_with(b"\\u") {
207 return Err(anyhow!(Error::new(
208 ErrorKind::InvalidData,
209 format!(
210 "\\u{:X} is a surrogate should be followed by an other surrogate",
211 high_surrogate
212 )),
213 ));
214 }
215 let low_surrogate = read_hexa_char(&buf[2..])?;
216 let code_point = 0x10000
217 + ((high_surrogate & 0x03FF) << 10)
218 + (low_surrogate & 0x03FF);
219 if let Some(c) = char::from_u32(code_point) {
220 output.extend_from_slice(c.encode_utf8(&mut buf).as_bytes())
221 } else {
222 return Err(anyhow!(Error::new(
223 ErrorKind::InvalidData,
224 format!(
225 "\\u{:X}\\u{:X} is an invalid surrogate pair",
226 high_surrogate, low_surrogate
227 ),
228 )));
229 }
230 }
231 }
232 _ => {
233 return Err(anyhow!(Error::new(
234 ErrorKind::InvalidData,
235 "Invalid string escape",
236 )));
237 }
238 }
239 state = StringState::Default;
240 }
241 }
242 }
243 }
244
245 fn parse_constant<'a, const SIZE: usize>(
246 &mut self,
247 expected: &str,
248 value: JsonEvent<'a>,
249 buffer: &mut Vec<u8>,
250 ) -> Result<JsonEvent<'a>> {
251 debug_assert_eq!(expected.len(), SIZE);
252 let mut buf = [0u8; SIZE];
253 self.reader.read_exact(&mut buf)?;
254 if buf == expected.as_bytes() {
255 self.read_after_value(value, buffer)
256 } else {
257 Err(anyhow!(Error::new(
258 ErrorKind::InvalidData,
259 format!(
260 "{} expected, found {}",
261 expected,
262 str::from_utf8(&buf).map_err(|e| Error::new(ErrorKind::InvalidData, e))?
263 ),
264 )))
265 }
266 }
267
268 fn parse_number<'a>(
269 &mut self,
270 first_byte: u8,
271 output: &'a mut Vec<u8>,
272 ) -> Result<JsonEvent<'a>> {
273 output.clear();
274 if first_byte == b'-' {
275 output.push(b'-');
276 self.reader.consume(1);
277 }
278 let c = self.lookup_mandatory_front()?;
281 match c {
282 b'0' => {
283 output.push(b'0');
284 self.reader.consume(1);
285 }
286 b'1'..=b'9' => {
287 output.push(c);
288 self.reader.consume(1);
289 self.read_digits(output)?;
290 }
291 _ => {
292 return Err(anyhow!(Error::new(
293 ErrorKind::InvalidData,
294 "Invalid number"
295 )))
296 }
297 }
298
299 if self.lookup_front()? == Some(b'.') {
301 output.push(b'.');
302 self.reader.consume(1);
303 self.read_char(|c| matches!(c, b'0'..=b'9'), output)?;
304 self.read_digits(output)?;
305 }
306
307 if let Some(c) = self.lookup_front()? {
309 if c == b'e' || c == b'E' {
310 output.push(c);
311 self.reader.consume(1);
312 let c = self.lookup_mandatory_front()?;
313 match c {
314 b'-' | b'+' => {
315 output.push(c);
316 self.reader.consume(1);
317 self.read_char(|c| matches!(c, b'0'..=b'9'), output)?;
318 }
319 b'0'..=b'9' => {
320 output.push(c);
321 self.reader.consume(1);
322 }
323 _ => {
324 return Err(anyhow!(Error::new(
325 ErrorKind::InvalidData,
326 format!("Invalid number. Found char {}", char::from(c)),
327 )))
328 }
329 }
330 self.read_digits(output)?;
331 }
332 }
333
334 self.read_after_value(
335 JsonEvent::Number(Cow::Owned(
336 String::from_utf8(output.clone())
337 .map_err(|e| Error::new(ErrorKind::InvalidData, e))?,
338 )),
339 output,
340 )
341 }
342
343 fn read_char(&mut self, valid: impl Fn(u8) -> bool, output: &mut Vec<u8>) -> Result<()> {
344 let c = self.lookup_mandatory_front()?;
345 if valid(c) {
346 output.push(c);
347 self.reader.consume(1);
348 Ok(())
349 } else {
350 Err(anyhow!(Error::new(
351 ErrorKind::InvalidData,
352 format!("Invalid number. Found char {}", char::from(c)),
353 )))
354 }
355 }
356
357 fn read_digits(&mut self, output: &mut Vec<u8>) -> Result<()> {
358 while let Some(c) = self.lookup_front()? {
359 if matches!(c, b'0'..=b'9') {
360 output.push(c);
361 self.reader.consume(1);
362 } else {
363 break;
364 }
365 }
366 Ok(())
367 }
368
369 fn read_after_value<'a>(
370 &mut self,
371 value: JsonEvent<'a>,
372 buffer: &mut Vec<u8>,
373 ) -> Result<JsonEvent<'a>> {
374 let JsonReader {
375 reader,
376 remain_events,
377 ..
378 } = self;
379
380 type SkipMatchValue<'a> = (JsonEvent<'a>, bool, Option<JsonEvent<'static>>);
381 let mut skip_with_match_before =
382 |skip_whitespace,
383 match_func: Box<dyn FnOnce(Option<u8>) -> Result<SkipMatchValue<'a>>>| {
384 match skip_whitespace {
385 SkipWhitespace::NoSkip(front) => {
386 let (event, _, next) = match_func(Some(front))?;
387 if let Some(next) = next {
388 remain_events.push_back(next)
389 }
390 Ok(event)
391 }
392 SkipWhitespace::Skip(whitespace, front) => {
393 let (event, is_before, next) = match_func(front)?;
394
395 if is_before {
396 remain_events.push_back(event.into_owned());
397 Ok(JsonEvent::WhiteSpace(whitespace))
398 } else {
399 remain_events.push_back(JsonEvent::WhiteSpace(whitespace));
400 if let Some(next) = next {
401 remain_events.push_back(next)
402 }
403 Ok(event.into_owned())
404 }
405 }
406 SkipWhitespace::EmptyBuffer => Err(anyhow!(Error::new(
407 ErrorKind::UnexpectedEof,
408 "Unexpected end of input while parsing JSON",
409 ))),
410 }
411 };
412
413 match self.state_stack.pop() {
414 Some(JsonState::FirstObjectKey) | Some(JsonState::NextObjectKey) => {
415 skip_with_match_before(
416 lookup_front_segment_whitespaces_impl(reader, buffer)?,
417 Box::new(|front| {
418 if front == Some(b':') {
419 self.reader.consume(1);
420 self.state_stack.push(JsonState::ObjectValue);
421 if let JsonEvent::String(value) = value {
422 Ok((JsonEvent::ObjectKey(value), true, None))
423 } else {
424 Err(anyhow!(Error::new(
425 ErrorKind::InvalidData,
426 "Object keys should strings",
427 )))
428 }
429 } else {
430 Err(anyhow!(Error::new(
431 ErrorKind::InvalidData,
432 "Object keys should be followed by ':'",
433 )))
434 }
435 }),
436 )
437 }
438 Some(JsonState::ObjectValue) => skip_with_match_before(
439 lookup_front_segment_whitespaces_impl(reader, buffer)?,
440 Box::new(|front| match front {
441 Some(b',') => {
442 self.reader.consume(1);
443 self.state_stack.push(JsonState::NextObjectKey);
444 Ok((value, false, Some(JsonEvent::NextObjectValue)))
445 }
446 Some(b'}') => {
447 self.state_stack.push(JsonState::LastObjectKey);
448 Ok((value, false, None))
449 }
450 _ => Err(anyhow!(Error::new(
451 ErrorKind::InvalidData,
452 "Object values should be followed by ',' or '}'",
453 ))),
454 }),
455 ),
456 Some(JsonState::FirstArray) | Some(JsonState::NextArray) => skip_with_match_before(
457 lookup_front_segment_whitespaces_impl(reader, buffer)?,
458 Box::new(|front| match front {
459 Some(b',') => {
460 self.reader.consume(1);
461 self.state_stack.push(JsonState::NextArray);
462 Ok((value, false, Some(JsonEvent::NextArrayValue)))
463 }
464 Some(b']') => {
465 self.state_stack.push(JsonState::LastArray);
466 Ok((value, false, None))
467 }
468 _ => Err(anyhow!(Error::new(
469 ErrorKind::InvalidData,
470 "Array values should be followed by ',' or ']'",
471 ))),
472 }),
473 ),
474 None => {
475 if self.element_read {
476 Err(anyhow!(Error::new(
477 ErrorKind::InvalidData,
478 "JSON trailing content"
479 )))
480 } else {
481 self.element_read = true;
482 Ok(value)
483 }
484 }
485 Some(JsonState::LastObjectKey) => Err(anyhow!(Error::new(
486 ErrorKind::InvalidData,
487 "JSON object elements should be separated by commas",
488 ))),
489 Some(JsonState::LastArray) => Err(anyhow!(Error::new(
490 ErrorKind::InvalidData,
491 "JSON array elements should be separated by commas",
492 ))),
493 }
494 }
495
496 fn peek_front_skipping_whitespaces(&mut self) -> Result<Option<u8>> {
497 let mut back_pos = None;
498 loop {
499 match self.reader.fill_buf() {
500 Ok(buf) => {
501 if buf.is_empty() {
502 return Ok(None);
503 }
504 let skipped = skip_whitespaces(buf);
505 if skipped == buf.len() {
506 back_pos = Some(self.reader.stream_position()?);
507 self.reader.consume(skipped);
508 } else {
509 let result = Some(buf[skipped]);
510 if let Some(bp) = back_pos {
511 self.reader.seek(SeekFrom::Start(bp))?;
512 }
513 return Ok(result);
514 }
515 }
516 Err(error) => {
517 if error.kind() != ErrorKind::Interrupted {
518 return Err(anyhow!(error));
519 }
520 }
521 }
522 }
523 }
524
525 fn lookup_front_segment_whitespaces(&mut self, output: &mut Vec<u8>) -> Result<SkipWhitespace> {
526 lookup_front_segment_whitespaces_impl(&mut self.reader, output)
527 }
528
529 fn lookup_mandatory_front(&mut self) -> Result<u8> {
530 if let Some(v) = self.lookup_front()? {
531 Ok(v)
532 } else {
533 Err(anyhow!(Error::from(ErrorKind::UnexpectedEof)))
534 }
535 }
536
537 fn lookup_front(&mut self) -> Result<Option<u8>> {
538 loop {
539 match self.reader.fill_buf() {
540 Ok(buf) => return Ok(if buf.is_empty() { None } else { Some(buf[0]) }),
541 Err(error) => {
542 if error.kind() != ErrorKind::Interrupted {
543 return Err(anyhow!(error));
544 }
545 }
546 }
547 }
548 }
549
550 fn check_stack_size(&self) -> Result<()> {
551 if let Some(max_stack_size) = self.max_stack_size {
552 if self.state_stack.len() > max_stack_size {
553 Err(anyhow!(Error::new(
554 ErrorKind::InvalidData,
555 format!(
556 "Max stack size of {} reached on an object opening",
557 max_stack_size
558 ),
559 )))
560 } else {
561 Ok(())
562 }
563 } else {
564 Ok(())
565 }
566 }
567}
568
/// Parser position inside the innermost open array or object.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
#[allow(dead_code)]
enum JsonState {
    /// Inside an array, before its first value.
    FirstArray,
    /// Inside an array, after a `,`.
    NextArray,
    /// Inside an array whose last value is followed by `]`.
    LastArray,
    /// Inside an object, before its first key.
    FirstObjectKey,
    /// Inside an object, after a `,`.
    NextObjectKey,
    /// Inside an object whose last value is followed by `}`.
    LastObjectKey,
    /// Inside an object, after a key and its `:`.
    ObjectValue,
}
580
/// Returns the length of the leading run of JSON whitespace (space, tab,
/// line feed, carriage return) in `buf`; `buf.len()` if it is all whitespace.
#[allow(dead_code)]
fn skip_whitespaces(buf: &[u8]) -> usize {
    buf.iter()
        .position(|byte| !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
        .unwrap_or(buf.len())
}
590fn read_hexa_char(input: &[u8]) -> Result<u32> {
591 let mut value = 0;
592 for c in input.iter().copied() {
593 value = value * 16
594 + match c {
595 b'0'..=b'9' => u32::from(c) - u32::from(b'0'),
596 b'a'..=b'f' => u32::from(c) - u32::from(b'a') + 10,
597 b'A'..=b'F' => u32::from(c) - u32::from(b'A') + 10,
598 _ => {
599 return Err(anyhow!(Error::new(
600 ErrorKind::InvalidData,
601 "Unexpected character in a unicode escape",
602 )))
603 }
604 }
605 }
606 Ok(value)
607}
608
609fn lookup_front_segment_whitespaces_impl<R: BufRead + Seek>(
610 reader: &mut R,
611 output: &mut Vec<u8>,
612) -> Result<SkipWhitespace> {
613 output.clear();
614 loop {
615 match reader.fill_buf() {
616 Ok(buf) => {
617 if buf.is_empty() {
618 return if output.is_empty() {
619 Ok(SkipWhitespace::EmptyBuffer)
620 } else {
621 Ok(SkipWhitespace::Skip(
622 String::from_utf8(output.clone())?,
623 None,
624 ))
625 };
626 }
627 let skipped = skip_whitespaces(buf);
628 if skipped == buf.len() {
629 output.extend_from_slice(buf);
630 reader.consume(skipped);
631 } else {
632 let c = buf[skipped];
633 output.extend_from_slice(&buf[0..skipped]);
634 reader.consume(skipped);
635
636 return if output.is_empty() {
637 Ok(SkipWhitespace::NoSkip(c))
638 } else {
639 Ok(SkipWhitespace::Skip(
640 String::from_utf8(output.clone())?,
641 Some(c),
642 ))
643 };
644 }
645 }
646 Err(error) => {
647 if error.kind() != ErrorKind::Interrupted {
648 return Err(anyhow!(error));
649 }
650 }
651 }
652 }
653}
654
/// Outcome of skipping the whitespace at the front of the input.
enum SkipWhitespace {
    /// The input ended and no whitespace was found.
    EmptyBuffer,
    /// No whitespace was found; holds the next (unconsumed) byte.
    NoSkip(u8),
    /// Whitespace was consumed; holds that whitespace and the next
    /// (unconsumed) byte, or `None` if the input ended right after it.
    Skip(String, Option<u8>),
}
660
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{BufReader, Cursor};

    /// Smoke test: a small document must be readable event by event up to
    /// `Eof` without any error. Each event is dumped with `dbg!`.
    #[test]
    fn it_works() {
        let json_str = r#"{
 "nadeko": "cute",
 "sumire": "cute",
 "number": 1234,
 "numbers": [1, 2, 3]
}"#
        .to_string();

        let source = BufReader::new(Cursor::new(json_str.as_bytes()));
        let mut reader = JsonReader::from_reader(source);
        let mut buffer = Vec::new();
        loop {
            let reached_eof = matches!(
                dbg!(reader.read_event(&mut buffer).unwrap()),
                JsonEvent::Eof
            );
            if reached_eof {
                break;
            }
        }
    }
}