#![allow(dead_code)]
use crate::event::JsonEvent;
use anyhow::{anyhow, Result};
use std::borrow::Cow;
use std::collections::VecDeque;
use std::io::{BufRead, Error, ErrorKind, Seek, SeekFrom};
use std::str;
/// Pull-style JSON reader: each call to `read_event` yields the next
/// `JsonEvent` found in the wrapped input `R`.
#[allow(dead_code)]
pub struct JsonReader<R> {
/// Input the JSON bytes are read from.
reader: R,
/// One `JsonState` per currently open array/object, describing what is
/// expected next inside it.
state_stack: Vec<JsonState>,
/// Set once a top-level value has been completely read.
element_read: bool,
/// Events that were already produced and are handed out by the following
/// `read_event` calls before any new input is read.
remain_events: VecDeque<JsonEvent<'static>>,
/// Optional nesting limit checked by `check_stack_size` when an array or
/// object is opened; `None` disables the check.
max_stack_size: Option<usize>,
}
impl<R: BufRead + Seek> JsonReader<R> {
/// Builds a reader over `reader` with an empty state stack, no queued
/// events, no top-level element read yet and no nesting limit.
pub fn from_reader(reader: R) -> Self {
    let state_stack = Vec::new();
    let remain_events = VecDeque::new();
    Self {
        reader,
        state_stack,
        remain_events,
        element_read: false,
        max_stack_size: None,
    }
}
/// Sets the value that `check_stack_size` compares the state stack length
/// against whenever an object or array is opened.
///
/// Returns `self` so that configuration calls can be chained.
pub fn max_stack_size(&mut self, size: usize) -> &mut Self {
    let limit = Some(size);
    self.max_stack_size = limit;
    self
}
pub fn read_event<'a>(&mut self, buffer: &'a mut Vec<u8>) -> Result<JsonEvent<'a>> {
if let Some(ev) = self.remain_events.pop_front() {
return Ok(ev);
}
match self.lookup_front_segment_whitespaces(buffer)? {
SkipWhitespace::EmptyBuffer => {
if self.state_stack.is_empty() && self.element_read {
Ok(JsonEvent::Eof)
} else {
Err(anyhow!(Error::from(ErrorKind::UnexpectedEof)))
}
}
SkipWhitespace::Skip(whitespaces, _) => Ok(JsonEvent::WhiteSpace(whitespaces)),
SkipWhitespace::NoSkip(front) => match front {
b'{' => {
self.reader.consume(1);
self.check_stack_size()?;
self.state_stack.push(JsonState::FirstObjectKey);
Ok(JsonEvent::StartObject)
}
b'}' => {
self.reader.consume(1);
if matches!(
self.state_stack.pop(),
Some(JsonState::FirstObjectKey) | Some(JsonState::LastObjectKey)
) {
self.read_after_value(JsonEvent::EndObject, buffer)
} else {
Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Closing a not opened object",
)))
}
}
b'[' => {
self.reader.consume(1);
self.check_stack_size()?;
self.state_stack.push(JsonState::FirstArray);
Ok(JsonEvent::StartArray)
}
b']' => {
self.reader.consume(1);
if matches!(
self.state_stack.pop(),
Some(JsonState::FirstArray) | Some(JsonState::LastArray)
) {
self.read_after_value(JsonEvent::EndArray, buffer)
} else {
Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Closing a not opened array",
)))
}
}
b'"' => self.parse_string(buffer),
b't' => self.parse_constant::<4>("true", JsonEvent::Boolean(true), buffer),
b'f' => self.parse_constant::<5>("false", JsonEvent::Boolean(false), buffer),
b'n' => self.parse_constant::<4>("null", JsonEvent::Null, buffer),
b'-' | b'0'..=b'9' => self.parse_number(front, buffer),
c => {
self.reader.consume(1);
Err(anyhow!(Error::new(
ErrorKind::InvalidData,
format!("Unexpected char: {}", char::from(c)),
)))
}
},
}
}
fn parse_string<'a>(&mut self, output: &'a mut Vec<u8>) -> Result<JsonEvent<'a>> {
output.clear();
self.reader.consume(1);
#[derive(Eq, PartialEq, Copy, Clone)]
#[allow(dead_code)]
enum StringState {
Default,
Escape,
}
let mut state = StringState::Default;
loop {
match state {
StringState::Default => {
let buffer = match self.reader.fill_buf() {
Ok(buf) => {
if buf.is_empty() {
return Err(anyhow!(Error::from(ErrorKind::UnexpectedEof)));
} else {
buf
}
}
Err(e) => {
if e.kind() == ErrorKind::Interrupted {
continue;
} else {
return Err(anyhow!(e));
}
}
};
let mut i = 0;
for c in buffer {
i += 1;
match *c {
b'"' => {
self.reader.consume(i);
return self.read_after_value(
JsonEvent::String(Cow::Owned(
String::from_utf8(output.clone())
.map_err(|e| Error::new(ErrorKind::InvalidData, e))?,
)),
output,
);
}
b'\\' => {
state = StringState::Escape;
break;
}
0..=0x1F => {
self.reader.consume(i);
return Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Control characters are not allowed in JSON",
)));
}
c => output.push(c),
}
}
self.reader.consume(i);
}
StringState::Escape => {
let c = self.lookup_mandatory_front()?;
self.reader.consume(1);
match c {
b'"' => {
output.push(b'"');
}
b'\\' => {
output.push(b'\\');
}
b'/' => {
output.push(b'/');
}
b'b' => {
output.push(8);
}
b'f' => {
output.push(12);
}
b'n' => {
output.push(b'\n');
}
b'r' => {
output.push(b'\r');
}
b't' => {
output.push(b'\t');
}
b'u' => {
let mut buf = [0u8; 4];
self.reader.read_exact(&mut buf)?;
let code_point = read_hexa_char(&buf)?;
if let Some(c) = char::from_u32(code_point) {
output.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
} else {
let high_surrogate = code_point;
let mut buf = [0u8; 6];
self.reader.read_exact(&mut buf)?;
if !buf.starts_with(b"\\u") {
return Err(anyhow!(Error::new(
ErrorKind::InvalidData,
format!(
"\\u{:X} is a surrogate should be followed by an other surrogate",
high_surrogate
)),
));
}
let low_surrogate = read_hexa_char(&buf[2..])?;
let code_point = 0x10000
+ ((high_surrogate & 0x03FF) << 10)
+ (low_surrogate & 0x03FF);
if let Some(c) = char::from_u32(code_point) {
output.extend_from_slice(c.encode_utf8(&mut buf).as_bytes())
} else {
return Err(anyhow!(Error::new(
ErrorKind::InvalidData,
format!(
"\\u{:X}\\u{:X} is an invalid surrogate pair",
high_surrogate, low_surrogate
),
)));
}
}
}
_ => {
return Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Invalid string escape",
)));
}
}
state = StringState::Default;
}
}
}
}
/// Parses one of the fixed-length literals (`true`, `false`, `null`).
///
/// Reads exactly `SIZE` bytes from the input and compares them with
/// `expected`; on a match `value` is handed to `read_after_value`.
///
/// # Errors
///
/// * The I/O error of `read_exact` if fewer than `SIZE` bytes are left.
/// * `InvalidData` with an "expected, found" message when the bytes differ.
fn parse_constant<'a, const SIZE: usize>(
    &mut self,
    expected: &str,
    value: JsonEvent<'a>,
    buffer: &mut Vec<u8>,
) -> Result<JsonEvent<'a>> {
    debug_assert_eq!(expected.len(), SIZE);
    let mut buf = [0u8; SIZE];
    self.reader.read_exact(&mut buf)?;
    if buf == expected.as_bytes() {
        self.read_after_value(value, buffer)
    } else {
        // Lossy decoding keeps the "expected, found" diagnostic even when
        // the offending bytes are not valid UTF-8, instead of replacing it
        // with a UTF-8 decoding error.
        Err(anyhow!(Error::new(
            ErrorKind::InvalidData,
            format!(
                "{} expected, found {}",
                expected,
                String::from_utf8_lossy(&buf)
            ),
        )))
    }
}
/// Parses a JSON number starting at the current input position.
///
/// `first_byte` is the byte the caller peeked (`-` or a digit). The
/// textual form of the number is accumulated in `output` (cleared first)
/// and then passed to `read_after_value` as a `Number` event.
///
/// # Errors
///
/// * `UnexpectedEof` if the input ends where a digit or sign is required.
/// * `InvalidData` if a required digit is missing.
fn parse_number<'a>(
    &mut self,
    first_byte: u8,
    output: &'a mut Vec<u8>,
) -> Result<JsonEvent<'a>> {
    let is_digit = |byte: u8| byte.is_ascii_digit();

    output.clear();
    if first_byte == b'-' {
        output.push(b'-');
        self.reader.consume(1);
    }

    // Integer part: a single `0`, or a non-zero digit followed by digits.
    let leading = self.lookup_mandatory_front()?;
    if !is_digit(leading) {
        return Err(anyhow!(Error::new(
            ErrorKind::InvalidData,
            "Invalid number"
        )));
    }
    output.push(leading);
    self.reader.consume(1);
    if leading != b'0' {
        self.read_digits(output)?;
    }

    // Optional fraction: `.` followed by at least one digit.
    if self.lookup_front()? == Some(b'.') {
        output.push(b'.');
        self.reader.consume(1);
        self.read_char(is_digit, output)?;
        self.read_digits(output)?;
    }

    // Optional exponent: `e`/`E`, optional sign, at least one digit.
    match self.lookup_front()? {
        Some(marker) if marker == b'e' || marker == b'E' => {
            output.push(marker);
            self.reader.consume(1);
            let next = self.lookup_mandatory_front()?;
            if next == b'-' || next == b'+' {
                output.push(next);
                self.reader.consume(1);
                self.read_char(is_digit, output)?;
            } else if is_digit(next) {
                output.push(next);
                self.reader.consume(1);
            } else {
                return Err(anyhow!(Error::new(
                    ErrorKind::InvalidData,
                    format!("Invalid number. Found char {}", char::from(next)),
                )));
            }
            self.read_digits(output)?;
        }
        _ => {}
    }

    let text = String::from_utf8(output.clone())
        .map_err(|e| Error::new(ErrorKind::InvalidData, e))?;
    self.read_after_value(JsonEvent::Number(Cow::Owned(text)), output)
}
fn read_char(&mut self, valid: impl Fn(u8) -> bool, output: &mut Vec<u8>) -> Result<()> {
let c = self.lookup_mandatory_front()?;
if valid(c) {
output.push(c);
self.reader.consume(1);
Ok(())
} else {
Err(anyhow!(Error::new(
ErrorKind::InvalidData,
format!("Invalid number. Found char {}", char::from(c)),
)))
}
}
/// Appends every consecutive ASCII digit at the front of the input to
/// `output`, consuming them. Stops, without consuming, at the first
/// non-digit byte or at end of input.
fn read_digits(&mut self, output: &mut Vec<u8>) -> Result<()> {
    while let Some(digit @ b'0'..=b'9') = self.lookup_front()? {
        output.push(digit);
        self.reader.consume(1);
    }
    Ok(())
}
/// Finishes the handling of a just-parsed `value` by looking at what
/// follows it in the input.
///
/// The current `JsonState` is popped. For every state except the top level,
/// whitespace is skipped (collected through `buffer`) and the next byte is
/// inspected:
///
/// * after an object key, `:` is required and consumed, `ObjectValue` is
///   pushed and the `String` event is turned into an `ObjectKey` event;
/// * after an object/array value, `,` is consumed and a
///   `NextObjectValue`/`NextArrayValue` event is queued, while `}`/`]` is
///   left in the input and only recorded as `LastObjectKey`/`LastArray`.
///
/// Skipped whitespace is reported as a `WhiteSpace` event: it is returned
/// before an object key (the key is queued) and queued after a value.
///
/// With an empty stack the value is the top-level element: it is returned
/// as-is the first time, and rejected as trailing content afterwards.
///
/// # Errors
///
/// `UnexpectedEof` when the input ends right after the value inside an
/// array/object, `InvalidData` when the following byte is not allowed in the
/// current state, plus any error from the underlying reader.
fn read_after_value<'a>(
&mut self,
value: JsonEvent<'a>,
buffer: &mut Vec<u8>,
) -> Result<JsonEvent<'a>> {
// Borrow the two fields separately so the helper closure below can hold
// `remain_events` while `reader` is used for the whitespace lookup.
let JsonReader {
reader,
remain_events,
..
} = self;
// (event to emit, "emit the whitespace before the event", optional follow-up event)
type SkipMatchValue<'a> = (JsonEvent<'a>, bool, Option<JsonEvent<'static>>);
// Combines the result of the whitespace lookup with the state-specific
// decision made by `match_func` on the byte following the whitespace.
let mut skip_with_match_before =
|skip_whitespace,
match_func: Box<dyn FnOnce(Option<u8>) -> Result<SkipMatchValue<'a>>>| {
match skip_whitespace {
SkipWhitespace::NoSkip(front) => {
// No whitespace: return the event directly and queue the follow-up.
let (event, _, next) = match_func(Some(front))?;
if let Some(next) = next {
remain_events.push_back(next)
}
Ok(event)
}
SkipWhitespace::Skip(whitespace, front) => {
let (event, is_before, next) = match_func(front)?;
if is_before {
// Whitespace is returned first; the event itself is queued.
remain_events.push_back(event.into_owned());
Ok(JsonEvent::WhiteSpace(whitespace))
} else {
// Event is returned first; whitespace, then the follow-up, are queued.
remain_events.push_back(JsonEvent::WhiteSpace(whitespace));
if let Some(next) = next {
remain_events.push_back(next)
}
Ok(event.into_owned())
}
}
SkipWhitespace::EmptyBuffer => Err(anyhow!(Error::new(
ErrorKind::UnexpectedEof,
"Unexpected end of input while parsing JSON",
))),
}
};
match self.state_stack.pop() {
// The value just read is an object key: it must be followed by ':'.
Some(JsonState::FirstObjectKey) | Some(JsonState::NextObjectKey) => {
skip_with_match_before(
lookup_front_segment_whitespaces_impl(reader, buffer)?,
Box::new(|front| {
if front == Some(b':') {
self.reader.consume(1);
self.state_stack.push(JsonState::ObjectValue);
if let JsonEvent::String(value) = value {
Ok((JsonEvent::ObjectKey(value), true, None))
} else {
Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Object keys should strings",
)))
}
} else {
Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Object keys should be followed by ':'",
)))
}
}),
)
}
// The value just read is an object member value.
Some(JsonState::ObjectValue) => skip_with_match_before(
lookup_front_segment_whitespaces_impl(reader, buffer)?,
Box::new(|front| match front {
Some(b',') => {
self.reader.consume(1);
self.state_stack.push(JsonState::NextObjectKey);
Ok((value, false, Some(JsonEvent::NextObjectValue)))
}
// '}' is not consumed here; `read_event` handles it on the next call.
Some(b'}') => {
self.state_stack.push(JsonState::LastObjectKey);
Ok((value, false, None))
}
_ => Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Object values should be followed by ',' or '}'",
))),
}),
),
// The value just read is an array element.
Some(JsonState::FirstArray) | Some(JsonState::NextArray) => skip_with_match_before(
lookup_front_segment_whitespaces_impl(reader, buffer)?,
Box::new(|front| match front {
Some(b',') => {
self.reader.consume(1);
self.state_stack.push(JsonState::NextArray);
Ok((value, false, Some(JsonEvent::NextArrayValue)))
}
// ']' is not consumed here; `read_event` handles it on the next call.
Some(b']') => {
self.state_stack.push(JsonState::LastArray);
Ok((value, false, None))
}
_ => Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Array values should be followed by ',' or ']'",
))),
}),
),
// Empty stack: `value` is a top-level element, of which only one is allowed.
None => {
if self.element_read {
Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"JSON trailing content"
)))
} else {
self.element_read = true;
Ok(value)
}
}
// A closing bracket was expected, but another value was read instead.
Some(JsonState::LastObjectKey) => Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"JSON object elements should be separated by commas",
))),
Some(JsonState::LastArray) => Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"JSON array elements should be separated by commas",
))),
}
}
/// Returns the first non-whitespace byte of the remaining input without
/// consuming it, or `None` when only whitespace (or nothing) is left.
///
/// Whitespace-only chunks have to be consumed to reach the following data.
/// The stream position of the *first* such chunk is remembered and restored
/// with a seek once a non-whitespace byte is found, so none of the skipped
/// whitespace is lost for subsequent reads.
///
/// # Errors
///
/// Any non-`Interrupted` I/O error from filling the buffer, querying the
/// stream position or seeking.
fn peek_front_skipping_whitespaces(&mut self) -> Result<Option<u8>> {
    let mut back_pos = None;
    loop {
        match self.reader.fill_buf() {
            Ok(buf) => {
                if buf.is_empty() {
                    // NOTE(review): the position is not restored when the
                    // input ends; this keeps the pre-existing behaviour.
                    return Ok(None);
                }
                let skipped = skip_whitespaces(buf);
                if skipped == buf.len() {
                    // Record only the earliest position: overwriting it on
                    // every whitespace-only chunk would make the final seek
                    // restore just the last chunk and drop the earlier ones.
                    if back_pos.is_none() {
                        back_pos = Some(self.reader.stream_position()?);
                    }
                    self.reader.consume(skipped);
                } else {
                    let result = Some(buf[skipped]);
                    if let Some(bp) = back_pos {
                        self.reader.seek(SeekFrom::Start(bp))?;
                    }
                    return Ok(result);
                }
            }
            Err(error) => {
                if error.kind() != ErrorKind::Interrupted {
                    return Err(anyhow!(error));
                }
            }
        }
    }
}
/// Method form of `lookup_front_segment_whitespaces_impl`, applied to this
/// reader's input: consumes leading whitespace into `output` and reports
/// what follows it.
fn lookup_front_segment_whitespaces(&mut self, output: &mut Vec<u8>) -> Result<SkipWhitespace> {
lookup_front_segment_whitespaces_impl(&mut self.reader, output)
}
fn lookup_mandatory_front(&mut self) -> Result<u8> {
if let Some(v) = self.lookup_front()? {
Ok(v)
} else {
Err(anyhow!(Error::from(ErrorKind::UnexpectedEof)))
}
}
fn lookup_front(&mut self) -> Result<Option<u8>> {
loop {
match self.reader.fill_buf() {
Ok(buf) => return Ok(if buf.is_empty() { None } else { Some(buf[0]) }),
Err(error) => {
if error.kind() != ErrorKind::Interrupted {
return Err(anyhow!(error));
}
}
}
}
}
/// Verifies that one more array/object may be opened.
///
/// This is called *before* the new state is pushed, so the opening is
/// rejected as soon as the stack already holds `max_stack_size` entries;
/// comparing with `>` instead would let the stack grow to
/// `max_stack_size + 1`. Without a configured limit the check always
/// succeeds.
///
/// # Errors
///
/// `InvalidData` when the configured limit has been reached.
fn check_stack_size(&self) -> Result<()> {
    match self.max_stack_size {
        Some(max_stack_size) if self.state_stack.len() >= max_stack_size => {
            Err(anyhow!(Error::new(
                ErrorKind::InvalidData,
                format!(
                    "Max stack size of {} reached on an object opening",
                    max_stack_size
                ),
            )))
        }
        _ => Ok(()),
    }
}
}
/// Entry of the reader's state stack: what is expected next inside the
/// innermost open array or object.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
#[allow(dead_code)]
enum JsonState {
/// An array was just opened with `[`.
FirstArray,
/// A `,` was consumed after an array element.
NextArray,
/// A `]` was seen (but not consumed) after an array element.
LastArray,
/// An object was just opened with `{`.
FirstObjectKey,
/// A `,` was consumed after an object member value.
NextObjectKey,
/// A `}` was seen (but not consumed) after an object member value.
LastObjectKey,
/// A `:` was consumed after an object key; a value is read next.
ObjectValue,
}
/// Returns the index of the first byte of `buf` that is not a space, tab,
/// line feed or carriage return, or `buf.len()` when every byte is one of
/// those (including when `buf` is empty).
#[allow(dead_code)]
fn skip_whitespaces(buf: &[u8]) -> usize {
    buf.iter()
        .position(|&byte| !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
        .unwrap_or(buf.len())
}
fn read_hexa_char(input: &[u8]) -> Result<u32> {
let mut value = 0;
for c in input.iter().copied() {
value = value * 16
+ match c {
b'0'..=b'9' => u32::from(c) - u32::from(b'0'),
b'a'..=b'f' => u32::from(c) - u32::from(b'a') + 10,
b'A'..=b'F' => u32::from(c) - u32::from(b'A') + 10,
_ => {
return Err(anyhow!(Error::new(
ErrorKind::InvalidData,
"Unexpected character in a unicode escape",
)))
}
}
}
Ok(value)
}
fn lookup_front_segment_whitespaces_impl<R: BufRead + Seek>(
reader: &mut R,
output: &mut Vec<u8>,
) -> Result<SkipWhitespace> {
output.clear();
loop {
match reader.fill_buf() {
Ok(buf) => {
if buf.is_empty() {
return if output.is_empty() {
Ok(SkipWhitespace::EmptyBuffer)
} else {
Ok(SkipWhitespace::Skip(
String::from_utf8(output.clone())?,
None,
))
};
}
let skipped = skip_whitespaces(buf);
if skipped == buf.len() {
output.extend_from_slice(buf);
reader.consume(skipped);
} else {
let c = buf[skipped];
output.extend_from_slice(&buf[0..skipped]);
reader.consume(skipped);
return if output.is_empty() {
Ok(SkipWhitespace::NoSkip(c))
} else {
Ok(SkipWhitespace::Skip(
String::from_utf8(output.clone())?,
Some(c),
))
};
}
}
Err(error) => {
if error.kind() != ErrorKind::Interrupted {
return Err(anyhow!(error));
}
}
}
}
}
/// Outcome of skipping the whitespace at the front of the input.
enum SkipWhitespace {
/// The input was exhausted and no whitespace was read.
EmptyBuffer,
/// No whitespace was present; holds the (unconsumed) next byte.
NoSkip(u8),
/// Whitespace was consumed; holds it together with the (unconsumed)
/// following byte, or `None` if the input ended after the whitespace.
Skip(String, Option<u8>),
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{BufReader, Cursor};

    /// Smoke test: reads a small document event by event until `Eof`,
    /// printing every event and panicking on the first error.
    #[test]
    fn it_works() {
        let json_str = r#"{
"nadeko": "cute",
"sumire": "cute",
"number": 1234,
"numbers": [1, 2, 3]
}"#
        .to_string();
        let input = BufReader::new(Cursor::new(json_str.as_bytes()));
        let mut reader = JsonReader::from_reader(input);
        let mut buffer = Vec::new();
        loop {
            let reached_eof = matches!(
                dbg!(reader.read_event(&mut buffer).unwrap()),
                JsonEvent::Eof
            );
            if reached_eof {
                break;
            }
        }
    }
}