/// Result code the scanner reports for each character it is fed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Op {
    /// The character belongs to the literal currently being scanned.
    Continue,
    /// The character starts a string, number, `true`, `false` or `null`.
    BeginLiteral,
    /// The character is the `{` that opens an object.
    BeginObject,
    /// The character is the `:` that follows an object key.
    ObjectKey,
    /// The character is the `,` that follows an object key:value pair.
    ObjectValue,
    /// The character is the `}` that closes an object.
    EndObject,
    /// The character is the `[` that opens an array.
    BeginArray,
    /// The character is the `,` that follows an array element.
    ArrayValue,
    /// The character is the `]` that closes an array.
    EndArray,
    /// The character is insignificant whitespace.
    SkipSpace,
    /// The top-level value is complete.
    End,
    /// The input is not valid JSON; details are stored on the scanner.
    Error,
}
/// What the innermost open container is waiting for; one entry is kept per
/// nesting level on the scanner's stack.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ParseState {
    /// Inside an object, reading a key; a `:` must follow it.
    ObjectKey,
    /// Inside an object, reading a value; `,` or `}` must follow it.
    ObjectValue,
    /// Inside an array, reading an element; `,` or `]` must follow it.
    ArrayValue,
}
/// Current state of the scanner's state machine: what the next character is
/// expected to be. `rustfmt::skip` keeps the hand-arranged grouping below.
#[rustfmt::skip]
#[derive(Clone, Copy)]
enum State {
    /// Expecting the first character of a value.
    BeginValue,
    /// Just after `[`: expecting a value or `]`.
    BeginValueOrEmpty,
    /// Just after `{`: expecting a key string or `}`.
    BeginStringOrEmpty,
    /// Just after `,` inside an object: expecting a key string.
    BeginString,
    /// A value has just finished: expecting a separator or closing bracket.
    EndValue,
    /// The top-level value has finished.
    EndTop,

    // String literal states.
    /// Inside a string literal.
    InString,
    /// Just after a backslash inside a string.
    InStringEsc,
    /// After `\u`, with 0, 1, 2 and 3 hex digits read respectively.
    InStringEscU, InStringEscU1, InStringEscU12, InStringEscU123,

    // Number states.
    /// Just after a leading `-`.
    Neg,
    /// Just after a leading `0`.
    Num0,
    /// Inside the integer digits of a number starting with `1`-`9`.
    Num1,
    /// Just after the decimal point.
    Dot,
    /// Inside the fraction digits.
    Dot0,
    /// Just after `e` / `E`.
    Exp,
    /// Just after the exponent sign.
    ExpSign,
    /// Inside the exponent digits.
    Exp0,

    // Keyword states: each names the prefix read so far.
    T, Tr, Tru,
    F, Fa, Fal, Fals,
    N, Nu, Nul,

    /// A syntax error was hit; every further character yields an error.
    Error,
}
/// Incremental JSON syntax scanner, fed one character at a time.
pub struct Scanner {
    /// Current state of the state machine.
    state: State,
    /// Set once a complete top-level value has been scanned.
    end_top: bool,
    /// One entry per currently open object or array, innermost last.
    parse_state: Vec<ParseState>,
    /// Description of the first syntax error encountered, if any.
    pub(crate) err: Option<String>,
    /// Running count of characters passed to `step`; not cleared by `reset`.
    position: i64,
}
impl Scanner {
pub(crate) const fn new() -> Self {
Self {
state: State::BeginValue,
end_top: false,
parse_state: Vec::new(),
err: None,
position: 0,
}
}
pub(crate) fn reset(&mut self) {
self.state = State::BeginValue;
self.parse_state.clear();
self.err = None;
self.end_top = false;
}
pub(crate) fn eof(&mut self) -> Op {
if self.err.is_some() {
return Op::Error;
}
if self.end_top {
return Op::End;
}
self.step(' ');
if self.end_top {
return Op::End;
}
if self.err.is_none() {
self.err = Some(format!(
"unexpected end of JSON input at char position {}",
self.position
));
}
Op::Error
}
#[allow(clippy::too_many_lines, reason = "FIXME: split this function")]
pub(crate) fn step(&mut self, c: char) -> Op {
self.position += 1;
match self.state {
State::BeginValue => self.begin_value(c),
State::BeginValueOrEmpty => {
if is_space(c) {
return Op::SkipSpace;
}
if c == ']' {
return self.end_value(c);
}
self.begin_value(c)
}
State::BeginStringOrEmpty => {
if is_space(c) {
return Op::SkipSpace;
}
if c == '}' {
if let Some(ps) = self.parse_state.last_mut() {
*ps = ParseState::ObjectValue;
}
return self.end_value(c);
}
self.begin_string(c)
}
State::BeginString => self.begin_string(c),
State::EndValue => self.end_value(c),
State::EndTop => self.end_top(c),
State::InString => match c {
'"' => {
self.state = State::EndValue;
Op::Continue
}
'\\' => {
self.state = State::InStringEsc;
Op::Continue
}
'\x00'..'\x20' => self.error(c, "in string literal"),
_ => Op::Continue,
},
State::InStringEsc => match c {
'b' | 'f' | 'n' | 'r' | 't' | '\\' | '/' | '"' => {
self.state = State::InString;
Op::Continue
}
'u' => {
self.state = State::InStringEscU;
Op::Continue
}
_ => self.error(c, "in string escape code"),
},
State::InStringEscU => self.hex_digit(c, State::InStringEscU1),
State::InStringEscU1 => self.hex_digit(c, State::InStringEscU12),
State::InStringEscU12 => self.hex_digit(c, State::InStringEscU123),
State::InStringEscU123 => self.hex_digit(c, State::InString),
State::Neg => {
if c == '0' {
self.state = State::Num0;
Op::Continue
} else if ('1'..='9').contains(&c) {
self.state = State::Num1;
Op::Continue
} else {
self.error(c, "in numeric literal")
}
}
State::Num1 => {
if c.is_ascii_digit() {
Op::Continue
} else {
self.num0(c)
}
}
State::Num0 => self.num0(c),
State::Dot => {
if c.is_ascii_digit() {
self.state = State::Dot0;
Op::Continue
} else {
self.error(c, "after decimal point in numeric literal")
}
}
State::Dot0 => {
if c.is_ascii_digit() {
Op::Continue
} else if c == 'e' || c == 'E' {
self.state = State::Exp;
Op::Continue
} else {
self.end_value(c)
}
}
State::Exp => {
if c == '+' || c == '-' {
self.state = State::ExpSign;
Op::Continue
} else {
self.exp_sign(c)
}
}
State::ExpSign => self.exp_sign(c),
State::Exp0 => {
if c.is_ascii_digit() {
Op::Continue
} else {
self.end_value(c)
}
}
State::T => self.lit(c, 'r', State::Tr, "in literal true (expecting 'r')"),
State::Tr => self.lit(c, 'u', State::Tru, "in literal true (expecting 'u')"),
State::Tru => self.lit_end(c, 'e', "in literal true (expecting 'e')"),
State::F => self.lit(c, 'a', State::Fa, "in literal false (expecting 'a')"),
State::Fa => self.lit(c, 'l', State::Fal, "in literal false (expecting 'l')"),
State::Fal => self.lit(c, 's', State::Fals, "in literal false (expecting 's')"),
State::Fals => self.lit_end(c, 'e', "in literal false (expecting 'e')"),
State::N => self.lit(c, 'u', State::Nu, "in literal null (expecting 'u')"),
State::Nu => self.lit(c, 'l', State::Nul, "in literal null (expecting 'l')"),
State::Nul => self.lit_end(c, 'l', "in literal null (expecting 'l')"),
State::Error => Op::Error,
}
}
fn begin_value(&mut self, c: char) -> Op {
if is_space(c) {
return Op::SkipSpace;
}
match c {
'{' => {
self.state = State::BeginStringOrEmpty;
self.parse_state.push(ParseState::ObjectKey);
Op::BeginObject
}
'[' => {
self.state = State::BeginValueOrEmpty;
self.parse_state.push(ParseState::ArrayValue);
Op::BeginArray
}
'"' => {
self.state = State::InString;
Op::BeginLiteral
}
'-' => {
self.state = State::Neg;
Op::BeginLiteral
}
'0' => {
self.state = State::Num0;
Op::BeginLiteral
}
't' => {
self.state = State::T;
Op::BeginLiteral
}
'f' => {
self.state = State::F;
Op::BeginLiteral
}
'n' => {
self.state = State::N;
Op::BeginLiteral
}
'1'..='9' => {
self.state = State::Num1;
Op::BeginLiteral
}
_ => self.error(c, "looking for beginning of value"),
}
}
fn begin_string(&mut self, c: char) -> Op {
if is_space(c) {
return Op::SkipSpace;
}
if c == '"' {
self.state = State::InString;
Op::BeginLiteral
} else {
self.error(c, "looking for beginning of object key string")
}
}
fn end_value(&mut self, c: char) -> Op {
let Some(parse_state) = self.parse_state.last_mut() else {
self.state = State::EndTop;
self.end_top = true;
return self.end_top(c);
};
if is_space(c) {
self.state = State::EndValue;
return Op::SkipSpace;
}
match *parse_state {
ParseState::ObjectKey => {
if c == ':' {
*parse_state = ParseState::ObjectValue;
self.state = State::BeginValue;
Op::ObjectKey
} else {
self.error(c, "after object key")
}
}
ParseState::ObjectValue => {
if c == ',' {
*parse_state = ParseState::ObjectKey;
self.state = State::BeginString;
Op::ObjectValue
} else if c == '}' {
self.pop_parse_state();
Op::EndObject
} else {
self.error(c, "after object key:value pair")
}
}
ParseState::ArrayValue => {
if c == ',' {
self.state = State::BeginValue;
Op::ArrayValue
} else if c == ']' {
self.pop_parse_state();
Op::EndArray
} else {
self.error(c, "after array element")
}
}
}
}
fn end_top(&mut self, c: char) -> Op {
if is_space(c) {
Op::End
} else {
self.reset();
self.step(c)
}
}
fn pop_parse_state(&mut self) {
let n = self.parse_state.len();
if n <= 1 {
self.state = State::EndTop;
self.end_top = true;
} else {
self.parse_state.truncate(n - 1);
self.state = State::EndValue;
}
}
fn num0(&mut self, c: char) -> Op {
match c {
'.' => {
self.state = State::Dot;
Op::Continue
}
'e' | 'E' => {
self.state = State::Exp;
Op::Continue
}
_ => self.end_value(c),
}
}
fn exp_sign(&mut self, c: char) -> Op {
if c.is_ascii_digit() {
self.state = State::Exp0;
Op::Continue
} else {
self.error(c, "in exponent of numeric literal")
}
}
fn hex_digit(&mut self, c: char, next: State) -> Op {
if c.is_ascii_hexdigit() {
self.state = next;
Op::Continue
} else {
self.error(c, "in \\u hexadecimal character escape")
}
}
fn lit(&mut self, c: char, expected: char, next: State, ctx: &'static str) -> Op {
if c == expected {
self.state = next;
Op::Continue
} else {
self.error(c, ctx)
}
}
fn lit_end(&mut self, c: char, expected: char, ctx: &'static str) -> Op {
if c == expected {
self.state = State::EndValue;
Op::Continue
} else {
self.error(c, ctx)
}
}
fn error(&mut self, c: char, ctx: &str) -> Op {
self.state = State::Error;
self.err = Some(format!("invalid character '{c}' {ctx}"));
Op::Error
}
}
/// Returns `true` for the four whitespace characters the scanner skips:
/// space, tab, carriage return and line feed.
#[inline]
const fn is_space(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds every character of `input` to `scanner`, failing the test on
    /// the first character that produces `Op::Error`.
    fn feed_without_error(scanner: &mut Scanner, input: &str) {
        for ch in input.chars() {
            let op = scanner.step(ch);
            assert_ne!(op, Op::Error, "error on char '{ch}'");
        }
    }

    #[test]
    fn test_valid_empty_object() {
        let mut scanner = Scanner::new();
        feed_without_error(&mut scanner, "{}");
        assert_eq!(scanner.eof(), Op::End);
        assert_eq!(scanner.position, 2);
    }

    #[test]
    fn test_valid_nested_json() {
        let mut scanner = Scanner::new();
        feed_without_error(&mut scanner, r#"{"key":"value","num":42}"#);
        assert_eq!(scanner.eof(), Op::End);
    }

    #[test]
    fn test_truncated_input_returns_error_on_eof() {
        let mut scanner = Scanner::new();
        // Individual results are irrelevant here; only the verdict at EOF matters.
        r#"{"key":"#.chars().for_each(|ch| {
            scanner.step(ch);
        });
        assert_eq!(scanner.eof(), Op::Error);
    }

    #[test]
    fn test_invalid_input_returns_error() {
        let mut scanner = Scanner::new();
        let op = scanner.step(')');
        assert_eq!(op, Op::Error);
    }

    #[test]
    fn test_multiple_json_objects_no_errors() {
        let mut scanner = Scanner::new();
        feed_without_error(&mut scanner, r#"{"a":1} {"b":2}"#);
    }
}