use std::io;
use std::fmt::{self, Display, Debug};
use std::error::Error;
use std::str::FromStr;
use crate::value::{ArrayType, ObjectType, Value};
/// Error produced by the parser: a message plus the position at which it was raised.
#[derive(Clone, Debug)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    msg: String,
    /// Line counter value captured when the error was created.
    line: usize,
    /// Column counter value captured when the error was created.
    col: usize,
}
impl Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
write!(f, "\"{}\" at line {} col {}", self.msg, self.line, self.col)
}
}
// Empty impl: `ParseError` uses the default `Error` methods and adds no overrides.
impl Error for ParseError {}
/// Result alias for operations that yield no value on success and a `ParseError` on failure.
pub type ParseResult = Result<(), ParseError>;
/// Size in bytes of the parser's lookahead buffer.
///
/// `pump` asserts that a requested lookahead fits in the buffer, so this must be at
/// least as long as the longest literal passed to `peek_match` (`false`, 5 bytes).
const N: usize = 5;
/// Streaming parser that pulls bytes from `src` through a small fixed-size lookahead buffer.
#[derive(Debug)]
pub struct Parser<T> {
    /// Lookahead window; the unread bytes are `buf[cur_pos..end_pos]`.
    buf: [u8; N],
    /// Index in `buf` of the next unread byte.
    cur_pos: usize,
    /// Index in `buf` one past the last buffered byte.
    end_pos: usize,
    /// Set once the source has reported end of input (a read of zero bytes).
    eof: bool,
    /// Number of `\n` bytes consumed so far.
    lines: usize,
    /// Number of bytes consumed since the last `\n`.
    cols: usize,
    /// The underlying data source.
    src: T,
}
impl<T: io::Read + Debug> Parser<T> {
/// Creates a parser over `src` with an empty lookahead buffer and zeroed position counters.
pub fn new(src: T) -> Self {
    Parser {
        src,
        buf: [0u8; N],
        cur_pos: 0,
        end_pos: 0,
        eof: false,
        lines: 0,
        cols: 0,
    }
}
/// Builds a `ParseError` carrying `msg` and the parser's current line/column counters.
///
/// Despite the name this only constructs the error; the caller decides whether to return it.
fn raise(&self, msg: &str) -> ParseError {
    let (line, col) = (self.lines, self.cols);
    ParseError {
        msg: String::from(msg),
        line,
        col,
    }
}
fn available(&self) -> usize {
self.end_pos - self.cur_pos
}
fn pump(&mut self, n: usize) {
if self.available() < n {
assert!(n <= self.buf.len());
if !self.eof {
self.buf.rotate_left(self.cur_pos);
self.end_pos -= self.cur_pos;
self.cur_pos = 0;
let r = self.src.read(&mut self.buf[self.end_pos..]);
if let Ok(rd) = r {
if rd == 0 {
self.eof = true;
} else {
self.end_pos += rd;
}
} else {
panic!("data source reading error");
}
}
}
}
/// Consumes `n` buffered bytes, updating the line/column counters as it goes.
///
/// Each `\n` increments `lines` and resets `cols`; any other byte increments `cols`.
///
/// # Panics
///
/// Panics if fewer than `n` bytes are currently buffered.
fn pop(&mut self, n: usize) {
    assert!(n <= self.available());
    let consumed_end = self.cur_pos + n;
    for &byte in &self.buf[self.cur_pos..consumed_end] {
        if byte == b'\n' {
            self.lines += 1;
            self.cols = 0;
        } else {
            self.cols += 1;
        }
    }
    self.cur_pos = consumed_end;
}
/// Returns the next unread byte without consuming it, or `None` when nothing is buffered
/// after attempting to refill.
fn peek(&mut self) -> Option<u8> {
    self.pump(1);
    if self.cur_pos != self.end_pos {
        return Some(self.buf[self.cur_pos]);
    }
    None
}
/// Reports whether the upcoming bytes start with `v`, without consuming anything.
///
/// Returns `false` when fewer than `v.len()` bytes are buffered after refilling.
fn peek_match(&mut self, v: &[u8]) -> bool {
    let wanted = v.len();
    self.pump(wanted);
    if self.available() < wanted {
        return false;
    }
    let window = &self.buf[self.cur_pos..self.cur_pos + wanted];
    v == window
}
/// Consumes bytes for as long as `test` accepts them; stops at the first rejected byte
/// or when no more input is available.
fn skip_by<F>(&mut self, test: F) where F: Fn(u8) -> bool {
    loop {
        match self.peek() {
            Some(byte) if test(byte) => self.pop(1),
            _ => break,
        }
    }
}
/// Consumes bytes up to, but not including, the next occurrence of `stop` (or to the
/// end of input if `stop` never appears).
fn skip_to(&mut self, stop: u8) {
    self.skip_by(move |byte| byte != stop)
}
/// Advances to the next significant byte, skipping ASCII whitespace and `#` comments.
///
/// A comment runs from `#` up to (not including) the following `\n`; the newline itself
/// is consumed as whitespace on the next pass.
fn skip_to_next(&mut self) {
    loop {
        self.skip_by(|byte| byte.is_ascii_whitespace());
        if self.peek() != Some(b'#') {
            break;
        }
        self.skip_to(b'\n');
    }
}
/// Parses the next value from the input.
///
/// Leading whitespace and `#` comments are skipped. The first significant byte selects
/// the kind of value: `{` an object, `[` an array, `'` or `"` a string; otherwise the
/// literals `true`, `false` and `null` are tried in that order, and anything else is
/// handed to the number parser.
///
/// # Errors
///
/// Returns a `ParseError` when the input is exhausted before a value starts, or when
/// the selected sub-parser fails.
pub fn parse(&mut self) -> Result<Value, ParseError> {
    self.skip_to_next();
    let first = match self.peek() {
        Some(byte) => byte,
        None => return Err(self.raise("not enough data")),
    };
    match first {
        b'{' => self.parse_object(),
        b'[' => self.parse_array(),
        b'\'' | b'"' => self.parse_string(),
        _ if self.peek_match(b"true") => {
            self.pop(4);
            Ok(Value::Bool(true))
        }
        _ if self.peek_match(b"false") => {
            self.pop(5);
            Ok(Value::Bool(false))
        }
        _ if self.peek_match(b"null") => {
            self.pop(4);
            Ok(Value::Null)
        }
        _ => self.parse_number(),
    }
}
/// Parses an object: `{`, then zero or more `key : value` items separated by `,`, then `}`.
///
/// Keys are quoted strings (single or double quotes). A `,` directly before the closing
/// `}` is accepted, because the loop checks for `}` before expecting another key.
///
/// # Errors
///
/// Returns a `ParseError` for a missing or unquoted key, a missing `:`, a bad item
/// delimiter, truncated input, or a failure while parsing a key or value.
///
/// # Panics
///
/// Panics if the next byte is not `{`.
fn parse_object(&mut self) -> Result<Value, ParseError> {
    assert_eq!(self.peek(), Some(b'{'));
    self.pop(1);
    let mut obj = ObjectType::new();
    loop {
        // Either the object closes here or a quoted key must follow.
        self.skip_to_next();
        match self.peek() {
            Some(b'}') => {
                self.pop(1);
                return Ok(Value::Object(obj));
            }
            Some(b'\'') | Some(b'"') => {}
            Some(_) => return Err(self.raise("object: key expecting ' or \"")),
            None => return Err(self.raise("object: key expecting more data")),
        }
        let k = self.parse_string_raw()?;

        self.skip_to_next();
        if self.peek() != Some(b':') {
            return Err(self.raise("object: expecting \":\""));
        }
        self.pop(1);

        self.skip_to_next();
        let v = self.parse()?;
        obj.insert(k, v);

        // After an item: `,` continues the object, `}` closes it.
        self.skip_to_next();
        match self.peek() {
            Some(b',') => self.pop(1),
            Some(b'}') => {
                self.pop(1);
                return Ok(Value::Object(obj));
            }
            Some(_) => return Err(self.raise("object: bad item delimeter, expecting , or }")),
            None => return Err(self.raise("object: expecting , or }")),
        }
    }
}
/// Parses an array: `[`, then zero or more values separated by `,`, then `]`.
///
/// A `,` directly before the closing `]` is accepted, because the loop checks for `]`
/// before parsing another element.
///
/// # Errors
///
/// Returns a `ParseError` when an element fails to parse, when an element is followed
/// by anything other than `,` or `]`, or when the input ends before the array closes.
///
/// # Panics
///
/// Panics if the next byte is not `[`.
fn parse_array(&mut self) -> Result<Value, ParseError> {
    assert_eq!(self.peek(), Some(b'['));
    self.pop(1);
    let mut arr = ArrayType::new();
    loop {
        self.skip_to_next();
        if Some(b']') == self.peek() {
            self.pop(1);
            return Ok(Value::Array(arr));
        }
        arr.push(self.parse()?);

        self.skip_to_next();
        match self.peek() {
            Some(b',') => self.pop(1),
            Some(b']') => {
                self.pop(1);
                return Ok(Value::Array(arr));
            }
            // Elements must be separated explicitly; without this arm input such as
            // `[1 2]` would be silently accepted, unlike the equivalent object case.
            Some(_) => return Err(self.raise("array: bad item delimiter, expecting , or ]")),
            None => return Err(self.raise("array: expecting , or ]")),
        }
    }
}
/// Parses a quoted string and returns its decoded contents.
///
/// The opening quote (`'` or `"`) determines the closing quote. A backslash escapes the
/// following byte: `\t`, `\r` and `\n` become tab, carriage return and newline; any
/// other escaped byte is taken literally (which covers the quote and the backslash).
///
/// # Errors
///
/// Returns a `ParseError` if the input ends before the closing quote or if the
/// collected bytes are not valid UTF-8.
///
/// # Panics
///
/// Panics if the next byte is not a quote character.
fn parse_string_raw(&mut self) -> Result<String, ParseError> {
    assert!(self.peek() == Some(b'"') || self.peek() == Some(b'\''));
    let quoter = self.peek().unwrap();
    self.pop(1);

    let mut bytes: Vec<u8> = Vec::new();
    let mut escaped = false;
    loop {
        let c = match self.peek() {
            Some(byte) => byte,
            None => break,
        };
        match c {
            _ if escaped => {
                let decoded = match c {
                    b't' => b'\t',
                    b'r' => b'\r',
                    b'n' => b'\n',
                    other => other,
                };
                bytes.push(decoded);
                escaped = false;
            }
            b'\\' => escaped = true,
            _ if c == quoter => {
                // Consume the closing quote before reporting, so an error position
                // points just past the string.
                self.pop(1);
                return String::from_utf8(bytes)
                    .map_err(|_| self.raise("string: bad utf-8 encode"));
            }
            _ => bytes.push(c),
        }
        self.pop(1);
    }
    Err(self.raise("string: expecting more data"))
}
/// Parses a quoted string and wraps the result in `Value::String`.
fn parse_string(&mut self) -> Result<Value, ParseError> {
    self.parse_string_raw().map(Value::String)
}
/// Reports whether byte `c` is a digit in the given `radix`.
///
/// Only radices 2, 8, 10 and 16 are recognised (hex digits in either case); every
/// other radix yields `false`.
fn is_valid_number_char(c: u8, radix: u32) -> bool {
    match radix {
        2 => matches!(c, b'0'..=b'1'),
        8 => matches!(c, b'0'..=b'7'),
        10 => c.is_ascii_digit(),
        16 => c.is_ascii_hexdigit(),
        _ => false,
    }
}
/// Parses a numeric literal into `Value::Number`.
///
/// Accepted shape: an optional `+`/`-` sign, an optional radix prefix (`0b`, `0o`, `0x`,
/// either case), then digits valid for that radix. Decimal numbers may additionally
/// carry a fractional part after `.` and an exponent (`e`/`E`, optional sign, digits).
/// Scanning stops at the first byte that does not fit the current phase; that byte is
/// left unconsumed.
///
/// Prefixed numbers are converted with `i64::from_str_radix` and then cast to `f64`;
/// decimal numbers are converted with `f64::from_str`.
///
/// # Errors
///
/// Returns a `ParseError` when the first byte is neither a sign nor a decimal digit,
/// when a `.` follows a non-decimal number, when no digits were collected, or when the
/// collected text is rejected by the numeric conversion.
fn parse_number(&mut self) -> Result<Value, ParseError> {
    #[derive(PartialEq)]
    enum Phase {
        Sign,
        Radix,
        Int,
        Float,
        SciSign,
        Sci
    }

    let mut text: Vec<u8> = Vec::new();
    let mut phase = Phase::Sign;
    let mut radix = 10;

    while let Some(c) = self.peek() {
        match phase {
            Phase::Sign => {
                match c {
                    b'-' | b'+' => {
                        text.push(c);
                        self.pop(1);
                    }
                    // A leading digit is left in place for the later phases.
                    _ if c.is_ascii_digit() => {}
                    _ => return Err(self.raise("number: bad leading char, expecting \"+-[0-9]\"")),
                }
                phase = Phase::Radix;
            }
            Phase::Radix => {
                // The prefix is consumed but never stored: `from_str_radix` wants bare digits.
                for &(lower, upper, base) in &[(b"0b", b"0B", 2u32), (b"0o", b"0O", 8), (b"0x", b"0X", 16)] {
                    if self.peek_match(lower) || self.peek_match(upper) {
                        self.pop(2);
                        radix = base;
                        break;
                    }
                }
                phase = Phase::Int;
            }
            Phase::Int => match c {
                b'.' => {
                    if radix != 10 {
                        return Err(self.raise(&format!("number: bad float parts for radix {}", radix)));
                    }
                    text.push(c);
                    self.pop(1);
                    phase = Phase::Float;
                }
                // Checked before the exponent arm so that `e`/`E` count as hex digits.
                _ if Self::is_valid_number_char(c, radix) => {
                    text.push(c);
                    self.pop(1);
                }
                b'e' | b'E' if radix == 10 => {
                    text.push(c);
                    self.pop(1);
                    phase = Phase::SciSign;
                }
                _ => break,
            },
            Phase::Float => {
                assert_eq!(radix, 10);
                match c {
                    _ if Self::is_valid_number_char(c, 10) => {
                        text.push(c);
                        self.pop(1);
                    }
                    b'e' | b'E' => {
                        text.push(c);
                        self.pop(1);
                        phase = Phase::SciSign;
                    }
                    _ => break,
                }
            }
            Phase::SciSign => {
                // The exponent sign is optional; any other byte is re-examined in `Sci`.
                if matches!(c, b'-' | b'+') {
                    text.push(c);
                    self.pop(1);
                }
                phase = Phase::Sci;
            }
            Phase::Sci => {
                if !Self::is_valid_number_char(c, 10) {
                    break;
                }
                text.push(c);
                self.pop(1);
            }
        }
    }

    if text.is_empty() {
        return Err(self.raise("number: expecting more data"));
    }
    let literal = match String::from_utf8(text) {
        Ok(s) => s,
        Err(_) => return Err(self.raise("number: bad utf-8 encoding")),
    };
    if radix != 10 {
        i64::from_str_radix(&literal, radix)
            .map(|n| Value::Number(n as f64))
            .map_err(|_| self.raise("number: bad i64 string"))
    } else {
        f64::from_str(&literal)
            .map(Value::Number)
            .map_err(|_| self.raise("number: bad f64 string"))
    }
}
}