mod io;
mod slice;
mod str;
mod utils;
pub use io::IoRead;
pub use slice::SliceRead;
pub use str::StrRead;
use thiserror::Error;
use utils::{decode_hex_sequence, IS_HEX, NEED_ESCAPE};
/// Consumes one number from `$self` without producing a value.
///
/// Accepted shape: an optional `-`, an integer part with no leading zeros,
/// then an optional fraction (handed to `parse_float!`) and/or exponent
/// (handed to `parse_exponent!`).
///
/// Must be expanded inside a function returning `Result<(), ReadError>`:
/// the expansion uses `?` and `return`, and evaluates to `Ok(())` when the
/// number ends normally.
///
/// Errors:
/// - `Bug` when the first byte is neither `-` nor a digit (the caller is
///   expected to have peeked it already);
/// - `NonNumericalCharacter` when the byte that should be the first digit
///   (typically the one right after `-`) is not a digit;
/// - `UnexpectedEndOfInput` when the input ends before the first digit;
/// - `LeadingZerosInNumber` when a leading `0` is followed by another digit.
macro_rules! parse_number {
    ($self:ident) => {{
        // Optional sign. Only the minus sign is consumed here; a digit is
        // left in place so that it is read as `first` below.
        match $self.peek()? {
            Some(b'-') => $self.discard(),
            Some(b'0'..=b'9') => (),
            Some(_) => return Err(ReadError::Bug{
                msg: "macro_rules! parse_number: assume the first character is a number or a minus sign".to_string(),
                position: $self.position(),
            }),
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
        }
        // First digit. Input such as `-x` or a lone `-` is malformed input,
        // not an internal invariant violation, so it is reported with the
        // regular error variants rather than `Bug`.
        let first = match $self.next()? {
            Some(n @ b'0'..=b'9') => n,
            Some(_) => return Err(ReadError::NonNumericalCharacter($self.position())),
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
        };
        let second = $self.peek()?;
        // A single digit followed by end of input is a complete number.
        // Returning here also avoids peeking again once the end was seen.
        if second.is_none() {
            return Ok(());
        }
        if first == b'0' && matches!(second, Some(b'0'..=b'9')) {
            return Err(ReadError::LeadingZerosInNumber($self.position()));
        }
        // Remaining integer digits; `.` and `e`/`E` are left unconsumed for
        // the macro that takes over.
        loop {
            match $self.peek()? {
                Some(b'0'..=b'9') => $self.discard(),
                Some(b'.') => return parse_float!($self),
                Some(b'e') | Some(b'E') => return parse_exponent!($self),
                _ => break,
            }
        }
        Ok(())
    }};
}
/// Consumes the fractional part of a number: a `.` followed by one or more
/// digits. An `e`/`E` after the digits hands control to `parse_exponent!`.
///
/// Must be expanded inside a function returning `Result<(), ReadError>`;
/// the expansion evaluates to `Ok(())` when the fraction ends normally.
///
/// Errors:
/// - `Bug` when the first byte read is not `.`;
/// - `NoNumberCharactersAfterFraction` when `.` is followed by a non-digit;
/// - `UnexpectedEndOfInput` when the input ends right after `.`.
macro_rules! parse_float {
    ($self:ident) => {{
        // The caller is expected to have peeked the period already.
        match $self.next()? {
            Some(b'.') => {}
            _ => {
                return Err(ReadError::Bug {
                    msg: "macro_rules! parse_float: assume the first character is a period".to_string(),
                    position: $self.position(),
                })
            }
        }
        // At least one digit is mandatory after the period.
        let after_dot = $self.peek()?;
        match after_dot {
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
            Some(b'0'..=b'9') => $self.discard(),
            Some(_) => return Err(ReadError::NoNumberCharactersAfterFraction($self.position())),
        }
        // Remaining fraction digits; anything else ends the number and is
        // left unconsumed.
        loop {
            let upcoming = $self.peek()?;
            if matches!(upcoming, Some(b'0'..=b'9')) {
                $self.discard();
                continue;
            }
            if matches!(upcoming, Some(b'e') | Some(b'E')) {
                return parse_exponent!($self);
            }
            break;
        }
        Ok(())
    }};
}
/// Consumes the exponent part of a number: `e` or `E`, an optional `+`/`-`
/// sign, then one or more digits.
///
/// Must be expanded inside a function returning `Result<(), ReadError>`;
/// the expansion evaluates to `Ok(())` when the exponent ends normally.
///
/// Errors:
/// - `Bug` when the first byte read is not `e`/`E`;
/// - `NoNumberCharactersAfterExponent` when no digit follows the marker
///   (or the sign);
/// - `UnexpectedEndOfInput` when the input ends before the first digit.
macro_rules! parse_exponent {
    ($self:ident) => {{
        // The caller is expected to have peeked the exponent marker already.
        match $self.next()? {
            Some(b'e') | Some(b'E') => {}
            _ => {
                return Err(ReadError::Bug {
                    msg: "macro_rules! parse_exponent: assume the first character is an exponent"
                        .to_string(),
                    position: $self.position(),
                })
            }
        }
        // Optional sign: consumed if present, while a digit is left in place.
        let sign_or_digit = $self.peek()?;
        match sign_or_digit {
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
            Some(b'+') | Some(b'-') => $self.discard(),
            Some(b'0'..=b'9') => {}
            Some(_) => return Err(ReadError::NoNumberCharactersAfterExponent($self.position())),
        }
        // At least one digit is mandatory, with or without a sign before it.
        let leading_digit = $self.peek()?;
        match leading_digit {
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
            Some(b'0'..=b'9') => {}
            Some(_) => return Err(ReadError::NoNumberCharactersAfterExponent($self.position())),
        }
        // Exponent digits; the first non-digit is left unconsumed.
        while let Some(b'0'..=b'9') = $self.peek()? {
            $self.discard();
        }
        Ok(())
    }};
}
/// Reads exactly four bytes from `$self`, requires each to be flagged in the
/// `IS_HEX` lookup table, and evaluates to them as a `[u8; 4]` (the raw
/// bytes, not their numeric value).
///
/// Must be expanded inside a function whose error type is `ReadError`.
///
/// Errors:
/// - `UnexpectedEndOfInput` when the input ends before four bytes were read;
/// - `NonHexCharacterInUnicodeEscape` at the first byte `IS_HEX` rejects.
macro_rules! next4_hex {
    ($self:ident) => {{
        let mut digits = [0u8; 4];
        for slot in digits.iter_mut() {
            *slot = match $self.next()? {
                None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
                Some(byte) if IS_HEX[byte as usize] => byte,
                Some(_) => {
                    return Err(ReadError::NonHexCharacterInUnicodeEscape($self.position()))
                }
            };
        }
        digits
    }};
}
pub use utils::Position;
#[derive(Debug, Error)]
pub enum ReadError {
#[error("unexpected end of input ({0})")]
UnexpectedEndOfInput(Position),
#[error("I/O Error ({0})")]
IoError(std::io::Error, Position),
#[error("non numirical character ({0})")]
NonNumericalCharacter(Position),
#[error("unclosed string ({0})")]
UnclosedString(Position),
#[error("invalid escape sequence ({0})")]
InvalidEscapeSequence(Position),
#[error("control character in string ({0})")]
ControlCharacterInString(Position),
#[error("non hex character in unicode escape sequence ({0})")]
NonHexCharacterInUnicodeEscape(Position),
#[error("leading zeros in number ({0})")]
LeadingZerosInNumber(Position),
#[error("no number characters after fraction ({0})")]
NoNumberCharactersAfterFraction(Position),
#[error("no number characters after exponent ({0})")]
NoNumberCharactersAfterExponent(Position),
#[error("running into unexpected state, please report this issue to the maintainer, ({msg}) ({position})")]
Bug {
msg: String,
position: Position,
},
}
/// Byte-oriented input source with lexing helpers layered on top.
///
/// Implementors supply [`position`](Read::position), [`peek`](Read::peek)
/// and [`next`](Read::next); every other method has a default implementation
/// written in terms of those three. The provided methods treat `Ok(None)`
/// from `peek`/`next` as end of input.
pub trait Read {
    /// Returns the reader's current position; the provided methods attach it
    /// to every error they report.
    fn position(&self) -> Position;

    /// Returns the upcoming byte. The provided methods rely on this not
    /// consuming the byte: they always follow a successful `peek` with
    /// `next`/`discard` to advance.
    fn peek(&mut self) -> Result<Option<u8>, ReadError>;

    /// Returns the upcoming byte and advances past it.
    fn next(&mut self) -> Result<Option<u8>, ReadError>;

    /// Consumes one byte and throws it away.
    ///
    /// # Panics
    ///
    /// Panics if [`next`](Read::next) returns an error. The provided methods
    /// only call this right after a successful [`peek`](Read::peek).
    fn discard(&mut self) {
        self.next().unwrap();
    }

    /// Reads exactly four bytes.
    ///
    /// # Errors
    ///
    /// Returns [`ReadError::UnexpectedEndOfInput`] if the input ends first;
    /// bytes read up to that point stay consumed.
    fn next4(&mut self) -> Result<[u8; 4], ReadError> {
        let mut buf = [0u8; 4];
        for slot in buf.iter_mut() {
            match self.next()? {
                Some(ch) => *slot = ch,
                None => return Err(ReadError::UnexpectedEndOfInput(self.position())),
            }
        }
        Ok(buf)
    }

    /// Reads exactly five bytes.
    ///
    /// # Errors
    ///
    /// Returns [`ReadError::UnexpectedEndOfInput`] if the input ends first;
    /// bytes read up to that point stay consumed.
    fn next5(&mut self) -> Result<[u8; 5], ReadError> {
        let mut buf = [0u8; 5];
        for slot in buf.iter_mut() {
            match self.next()? {
                Some(ch) => *slot = ch,
                None => return Err(ReadError::UnexpectedEndOfInput(self.position())),
            }
        }
        Ok(buf)
    }

    /// Consumes spaces, tabs, line feeds and carriage returns until another
    /// byte or the end of input is reached.
    fn skip_whitespace(&mut self) -> Result<(), ReadError> {
        while matches!(
            self.peek()?,
            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r')
        ) {
            self.next()?;
        }
        Ok(())
    }

    /// Consumes one number via `parse_number!`; no value is produced, the
    /// bytes are only validated and skipped.
    fn next_number(&mut self) -> Result<(), ReadError> {
        parse_number!(self)
    }

    /// Consumes a double-quoted string and appends its decoded content to
    /// `buf` (which is not cleared first).
    ///
    /// Bytes not flagged in `NEED_ESCAPE` are copied verbatim. Backslash
    /// escapes are decoded; `\uXXXX` escapes, including UTF-16 surrogate
    /// pairs, are appended as UTF-8.
    ///
    /// # Errors
    ///
    /// - [`ReadError::Bug`] if the first byte is not `"`.
    /// - [`ReadError::UnclosedString`] if the input ends before the closing
    ///   quote, or [`ReadError::UnexpectedEndOfInput`] if it ends inside an
    ///   escape.
    /// - [`ReadError::InvalidEscapeSequence`] for an unknown escape letter, a
    ///   lone low surrogate, or a high surrogate not followed by a `\u` low
    ///   surrogate.
    /// - [`ReadError::NonHexCharacterInUnicodeEscape`] for a bad `\u` digit.
    /// - [`ReadError::ControlCharacterInString`] for any other byte flagged in
    ///   `NEED_ESCAPE`.
    ///
    /// On error, `buf` keeps whatever was decoded before the failure.
    fn next_likely_string(&mut self, buf: &mut Vec<u8>) -> Result<(), ReadError> {
        if self.next()? != Some(b'"') {
            return Err(ReadError::Bug {
                msg: "Read.next_likely_string: assume the first character is a double quote"
                    .to_string(),
                position: self.position(),
            });
        }
        while let Some(byte) = self.next()? {
            // Fast path: ordinary content bytes are copied through untouched.
            if !NEED_ESCAPE[byte as usize] {
                buf.push(byte);
                continue;
            }
            match byte {
                b'"' => return Ok(()),
                b'\\' => {
                    // `Some(b)` is a single-byte escape; `None` means `\u`.
                    let simple = match self.next()? {
                        Some(b'"') => Some(b'"'),
                        Some(b'\\') => Some(b'\\'),
                        Some(b'/') => Some(b'/'),
                        Some(b'b') => Some(b'\x08'),
                        Some(b'f') => Some(b'\x0C'),
                        Some(b'n') => Some(b'\n'),
                        Some(b'r') => Some(b'\r'),
                        Some(b't') => Some(b'\t'),
                        Some(b'u') => None,
                        Some(_) => return Err(ReadError::InvalidEscapeSequence(self.position())),
                        None => return Err(ReadError::UnexpectedEndOfInput(self.position())),
                    };
                    if let Some(unescaped) = simple {
                        buf.push(unescaped);
                        continue;
                    }
                    let hex = decode_hex_sequence(&next4_hex!(self));
                    let codepoint: u32 = match hex {
                        // A low surrogate cannot start a pair.
                        0xDC00..=0xDFFF => {
                            return Err(ReadError::InvalidEscapeSequence(self.position()));
                        }
                        // A high surrogate must be followed by `\u` + low surrogate.
                        lead @ 0xD800..=0xDBFF => {
                            if self.next()? != Some(b'\\') {
                                return Err(ReadError::InvalidEscapeSequence(self.position()));
                            }
                            if self.next()? != Some(b'u') {
                                return Err(ReadError::InvalidEscapeSequence(self.position()));
                            }
                            let trail = decode_hex_sequence(&next4_hex!(self));
                            if !matches!(trail, 0xDC00..=0xDFFF) {
                                return Err(ReadError::InvalidEscapeSequence(self.position()));
                            }
                            // Widen to u32 BEFORE shifting: the shifted value
                            // needs up to 20 bits, so shifting in a narrower
                            // integer type would silently drop the top bits.
                            let lead_bits = ((lead as u32) & 0x03FF) << 10;
                            let trail_bits = (trail as u32) & 0x03FF;
                            0x10000u32 + lead_bits + trail_bits
                        }
                        n => n as u32,
                    };
                    let ch = std::char::from_u32(codepoint).ok_or_else(|| ReadError::Bug {
                        msg: "Read.next_likely_string: assume the codepoint is valid".to_string(),
                        position: self.position(),
                    })?;
                    let mut utf8 = [0u8; 4];
                    buf.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
                }
                _ => return Err(ReadError::ControlCharacterInString(self.position())),
            }
        }
        Err(ReadError::UnclosedString(self.position()))
    }
}