use super::{
LexError, LexemeId, LexerError, Token,
lexer::{read_id, read_rgb, read_string},
};
use crate::{
Scalar,
binary::{Rgb, lexer::TokenKind},
buffer::{BufferError, BufferWindow, BufferWindowBuilder},
util::get_split,
};
use std::{fmt, io::Read};
/// Pull-style reader that lexes binary tokens out of a byte source `R`.
///
/// The payload of the most recently lexed token is cached inside the reader
/// and exposed through the `*_data` accessors.
#[derive(Debug)]
pub struct TokenReader<R> {
// Byte source handed to the buffer window whenever it needs more input.
reader: R,
// Window over the buffered input; lexing advances through it.
buf: BufferWindow,
// Scratch space for the latest token's payload: little-endian numeric bytes,
// a two-byte string length or token id, or the byte size of an rgb value.
data: [u8; 8],
}
impl TokenReader<()> {
    /// Creates a reader over an in-memory byte slice.
    ///
    /// The slice is used both as the reader and as the backing storage of the
    /// buffer window (via `BufferWindow::from_slice`).
    #[inline]
    pub fn from_slice(data: &[u8]) -> TokenReader<&'_ [u8]> {
        let buf = BufferWindow::from_slice(data);
        TokenReader {
            reader: data,
            buf,
            data: [0; 8],
        }
    }
}
impl<R> TokenReader<R>
where
R: Read,
{
#[inline]
pub fn new(reader: R) -> Self {
TokenReader::builder().build(reader)
}
/// Returns the current position as reported by the underlying buffer window.
///
/// Errors produced by this reader record this value at the time of failure.
#[inline]
pub fn position(&self) -> usize {
self.buf.position()
}
/// Returns the next `bytes` bytes of input, refilling the buffer until that
/// many bytes are available in the window.
///
/// # Errors
///
/// Returns a [`LexError::Eof`] reader error when a refill yields no new bytes
/// before the request is satisfied, or the buffer error reported by the
/// refill itself.
#[inline]
pub fn read_bytes(&mut self, bytes: usize) -> Result<&[u8], ReaderError> {
    while self.buf.window_len() < bytes {
        let filled = match self.buf.fill_buf(&mut self.reader) {
            Ok(count) => count,
            Err(e) => return Err(self.buffer_error(e)),
        };
        if filled == 0 {
            return Err(self.lex_error(LexError::Eof));
        }
    }
    // The borrow handed back to the caller is only created once the loop is done.
    Ok(self.buf.split(bytes))
}
/// Skips forward until the close lexeme that balances an already-consumed open.
///
/// Starts at depth 1, so the caller is expected to have consumed the opening
/// lexeme already. Token payloads are stepped over without being decoded.
///
/// # Errors
///
/// Returns a [`LexError::Eof`] reader error when a refill yields no new bytes
/// before the container is closed, or the buffer error reported by the refill.
#[inline]
pub fn skip_container(&mut self) -> Result<(), ReaderError> {
    let mut depth = 1;
    loop {
        let mut window = self.buf.window();
        while let Ok((id, data)) = read_id(window) {
            // `remaining` is what follows this token's payload, or `None` when
            // the payload is not fully buffered yet.
            let remaining = match id {
                LexemeId::CLOSE => {
                    depth -= 1;
                    if depth == 0 {
                        self.buf.advance_to(data.as_ptr());
                        return Ok(());
                    }
                    Some(data)
                }
                LexemeId::OPEN => {
                    depth += 1;
                    Some(data)
                }
                LexemeId::QUOTED | LexemeId::UNQUOTED => {
                    read_string(data).ok().map(|(_, rest)| rest)
                }
                LexemeId::BOOL
                | LexemeId::LOOKUP_U8
                | LexemeId::LOOKUP_U8_ALT
                | LexemeId::FIXED5_U8
                | LexemeId::FIXED5_I8 => data.get(1..),
                LexemeId::LOOKUP_U16
                | LexemeId::LOOKUP_U16_ALT
                | LexemeId::FIXED5_U16
                | LexemeId::FIXED5_I16 => data.get(2..),
                LexemeId::LOOKUP_U24 | LexemeId::FIXED5_U24 | LexemeId::FIXED5_I24 => {
                    data.get(3..)
                }
                LexemeId::F32
                | LexemeId::U32
                | LexemeId::I32
                | LexemeId::FIXED5_U32
                | LexemeId::FIXED5_I32 => data.get(4..),
                LexemeId::FIXED5_U40 | LexemeId::FIXED5_I40 => data.get(5..),
                LexemeId::FIXED5_U48 | LexemeId::FIXED5_I48 => data.get(6..),
                LexemeId::FIXED5_U56 | LexemeId::FIXED5_I56 => data.get(7..),
                LexemeId::F64 | LexemeId::I64 | LexemeId::U64 => data.get(8..),
                // `FIXED5_ZERO` and every other id carry no payload to step over.
                _ => Some(data),
            };
            match remaining {
                Some(next) => window = next,
                None => break,
            }
        }
        // Commit everything skipped so far, then pull in more input and resume.
        self.buf.advance_to(window.as_ptr());
        match self.buf.fill_buf(&mut self.reader) {
            Ok(0) => return Err(self.lex_error(LexError::Eof)),
            Ok(_) => {}
            Err(e) => return Err(self.buffer_error(e)),
        }
    }
}
#[inline]
pub fn into_parts(self) -> (Box<[u8]>, R) {
(self.buf.buf, self.reader)
}
/// Reads the next token, treating the end of input as an error.
///
/// # Errors
///
/// Returns a [`LexError::Eof`] reader error when no token remains, and
/// propagates any error produced while lexing or refilling the buffer.
#[inline]
pub fn read(&mut self) -> Result<Token<'_>, ReaderError> {
    // `TokenKind` does not borrow from `self`, so lexing the kind first and
    // only then building the borrowing `Token` keeps this entirely in safe
    // code; no raw-pointer copy of `&mut self` is needed to build the error.
    match self.next_token()? {
        Some(kind) => Ok(self.token_from_kind(kind)),
        None => Err(self.lex_error(LexError::Eof)),
    }
}
/// Refills the buffer and, if new bytes arrived, retries via `f`.
///
/// When the refill yields nothing: an empty window means a clean end of
/// input (`Ok(None)`), while leftover bytes are reported as
/// [`LexError::Eof`]. Buffer errors are converted into reader errors.
fn refill_with<T>(
    &mut self,
    f: impl FnOnce(&mut Self) -> Result<Option<T>, ReaderError>,
) -> Result<Option<T>, ReaderError> {
    let filled = match self.buf.fill_buf(&mut self.reader) {
        Ok(count) => count,
        Err(e) => return Err(self.buffer_error(e)),
    };
    if filled != 0 {
        return f(self);
    }
    if self.buf.window_len() == 0 {
        Ok(None)
    } else {
        Err(self.lex_error(LexError::Eof))
    }
}
/// Reads the next token, returning `Ok(None)` once the input is exhausted.
///
/// # Errors
///
/// Propagates any error produced by [`Self::next_token`].
#[inline]
// Not an `Iterator`: the returned token borrows from the reader.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Option<Token<'_>>, ReaderError> {
    let Some(kind) = self.next_token()? else {
        return Ok(None);
    };
    Ok(Some(self.token_from_kind(kind)))
}
/// Builds the full [`Token`] for `kind` from the payload cached by the most
/// recent lex.
#[inline]
fn token_from_kind(&self, kind: TokenKind) -> Token<'_> {
    match kind {
        // Structural tokens carry no payload.
        TokenKind::Open => Token::Open,
        TokenKind::Close => Token::Close,
        TokenKind::Equal => Token::Equal,
        // Identifiers and lookups.
        TokenKind::Id => Token::Id(self.token_id()),
        TokenKind::Lookup => Token::Lookup(self.lookup_data()),
        // Fixed-width values decoded from the cached bytes.
        TokenKind::Bool => Token::Bool(self.bool_data()),
        TokenKind::U32 => Token::U32(self.u32_data()),
        TokenKind::I32 => Token::I32(self.i32_data()),
        TokenKind::F32 => Token::F32(self.f32_data()),
        TokenKind::U64 => Token::U64(self.u64_data()),
        TokenKind::I64 => Token::I64(self.i64_data()),
        TokenKind::F64 => Token::F64(self.f64_data()),
        TokenKind::Rgb => Token::Rgb(self.rgb_data()),
        // SAFETY: relies on `kind` being the result of the latest lex, so the
        // cached length describes the string bytes just before the cursor.
        TokenKind::Quoted => Token::Quoted(unsafe { self.scalar_data() }),
        // SAFETY: same reasoning as for `Quoted`.
        TokenKind::Unquoted => Token::Unquoted(unsafe { self.scalar_data() }),
    }
}
/// Decodes the first two cached payload bytes as a little-endian `u16` id.
#[inline]
pub fn token_id(&self) -> u16 {
    let [lo, hi, ..] = self.data;
    u16::from_le_bytes([lo, hi])
}
/// Returns the string payload of the most recently lexed token.
///
/// The length is taken from the first two cached payload bytes, and the
/// bytes are read from the `len` bytes immediately before the buffer cursor.
///
/// # Safety
///
/// The cached length must describe bytes that really sit just before the
/// cursor inside the buffer. The lexing code in this file arranges that
/// after a quoted or unquoted token; calling this in any other state may
/// read out of bounds.
#[inline]
pub unsafe fn scalar_data(&self) -> Scalar<'_> {
    let len = usize::from(u16::from_le_bytes([self.data[0], self.data[1]]));
    // SAFETY: upheld by the caller, see the `# Safety` section above.
    let bytes = unsafe { std::slice::from_raw_parts(self.buf.start.byte_sub(len), len) };
    Scalar::new(bytes)
}
/// Decodes all eight cached payload bytes as a little-endian `u64`.
#[inline]
pub fn u64_data(&self) -> u64 {
u64::from_le_bytes(self.data)
}
/// Decodes all eight cached payload bytes as a little-endian `i64`.
#[inline]
pub fn i64_data(&self) -> i64 {
i64::from_le_bytes(self.data)
}
/// Returns the eight cached payload bytes as-is; no float decoding happens here.
#[inline]
pub fn f64_data(&self) -> [u8; 8] {
self.data
}
/// Decodes the first four cached payload bytes as a little-endian `u32`.
#[inline]
pub fn u32_data(&self) -> u32 {
    let [b0, b1, b2, b3, ..] = self.data;
    u32::from_le_bytes([b0, b1, b2, b3])
}
/// Decodes the first four cached payload bytes as a little-endian `i32`.
#[inline]
pub fn i32_data(&self) -> i32 {
    let [b0, b1, b2, b3, ..] = self.data;
    i32::from_le_bytes([b0, b1, b2, b3])
}
/// Returns the first four cached payload bytes as-is; no float decoding
/// happens here.
#[inline]
pub fn f32_data(&self) -> [u8; 4] {
    let [b0, b1, b2, b3, ..] = self.data;
    [b0, b1, b2, b3]
}
/// Returns `true` when the first cached payload byte is non-zero.
#[inline]
pub fn bool_data(&self) -> bool {
self.data[0] != 0
}
/// Decodes the first three cached payload bytes as a little-endian value,
/// widened to `u32` with a zero high byte.
#[inline]
pub fn lookup_data(&self) -> u32 {
    let [b0, b1, b2, ..] = self.data;
    u32::from_le_bytes([b0, b1, b2, 0])
}
/// Re-parses the rgb value of the most recently lexed token.
///
/// The first cached payload byte is taken as the encoded size, and that many
/// bytes immediately before the buffer cursor are handed to `read_rgb`.
///
/// # Panics
///
/// Panics if `read_rgb` rejects those bytes.
///
/// NOTE(review): this is a safe function wrapping an unchecked raw read; it
/// appears to be meant only for use right after an rgb token was lexed.
/// Confirm whether other call patterns need guarding.
#[inline]
pub fn rgb_data(&self) -> Rgb {
    let len = usize::from(self.data[0]);
    // SAFETY: relies on the latest token being an rgb value whose `len`
    // encoded bytes sit immediately before the cursor.
    let bytes = unsafe { std::slice::from_raw_parts(self.buf.start.byte_sub(len), len) };
    read_rgb(bytes).expect("valid rgb data").0
}
/// Lexes one token from `window` using unchecked splits for fixed-width data.
///
/// The `unwrap`/`split_at` calls panic if `window` is too short, so the
/// caller must guarantee enough bytes; `next_token` only calls this when the
/// window holds at least 16 bytes, which covers the largest fixed-width
/// token here (2-byte id plus 8-byte payload).
///
/// Returns `None` without advancing when the token has to be handled by the
/// slow path: a string whose declared length exceeds the window, or an rgb
/// lexeme. Otherwise the payload is cached in `self.data`, the buffer is
/// advanced past the token, and its kind is returned.
#[inline]
fn next_token_fast(&mut self, window: &[u8]) -> Option<TokenKind> {
// Every token starts with a two-byte little-endian lexeme id.
let (id, rest) = get_split::<2>(window).unwrap();
let lexeme = LexemeId::new(u16::from_le_bytes(*id));
match lexeme {
LexemeId::OPEN => {
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Open)
}
LexemeId::CLOSE => {
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Close)
}
LexemeId::EQUAL => {
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Equal)
}
// All 32-bit kinds share one 4-byte copy; the kind is picked afterwards.
LexemeId::U32 | LexemeId::I32 | LexemeId::F32 => {
let (data, rest) = rest.split_at(4);
self.data[..4].copy_from_slice(data);
self.buf.advance_to(rest.as_ptr());
if lexeme == LexemeId::F32 {
Some(TokenKind::F32)
} else if lexeme == LexemeId::U32 {
Some(TokenKind::U32)
} else {
Some(TokenKind::I32)
}
}
// All 64-bit kinds share one 8-byte copy; the kind is picked afterwards.
LexemeId::U64 | LexemeId::I64 | LexemeId::F64 => {
let (data, rest) = rest.split_at(8);
self.data[..8].copy_from_slice(data);
self.buf.advance_to(rest.as_ptr());
if lexeme == LexemeId::F64 {
Some(TokenKind::F64)
} else if lexeme == LexemeId::U64 {
Some(TokenKind::U64)
} else {
Some(TokenKind::I64)
}
}
LexemeId::BOOL => {
let (data, rest) = rest.split_at(1);
self.data[0] = data[0];
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Bool)
}
LexemeId::QUOTED | LexemeId::UNQUOTED => {
let (len_data, rest) = get_split::<2>(rest).unwrap();
let len = u16::from_le_bytes(*len_data) as usize;
// The string body may extend past the window; bail out to the slow path
// (which can refill) instead of panicking.
let (_str_data, rest) = rest.split_at_checked(len)?;
// Only the length is cached; `scalar_data` later recovers the bytes from
// just before the advanced cursor.
self.data[0..2].copy_from_slice(len_data);
self.buf.advance_to(rest.as_ptr());
if lexeme == LexemeId::UNQUOTED {
Some(TokenKind::Unquoted)
} else {
Some(TokenKind::Quoted)
}
}
// Lookup payloads are zero-extended so `lookup_data` can read three bytes
// regardless of the encoded width.
LexemeId::LOOKUP_U8 | LexemeId::LOOKUP_U8_ALT => {
let (data, rest) = rest.split_at(1);
let mut tmp = [0u8; 8];
tmp[0] = data[0];
self.data = tmp;
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Lookup)
}
LexemeId::LOOKUP_U16 | LexemeId::LOOKUP_U16_ALT => {
let (data, rest) = get_split::<2>(rest).unwrap();
let mut tmp = [0u8; 8];
tmp[0..2].copy_from_slice(data);
self.data = tmp;
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Lookup)
}
LexemeId::LOOKUP_U24 => {
let (data, rest) = get_split::<3>(rest).unwrap();
let mut tmp = [0u8; 8];
tmp[0..3].copy_from_slice(data);
self.data = tmp;
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Lookup)
}
// Rgb values are only lexed by the slow path.
LexemeId::RGB => None,
lexeme if lexeme >= LexemeId::FIXED5_ZERO && lexeme <= LexemeId::FIXED5_I56 => {
// The distance from FIXED5_ZERO encodes sign and width: offsets above 7
// are negative, and subtracting 7 from them yields the payload byte count.
let offset = lexeme.0 - LexemeId::FIXED5_ZERO.0;
let is_negative = offset > 7;
let byte_count = offset - (is_negative as u16 * 7);
let (data, rest) = rest.split_at(byte_count as usize);
// Zero-extend the payload to eight bytes before applying the sign.
let mut buf = [0u8; 8];
buf[..byte_count as usize].copy_from_slice(data);
self.buf.advance_to(rest.as_ptr());
// `sign` is 1 for non-negative lexemes and -1 for negative ones.
let sign = 1i64 - (is_negative as i64) * 2;
self.data = (u64::from_le_bytes(buf) as i64 * sign).to_le_bytes();
// The signed magnitude is surfaced under the F64 kind.
Some(TokenKind::F64)
}
// Any other lexeme is surfaced as a raw id.
_ => {
self.data[..2].copy_from_slice(id);
self.buf.advance_to(rest.as_ptr());
Some(TokenKind::Id)
}
}
}
/// Lexes the next token kind, treating the end of input as an error.
///
/// The payload of the token stays cached in the reader and can be fetched
/// with the matching `*_data` accessor.
///
/// # Errors
///
/// Returns a [`LexError::Eof`] reader error when no token remains, and
/// propagates any error produced while lexing or refilling the buffer.
#[inline]
pub fn read_token(&mut self) -> Result<TokenKind, ReaderError> {
    // `TokenKind` holds no borrow of `self`, so the error can be built with a
    // plain method call; no raw-pointer copy of `&mut self` is required.
    match self.next_token()? {
        Some(kind) => Ok(kind),
        None => Err(self.lex_error(LexError::Eof)),
    }
}
/// Lexes one token from the currently buffered window with every read
/// bounds-checked.
///
/// Returns [`LexError::Eof`] when the window does not hold a complete token.
/// In every branch the checks run before `self.data` is written and before
/// the buffer is advanced, so a failed attempt leaves the reader where it
/// was and the caller can refill and retry.
fn next_token_slow(&mut self) -> Result<TokenKind, LexError> {
// NOTE(review): the slice is built from the raw start pointer so it is not
// tied to the `&mut self` borrow used below; soundness relies on
// `BufferWindow` keeping `start`/`window_len()` consistent.
let window = unsafe { std::slice::from_raw_parts(self.buf.start, self.buf.window_len()) };
// Every token starts with a two-byte little-endian lexeme id.
let (id, rest) = get_split::<2>(window).ok_or(LexError::Eof)?;
let lexeme = LexemeId::new(u16::from_le_bytes(*id));
match lexeme {
LexemeId::OPEN => {
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Open)
}
LexemeId::CLOSE => {
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Close)
}
LexemeId::EQUAL => {
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Equal)
}
// All 32-bit kinds share one 4-byte copy; the kind is picked afterwards.
LexemeId::U32 | LexemeId::I32 | LexemeId::F32 => {
let (data, rest) = get_split::<4>(rest).ok_or(LexError::Eof)?;
self.data[..4].copy_from_slice(data);
self.buf.advance_to(rest.as_ptr());
if lexeme == LexemeId::F32 {
Ok(TokenKind::F32)
} else if lexeme == LexemeId::U32 {
Ok(TokenKind::U32)
} else {
Ok(TokenKind::I32)
}
}
// All 64-bit kinds share one 8-byte copy; the kind is picked afterwards.
LexemeId::U64 | LexemeId::I64 | LexemeId::F64 => {
let (data, rest) = get_split::<8>(rest).ok_or(LexError::Eof)?;
self.data[..8].copy_from_slice(data);
self.buf.advance_to(rest.as_ptr());
if lexeme == LexemeId::F64 {
Ok(TokenKind::F64)
} else if lexeme == LexemeId::U64 {
Ok(TokenKind::U64)
} else {
Ok(TokenKind::I64)
}
}
LexemeId::BOOL => {
let (data, rest) = get_split::<1>(rest).ok_or(LexError::Eof)?;
self.data[0] = data[0];
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Bool)
}
LexemeId::QUOTED | LexemeId::UNQUOTED => {
let (len_data, rest) = get_split::<2>(rest).ok_or(LexError::Eof)?;
let len = u16::from_le_bytes(*len_data) as usize;
// Skip the string body; only its length is cached, and `scalar_data`
// later recovers the bytes from just before the advanced cursor.
let rest = rest.get(len..).ok_or(LexError::Eof)?;
self.data[0..2].copy_from_slice(len_data);
self.buf.advance_to(rest.as_ptr());
if lexeme == LexemeId::UNQUOTED {
Ok(TokenKind::Unquoted)
} else {
Ok(TokenKind::Quoted)
}
}
// Lookup payloads are zero-extended so `lookup_data` can read three bytes
// regardless of the encoded width.
LexemeId::LOOKUP_U8 | LexemeId::LOOKUP_U8_ALT => {
let (data, rest) = get_split::<1>(rest).ok_or(LexError::Eof)?;
let mut tmp = [0u8; 8];
tmp[0] = data[0];
self.data = tmp;
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Lookup)
}
LexemeId::LOOKUP_U16 | LexemeId::LOOKUP_U16_ALT => {
let (data, rest) = get_split::<2>(rest).ok_or(LexError::Eof)?;
let mut tmp = [0u8; 8];
tmp[0..2].copy_from_slice(data);
self.data = tmp;
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Lookup)
}
LexemeId::LOOKUP_U24 => {
let (data, rest) = get_split::<3>(rest).ok_or(LexError::Eof)?;
let mut tmp = [0u8; 8];
tmp[0..3].copy_from_slice(data);
self.data = tmp;
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Lookup)
}
LexemeId::RGB => {
// Parse once to validate and find where the value ends; only its byte
// size is cached, and `rgb_data` re-parses on demand.
let (_, nrest) = read_rgb(rest)?;
let size = nrest.as_ptr() as usize - rest.as_ptr() as usize;
self.data[0] = size as u8;
self.buf.advance_to(nrest.as_ptr());
Ok(TokenKind::Rgb)
}
lexeme if lexeme >= LexemeId::FIXED5_ZERO && lexeme <= LexemeId::FIXED5_I56 => {
// The distance from FIXED5_ZERO encodes sign and width: offsets above 7
// are negative, and subtracting 7 from them yields the payload byte count.
let offset = lexeme.0 - LexemeId::FIXED5_ZERO.0;
let is_negative = offset > 7;
let byte_count = offset - (is_negative as u16 * 7);
let (data, rest) = rest
.split_at_checked(byte_count as usize)
.ok_or(LexError::Eof)?;
// Zero-extend the payload to eight bytes before applying the sign.
let mut buf = [0u8; 8];
buf[..byte_count as usize].copy_from_slice(data);
self.buf.advance_to(rest.as_ptr());
// `sign` is 1 for non-negative lexemes and -1 for negative ones.
let sign = 1i64 - (is_negative as i64) * 2;
self.data = (u64::from_le_bytes(buf) as i64 * sign).to_le_bytes();
// The signed magnitude is surfaced under the F64 kind.
Ok(TokenKind::F64)
}
// Any other lexeme is surfaced as a raw id.
_ => {
self.data[..2].copy_from_slice(id);
self.buf.advance_to(rest.as_ptr());
Ok(TokenKind::Id)
}
}
}
/// Runs the bounds-checked lexer and, when it runs out of buffered bytes,
/// refills the buffer and retries through `next_token`.
///
/// Kept out of line so the inlined fast path in `next_token` stays small.
#[inline(never)]
fn next_token_slow_refill(&mut self) -> Result<Option<TokenKind>, ReaderError> {
    let failure = match self.next_token_slow() {
        Ok(kind) => return Ok(Some(kind)),
        Err(err) => err,
    };
    match failure {
        LexError::Eof => self.refill_with(|s| s.next_token()),
        other => Err(self.lex_error(other)),
    }
}
#[inline]
pub fn next_token(&mut self) -> Result<Option<TokenKind>, ReaderError> {
let window = unsafe { std::slice::from_raw_parts(self.buf.start, self.buf.window_len()) };
if window.len() >= 16
&& let Some(kind) = self.next_token_fast(window)
{
return Ok(Some(kind));
}
self.next_token_slow_refill()
}
#[cold]
#[inline(never)]
fn buffer_error(&self, e: BufferError) -> ReaderError {
ReaderError {
position: self.position(),
kind: ReaderErrorKind::from(e),
}
}
#[cold]
#[inline(never)]
fn lex_error(&self, e: LexError) -> ReaderError {
ReaderError::from(e.at(self.position()))
}
}
impl TokenReader<()> {
/// Returns a default-configured [`TokenReaderBuilder`] for customizing the
/// buffer before constructing a reader.
pub fn builder() -> TokenReaderBuilder {
TokenReaderBuilder::default()
}
}
/// Builder for [`TokenReader`] that forwards buffer settings to a
/// `BufferWindowBuilder`.
#[derive(Debug, Default)]
pub struct TokenReaderBuilder {
// Pending configuration for the reader's buffer window.
buffer: BufferWindowBuilder,
}
impl TokenReaderBuilder {
#[inline]
pub fn buffer(mut self, val: Box<[u8]>) -> TokenReaderBuilder {
self.buffer = self.buffer.buffer(val);
self
}
#[inline]
pub fn buffer_len(mut self, val: usize) -> TokenReaderBuilder {
self.buffer = self.buffer.buffer_len(val);
self
}
#[inline]
pub fn build<R>(self, reader: R) -> TokenReader<R> {
let buf = self.buffer.build();
TokenReader {
reader,
buf,
data: [0; 8],
}
}
}
/// The category of failure behind a [`ReaderError`].
#[derive(Debug)]
pub enum ReaderErrorKind {
/// An I/O error surfaced while filling the buffer.
Read(std::io::Error),
/// The buffer reported that it is full.
BufferFull,
/// The input could not be lexed.
Lexer(LexError),
}
/// An error produced by [`TokenReader`], pairing the failure kind with the
/// reader position at which it was raised.
#[derive(Debug)]
pub struct ReaderError {
// Reader position captured when the error was created.
position: usize,
// What went wrong.
kind: ReaderErrorKind,
}
impl ReaderError {
/// Returns the reader position recorded when the error was created.
pub fn position(&self) -> usize {
self.position
}
/// Borrows the kind of failure.
pub fn kind(&self) -> &ReaderErrorKind {
&self.kind
}
/// Consumes the error, yielding the kind of failure and discarding the position.
#[must_use]
pub fn into_kind(self) -> ReaderErrorKind {
self.kind
}
}
impl std::error::Error for ReaderError {
    /// Exposes the underlying I/O error for `Read` failures; other kinds
    /// report no source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let ReaderErrorKind::Read(io_err) = &self.kind {
            Some(io_err)
        } else {
            None
        }
    }
}
impl std::fmt::Display for ReaderError {
    /// Formats a one-line description that always includes the position.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let position = self.position;
        match &self.kind {
            ReaderErrorKind::Lexer(cause) => {
                write!(f, "{} at position: {}", cause, position)
            }
            ReaderErrorKind::BufferFull => {
                write!(f, "max buffer size exceeded at position: {}", position)
            }
            ReaderErrorKind::Read(_) => {
                write!(f, "failed to read past position: {}", position)
            }
        }
    }
}
impl From<LexerError> for ReaderError {
    /// Carries over the lexer error's position and wraps its kind in
    /// [`ReaderErrorKind::Lexer`].
    fn from(value: LexerError) -> Self {
        let position = value.position();
        let kind = ReaderErrorKind::Lexer(value.into_kind());
        Self { position, kind }
    }
}
impl From<BufferError> for ReaderErrorKind {
    /// Maps each buffer failure onto its reader-level counterpart.
    fn from(value: BufferError) -> Self {
        match value {
            BufferError::BufferFull => Self::BufferFull,
            BufferError::Io(source) => Self::Read(source),
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{Scalar, binary::Rgb};
use rstest::*;
// Writes each token sequence to bytes, then checks that every reader
// configuration (default, slice-backed, and small buffers) lexes the same
// tokens back and finishes exactly at the end of the data.
#[rstest]
#[case(&[
    Token::Id(0x2838),
    Token::Equal,
    Token::Open,
    Token::Id(0x2863),
    Token::Equal,
    Token::Unquoted(Scalar::new(b"western")),
    Token::Quoted(Scalar::new(b"1446.5.31")),
    Token::Equal,
    Token::Id(0x2838),
    Token::Close,
])]
#[case(&[
    Token::Id(0x2ec9),
    Token::Equal,
    Token::Open,
    Token::Id(0x28e2),
    Token::Equal,
    Token::I32(1),
    Token::Id(0x28e3),
    Token::Equal,
    Token::I32(11),
    Token::Id(0x2ec7),
    Token::Equal,
    Token::I32(4),
    Token::Id(0x2ec8),
    Token::Equal,
    Token::I32(0),
    Token::Close,
])]
#[case(&[
    Token::Id(0x053a),
    Token::Equal,
    Token::Rgb(Rgb {
        r: 110,
        g: 28,
        b: 27,
        a: None
    })
])]
#[case(&[
    Token::Id(0x053a),
    Token::Equal,
    Token::Rgb(Rgb {
        r: 110,
        g: 28,
        b: 27,
        a: Some(128),
    })
])]
#[case(&[
    Token::Id(0x326b), Token::Equal, Token::U64(128),
    Token::Id(0x326b), Token::Equal, Token::I64(-1),
    Token::Id(0x2d82), Token::Equal, Token::F64([0xc7, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]),
    Token::Id(0x2d82), Token::Equal, Token::F32([0x8f, 0xc2, 0x75, 0x3e]),
    Token::Id(0x2d82), Token::Equal, Token::U32(89)
])]
#[case(&[
    Token::Id(0x2d82),
    Token::Equal,
    Token::Lookup(0),
    Token::Id(0x2d82),
    Token::Equal,
    Token::Lookup(255),
    Token::Id(0x2d82),
    Token::Equal,
    Token::Lookup(0),
    Token::Id(0x2d82),
    Token::Equal,
    Token::Lookup(65535),
])]
fn test_roundtrip(#[case] input: &[Token]) {
    let data = Vec::new();
    let mut writer = std::io::Cursor::new(data);
    for tok in input {
        tok.write(&mut writer).unwrap();
    }
    let data = writer.into_inner();

    // Default-sized buffer over a `Read` source.
    let mut reader = TokenReader::new(data.as_slice());
    for (i, e) in input.iter().enumerate() {
        assert_eq!(*e, reader.read().unwrap(), "failure at token idx: {}", i);
    }
    reader.read().unwrap_err();
    assert_eq!(reader.position(), data.len());

    // Slice-backed reader.
    let mut reader = TokenReader::from_slice(data.as_slice());
    for (i, e) in input.iter().enumerate() {
        assert_eq!(*e, reader.read().unwrap(), "failure at token idx: {}", i);
    }
    reader.read().unwrap_err();
    assert_eq!(reader.position(), data.len());

    // Small buffers force refills in the middle of the token stream.
    for buffer_len in 30..40 {
        let mut reader = TokenReader::builder()
            .buffer_len(buffer_len)
            .build(data.as_slice());
        for (i, e) in input.iter().enumerate() {
            // Report the real token index; the buffer length is added so a
            // failure can be reproduced.
            assert_eq!(
                *e,
                reader.read().unwrap(),
                "failure at token idx: {} (buffer len: {})",
                i,
                buffer_len
            );
        }
        reader.read().unwrap_err();
        assert_eq!(reader.position(), data.len());
    }
}
// A single byte cannot even hold a two-byte lexeme id, so reading must
// fail with an end-of-input lexer error.
#[test]
fn test_not_enough_data() {
    let input: &[u8] = &[0x43];
    let mut reader = TokenReader::new(input);
    let err = reader.read().unwrap_err();
    assert!(matches!(
        err.kind(),
        ReaderErrorKind::Lexer(LexError::Eof)
    ));
}
}