/// Streaming lexer that turns an iterator of bytes into JSON-style [`Token`]s.
///
/// Tokens are produced through the `Iterator` implementation of this type.
pub struct Lexer<I>
where
    I: IntoIterator<Item = u8>,
{
    /// Source of the bytes that have not been read yet.
    chars: I::IntoIter,
    /// Single-byte push-back slot: a byte that was read one step too far and
    /// must be handed out again before `chars` is consulted.
    next_byte: Option<u8>,
    /// Number of bytes handed out so far, i.e. the offset of the next byte.
    cursor: u64,
    /// Selects how the content of produced tokens is represented.
    buffer_type: BufferType,
}
/// The kind of a token produced by the lexer.
#[derive(Debug, PartialEq, Clone)]
pub enum TokenType {
/// The `{` byte.
CurlyOpen,
/// The `}` byte.
CurlyClose,
/// The `[` byte.
BracketOpen,
/// The `]` byte.
BracketClose,
/// The `:` byte.
Colon,
/// The `,` byte.
Comma,
/// A double-quoted string; the token covers both quotes.
String,
/// The literal `true`.
BooleanTrue,
/// The literal `false`.
BooleanFalse,
/// A run of bytes that starts with a digit, `-` or `.` and continues with
/// digits, `-`, `+`, `.`, `e` or `E`. The lexer does not validate the
/// number any further.
Number,
/// The literal `null`.
Null,
/// Input the lexer rejected, for example a misspelled literal, a bad
/// escape sequence inside a string, or a backslash outside of a string.
Invalid,
}
impl AsRef<str> for TokenType {
    /// Returns the fixed source text of a token kind.
    ///
    /// Only kinds that always have the same spelling (punctuation and the
    /// `true`/`false`/`null` literals) can be converted.
    ///
    /// # Panics
    ///
    /// Panics for `String`, `Number` and `Invalid`, whose text depends on the
    /// input and is therefore not known from the kind alone.
    fn as_ref(&self) -> &str {
        match self {
            // Literals.
            TokenType::Null => "null",
            TokenType::BooleanTrue => "true",
            TokenType::BooleanFalse => "false",
            // Structural punctuation.
            TokenType::CurlyOpen => "{",
            TokenType::CurlyClose => "}",
            TokenType::BracketOpen => "[",
            TokenType::BracketClose => "]",
            TokenType::Colon => ":",
            TokenType::Comma => ",",
            // Kinds without a fixed spelling; listed explicitly so that adding
            // a new variant forces a decision here.
            TokenType::String | TokenType::Number => {
                panic!("Cannot convert variant TokenTypes")
            }
            TokenType::Invalid => panic!("Cannot convert invalid TokenType"),
        }
    }
}
/// A byte range within the lexed input.
///
/// The lexer fills it from its byte cursor: `end` is the cursor value right
/// after the token's last byte, so the range is half-open (`first..end`).
#[derive(Debug, PartialEq, Clone, Default)]
pub struct Span {
/// Offset of the first byte of the range.
pub first: u64,
/// Offset one past the last byte of the range.
pub end: u64,
}
/// A single lexed token: its kind plus a description of its content.
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
/// What sort of token this is.
pub kind: TokenType,
/// The token's content, either as copied bytes or as a range of the input.
pub buf: Buffer,
}
/// The content carried by a [`Token`].
#[derive(Debug, PartialEq, Clone)]
pub enum Buffer {
/// The raw bytes of the token. The lexer only produces this for `String`
/// and `Number` tokens, and only when it was created with
/// `BufferType::Bytes`.
MultiByte(Vec<u8>),
/// The position of the token in the input. Used for every other token and
/// for all tokens when the lexer was created with `BufferType::Span`.
Span(Span),
}
/// Chooses how a `Lexer` represents the content of the tokens it produces.
#[derive(Debug, PartialEq, Clone)]
pub enum BufferType {
/// Copy the bytes of string and number tokens into a `Vec<u8>`. The value
/// is the capacity each of those vectors is created with.
Bytes(usize),
/// Do not copy any bytes; describe every token by its `Span` instead.
Span,
}
impl<I> Lexer<I>
where
    I: IntoIterator<Item = u8>,
{
    /// Creates a lexer that reads bytes from `chars`.
    ///
    /// `buffer_type` decides whether tokens carry their bytes or only a span.
    /// The lexer starts at offset 0 with nothing pushed back.
    pub fn new(chars: I, buffer_type: BufferType) -> Lexer<I> {
        let chars = chars.into_iter();
        Lexer {
            chars,
            next_byte: None,
            cursor: 0,
            buffer_type,
        }
    }

    /// Consumes the lexer and returns the underlying byte iterator.
    ///
    /// A byte currently held in the push-back slot is not part of the
    /// returned iterator.
    pub fn into_inner(self) -> I::IntoIter {
        let Lexer { chars, .. } = self;
        chars
    }

    /// Un-reads `c` so that the next call to `next_byte` returns it again.
    ///
    /// Only one byte can be pending at a time; this is checked in debug
    /// builds. The cursor is moved back by one to match.
    fn put_back(&mut self, c: u8) {
        debug_assert!(self.next_byte.is_none());
        self.next_byte = Some(c);
        self.cursor -= 1;
    }

    /// Returns the next input byte, preferring a pushed-back byte over the
    /// underlying iterator, and advances the cursor for every byte returned.
    fn next_byte(&mut self) -> Option<u8> {
        // The source iterator is only polled when nothing is pending.
        let byte = self.next_byte.take().or_else(|| self.chars.next());
        if byte.is_some() {
            self.cursor += 1;
        }
        byte
    }
}
/// Internal state of the lexer while it assembles a single token.
enum Mode {
/// Inside a string. The `bool` is true when the previous byte was a
/// backslash, so the current byte must complete an escape sequence. The
/// `usize` counts the hex digits still expected after a `\u` escape.
String(bool, usize),
/// Collecting the bytes of a possible `null`: the bytes seen so far and
/// the index the next byte is written to.
Null([u8; 4], usize),
/// Collecting the bytes of a possible `true`: the bytes seen so far and
/// the index the next byte is written to.
True([u8; 4], usize),
/// Collecting the bytes of a possible `false`: the bytes seen so far and
/// the index the next byte is written to.
False([u8; 5], usize),
/// Inside a number; it lasts until a byte that cannot belong to a number.
Number,
/// Between tokens: the next byte decides which token (if any) starts.
SlowPath,
}
impl<I> Iterator for Lexer<I>
where
I: IntoIterator<Item = u8>,
{
type Item = Token;
fn next(&mut self) -> Option<Token> {
let mut t: Option<TokenType> = None;
let mut first = 0;
let mut state = Mode::SlowPath;
let last_cursor = self.cursor;
let mut buf = match self.buffer_type {
BufferType::Bytes(capacity) => Some(Vec::<u8>::with_capacity(capacity)),
BufferType::Span => None,
};
while let Some(c) = self.next_byte() {
let mut set_cursor = |cursor| {
first = cursor - 1;
};
match state {
Mode::String(ref mut ign_next, ref mut ign_digits) => {
if let Some(ref mut v) = buf {
v.push(c);
}
if *ign_next {
match c {
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {
*ign_next = false;
continue;
}
b'u' => {
*ign_next = false;
*ign_digits = 4;
continue;
}
_ => {
t = Some(TokenType::Invalid);
break;
}
}
}
if *ign_digits > 0 {
match c {
b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
*ign_digits -= 1;
continue;
}
_ => {
t = Some(TokenType::Invalid);
break;
}
}
}
match c {
b'"' => {
t = Some(TokenType::String);
break;
}
b'\\' => {
*ign_next = true;
continue;
}
_ => {
continue;
}
}
}
Mode::Null(ref mut b, ref mut i) => {
b[*i] = c;
if *i == 3 {
if b[1] == b'u' && b[2] == b'l' && b[3] == b'l' {
t = Some(TokenType::Null);
} else {
t = Some(TokenType::Invalid);
}
break;
} else {
*i += 1;
continue;
}
}
Mode::Number => match c {
b'0'..=b'9' | b'-' | b'+' | b'.' | b'E' | b'e' => {
if let Some(ref mut v) = buf {
v.push(c);
}
continue;
}
_ => {
t = Some(TokenType::Number);
self.put_back(c);
break;
}
},
Mode::True(ref mut b, ref mut i) => {
b[*i] = c;
if *i == 3 {
if b[1] == b'r' && b[2] == b'u' && b[3] == b'e' {
t = Some(TokenType::BooleanTrue);
} else {
t = Some(TokenType::Invalid);
}
break;
} else {
*i += 1;
continue;
}
}
Mode::False(ref mut b, ref mut i) => {
b[*i] = c;
if *i == 4 {
if b[1] == b'a' && b[2] == b'l' && b[3] == b's' && b[4] == b'e' {
t = Some(TokenType::BooleanFalse);
} else {
t = Some(TokenType::Invalid);
}
break;
} else {
*i += 1;
continue;
}
}
Mode::SlowPath => {
match c {
b'{' => {
t = Some(TokenType::CurlyOpen);
set_cursor(self.cursor);
break;
}
b'}' => {
t = Some(TokenType::CurlyClose);
set_cursor(self.cursor);
break;
}
b'"' => {
state = Mode::String(false, 0);
if let Some(ref mut v) = buf {
v.push(c);
} else {
set_cursor(self.cursor);
t = Some(TokenType::Invalid);
}
}
b'n' => {
state = Mode::Null([c, b'x', b'x', b'x'], 1);
set_cursor(self.cursor);
}
b'0'..=b'9' | b'-' | b'.' => {
state = Mode::Number;
if let Some(ref mut v) = buf {
v.push(c);
} else {
set_cursor(self.cursor);
}
}
b't' => {
state = Mode::True([c, b'x', b'x', b'x'], 1);
set_cursor(self.cursor);
}
b'f' => {
state = Mode::False([c, b'x', b'x', b'x', b'x'], 1);
set_cursor(self.cursor);
}
b'[' => {
t = Some(TokenType::BracketOpen);
set_cursor(self.cursor);
break;
}
b']' => {
t = Some(TokenType::BracketClose);
set_cursor(self.cursor);
break;
}
b':' => {
t = Some(TokenType::Colon);
set_cursor(self.cursor);
break;
}
b',' => {
t = Some(TokenType::Comma);
set_cursor(self.cursor);
break;
}
b'\\' => {
t = Some(TokenType::Invalid);
set_cursor(self.cursor);
break;
}
_ => {}
}
}
}
}
match t {
None => None,
Some(t) => {
if self.cursor == last_cursor {
None
} else {
let buf = match (&t, buf) {
(&TokenType::String, Some(b)) | (&TokenType::Number, Some(b)) => Buffer::MultiByte(b),
_ => Buffer::Span(Span { first, end: self.cursor }),
};
Some(Token { kind: t, buf })
}
}
}
}
}