#![deny(missing_docs)]
use std::error::Error;
use std::fmt;
use std::iter::Peekable;
use std::num::ParseIntError;
use std::str::FromStr;
use itertools::PeekingNext;
/// Splits a stream of characters into tokens, one token per call.
///
/// A `Lexer` is driven by [`Tokenized`], which calls
/// [`consume_next_token`](Lexer::consume_next_token) each time it advances.
pub trait Lexer {
/// The character type this lexer reads from the input and emits as token content.
type Char;
/// Reads the next token from `input`, passing each of its characters to `output`.
///
/// `input` supports peeking, so an implementation can stop at a token boundary
/// without consuming the character that follows it.
///
/// [`Tokenized::advance`] treats a call that emits no characters as the end of
/// the input.
///
/// # Errors
///
/// Returns a [`ParserError`] when the implementation cannot produce a token.
fn consume_next_token(
&mut self,
input: &mut (impl Iterator<Item = Self::Char> + PeekingNext),
output: impl FnMut(Self::Char),
) -> Result<(), ParserError<Self::Char>>;
}
/// A token stream that drives a [`Lexer`] over a character iterator.
///
/// At most one token, the "current" token, is held at a time. It is replaced
/// whenever the stream advances.
pub struct Tokenized<I, L>
where
I: Iterator,
{
// Source of characters handed to the lexer.
iter: I,
// Lexer that decides where each token ends.
lexer: L,
// True when the most recent advance produced a non-empty token.
has_current: bool,
// Characters of the current token; the allocation is reused by the next advance.
current_token: Option<Vec<I::Item>>,
}
impl<I, L> Iterator for Tokenized<I, L>
where
    I: Iterator + PeekingNext,
    L: Lexer<Char = I::Item>,
    I::Item: PartialEq + Eq + Clone + Copy + 'static,
{
    type Item = Vec<I::Item>;

    /// Advances to the next token and returns an owned copy of it.
    ///
    /// Yields `None` whenever `next_ref` does.
    fn next(&mut self) -> Option<Self::Item> {
        let token = self.next_ref()?;
        Some(token.to_vec())
    }
}
impl<I, L> Tokenized<I, L>
where
    I: Iterator + PeekingNext,
    L: Lexer<Char = I::Item>,
    I::Item: PartialEq + Eq + Clone + Copy + 'static,
{
    /// Advances to the next token and returns a borrowed view of it.
    ///
    /// Returns `None` when no further token is available. A lexer error is
    /// discarded and also reported as `None`; call [`advance`](Self::advance)
    /// directly to observe errors.
    pub fn next_ref(&mut self) -> Option<&[I::Item]> {
        self.advance().ok()?;
        self.current_token_ref()
    }

    /// Returns the current token and advances to the next one.
    ///
    /// # Errors
    ///
    /// Returns the lexer's error if advancing fails, or
    /// [`ParserError::UnexpectedEndOfFile`] if there was no current token.
    pub fn take(&mut self) -> Result<Vec<I::Item>, ParserError<I::Item>> {
        // Move the token out instead of copying it: `advance` would clear the
        // buffer anyway, so nothing else can observe the old contents.
        let token = if self.has_current {
            self.current_token.take()
        } else {
            None
        };
        self.advance()?;
        token.ok_or(ParserError::UnexpectedEndOfFile)
    }

    /// Replaces the current token with the next one produced by the lexer.
    ///
    /// If the lexer emits no characters, the stream is left without a current
    /// token.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the lexer; the stream then has no
    /// current token.
    pub fn advance(&mut self) -> Result<(), ParserError<I::Item>> {
        // Recycle the previous token's allocation as the buffer for the next one.
        let mut buffer = self.current_token.take().unwrap_or_default();
        buffer.clear();
        // The old token is gone from here on, even if the lexer fails below.
        self.has_current = false;
        self.lexer
            .consume_next_token(&mut self.iter, |c| buffer.push(c))?;
        if !buffer.is_empty() {
            self.current_token = Some(buffer);
            self.has_current = true;
        }
        Ok(())
    }

    /// Returns a borrowed view of the current token, or `None` if there is none.
    pub fn current_token_ref(&self) -> Option<&[I::Item]> {
        if self.has_current {
            self.current_token.as_deref()
        } else {
            None
        }
    }

    /// Returns an owned copy of the current token, or `None` if there is none.
    pub fn current_token(&self) -> Option<Vec<I::Item>> {
        self.current_token_ref().map(<[I::Item]>::to_vec)
    }

    /// Requires the current token to equal `s` and advances past it.
    ///
    /// The stream does not advance when the token differs.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if there is no current
    /// token, [`ParserError::UnexpectedToken`] (expected token first, found
    /// token second) if the current token differs from `s`, or the lexer's
    /// error if advancing fails.
    pub fn expect(
        &mut self,
        s: impl IntoIterator<Item = I::Item> + Clone,
    ) -> Result<(), ParserError<I::Item>> {
        let token = match &self.current_token {
            Some(token) => token,
            None => return Err(ParserError::UnexpectedEndOfFile),
        };
        if token.iter().copied().eq(s.clone()) {
            self.advance()
        } else {
            Err(ParserError::UnexpectedToken(
                s.into_iter().collect(),
                token.clone(),
            ))
        }
    }

    /// Checks whether the current token equals `s`, advancing past it on a match.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if there is no current
    /// token, or the lexer's error if advancing fails.
    pub fn test(&mut self, s: &[I::Item]) -> Result<bool, ParserError<I::Item>> {
        let matched = self.peeking_test(s)?;
        if matched {
            self.advance()?;
        }
        Ok(matched)
    }

    /// Checks whether the current token equals `s` without advancing.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if there is no current token.
    pub fn peeking_test(&mut self, s: &[I::Item]) -> Result<bool, ParserError<I::Item>> {
        if self.current_token.is_none() {
            return Err(ParserError::UnexpectedEndOfFile);
        }
        Ok(self.current_token_ref() == Some(s))
    }

    /// Advances until a token equal to `s` has been consumed.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if the input ends before
    /// `s` is found, or the lexer's error if advancing fails.
    pub fn skip_until(&mut self, s: &[I::Item]) -> Result<(), ParserError<I::Item>> {
        while !self.test(s)? {
            self.advance()?;
        }
        Ok(())
    }
}
impl<I, L> Tokenized<I, L>
where
    I: Iterator<Item = char> + PeekingNext,
    L: Lexer<Char = I::Item>,
{
    /// Returns the current token as a `String`, or `None` if there is none.
    pub fn current_token_str(&self) -> Option<String> {
        self.current_token_ref()
            .map(|token| token.iter().collect())
    }

    /// Returns the current token as a `String` and advances to the next one.
    ///
    /// # Errors
    ///
    /// Returns the lexer's error if advancing fails, or
    /// [`ParserError::UnexpectedEndOfFile`] if there was no current token.
    pub fn take_str(&mut self) -> Result<String, ParserError<I::Item>> {
        let token = self.current_token_str();
        self.advance()?;
        token.ok_or(ParserError::UnexpectedEndOfFile)
    }

    /// Parses the current token as an `F` and advances to the next token.
    ///
    /// The stream advances whether or not parsing succeeds.
    ///
    /// # Errors
    ///
    /// Returns the lexer's error if advancing fails. Otherwise returns
    /// [`ParserError::InvalidLiteral`] with the offending token if it does not
    /// parse, or [`ParserError::UnexpectedEndOfFile`] if there was no current
    /// token.
    pub fn take_and_parse<F: FromStr>(&mut self) -> Result<F, ParserError<I::Item>> {
        let result = match self.current_token_ref() {
            Some(token) => token
                .iter()
                .collect::<String>()
                .parse::<F>()
                .map_err(|_| ParserError::InvalidLiteral(token.to_vec())),
            None => Err(ParserError::UnexpectedEndOfFile),
        };
        self.advance()?;
        result
    }

    /// Requires the current token to equal `s` and advances past it.
    ///
    /// The stream does not advance when the token differs.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if there is no current
    /// token, [`ParserError::UnexpectedToken`] (expected token first, found
    /// token second) if the current token differs from `s`, or the lexer's
    /// error if advancing fails.
    pub fn expect_str(&mut self, s: &str) -> Result<(), ParserError<I::Item>> {
        // `expect` performs exactly this comparison for any character iterator.
        self.expect(s.chars())
    }

    /// Checks whether the current token equals `s`, advancing past it on a match.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if there is no current
    /// token, or the lexer's error if advancing fails.
    pub fn test_str(&mut self, s: &str) -> Result<bool, ParserError<I::Item>> {
        let matched = self.peeking_test_str(s)?;
        if matched {
            self.advance()?;
        }
        Ok(matched)
    }

    /// Checks whether the current token equals `s` without advancing.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if there is no current token.
    pub fn peeking_test_str(&mut self, s: &str) -> Result<bool, ParserError<I::Item>> {
        match &self.current_token {
            Some(token) => Ok(token.iter().copied().eq(s.chars())),
            None => Err(ParserError::UnexpectedEndOfFile),
        }
    }

    /// Advances until a token equal to `s` has been consumed.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::UnexpectedEndOfFile`] if the input ends before
    /// `s` is found, or the lexer's error if advancing fails.
    pub fn skip_until_str(&mut self, s: &str) -> Result<(), ParserError<I::Item>> {
        while !self.test_str(s)? {
            self.advance()?;
        }
        Ok(())
    }
}
/// Wraps a character iterator and a lexer into a [`Tokenized`] stream.
///
/// The returned stream has no current token yet; call `advance` (or `next`)
/// to load the first one.
pub fn tokenize<I, L>(iter: I, lexer: L) -> Tokenized<Peekable<I>, L>
where
    I: Iterator<Item = char>,
{
    let iter = iter.peekable();
    Tokenized {
        iter,
        lexer,
        has_current: false,
        current_token: None,
    }
}
/// Errors reported while tokenizing or parsing a token stream.
///
/// `C` is the character type of the underlying input.
#[derive(Clone, Debug)]
pub enum ParserError<C: 'static> {
    /// The input ended although another token was required.
    UnexpectedEndOfFile,
    /// A token differed from the required one.
    ///
    /// The first field is the expected token, the second the token found.
    UnexpectedToken(Vec<C>, Vec<C>),
    /// The contained token could not be parsed into the requested type.
    InvalidLiteral(Vec<C>),
    /// An integer failed to parse.
    ParseIntError(ParseIntError),
}

impl<C: 'static + fmt::Display + fmt::Debug> Error for ParserError<C> {
    /// Exposes the wrapped [`ParseIntError`] as the underlying cause.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            ParserError::ParseIntError(e) => Some(e),
            ParserError::UnexpectedEndOfFile
            | ParserError::UnexpectedToken(..)
            | ParserError::InvalidLiteral(_) => None,
        }
    }
}

impl<C: fmt::Display + fmt::Debug> fmt::Display for ParserError<C> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ParserError::UnexpectedEndOfFile => write!(f, "Unexpected end of file."),
            // Field order is (expected, found), as built by `expect` and
            // `expect_str`; the message names the found token first.
            ParserError::UnexpectedToken(expected, actual) => {
                write!(f, "Unexpected token. '{actual:?}' instead of '{expected:?}'")
            }
            ParserError::InvalidLiteral(n) => write!(f, "Invalid literal: '{n:?}'."),
            ParserError::ParseIntError(e) => write!(f, "Illegal integer: '{e:?}'"),
        }
    }
}

impl<C> From<ParseIntError> for ParserError<C> {
    fn from(e: ParseIntError) -> Self {
        Self::ParseIntError(e)
    }
}
#[test]
fn test_tokenize_simple() {
    use itertools::Itertools;

    // Lexer that splits the input into alternating runs of whitespace and
    // non-whitespace characters.
    struct MyLexer {}

    impl Lexer for MyLexer {
        type Char = char;

        fn consume_next_token(
            &mut self,
            input: &mut (impl Iterator<Item = Self::Char> + PeekingNext),
            mut output: impl FnMut(Self::Char),
        ) -> Result<(), ParserError<char>> {
            let first = match input.next() {
                Some(first) => first,
                // Nothing left: emit no characters.
                None => return Ok(()),
            };
            output(first);
            // Extend the token with every following character of the same class.
            let in_whitespace_run = first.is_whitespace();
            for ch in input.peeking_take_while(|ch| ch.is_whitespace() == in_whitespace_run) {
                output(ch);
            }
            Ok(())
        }
    }

    let mut tk = tokenize("here \n are \t some words ".chars(), MyLexer {});
    tk.advance().unwrap();
    for word in ["here", "are", "some"] {
        tk.expect_str(word).unwrap();
        // Step over the whitespace run that follows the word.
        tk.next();
    }
    tk.expect_str("words").unwrap();
}