use crate::tokens::Token;
use crate::traits::*;
use logos::Logos;
use thiserror::Error;
/// Errors reported by the lexer.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
/// Within this part of the file only `InvalidToken` and `InvalidNumber` are
/// actually constructed; the remaining variants are presumably produced by
/// other code (token callbacks, parser) — TODO confirm against callers.
#[derive(Error, Debug, Clone, PartialEq)]
pub enum LexerError {
/// The underlying lexer failed to match a token; `position` is the start
/// offset of the span it rejected.
#[error("Invalid token at position {position}")]
InvalidToken { position: usize },
/// A token other than the expected one was found; both sides are carried
/// as already-rendered strings.
#[error("Unexpected token: expected {expected}, found {found}")]
UnexpectedToken { expected: String, found: String },
/// A string literal was not closed.
#[error("Unterminated string")]
UnterminatedString,
/// A numeric literal could not be parsed. This is also the target of the
/// `From<ParseIntError>` and `From<ParseFloatError>` conversions.
#[error("Invalid numeric literal")]
InvalidNumber,
/// Input ended where more was required.
#[error("Unexpected end of file")]
UnexpectedEof,
/// A numeric literal does not fit the type it is being read into.
#[error("Number too large for the target type")]
NumberTooLarge,
/// An escape sequence was not recognised; `sequence` is echoed in the message.
#[error("Invalid escape sequence: {sequence}")]
InvalidEscapeSequence { sequence: String },
/// Catch-all carrying a free-form message.
#[error("Lexing error: {0}")]
Other(String),
}
impl From<std::num::ParseIntError> for LexerError {
fn from(_: std::num::ParseIntError) -> Self {
LexerError::InvalidNumber
}
}
impl From<std::num::ParseFloatError> for LexerError {
fn from(_: std::num::ParseFloatError) -> Self {
LexerError::InvalidNumber
}
}
/// Lexer over a borrowed source string.
///
/// Wraps the `logos`-generated lexer for [`Token`] and keeps the original
/// input alongside it so the source text stays reachable for slicing,
/// position lookups and resetting.
pub struct Lexer<'a> {
/// The complete input text the lexer was created from.
source: &'a str,
/// The `logos` lexer that performs the actual scanning of `source`.
inner: logos::Lexer<'a, Token>,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the start of `input`.
    pub fn new(input: &'a str) -> Self {
        let inner = Token::lexer(input);
        Self {
            source: input,
            inner,
        }
    }

    /// Span currently reported by the underlying lexer.
    ///
    /// Returns the same value as [`Lexer::current_span`].
    pub fn position(&self) -> std::ops::Range<usize> {
        self.inner.span()
    }

    /// Span currently reported by the underlying lexer.
    ///
    /// Right after [`Lexer::next_token`] returns a token, this is that
    /// token's `start..end` range.
    pub fn current_span(&self) -> std::ops::Range<usize> {
        self.inner.span()
    }

    /// The part of the source that follows the end of the current span.
    pub fn remaining_input(&self) -> &'a str {
        let consumed = self.inner.span().end;
        &self.source[consumed..]
    }

    /// The complete source text this lexer was created from.
    pub fn source(&self) -> &'a str {
        self.source
    }

    /// Advances to the next significant token.
    ///
    /// Whitespace, line comments and block comments are skipped. A token is
    /// returned as `(start, token, end)` using the span reported for it.
    ///
    /// # Errors
    ///
    /// Yields `Some(Err(LexerError::InvalidToken { .. }))`, carrying the start
    /// of the offending span, when the underlying lexer reports an error.
    ///
    /// Returns `None` once the underlying lexer is exhausted.
    pub fn next_token(&mut self) -> Option<Result<(usize, Token, usize), LexerError>> {
        while let Some(outcome) = self.inner.next() {
            // The span must be read after `next()`: it describes the item just scanned.
            let std::ops::Range { start, end } = self.inner.span();

            let token = match outcome {
                Ok(token) => token,
                Err(_) => return Some(Err(LexerError::InvalidToken { position: start })),
            };

            let is_trivia = matches!(
                token,
                Token::Whitespace | Token::LineComment | Token::BlockComment
            );
            if !is_trivia {
                return Some(Ok((start, token, end)));
            }
        }
        None
    }
}
impl<'a> Lexer<'a> {
    /// Returns the next significant token without consuming it.
    ///
    /// Scanning happens on a clone of the underlying lexer, so this lexer's
    /// own position is left untouched and repeated calls give the same answer.
    /// Whitespace, line comments and block comments are skipped, exactly as
    /// when actually advancing.
    ///
    /// # Errors
    ///
    /// Yields `Some(Err(LexerError::InvalidToken { .. }))`, carrying the start
    /// of the offending span, when the lookahead hits input the underlying
    /// lexer rejects.
    ///
    /// Returns `None` when only skippable tokens (or nothing) remain.
    pub fn peek_token(&mut self) -> Option<Result<(usize, Token, usize), LexerError>> {
        let mut lookahead = self.inner.clone();
        loop {
            let outcome = lookahead.next()?;
            // Read the span from the clone: it describes the item just scanned.
            let span = lookahead.span();
            match outcome {
                Ok(token) => {
                    let is_trivia = matches!(
                        token,
                        Token::Whitespace | Token::LineComment | Token::BlockComment
                    );
                    if !is_trivia {
                        return Some(Ok((span.start, token, span.end)));
                    }
                }
                Err(_) => {
                    return Some(Err(LexerError::InvalidToken {
                        position: span.start,
                    }));
                }
            }
        }
    }

    /// Converts a byte offset into the source to a 1-based `(line, column)`.
    ///
    /// `pos` is a byte offset, the same unit as the spans this lexer reports
    /// and slices the source with. Columns are counted in characters, so a
    /// multi-byte character advances the column by one. A `'\n'` starts a new
    /// line and resets the column to 1.
    ///
    /// Offsets past the end of the source resolve to the position just after
    /// the last character.
    pub fn line_column(&self, pos: usize) -> (usize, usize) {
        // `char_indices` yields byte offsets. Comparing `pos` against a plain
        // character counter would drift after the first non-ASCII character.
        self.source
            .char_indices()
            .take_while(|&(offset, _)| offset < pos)
            .fold((1, 1), |(line, column), (_, ch)| {
                if ch == '\n' {
                    (line + 1, 1)
                } else {
                    (line, column + 1)
                }
            })
    }
}
impl<'a> Iterator for Lexer<'a> {
type Item = Result<(usize, Token, usize), LexerError>;
fn next(&mut self) -> Option<Self::Item> {
self.next_token()
}
}
/// Permissive validator: the lexer itself imposes no constraints, so every
/// check below accepts unconditionally and ignores its arguments.
impl TokenValidator for Lexer<'_> {
    type Token = Token;

    /// Always accepts, whatever the token or position.
    fn is_valid_token(&self, _: &Token, _: usize) -> bool {
        true
    }

    /// Always accepts, including the empty sequence.
    fn is_valid_sequence(&self, _: &[Token]) -> bool {
        true
    }

    /// Always accepts.
    fn validate_token(&self, _: &Token) -> bool {
        true
    }
}
/// Renders diagnostics as plain strings; nothing is printed or stored.
impl ErrorReporter for Lexer<'_> {
    /// Formats `error` (via its `Display` text) together with the quoted
    /// `context` it occurred in.
    fn report_error(&self, error: &LexerError, context: &str) -> String {
        format!("Lexer error in '{}': {}", context, error)
    }

    /// Formats a warning `message` prefixed with the `position` it refers to.
    fn report_warning(&self, message: &str, position: usize) -> String {
        format!("Warning at position {}: {}", position, message)
    }
}
/// Exposes the lexer through the generic token-stream interface.
impl TokenStream for Lexer<'_> {
    type Token = Token;

    /// Borrows the lexer as a boxed iterator.
    ///
    /// The box holds a mutable borrow of `self`, so consuming items from it
    /// advances this lexer.
    fn token_iter(
        &mut self,
    ) -> Box<dyn Iterator<Item = Result<(usize, Token, usize), LexerError>> + '_> {
        let stream: &mut Self = self;
        Box::new(stream)
    }

    /// Rewinds to the start by rebuilding the underlying lexer over the same source.
    fn reset(&mut self) {
        let source = self.source;
        self.inner = Token::lexer(source);
    }

    /// The number of tokens is not known without scanning, so this is always `None`.
    fn token_count(&self) -> Option<usize> {
        None
    }
}