mod token;
pub use token::{parse_regex, Token};
use crate::error::CompilerError;
use crate::location::Span;
use logos::Logos;
/// Lexer wrapper that hands out `(Token, Span)` pairs and records lexing
/// errors internally instead of returning them to the caller.
#[derive(Debug)]
pub struct Lexer<'source> {
/// Underlying `logos` lexer that produces the raw token results.
inner: logos::Lexer<'source, Token>,
/// The complete input text; sliced when classifying rejected input.
source: &'source str,
/// Errors recorded so far; drained by `take_errors`.
errors: Vec<CompilerError>,
}
impl<'source> Lexer<'source> {
    /// Creates a lexer over `source` with an empty error list.
    #[must_use]
    pub fn new(source: &'source str) -> Self {
        Self {
            inner: Token::lexer(source),
            source,
            errors: Vec::new(),
        }
    }

    /// Returns the next successfully lexed token together with its span, or
    /// `None` once the underlying lexer is exhausted.
    ///
    /// Input rejected by the underlying lexer is never returned: each rejected
    /// slice is classified, recorded as an error, and skipped.
    pub fn next_token(&mut self) -> Option<(Token, Span)> {
        loop {
            let token = self.inner.next()?;
            let range = self.inner.span();
            match token {
                Ok(tok) => return Some((tok, Span::from_range(range.start, range.end))),
                Err(()) => {
                    self.errors
                        .push(Self::classify_error(self.source, range.start, range.end));
                }
            }
        }
    }

    /// Picks an error variant for the rejected slice `source[start..end]`
    /// based on its first character:
    ///
    /// * `"` gives `UnterminatedString`
    /// * an ASCII digit gives `InvalidNumber` carrying the slice text
    /// * any other character gives `InvalidCharacter` with that character
    ///
    /// If the range cannot be sliced or the slice is empty, the result is
    /// `InvalidCharacter` with `'\u{0}'` as a placeholder character.
    fn classify_error(source: &str, start: usize, end: usize) -> CompilerError {
        let span = Span::from_range(start, end);
        // `get` returns `None` for out-of-range or non-boundary indices, so
        // this never panics; such ranges fall through to the empty-slice case.
        let slice = source.get(start..end).unwrap_or_default();
        match slice.chars().next() {
            Some('"') => CompilerError::UnterminatedString { span },
            Some(c) if c.is_ascii_digit() => CompilerError::InvalidNumber {
                value: slice.to_string(),
                span,
            },
            Some(c) => CompilerError::InvalidCharacter { character: c, span },
            None => CompilerError::InvalidCharacter {
                character: '\u{0}',
                span,
            },
        }
    }

    /// Moves the entries stored in the underlying lexer's extras
    /// (`unterminated_block_comments`, `invalid_unicode_escapes`) into
    /// `self.errors`, leaving both extras collections empty.
    ///
    /// Block-comment errors are appended first, then unicode-escape errors.
    fn drain_deferred_errors(&mut self) {
        for (start, end) in std::mem::take(&mut self.inner.extras.unterminated_block_comments) {
            self.errors.push(CompilerError::UnterminatedBlockComment {
                span: Span::from_range(start, end),
            });
        }
        for (start, end, hex) in std::mem::take(&mut self.inner.extras.invalid_unicode_escapes) {
            self.errors.push(CompilerError::InvalidUnicodeEscape {
                value: hex,
                span: Span::from_range(start, end),
            });
        }
    }

    /// Returns the span of the most recent item produced by the underlying lexer.
    #[must_use]
    pub fn span(&self) -> Span {
        let range = self.inner.span();
        Span::from_range(range.start, range.end)
    }

    /// Removes and returns every error recorded so far, leaving the lexer's
    /// error list empty.
    ///
    /// This includes the unterminated-block-comment and invalid-unicode-escape
    /// entries held in the underlying lexer's extras, so callers driving the
    /// lexer through `next_token` see the same errors as
    /// `tokenize_all_with_errors`. Spans are returned as built by
    /// `Span::from_range`; no position filling is applied here.
    pub fn take_errors(&mut self) -> Vec<CompilerError> {
        self.drain_deferred_errors();
        std::mem::take(&mut self.errors)
    }

    /// Lexes all of `source` and returns only the tokens, discarding errors.
    #[must_use]
    pub fn tokenize_all(source: &'source str) -> Vec<(Token, Span)> {
        Self::tokenize_all_with_errors(source).0
    }

    /// Lexes all of `source`, returning the tokens and the errors encountered.
    ///
    /// Every returned span (token and error alike) is passed through
    /// `crate::location::fill_span_positions`. Errors are ordered as:
    /// rejected-input errors in lexing order, then unterminated block
    /// comments, then invalid unicode escapes.
    #[must_use]
    pub fn tokenize_all_with_errors(
        source: &'source str,
    ) -> (Vec<(Token, Span)>, Vec<CompilerError>) {
        let mut lexer = Self::new(source);
        let mut tokens = Vec::new();
        while let Some((token, span)) = lexer.next_token() {
            let span = crate::location::fill_span_positions(span, source);
            tokens.push((token, span));
        }
        // `take_errors` also drains the extras-held errors, preserving the
        // historical ordering of this function's error list.
        let errors = lexer
            .take_errors()
            .into_iter()
            .map(|e| fill_error_span_positions(e, source))
            .collect();
        (tokens, errors)
    }
}
#[expect(
clippy::wildcard_enum_match_arm,
reason = "Lexer::classify_error only produces a small set of lexer-error variants; enumerating every CompilerError variant would be noisy without adding safety"
)]
fn fill_error_span_positions(error: CompilerError, source: &str) -> CompilerError {
let span = crate::location::fill_span_positions(error.span(), source);
match error {
CompilerError::InvalidCharacter { character, .. } => {
CompilerError::InvalidCharacter { character, span }
}
CompilerError::UnterminatedString { .. } => CompilerError::UnterminatedString { span },
CompilerError::UnterminatedBlockComment { .. } => {
CompilerError::UnterminatedBlockComment { span }
}
CompilerError::InvalidUnicodeEscape { value, .. } => {
CompilerError::InvalidUnicodeEscape { value, span }
}
CompilerError::InvalidNumber { value, .. } => CompilerError::InvalidNumber { value, span },
other => other,
}
}