use std::mem;
use std::ops::Index;
use std::ops::RangeBounds;
use byteyarn::Yarn;
use regex_automata::hybrid::dfa::Cache;
use crate::f;
use crate::file::Context;
use crate::file::File;
use crate::file::Span;
use crate::file::SpanId;
use crate::file::Spanned;
use crate::report::Builtins;
use crate::report::Report;
use crate::rt;
use crate::rule;
use crate::rule::Bracket;
use crate::spec::Lexeme;
use crate::spec::Spec;
use crate::token;
use super::unicode::is_xid;
/// Mutable state for a single lexing pass over one file.
pub struct Lexer<'a, 'ctx> {
/// Destination for diagnostics produced while lexing.
report: &'a Report,
/// The specification being lexed against; also supplies the DFA engine
/// that `cache` is created for.
spec: &'ctx Spec,
/// The file being lexed.
file: File<'ctx>,
/// Byte offset into the file's text of the first unconsumed character.
cursor: usize,
/// Tokens emitted so far, in emission order.
tokens: Vec<rt::Token>,
/// Stack of closing delimiters that have been pushed but not yet matched.
closers: Vec<Closer>,
/// Comment spans collected since the last token was added; they are
/// attached to the next token passed to `add_token`.
comments: Vec<SpanId>,
/// Interned empty span located at the very end of the file.
eof: SpanId,
/// Cache for the spec's DFA engine.
cache: Cache,
}
/// A closing delimiter the lexer is waiting for, recorded when a bracket is
/// opened.
pub struct Closer {
/// The bracket lexeme; reused as the lexeme of the emitted close token.
lexeme: Lexeme<rule::Bracket>,
/// Index into the lexer's token list of the open token this closer is
/// currently paired with. `Lexer::pop_closer` may exchange this value
/// between two closers when delimiters are closed out of order.
open_idx: usize,
// `original_open_idx`: the token index recorded by `Lexer::push_closer`;
// `Lexer::pop_closer` does not touch it, and `Lexer::finish` uses it to
// locate the opener of a bracket that was never closed.
// `close`: the text of the closing delimiter to look for.
original_open_idx: usize, close: Yarn,
}
impl<'a, 'ctx> Lexer<'a, 'ctx> {
pub fn new(file: File<'ctx>, report: &'a Report, spec: &'ctx Spec) -> Self {
Lexer {
eof: file.span(file.len()..file.len()).intern(file.context()),
cache: Cache::new(&spec.dfa().engine),
file,
report,
spec,
cursor: 0,
tokens: Vec::new(),
closers: Vec::new(),
comments: Vec::new(),
}
}
/// Moves the cursor forward by `by` bytes.
///
/// # Panics
///
/// Panics if the new cursor position would lie past the end of the file's
/// text.
pub fn advance(&mut self, by: usize) {
    // Saturate so that an absurdly large `by` trips the assertion instead of
    // overflowing.
    let target = self.cursor.saturating_add(by);
    let limit = self.text(..).len();
    assert!(
        target <= limit,
        "ilex: advanced cursor beyond the end of text; this is a bug"
    );
    // `target` did not saturate (it is within `limit`), so it is exactly
    // `cursor + by`.
    self.cursor = target;
}
/// Returns the report that diagnostics are written to.
pub fn report(&self) -> &Report {
self.report
}
/// Returns the specification this lexer was created with.
pub fn spec(&self) -> &'ctx Spec {
self.spec
}
/// Returns the report's built-in diagnostics helper, bound to this lexer's
/// spec.
pub fn builtins(&self) -> Builtins {
self.report.builtins(self.spec())
}
/// Returns the file being lexed.
pub fn file(&self) -> File<'ctx> {
self.file
}
/// Returns the part of the file's text selected by `range`.
///
/// `range` may be any type usable to index a `str` (for example `a..b`,
/// `a..`, `..b` or `..`).
pub fn text<R>(&self, range: R) -> &'ctx str
where
str: Index<R, Output = str>,
{
self.file.text(range)
}
/// Returns the current cursor position, as a byte offset into the file's
/// text.
pub fn cursor(&self) -> usize {
self.cursor
}
/// Returns the not-yet-consumed text: everything from the cursor to the end
/// of the file.
pub fn rest(&self) -> &'ctx str {
self.text(self.cursor..)
}
/// Returns the interned end-of-file span created in `new`.
pub fn eof(&self) -> SpanId {
self.eof
}
/// Creates a span over `range` in the file being lexed, without interning
/// it.
pub fn span(&self, range: impl RangeBounds<usize>) -> Span {
self.file.span(range)
}
/// Creates a span over `range` in the file being lexed and interns it in the
/// file's context, returning the resulting id.
pub fn intern(&self, range: impl RangeBounds<usize>) -> SpanId {
self.file.span(range).intern(self.ctx())
}
/// Returns the context that owns the file being lexed.
pub fn ctx(&self) -> &'ctx Context {
self.file().context()
}
/// Returns mutable access to the DFA cache allocated in `new`.
pub fn cache(&mut self) -> &mut Cache {
&mut self.cache
}
/// Returns the most recently added token.
///
/// # Panics
///
/// Panics if no token has been added yet.
pub fn last_token(&self) -> &rt::Token {
self.tokens.last().unwrap()
}
/// Pushes a pending closing delimiter onto the closer stack.
///
/// The closer records the current length of the token list as the index of
/// its opener, both as the (swappable) `open_idx` and as the fixed
/// `original_open_idx`.
// NOTE(review): this presumes the caller adds the matching open token
// immediately afterwards, so that it lands at the recorded index — confirm
// against the call site.
pub fn push_closer(&mut self, lexeme: Lexeme<Bracket>, close: Yarn) {
    let next_token_idx = self.tokens.len();
    let closer = Closer {
        lexeme,
        open_idx: next_token_idx,
        original_open_idx: next_token_idx,
        close,
    };
    self.closers.push(closer);
}
/// Consumes a closing delimiter at the cursor, if one of the pending closers
/// matches there.
///
/// The closer stack is searched from the top down for an entry whose `close`
/// text is a prefix of `rest()`. If there is none, nothing happens.
/// Otherwise:
///
/// 1. The matching closer is removed from the stack. If it was not the top
///    entry, its `open_idx` is exchanged with that of the former top entry
///    (which takes its place in the stack), and an "unclosed" diagnostic is
///    reported later on.
/// 2. The cursor is advanced past the delimiter.
/// 3. The open token at `open_idx` is patched with its distance to the close
///    token that is about to be added.
/// 4. If the delimiter ends in a character accepted by `is_xid` and is
///    directly followed by more such characters, those characters are
///    consumed too and an "extra chars" diagnostic is reported for them.
/// 5. An `rt::Kind::Close` token is added; its span runs from the start of
///    the delimiter to the final cursor position.
///
/// # Panics
///
/// Panics if the token at the closer's `open_idx` is not an
/// `rt::Kind::Open`.
pub fn pop_closer(&mut self) {
    let idx = self
        .closers
        .iter()
        .rposition(|close| self.rest().starts_with(close.close.as_str()));
    let Some(idx) = idx else { return };

    // Bring the matched closer to the top of the stack and take it off. The
    // former top entry (if different) now sits at `idx`.
    let len = self.closers.len();
    self.closers.swap(idx, len - 1);
    let mut close = self.closers.pop().unwrap();
    let out_of_order = idx != self.closers.len();
    if out_of_order {
        // The popped closer takes over the opener of the former top entry,
        // and vice versa.
        mem::swap(&mut close.open_idx, &mut self.closers[idx].open_idx);
    }

    let start = self.cursor();
    self.advance(close.close.len());

    // The close token has not been pushed yet, so its index will be the
    // current length of the token list.
    let close_idx = self.tokens.len();
    let offset_to_open = (close_idx - close.open_idx) as u32;
    match &mut self.tokens[close.open_idx].kind {
        rt::Kind::Open { offset_to_close, .. } => {
            *offset_to_close = offset_to_open
        }
        _ => {
            panic!("ilex: lexer.closers.last().open_idx did not point to an rt::Kind::Open; this is a bug")
        }
    }
    let open_sp = self.tokens[close.open_idx].span;

    // Look at the character immediately *before* the cursor, i.e. the last
    // character of the delimiter that was just consumed. (Inspecting the end
    // of `rest()` instead would make this check depend on how the file
    // happens to end.)
    let prev = self.text(..self.cursor()).chars().next_back();
    if prev.is_some_and(is_xid) {
        // Count the bytes of identifier-like characters glued onto the end
        // of the delimiter.
        let xids = self
            .rest()
            .find(|c| !is_xid(c))
            .unwrap_or(self.rest().len());
        if xids > 0 {
            let start = self.cursor();
            self.advance(xids);
            let span = self.span(start..self.cursor());
            self.builtins().extra_chars(
                self.spec().rule_name_or(
                    close.lexeme.any(),
                    f!("{} ... {}", open_sp.text(self.file.context()), close.close),
                ),
                span,
            );
        }
    }

    // Covers the delimiter plus any extra characters consumed above.
    let span = self.span(start..self.cursor).intern(self.ctx());
    if out_of_order {
        // At least one closer remains on the stack here, because the matched
        // one was not the top entry.
        self.builtins().unclosed(
            open_sp,
            &self.closers.last().unwrap().close,
            close.close.as_str(),
            span,
        );
    }

    let full_span =
        self.intern(open_sp.span(self.ctx()).start()..self.cursor());
    self.add_token(rt::Token {
        kind: rt::Kind::Close { full_span, offset_to_open },
        span,
        lexeme: close.lexeme.any(),
        prefix: None,
        suffix: None,
    });
}
/// Appends `tok` to the token list.
///
/// Any comment spans collected via `add_comment` since the previous token
/// are attached to `tok`'s span first, and the pending list is emptied.
pub fn add_token(&mut self, tok: rt::Token) {
    // Fetch the context once; it outlives the borrow of `self`, so it can be
    // used while `self.comments` is being drained.
    let ctx = self.ctx();
    let span = tok.span.span(ctx);
    for pending in self.comments.drain(..) {
        span.append_comment_span(ctx, pending);
    }
    self.tokens.push(tok);
}
/// Records a comment span; it will be attached to the next token passed to
/// `add_token`.
pub fn add_comment(&mut self, span: SpanId) {
self.comments.push(span);
}
/// Reports the text in `start..end` as unexpected, one diagnostic per
/// maximal run of non-whitespace characters.
///
/// Whitespace inside the range only separates runs and is never reported
/// itself; a range that is empty or all whitespace produces no diagnostics.
pub fn add_unexpected(&mut self, start: usize, end: usize) {
    let text = self.text(start..end);

    // Absolute offset at which the current non-whitespace run began.
    let mut run_start = start;
    for (offset, c) in text.char_indices() {
        if !c.is_whitespace() {
            continue;
        }

        // Whitespace terminates the current run (if it is non-empty) and
        // the next run can begin right after it.
        let here = start + offset;
        if here > run_start {
            let span = self.span(run_start..here);
            self.builtins().unexpected_token(span);
        }
        run_start = here + c.len_utf8();
    }

    // Flush the trailing run that was not followed by whitespace.
    let stop = start + text.len();
    if stop > run_start {
        let span = self.span(run_start..stop);
        self.builtins().unexpected_token(span);
    }
}
pub fn finish(mut self) -> token::Stream<'ctx> {
self.add_token(rt::Token {
kind: rt::Kind::Eof,
span: self.eof,
lexeme: Lexeme::eof().cast(),
prefix: None,
suffix: None,
});
for close in mem::take(&mut self.closers) {
let open = self.tokens[close.original_open_idx].span;
self
.builtins()
.unclosed(open, &close.close, Lexeme::eof(), self.eof());
}
token::Stream { file: self.file, spec: self.spec, toks: self.tokens }
}
}