use std::{cell::RefCell, rc::Rc};
use crate::{
Dfa, Lookahead, ScannerImpl,
internals::{
char_iter::item::CharItem,
char_iter::iter::CharIter,
char_iter::iter_with_position::CharIterWithPosition,
match_types::{Match, MatchEnd, MatchStart},
position::{Position, Positions},
},
};
/// Operations that the matching routines in this file (`next_match`, `find_next` and
/// `evaluate_lookahead`) require from a match iterator.
pub trait FindMatchesTrait {
/// Returns the DFA that `next_match` runs against the input.
fn current_dfa(&self) -> &'static Dfa;
/// Called by `next_match` with the token type of a match right before that match is
/// returned.
fn handle_mode_transition(&self, token_type: usize);
/// Returns the upcoming character item, or `None` when there is none.
///
/// `find_next` follows every successful `peek` with a separate `advance_char_iter`
/// call, so this is expected not to consume the item.
fn peek(&mut self) -> Option<CharItem>;
/// Maps `ch` to the index `find_next` uses to look up a transition in a DFA state.
///
/// `find_next` stops scanning when this returns `None`.
fn get_disjoint_class(&self, ch: char) -> Option<usize>;
/// Steps the character iterator forward; `next_match` treats a `false` result as the
/// end of the search.
fn advance_char_iter(&mut self) -> bool;
/// Records the current state of the character iterator.
fn save_char_iter(&mut self);
/// Returns the character iterator to a recorded state.
///
/// `find_next` relies on this going back to the state captured by the latest
/// `save_char_iter` call.
fn restore_saved_char_iter(&mut self);
}
/// Iterator over the matches found in an input string, reading characters through a
/// [`CharIter`].
///
/// `F` is the type of the function that maps a character to its class index.
pub struct FindMatches<'a, F>
where
F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
// Character source over the input being scanned.
char_iter: CharIter<'a>,
// Shared scanner state; queried for the current mode's DFA and told about mode
// transitions.
scanner_impl: Rc<RefCell<ScannerImpl>>,
// Maps a character to the class index used for DFA transition lookup.
match_function: &'static F,
}
impl<'a, F> Clone for FindMatches<'a, F>
where
F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
fn clone(&self) -> Self {
Self {
char_iter: self.char_iter.clone(),
scanner_impl: self.scanner_impl.clone(),
match_function: self.match_function,
}
}
}
impl<'a, F> FindMatches<'a, F>
where
    F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
    /// Builds a match iterator over `input`.
    ///
    /// `input` and `offset` are passed unchanged to `CharIter::new`; `scanner_impl` and
    /// `match_function` are stored as given.
    pub(crate) fn new(
        input: &'a str,
        offset: usize,
        scanner_impl: Rc<RefCell<ScannerImpl>>,
        match_function: &'static F,
    ) -> Self {
        let char_iter = CharIter::new(input, offset);
        Self {
            char_iter,
            scanner_impl,
            match_function,
        }
    }

    /// Returns the name of the scanner's current mode, as reported by the shared scanner
    /// state.
    ///
    /// # Panics
    ///
    /// Panics if the shared scanner state is mutably borrowed at the time of the call.
    #[inline]
    pub fn current_mode_name(&self) -> &'static str {
        self.scanner_impl.borrow().current_mode_name()
    }

    /// Returns the name the shared scanner state reports for the mode at `index`, if any.
    ///
    /// # Panics
    ///
    /// Panics if the shared scanner state is mutably borrowed at the time of the call.
    #[inline]
    pub fn mode_name(&self, index: usize) -> Option<&'static str> {
        let scanner = self.scanner_impl.borrow();
        scanner.mode_name(index)
    }

    /// Returns the index of the scanner's current mode.
    ///
    /// # Panics
    ///
    /// Panics if the shared scanner state is mutably borrowed at the time of the call.
    #[inline]
    pub fn current_mode_index(&self) -> usize {
        let scanner = self.scanner_impl.borrow();
        scanner.current_mode_index()
    }
}
/// Iterating a [`FindMatches`] yields one [`Match`] per call until `next_match` reports
/// that nothing further can be found.
impl<F> Iterator for FindMatches<'_, F>
where
    F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
    type Item = Match;

    /// Delegates to `next_match`, which performs the search and the mode-transition
    /// notification.
    fn next(&mut self) -> Option<Self::Item> {
        next_match(self)
    }
}
/// Connects [`FindMatches`] to the shared matching routines by forwarding each operation
/// to the character iterator, the shared scanner state, or the match function.
impl<F> FindMatchesTrait for FindMatches<'_, F>
where
    F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
    /// Returns the DFA of the mode the scanner is currently in.
    #[inline(always)]
    fn current_dfa(&self) -> &'static Dfa {
        let scanner = self.scanner_impl.borrow();
        let mode_index = scanner.current_mode_index();
        &scanner.modes()[mode_index].dfa
    }

    /// Forwards `token_type` to the shared scanner state's mode-transition handling.
    #[inline(always)]
    fn handle_mode_transition(&self, token_type: usize) {
        self.scanner_impl
            .borrow()
            .handle_mode_transition(token_type);
    }

    /// Returns whatever the character iterator's `peek` reports.
    #[inline(always)]
    fn peek(&mut self) -> Option<CharItem> {
        self.char_iter.peek()
    }

    /// Pulls one item from the character iterator and reports whether there was one.
    #[inline(always)]
    fn advance_char_iter(&mut self) -> bool {
        let consumed = self.char_iter.next();
        consumed.is_some()
    }

    /// Classifies `ch` with the stored match function.
    #[inline(always)]
    fn get_disjoint_class(&self, ch: char) -> Option<usize> {
        let classify = self.match_function;
        classify(ch)
    }

    /// Asks the character iterator to save its state.
    fn save_char_iter(&mut self) {
        self.char_iter.save_state()
    }

    /// Asks the character iterator to restore its saved state.
    fn restore_saved_char_iter(&mut self) {
        self.char_iter.restore_state()
    }
}
/// Iterator over the matches found in an input string, reading characters through a
/// [`CharIterWithPosition`].
///
/// NOTE(review): presumably the items of `CharIterWithPosition` carry a populated
/// `position`, so the produced matches include positions — confirm in that type.
///
/// `F` is the type of the function that maps a character to its class index.
pub struct FindMatchesWithPosition<'a, F>
where
F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
// Character source over the input being scanned.
char_iter: CharIterWithPosition<'a>,
// Shared scanner state; queried for the current mode's DFA and told about mode
// transitions.
scanner_impl: Rc<RefCell<ScannerImpl>>,
// Maps a character to the class index used for DFA transition lookup.
match_function: &'static F,
}
impl<'a, F> Clone for FindMatchesWithPosition<'a, F>
where
F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
fn clone(&self) -> Self {
Self {
char_iter: self.char_iter.clone(),
scanner_impl: self.scanner_impl.clone(),
match_function: self.match_function,
}
}
}
impl<'a, F> FindMatchesWithPosition<'a, F>
where
    F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
    /// Builds a match iterator over `input`.
    ///
    /// `input` and `offset` are passed unchanged to `CharIterWithPosition::new`;
    /// `scanner_impl` and `match_function` are stored as given.
    pub(crate) fn new(
        input: &'a str,
        offset: usize,
        scanner_impl: Rc<RefCell<ScannerImpl>>,
        match_function: &'static F,
    ) -> Self {
        let char_iter = CharIterWithPosition::new(input, offset);
        Self {
            char_iter,
            scanner_impl,
            match_function,
        }
    }

    /// Looks up the name of the scanner's current mode by its index.
    ///
    /// Returns whatever the shared scanner state's `mode_name` yields for the current mode
    /// index.
    ///
    /// # Panics
    ///
    /// Panics if the shared scanner state is mutably borrowed at the time of the call.
    #[inline]
    pub fn current_mode_name(&self) -> Option<&'static str> {
        let scanner = self.scanner_impl.borrow();
        scanner.mode_name(scanner.current_mode_index())
    }

    /// Returns the name the shared scanner state reports for the mode at `index`, if any.
    ///
    /// # Panics
    ///
    /// Panics if the shared scanner state is mutably borrowed at the time of the call.
    #[inline]
    pub fn mode_name(&self, index: usize) -> Option<&'static str> {
        let scanner = self.scanner_impl.borrow();
        scanner.mode_name(index)
    }

    /// Returns the index of the scanner's current mode.
    ///
    /// # Panics
    ///
    /// Panics if the shared scanner state is mutably borrowed at the time of the call.
    #[inline]
    pub fn current_mode(&self) -> usize {
        let scanner = self.scanner_impl.borrow();
        scanner.current_mode_index()
    }
}
/// Iterating a [`FindMatchesWithPosition`] yields one [`Match`] per call until
/// `next_match` reports that nothing further can be found.
impl<F> Iterator for FindMatchesWithPosition<'_, F>
where
    F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
    type Item = Match;

    /// Delegates to `next_match`, which performs the search and the mode-transition
    /// notification.
    fn next(&mut self) -> Option<Self::Item> {
        next_match(self)
    }
}
/// Connects [`FindMatchesWithPosition`] to the shared matching routines by forwarding each
/// operation to the character iterator, the shared scanner state, or the match function.
impl<F> FindMatchesTrait for FindMatchesWithPosition<'_, F>
where
    F: Fn(char) -> Option<usize> + 'static + ?Sized,
{
    /// Returns the DFA of the mode the scanner is currently in.
    #[inline(always)]
    fn current_dfa(&self) -> &'static Dfa {
        let scanner = self.scanner_impl.borrow();
        let mode_index = scanner.current_mode_index();
        &scanner.modes()[mode_index].dfa
    }

    /// Forwards `token_type` to the shared scanner state's mode-transition handling.
    #[inline(always)]
    fn handle_mode_transition(&self, token_type: usize) {
        self.scanner_impl
            .borrow()
            .handle_mode_transition(token_type);
    }

    /// Returns whatever the character iterator's `peek` reports.
    #[inline(always)]
    fn peek(&mut self) -> Option<CharItem> {
        self.char_iter.peek()
    }

    /// Pulls one item from the character iterator and reports whether there was one.
    #[inline(always)]
    fn advance_char_iter(&mut self) -> bool {
        let consumed = self.char_iter.next();
        consumed.is_some()
    }

    /// Classifies `ch` with the stored match function.
    #[inline(always)]
    fn get_disjoint_class(&self, ch: char) -> Option<usize> {
        let classify = self.match_function;
        classify(ch)
    }

    /// Asks the character iterator to save its state.
    fn save_char_iter(&mut self) {
        self.char_iter.save_state()
    }

    /// Asks the character iterator to restore its saved state.
    fn restore_saved_char_iter(&mut self) {
        self.char_iter.restore_state()
    }
}
/// Checks the lookahead condition attached to `accept_data` at the current input position.
///
/// `find_matches` is taken by value (the caller passes a clone), so whatever the lookahead
/// DFA consumes here is not visible through the caller's iterator.
///
/// Returns `(satisfied, length)`:
/// * positive lookahead — `(true, len)` with the length of the span matched by the
///   lookahead DFA, or `(false, 0)` if it does not match;
/// * negative lookahead — `(true, 0)` if the lookahead DFA does not match, `(false, 0)`
///   if it does.
///
/// # Panics
///
/// Panics when `accept_data.lookahead` is `Lookahead::None`; callers must filter that
/// case out beforehand.
fn evaluate_lookahead<F: FindMatchesTrait + Clone>(
    mut find_matches: F,
    accept_data: &crate::AcceptData,
) -> (bool, usize) {
    match &accept_data.lookahead {
        crate::Lookahead::Positive(dfa) => match find_next(&mut find_matches, dfa) {
            Some(ma) => (true, ma.span.len()),
            None => (false, 0),
        },
        crate::Lookahead::Negative(dfa) => {
            // A negative lookahead holds exactly when its DFA finds nothing.
            let absent = find_next(&mut find_matches, dfa).is_none();
            (absent, 0)
        }
        crate::Lookahead::None => {
            unreachable!("Lookahead::None should not be evaluated here")
        }
    }
}
/// Finds the next match at or after the current input position.
///
/// The DFA is fetched once via `current_dfa`. `find_next` is then tried at the current
/// position; if it finds nothing, one character is skipped and the attempt is repeated.
/// When a match is found, its token type is passed to `handle_mode_transition` before the
/// match is returned. Returns `None` once the character iterator cannot be advanced any
/// further.
#[inline(always)]
pub(crate) fn next_match<F: FindMatchesTrait + Clone>(find_matches: &mut F) -> Option<Match> {
    let dfa = find_matches.current_dfa();
    loop {
        match find_next(find_matches, dfa) {
            Some(found) => {
                find_matches.handle_mode_transition(found.token_type);
                return Some(found);
            }
            // Nothing matched here: step over one character and try again.
            None if find_matches.advance_char_iter() => continue,
            None => return None,
        }
    }
}
/// Runs `dfa` from its state 0 starting at the current input position and returns the
/// last accepted match, if any.
///
/// Characters are consumed while the DFA has a transition for their class. Every time an
/// accepting state is entered, its accept entries are tried in order and the first one
/// whose lookahead (if present) is satisfied is considered; it replaces the remembered
/// match when it is longer, or equally long with a smaller priority value. On return the
/// character iterator is restored to the last saved state: just past the remembered match
/// if there is one, otherwise the position at entry.
#[inline(always)]
fn find_next<F: FindMatchesTrait + Clone>(find_matches: &mut F, dfa: &Dfa) -> Option<Match> {
    let mut current_state = 0;
    let mut start = MatchStart::default();
    let mut end = MatchEnd::default();
    let mut have_start = false;
    let mut have_end = false;

    // Checkpoint the entry position; this is what gets restored when nothing is accepted.
    find_matches.save_char_iter();

    while let Some(item) = find_matches.peek() {
        // A character without a class cannot extend the match.
        let Some(class_idx) = find_matches.get_disjoint_class(item.ch) else {
            break;
        };
        // Neither can a class without a transition out of the current state.
        let Some(Some(transition)) = dfa.states[current_state].transitions.get(class_idx) else {
            break;
        };
        current_state = transition.to;
        let entered = &dfa.states[current_state];

        find_matches.advance_char_iter();

        if !have_start {
            start = MatchStart::new(item.byte_index).with_position(item.position);
            have_start = true;
        }

        for accept in entered.accept_data {
            // The lookahead runs on a clone so that it cannot disturb this iterator.
            let satisfied = matches!(accept.lookahead, Lookahead::None)
                || evaluate_lookahead(find_matches.clone(), accept).0;
            if !satisfied {
                continue;
            }

            let end_byte = item.byte_index + item.ch.len_utf8();
            let candidate_len = end_byte - start.byte_index;
            // The remembered length is only meaningful (and only computed) once an end
            // has been recorded.
            let is_better = if have_end {
                let remembered_len = end.byte_index - start.byte_index;
                candidate_len > remembered_len
                    || (candidate_len == remembered_len && accept.priority < end.priority)
            } else {
                true
            };

            if is_better {
                // The end position is the one following the consumed character: start of
                // the next line after '\n', otherwise one column further.
                let end_position = item.position.map(|p| match item.ch {
                    '\n' => Position::new(p.line + 1, 1),
                    _ => Position::new(p.line, p.column + 1),
                });
                end = MatchEnd::new(end_byte, accept.token_type, accept.priority)
                    .with_position(end_position);
                have_end = true;
                // Move the checkpoint to just past this accepted match.
                find_matches.save_char_iter();
            }
            // Only the first satisfied accept entry of a state is considered.
            break;
        }
    }

    // Rewind to the latest checkpoint in both outcomes.
    find_matches.restore_saved_char_iter();

    if !have_end {
        return None;
    }

    let span: crate::Span = start.byte_index..end.byte_index;
    let positions = start
        .position
        .zip(end.position)
        .map(|(from, to)| Positions::new(from, to));
    Some(Match::new(span, end.token_type).with_positions(positions))
}