use super::end::{EndRegex, EndRegexCache};
use super::pattern::{Match, OpenRule, Pattern, RegexMatch};
use super::*;
/// Tokenizes text one line at a time against a borrowed [`Grammar`].
///
/// Besides the grammar reference, the tokenizer owns an [`EndRegexCache`] that is
/// handed to pattern lookups while tokenizing and can be emptied with
/// `clear_caches`.
#[derive(Debug)]
pub struct LineTokenizer<'grammar> {
/// Grammar whose patterns drive tokenization; borrowed for `'grammar`.
pub(crate) grammar: &'grammar Grammar,
/// Cache passed to pattern matching and to end-regex resolution for open rules.
end_regex_cache: EndRegexCache,
}
impl<'grammar> LineTokenizer<'grammar> {
    /// Creates a tokenizer that borrows `grammar` and starts with a freshly
    /// constructed end-regex cache.
    pub fn new(grammar: &'grammar Grammar) -> Self {
        let end_regex_cache = EndRegexCache::new();
        Self {
            grammar,
            end_regex_cache,
        }
    }

    /// Clears the end-regex cache owned by this tokenizer.
    pub fn clear_caches(&mut self) {
        self.end_regex_cache.clear();
    }

    /// Tokenizes `line` and returns its spans in a newly allocated vector.
    ///
    /// Convenience wrapper around [`Self::tokenize_line_into`]; `state` is
    /// read and updated exactly as described there.
    pub fn tokenize_line(&mut self, state: &mut LineState, line: &str) -> Vec<ScopeSpan> {
        let mut collected = Vec::new();
        self.tokenize_line_into(state, line, &mut collected);
        collected
    }

    /// Tokenizes `line` into `spans`, reusing the caller's buffer.
    ///
    /// `spans` is cleared first, so it only ever holds the result for this
    /// line. Grammars of kind `GrammarKind::Json` are delegated wholesale to
    /// `json::tokenize_line_into`, and `state` is left untouched on that path.
    /// For every other grammar the line is passed through `trim_line_end` and
    /// scanned from offset 0 until the cursor reaches the end of the trimmed
    /// text; rules pushed onto `state.stack` and not popped again remain there
    /// for the next call.
    pub fn tokenize_line_into(
        &mut self,
        state: &mut LineState,
        line: &str,
        spans: &mut Vec<ScopeSpan>,
    ) {
        spans.clear();
        if let GrammarKind::Json = self.grammar.kind {
            json::tokenize_line_into(line, spans);
            return;
        }

        let trimmed = trim_line_end(line);
        let limit = trimmed.len();
        let mut cursor = 0;
        while cursor < limit {
            cursor = self.advance(state, trimmed, cursor, spans);
        }
    }

    /// Performs one tokenization step at `pos` and returns the offset to
    /// continue from.
    ///
    /// The top of `state.stack` (cloned, so `state` stays free to be mutated)
    /// selects the mode: with an open rule the step runs inside that rule,
    /// otherwise it runs against the grammar's root patterns.
    fn advance(
        &mut self,
        state: &mut LineState,
        line: &str,
        pos: usize,
        spans: &mut Vec<ScopeSpan>,
    ) -> usize {
        match state.stack.last().cloned() {
            Some(open) => self.advance_inside_open_rule(state, line, pos, spans, open),
            None => self.advance_at_root(state, line, pos, spans),
        }
    }

    /// Steps through `line` using the grammar's root patterns.
    ///
    /// When no pattern is found from `pos`, returns `line.len()` without
    /// emitting anything. Otherwise the match's spans are appended to `spans`,
    /// any rule it opens is pushed onto `state.stack`, and the match's
    /// `next_pos` is returned.
    fn advance_at_root(
        &mut self,
        state: &mut LineState,
        line: &str,
        pos: usize,
        spans: &mut Vec<ScopeSpan>,
    ) -> usize {
        let lookup = self
            .grammar
            .patterns
            .find_next(line, pos, &mut self.end_regex_cache);
        let hit = match lookup {
            Some(hit) => hit,
            // No further root-level match: the rest of the line is skipped.
            None => return line.len(),
        };

        // Query the match before its spans are moved out of it below.
        let resume_at = hit.next_pos(line);
        if let Some(rule) = hit.open_rule() {
            state.stack.push(rule);
        }
        spans.extend(hit.spans);
        resume_at
    }

    /// Steps through `line` while `open` is the innermost open rule.
    ///
    /// If the rule's pattern cannot be found in the grammar, or its end regex
    /// cannot be resolved, the rule is popped from `state.stack` and `pos` is
    /// returned unchanged, so the caller retries with the shortened stack.
    /// Otherwise the next event inside the rule is determined and applied, and
    /// the offset produced by that event is returned.
    fn advance_inside_open_rule(
        &mut self,
        state: &mut LineState,
        line: &str,
        pos: usize,
        spans: &mut Vec<ScopeSpan>,
        open: OpenRule,
    ) -> usize {
        if let Some(pattern) = self.grammar.pattern_by_rule(open.rule_id) {
            let resumed =
                pattern.resume_end(open.dynamic_end.as_deref(), &mut self.end_regex_cache);
            if let Some(end) = resumed {
                return self
                    .next_event_inside(pattern, &end, line, pos)
                    .apply(state, line, pos, spans, pattern);
            }
        }

        // Either lookup failed: discard the unusable rule without consuming input.
        state.stack.pop();
        pos
    }

    /// Decides what happens next inside an open rule: a nested match, the
    /// rule's end match, or neither.
    ///
    /// Both the pattern's nested patterns and the `end` regex are searched
    /// starting from `pos`. A nested match is chosen only when it starts
    /// strictly before the end match (or when there is no end match); on a tie
    /// the end match wins.
    fn next_event_inside(
        &mut self,
        pattern: &Pattern,
        end: &EndRegex<'_>,
        line: &str,
        pos: usize,
    ) -> InsideEvent {
        let nested = pattern
            .nested
            .find_next(line, pos, &mut self.end_regex_cache);
        // The flag is derived from whether the pattern declares end captures;
        // presumably it tells `find` whether capture groups are needed.
        let wants_end_captures = !pattern.captures.end.is_empty();
        let close = RegexMatch::find(end.regex(), line, pos, wants_end_captures);

        let Some(close) = close else {
            // Without an end match, a nested match (if any) is the only candidate.
            return nested.map_or(InsideEvent::None, InsideEvent::Nested);
        };
        match nested {
            Some(nested) if nested.start < close.start => InsideEvent::Nested(nested),
            _ => InsideEvent::Close(close),
        }
    }

    /// Number of entries currently held by the end-regex cache (test-only).
    #[cfg(test)]
    pub(super) fn end_regex_cache_len(&self) -> usize {
        self.end_regex_cache.len()
    }
}
/// Outcome of looking for the next thing to handle while inside an open rule.
enum InsideEvent {
/// A nested pattern matched, and either no end match was found or the nested
/// match starts strictly before it.
Nested(Match),
/// The open rule's end regex matched; also chosen when a nested match starts
/// at the same offset as the end match or later.
Close(RegexMatch),
/// Neither a nested pattern nor the end regex matched.
None,
}
impl InsideEvent {
    /// Applies this event to the tokenizer state and output, returning the
    /// offset at which scanning should continue.
    ///
    /// In every case `pattern.scope.push_visible` is first called for the
    /// range from `pos` up to the start of the event (or up to `line.len()`
    /// when there is no event). After that:
    ///
    /// * `Close` appends the end-match spans using the pattern's end captures,
    ///   pops the rule from `state.stack`, and continues at the match's
    ///   `next_pos`.
    /// * `Nested` appends the nested match's spans, pushes any rule it opens
    ///   onto `state.stack`, and continues at the match's `next_pos`.
    /// * `None` continues at `line.len()`, leaving the rule open.
    fn apply(
        self,
        state: &mut LineState,
        line: &str,
        pos: usize,
        spans: &mut Vec<ScopeSpan>,
        pattern: &Pattern,
    ) -> usize {
        // All three outcomes begin by covering the text up to the event.
        let boundary = match &self {
            Self::Close(close) => close.start,
            Self::Nested(nested) => nested.start,
            Self::None => line.len(),
        };
        pattern.scope.push_visible(spans, pos, boundary);

        match self {
            Self::None => boundary,
            Self::Close(close) => {
                pattern.append_match_spans(&close, &pattern.captures.end, spans);
                state.stack.pop();
                close.next_pos(line)
            }
            Self::Nested(nested) => {
                // Query the match before its spans are moved out of it below.
                let resume_at = nested.next_pos(line);
                if let Some(rule) = nested.open_rule() {
                    state.stack.push(rule);
                }
                spans.extend(nested.spans);
                resume_at
            }
        }
    }
}