rustik_highlight/grammar/
tokenize.rs1use super::end::{EndRegex, EndRegexCache};
8use super::pattern::{Match, OpenRule, Pattern, RegexMatch};
9use super::*;
10
11#[derive(Debug)]
17pub struct LineTokenizer<'grammar> {
18 pub(crate) grammar: &'grammar Grammar,
20 end_regex_cache: EndRegexCache,
22}
23
24impl<'grammar> LineTokenizer<'grammar> {
25 pub fn new(grammar: &'grammar Grammar) -> Self {
27 Self {
28 grammar,
29 end_regex_cache: EndRegexCache::new(),
30 }
31 }
32
33 pub fn clear_caches(&mut self) {
35 self.end_regex_cache.clear();
36 }
37
38 pub fn tokenize_line(&mut self, state: &mut LineState, line: &str) -> Vec<ScopeSpan> {
40 let mut spans = Vec::new();
41 self.tokenize_line_into(state, line, &mut spans);
42 spans
43 }
44
45 pub fn tokenize_line_into(
47 &mut self,
48 state: &mut LineState,
49 line: &str,
50 spans: &mut Vec<ScopeSpan>,
51 ) {
52 spans.clear();
53
54 if matches!(self.grammar.kind, GrammarKind::Json) {
55 json::tokenize_line_into(line, spans);
56 return;
57 }
58 let line = trim_line_end(line);
59 let mut pos = 0;
60
61 while pos < line.len() {
62 pos = self.advance(state, line, pos, spans);
63 }
64 }
65
66 fn advance(
68 &mut self,
69 state: &mut LineState,
70 line: &str,
71 pos: usize,
72 spans: &mut Vec<ScopeSpan>,
73 ) -> usize {
74 if let Some(open) = state.stack.last().cloned() {
75 self.advance_inside_open_rule(state, line, pos, spans, open)
76 } else {
77 self.advance_at_root(state, line, pos, spans)
78 }
79 }
80
81 fn advance_at_root(
83 &mut self,
84 state: &mut LineState,
85 line: &str,
86 pos: usize,
87 spans: &mut Vec<ScopeSpan>,
88 ) -> usize {
89 let Some(found) = self
90 .grammar
91 .patterns
92 .find_next(line, pos, &mut self.end_regex_cache)
93 else {
94 return line.len();
95 };
96 let next_pos = found.next_pos(line);
97 let opened = found.open_rule();
98 spans.extend(found.spans);
99 if let Some(opened) = opened {
100 state.stack.push(opened);
101 }
102 next_pos
103 }
104
105 fn advance_inside_open_rule(
107 &mut self,
108 state: &mut LineState,
109 line: &str,
110 pos: usize,
111 spans: &mut Vec<ScopeSpan>,
112 open: OpenRule,
113 ) -> usize {
114 let Some(pattern) = self.grammar.pattern_by_rule(open.rule_id) else {
115 state.stack.pop();
116 return pos;
117 };
118 let Some(end) = pattern.resume_end(open.dynamic_end.as_deref(), &mut self.end_regex_cache)
119 else {
120 state.stack.pop();
121 return pos;
122 };
123 let event = self.next_event_inside(pattern, &end, line, pos);
124 event.apply(state, line, pos, spans, pattern)
125 }
126
127 fn next_event_inside(
129 &mut self,
130 pattern: &Pattern,
131 end: &EndRegex<'_>,
132 line: &str,
133 pos: usize,
134 ) -> InsideEvent {
135 let nested = pattern
136 .nested
137 .find_next(line, pos, &mut self.end_regex_cache);
138 let close = RegexMatch::find(end.regex(), line, pos, !pattern.captures.end.is_empty());
139
140 match (nested, close) {
141 (Some(nested), Some(close)) if nested.start < close.start => {
142 InsideEvent::Nested(nested)
143 }
144 (_, Some(close)) => InsideEvent::Close(close),
145 (Some(nested), None) => InsideEvent::Nested(nested),
146 (None, None) => InsideEvent::None,
147 }
148 }
149
150 #[cfg(test)]
152 pub(super) fn end_regex_cache_len(&self) -> usize {
153 self.end_regex_cache.len()
154 }
155}
156
157enum InsideEvent {
159 Nested(Match),
161 Close(RegexMatch),
163 None,
165}
166
167impl InsideEvent {
168 fn apply(
170 self,
171 state: &mut LineState,
172 line: &str,
173 pos: usize,
174 spans: &mut Vec<ScopeSpan>,
175 pattern: &Pattern,
176 ) -> usize {
177 match self {
178 Self::Close(close) => {
179 pattern.scope.push_visible(spans, pos, close.start);
180 pattern.append_match_spans(&close, &pattern.captures.end, spans);
181 state.stack.pop();
182 close.next_pos(line)
183 }
184 Self::Nested(nested) => {
185 pattern.scope.push_visible(spans, pos, nested.start);
186
187 let next_pos = nested.next_pos(line);
188 let opened = nested.open_rule();
189
190 spans.extend(nested.spans);
191
192 if let Some(opened) = opened {
193 state.stack.push(opened);
194 }
195 next_pos
196 }
197 Self::None => {
198 pattern.scope.push_visible(spans, pos, line.len());
199 line.len()
200 }
201 }
202 }
203}