Skip to main content

rustik_highlight/
blob.rs

1//! Incremental highlighting over complete text blobs.
2//!
3//! This module sits above raw line tokenization and keeps the per-line parser
4//! state needed by editor and pager workloads. Callers can ask for one line or
5//! a visible range, and unchanged lines reuse cached start states and scope
6//! spans instead of reparsing from the top of the file.
7
8use std::ops::Range;
9
10use crate::grammar::{Grammar, LineState, LineTokenizer, ScopeSpan};
11use crate::theme::{StyleCache, StyleScratch, StyleSpan, Theme};
12use crate::util::line_starts;
13
/// Scratch space for highlighting one line without reallocating per call.
///
/// A buffer holds the latest tokenization and styling results so callers can
/// reuse allocations while scanning many lines or repeatedly re-highlighting a
/// changing line. [`LineBuffer::tokenize`] and [`LineBuffer::highlight`] write
/// into `scopes`; [`LineBuffer::style`] reads `scopes` and writes `styles`.
#[derive(Debug, Default)]
pub struct LineBuffer {
    /// Current scope spans, as handed to the grammar by the latest tokenization.
    pub scopes: Vec<ScopeSpan>,
    /// Current style spans, as handed to the theme by the latest styling pass.
    pub styles: Vec<StyleSpan>,
    /// Cached resolved styles for the active grammar/theme pair. Refreshed at
    /// the start of every [`LineBuffer::style`] call and kept by
    /// [`LineBuffer::clear`].
    style_cache: StyleCache,
    /// Per-line scratch storage used when converting scope spans into style
    /// spans. Reset by [`LineBuffer::clear`].
    style_scratch: StyleScratch,
}
30
/// Borrowed tokenization result for a line visited by a range highlighter.
///
/// This is the zero-copy callback view used when callers need grammar scopes
/// but not themed styles; [`BlobHighlighter::highlight_range`] builds one per
/// visited line. The borrowed text and spans remain valid only for the
/// duration of the callback, so copy anything that must outlive it.
#[derive(Debug)]
pub struct LineTokens<'text, 'spans> {
    /// Zero-based line index in the blob.
    pub line_index: usize,
    /// Byte range of this line in the blob, excluding line terminators
    /// (a trailing `\n` and a `\r` before it are not part of the range).
    pub byte_range: Range<usize>,
    /// Line text, excluding line terminators; this is the blob sliced by
    /// `byte_range`.
    pub text: &'text str,
    /// Scope spans for this line.
    pub scopes: &'spans [ScopeSpan],
}
47
/// Borrowed tokenization and styling result for a line visited by a range highlighter.
///
/// This is the zero-copy callback view used when callers need both grammar
/// scopes and theme-resolved style spans; it is what
/// [`BlobHighlighter::highlight_styled_range`] passes to its callback. The
/// borrowed text and spans remain valid only for the duration of the callback.
#[derive(Debug)]
pub struct StyledLine<'text, 'spans> {
    /// Zero-based line index in the blob.
    pub line_index: usize,
    /// Byte range of this line in the blob, excluding line terminators
    /// (a trailing `\n` and a `\r` before it are not part of the range).
    pub byte_range: Range<usize>,
    /// Line text, excluding line terminators; this is the blob sliced by
    /// `byte_range`.
    pub text: &'text str,
    /// Scope spans for this line.
    pub scopes: &'spans [ScopeSpan],
    /// Styled spans for this line, resolved from `scopes` with the theme
    /// passed to the range call.
    pub styles: &'spans [StyleSpan],
}
66
/// Owned tokenization result for APIs that collect highlighted lines.
///
/// Unlike [`LineTokens`], this type owns its scope spans so callers can keep
/// the result after range traversal has moved on to another line. It does not
/// carry the line text; slice the blob with `byte_range` to recover it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OwnedLineTokens {
    /// Zero-based line index in the blob.
    pub line_index: usize,
    /// Byte range of this line in the blob, excluding line terminators.
    pub byte_range: Range<usize>,
    /// Scope spans for this line, copied out of the traversal buffer.
    pub scopes: Vec<ScopeSpan>,
}
80
/// Incremental highlighter for a complete text blob.
///
/// The highlighter caches parser states and per-line scope spans so callers can
/// ask for individual lines or ranges without reparsing the whole blob each
/// time. Editing callers should call [`BlobHighlighter::reset_text`] or
/// [`BlobHighlighter::invalidate_from`] to keep those caches aligned with the
/// backing text.
#[derive(Debug)]
pub struct BlobHighlighter<'text> {
    /// Reusable tokenizer for the backing grammar.
    tokenizer: LineTokenizer<'text>,
    /// Backing source text.
    text: &'text str,
    /// Byte offsets for each source line start; its length is the line count.
    line_starts: Vec<usize>,
    /// Parser state at the start of each source line.
    ///
    /// Holds one slot more than there are lines: slot `i + 1` is the state
    /// left behind after tokenizing line `i`. Slot 0 is seeded with the
    /// default state; `None` marks a state that has not been computed yet.
    states: Vec<Option<LineState>>,
    /// Cached scope spans for each source line (one slot per line, `None`
    /// until the line has been tokenized through a highlight call).
    scopes: Vec<Option<Vec<ScopeSpan>>>,
    /// Reusable per-line scratch for range calls such as
    /// [`BlobHighlighter::highlight_styled_range`].
    buffer: LineBuffer,
}
103
104impl LineBuffer {
105    /// Clears all buffered spans while retaining allocations.
106    ///
107    /// Cached scope styles are intentionally retained: they are keyed by `(theme,
108    /// grammar, scope_count)` and stays valid across line resets.
109    pub fn clear(&mut self) {
110        self.scopes.clear();
111        self.styles.clear();
112        self.style_scratch.clear_line();
113    }
114
115    /// Tokenizes one line into this reusable buffer.
116    pub fn tokenize<'a>(
117        &'a mut self,
118        grammar: &Grammar,
119        state: &mut LineState,
120        line: &str,
121    ) -> &'a [ScopeSpan] {
122        grammar.tokenize_line_into(state, line, &mut self.scopes);
123        &self.scopes
124    }
125
126    /// Applies a theme to this buffer's current scope spans.
127    pub fn style<'a>(
128        &'a mut self,
129        grammar: &Grammar,
130        theme: &Theme,
131        line: &str,
132    ) -> &'a [StyleSpan] {
133        self.style_cache.refresh(theme, grammar);
134        theme.style_line_into(
135            grammar,
136            line,
137            &self.scopes,
138            &self.style_cache,
139            &mut self.style_scratch,
140            &mut self.styles,
141        );
142        &self.styles
143    }
144
145    /// Tokenizes and styles one line into this reusable buffer.
146    pub fn highlight<'a>(
147        &'a mut self,
148        grammar: &Grammar,
149        theme: &Theme,
150        state: &mut LineState,
151        line: &str,
152    ) -> (&'a [ScopeSpan], &'a [StyleSpan]) {
153        grammar.tokenize_line_into(state, line, &mut self.scopes);
154        self.style(grammar, theme, line);
155        (&self.scopes, &self.styles)
156    }
157}
158
159impl<'text> BlobHighlighter<'text> {
160    /// Creates a blob highlighter and caches only the initial empty state.
161    pub fn new(grammar: &'text Grammar, text: &'text str) -> Self {
162        let mut highlighter = Self {
163            tokenizer: LineTokenizer::new(grammar),
164            text,
165            line_starts: Vec::new(),
166            states: Vec::new(),
167            scopes: Vec::new(),
168            buffer: LineBuffer::default(),
169        };
170        highlighter.rebuild_caches();
171        highlighter
172    }
173
    /// Replaces the blob text and clears cached line states.
    ///
    /// Line offsets are recomputed for the new text and every cached parser
    /// state and scope list is dropped, so the next highlight request starts
    /// again from the default state of line 0.
    pub fn reset_text(&mut self, text: &'text str) {
        self.text = text;
        self.rebuild_caches();
    }
179
180    /// Rebuilds line metadata and clears cached highlighting results.
181    fn rebuild_caches(&mut self) {
182        self.line_starts = line_starts(self.text);
183        self.states = vec![None; self.line_starts.len() + 1];
184        self.states[0] = Some(LineState::default());
185        self.scopes = vec![None; self.line_starts.len()];
186    }
187
    /// Returns the number of highlightable lines in the blob.
    ///
    /// This is the number of line starts recorded for the current text, so it
    /// changes only when the text is replaced.
    pub fn line_count(&self) -> usize {
        self.line_starts.len()
    }
192
193    /// Returns a line by index, excluding line terminators.
194    pub fn line(&self, line: usize) -> Option<&'text str> {
195        let range = self.line_byte_range(line)?;
196        Some(&self.text[range])
197    }
198
199    /// Returns a line's byte range, excluding line terminators.
200    pub fn line_byte_range(&self, line: usize) -> Option<Range<usize>> {
201        (line < self.line_count()).then(|| self.line_range_unchecked(line))
202    }
203
204    /// Returns whether a start state for `line` is already cached.
205    pub fn is_state_cached(&self, line: usize) -> bool {
206        self.states.get(line).is_some_and(Option::is_some)
207    }
208
209    /// Invalidates cached states after a changed line.
210    pub fn invalidate_from(&mut self, line: usize) {
211        let start = line.saturating_add(1).min(self.states.len());
212        for state in &mut self.states[start..] {
213            *state = None;
214        }
215        let start = line.min(self.scopes.len());
216        for scopes in &mut self.scopes[start..] {
217            *scopes = None;
218        }
219    }
220
221    /// Ensures the parser state at the start of `line` is cached.
222    pub fn ensure_state(&mut self, line: usize) -> Option<&LineState> {
223        if line > self.line_count() {
224            return None;
225        }
226        if self.states[line].is_none() {
227            let state = self.compute_state(line)?;
228            self.states[line] = Some(state);
229        }
230        self.states[line].as_ref()
231    }
232
233    /// Tokenizes one line into a caller-owned buffer.
234    pub fn highlight_line_into(&mut self, line: usize, scopes: &mut Vec<ScopeSpan>) -> bool {
235        if line >= self.line_count() {
236            scopes.clear();
237            return false;
238        }
239        let Some(mut state) = self.ensure_state(line).cloned() else {
240            scopes.clear();
241            return false;
242        };
243        let range = self.line_range_unchecked(line);
244        let text = &self.text[range];
245
246        tokenize_or_cache_into(
247            &mut self.tokenizer,
248            &mut self.states,
249            &mut self.scopes,
250            &mut state,
251            line,
252            text,
253            scopes,
254        );
255        true
256    }
257
258    /// Tokenizes a line range and invokes `f` for each requested line.
259    pub fn highlight_range<F>(&mut self, range: Range<usize>, mut f: F)
260    where
261        F: FnMut(LineTokens<'_, '_>),
262    {
263        let start = range.start.min(self.line_count());
264        let end = range.end.min(self.line_count());
265        if start >= end {
266            return;
267        }
268        let Some(mut state) = self.ensure_state(start).cloned() else {
269            return;
270        };
271
272        let mut scopes = Vec::new();
273        for line_index in start..end {
274            let byte_range = self.line_range_unchecked(line_index);
275            let text = &self.text[byte_range.clone()];
276
277            tokenize_or_cache_into(
278                &mut self.tokenizer,
279                &mut self.states,
280                &mut self.scopes,
281                &mut state,
282                line_index,
283                text,
284                &mut scopes,
285            );
286            f(LineTokens {
287                line_index,
288                byte_range,
289                text,
290                scopes: &scopes,
291            });
292        }
293    }
294
295    /// Tokenizes and styles a line range and invokes `f` for each requested line.
296    pub fn highlight_styled_range<F>(&mut self, theme: &Theme, range: Range<usize>, mut f: F)
297    where
298        F: FnMut(StyledLine<'_, '_>),
299    {
300        let start = range.start.min(self.line_count());
301        let end = range.end.min(self.line_count());
302        if start >= end {
303            return;
304        }
305        let Some(mut state) = self.ensure_state(start).cloned() else {
306            return;
307        };
308
309        let grammar = self.tokenizer.grammar;
310        for line_index in start..end {
311            let byte_range = self.line_range_unchecked(line_index);
312            let text = &self.text[byte_range.clone()];
313
314            tokenize_or_cache_into(
315                &mut self.tokenizer,
316                &mut self.states,
317                &mut self.scopes,
318                &mut state,
319                line_index,
320                text,
321                &mut self.buffer.scopes,
322            );
323            self.buffer.style(grammar, theme, text);
324
325            f(StyledLine {
326                line_index,
327                byte_range,
328                text,
329                scopes: &self.buffer.scopes,
330                styles: &self.buffer.styles,
331            });
332        }
333    }
334
335    /// Tokenizes a line range into an owned output vector.
336    pub fn highlight_range_into(&mut self, range: Range<usize>, output: &mut Vec<OwnedLineTokens>) {
337        output.clear();
338
339        self.highlight_range(range, |line| {
340            output.push(OwnedLineTokens {
341                line_index: line.line_index,
342                byte_range: line.byte_range,
343                scopes: line.scopes.to_vec(),
344            });
345        });
346    }
347
348    /// Returns newly allocated token data for a line range.
349    pub fn highlighted_range(&mut self, range: Range<usize>) -> Vec<OwnedLineTokens> {
350        let mut output = Vec::new();
351        self.highlight_range_into(range, &mut output);
352        output
353    }
354
355    /// Computes the parser state at the start of `line` from the nearest cached predecessor.
356    fn compute_state(&mut self, line: usize) -> Option<LineState> {
357        let mut index = line;
358        while index > 0 && self.states[index].is_none() {
359            index -= 1;
360        }
361        let mut state = self.states[index].clone().unwrap_or_default();
362        let mut scratch = Vec::new();
363
364        while index < line {
365            let range = self.line_range_unchecked(index);
366            let text = &self.text[range];
367
368            self.tokenizer
369                .tokenize_line_into(&mut state, text, &mut scratch);
370
371            index += 1;
372            self.states[index] = Some(state.clone());
373        }
374        Some(state)
375    }
376
377    /// Returns a line's byte range without checking that the line index exists.
378    fn line_range_unchecked(&self, line: usize) -> Range<usize> {
379        let start = self.line_starts[line];
380        let mut end = self
381            .line_starts
382            .get(line + 1)
383            .copied()
384            .unwrap_or(self.text.len());
385        let bytes = self.text.as_bytes();
386        if end > start && bytes[end - 1] == b'\n' {
387            end -= 1;
388        }
389        if end > start && bytes[end - 1] == b'\r' {
390            end -= 1;
391        }
392        start..end
393    }
394}
395
396/// Tokenizes a line or copies its cached scopes and following parser state.
397fn tokenize_or_cache_into(
398    tokenizer: &mut LineTokenizer<'_>,
399    states: &mut [Option<LineState>],
400    scopes_cache: &mut [Option<Vec<ScopeSpan>>],
401    state: &mut LineState,
402    line_index: usize,
403    text: &str,
404    out: &mut Vec<ScopeSpan>,
405) {
406    if let Some(cached) = scopes_cache[line_index].as_ref()
407        && let Some(next_state) = states[line_index + 1].as_ref()
408    {
409        out.clone_from(cached);
410        *state = next_state.clone();
411    } else {
412        tokenizer.tokenize_line_into(state, text, out);
413
414        states[line_index + 1] = Some(state.clone());
415        scopes_cache[line_index] = Some(out.clone());
416    }
417}