Skip to main content

fresh/primitives/
highlight_engine.rs

1//! Unified highlighting engine over syntect (TextMate grammars) and
2//! tree-sitter. Syntect is the default; tree-sitter `Language` is still
3//! detected for non-highlighting features (indentation, semantic highlighting).
4//!
5//! # TextMate cache design
6//!
7//! Syntect's parser is a sequential state machine — it must process bytes
8//! in order from a known parse state to track multi-line constructs and
9//! embedded language transitions. To make scrolling cheap, the engine keeps
10//! a span cache, a `(ParseState, ScopeStack)` snapshot at the cache tail,
11//! and periodic checkpoint anchors to support resume-from-anywhere.
12//!
//! Four render-time paths, gated by what the cache covers:
14//!
15//! - **Cache hit** — cache fully covers the parse range and there's no
16//!   pending edit; filter cached spans for the viewport. Zero parse work.
17//! - **Forward extension** — cache covers the start of the parse range but
18//!   not its end; resume from `tail_state` and parse only the uncovered
19//!   tail bytes. Steady-state scroll path.
20//! - **Partial update** — there's a pending edit; resume from the nearest
21//!   checkpoint before the dirty point and parse forward looking for
22//!   convergence (state matches an existing checkpoint), bounded by a
23//!   per-pass byte budget so pathological edits can't degenerate into
24//!   whole-file reparses.
25//! - **Cold start / fallback** — no cache, or none of the above applies;
26//!   parse the appropriate range from a fresh state or nearest checkpoint.
27//!
28//! For files at or below `MAX_PARSE_BYTES` the parse range is the whole
29//! file, so the cache is whole-file after the first parse and scrolling
30//! becomes filter-only. Larger files use a viewport-centred window of
31//! `±context_bytes` and rely on the forward-extension path to keep
32//! scroll-cost bounded.
33//!
34//! Edits go through `notify_insert` / `notify_delete`, which shift cached
35//! span byte offsets in place, set `dirty_from`, and invalidate `tail_state`
36//! when the edit lies inside the cached range.
37
38use crate::model::buffer::Buffer;
39use crate::model::marker::{MarkerId, MarkerList};
40use crate::primitives::grammar::GrammarRegistry;
41use crate::primitives::highlighter::{
42    highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
43};
44use crate::view::theme::Theme;
45use std::collections::HashMap;
46use std::ops::Range;
47use std::path::Path;
48use std::sync::Arc;
49use syntect::parsing::SyntaxSet;
50
51/// Map TextMate scope to highlight category
52fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
53    let scope_lower = scope.to_lowercase();
54
55    // Comments - highest priority
56    if scope_lower.starts_with("comment") {
57        return Some(HighlightCategory::Comment);
58    }
59
60    // Strings
61    if scope_lower.starts_with("string") {
62        return Some(HighlightCategory::String);
63    }
64
65    // Markdown/markup scopes - handle before generic keyword/punctuation checks
66    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
67    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
68    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
69        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
70    }
71    // Bold: markup.bold
72    if scope_lower.starts_with("markup.bold") {
73        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
74    }
75    // Italic: markup.italic
76    if scope_lower.starts_with("markup.italic") {
77        return Some(HighlightCategory::Variable); // Italic styled like variables
78    }
79    // Inline code and code blocks: markup.raw, markup.inline.raw
80    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
81        return Some(HighlightCategory::String); // Code styled like strings
82    }
83    // Links: markup.underline.link
84    if scope_lower.starts_with("markup.underline.link") {
85        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
86    }
87    // Generic underline (often links)
88    if scope_lower.starts_with("markup.underline") {
89        return Some(HighlightCategory::Function);
90    }
91    // Block quotes: markup.quote
92    if scope_lower.starts_with("markup.quote") {
93        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
94    }
95    // Lists: markup.list
96    if scope_lower.starts_with("markup.list") {
97        return Some(HighlightCategory::Operator); // List markers styled like operators
98    }
99    // Strikethrough: markup.strikethrough
100    if scope_lower.starts_with("markup.strikethrough") {
101        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
102    }
103
104    // Keywords
105    if scope_lower.starts_with("keyword.control")
106        || scope_lower.starts_with("keyword.other")
107        || scope_lower.starts_with("keyword.declaration")
108        || scope_lower.starts_with("keyword")
109    {
110        // keyword.operator should map to Operator, not Keyword
111        if !scope_lower.starts_with("keyword.operator") {
112            return Some(HighlightCategory::Keyword);
113        }
114    }
115
116    // Punctuation that belongs to a parent construct (comment/string delimiters)
117    // These must be checked before the generic punctuation rule below.
118    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
119    if scope_lower.starts_with("punctuation.definition.comment") {
120        return Some(HighlightCategory::Comment);
121    }
122    if scope_lower.starts_with("punctuation.definition.string") {
123        return Some(HighlightCategory::String);
124    }
125
126    // Operators (keyword.operator only)
127    if scope_lower.starts_with("keyword.operator") {
128        return Some(HighlightCategory::Operator);
129    }
130
131    // Punctuation brackets ({, }, (, ), [, ], <, >)
132    // Covers punctuation.section.*, punctuation.bracket.*,
133    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
134    if scope_lower.starts_with("punctuation.section")
135        || scope_lower.starts_with("punctuation.bracket")
136        || scope_lower.starts_with("punctuation.definition.array")
137        || scope_lower.starts_with("punctuation.definition.block")
138        || scope_lower.starts_with("punctuation.definition.brackets")
139        || scope_lower.starts_with("punctuation.definition.group")
140        || scope_lower.starts_with("punctuation.definition.inline-table")
141        || scope_lower.starts_with("punctuation.definition.section")
142        || scope_lower.starts_with("punctuation.definition.table")
143        || scope_lower.starts_with("punctuation.definition.tag")
144    {
145        return Some(HighlightCategory::PunctuationBracket);
146    }
147
148    // Punctuation delimiters (;, ,, .)
149    if scope_lower.starts_with("punctuation.separator")
150        || scope_lower.starts_with("punctuation.terminator")
151        || scope_lower.starts_with("punctuation.accessor")
152    {
153        return Some(HighlightCategory::PunctuationDelimiter);
154    }
155
156    // Functions
157    if scope_lower.starts_with("entity.name.function")
158        || scope_lower.starts_with("support.function")
159        || scope_lower.starts_with("meta.function-call")
160        || scope_lower.starts_with("variable.function")
161    {
162        return Some(HighlightCategory::Function);
163    }
164
165    // Types
166    if scope_lower.starts_with("entity.name.type")
167        || scope_lower.starts_with("entity.name.class")
168        || scope_lower.starts_with("entity.name.struct")
169        || scope_lower.starts_with("entity.name.enum")
170        || scope_lower.starts_with("entity.name.interface")
171        || scope_lower.starts_with("entity.name.trait")
172        || scope_lower.starts_with("support.type")
173        || scope_lower.starts_with("support.class")
174        || scope_lower.starts_with("storage.type")
175    {
176        return Some(HighlightCategory::Type);
177    }
178
179    // Storage modifiers (pub, static, const as keywords)
180    if scope_lower.starts_with("storage.modifier") {
181        return Some(HighlightCategory::Keyword);
182    }
183
184    // Constants and numbers
185    if scope_lower.starts_with("constant.numeric")
186        || scope_lower.starts_with("constant.language.boolean")
187    {
188        return Some(HighlightCategory::Number);
189    }
190    if scope_lower.starts_with("constant") {
191        return Some(HighlightCategory::Constant);
192    }
193
194    // Variables
195    if scope_lower.starts_with("variable.parameter")
196        || scope_lower.starts_with("variable.other")
197        || scope_lower.starts_with("variable.language")
198    {
199        return Some(HighlightCategory::Variable);
200    }
201
202    // Properties / object keys
203    if scope_lower.starts_with("entity.name.tag")
204        || scope_lower.starts_with("support.other.property")
205        || scope_lower.starts_with("meta.object-literal.key")
206        || scope_lower.starts_with("variable.other.property")
207        || scope_lower.starts_with("variable.other.object.property")
208    {
209        return Some(HighlightCategory::Property);
210    }
211
212    // Attributes (decorators, annotations)
213    if scope_lower.starts_with("entity.other.attribute")
214        || scope_lower.starts_with("meta.attribute")
215        || scope_lower.starts_with("entity.name.decorator")
216    {
217        return Some(HighlightCategory::Attribute);
218    }
219
220    // Generic variable fallback
221    if scope_lower.starts_with("variable") {
222        return Some(HighlightCategory::Variable);
223    }
224
225    None
226}
227
/// Unified highlighting engine supporting multiple backends.
///
/// Each backend is boxed, so the enum itself stays small regardless of the
/// size of the engine it wraps. The `Default` value is [`HighlightEngine::None`].
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighting (built-in languages)
    TreeSitter(Box<Highlighter>),
    /// TextMate grammar based highlighting
    TextMate(Box<TextMateEngine>),
    /// No highlighting available
    #[default]
    None,
}
239
/// TextMate highlighting engine. See module docs for the cache design.
pub struct TextMateEngine {
    // Shared syntect syntax definitions.
    syntax_set: Arc<SyntaxSet>,
    // Index into `syntax_set.syntaxes()` of the grammar used for parsing.
    syntax_index: usize,
    // Positions of checkpoint anchors; adjusted on every insert/delete
    // notification so they follow the text.
    checkpoint_markers: MarkerList,
    // Parser snapshot stored for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    // Smallest edited byte offset not yet re-parsed; `None` when clean.
    dirty_from: Option<usize>,
    // Span cache; `None` until the first parse populates it.
    cache: Option<TextMateCache>,
    // Buffer length recorded when the cache was last updated; a mismatch
    // disables the cache-hit and forward-extension paths.
    last_buffer_len: usize,
    // Tree-sitter language kept for non-highlighting features.
    ts_language: Option<Language>,
    // Performance counters (see `HighlightStats`).
    stats: HighlightStats,
    // Scope→Category memo. Syntect Scope atoms are append-only-interned
    // globally, so entries never need invalidation.
    scope_category_cache: HashMap<syntect::parsing::Scope, Option<HighlightCategory>>,
}
256
/// Counters for monitoring highlighting performance in tests.
///
/// All counters accumulate until `TextMateEngine::reset_stats` replaces the
/// struct with its default (all-zero) value.
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Number of bytes parsed by syntect (total across all highlight_viewport calls).
    pub bytes_parsed: usize,
    /// Number of highlight_viewport calls that hit the span cache.
    pub cache_hits: usize,
    /// Number of highlight_viewport calls that missed the cache and re-parsed.
    /// Partial updates and forward extensions are counted here as well.
    pub cache_misses: usize,
    /// Number of checkpoint states updated during convergence.
    pub checkpoints_updated: usize,
    /// Number of times convergence was detected (state matched existing checkpoint).
    pub convergences: usize,
}
271
/// Cached highlight spans together with the byte range they were parsed from.
#[derive(Debug, Clone)]
struct TextMateCache {
    // Byte range of the buffer the cached spans describe.
    range: Range<usize>,
    // Highlighted spans inside `range`, stored as absolute byte offsets.
    spans: Vec<CachedSpan>,
    // Parse state at `range.end`; powers forward extension. None when the
    // last mutation didn't end at `range.end`.
    tail_state: Option<(syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
}
280
/// One cached highlight: a byte range and its category. The colour is not
/// stored; it is resolved against the theme when spans are handed out.
#[derive(Debug, Clone)]
struct CachedSpan {
    // Absolute byte range in the buffer.
    range: Range<usize>,
    // Highlight category assigned to the range.
    category: crate::primitives::highlighter::HighlightCategory,
}
286
/// Small/large file threshold (whole-file cache vs viewport window).
/// Buffers of at most this many bytes (1 MiB) are parsed as a whole; the
/// partial-update path also uses it to bound its backwards checkpoint search.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Distance between checkpoint anchors. Smaller = faster convergence on edit.
/// Measured in bytes parsed since the previous checkpoint was considered.
const CHECKPOINT_INTERVAL: usize = 256;

/// Per-pass cap on partial-update parsing past `dirty_pos`. Bounds work for
/// pathological edits whose effect doesn't converge. Measured in bytes.
const CONVERGENCE_BUDGET: usize = 64 * 1024;
296
297impl TextMateEngine {
298    /// Create a new TextMate engine for the given syntax
299    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
300        Self {
301            syntax_set,
302            syntax_index,
303            checkpoint_markers: MarkerList::new(),
304            checkpoint_states: HashMap::new(),
305            dirty_from: None,
306            cache: None,
307            last_buffer_len: 0,
308            ts_language: None,
309            stats: HighlightStats::default(),
310            scope_category_cache: HashMap::new(),
311        }
312    }
313
314    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
315    pub fn with_language(
316        syntax_set: Arc<SyntaxSet>,
317        syntax_index: usize,
318        ts_language: Option<Language>,
319    ) -> Self {
320        Self {
321            syntax_set,
322            syntax_index,
323            checkpoint_markers: MarkerList::new(),
324            checkpoint_states: HashMap::new(),
325            dirty_from: None,
326            cache: None,
327            last_buffer_len: 0,
328            ts_language,
329            stats: HighlightStats::default(),
330            scope_category_cache: HashMap::new(),
331        }
332    }
333
    /// Get performance stats for testing and diagnostics.
    ///
    /// Returns a borrow of the live counters; they keep accumulating until
    /// `reset_stats` is called.
    pub fn stats(&self) -> &HighlightStats {
        &self.stats
    }
338
    /// Reset performance counters.
    ///
    /// Replaces the counters with `HighlightStats::default()`; the cache and
    /// checkpoints are left untouched.
    pub fn reset_stats(&mut self) {
        self.stats = HighlightStats::default();
    }
343
    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
    ///
    /// Returns `None` when the engine was built without a language.
    pub fn language(&self) -> Option<&Language> {
        self.ts_language.as_ref()
    }
348
349    /// Buffer-insert notification. Shifts span offsets in place and marks
350    /// the cache dirty so the partial-update path runs on next render.
351    pub fn notify_insert(&mut self, position: usize, length: usize) {
352        self.checkpoint_markers.adjust_for_insert(position, length);
353        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
354        if let Some(cache) = &mut self.cache {
355            for span in &mut cache.spans {
356                if span.range.start >= position {
357                    span.range.start += length;
358                    span.range.end += length;
359                } else if span.range.end > position {
360                    span.range.end += length;
361                }
362            }
363            if cache.range.end >= position {
364                cache.range.end += length;
365                if position < cache.range.end {
366                    cache.tail_state = None;
367                }
368            }
369        }
370    }
371
372    /// Buffer-delete notification. Mirror of `notify_insert`.
373    pub fn notify_delete(&mut self, position: usize, length: usize) {
374        self.checkpoint_markers.adjust_for_delete(position, length);
375        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
376        if let Some(cache) = &mut self.cache {
377            let delete_end = position + length;
378            cache.spans.retain_mut(|span| {
379                if span.range.start >= delete_end {
380                    span.range.start -= length;
381                    span.range.end -= length;
382                    true
383                } else if span.range.end <= position {
384                    true
385                } else if span.range.start >= position && span.range.end <= delete_end {
386                    false
387                } else {
388                    if span.range.start < position {
389                        span.range.end = position.min(span.range.end);
390                    } else {
391                        span.range.start = position;
392                        span.range.end = position + span.range.end.saturating_sub(delete_end);
393                    }
394                    span.range.start < span.range.end
395                }
396            });
397            if cache.range.end > delete_end {
398                cache.range.end -= length;
399            } else if cache.range.end > position {
400                cache.range.end = position;
401            }
402            if position < cache.range.end {
403                cache.tail_state = None;
404            }
405        }
406    }
407
408    /// Highlight the visible viewport. Path selection is documented in the
409    /// module-level docs ("TextMate cache design").
410    pub fn highlight_viewport(
411        &mut self,
412        buffer: &Buffer,
413        viewport_start: usize,
414        viewport_end: usize,
415        theme: &Theme,
416        context_bytes: usize,
417    ) -> Vec<HighlightSpan> {
418        let buf_len = buffer.len();
419        let (desired_parse_start, parse_end) = if buf_len <= MAX_PARSE_BYTES {
420            (0, buf_len)
421        } else {
422            let s = viewport_start.saturating_sub(context_bytes);
423            let e = (viewport_end + context_bytes).min(buf_len);
424            (s, e)
425        };
426
427        let dirty = self.dirty_from.take();
428        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
429            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
430        });
431        let exact_cache_hit = cache_covers_viewport
432            && dirty.is_none()
433            && self.last_buffer_len == buffer.len()
434            && self
435                .cache
436                .as_ref()
437                .is_some_and(|c| c.range.end >= parse_end);
438
439        // Cache hit.
440        if exact_cache_hit {
441            self.stats.cache_hits += 1;
442            return self.filter_cached_spans(viewport_start, viewport_end, theme);
443        }
444
445        // Forward extension.
446        if dirty.is_none()
447            && cache_covers_viewport
448            && self.last_buffer_len == buffer.len()
449            && self
450                .cache
451                .as_ref()
452                .is_some_and(|c| c.range.end < parse_end && c.tail_state.is_some())
453        {
454            return self.extend_cache_forward(
455                buffer,
456                parse_end,
457                viewport_start,
458                viewport_end,
459                theme,
460            );
461        }
462
463        // Partial update.
464        if cache_covers_viewport && dirty.is_some() {
465            if let Some(dirty_pos) = dirty {
466                if dirty_pos < parse_end {
467                    if let Some(result) = self.try_partial_update(
468                        buffer,
469                        dirty_pos,
470                        desired_parse_start,
471                        parse_end,
472                        viewport_start,
473                        viewport_end,
474                        theme,
475                    ) {
476                        return result;
477                    }
478                } else {
479                    // Dirty region past viewport: cached spans are still valid.
480                    self.dirty_from = Some(dirty_pos);
481                    self.stats.cache_hits += 1;
482                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
483                }
484            }
485        } else if let Some(d) = dirty {
486            self.dirty_from = Some(d);
487        }
488
489        // Cold start / fallback.
490        self.full_parse(
491            buffer,
492            desired_parse_start,
493            parse_end,
494            viewport_start,
495            viewport_end,
496            theme,
497            context_bytes,
498        )
499    }
500
501    /// Filter cached spans for the viewport and resolve colors.
502    fn filter_cached_spans(
503        &self,
504        viewport_start: usize,
505        viewport_end: usize,
506        theme: &Theme,
507    ) -> Vec<HighlightSpan> {
508        let cache = self.cache.as_ref().unwrap();
509        cache
510            .spans
511            .iter()
512            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
513            .map(|span| HighlightSpan {
514                range: span.range.clone(),
515                color: highlight_color(span.category, theme),
516                category: Some(span.category),
517            })
518            .collect()
519    }
520
521    /// Partial update path. Returns `Some` whenever an anchor was available,
522    /// even on budget hit or EOF (see post-loop classification). `None` only
523    /// when no checkpoint anchor reaches the dirty point.
524    #[allow(clippy::too_many_arguments)]
525    fn try_partial_update(
526        &mut self,
527        buffer: &Buffer,
528        dirty_pos: usize,
529        desired_parse_start: usize,
530        parse_end: usize,
531        viewport_start: usize,
532        viewport_end: usize,
533        theme: &Theme,
534    ) -> Option<Vec<HighlightSpan>> {
535        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
536
537        // Find checkpoint before the dirty point (bounded search)
538        let (actual_start, mut state, mut current_scopes) = {
539            let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
540            let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
541            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
542            if let Some((id, cp_pos, _)) = nearest {
543                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
544                    (cp_pos, s.clone(), sc.clone())
545                } else {
546                    return None; // orphan, fall back
547                }
548            } else if parse_end <= MAX_PARSE_BYTES {
549                (
550                    0,
551                    syntect::parsing::ParseState::new(syntax),
552                    syntect::parsing::ScopeStack::new(),
553                )
554            } else {
555                return None; // large file, no nearby checkpoint, fall back
556            }
557        };
558
559        // Get markers from dirty point forward for convergence checking
560        let mut markers_ahead: Vec<(MarkerId, usize)> = self
561            .checkpoint_markers
562            .query_range(dirty_pos, parse_end)
563            .into_iter()
564            .map(|(id, start, _)| (id, start))
565            .collect();
566        markers_ahead.sort_by_key(|(_, pos)| *pos);
567        let mut marker_idx = 0;
568
569        // Parse from actual_start to parse_end, looking for convergence
570        let content_end = parse_end.min(buffer.len());
571        if actual_start >= content_end {
572            return None;
573        }
574        let content = buffer.slice_bytes(actual_start..content_end);
575        let content_str = match std::str::from_utf8(&content) {
576            Ok(s) => s,
577            Err(_) => return None,
578        };
579
580        let mut new_spans = Vec::new();
581        let content_bytes = content_str.as_bytes();
582        let mut pos = 0;
583        let mut current_offset = actual_start;
584        let mut converged_at: Option<usize> = None;
585        let mut budget_hit_at: Option<usize> = None;
586        let mut bytes_since_checkpoint: usize = 0;
587
588        while pos < content_bytes.len() {
589            // Create checkpoints in new territory
590            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
591                let nearby = self.checkpoint_markers.query_range(
592                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
593                    current_offset + CHECKPOINT_INTERVAL / 2,
594                );
595                if nearby.is_empty() {
596                    let marker_id = self.checkpoint_markers.create(current_offset, true);
597                    self.checkpoint_states
598                        .insert(marker_id, (state.clone(), current_scopes.clone()));
599                }
600                bytes_since_checkpoint = 0;
601            }
602
603            let line_start = pos;
604            let mut line_end = pos;
605            while line_end < content_bytes.len() {
606                if content_bytes[line_end] == b'\n' {
607                    line_end += 1;
608                    break;
609                } else if content_bytes[line_end] == b'\r' {
610                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
611                        line_end += 2;
612                    } else {
613                        line_end += 1;
614                    }
615                    break;
616                }
617                line_end += 1;
618            }
619
620            let line_bytes = &content_bytes[line_start..line_end];
621            let actual_line_byte_len = line_bytes.len();
622
623            let line_str = match std::str::from_utf8(line_bytes) {
624                Ok(s) => s,
625                Err(_) => {
626                    pos = line_end;
627                    current_offset += actual_line_byte_len;
628                    bytes_since_checkpoint += actual_line_byte_len;
629                    continue;
630                }
631            };
632
633            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
634            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
635                format!("{}\n", line_content)
636            } else {
637                line_content.to_string()
638            };
639
640            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
641                Ok(ops) => ops,
642                Err(_) => {
643                    pos = line_end;
644                    current_offset += actual_line_byte_len;
645                    bytes_since_checkpoint += actual_line_byte_len;
646                    continue;
647                }
648            };
649
650            // Collect spans for the dirty region
651            let collect_spans =
652                current_offset + actual_line_byte_len > desired_parse_start.max(actual_start);
653            let mut syntect_offset = 0;
654            let line_content_len = line_content.len();
655
656            for (op_offset, op) in ops {
657                let clamped_op_offset = op_offset.min(line_content_len);
658                if collect_spans && clamped_op_offset > syntect_offset {
659                    if let Some(category) = self.scope_stack_to_category(&current_scopes) {
660                        let byte_start = current_offset + syntect_offset;
661                        let byte_end = current_offset + clamped_op_offset;
662                        let clamped_start = byte_start.max(actual_start);
663                        if clamped_start < byte_end {
664                            new_spans.push(CachedSpan {
665                                range: clamped_start..byte_end,
666                                category,
667                            });
668                        }
669                    }
670                }
671                syntect_offset = clamped_op_offset;
672                #[allow(clippy::let_underscore_must_use)]
673                let _ = current_scopes.apply(&op);
674            }
675
676            if collect_spans && syntect_offset < line_content_len {
677                if let Some(category) = self.scope_stack_to_category(&current_scopes) {
678                    let byte_start = current_offset + syntect_offset;
679                    let byte_end = current_offset + line_content_len;
680                    let clamped_start = byte_start.max(actual_start);
681                    if clamped_start < byte_end {
682                        new_spans.push(CachedSpan {
683                            range: clamped_start..byte_end,
684                            category,
685                        });
686                    }
687                }
688            }
689
690            pos = line_end;
691            current_offset += actual_line_byte_len;
692            bytes_since_checkpoint += actual_line_byte_len;
693
694            // Check convergence at checkpoint markers
695            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
696            {
697                let (marker_id, _) = markers_ahead[marker_idx];
698                marker_idx += 1;
699                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
700                    if *stored == (state.clone(), current_scopes.clone()) {
701                        self.stats.convergences += 1;
702                        converged_at = Some(current_offset);
703                        break;
704                    }
705                }
706                self.stats.checkpoints_updated += 1;
707                self.checkpoint_states
708                    .insert(marker_id, (state.clone(), current_scopes.clone()));
709            }
710
711            if converged_at.is_some() {
712                break;
713            }
714
715            // Bound work per pass: pathological edits (e.g. unclosed `/*`
716            // re-scoping the rest of the file) can never converge. Stop here
717            // and resume from `current_offset` on the next render.
718            if current_offset.saturating_sub(dirty_pos) >= CONVERGENCE_BUDGET {
719                budget_hit_at = Some(current_offset);
720                break;
721            }
722        }
723
724        self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
725
726        // Splice classification: converged → clear dirty; budget hit → keep
727        // dirty for next pass; EOF → clear dirty.
728        let (splice_end, dirty_after) = if let Some(c) = converged_at {
729            (c, None)
730        } else if let Some(b) = budget_hit_at {
731            (b, Some(b))
732        } else {
733            (current_offset, None)
734        };
735
736        self.stats.cache_misses += 1; // partial update counts as a miss
737
738        Self::merge_adjacent_spans(&mut new_spans);
739
740        if let Some(cache) = &mut self.cache {
741            let splice_start = actual_start;
742            cache
743                .spans
744                .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
745            cache.spans.extend(new_spans);
746            cache.spans.sort_by_key(|s| s.range.start);
747            Self::merge_adjacent_spans(&mut cache.spans);
748            if splice_end > cache.range.end {
749                cache.range.end = splice_end;
750            }
751            cache.tail_state = None;
752        }
753
754        self.last_buffer_len = buffer.len();
755        self.dirty_from = dirty_after;
756
757        Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
758    }
759
760    /// Forward extension path (see module docs). Caller checks the cache
761    /// exists, has a `tail_state`, has no dirty edits, and `cache.range.end
762    /// < parse_end`.
763    fn extend_cache_forward(
764        &mut self,
765        buffer: &Buffer,
766        parse_end: usize,
767        viewport_start: usize,
768        viewport_end: usize,
769        theme: &Theme,
770    ) -> Vec<HighlightSpan> {
771        self.stats.cache_misses += 1;
772        let buf_len = buffer.len();
773        let parse_end = parse_end.min(buf_len);
774
775        let (extension_start, mut state, mut current_scopes) = {
776            let cache = self
777                .cache
778                .as_ref()
779                .expect("extend_cache_forward: cache must exist");
780            let (s, sc) = cache
781                .tail_state
782                .as_ref()
783                .expect("extend_cache_forward: tail_state must exist")
784                .clone();
785            (cache.range.end, s, sc)
786        };
787
788        if parse_end <= extension_start {
789            return self.filter_cached_spans(viewport_start, viewport_end, theme);
790        }
791
792        let content = buffer.slice_bytes(extension_start..parse_end);
793        let content_str = match std::str::from_utf8(&content) {
794            Ok(s) => s,
795            Err(_) => return self.filter_cached_spans(viewport_start, viewport_end, theme),
796        };
797
798        let mut new_spans = Vec::new();
799        let content_bytes = content_str.as_bytes();
800        let mut pos = 0;
801        let mut current_offset = extension_start;
802        let mut bytes_since_checkpoint: usize = 0;
803
804        while pos < content_bytes.len() {
805            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
806                let nearby = self.checkpoint_markers.query_range(
807                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
808                    current_offset + CHECKPOINT_INTERVAL / 2,
809                );
810                if nearby.is_empty() {
811                    let marker_id = self.checkpoint_markers.create(current_offset, true);
812                    self.checkpoint_states
813                        .insert(marker_id, (state.clone(), current_scopes.clone()));
814                }
815                bytes_since_checkpoint = 0;
816            }
817
818            let line_start = pos;
819            let mut line_end = pos;
820            while line_end < content_bytes.len() {
821                if content_bytes[line_end] == b'\n' {
822                    line_end += 1;
823                    break;
824                } else if content_bytes[line_end] == b'\r' {
825                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
826                        line_end += 2;
827                    } else {
828                        line_end += 1;
829                    }
830                    break;
831                }
832                line_end += 1;
833            }
834
835            let line_bytes = &content_bytes[line_start..line_end];
836            let actual_line_byte_len = line_bytes.len();
837
838            let line_str = match std::str::from_utf8(line_bytes) {
839                Ok(s) => s,
840                Err(_) => {
841                    pos = line_end;
842                    current_offset += actual_line_byte_len;
843                    bytes_since_checkpoint += actual_line_byte_len;
844                    continue;
845                }
846            };
847
848            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
849            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
850                format!("{}\n", line_content)
851            } else {
852                line_content.to_string()
853            };
854
855            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
856                Ok(ops) => ops,
857                Err(_) => {
858                    pos = line_end;
859                    current_offset += actual_line_byte_len;
860                    bytes_since_checkpoint += actual_line_byte_len;
861                    continue;
862                }
863            };
864
865            let mut syntect_offset = 0;
866            let line_content_len = line_content.len();
867
868            for (op_offset, op) in ops {
869                let clamped_op_offset = op_offset.min(line_content_len);
870                if clamped_op_offset > syntect_offset {
871                    if let Some(category) = self.scope_stack_to_category(&current_scopes) {
872                        let byte_start = current_offset + syntect_offset;
873                        let byte_end = current_offset + clamped_op_offset;
874                        if byte_start < byte_end {
875                            new_spans.push(CachedSpan {
876                                range: byte_start..byte_end,
877                                category,
878                            });
879                        }
880                    }
881                }
882                syntect_offset = clamped_op_offset;
883                #[allow(clippy::let_underscore_must_use)]
884                let _ = current_scopes.apply(&op);
885            }
886
887            if syntect_offset < line_content_len {
888                if let Some(category) = self.scope_stack_to_category(&current_scopes) {
889                    let byte_start = current_offset + syntect_offset;
890                    let byte_end = current_offset + line_content_len;
891                    if byte_start < byte_end {
892                        new_spans.push(CachedSpan {
893                            range: byte_start..byte_end,
894                            category,
895                        });
896                    }
897                }
898            }
899
900            pos = line_end;
901            current_offset += actual_line_byte_len;
902            bytes_since_checkpoint += actual_line_byte_len;
903        }
904
905        self.stats.bytes_parsed += parse_end - extension_start;
906
907        Self::merge_adjacent_spans(&mut new_spans);
908
909        let cache = self
910            .cache
911            .as_mut()
912            .expect("extend_cache_forward: cache must still exist");
913        cache.spans.extend(new_spans);
914        Self::merge_adjacent_spans(&mut cache.spans);
915        cache.range.end = parse_end;
916        cache.tail_state = Some((state, current_scopes));
917        self.last_buffer_len = buf_len;
918
919        self.filter_cached_spans(viewport_start, viewport_end, theme)
920    }
921
922    /// Full re-parse from desired_parse_start to parse_end. Used on cold start
923    /// or when partial update fails (no convergence).
924    #[allow(clippy::too_many_arguments)]
925    fn full_parse(
926        &mut self,
927        buffer: &Buffer,
928        desired_parse_start: usize,
929        parse_end: usize,
930        viewport_start: usize,
931        viewport_end: usize,
932        theme: &Theme,
933        _context_bytes: usize,
934    ) -> Vec<HighlightSpan> {
935        self.stats.cache_misses += 1;
936        self.dirty_from = None; // consumed
937
938        if parse_end <= desired_parse_start {
939            return Vec::new();
940        }
941
942        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
943        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
944            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
945
946        let content = buffer.slice_bytes(actual_start..parse_end);
947        let content_str = match std::str::from_utf8(&content) {
948            Ok(s) => s,
949            Err(_) => return Vec::new(),
950        };
951
952        let mut spans = Vec::new();
953        let content_bytes = content_str.as_bytes();
954        let mut pos = 0;
955        let mut current_offset = actual_start;
956        let mut bytes_since_checkpoint: usize = 0;
957
958        while pos < content_bytes.len() {
959            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
960                let nearby = self.checkpoint_markers.query_range(
961                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
962                    current_offset + CHECKPOINT_INTERVAL / 2,
963                );
964                if nearby.is_empty() {
965                    let marker_id = self.checkpoint_markers.create(current_offset, true);
966                    self.checkpoint_states
967                        .insert(marker_id, (state.clone(), current_scopes.clone()));
968                }
969                bytes_since_checkpoint = 0;
970            }
971
972            let line_start = pos;
973            let mut line_end = pos;
974
975            while line_end < content_bytes.len() {
976                if content_bytes[line_end] == b'\n' {
977                    line_end += 1;
978                    break;
979                } else if content_bytes[line_end] == b'\r' {
980                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
981                        line_end += 2;
982                    } else {
983                        line_end += 1;
984                    }
985                    break;
986                }
987                line_end += 1;
988            }
989
990            let line_bytes = &content_bytes[line_start..line_end];
991            let actual_line_byte_len = line_bytes.len();
992
993            let line_str = match std::str::from_utf8(line_bytes) {
994                Ok(s) => s,
995                Err(_) => {
996                    pos = line_end;
997                    current_offset += actual_line_byte_len;
998                    bytes_since_checkpoint += actual_line_byte_len;
999                    continue;
1000                }
1001            };
1002
1003            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
1004            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
1005                format!("{}\n", line_content)
1006            } else {
1007                line_content.to_string()
1008            };
1009
1010            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
1011                Ok(ops) => ops,
1012                Err(_) => {
1013                    pos = line_end;
1014                    current_offset += actual_line_byte_len;
1015                    bytes_since_checkpoint += actual_line_byte_len;
1016                    continue;
1017                }
1018            };
1019
1020            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
1021            let mut syntect_offset = 0;
1022            let line_content_len = line_content.len();
1023
1024            for (op_offset, op) in ops {
1025                let clamped_op_offset = op_offset.min(line_content_len);
1026                if collect_spans && clamped_op_offset > syntect_offset {
1027                    if let Some(category) = self.scope_stack_to_category(&current_scopes) {
1028                        let byte_start = current_offset + syntect_offset;
1029                        let byte_end = current_offset + clamped_op_offset;
1030                        let clamped_start = byte_start.max(desired_parse_start);
1031                        if clamped_start < byte_end {
1032                            spans.push(CachedSpan {
1033                                range: clamped_start..byte_end,
1034                                category,
1035                            });
1036                        }
1037                    }
1038                }
1039                syntect_offset = clamped_op_offset;
1040                #[allow(clippy::let_underscore_must_use)]
1041                let _ = current_scopes.apply(&op);
1042            }
1043
1044            if collect_spans && syntect_offset < line_content_len {
1045                if let Some(category) = self.scope_stack_to_category(&current_scopes) {
1046                    let byte_start = current_offset + syntect_offset;
1047                    let byte_end = current_offset + line_content_len;
1048                    let clamped_start = byte_start.max(desired_parse_start);
1049                    if clamped_start < byte_end {
1050                        spans.push(CachedSpan {
1051                            range: clamped_start..byte_end,
1052                            category,
1053                        });
1054                    }
1055                }
1056            }
1057
1058            pos = line_end;
1059            current_offset += actual_line_byte_len;
1060            bytes_since_checkpoint += actual_line_byte_len;
1061
1062            // Update checkpoint states as we pass them
1063            let markers_here: Vec<(MarkerId, usize)> = self
1064                .checkpoint_markers
1065                .query_range(
1066                    current_offset.saturating_sub(actual_line_byte_len),
1067                    current_offset,
1068                )
1069                .into_iter()
1070                .map(|(id, start, _)| (id, start))
1071                .collect();
1072            for (marker_id, _) in markers_here {
1073                self.checkpoint_states
1074                    .insert(marker_id, (state.clone(), current_scopes.clone()));
1075            }
1076        }
1077
1078        self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
1079
1080        Self::merge_adjacent_spans(&mut spans);
1081
1082        self.cache = Some(TextMateCache {
1083            range: desired_parse_start..parse_end,
1084            spans: spans.clone(),
1085            tail_state: Some((state, current_scopes)),
1086        });
1087        self.last_buffer_len = buffer.len();
1088
1089        spans
1090            .into_iter()
1091            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
1092            .map(|span| {
1093                let cat = span.category;
1094                HighlightSpan {
1095                    range: span.range,
1096                    color: highlight_color(cat, theme),
1097                    category: Some(cat),
1098                }
1099            })
1100            .collect()
1101    }
1102
1103    /// Find the best point to resume parsing from for the viewport.
1104    fn find_parse_resume_point(
1105        &self,
1106        desired_start: usize,
1107        parse_end: usize,
1108        syntax: &syntect::parsing::SyntaxReference,
1109    ) -> (
1110        usize,
1111        syntect::parsing::ParseState,
1112        syntect::parsing::ScopeStack,
1113        bool,
1114    ) {
1115        use syntect::parsing::{ParseState, ScopeStack};
1116
1117        // Look for a checkpoint near the desired start. For large files, only
1118        // consider checkpoints that are within MAX_PARSE_BYTES of desired_start
1119        // to avoid parsing hundreds of MB from a distant checkpoint.
1120        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
1121        let markers = self
1122            .checkpoint_markers
1123            .query_range(search_start, desired_start + 1);
1124        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
1125
1126        if let Some((id, cp_pos, _)) = nearest {
1127            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
1128                return (cp_pos, s.clone(), sc.clone(), true);
1129            }
1130        }
1131
1132        if parse_end <= MAX_PARSE_BYTES {
1133            // File is small enough to parse from byte 0
1134            (0, ParseState::new(syntax), ScopeStack::new(), true)
1135        } else {
1136            // Large file, no nearby checkpoint — start fresh from desired_start.
1137            // Still create checkpoints so future visits to this region can resume.
1138            (
1139                desired_start,
1140                ParseState::new(syntax),
1141                ScopeStack::new(),
1142                true,
1143            )
1144        }
1145    }
1146
1147    /// Map scope stack to highlight category, memoising per-scope lookups.
1148    /// `scope.build_string()` is the costly step; the cache hides it after
1149    /// each scope atom has been seen once.
1150    fn scope_stack_to_category(
1151        &mut self,
1152        scopes: &syntect::parsing::ScopeStack,
1153    ) -> Option<HighlightCategory> {
1154        for scope in scopes.as_slice().iter().rev() {
1155            let cat = match self.scope_category_cache.get(scope) {
1156                Some(c) => *c,
1157                None => {
1158                    let computed = scope_to_category(&scope.build_string());
1159                    self.scope_category_cache.insert(*scope, computed);
1160                    computed
1161                }
1162            };
1163            if let Some(c) = cat {
1164                return Some(c);
1165            }
1166        }
1167        None
1168    }
1169
1170    /// Merge adjacent spans with same category
1171    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
1172        if spans.len() < 2 {
1173            return;
1174        }
1175
1176        let mut write_idx = 0;
1177        for read_idx in 1..spans.len() {
1178            if spans[write_idx].category == spans[read_idx].category
1179                && spans[write_idx].range.end == spans[read_idx].range.start
1180            {
1181                spans[write_idx].range.end = spans[read_idx].range.end;
1182            } else {
1183                write_idx += 1;
1184                if write_idx != read_idx {
1185                    spans[write_idx] = spans[read_idx].clone();
1186                }
1187            }
1188        }
1189        spans.truncate(write_idx + 1);
1190    }
1191
    /// Invalidate span cache for an edited range.
    ///
    /// This method is a deliberate no-op and ignores `_edit_range`.
    /// Checkpoint positions are handled by notify_insert/notify_delete.
    /// The span cache is NOT cleared here — it will be patched (partial update)
    /// during the next highlight_viewport call using convergence. Only dirty_from
    /// (set by notify_insert/notify_delete) controls re-parsing scope.
    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
        // Intentionally does NOT clear self.cache.
        // The cache will be partially updated in highlight_viewport when
        // dirty_from is set. This avoids full re-parses for small edits.
    }
1202
1203    /// Invalidate all cache and checkpoints (file reload, language change, etc.)
1204    pub fn invalidate_all(&mut self) {
1205        self.cache = None;
1206        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1207        for id in ids {
1208            self.checkpoint_markers.delete(id);
1209        }
1210        self.checkpoint_states.clear();
1211        self.dirty_from = None;
1212    }
1213
1214    /// Get the highlight category at a byte position from the cache.
1215    ///
1216    /// Returns the category if the position falls within a cached highlight span.
1217    /// The position must be within the last highlighted viewport range for a result.
1218    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1219        let cache = self.cache.as_ref()?;
1220        cache
1221            .spans
1222            .iter()
1223            .find(|span| span.range.start <= position && position < span.range.end)
1224            .map(|span| span.category)
1225    }
1226
1227    /// Get syntax name
1228    pub fn syntax_name(&self) -> &str {
1229        &self.syntax_set.syntaxes()[self.syntax_index].name
1230    }
1231}
1232
1233impl HighlightEngine {
1234    /// Build a highlighting engine for a catalog entry.
1235    ///
1236    /// Single chokepoint for the "prefer syntect, fall back to tree-sitter"
1237    /// logic. Callers that start from a path or a syntax name should resolve
1238    /// the entry through `GrammarRegistry::find_by_path` / `find_by_name` and
1239    /// then call this.
1240    pub fn from_entry(
1241        entry: &crate::primitives::grammar::GrammarEntry,
1242        registry: &GrammarRegistry,
1243    ) -> Self {
1244        let syntax_set = registry.syntax_set_arc();
1245        if let Some(index) = entry.engines.syntect {
1246            return Self::TextMate(Box::new(TextMateEngine::with_language(
1247                syntax_set,
1248                index,
1249                entry.engines.tree_sitter,
1250            )));
1251        }
1252        if let Some(lang) = entry.engines.tree_sitter {
1253            if let Ok(highlighter) = Highlighter::new(lang) {
1254                return Self::TreeSitter(Box::new(highlighter));
1255            }
1256        }
1257        Self::None
1258    }
1259
1260    /// Create a highlighting engine for a file.
1261    ///
1262    /// Thin wrapper around `from_entry` that resolves the path via the catalog.
1263    /// User-config-declared filename/extension mappings are honoured as long as
1264    /// `GrammarRegistry::apply_language_config` has been called on the registry.
1265    /// `first_line` is used for shebang / first-line regex fallback — pass
1266    /// `None` when no content is available.
1267    pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1268        if let Some(entry) = registry.find_by_path(path, first_line) {
1269            return Self::from_entry(entry, registry);
1270        }
1271        Self::None
1272    }
1273
1274    /// Create a highlighting engine for a syntax by name.
1275    ///
1276    /// Thin wrapper around `from_entry` that performs the lookup via
1277    /// `find_by_name`. The catalog entry already knows which tree-sitter
1278    /// `Language` (if any) serves it, so no separate hint is needed.
1279    pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1280        if let Some(entry) = registry.find_by_name(name) {
1281            return Self::from_entry(entry, registry);
1282        }
1283        Self::None
1284    }
1285
1286    /// Highlight the visible viewport
1287    ///
1288    /// `context_bytes` controls how far before/after the viewport to parse for accurate
1289    /// highlighting of multi-line constructs (strings, comments, nested blocks).
1290    pub fn highlight_viewport(
1291        &mut self,
1292        buffer: &Buffer,
1293        viewport_start: usize,
1294        viewport_end: usize,
1295        theme: &Theme,
1296        context_bytes: usize,
1297    ) -> Vec<HighlightSpan> {
1298        match self {
1299            Self::TreeSitter(h) => {
1300                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1301            }
1302            Self::TextMate(h) => {
1303                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1304            }
1305            Self::None => Vec::new(),
1306        }
1307    }
1308
1309    /// Notify the highlighting engine of a buffer insert (for checkpoint position tracking).
1310    pub fn notify_insert(&mut self, position: usize, length: usize) {
1311        if let Self::TextMate(h) = self {
1312            h.notify_insert(position, length);
1313        }
1314    }
1315
1316    /// Notify the highlighting engine of a buffer delete (for checkpoint position tracking).
1317    pub fn notify_delete(&mut self, position: usize, length: usize) {
1318        if let Self::TextMate(h) = self {
1319            h.notify_delete(position, length);
1320        }
1321    }
1322
1323    /// Invalidate cache for an edited range
1324    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1325        match self {
1326            Self::TreeSitter(h) => h.invalidate_range(edit_range),
1327            Self::TextMate(h) => h.invalidate_range(edit_range),
1328            Self::None => {}
1329        }
1330    }
1331
1332    /// Invalidate entire cache
1333    pub fn invalidate_all(&mut self) {
1334        match self {
1335            Self::TreeSitter(h) => h.invalidate_all(),
1336            Self::TextMate(h) => h.invalidate_all(),
1337            Self::None => {}
1338        }
1339    }
1340
1341    /// Check if this engine has highlighting available
1342    pub fn has_highlighting(&self) -> bool {
1343        !matches!(self, Self::None)
1344    }
1345
1346    /// Get a description of the active backend
1347    pub fn backend_name(&self) -> &str {
1348        match self {
1349            Self::TreeSitter(_) => "tree-sitter",
1350            Self::TextMate(_) => "textmate",
1351            Self::None => "none",
1352        }
1353    }
1354
1355    /// Get performance stats (TextMate engine only).
1356    pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1357        if let Self::TextMate(h) = self {
1358            Some(h.stats())
1359        } else {
1360            None
1361        }
1362    }
1363
1364    /// Reset performance counters.
1365    pub fn reset_highlight_stats(&mut self) {
1366        if let Self::TextMate(h) = self {
1367            h.reset_stats();
1368        }
1369    }
1370
1371    /// Get the language/syntax name if available
1372    pub fn syntax_name(&self) -> Option<&str> {
1373        match self {
1374            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
1375            Self::TextMate(h) => Some(h.syntax_name()),
1376            Self::None => None,
1377        }
1378    }
1379
1380    /// Get the highlight category at a byte position from the cache.
1381    ///
1382    /// Returns the category if the position falls within a cached highlight span.
1383    /// Useful for detecting whether the cursor is inside a string, comment, etc.
1384    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1385        match self {
1386            Self::TreeSitter(h) => h.category_at_position(position),
1387            Self::TextMate(h) => h.category_at_position(position),
1388            Self::None => None,
1389        }
1390    }
1391
1392    /// Get the tree-sitter Language for non-highlighting features
1393    /// Returns the language even when using TextMate for highlighting
1394    pub fn language(&self) -> Option<&Language> {
1395        match self {
1396            Self::TreeSitter(h) => Some(h.language()),
1397            Self::TextMate(h) => h.language(),
1398            Self::None => None,
1399        }
1400    }
1401}
1402
1403/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
1404/// Returns spans with byte ranges relative to the input string.
1405///
1406/// This uses TextMate grammars via syntect which provides broader language coverage
1407/// than tree-sitter (~150+ languages vs ~17).
1408pub fn highlight_string(
1409    code: &str,
1410    lang_hint: &str,
1411    registry: &GrammarRegistry,
1412    theme: &Theme,
1413) -> Vec<HighlightSpan> {
1414    use syntect::parsing::{ParseState, ScopeStack};
1415
1416    // Find syntax by language token (handles aliases like "py" -> Python)
1417    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1418        Some(s) => s,
1419        None => return Vec::new(),
1420    };
1421
1422    let syntax_set = registry.syntax_set();
1423    let mut state = ParseState::new(syntax);
1424    let mut spans = Vec::new();
1425    let mut current_scopes = ScopeStack::new();
1426    let mut current_offset = 0;
1427
1428    // Parse line by line
1429    for line in code.split_inclusive('\n') {
1430        let line_start = current_offset;
1431        let line_len = line.len();
1432
1433        // Remove trailing newline for syntect, then add it back
1434        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1435        let line_for_syntect = if line.ends_with('\n') {
1436            format!("{}\n", line_content)
1437        } else {
1438            line_content.to_string()
1439        };
1440
1441        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1442            Ok(ops) => ops,
1443            Err(_) => {
1444                current_offset += line_len;
1445                continue;
1446            }
1447        };
1448
1449        let mut syntect_offset = 0;
1450        let line_content_len = line_content.len();
1451
1452        for (op_offset, op) in ops {
1453            let clamped_op_offset = op_offset.min(line_content_len);
1454            if clamped_op_offset > syntect_offset {
1455                if let Some(category) = scope_stack_to_category(&current_scopes) {
1456                    let byte_start = line_start + syntect_offset;
1457                    let byte_end = line_start + clamped_op_offset;
1458                    if byte_start < byte_end {
1459                        spans.push(HighlightSpan {
1460                            range: byte_start..byte_end,
1461                            color: highlight_color(category, theme),
1462                            category: Some(category),
1463                        });
1464                    }
1465                }
1466            }
1467            syntect_offset = clamped_op_offset;
1468            // Scope stack errors are non-fatal for highlighting
1469            #[allow(clippy::let_underscore_must_use)]
1470            let _ = current_scopes.apply(&op);
1471        }
1472
1473        // Handle remaining text on line
1474        if syntect_offset < line_content_len {
1475            if let Some(category) = scope_stack_to_category(&current_scopes) {
1476                let byte_start = line_start + syntect_offset;
1477                let byte_end = line_start + line_content_len;
1478                if byte_start < byte_end {
1479                    spans.push(HighlightSpan {
1480                        range: byte_start..byte_end,
1481                        color: highlight_color(category, theme),
1482                        category: Some(category),
1483                    });
1484                }
1485            }
1486        }
1487
1488        current_offset += line_len;
1489    }
1490
1491    // Merge adjacent spans with same color
1492    merge_adjacent_highlight_spans(&mut spans);
1493
1494    spans
1495}
1496
1497/// Map scope stack to highlight category (for highlight_string)
1498fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1499    for scope in scopes.as_slice().iter().rev() {
1500        let scope_str = scope.build_string();
1501        if let Some(cat) = scope_to_category(&scope_str) {
1502            return Some(cat);
1503        }
1504    }
1505    None
1506}
1507
1508/// Merge adjacent spans with same color
1509fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1510    if spans.len() < 2 {
1511        return;
1512    }
1513
1514    let mut write_idx = 0;
1515    for read_idx in 1..spans.len() {
1516        if spans[write_idx].color == spans[read_idx].color
1517            && spans[write_idx].range.end == spans[read_idx].range.start
1518        {
1519            spans[write_idx].range.end = spans[read_idx].range.end;
1520        } else {
1521            write_idx += 1;
1522            if write_idx != read_idx {
1523                spans[write_idx] = spans[read_idx].clone();
1524            }
1525        }
1526    }
1527    spans.truncate(write_idx + 1);
1528}
1529
1530#[cfg(test)]
1531mod tests {
1532    use crate::model::filesystem::StdFileSystem;
1533    use std::sync::Arc;
1534
1535    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1536        Arc::new(StdFileSystem)
1537    }
1538    use super::*;
1539    use crate::view::theme;
1540
1541    #[test]
1542    fn test_highlight_engine_default() {
1543        let engine = HighlightEngine::default();
1544        assert!(!engine.has_highlighting());
1545        assert_eq!(engine.backend_name(), "none");
1546    }
1547
1548    #[test]
1549    fn test_textmate_backend_selection() {
1550        let registry =
1551            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1552
1553        // Languages with TextMate grammars use TextMate for highlighting
1554        let engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1555        assert_eq!(engine.backend_name(), "textmate");
1556        // Tree-sitter language should still be detected for other features
1557        assert!(engine.language().is_some());
1558
1559        let engine = HighlightEngine::for_file(Path::new("test.py"), None, &registry);
1560        assert_eq!(engine.backend_name(), "textmate");
1561        assert!(engine.language().is_some());
1562
1563        // JavaScript is routed to tree-sitter (issue #899: syntect's JS
1564        // grammar bleeds template-literal string state past the closing
1565        // backtick).
1566        let engine = HighlightEngine::for_file(Path::new("test.js"), None, &registry);
1567        assert_eq!(engine.backend_name(), "tree-sitter");
1568        assert!(engine.language().is_some());
1569
1570        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
1571        let engine = HighlightEngine::for_file(Path::new("test.ts"), None, &registry);
1572        assert_eq!(engine.backend_name(), "tree-sitter");
1573        assert!(engine.language().is_some());
1574
1575        let engine = HighlightEngine::for_file(Path::new("test.tsx"), None, &registry);
1576        assert_eq!(engine.backend_name(), "tree-sitter");
1577        assert!(engine.language().is_some());
1578    }
1579
1580    #[test]
1581    fn test_tree_sitter_direct() {
1582        // Verify tree-sitter highlighter can be created directly for Rust
1583        let highlighter = Highlighter::new(Language::Rust);
1584        assert!(highlighter.is_ok());
1585    }
1586
1587    #[test]
1588    fn test_unknown_extension() {
1589        let registry =
1590            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1591
1592        // Unknown extension
1593        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), None, &registry);
1594        // Might be none or might find something via syntect
1595        // Just verify it doesn't panic
1596        let _ = engine.backend_name();
1597    }
1598
1599    #[test]
1600    fn test_highlight_viewport_empty_buffer_no_panic() {
1601        // Regression test: calling highlight_viewport with an empty buffer
1602        // and non-zero viewport range previously caused subtraction overflow panic.
1603        //
1604        // The bug occurred when:
1605        // - buffer is empty (len = 0)
1606        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
1607        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
1608        // - parse_end - parse_start would underflow (0 - positive = overflow)
1609        let registry =
1610            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1611
1612        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1613
1614        // Create empty buffer
1615        let buffer = Buffer::from_str("", 0, test_fs());
1616        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1617
1618        // Test the specific case that triggered the overflow:
1619        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
1620        // 0 - 90 = overflow!
1621        if let HighlightEngine::TextMate(ref mut tm) = engine {
1622            // Small context_bytes so parse_start remains > 0
1623            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1624            assert!(spans.is_empty());
1625        }
1626    }
1627
1628    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
1629    /// This is a regression test for a bug where using str::lines() caused 1-byte
1630    /// offset drift per line because it strips line terminators.
1631    #[test]
1632    fn test_textmate_engine_crlf_byte_offsets() {
1633        let registry =
1634            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1635
1636        let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, &registry);
1637
1638        // Create CRLF content with keywords on each line
1639        // Each "public" keyword should be highlighted at byte positions:
1640        // Line 1: "public" at bytes 0-5
1641        // Line 2: "public" at bytes 8-13 (after "public\r\n" = 8 bytes)
1642        // Line 3: "public" at bytes 16-21 (after two "public\r\n" = 16 bytes)
1643        let content = b"public\r\npublic\r\npublic\r\n";
1644        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1645        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1646
1647        if let HighlightEngine::TextMate(ref mut tm) = engine {
1648            // Highlight the entire content
1649            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1650
1651            // Find spans that cover keyword positions
1652            // The keyword "public" should have spans at these byte ranges:
1653            // Line 1: 0..6
1654            // Line 2: 8..14 (NOT 7..13 which would be the buggy offset)
1655            // Line 3: 16..22 (NOT 14..20 which would be the buggy offset)
1656
1657            eprintln!(
1658                "Spans: {:?}",
1659                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1660            );
1661
1662            // Check that we have spans covering the correct positions
1663            let has_span_at = |start: usize, end: usize| -> bool {
1664                spans
1665                    .iter()
1666                    .any(|s| s.range.start <= start && s.range.end >= end)
1667            };
1668
1669            // Line 1: "public" at bytes 0-6
1670            assert!(
1671                has_span_at(0, 6),
1672                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1673                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1674            );
1675
1676            // Line 2: "public" at bytes 8-14 (after "public\r\n")
1677            // If buggy, would be at 7-13
1678            assert!(
1679                has_span_at(8, 14),
1680                "Should have span covering bytes 8-14 (line 2 'public'). \
1681                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1682                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1683            );
1684
1685            // Line 3: "public" at bytes 16-22 (after two "public\r\n")
1686            // If buggy, would be at 14-20
1687            assert!(
1688                has_span_at(16, 22),
1689                "Should have span covering bytes 16-22 (line 3 'public'). \
1690                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1691                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1692            );
1693        } else {
1694            panic!("Expected TextMate engine for .java file");
1695        }
1696    }
1697
1698    #[test]
1699    fn test_git_rebase_todo_highlighting() {
1700        let registry =
1701            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1702
1703        // git-rebase-todo files should use the Git Rebase Todo grammar
1704        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), None, &registry);
1705        assert_eq!(engine.backend_name(), "textmate");
1706        assert!(engine.has_highlighting());
1707    }
1708
1709    #[test]
1710    fn test_git_commit_message_highlighting() {
1711        let registry =
1712            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1713
1714        // COMMIT_EDITMSG should use the Git Commit Message grammar
1715        let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), None, &registry);
1716        assert_eq!(engine.backend_name(), "textmate");
1717        assert!(engine.has_highlighting());
1718
1719        // MERGE_MSG should also work
1720        let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), None, &registry);
1721        assert_eq!(engine.backend_name(), "textmate");
1722        assert!(engine.has_highlighting());
1723    }
1724
1725    #[test]
1726    fn test_gitignore_highlighting() {
1727        let registry =
1728            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1729
1730        // .gitignore should use the Gitignore grammar
1731        let engine = HighlightEngine::for_file(Path::new(".gitignore"), None, &registry);
1732        assert_eq!(engine.backend_name(), "textmate");
1733        assert!(engine.has_highlighting());
1734
1735        // .dockerignore should also work
1736        let engine = HighlightEngine::for_file(Path::new(".dockerignore"), None, &registry);
1737        assert_eq!(engine.backend_name(), "textmate");
1738        assert!(engine.has_highlighting());
1739    }
1740
1741    #[test]
1742    fn test_gitconfig_highlighting() {
1743        let registry =
1744            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1745
1746        // .gitconfig should use the Git Config grammar
1747        let engine = HighlightEngine::for_file(Path::new(".gitconfig"), None, &registry);
1748        assert_eq!(engine.backend_name(), "textmate");
1749        assert!(engine.has_highlighting());
1750
1751        // .gitmodules should also work
1752        let engine = HighlightEngine::for_file(Path::new(".gitmodules"), None, &registry);
1753        assert_eq!(engine.backend_name(), "textmate");
1754        assert!(engine.has_highlighting());
1755    }
1756
1757    #[test]
1758    fn test_gitattributes_highlighting() {
1759        let registry =
1760            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1761
1762        // .gitattributes should use the Git Attributes grammar
1763        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), None, &registry);
1764        assert_eq!(engine.backend_name(), "textmate");
1765        assert!(engine.has_highlighting());
1766    }
1767
1768    #[test]
1769    fn test_comment_delimiter_uses_comment_color() {
1770        // Comment delimiters (#, //, /*) should use comment color, not operator
1771        assert_eq!(
1772            scope_to_category("punctuation.definition.comment"),
1773            Some(HighlightCategory::Comment)
1774        );
1775        assert_eq!(
1776            scope_to_category("punctuation.definition.comment.python"),
1777            Some(HighlightCategory::Comment)
1778        );
1779        assert_eq!(
1780            scope_to_category("punctuation.definition.comment.begin"),
1781            Some(HighlightCategory::Comment)
1782        );
1783    }
1784
1785    #[test]
1786    fn test_string_delimiter_uses_string_color() {
1787        // String delimiters (", ', `) should use string color, not operator
1788        assert_eq!(
1789            scope_to_category("punctuation.definition.string.begin"),
1790            Some(HighlightCategory::String)
1791        );
1792        assert_eq!(
1793            scope_to_category("punctuation.definition.string.end"),
1794            Some(HighlightCategory::String)
1795        );
1796    }
1797
1798    #[test]
1799    fn test_punctuation_bracket() {
1800        // punctuation.section (TextMate standard for block delimiters)
1801        assert_eq!(
1802            scope_to_category("punctuation.section"),
1803            Some(HighlightCategory::PunctuationBracket)
1804        );
1805        assert_eq!(
1806            scope_to_category("punctuation.section.block.begin.c"),
1807            Some(HighlightCategory::PunctuationBracket)
1808        );
1809        assert_eq!(
1810            scope_to_category("punctuation.bracket"),
1811            Some(HighlightCategory::PunctuationBracket)
1812        );
1813        // punctuation.definition.* bracket-like scopes from sublime-syntax grammars
1814        assert_eq!(
1815            scope_to_category("punctuation.definition.array.begin.toml"),
1816            Some(HighlightCategory::PunctuationBracket)
1817        );
1818        assert_eq!(
1819            scope_to_category("punctuation.definition.block.code.typst"),
1820            Some(HighlightCategory::PunctuationBracket)
1821        );
1822        assert_eq!(
1823            scope_to_category("punctuation.definition.group.typst"),
1824            Some(HighlightCategory::PunctuationBracket)
1825        );
1826        assert_eq!(
1827            scope_to_category("punctuation.definition.inline-table.begin.toml"),
1828            Some(HighlightCategory::PunctuationBracket)
1829        );
1830        assert_eq!(
1831            scope_to_category("punctuation.definition.tag.end.svelte"),
1832            Some(HighlightCategory::PunctuationBracket)
1833        );
1834    }
1835
1836    #[test]
1837    fn test_punctuation_delimiter() {
1838        assert_eq!(
1839            scope_to_category("punctuation.separator"),
1840            Some(HighlightCategory::PunctuationDelimiter)
1841        );
1842        assert_eq!(
1843            scope_to_category("punctuation.terminator.statement.c"),
1844            Some(HighlightCategory::PunctuationDelimiter)
1845        );
1846        assert_eq!(
1847            scope_to_category("punctuation.accessor"),
1848            Some(HighlightCategory::PunctuationDelimiter)
1849        );
1850    }
1851
1852    /// First parse of a small file populates a whole-file cache; subsequent
1853    /// scrolls anywhere in the file are exact cache hits with no extra parse
1854    /// work.
1855    #[test]
1856    fn test_small_file_scroll_is_cache_hit() {
1857        let registry =
1858            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1859        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1860
1861        let mut content = String::new();
1862        for i in 0..200 {
1863            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
1864        }
1865        let buffer = Buffer::from_str(&content, 0, test_fs());
1866        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1867
1868        let HighlightEngine::TextMate(ref mut tm) = engine else {
1869            panic!("expected TextMate engine for .rs");
1870        };
1871
1872        // First call: cold start, full parse.
1873        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
1874        let stats_after_first = tm.stats().clone();
1875        assert_eq!(
1876            stats_after_first.cache_hits, 0,
1877            "first call cannot hit cache"
1878        );
1879        assert_eq!(
1880            stats_after_first.cache_misses, 1,
1881            "first call must be a miss"
1882        );
1883
1884        // Scroll anywhere — top, middle, end. All must be cache hits.
1885        let mid = buffer.len() / 2;
1886        let near_end = buffer.len().saturating_sub(200);
1887        let probes = [(0, 200), (mid, mid + 200), (near_end, buffer.len())];
1888        for (vs, ve) in probes {
1889            let _ = tm.highlight_viewport(&buffer, vs, ve, &theme, 10_000);
1890        }
1891
1892        let stats_after_scroll = tm.stats().clone();
1893        assert_eq!(
1894            stats_after_scroll.cache_misses,
1895            1,
1896            "scrolling must not add cache misses (got extra: {})",
1897            stats_after_scroll.cache_misses - 1
1898        );
1899        assert_eq!(
1900            stats_after_scroll.cache_hits, 3,
1901            "all three scroll probes must hit the cache"
1902        );
1903        assert_eq!(
1904            stats_after_scroll.bytes_parsed, stats_after_first.bytes_parsed,
1905            "scrolling must not parse any new bytes"
1906        );
1907    }
1908
1909    /// After a small edit, the next render takes the partial-update path
1910    /// (convergence) and continues to serve cache hits afterwards. Crucially:
1911    /// the partial update parses far fewer bytes than the file is long.
1912    #[test]
1913    fn test_small_file_edit_uses_partial_update() {
1914        let registry =
1915            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1916        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1917
1918        let mut content = String::new();
1919        for i in 0..200 {
1920            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
1921        }
1922        let buffer = Buffer::from_str(&content, 0, test_fs());
1923        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1924
1925        let HighlightEngine::TextMate(ref mut tm) = engine else {
1926            panic!("expected TextMate engine for .rs");
1927        };
1928
1929        // Warm cache.
1930        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
1931        let bytes_before_edit = tm.stats().bytes_parsed;
1932        let buf_len = buffer.len();
1933        assert!(
1934            buf_len > 4000,
1935            "test needs a buffer larger than the partial-update region"
1936        );
1937
1938        // Simulate an edit deep in the file.
1939        let edit_pos = buf_len / 2;
1940        tm.notify_insert(edit_pos, 1);
1941        // The buffer itself doesn't change here (we test the engine in isolation),
1942        // but notify_insert sets dirty_from and shifts spans, which is what the
1943        // partial-update path consumes.
1944
1945        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
1946        let bytes_after_edit = tm.stats().bytes_parsed;
1947        let parsed = bytes_after_edit - bytes_before_edit;
1948
1949        assert!(
1950            parsed < buf_len,
1951            "edit must not trigger a whole-file reparse (parsed {parsed}, file {buf_len})"
1952        );
1953    }
1954
1955    /// Convergence budget caps per-pass work even when the parse state never
1956    /// agrees with any existing checkpoint. Without the cap, a non-converging
1957    /// edit would parse the rest of the file on every keystroke.
1958    #[test]
1959    fn test_partial_update_budget_caps_work() {
1960        let registry =
1961            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1962        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1963
1964        // Build a buffer comfortably larger than CONVERGENCE_BUDGET.
1965        let mut content = String::new();
1966        while content.len() < (CONVERGENCE_BUDGET * 4) {
1967            content.push_str("fn name() { let mut v = 0; v += 1; }\n");
1968        }
1969        let buffer = Buffer::from_str(&content, 0, test_fs());
1970        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1971
1972        let HighlightEngine::TextMate(ref mut tm) = engine else {
1973            panic!("expected TextMate engine for .rs");
1974        };
1975
1976        // Warm cache (whole-file parse).
1977        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
1978        // Simulate an edit and force every checkpoint to disagree by clearing
1979        // their stored states. The convergence loop will look at each marker,
1980        // find the slot empty, and never converge.
1981        tm.notify_insert(100, 0);
1982        tm.checkpoint_states.clear();
1983
1984        let bytes_before = tm.stats().bytes_parsed;
1985        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
1986        let parsed = tm.stats().bytes_parsed - bytes_before;
1987
1988        // Budget bounds the work to roughly CONVERGENCE_BUDGET past the dirty
1989        // point (plus the prefix back to the resume checkpoint). Allow a small
1990        // overshoot for the line that crossed the budget threshold.
1991        assert!(
1992            parsed <= CONVERGENCE_BUDGET + 4096,
1993            "partial update parsed {parsed}, expected <= {} \
1994             (budget {CONVERGENCE_BUDGET} + slack)",
1995            CONVERGENCE_BUDGET + 4096
1996        );
1997
1998        // Budget hit must leave dirty_from set for follow-up passes.
1999        assert!(
2000            tm.dirty_from.is_some(),
2001            "budget exit must keep dirty_from set"
2002        );
2003    }
2004
2005    /// Large files (above MAX_PARSE_BYTES) keep the existing windowed
2006    /// behaviour: parse range is bounded by ±context_bytes around the
2007    /// viewport, not the whole file.
2008    ///
2009    /// The viewport is placed past `MAX_PARSE_BYTES` so we exercise the
2010    /// "large file, no nearby checkpoint" branch in `find_parse_resume_point`
2011    /// — the symmetric branch that fires when `parse_end <= MAX_PARSE_BYTES`
2012    /// still parses from byte 0 even on big files (pre-existing behaviour,
2013    /// addressed in a later phase).
2014    #[test]
2015    fn test_large_file_uses_windowed_parse() {
2016        let registry =
2017            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2018        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2019
2020        // Build content well past MAX_PARSE_BYTES so we can put the viewport
2021        // beyond it.
2022        let line = "fn long_name_for_padding() { let v = 1; v + 1; }\n";
2023        let bytes_needed = MAX_PARSE_BYTES * 2;
2024        let lines_needed = bytes_needed / line.len() + 100;
2025        let mut content = String::with_capacity(lines_needed * line.len());
2026        for _ in 0..lines_needed {
2027            content.push_str(line);
2028        }
2029        assert!(content.len() > MAX_PARSE_BYTES * 2);
2030        let buffer = Buffer::from_str(&content, 0, test_fs());
2031        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2032
2033        let HighlightEngine::TextMate(ref mut tm) = engine else {
2034            panic!("expected TextMate engine for .rs");
2035        };
2036
2037        // Viewport past MAX_PARSE_BYTES: parse_end > MAX_PARSE_BYTES, so the
2038        // resume-from-byte-0 fallback in find_parse_resume_point doesn't fire.
2039        let context_bytes = 10_000usize;
2040        let viewport_start = MAX_PARSE_BYTES + 200_000;
2041        let viewport_end = viewport_start + 1000;
2042        let _ = tm.highlight_viewport(&buffer, viewport_start, viewport_end, &theme, context_bytes);
2043        let parsed = tm.stats().bytes_parsed;
2044
2045        // Windowed parse covers viewport ± context_bytes plus a tiny prefix
2046        // for the resume anchor. Allow generous slack (4×) but reject
2047        // anything close to whole-file.
2048        let window = (viewport_end - viewport_start) + 2 * context_bytes;
2049        assert!(
2050            parsed <= window * 4,
2051            "large file windowed parse should be ~{window} bytes, got {parsed} \
2052             (file {})",
2053            buffer.len()
2054        );
2055    }
2056
2057    /// Regression for issue #899: a class field initialised with an arrow
2058    /// function that returns a template literal must not bleed string
2059    /// highlighting onto the rest of the class body. The user-reported
2060    /// repro pinned the syntect JavaScript grammar to a string state from
2061    /// the trailing `;` until EOF; the constructor keyword, comments, and
2062    /// the closing `}` were all painted as a string.
2063    #[test]
2064    fn test_javascript_template_literal_does_not_bleed() {
2065        let registry =
2066            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2067        let mut engine = HighlightEngine::for_file(Path::new("repro.js"), None, &registry);
2068
2069        // Reproduction code from issue #899.
2070        let source = "class ExampleClass {\n\
2071                      \texampleFunction = exampleArg => `${exampleArg}`;\n\
2072                      \n\
2073                      \tconstructor() {\n\
2074                      \t\t// constructor body\n\
2075                      \t}\n\
2076                      \n\
2077                      \t/* multiline comment */\n\
2078                      }\n";
2079        let buffer = Buffer::from_str(source, 0, test_fs());
2080        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2081
2082        let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2083
2084        // The `constructor` keyword sits well after the template literal.
2085        // If string state bleeds, this position is reported as String.
2086        let ctor_pos = source.find("constructor").expect("locate constructor");
2087        let ctor_cat = engine.category_at_position(ctor_pos);
2088        assert_ne!(
2089            ctor_cat,
2090            Some(HighlightCategory::String),
2091            "constructor keyword must not inherit string state from earlier \
2092             template literal (got {:?})",
2093            ctor_cat,
2094        );
2095
2096        // The closing brace of the class — the very last non-whitespace char
2097        // — also lives outside any string in correct JS.
2098        let last_brace = source.rfind('}').expect("locate closing brace");
2099        let brace_cat = engine.category_at_position(last_brace);
2100        assert_ne!(
2101            brace_cat,
2102            Some(HighlightCategory::String),
2103            "closing class brace must not be highlighted as string \
2104             (got {:?})",
2105            brace_cat,
2106        );
2107    }
2108
2109    /// The closing `}` of a `${…}` template substitution and the closing
2110    /// backtick of the surrounding template literal must keep template
2111    /// string colouring — not inherit the `@variable` highlight from the
2112    /// substitution's expression. Tree-sitter-highlight emits one
2113    /// HighlightEnd event per started highlight; if the editor's
2114    /// span-flattening logic doesn't pop the inner `@variable` correctly
2115    /// when the substitution closes, the variable colour bleeds across
2116    /// `}` and the trailing `\`` until the next sibling capture (here,
2117    /// the `;` operator).
2118    #[test]
2119    fn test_javascript_template_substitution_closing_tokens_are_string() {
2120        let registry =
2121            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2122        let mut engine = HighlightEngine::for_file(Path::new("tmpl.js"), None, &registry);
2123
2124        // Minimal template literal: `${name}` — wrapped in a statement so
2125        // the parser sees a complete program.
2126        let source = "const x = `${name}`;\n";
2127        let buffer = Buffer::from_str(source, 0, test_fs());
2128        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2129
2130        let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2131
2132        // Locate the closing `}` of the substitution and the closing
2133        // backtick of the template literal.
2134        let close_brace = source
2135            .find("}`")
2136            .expect("locate substitution closing brace");
2137        let close_backtick = close_brace + 1;
2138
2139        // Sanity: the inner identifier `name` is correctly tagged as a
2140        // variable (this guards us against an unrelated regression where
2141        // the entire template gets typed wrong).
2142        let name_pos = source.find("name").expect("locate identifier");
2143        let name_cat = engine.category_at_position(name_pos);
2144        assert_eq!(
2145            name_cat,
2146            Some(HighlightCategory::Variable),
2147            "substitution identifier should be Variable (got {:?})",
2148            name_cat,
2149        );
2150
2151        // The closing `}` and `` ` `` live inside the surrounding
2152        // `template_string` node, so tree-sitter assigns them the
2153        // `@string` capture. They must surface as String here — not
2154        // as Variable (the previous symptom of the bleed) and not as
2155        // None (which would make the editor render them with the
2156        // default foreground colour, equally wrong).
2157        let brace_cat = engine.category_at_position(close_brace);
2158        assert_eq!(
2159            brace_cat,
2160            Some(HighlightCategory::String),
2161            "closing }} of ${{…}} must be String (got {:?})",
2162            brace_cat,
2163        );
2164        let backtick_cat = engine.category_at_position(close_backtick);
2165        assert_eq!(
2166            backtick_cat,
2167            Some(HighlightCategory::String),
2168            "closing backtick of template literal must be String \
2169             (got {:?})",
2170            backtick_cat,
2171        );
2172    }
2173}