Skip to main content

fresh/primitives/
highlight_engine.rs

1//! Unified highlighting engine over syntect (TextMate grammars) and
2//! tree-sitter. Syntect is the default; tree-sitter `Language` is still
3//! detected for non-highlighting features (indentation, semantic highlighting).
4//!
5//! # TextMate cache design
6//!
7//! Syntect's parser is a sequential state machine — it must process bytes
8//! in order from a known parse state to track multi-line constructs and
9//! embedded language transitions. To make scrolling cheap, the engine keeps
10//! a span cache, a `(ParseState, ScopeStack)` snapshot at the cache tail,
11//! and periodic checkpoint anchors to support resume-from-anywhere.
12//!
//! Four render-time paths, gated by what the cache covers:
14//!
15//! - **Cache hit** — cache fully covers the parse range and there's no
16//!   pending edit; filter cached spans for the viewport. Zero parse work.
17//! - **Forward extension** — cache covers the start of the parse range but
18//!   not its end; resume from `tail_state` and parse only the uncovered
19//!   tail bytes. Steady-state scroll path.
20//! - **Partial update** — there's a pending edit; resume from the nearest
21//!   checkpoint before the dirty point and parse forward looking for
22//!   convergence (state matches an existing checkpoint), bounded by a
23//!   per-pass byte budget so pathological edits can't degenerate into
24//!   whole-file reparses.
25//! - **Cold start / fallback** — no cache, or none of the above applies;
26//!   parse the appropriate range from a fresh state or nearest checkpoint.
27//!
28//! For files at or below `MAX_PARSE_BYTES` the parse range is the whole
29//! file, so the cache is whole-file after the first parse and scrolling
30//! becomes filter-only. Larger files use a viewport-centred window of
31//! `±context_bytes` and rely on the forward-extension path to keep
32//! scroll-cost bounded.
33//!
34//! Edits go through `notify_insert` / `notify_delete`, which shift cached
35//! span byte offsets in place, set `dirty_from`, and invalidate `tail_state`
36//! when the edit lies inside the cached range.
37
38use crate::model::buffer::Buffer;
39use crate::model::marker::{MarkerId, MarkerList};
40use crate::primitives::grammar::GrammarRegistry;
41use crate::primitives::highlighter::{
42    highlight_bg, highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
43};
44use crate::view::theme::Theme;
45use std::collections::HashMap;
46use std::ops::Range;
47use std::path::Path;
48use std::sync::Arc;
49use syntect::parsing::SyntaxSet;
50
51/// Map TextMate scope to highlight category
52fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
53    let scope_lower = scope.to_lowercase();
54
55    // Comments - highest priority
56    if scope_lower.starts_with("comment") {
57        return Some(HighlightCategory::Comment);
58    }
59
60    // Strings
61    if scope_lower.starts_with("string") {
62        return Some(HighlightCategory::String);
63    }
64
65    // Markdown/markup scopes - handle before generic keyword/punctuation checks
66    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
67    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
68    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
69        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
70    }
71    // Bold: markup.bold
72    if scope_lower.starts_with("markup.bold") {
73        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
74    }
75    // Italic: markup.italic
76    if scope_lower.starts_with("markup.italic") {
77        return Some(HighlightCategory::Variable); // Italic styled like variables
78    }
79    // Inline code and code blocks: markup.raw, markup.inline.raw
80    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
81        return Some(HighlightCategory::String); // Code styled like strings
82    }
83    // Links: markup.underline.link
84    if scope_lower.starts_with("markup.underline.link") {
85        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
86    }
87    // Generic underline (often links)
88    if scope_lower.starts_with("markup.underline") {
89        return Some(HighlightCategory::Function);
90    }
91    // Block quotes: markup.quote
92    if scope_lower.starts_with("markup.quote") {
93        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
94    }
95    // Lists: markup.list
96    if scope_lower.starts_with("markup.list") {
97        return Some(HighlightCategory::Operator); // List markers styled like operators
98    }
99    // Strikethrough: markup.strikethrough
100    if scope_lower.starts_with("markup.strikethrough") {
101        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
102    }
103
104    // Diff scopes (syntect's bundled `Diff` grammar). These scope the
105    // entire row, not just the leading +/-/@@ marker, so the renderer
106    // can paint a whole-line background by reading the span's bg.
107    //
108    //   markup.inserted.diff      — `+` line
109    //   markup.deleted.diff       — `-` line
110    //   meta.diff.range.unified   — `@@ ... @@` hunk header
111    //   markup.changed.*          — generic "changed" marker (rare)
112    //   meta.diff.header.*        — `diff --git`, `index ...`, file
113    //                               headers; render like Type so they
114    //                               stand out without a bg wash.
115    if scope_lower.starts_with("markup.inserted") {
116        return Some(HighlightCategory::Inserted);
117    }
118    if scope_lower.starts_with("markup.deleted") {
119        return Some(HighlightCategory::Deleted);
120    }
121    if scope_lower.starts_with("markup.changed") || scope_lower.starts_with("meta.diff.range") {
122        return Some(HighlightCategory::Changed);
123    }
124    if scope_lower.starts_with("meta.diff.header") {
125        return Some(HighlightCategory::Type);
126    }
127
128    // Keywords
129    if scope_lower.starts_with("keyword.control")
130        || scope_lower.starts_with("keyword.other")
131        || scope_lower.starts_with("keyword.declaration")
132        || scope_lower.starts_with("keyword")
133    {
134        // keyword.operator should map to Operator, not Keyword
135        if !scope_lower.starts_with("keyword.operator") {
136            return Some(HighlightCategory::Keyword);
137        }
138    }
139
140    // Punctuation that belongs to a parent construct (comment/string delimiters)
141    // These must be checked before the generic punctuation rule below.
142    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
143    if scope_lower.starts_with("punctuation.definition.comment") {
144        return Some(HighlightCategory::Comment);
145    }
146    if scope_lower.starts_with("punctuation.definition.string") {
147        return Some(HighlightCategory::String);
148    }
149
150    // Operators (keyword.operator only)
151    if scope_lower.starts_with("keyword.operator") {
152        return Some(HighlightCategory::Operator);
153    }
154
155    // Punctuation brackets ({, }, (, ), [, ], <, >)
156    // Covers punctuation.section.*, punctuation.bracket.*,
157    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
158    if scope_lower.starts_with("punctuation.section")
159        || scope_lower.starts_with("punctuation.bracket")
160        || scope_lower.starts_with("punctuation.definition.array")
161        || scope_lower.starts_with("punctuation.definition.block")
162        || scope_lower.starts_with("punctuation.definition.brackets")
163        || scope_lower.starts_with("punctuation.definition.group")
164        || scope_lower.starts_with("punctuation.definition.inline-table")
165        || scope_lower.starts_with("punctuation.definition.section")
166        || scope_lower.starts_with("punctuation.definition.table")
167        || scope_lower.starts_with("punctuation.definition.tag")
168    {
169        return Some(HighlightCategory::PunctuationBracket);
170    }
171
172    // Punctuation delimiters (;, ,, .)
173    if scope_lower.starts_with("punctuation.separator")
174        || scope_lower.starts_with("punctuation.terminator")
175        || scope_lower.starts_with("punctuation.accessor")
176    {
177        return Some(HighlightCategory::PunctuationDelimiter);
178    }
179
180    // Functions
181    if scope_lower.starts_with("entity.name.function")
182        || scope_lower.starts_with("support.function")
183        || scope_lower.starts_with("meta.function-call")
184        || scope_lower.starts_with("variable.function")
185    {
186        return Some(HighlightCategory::Function);
187    }
188
189    // Types
190    if scope_lower.starts_with("entity.name.type")
191        || scope_lower.starts_with("entity.name.class")
192        || scope_lower.starts_with("entity.name.struct")
193        || scope_lower.starts_with("entity.name.enum")
194        || scope_lower.starts_with("entity.name.interface")
195        || scope_lower.starts_with("entity.name.trait")
196        || scope_lower.starts_with("support.type")
197        || scope_lower.starts_with("support.class")
198    {
199        return Some(HighlightCategory::Type);
200    }
201
202    // Storage keywords (class, def, function, var, let, const, etc.) and modifiers
203    if scope_lower.starts_with("storage.type") || scope_lower.starts_with("storage.modifier") {
204        return Some(HighlightCategory::Keyword);
205    }
206
207    // Constants and numbers
208    if scope_lower.starts_with("constant.numeric")
209        || scope_lower.starts_with("constant.language.boolean")
210    {
211        return Some(HighlightCategory::Number);
212    }
213    if scope_lower.starts_with("constant") {
214        return Some(HighlightCategory::Constant);
215    }
216
217    // Variables
218    if scope_lower.starts_with("variable.language") {
219        return Some(HighlightCategory::VariableBuiltin);
220    }
221    if scope_lower.starts_with("variable.parameter") || scope_lower.starts_with("variable.other") {
222        return Some(HighlightCategory::Variable);
223    }
224
225    // Properties / object keys
226    if scope_lower.starts_with("entity.name.tag")
227        || scope_lower.starts_with("support.other.property")
228        || scope_lower.starts_with("meta.object-literal.key")
229        || scope_lower.starts_with("variable.other.property")
230        || scope_lower.starts_with("variable.other.object.property")
231    {
232        return Some(HighlightCategory::Property);
233    }
234
235    // Attributes (decorators, annotations)
236    if scope_lower.starts_with("entity.other.attribute")
237        || scope_lower.starts_with("meta.attribute")
238        || scope_lower.starts_with("entity.name.decorator")
239    {
240        return Some(HighlightCategory::Attribute);
241    }
242
243    // Generic variable fallback
244    if scope_lower.starts_with("variable") {
245        return Some(HighlightCategory::Variable);
246    }
247
248    None
249}
250
/// Unified highlighting engine supporting multiple backends.
///
/// Both backend payloads are boxed, so the engine state lives on the heap
/// rather than inline in the enum. `None` is the `Default` variant.
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighting (built-in languages)
    TreeSitter(Box<Highlighter>),
    /// TextMate grammar based highlighting
    TextMate(Box<TextMateEngine>),
    /// No highlighting available
    #[default]
    None,
}
262
/// TextMate highlighting engine. See module docs for the cache design.
pub struct TextMateEngine {
    // Shared grammar set; handed to syntect's `parse_line`.
    syntax_set: Arc<SyntaxSet>,
    // Index of this engine's syntax within `syntax_set.syntaxes()`.
    syntax_index: usize,
    // Positions of the checkpoint anchors; shifted by `notify_insert` /
    // `notify_delete` so they follow the text they were created at.
    checkpoint_markers: MarkerList,
    // Parser snapshot (parse state + scope stack) stored per checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    // Lowest byte offset touched by an edit that rendering has not yet
    // consumed; `None` when no edit is pending.
    dirty_from: Option<usize>,
    // Span cache; `None` until something has been parsed.
    cache: Option<TextMateCache>,
    // Buffer length compared against `buffer.len()` before the cache-hit
    // and forward-extension paths are taken.
    last_buffer_len: usize,
    // Tree-sitter language kept for non-highlighting features.
    ts_language: Option<Language>,
    // Performance counters exposed through `stats()`.
    stats: HighlightStats,
    // Scope→Category memo. Syntect Scope atoms are append-only-interned
    // globally, so entries never need invalidation.
    scope_category_cache: HashMap<syntect::parsing::Scope, Option<HighlightCategory>>,
}
279
/// Counters for monitoring highlighting performance in tests.
///
/// All counters start at zero (`Default`). The type is comparable so a
/// test can snapshot the stats and assert on the whole struct with
/// `assert_eq!` instead of field by field.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct HighlightStats {
    /// Number of bytes parsed by syntect (total across all highlight_viewport calls).
    pub bytes_parsed: usize,
    /// Number of highlight_viewport calls that hit the span cache.
    pub cache_hits: usize,
    /// Number of highlight_viewport calls that missed the cache and re-parsed.
    pub cache_misses: usize,
    /// Number of checkpoint states updated during convergence.
    pub checkpoints_updated: usize,
    /// Number of times convergence was detected (state matched existing checkpoint).
    pub convergences: usize,
}
294
/// Cached highlighting result for one contiguous byte range of the buffer.
#[derive(Debug, Clone)]
struct TextMateCache {
    // Byte range of the buffer that `spans` describes.
    range: Range<usize>,
    // Category-carrying spans, stored with absolute buffer byte offsets.
    spans: Vec<CachedSpan>,
    // Parse state at `range.end`; powers forward extension. None when the
    // last mutation didn't end at `range.end`.
    tail_state: Option<(syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
}
303
304#[derive(Debug, Clone)]
305struct CachedSpan {
306    range: Range<usize>,
307    category: crate::primitives::highlighter::HighlightCategory,
308}
309
/// Small/large file threshold (whole-file cache vs viewport window).
/// 1 MiB: buffers at or below this size are parsed as a whole; it also
/// bounds how far back the partial-update path searches for a checkpoint.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Distance between checkpoint anchors. Smaller = faster convergence on edit.
/// Measured in bytes; a new checkpoint is skipped when another one already
/// sits within half this distance.
const CHECKPOINT_INTERVAL: usize = 256;

/// Per-pass cap on partial-update parsing past `dirty_pos`. Bounds work for
/// pathological edits whose effect doesn't converge. 64 KiB.
const CONVERGENCE_BUDGET: usize = 64 * 1024;
319
/// Byte position one past the end of the line that starts at `pos`.
///
/// A line is terminated by `\n`, by `\r\n` (consumed as a pair), or by a
/// lone `\r`. When no terminator follows `pos` the result is
/// `content_bytes.len()` (the streaming tail). A `pos` at or beyond the
/// end of the slice is returned unchanged.
fn find_line_end(content_bytes: &[u8], pos: usize) -> usize {
    let total = content_bytes.len();
    if pos >= total {
        return pos;
    }

    let terminator = content_bytes[pos..]
        .iter()
        .position(|&byte| byte == b'\n' || byte == b'\r');

    match terminator {
        None => total,
        Some(relative) => {
            let at = pos + relative;
            let is_crlf =
                content_bytes[at] == b'\r' && content_bytes.get(at + 1) == Some(&b'\n');
            if is_crlf {
                at + 2
            } else {
                at + 1
            }
        }
    }
}
341
342/// UTF-8-decoded line ready to feed `state.parse_line`.
343struct PreparedLine {
344    /// What `parse_line` sees: line content always terminated by `\n`,
345    /// EXCEPT for the buffer's final partial line where no `\n` has
346    /// arrived yet (caller must not commit cache state past such a
347    /// line — see `extend_cache_forward`).
348    line_for_syntect: String,
349    /// Byte length of the line excluding `\r` / `\n` terminator.
350    line_content_len: usize,
351    /// Whether the original line ended with `\n` (true for every line
352    /// except the streaming tail).
353    ends_with_newline: bool,
354}
355
356/// Slice the line starting at `pos` from `content_bytes` and prepare
357/// it for `parse_line`. Returns `(line_end, line_byte_len, prepared)`:
358/// callers always advance by `line_byte_len` to `line_end`; `prepared`
359/// is `None` only when the line wasn't valid UTF-8 (skip & continue).
360fn prepare_line_at(content_bytes: &[u8], pos: usize) -> (usize, usize, Option<PreparedLine>) {
361    let line_end = find_line_end(content_bytes, pos);
362    let line_bytes = &content_bytes[pos..line_end];
363    let line_byte_len = line_bytes.len();
364    let prepared = std::str::from_utf8(line_bytes).ok().map(|line_str| {
365        let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
366        let ends_with_newline = line_str.ends_with('\n');
367        let is_streaming_tail = line_end == content_bytes.len() && !ends_with_newline;
368        let line_for_syntect = if is_streaming_tail {
369            line_content.to_string()
370        } else {
371            format!("{}\n", line_content)
372        };
373        PreparedLine {
374            line_for_syntect,
375            line_content_len: line_content.len(),
376            ends_with_newline,
377        }
378    });
379    (line_end, line_byte_len, prepared)
380}
381
382impl TextMateEngine {
383    /// Create a new TextMate engine for the given syntax
384    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
385        Self {
386            syntax_set,
387            syntax_index,
388            checkpoint_markers: MarkerList::new(),
389            checkpoint_states: HashMap::new(),
390            dirty_from: None,
391            cache: None,
392            last_buffer_len: 0,
393            ts_language: None,
394            stats: HighlightStats::default(),
395            scope_category_cache: HashMap::new(),
396        }
397    }
398
399    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
400    pub fn with_language(
401        syntax_set: Arc<SyntaxSet>,
402        syntax_index: usize,
403        ts_language: Option<Language>,
404    ) -> Self {
405        Self {
406            syntax_set,
407            syntax_index,
408            checkpoint_markers: MarkerList::new(),
409            checkpoint_states: HashMap::new(),
410            dirty_from: None,
411            cache: None,
412            last_buffer_len: 0,
413            ts_language,
414            stats: HighlightStats::default(),
415            scope_category_cache: HashMap::new(),
416        }
417    }
418
    /// Get performance stats for testing and diagnostics.
    ///
    /// Returns a borrow of the live counters; clone it to keep a snapshot.
    pub fn stats(&self) -> &HighlightStats {
        &self.stats
    }
423
    /// Reset performance counters.
    ///
    /// Every counter goes back to its `Default` value (zero); the cache
    /// and checkpoints are left untouched.
    pub fn reset_stats(&mut self) {
        self.stats = HighlightStats::default();
    }
428
    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
    ///
    /// `None` when the engine was built without one (e.g. via `new`).
    pub fn language(&self) -> Option<&Language> {
        self.ts_language.as_ref()
    }
433
434    /// Buffer-insert notification. Shifts span offsets in place and marks
435    /// the cache dirty so the partial-update path runs on next render.
436    pub fn notify_insert(&mut self, position: usize, length: usize) {
437        self.checkpoint_markers.adjust_for_insert(position, length);
438        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
439        if let Some(cache) = &mut self.cache {
440            for span in &mut cache.spans {
441                if span.range.start >= position {
442                    span.range.start += length;
443                    span.range.end += length;
444                } else if span.range.end > position {
445                    span.range.end += length;
446                }
447            }
448            if cache.range.end >= position {
449                cache.range.end += length;
450                if position < cache.range.end {
451                    cache.tail_state = None;
452                }
453            }
454        }
455    }
456
    /// Buffer-delete notification. Mirror of `notify_insert`.
    ///
    /// `length` bytes starting at byte offset `position` were removed.
    /// Checkpoint markers are shifted, `dirty_from` is lowered to
    /// `position` when it was unset or larger, and cached spans are
    /// shifted, clipped or dropped so their offsets match the new buffer.
    pub fn notify_delete(&mut self, position: usize, length: usize) {
        self.checkpoint_markers.adjust_for_delete(position, length);
        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
        if let Some(cache) = &mut self.cache {
            let delete_end = position + length;
            cache.spans.retain_mut(|span| {
                if span.range.start >= delete_end {
                    // Entirely after the deleted range: slide left.
                    span.range.start -= length;
                    span.range.end -= length;
                    true
                } else if span.range.end <= position {
                    // Entirely before the deleted range: untouched.
                    true
                } else if span.range.start >= position && span.range.end <= delete_end {
                    // Entirely inside the deleted range: gone.
                    false
                } else {
                    if span.range.start < position {
                        // Starts before the deletion: clip at the deletion
                        // point. Anything the span covered past the deleted
                        // range is dropped too, not re-attached.
                        span.range.end = position.min(span.range.end);
                    } else {
                        // Starts inside the deletion and ends after it:
                        // keep only the surviving tail, now at `position`.
                        span.range.start = position;
                        span.range.end = position + span.range.end.saturating_sub(delete_end);
                    }
                    // Drop spans that collapsed to nothing.
                    span.range.start < span.range.end
                }
            });
            // Shrink the cached range: by `length` when it extends past the
            // deletion, or down to `position` when it ended inside it.
            // NOTE(review): `cache.range.start` is not adjusted for deletions
            // that begin before it — confirm the re-parse paths account for that.
            if cache.range.end > delete_end {
                cache.range.end -= length;
            } else if cache.range.end > position {
                cache.range.end = position;
            }
            // The saved tail state is only trusted when the edit did not
            // land before the (adjusted) end of the cached range.
            if position < cache.range.end {
                cache.tail_state = None;
            }
        }
    }
492
493    /// Create a checkpoint at `current_offset` carrying the supplied
494    /// parse state, unless one already exists within half an interval
495    /// (which would shadow it). Callers gate on
496    /// `bytes_since_checkpoint >= CHECKPOINT_INTERVAL` to control
497    /// spacing.
498    fn maybe_create_checkpoint(
499        &mut self,
500        current_offset: usize,
501        state: &syntect::parsing::ParseState,
502        current_scopes: &syntect::parsing::ScopeStack,
503    ) {
504        let nearby = self.checkpoint_markers.query_range(
505            current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
506            current_offset + CHECKPOINT_INTERVAL / 2,
507        );
508        if nearby.is_empty() {
509            let marker_id = self.checkpoint_markers.create(current_offset, true);
510            self.checkpoint_states
511                .insert(marker_id, (state.clone(), current_scopes.clone()));
512        }
513    }
514
515    /// Drive `state.parse_line(prepared.line_for_syntect)` and emit one
516    /// span per category-carrying byte range via `on_span(start, end,
517    /// category)`. Returns `false` when `parse_line` errored — caller
518    /// should advance past the line and continue (state may have been
519    /// mutated mid-parse but is left as-is, matching prior behaviour).
520    ///
521    /// Span emission is in two passes: the op iterator emits the
522    /// segment between consecutive ops with the scope-stack-active
523    /// category, and a trailing segment covers `[syntect_offset,
524    /// line_content_len)` when the final op didn't reach end-of-line.
525    fn parse_line_into_spans(
526        &mut self,
527        state: &mut syntect::parsing::ParseState,
528        current_scopes: &mut syntect::parsing::ScopeStack,
529        prepared: &PreparedLine,
530        current_offset: usize,
531        mut on_span: impl FnMut(usize, usize, HighlightCategory),
532    ) -> bool {
533        let ops = match state.parse_line(&prepared.line_for_syntect, &self.syntax_set) {
534            Ok(ops) => ops,
535            Err(_) => return false,
536        };
537
538        let line_content_len = prepared.line_content_len;
539        let mut syntect_offset = 0;
540
541        for (op_offset, op) in ops {
542            let clamped_op_offset = op_offset.min(line_content_len);
543            if clamped_op_offset > syntect_offset {
544                if let Some(category) = self.scope_stack_to_category(current_scopes) {
545                    on_span(
546                        current_offset + syntect_offset,
547                        current_offset + clamped_op_offset,
548                        category,
549                    );
550                }
551            }
552            syntect_offset = clamped_op_offset;
553            #[allow(clippy::let_underscore_must_use)]
554            let _ = current_scopes.apply(&op);
555        }
556
557        if syntect_offset < line_content_len {
558            if let Some(category) = self.scope_stack_to_category(current_scopes) {
559                on_span(
560                    current_offset + syntect_offset,
561                    current_offset + line_content_len,
562                    category,
563                );
564            }
565        }
566        true
567    }
568
569    /// Highlight the visible viewport. Path selection is documented in the
570    /// module-level docs ("TextMate cache design").
571    /// Test-only: inspect the cache commit point (range.end) and
572    /// whether tail_state is populated. The cache must commit at a
573    /// newline boundary — anything past that risks streaming
574    /// forward-extension picking up where partial-line state poisoned
575    /// the parser, see `test_partial_trailing_line_not_committed_to_cache`.
576    #[cfg(test)]
577    pub fn cache_commit_for_test(&self) -> (usize, bool) {
578        match &self.cache {
579            Some(c) => (c.range.end, c.tail_state.is_some()),
580            None => (0, false),
581        }
582    }
583
584    pub fn highlight_viewport(
585        &mut self,
586        buffer: &Buffer,
587        viewport_start: usize,
588        viewport_end: usize,
589        theme: &Theme,
590        context_bytes: usize,
591    ) -> Vec<HighlightSpan> {
592        let buf_len = buffer.len();
593        let (desired_parse_start, parse_end) = if buf_len <= MAX_PARSE_BYTES {
594            (0, buf_len)
595        } else {
596            let s = viewport_start.saturating_sub(context_bytes);
597            let e = (viewport_end + context_bytes).min(buf_len);
598            (s, e)
599        };
600
601        let dirty = self.dirty_from.take();
602        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
603            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
604        });
605        let exact_cache_hit = cache_covers_viewport
606            && dirty.is_none()
607            && self.last_buffer_len == buffer.len()
608            && self
609                .cache
610                .as_ref()
611                .is_some_and(|c| c.range.end >= parse_end);
612
613        // Cache hit.
614        if exact_cache_hit {
615            self.stats.cache_hits += 1;
616            return self.filter_cached_spans(viewport_start, viewport_end, theme);
617        }
618
619        // Forward extension.
620        if dirty.is_none()
621            && cache_covers_viewport
622            && self.last_buffer_len == buffer.len()
623            && self
624                .cache
625                .as_ref()
626                .is_some_and(|c| c.range.end < parse_end && c.tail_state.is_some())
627        {
628            return self.extend_cache_forward(
629                buffer,
630                parse_end,
631                viewport_start,
632                viewport_end,
633                theme,
634            );
635        }
636
637        // Partial update.
638        if cache_covers_viewport && dirty.is_some() {
639            if let Some(dirty_pos) = dirty {
640                if dirty_pos < parse_end {
641                    if let Some(result) = self.try_partial_update(
642                        buffer,
643                        dirty_pos,
644                        desired_parse_start,
645                        parse_end,
646                        viewport_start,
647                        viewport_end,
648                        theme,
649                    ) {
650                        return result;
651                    }
652                } else {
653                    // Dirty region past viewport: cached spans are still valid.
654                    self.dirty_from = Some(dirty_pos);
655                    self.stats.cache_hits += 1;
656                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
657                }
658            }
659        } else if let Some(d) = dirty {
660            self.dirty_from = Some(d);
661        }
662
663        // Cold start / fallback.
664        self.full_parse(
665            buffer,
666            desired_parse_start,
667            parse_end,
668            viewport_start,
669            viewport_end,
670            theme,
671            context_bytes,
672        )
673    }
674
675    /// Filter cached spans for the viewport and resolve colors.
676    fn filter_cached_spans(
677        &self,
678        viewport_start: usize,
679        viewport_end: usize,
680        theme: &Theme,
681    ) -> Vec<HighlightSpan> {
682        let cache = self.cache.as_ref().unwrap();
683        cache
684            .spans
685            .iter()
686            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
687            .map(|span| HighlightSpan {
688                range: span.range.clone(),
689                color: highlight_color(span.category, theme),
690                bg: highlight_bg(span.category, theme),
691                category: Some(span.category),
692            })
693            .collect()
694    }
695
696    /// Partial update path. Returns `Some` whenever an anchor was available,
697    /// even on budget hit or EOF (see post-loop classification). `None` only
698    /// when no checkpoint anchor reaches the dirty point.
699    #[allow(clippy::too_many_arguments)]
700    fn try_partial_update(
701        &mut self,
702        buffer: &Buffer,
703        dirty_pos: usize,
704        desired_parse_start: usize,
705        parse_end: usize,
706        viewport_start: usize,
707        viewport_end: usize,
708        theme: &Theme,
709    ) -> Option<Vec<HighlightSpan>> {
710        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
711
712        // Find checkpoint before the dirty point (bounded search)
713        let (actual_start, mut state, mut current_scopes) = {
714            let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
715            let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
716            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
717            if let Some((id, cp_pos, _)) = nearest {
718                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
719                    (cp_pos, s.clone(), sc.clone())
720                } else {
721                    return None; // orphan, fall back
722                }
723            } else if parse_end <= MAX_PARSE_BYTES {
724                (
725                    0,
726                    syntect::parsing::ParseState::new(syntax),
727                    syntect::parsing::ScopeStack::new(),
728                )
729            } else {
730                return None; // large file, no nearby checkpoint, fall back
731            }
732        };
733
734        // Get markers from dirty point forward for convergence checking
735        let mut markers_ahead: Vec<(MarkerId, usize)> = self
736            .checkpoint_markers
737            .query_range(dirty_pos, parse_end)
738            .into_iter()
739            .map(|(id, start, _)| (id, start))
740            .collect();
741        markers_ahead.sort_by_key(|(_, pos)| *pos);
742        let mut marker_idx = 0;
743
744        // Parse from actual_start to parse_end, looking for convergence
745        let content_end = parse_end.min(buffer.len());
746        if actual_start >= content_end {
747            return None;
748        }
749        let content = buffer.slice_bytes(actual_start..content_end);
750        let content_str = match std::str::from_utf8(&content) {
751            Ok(s) => s,
752            Err(_) => return None,
753        };
754
755        let mut new_spans = Vec::new();
756        let content_bytes = content_str.as_bytes();
757        let mut pos = 0;
758        let mut current_offset = actual_start;
759        let mut converged_at: Option<usize> = None;
760        let mut budget_hit_at: Option<usize> = None;
761        let mut bytes_since_checkpoint: usize = 0;
762
763        while pos < content_bytes.len() {
764            // Create checkpoints in new territory
765            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
766                self.maybe_create_checkpoint(current_offset, &state, &current_scopes);
767                bytes_since_checkpoint = 0;
768            }
769
770            let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
771            // Collect spans for the dirty region
772            let collect_spans =
773                current_offset + line_byte_len > desired_parse_start.max(actual_start);
774            if let Some(prepared) = prepared {
775                let _ = self.parse_line_into_spans(
776                    &mut state,
777                    &mut current_scopes,
778                    &prepared,
779                    current_offset,
780                    |byte_start, byte_end, category| {
781                        if !collect_spans {
782                            return;
783                        }
784                        let clamped_start = byte_start.max(actual_start);
785                        if clamped_start < byte_end {
786                            new_spans.push(CachedSpan {
787                                range: clamped_start..byte_end,
788                                category,
789                            });
790                        }
791                    },
792                );
793            }
794
795            pos = line_end;
796            current_offset += line_byte_len;
797            bytes_since_checkpoint += line_byte_len;
798
799            // Check convergence at checkpoint markers
800            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
801            {
802                let (marker_id, _) = markers_ahead[marker_idx];
803                marker_idx += 1;
804                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
805                    if *stored == (state.clone(), current_scopes.clone()) {
806                        self.stats.convergences += 1;
807                        converged_at = Some(current_offset);
808                        break;
809                    }
810                }
811                self.stats.checkpoints_updated += 1;
812                self.checkpoint_states
813                    .insert(marker_id, (state.clone(), current_scopes.clone()));
814            }
815
816            if converged_at.is_some() {
817                break;
818            }
819
820            // Bound work per pass: pathological edits (e.g. unclosed `/*`
821            // re-scoping the rest of the file) can never converge. Stop here
822            // and resume from `current_offset` on the next render.
823            if current_offset.saturating_sub(dirty_pos) >= CONVERGENCE_BUDGET {
824                budget_hit_at = Some(current_offset);
825                break;
826            }
827        }
828
829        self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
830
831        // Splice classification: converged → clear dirty; budget hit → keep
832        // dirty for next pass; EOF → clear dirty.
833        let (splice_end, dirty_after) = if let Some(c) = converged_at {
834            (c, None)
835        } else if let Some(b) = budget_hit_at {
836            (b, Some(b))
837        } else {
838            (current_offset, None)
839        };
840
841        self.stats.cache_misses += 1; // partial update counts as a miss
842
843        Self::merge_adjacent_spans(&mut new_spans);
844
845        if let Some(cache) = &mut self.cache {
846            let splice_start = actual_start;
847            cache
848                .spans
849                .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
850            cache.spans.extend(new_spans);
851            cache.spans.sort_by_key(|s| s.range.start);
852            Self::merge_adjacent_spans(&mut cache.spans);
853            if splice_end > cache.range.end {
854                cache.range.end = splice_end;
855            }
856            cache.tail_state = None;
857        }
858
859        self.last_buffer_len = buffer.len();
860        self.dirty_from = dirty_after;
861
862        Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
863    }
864
865    /// Forward extension path (see module docs). Caller checks the cache
866    /// exists, has a `tail_state`, has no dirty edits, and `cache.range.end
867    /// < parse_end`.
868    fn extend_cache_forward(
869        &mut self,
870        buffer: &Buffer,
871        parse_end: usize,
872        viewport_start: usize,
873        viewport_end: usize,
874        theme: &Theme,
875    ) -> Vec<HighlightSpan> {
876        self.stats.cache_misses += 1;
877        let buf_len = buffer.len();
878        let parse_end = parse_end.min(buf_len);
879
880        let (extension_start, mut state, mut current_scopes) = {
881            let cache = self
882                .cache
883                .as_ref()
884                .expect("extend_cache_forward: cache must exist");
885            let (s, sc) = cache
886                .tail_state
887                .as_ref()
888                .expect("extend_cache_forward: tail_state must exist")
889                .clone();
890            (cache.range.end, s, sc)
891        };
892
893        if parse_end <= extension_start {
894            return self.filter_cached_spans(viewport_start, viewport_end, theme);
895        }
896
897        let content = buffer.slice_bytes(extension_start..parse_end);
898        let content_str = match std::str::from_utf8(&content) {
899            Ok(s) => s,
900            Err(_) => return self.filter_cached_spans(viewport_start, viewport_end, theme),
901        };
902
903        let mut new_spans = Vec::new();
904        let content_bytes = content_str.as_bytes();
905        let mut pos = 0;
906        let mut current_offset = extension_start;
907        let mut bytes_since_checkpoint: usize = 0;
908        // Snapshot of the last newline-aligned cache commit point. We never
909        // commit parse state for a partial trailing line: with a streaming
910        // grammar like syntect's `Diff` (line-anchored `^\+.*` etc.) the
911        // state at end-of-input has already popped `markup.inserted`, so
912        // resuming from there parses the rest of the same line in
913        // `source.diff` with no scope — bytes of the line streamed in
914        // later get default editor bg, producing the dark-bar artifact
915        // inside `+` lines. Re-parsing the trailing partial line on every
916        // refresh costs at most one extra `parse_line` and is correct.
917        let mut safe_offset = extension_start;
918        let mut safe_state = state.clone();
919        let mut safe_scopes = current_scopes.clone();
920
921        while pos < content_bytes.len() {
922            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
923                self.maybe_create_checkpoint(current_offset, &state, &current_scopes);
924                bytes_since_checkpoint = 0;
925            }
926
927            let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
928            let mut newline_terminated = false;
929            if let Some(prepared) = prepared {
930                let parse_ok = self.parse_line_into_spans(
931                    &mut state,
932                    &mut current_scopes,
933                    &prepared,
934                    current_offset,
935                    |byte_start, byte_end, category| {
936                        new_spans.push(CachedSpan {
937                            range: byte_start..byte_end,
938                            category,
939                        });
940                    },
941                );
942                if parse_ok {
943                    newline_terminated = prepared.ends_with_newline;
944                }
945            }
946
947            pos = line_end;
948            current_offset += line_byte_len;
949            bytes_since_checkpoint += line_byte_len;
950
951            if newline_terminated {
952                safe_offset = current_offset;
953                safe_state = state.clone();
954                safe_scopes = current_scopes.clone();
955            }
956        }
957
958        self.stats.bytes_parsed += parse_end - extension_start;
959
960        Self::merge_adjacent_spans(&mut new_spans);
961
962        // Split spans into safe (fully before the trailing partial line,
963        // cacheable) and unsafe (overlap the partial line, render-only).
964        // Unsafe spans are returned in this pass so the partial line is
965        // still highlighted, but won't be cached — they'll be recomputed
966        // on the next refresh once more bytes (and a newline) stream in.
967        let (safe_spans, unsafe_spans): (Vec<_>, Vec<_>) = new_spans
968            .into_iter()
969            .partition(|s| s.range.end <= safe_offset);
970
971        let cache = self
972            .cache
973            .as_mut()
974            .expect("extend_cache_forward: cache must still exist");
975        cache.spans.extend(safe_spans);
976        Self::merge_adjacent_spans(&mut cache.spans);
977        cache.range.end = safe_offset;
978        cache.tail_state = Some((safe_state, safe_scopes));
979        self.last_buffer_len = buf_len;
980
981        let mut result = self.filter_cached_spans(viewport_start, viewport_end, theme);
982        result.extend(
983            unsafe_spans
984                .into_iter()
985                .filter(|s| s.range.start < viewport_end && s.range.end > viewport_start)
986                .map(|s| HighlightSpan {
987                    range: s.range,
988                    color: highlight_color(s.category, theme),
989                    bg: highlight_bg(s.category, theme),
990                    category: Some(s.category),
991                }),
992        );
993        result
994    }
995
996    /// Full re-parse from desired_parse_start to parse_end. Used on cold start
997    /// or when partial update fails (no convergence).
998    #[allow(clippy::too_many_arguments)]
999    fn full_parse(
1000        &mut self,
1001        buffer: &Buffer,
1002        desired_parse_start: usize,
1003        parse_end: usize,
1004        viewport_start: usize,
1005        viewport_end: usize,
1006        theme: &Theme,
1007        _context_bytes: usize,
1008    ) -> Vec<HighlightSpan> {
1009        self.stats.cache_misses += 1;
1010        self.dirty_from = None; // consumed
1011
1012        if parse_end <= desired_parse_start {
1013            return Vec::new();
1014        }
1015
1016        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
1017        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
1018            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
1019
1020        let content = buffer.slice_bytes(actual_start..parse_end);
1021        let content_str = match std::str::from_utf8(&content) {
1022            Ok(s) => s,
1023            Err(_) => return Vec::new(),
1024        };
1025
1026        let mut spans = Vec::new();
1027        let content_bytes = content_str.as_bytes();
1028        let mut pos = 0;
1029        let mut current_offset = actual_start;
1030        let mut bytes_since_checkpoint: usize = 0;
1031        // See `extend_cache_forward` for rationale: never commit cache
1032        // state past the last newline. `safe_offset` ends up == parse_end
1033        // for buffers that end on `\n` (no behaviour change), and at the
1034        // start of the trailing partial line otherwise so the next
1035        // refresh re-parses it from scratch.
1036        let mut safe_offset = actual_start;
1037        let mut safe_state = state.clone();
1038        let mut safe_scopes = current_scopes.clone();
1039
1040        while pos < content_bytes.len() {
1041            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
1042                self.maybe_create_checkpoint(current_offset, &state, &current_scopes);
1043                bytes_since_checkpoint = 0;
1044            }
1045
1046            let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
1047            // Skip span collection for lines that ended before the viewport's
1048            // desired_parse_start — we still need to drive `parse_line` for
1049            // state continuity, but their spans wouldn't be returned anyway.
1050            let collect_spans = current_offset + line_byte_len > desired_parse_start;
1051            let mut newline_terminated = false;
1052            if let Some(prepared) = prepared {
1053                let parse_ok = self.parse_line_into_spans(
1054                    &mut state,
1055                    &mut current_scopes,
1056                    &prepared,
1057                    current_offset,
1058                    |byte_start, byte_end, category| {
1059                        if !collect_spans {
1060                            return;
1061                        }
1062                        let clamped_start = byte_start.max(desired_parse_start);
1063                        if clamped_start < byte_end {
1064                            spans.push(CachedSpan {
1065                                range: clamped_start..byte_end,
1066                                category,
1067                            });
1068                        }
1069                    },
1070                );
1071                if parse_ok {
1072                    newline_terminated = prepared.ends_with_newline;
1073                }
1074            }
1075
1076            pos = line_end;
1077            current_offset += line_byte_len;
1078            bytes_since_checkpoint += line_byte_len;
1079
1080            if newline_terminated {
1081                safe_offset = current_offset;
1082                safe_state = state.clone();
1083                safe_scopes = current_scopes.clone();
1084            }
1085
1086            // Update checkpoint states as we pass them. Done after the
1087            // line is parsed (state now reflects end-of-line) so a
1088            // checkpoint placed at the line's start position carries
1089            // the state at that position, ready to feed the next line.
1090            let markers_here: Vec<(MarkerId, usize)> = self
1091                .checkpoint_markers
1092                .query_range(current_offset.saturating_sub(line_byte_len), current_offset)
1093                .into_iter()
1094                .map(|(id, start, _)| (id, start))
1095                .collect();
1096            for (marker_id, _) in markers_here {
1097                self.checkpoint_states
1098                    .insert(marker_id, (state.clone(), current_scopes.clone()));
1099            }
1100        }
1101
1102        self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
1103
1104        Self::merge_adjacent_spans(&mut spans);
1105
1106        // Cache only the prefix up to the last newline. Spans straddling
1107        // or past the trailing partial line are returned for THIS render
1108        // pass (so the partial line is highlighted now), but excluded
1109        // from the cache — the next refresh re-parses them from
1110        // `safe_state` once more bytes have streamed in.
1111        let cache_range_end = safe_offset.max(desired_parse_start);
1112        let cached_spans: Vec<CachedSpan> = spans
1113            .iter()
1114            .filter(|s| s.range.end <= cache_range_end)
1115            .cloned()
1116            .collect();
1117
1118        self.cache = Some(TextMateCache {
1119            range: desired_parse_start..cache_range_end,
1120            spans: cached_spans,
1121            tail_state: Some((safe_state, safe_scopes)),
1122        });
1123        self.last_buffer_len = buffer.len();
1124
1125        spans
1126            .into_iter()
1127            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
1128            .map(|span| {
1129                let cat = span.category;
1130                HighlightSpan {
1131                    range: span.range,
1132                    color: highlight_color(cat, theme),
1133                    bg: highlight_bg(cat, theme),
1134                    category: Some(cat),
1135                }
1136            })
1137            .collect()
1138    }
1139
1140    /// Find the best point to resume parsing from for the viewport.
1141    fn find_parse_resume_point(
1142        &self,
1143        desired_start: usize,
1144        parse_end: usize,
1145        syntax: &syntect::parsing::SyntaxReference,
1146    ) -> (
1147        usize,
1148        syntect::parsing::ParseState,
1149        syntect::parsing::ScopeStack,
1150        bool,
1151    ) {
1152        use syntect::parsing::{ParseState, ScopeStack};
1153
1154        // Look for a checkpoint near the desired start. For large files, only
1155        // consider checkpoints that are within MAX_PARSE_BYTES of desired_start
1156        // to avoid parsing hundreds of MB from a distant checkpoint.
1157        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
1158        let markers = self
1159            .checkpoint_markers
1160            .query_range(search_start, desired_start + 1);
1161        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
1162
1163        if let Some((id, cp_pos, _)) = nearest {
1164            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
1165                return (cp_pos, s.clone(), sc.clone(), true);
1166            }
1167        }
1168
1169        if parse_end <= MAX_PARSE_BYTES {
1170            // File is small enough to parse from byte 0
1171            (0, ParseState::new(syntax), ScopeStack::new(), true)
1172        } else {
1173            // Large file, no nearby checkpoint — start fresh from desired_start.
1174            // Still create checkpoints so future visits to this region can resume.
1175            (
1176                desired_start,
1177                ParseState::new(syntax),
1178                ScopeStack::new(),
1179                true,
1180            )
1181        }
1182    }
1183
1184    /// Map scope stack to highlight category, memoising per-scope lookups.
1185    /// `scope.build_string()` is the costly step; the cache hides it after
1186    /// each scope atom has been seen once.
1187    fn scope_stack_to_category(
1188        &mut self,
1189        scopes: &syntect::parsing::ScopeStack,
1190    ) -> Option<HighlightCategory> {
1191        for scope in scopes.as_slice().iter().rev() {
1192            let cat = match self.scope_category_cache.get(scope) {
1193                Some(c) => *c,
1194                None => {
1195                    let computed = scope_to_category(&scope.build_string());
1196                    self.scope_category_cache.insert(*scope, computed);
1197                    computed
1198                }
1199            };
1200            if let Some(c) = cat {
1201                return Some(c);
1202            }
1203        }
1204        None
1205    }
1206
1207    /// Merge adjacent spans with same category
1208    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
1209        if spans.len() < 2 {
1210            return;
1211        }
1212
1213        let mut write_idx = 0;
1214        for read_idx in 1..spans.len() {
1215            if spans[write_idx].category == spans[read_idx].category
1216                && spans[write_idx].range.end == spans[read_idx].range.start
1217            {
1218                spans[write_idx].range.end = spans[read_idx].range.end;
1219            } else {
1220                write_idx += 1;
1221                if write_idx != read_idx {
1222                    spans[write_idx] = spans[read_idx].clone();
1223                }
1224            }
1225        }
1226        spans.truncate(write_idx + 1);
1227    }
1228
1229    /// Invalidate span cache for an edited range.
1230    /// Checkpoint positions are handled by notify_insert/notify_delete.
1231    /// The span cache is NOT cleared here — it will be patched (partial update)
1232    /// during the next highlight_viewport call using convergence. Only dirty_from
1233    /// (set by notify_insert/notify_delete) controls re-parsing scope.
1234    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
1235        // Intentionally does NOT clear self.cache.
1236        // The cache will be partially updated in highlight_viewport when
1237        // dirty_from is set. This avoids full re-parses for small edits.
1238    }
1239
1240    /// Invalidate all cache and checkpoints (file reload, language change, etc.)
1241    pub fn invalidate_all(&mut self) {
1242        self.cache = None;
1243        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1244        for id in ids {
1245            self.checkpoint_markers.delete(id);
1246        }
1247        self.checkpoint_states.clear();
1248        self.dirty_from = None;
1249    }
1250
1251    /// Get the highlight category at a byte position from the cache.
1252    ///
1253    /// Returns the category if the position falls within a cached highlight span.
1254    /// The position must be within the last highlighted viewport range for a result.
1255    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1256        let cache = self.cache.as_ref()?;
1257        cache
1258            .spans
1259            .iter()
1260            .find(|span| span.range.start <= position && position < span.range.end)
1261            .map(|span| span.category)
1262    }
1263
1264    /// Get syntax name
1265    pub fn syntax_name(&self) -> &str {
1266        &self.syntax_set.syntaxes()[self.syntax_index].name
1267    }
1268}
1269
1270impl HighlightEngine {
1271    /// Build a highlighting engine for a catalog entry.
1272    ///
1273    /// Single chokepoint for the "prefer syntect, fall back to tree-sitter"
1274    /// logic. Callers that start from a path or a syntax name should resolve
1275    /// the entry through `GrammarRegistry::find_by_path` / `find_by_name` and
1276    /// then call this.
1277    pub fn from_entry(
1278        entry: &crate::primitives::grammar::GrammarEntry,
1279        registry: &GrammarRegistry,
1280    ) -> Self {
1281        let syntax_set = registry.syntax_set_arc();
1282        if let Some(index) = entry.engines.syntect {
1283            return Self::TextMate(Box::new(TextMateEngine::with_language(
1284                syntax_set,
1285                index,
1286                entry.engines.tree_sitter,
1287            )));
1288        }
1289        if let Some(lang) = entry.engines.tree_sitter {
1290            if let Ok(highlighter) = Highlighter::new(lang) {
1291                return Self::TreeSitter(Box::new(highlighter));
1292            }
1293        }
1294        Self::None
1295    }
1296
1297    /// Create a highlighting engine for a file.
1298    ///
1299    /// Thin wrapper around `from_entry` that resolves the path via the catalog.
1300    /// User-config-declared filename/extension mappings are honoured as long as
1301    /// `GrammarRegistry::apply_language_config` has been called on the registry.
1302    /// `first_line` is used for shebang / first-line regex fallback — pass
1303    /// `None` when no content is available.
1304    pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1305        if let Some(entry) = registry.find_by_path(path, first_line) {
1306            return Self::from_entry(entry, registry);
1307        }
1308        Self::None
1309    }
1310
1311    /// Create a highlighting engine for a syntax by name.
1312    ///
1313    /// Thin wrapper around `from_entry` that performs the lookup via
1314    /// `find_by_name`. The catalog entry already knows which tree-sitter
1315    /// `Language` (if any) serves it, so no separate hint is needed.
1316    pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1317        if let Some(entry) = registry.find_by_name(name) {
1318            return Self::from_entry(entry, registry);
1319        }
1320        Self::None
1321    }
1322
1323    /// Highlight the visible viewport
1324    ///
1325    /// `context_bytes` controls how far before/after the viewport to parse for accurate
1326    /// highlighting of multi-line constructs (strings, comments, nested blocks).
1327    pub fn highlight_viewport(
1328        &mut self,
1329        buffer: &Buffer,
1330        viewport_start: usize,
1331        viewport_end: usize,
1332        theme: &Theme,
1333        context_bytes: usize,
1334    ) -> Vec<HighlightSpan> {
1335        match self {
1336            Self::TreeSitter(h) => {
1337                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1338            }
1339            Self::TextMate(h) => {
1340                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1341            }
1342            Self::None => Vec::new(),
1343        }
1344    }
1345
1346    /// Notify the highlighting engine of a buffer insert (for checkpoint position tracking).
1347    pub fn notify_insert(&mut self, position: usize, length: usize) {
1348        if let Self::TextMate(h) = self {
1349            h.notify_insert(position, length);
1350        }
1351    }
1352
1353    /// Notify the highlighting engine of a buffer delete (for checkpoint position tracking).
1354    pub fn notify_delete(&mut self, position: usize, length: usize) {
1355        if let Self::TextMate(h) = self {
1356            h.notify_delete(position, length);
1357        }
1358    }
1359
1360    /// Invalidate cache for an edited range
1361    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1362        match self {
1363            Self::TreeSitter(h) => h.invalidate_range(edit_range),
1364            Self::TextMate(h) => h.invalidate_range(edit_range),
1365            Self::None => {}
1366        }
1367    }
1368
1369    /// Invalidate entire cache
1370    pub fn invalidate_all(&mut self) {
1371        match self {
1372            Self::TreeSitter(h) => h.invalidate_all(),
1373            Self::TextMate(h) => h.invalidate_all(),
1374            Self::None => {}
1375        }
1376    }
1377
1378    /// Track a sequence of bulk edits in the cache.
1379    ///
1380    /// Each edit is `(pos, del_len, ins_len)`. The slice must be sorted in
1381    /// descending position order — the same order `apply_bulk_edits` uses to
1382    /// mutate the buffer — so positions remain valid as the buffer changes.
1383    ///
1384    /// This mirrors the `notify_*` + `invalidate_range` pattern used by
1385    /// single-edit paths. It preserves the TextMate engine's checkpoints and
1386    /// dirty-from anchor (so the next render uses the partial-update path
1387    /// rather than a cold reparse from byte zero) and drops the tree-sitter
1388    /// viewport cache only when an edit overlaps it.
1389    pub fn notify_edits(&mut self, edits: &[(usize, usize, usize)]) {
1390        for &(pos, del_len, ins_len) in edits {
1391            if del_len > 0 {
1392                self.notify_delete(pos, del_len);
1393            }
1394            if ins_len > 0 {
1395                self.notify_insert(pos, ins_len);
1396            }
1397            let edit_end = pos + del_len.max(ins_len);
1398            self.invalidate_range(pos..edit_end);
1399        }
1400    }
1401
1402    /// Check if this engine has highlighting available
1403    pub fn has_highlighting(&self) -> bool {
1404        !matches!(self, Self::None)
1405    }
1406
1407    /// Get a description of the active backend
1408    pub fn backend_name(&self) -> &str {
1409        match self {
1410            Self::TreeSitter(_) => "tree-sitter",
1411            Self::TextMate(_) => "textmate",
1412            Self::None => "none",
1413        }
1414    }
1415
1416    /// Get performance stats (TextMate engine only).
1417    pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1418        if let Self::TextMate(h) = self {
1419            Some(h.stats())
1420        } else {
1421            None
1422        }
1423    }
1424
1425    /// Reset performance counters.
1426    pub fn reset_highlight_stats(&mut self) {
1427        if let Self::TextMate(h) = self {
1428            h.reset_stats();
1429        }
1430    }
1431
1432    /// Get the language/syntax name if available
1433    pub fn syntax_name(&self) -> Option<&str> {
1434        match self {
1435            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
1436            Self::TextMate(h) => Some(h.syntax_name()),
1437            Self::None => None,
1438        }
1439    }
1440
1441    /// Get the highlight category at a byte position from the cache.
1442    ///
1443    /// Returns the category if the position falls within a cached highlight span.
1444    /// Useful for detecting whether the cursor is inside a string, comment, etc.
1445    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1446        match self {
1447            Self::TreeSitter(h) => h.category_at_position(position),
1448            Self::TextMate(h) => h.category_at_position(position),
1449            Self::None => None,
1450        }
1451    }
1452
1453    /// Get the tree-sitter Language for non-highlighting features
1454    /// Returns the language even when using TextMate for highlighting
1455    pub fn language(&self) -> Option<&Language> {
1456        match self {
1457            Self::TreeSitter(h) => Some(h.language()),
1458            Self::TextMate(h) => h.language(),
1459            Self::None => None,
1460        }
1461    }
1462}
1463
1464/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
1465/// Returns spans with byte ranges relative to the input string.
1466///
1467/// This uses TextMate grammars via syntect which provides broader language coverage
1468/// than tree-sitter (~150+ languages vs ~17).
1469pub fn highlight_string(
1470    code: &str,
1471    lang_hint: &str,
1472    registry: &GrammarRegistry,
1473    theme: &Theme,
1474) -> Vec<HighlightSpan> {
1475    use syntect::parsing::{ParseState, ScopeStack};
1476
1477    // Find syntax by language token (handles aliases like "py" -> Python)
1478    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1479        Some(s) => s,
1480        None => return Vec::new(),
1481    };
1482
1483    let syntax_set = registry.syntax_set();
1484    let mut state = ParseState::new(syntax);
1485    let mut spans = Vec::new();
1486    let mut current_scopes = ScopeStack::new();
1487    let mut current_offset = 0;
1488
1489    // Parse line by line
1490    for line in code.split_inclusive('\n') {
1491        let line_start = current_offset;
1492        let line_len = line.len();
1493
1494        // Remove trailing newline for syntect, then add it back
1495        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1496        let line_for_syntect = if line.ends_with('\n') {
1497            format!("{}\n", line_content)
1498        } else {
1499            line_content.to_string()
1500        };
1501
1502        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1503            Ok(ops) => ops,
1504            Err(_) => {
1505                current_offset += line_len;
1506                continue;
1507            }
1508        };
1509
1510        let mut syntect_offset = 0;
1511        let line_content_len = line_content.len();
1512
1513        for (op_offset, op) in ops {
1514            let clamped_op_offset = op_offset.min(line_content_len);
1515            if clamped_op_offset > syntect_offset {
1516                if let Some(category) = scope_stack_to_category(&current_scopes) {
1517                    let byte_start = line_start + syntect_offset;
1518                    let byte_end = line_start + clamped_op_offset;
1519                    if byte_start < byte_end {
1520                        spans.push(HighlightSpan {
1521                            range: byte_start..byte_end,
1522                            color: highlight_color(category, theme),
1523                            bg: highlight_bg(category, theme),
1524                            category: Some(category),
1525                        });
1526                    }
1527                }
1528            }
1529            syntect_offset = clamped_op_offset;
1530            // Scope stack errors are non-fatal for highlighting
1531            #[allow(clippy::let_underscore_must_use)]
1532            let _ = current_scopes.apply(&op);
1533        }
1534
1535        // Handle remaining text on line
1536        if syntect_offset < line_content_len {
1537            if let Some(category) = scope_stack_to_category(&current_scopes) {
1538                let byte_start = line_start + syntect_offset;
1539                let byte_end = line_start + line_content_len;
1540                if byte_start < byte_end {
1541                    spans.push(HighlightSpan {
1542                        range: byte_start..byte_end,
1543                        color: highlight_color(category, theme),
1544                        bg: highlight_bg(category, theme),
1545                        category: Some(category),
1546                    });
1547                }
1548            }
1549        }
1550
1551        current_offset += line_len;
1552    }
1553
1554    // Merge adjacent spans with same color
1555    merge_adjacent_highlight_spans(&mut spans);
1556
1557    spans
1558}
1559
1560/// Map scope stack to highlight category (for highlight_string)
1561fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1562    for scope in scopes.as_slice().iter().rev() {
1563        let scope_str = scope.build_string();
1564        if let Some(cat) = scope_to_category(&scope_str) {
1565            return Some(cat);
1566        }
1567    }
1568    None
1569}
1570
1571/// Merge adjacent spans with same color
1572fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1573    if spans.len() < 2 {
1574        return;
1575    }
1576
1577    let mut write_idx = 0;
1578    for read_idx in 1..spans.len() {
1579        if spans[write_idx].color == spans[read_idx].color
1580            && spans[write_idx].range.end == spans[read_idx].range.start
1581        {
1582            spans[write_idx].range.end = spans[read_idx].range.end;
1583        } else {
1584            write_idx += 1;
1585            if write_idx != read_idx {
1586                spans[write_idx] = spans[read_idx].clone();
1587            }
1588        }
1589    }
1590    spans.truncate(write_idx + 1);
1591}
1592
1593#[cfg(test)]
1594mod tests {
1595    use crate::model::filesystem::StdFileSystem;
1596    use std::sync::Arc;
1597
1598    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1599        Arc::new(StdFileSystem)
1600    }
1601    use super::*;
1602    use crate::view::theme;
1603
1604    #[test]
1605    fn test_highlight_engine_default() {
1606        let engine = HighlightEngine::default();
1607        assert!(!engine.has_highlighting());
1608        assert_eq!(engine.backend_name(), "none");
1609    }
1610
1611    #[test]
1612    fn test_textmate_backend_selection() {
1613        let registry =
1614            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1615
1616        // Languages with TextMate grammars use TextMate for highlighting
1617        let engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1618        assert_eq!(engine.backend_name(), "textmate");
1619        // Tree-sitter language should still be detected for other features
1620        assert!(engine.language().is_some());
1621
1622        let engine = HighlightEngine::for_file(Path::new("test.py"), None, &registry);
1623        assert_eq!(engine.backend_name(), "textmate");
1624        assert!(engine.language().is_some());
1625
1626        // JavaScript is routed to tree-sitter (issue #899: syntect's JS
1627        // grammar bleeds template-literal string state past the closing
1628        // backtick).
1629        let engine = HighlightEngine::for_file(Path::new("test.js"), None, &registry);
1630        assert_eq!(engine.backend_name(), "tree-sitter");
1631        assert!(engine.language().is_some());
1632
1633        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
1634        let engine = HighlightEngine::for_file(Path::new("test.ts"), None, &registry);
1635        assert_eq!(engine.backend_name(), "tree-sitter");
1636        assert!(engine.language().is_some());
1637
1638        let engine = HighlightEngine::for_file(Path::new("test.tsx"), None, &registry);
1639        assert_eq!(engine.backend_name(), "tree-sitter");
1640        assert!(engine.language().is_some());
1641    }
1642
1643    #[test]
1644    fn test_tree_sitter_direct() {
1645        // Verify a tree-sitter highlighter can be created directly for a
1646        // bundled grammar (TypeScript — most grammars were dropped and are now
1647        // highlighted by syntect instead).
1648        let highlighter = Highlighter::new(Language::TypeScript);
1649        assert!(highlighter.is_ok());
1650    }
1651
1652    #[test]
1653    fn test_unknown_extension() {
1654        let registry =
1655            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1656
1657        // Unknown extension
1658        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), None, &registry);
1659        // Might be none or might find something via syntect
1660        // Just verify it doesn't panic
1661        let _ = engine.backend_name();
1662    }
1663
1664    #[test]
1665    fn test_highlight_viewport_empty_buffer_no_panic() {
1666        // Regression test: calling highlight_viewport with an empty buffer
1667        // and non-zero viewport range previously caused subtraction overflow panic.
1668        //
1669        // The bug occurred when:
1670        // - buffer is empty (len = 0)
1671        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
1672        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
1673        // - parse_end - parse_start would underflow (0 - positive = overflow)
1674        let registry =
1675            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1676
1677        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1678
1679        // Create empty buffer
1680        let buffer = Buffer::from_str("", 0, test_fs());
1681        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1682
1683        // Test the specific case that triggered the overflow:
1684        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
1685        // 0 - 90 = overflow!
1686        if let HighlightEngine::TextMate(ref mut tm) = engine {
1687            // Small context_bytes so parse_start remains > 0
1688            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1689            assert!(spans.is_empty());
1690        }
1691    }
1692
1693    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
1694    /// This is a regression test for a bug where using str::lines() caused 1-byte
1695    /// offset drift per line because it strips line terminators.
1696    #[test]
1697    fn test_textmate_engine_crlf_byte_offsets() {
1698        let registry =
1699            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1700
1701        let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, &registry);
1702
1703        // Create CRLF content with keywords on each line
1704        // Each "public" keyword should be highlighted at byte positions:
1705        // Line 1: "public" at bytes 0-5
1706        // Line 2: "public" at bytes 8-13 (after "public\r\n" = 8 bytes)
1707        // Line 3: "public" at bytes 16-21 (after two "public\r\n" = 16 bytes)
1708        let content = b"public\r\npublic\r\npublic\r\n";
1709        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1710        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1711
1712        if let HighlightEngine::TextMate(ref mut tm) = engine {
1713            // Highlight the entire content
1714            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1715
1716            // Find spans that cover keyword positions
1717            // The keyword "public" should have spans at these byte ranges:
1718            // Line 1: 0..6
1719            // Line 2: 8..14 (NOT 7..13 which would be the buggy offset)
1720            // Line 3: 16..22 (NOT 14..20 which would be the buggy offset)
1721
1722            eprintln!(
1723                "Spans: {:?}",
1724                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1725            );
1726
1727            // Check that we have spans covering the correct positions
1728            let has_span_at = |start: usize, end: usize| -> bool {
1729                spans
1730                    .iter()
1731                    .any(|s| s.range.start <= start && s.range.end >= end)
1732            };
1733
1734            // Line 1: "public" at bytes 0-6
1735            assert!(
1736                has_span_at(0, 6),
1737                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1738                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1739            );
1740
1741            // Line 2: "public" at bytes 8-14 (after "public\r\n")
1742            // If buggy, would be at 7-13
1743            assert!(
1744                has_span_at(8, 14),
1745                "Should have span covering bytes 8-14 (line 2 'public'). \
1746                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1747                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1748            );
1749
1750            // Line 3: "public" at bytes 16-22 (after two "public\r\n")
1751            // If buggy, would be at 14-20
1752            assert!(
1753                has_span_at(16, 22),
1754                "Should have span covering bytes 16-22 (line 3 'public'). \
1755                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1756                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1757            );
1758        } else {
1759            panic!("Expected TextMate engine for .java file");
1760        }
1761    }
1762
1763    /// When a buffer is parsed with no trailing newline (the streaming
1764    /// case for `git show` output between writes), the engine must not
1765    /// commit cache tail state at the end of the partial trailing line.
1766    /// With syntect's `Diff` grammar (line-anchored `^\+.*` etc.), the
1767    /// state at end-of-input has popped `markup.inserted`, so any
1768    /// follow-up parse from there would see the rest of the line as a
1769    /// new line in `source.diff` and emit no scope — losing the bg
1770    /// inside otherwise-green `+` lines.
1771    ///
1772    /// This test pins the boundary the cache commits at: after parsing
1773    /// a buffer ending mid-line, `cache.range.end` must be the last
1774    /// newline (or `desired_parse_start` if no newline was seen), not
1775    /// the end of the partial line.
1776    #[test]
1777    fn test_partial_trailing_line_not_committed_to_cache() {
1778        let registry =
1779            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1780        let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, &registry);
1781        let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();
1782
1783        // A complete `+` line followed by a partial `+` line (no \n).
1784        let content = "+complete\n+partial";
1785        let buffer = Buffer::from_str(content, 0, test_fs());
1786
1787        if let HighlightEngine::TextMate(ref mut tm) = engine {
1788            let _ = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1789            let (cache_end, has_tail) = tm.cache_commit_for_test();
1790            assert_eq!(
1791                cache_end,
1792                "+complete\n".len(),
1793                "cache should commit at the last newline, not into the partial \
1794                 trailing line — committing past the newline causes streaming \
1795                 forward-extension to parse the line's continuation in the wrong \
1796                 grammar context, losing the diff bg."
1797            );
1798            assert!(has_tail, "tail state should be saved at the safe boundary");
1799        }
1800    }
1801
1802    /// Reproduce: artifacts inside `+` lines whose content contains
1803    /// JS template literals — `\`...\`` with `${}` interpolation.
1804    /// The whole `+` line should be one contiguous Inserted span
1805    /// carrying `theme.diff_add_bg`, with no bg-less holes mid-line.
1806    #[test]
1807    fn test_diff_inserted_line_is_fully_covered() {
1808        let registry =
1809            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1810        let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, &registry);
1811        let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();
1812
1813        let content =
1814            "diff --git a/file.ts b/file.ts\n\
1815             index aaa..bbb 100644\n\
1816             --- a/file.ts\n\
1817             +++ b/file.ts\n\
1818             @@ -1,3 +1,5 @@\n\
1819             +${seen[g.subtree] > 1 ? `**Seen ${seen[g.subtree]}× — likely cross-subtree type seam.**` : \"\"}\n\
1820             +              const k = `${b.fn}::${(b.what || \"\").slice(0, 80)}`;\n";
1821        let buffer = Buffer::from_str(content, 0, test_fs());
1822
1823        if let HighlightEngine::TextMate(ref mut tm) = engine {
1824            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1825
1826            let bytes = content.as_bytes();
1827            let mut line_start = 0;
1828            while line_start < bytes.len() {
1829                let mut line_end = line_start;
1830                while line_end < bytes.len() && bytes[line_end] != b'\n' {
1831                    line_end += 1;
1832                }
1833                if bytes[line_start] == b'+' && !content[line_start..line_end].starts_with("+++") {
1834                    for byte_pos in line_start..line_end {
1835                        let span = spans
1836                            .iter()
1837                            .find(|s| s.range.start <= byte_pos && s.range.end > byte_pos);
1838                        let bg = span.and_then(|s| s.bg);
1839                        assert_eq!(
1840                            bg,
1841                            Some(theme.diff_add_bg),
1842                            "byte {} (`{}`) of `+` line starting at {} should carry diff_add_bg; \
1843                             got span={:?}",
1844                            byte_pos,
1845                            content[byte_pos..byte_pos + 1].escape_debug(),
1846                            line_start,
1847                            span,
1848                        );
1849                    }
1850                }
1851                line_start = line_end + 1;
1852            }
1853        } else {
1854            panic!("Expected TextMate engine for .diff file");
1855        }
1856    }
1857
1858    #[test]
1859    fn test_git_rebase_todo_highlighting() {
1860        let registry =
1861            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1862
1863        // git-rebase-todo files should use the Git Rebase Todo grammar
1864        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), None, &registry);
1865        assert_eq!(engine.backend_name(), "textmate");
1866        assert!(engine.has_highlighting());
1867    }
1868
1869    #[test]
1870    fn test_git_commit_message_highlighting() {
1871        let registry =
1872            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1873
1874        // COMMIT_EDITMSG should use the Git Commit Message grammar
1875        let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), None, &registry);
1876        assert_eq!(engine.backend_name(), "textmate");
1877        assert!(engine.has_highlighting());
1878
1879        // MERGE_MSG should also work
1880        let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), None, &registry);
1881        assert_eq!(engine.backend_name(), "textmate");
1882        assert!(engine.has_highlighting());
1883    }
1884
1885    #[test]
1886    fn test_gitignore_highlighting() {
1887        let registry =
1888            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1889
1890        // .gitignore should use the Gitignore grammar
1891        let engine = HighlightEngine::for_file(Path::new(".gitignore"), None, &registry);
1892        assert_eq!(engine.backend_name(), "textmate");
1893        assert!(engine.has_highlighting());
1894
1895        // .dockerignore should also work
1896        let engine = HighlightEngine::for_file(Path::new(".dockerignore"), None, &registry);
1897        assert_eq!(engine.backend_name(), "textmate");
1898        assert!(engine.has_highlighting());
1899    }
1900
1901    #[test]
1902    fn test_gitconfig_highlighting() {
1903        let registry =
1904            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1905
1906        // .gitconfig should use the Git Config grammar
1907        let engine = HighlightEngine::for_file(Path::new(".gitconfig"), None, &registry);
1908        assert_eq!(engine.backend_name(), "textmate");
1909        assert!(engine.has_highlighting());
1910
1911        // .gitmodules should also work
1912        let engine = HighlightEngine::for_file(Path::new(".gitmodules"), None, &registry);
1913        assert_eq!(engine.backend_name(), "textmate");
1914        assert!(engine.has_highlighting());
1915    }
1916
1917    #[test]
1918    fn test_gitattributes_highlighting() {
1919        let registry =
1920            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1921
1922        // .gitattributes should use the Git Attributes grammar
1923        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), None, &registry);
1924        assert_eq!(engine.backend_name(), "textmate");
1925        assert!(engine.has_highlighting());
1926    }
1927
1928    #[test]
1929    fn test_comment_delimiter_uses_comment_color() {
1930        // Comment delimiters (#, //, /*) should use comment color, not operator
1931        assert_eq!(
1932            scope_to_category("punctuation.definition.comment"),
1933            Some(HighlightCategory::Comment)
1934        );
1935        assert_eq!(
1936            scope_to_category("punctuation.definition.comment.python"),
1937            Some(HighlightCategory::Comment)
1938        );
1939        assert_eq!(
1940            scope_to_category("punctuation.definition.comment.begin"),
1941            Some(HighlightCategory::Comment)
1942        );
1943    }
1944
1945    #[test]
1946    fn test_variable_builtin_category() {
1947        assert_eq!(
1948            scope_to_category("variable.language.this"),
1949            Some(HighlightCategory::VariableBuiltin)
1950        );
1951        assert_eq!(
1952            scope_to_category("variable.language.super"),
1953            Some(HighlightCategory::VariableBuiltin)
1954        );
1955    }
1956
1957    #[test]
1958    fn test_string_delimiter_uses_string_color() {
1959        // String delimiters (", ', `) should use string color, not operator
1960        assert_eq!(
1961            scope_to_category("punctuation.definition.string.begin"),
1962            Some(HighlightCategory::String)
1963        );
1964        assert_eq!(
1965            scope_to_category("punctuation.definition.string.end"),
1966            Some(HighlightCategory::String)
1967        );
1968    }
1969
1970    #[test]
1971    fn test_punctuation_bracket() {
1972        // punctuation.section (TextMate standard for block delimiters)
1973        assert_eq!(
1974            scope_to_category("punctuation.section"),
1975            Some(HighlightCategory::PunctuationBracket)
1976        );
1977        assert_eq!(
1978            scope_to_category("punctuation.section.block.begin.c"),
1979            Some(HighlightCategory::PunctuationBracket)
1980        );
1981        assert_eq!(
1982            scope_to_category("punctuation.bracket"),
1983            Some(HighlightCategory::PunctuationBracket)
1984        );
1985        // punctuation.definition.* bracket-like scopes from sublime-syntax grammars
1986        assert_eq!(
1987            scope_to_category("punctuation.definition.array.begin.toml"),
1988            Some(HighlightCategory::PunctuationBracket)
1989        );
1990        assert_eq!(
1991            scope_to_category("punctuation.definition.block.code.typst"),
1992            Some(HighlightCategory::PunctuationBracket)
1993        );
1994        assert_eq!(
1995            scope_to_category("punctuation.definition.group.typst"),
1996            Some(HighlightCategory::PunctuationBracket)
1997        );
1998        assert_eq!(
1999            scope_to_category("punctuation.definition.inline-table.begin.toml"),
2000            Some(HighlightCategory::PunctuationBracket)
2001        );
2002        assert_eq!(
2003            scope_to_category("punctuation.definition.tag.end.svelte"),
2004            Some(HighlightCategory::PunctuationBracket)
2005        );
2006    }
2007
2008    #[test]
2009    fn test_punctuation_delimiter() {
2010        assert_eq!(
2011            scope_to_category("punctuation.separator"),
2012            Some(HighlightCategory::PunctuationDelimiter)
2013        );
2014        assert_eq!(
2015            scope_to_category("punctuation.terminator.statement.c"),
2016            Some(HighlightCategory::PunctuationDelimiter)
2017        );
2018        assert_eq!(
2019            scope_to_category("punctuation.accessor"),
2020            Some(HighlightCategory::PunctuationDelimiter)
2021        );
2022    }
2023
2024    #[test]
2025    fn test_storage_type_keyword() {
2026        assert_eq!(
2027            scope_to_category("storage.type"),
2028            Some(HighlightCategory::Keyword)
2029        );
2030        assert_eq!(
2031            scope_to_category("storage.type.class"),
2032            Some(HighlightCategory::Keyword)
2033        );
2034        assert_ne!(
2035            scope_to_category("storage.type"),
2036            Some(HighlightCategory::Type)
2037        );
2038    }
2039
2040    /// First parse of a small file populates a whole-file cache; subsequent
2041    /// scrolls anywhere in the file are exact cache hits with no extra parse
2042    /// work.
2043    #[test]
2044    fn test_small_file_scroll_is_cache_hit() {
2045        let registry =
2046            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2047        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2048
2049        let mut content = String::new();
2050        for i in 0..200 {
2051            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2052        }
2053        let buffer = Buffer::from_str(&content, 0, test_fs());
2054        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2055
2056        let HighlightEngine::TextMate(ref mut tm) = engine else {
2057            panic!("expected TextMate engine for .rs");
2058        };
2059
2060        // First call: cold start, full parse.
2061        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2062        let stats_after_first = tm.stats().clone();
2063        assert_eq!(
2064            stats_after_first.cache_hits, 0,
2065            "first call cannot hit cache"
2066        );
2067        assert_eq!(
2068            stats_after_first.cache_misses, 1,
2069            "first call must be a miss"
2070        );
2071
2072        // Scroll anywhere — top, middle, end. All must be cache hits.
2073        let mid = buffer.len() / 2;
2074        let near_end = buffer.len().saturating_sub(200);
2075        let probes = [(0, 200), (mid, mid + 200), (near_end, buffer.len())];
2076        for (vs, ve) in probes {
2077            let _ = tm.highlight_viewport(&buffer, vs, ve, &theme, 10_000);
2078        }
2079
2080        let stats_after_scroll = tm.stats().clone();
2081        assert_eq!(
2082            stats_after_scroll.cache_misses,
2083            1,
2084            "scrolling must not add cache misses (got extra: {})",
2085            stats_after_scroll.cache_misses - 1
2086        );
2087        assert_eq!(
2088            stats_after_scroll.cache_hits, 3,
2089            "all three scroll probes must hit the cache"
2090        );
2091        assert_eq!(
2092            stats_after_scroll.bytes_parsed, stats_after_first.bytes_parsed,
2093            "scrolling must not parse any new bytes"
2094        );
2095    }
2096
2097    /// After a small edit, the next render takes the partial-update path
2098    /// (convergence) and continues to serve cache hits afterwards. Crucially:
2099    /// the partial update parses far fewer bytes than the file is long.
2100    #[test]
2101    fn test_small_file_edit_uses_partial_update() {
2102        let registry =
2103            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2104        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2105
2106        let mut content = String::new();
2107        for i in 0..200 {
2108            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2109        }
2110        let buffer = Buffer::from_str(&content, 0, test_fs());
2111        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2112
2113        let HighlightEngine::TextMate(ref mut tm) = engine else {
2114            panic!("expected TextMate engine for .rs");
2115        };
2116
2117        // Warm cache.
2118        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2119        let bytes_before_edit = tm.stats().bytes_parsed;
2120        let buf_len = buffer.len();
2121        assert!(
2122            buf_len > 4000,
2123            "test needs a buffer larger than the partial-update region"
2124        );
2125
2126        // Simulate an edit deep in the file.
2127        let edit_pos = buf_len / 2;
2128        tm.notify_insert(edit_pos, 1);
2129        // The buffer itself doesn't change here (we test the engine in isolation),
2130        // but notify_insert sets dirty_from and shifts spans, which is what the
2131        // partial-update path consumes.
2132
2133        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2134        let bytes_after_edit = tm.stats().bytes_parsed;
2135        let parsed = bytes_after_edit - bytes_before_edit;
2136
2137        assert!(
2138            parsed < buf_len,
2139            "edit must not trigger a whole-file reparse (parsed {parsed}, file {buf_len})"
2140        );
2141    }
2142
2143    /// Bulk edits (multi-cursor typing, "select word + type letter" replace,
2144    /// search-replace, etc.) must take the same partial-update path as single
2145    /// edits. Regression for #1958: the previous code called `invalidate_all()`
2146    /// after a bulk edit, wiping every checkpoint and forcing a cold reparse
2147    /// from byte zero on the next keystroke.
2148    #[test]
2149    fn test_bulk_edit_uses_partial_update() {
2150        let registry =
2151            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2152        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2153
2154        let mut content = String::new();
2155        for i in 0..200 {
2156            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2157        }
2158        let buffer = Buffer::from_str(&content, 0, test_fs());
2159        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2160
2161        // Warm cache.
2162        let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2163        let bytes_before_edit = match &engine {
2164            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2165            _ => panic!("expected TextMate engine for .rs"),
2166        };
2167        let buf_len = buffer.len();
2168        assert!(
2169            buf_len > 4000,
2170            "test needs a buffer larger than the partial-update region"
2171        );
2172
2173        // Simulate "select a word, type a letter" deep in the file: a single
2174        // bulk edit that deletes 8 bytes and inserts 1 byte at the same
2175        // position. This is exactly the user-facing scenario in #1958.
2176        let edit_pos = buf_len / 2;
2177        let edits = vec![(edit_pos, 8usize, 1usize)];
2178        engine.notify_edits(&edits);
2179
2180        let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2181        let bytes_after_edit = match &engine {
2182            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2183            _ => unreachable!(),
2184        };
2185        let parsed = bytes_after_edit - bytes_before_edit;
2186
2187        assert!(
2188            parsed < buf_len,
2189            "bulk edit must not trigger a whole-file reparse \
2190             (parsed {parsed}, file {buf_len})"
2191        );
2192    }
2193
2194    /// Bulk edits whose positions are all outside the cached viewport must
2195    /// not invalidate the cache at all on the tree-sitter / `Highlighter`
2196    /// path. (TextMate has a richer convergence model, but for both engines
2197    /// the regression to guard against is: "any bulk edit, even a tiny one,
2198    /// destroys the cache and forces a full reparse.")
2199    #[test]
2200    fn test_bulk_edit_outside_cache_keeps_textmate_partial_update() {
2201        let registry =
2202            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2203        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2204
2205        let mut content = String::new();
2206        for i in 0..400 {
2207            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2208        }
2209        let buffer = Buffer::from_str(&content, 0, test_fs());
2210        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2211
2212        // Warm a viewport near the start.
2213        let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
2214        let bytes_before = match &engine {
2215            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2216            _ => panic!("expected TextMate engine for .rs"),
2217        };
2218
2219        // Apply a bulk edit far past the warmed viewport.
2220        let far_pos = buffer.len() - 100;
2221        engine.notify_edits(&[(far_pos, 3, 1)]);
2222
2223        // Re-render the original viewport. The partial-update path must keep
2224        // parsed bytes well below a whole-file reparse.
2225        let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
2226        let bytes_after = match &engine {
2227            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2228            _ => unreachable!(),
2229        };
2230        let parsed = bytes_after - bytes_before;
2231        let buf_len = buffer.len();
2232        assert!(
2233            parsed < buf_len,
2234            "bulk edit outside the viewport must not force a whole-file \
2235             reparse (parsed {parsed}, file {buf_len})"
2236        );
2237    }
2238
2239    /// Convergence budget caps per-pass work even when the parse state never
2240    /// agrees with any existing checkpoint. Without the cap, a non-converging
2241    /// edit would parse the rest of the file on every keystroke.
2242    #[test]
2243    fn test_partial_update_budget_caps_work() {
2244        let registry =
2245            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2246        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2247
2248        // Build a buffer comfortably larger than CONVERGENCE_BUDGET.
2249        let mut content = String::new();
2250        while content.len() < (CONVERGENCE_BUDGET * 4) {
2251            content.push_str("fn name() { let mut v = 0; v += 1; }\n");
2252        }
2253        let buffer = Buffer::from_str(&content, 0, test_fs());
2254        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2255
2256        let HighlightEngine::TextMate(ref mut tm) = engine else {
2257            panic!("expected TextMate engine for .rs");
2258        };
2259
2260        // Warm cache (whole-file parse).
2261        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2262        // Simulate an edit and force every checkpoint to disagree by clearing
2263        // their stored states. The convergence loop will look at each marker,
2264        // find the slot empty, and never converge.
2265        tm.notify_insert(100, 0);
2266        tm.checkpoint_states.clear();
2267
2268        let bytes_before = tm.stats().bytes_parsed;
2269        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2270        let parsed = tm.stats().bytes_parsed - bytes_before;
2271
2272        // Budget bounds the work to roughly CONVERGENCE_BUDGET past the dirty
2273        // point (plus the prefix back to the resume checkpoint). Allow a small
2274        // overshoot for the line that crossed the budget threshold.
2275        assert!(
2276            parsed <= CONVERGENCE_BUDGET + 4096,
2277            "partial update parsed {parsed}, expected <= {} \
2278             (budget {CONVERGENCE_BUDGET} + slack)",
2279            CONVERGENCE_BUDGET + 4096
2280        );
2281
2282        // Budget hit must leave dirty_from set for follow-up passes.
2283        assert!(
2284            tm.dirty_from.is_some(),
2285            "budget exit must keep dirty_from set"
2286        );
2287    }
2288
2289    /// Large files (above MAX_PARSE_BYTES) keep the existing windowed
2290    /// behaviour: parse range is bounded by ±context_bytes around the
2291    /// viewport, not the whole file.
2292    ///
2293    /// The viewport is placed past `MAX_PARSE_BYTES` so we exercise the
2294    /// "large file, no nearby checkpoint" branch in `find_parse_resume_point`
2295    /// — the symmetric branch that fires when `parse_end <= MAX_PARSE_BYTES`
2296    /// still parses from byte 0 even on big files (pre-existing behaviour,
2297    /// addressed in a later phase).
2298    #[test]
2299    fn test_large_file_uses_windowed_parse() {
2300        let registry =
2301            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2302        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2303
2304        // Build content well past MAX_PARSE_BYTES so we can put the viewport
2305        // beyond it.
2306        let line = "fn long_name_for_padding() { let v = 1; v + 1; }\n";
2307        let bytes_needed = MAX_PARSE_BYTES * 2;
2308        let lines_needed = bytes_needed / line.len() + 100;
2309        let mut content = String::with_capacity(lines_needed * line.len());
2310        for _ in 0..lines_needed {
2311            content.push_str(line);
2312        }
2313        assert!(content.len() > MAX_PARSE_BYTES * 2);
2314        let buffer = Buffer::from_str(&content, 0, test_fs());
2315        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2316
2317        let HighlightEngine::TextMate(ref mut tm) = engine else {
2318            panic!("expected TextMate engine for .rs");
2319        };
2320
2321        // Viewport past MAX_PARSE_BYTES: parse_end > MAX_PARSE_BYTES, so the
2322        // resume-from-byte-0 fallback in find_parse_resume_point doesn't fire.
2323        let context_bytes = 10_000usize;
2324        let viewport_start = MAX_PARSE_BYTES + 200_000;
2325        let viewport_end = viewport_start + 1000;
2326        let _ = tm.highlight_viewport(&buffer, viewport_start, viewport_end, &theme, context_bytes);
2327        let parsed = tm.stats().bytes_parsed;
2328
2329        // Windowed parse covers viewport ± context_bytes plus a tiny prefix
2330        // for the resume anchor. Allow generous slack (4×) but reject
2331        // anything close to whole-file.
2332        let window = (viewport_end - viewport_start) + 2 * context_bytes;
2333        assert!(
2334            parsed <= window * 4,
2335            "large file windowed parse should be ~{window} bytes, got {parsed} \
2336             (file {})",
2337            buffer.len()
2338        );
2339    }
2340
2341    /// Regression for issue #899: a class field initialised with an arrow
2342    /// function that returns a template literal must not bleed string
2343    /// highlighting onto the rest of the class body. The user-reported
2344    /// repro pinned the syntect JavaScript grammar to a string state from
2345    /// the trailing `;` until EOF; the constructor keyword, comments, and
2346    /// the closing `}` were all painted as a string.
2347    #[test]
2348    fn test_javascript_template_literal_does_not_bleed() {
2349        let registry =
2350            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2351        let mut engine = HighlightEngine::for_file(Path::new("repro.js"), None, &registry);
2352
2353        // Reproduction code from issue #899.
2354        let source = "class ExampleClass {\n\
2355                      \texampleFunction = exampleArg => `${exampleArg}`;\n\
2356                      \n\
2357                      \tconstructor() {\n\
2358                      \t\t// constructor body\n\
2359                      \t}\n\
2360                      \n\
2361                      \t/* multiline comment */\n\
2362                      }\n";
2363        let buffer = Buffer::from_str(source, 0, test_fs());
2364        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2365
2366        let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2367
2368        // The `constructor` keyword sits well after the template literal.
2369        // If string state bleeds, this position is reported as String.
2370        let ctor_pos = source.find("constructor").expect("locate constructor");
2371        let ctor_cat = engine.category_at_position(ctor_pos);
2372        assert_ne!(
2373            ctor_cat,
2374            Some(HighlightCategory::String),
2375            "constructor keyword must not inherit string state from earlier \
2376             template literal (got {:?})",
2377            ctor_cat,
2378        );
2379
2380        // The closing brace of the class — the very last non-whitespace char
2381        // — also lives outside any string in correct JS.
2382        let last_brace = source.rfind('}').expect("locate closing brace");
2383        let brace_cat = engine.category_at_position(last_brace);
2384        assert_ne!(
2385            brace_cat,
2386            Some(HighlightCategory::String),
2387            "closing class brace must not be highlighted as string \
2388             (got {:?})",
2389            brace_cat,
2390        );
2391    }
2392
2393    /// The closing `}` of a `${…}` template substitution and the closing
2394    /// backtick of the surrounding template literal must keep template
2395    /// string colouring — not inherit the `@variable` highlight from the
2396    /// substitution's expression. Tree-sitter-highlight emits one
2397    /// HighlightEnd event per started highlight; if the editor's
2398    /// span-flattening logic doesn't pop the inner `@variable` correctly
2399    /// when the substitution closes, the variable colour bleeds across
2400    /// `}` and the trailing `\`` until the next sibling capture (here,
2401    /// the `;` operator).
2402    #[test]
2403    fn test_javascript_template_substitution_closing_tokens_are_string() {
2404        let registry =
2405            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2406        let mut engine = HighlightEngine::for_file(Path::new("tmpl.js"), None, &registry);
2407
2408        // Minimal template literal: `${name}` — wrapped in a statement so
2409        // the parser sees a complete program.
2410        let source = "const x = `${name}`;\n";
2411        let buffer = Buffer::from_str(source, 0, test_fs());
2412        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2413
2414        let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2415
2416        // Locate the closing `}` of the substitution and the closing
2417        // backtick of the template literal.
2418        let close_brace = source
2419            .find("}`")
2420            .expect("locate substitution closing brace");
2421        let close_backtick = close_brace + 1;
2422
2423        // Sanity: the inner identifier `name` is correctly tagged as a
2424        // variable (this guards us against an unrelated regression where
2425        // the entire template gets typed wrong).
2426        let name_pos = source.find("name").expect("locate identifier");
2427        let name_cat = engine.category_at_position(name_pos);
2428        assert_eq!(
2429            name_cat,
2430            Some(HighlightCategory::Variable),
2431            "substitution identifier should be Variable (got {:?})",
2432            name_cat,
2433        );
2434
2435        // The closing `}` and `` ` `` live inside the surrounding
2436        // `template_string` node, so tree-sitter assigns them the
2437        // `@string` capture. They must surface as String here — not
2438        // as Variable (the previous symptom of the bleed) and not as
2439        // None (which would make the editor render them with the
2440        // default foreground colour, equally wrong).
2441        let brace_cat = engine.category_at_position(close_brace);
2442        assert_eq!(
2443            brace_cat,
2444            Some(HighlightCategory::String),
2445            "closing }} of ${{…}} must be String (got {:?})",
2446            brace_cat,
2447        );
2448        let backtick_cat = engine.category_at_position(close_backtick);
2449        assert_eq!(
2450            backtick_cat,
2451            Some(HighlightCategory::String),
2452            "closing backtick of template literal must be String \
2453             (got {:?})",
2454            backtick_cat,
2455        );
2456    }
2457}