Skip to main content

fresh/primitives/
highlight_engine.rs

1//! Unified highlighting engine over syntect (TextMate grammars) and
2//! tree-sitter. Syntect is the default; tree-sitter `Language` is still
3//! detected for non-highlighting features (indentation, semantic highlighting).
4//!
5//! # TextMate cache design
6//!
7//! Syntect's parser is a sequential state machine — it must process bytes
8//! in order from a known parse state to track multi-line constructs and
9//! embedded language transitions. To make scrolling cheap, the engine keeps
10//! a span cache, a `(ParseState, ScopeStack)` snapshot at the cache tail,
11//! and periodic checkpoint anchors to support resume-from-anywhere.
12//!
//! Four render-time paths, gated by what the cache covers:
14//!
15//! - **Cache hit** — cache fully covers the parse range and there's no
16//!   pending edit; filter cached spans for the viewport. Zero parse work.
17//! - **Forward extension** — cache covers the start of the parse range but
18//!   not its end; resume from `tail_state` and parse only the uncovered
19//!   tail bytes. Steady-state scroll path.
20//! - **Partial update** — there's a pending edit; resume from the nearest
21//!   checkpoint before the dirty point and parse forward looking for
22//!   convergence (state matches an existing checkpoint), bounded by a
23//!   per-pass byte budget so pathological edits can't degenerate into
24//!   whole-file reparses.
25//! - **Cold start / fallback** — no cache, or none of the above applies;
26//!   parse the appropriate range from a fresh state or nearest checkpoint.
27//!
28//! For files at or below `MAX_PARSE_BYTES` the parse range is the whole
29//! file, so the cache is whole-file after the first parse and scrolling
30//! becomes filter-only. Larger files use a viewport-centred window of
31//! `±context_bytes` and rely on the forward-extension path to keep
32//! scroll-cost bounded.
33//!
34//! Edits go through `notify_insert` / `notify_delete`, which shift cached
35//! span byte offsets in place, set `dirty_from`, and invalidate `tail_state`
36//! when the edit lies inside the cached range.
37
38use crate::model::buffer::Buffer;
39use crate::model::marker::{MarkerId, MarkerList};
40use crate::primitives::grammar::GrammarRegistry;
41use crate::primitives::highlighter::{
42    highlight_bg, highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
43};
44use crate::view::theme::Theme;
45use std::collections::HashMap;
46use std::ops::Range;
47use std::path::Path;
48use std::sync::Arc;
49use syntect::parsing::SyntaxSet;
50
51/// Map TextMate scope to highlight category
52fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
53    let scope_lower = scope.to_lowercase();
54
55    // Comments - highest priority
56    if scope_lower.starts_with("comment") {
57        return Some(HighlightCategory::Comment);
58    }
59
60    // Strings
61    if scope_lower.starts_with("string") {
62        return Some(HighlightCategory::String);
63    }
64
65    // Markdown/markup scopes - handle before generic keyword/punctuation checks
66    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
67    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
68    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
69        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
70    }
71    // Bold: markup.bold
72    if scope_lower.starts_with("markup.bold") {
73        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
74    }
75    // Italic: markup.italic
76    if scope_lower.starts_with("markup.italic") {
77        return Some(HighlightCategory::Variable); // Italic styled like variables
78    }
79    // Inline code and code blocks: markup.raw, markup.inline.raw
80    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
81        return Some(HighlightCategory::String); // Code styled like strings
82    }
83    // Links: markup.underline.link
84    if scope_lower.starts_with("markup.underline.link") {
85        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
86    }
87    // Generic underline (often links)
88    if scope_lower.starts_with("markup.underline") {
89        return Some(HighlightCategory::Function);
90    }
91    // Block quotes: markup.quote
92    if scope_lower.starts_with("markup.quote") {
93        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
94    }
95    // Lists: markup.list
96    if scope_lower.starts_with("markup.list") {
97        return Some(HighlightCategory::Operator); // List markers styled like operators
98    }
99    // Strikethrough: markup.strikethrough
100    if scope_lower.starts_with("markup.strikethrough") {
101        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
102    }
103
104    // Diff scopes (syntect's bundled `Diff` grammar). These scope the
105    // entire row, not just the leading +/-/@@ marker, so the renderer
106    // can paint a whole-line background by reading the span's bg.
107    //
108    //   markup.inserted.diff      — `+` line
109    //   markup.deleted.diff       — `-` line
110    //   meta.diff.range.unified   — `@@ ... @@` hunk header
111    //   markup.changed.*          — generic "changed" marker (rare)
112    //   meta.diff.header.*        — `diff --git`, `index ...`, file
113    //                               headers; render like Type so they
114    //                               stand out without a bg wash.
115    if scope_lower.starts_with("markup.inserted") {
116        return Some(HighlightCategory::Inserted);
117    }
118    if scope_lower.starts_with("markup.deleted") {
119        return Some(HighlightCategory::Deleted);
120    }
121    if scope_lower.starts_with("markup.changed") || scope_lower.starts_with("meta.diff.range") {
122        return Some(HighlightCategory::Changed);
123    }
124    if scope_lower.starts_with("meta.diff.header") {
125        return Some(HighlightCategory::Type);
126    }
127
128    // Keywords
129    if scope_lower.starts_with("keyword.control")
130        || scope_lower.starts_with("keyword.other")
131        || scope_lower.starts_with("keyword.declaration")
132        || scope_lower.starts_with("keyword")
133    {
134        // keyword.operator should map to Operator, not Keyword
135        if !scope_lower.starts_with("keyword.operator") {
136            return Some(HighlightCategory::Keyword);
137        }
138    }
139
140    // Punctuation that belongs to a parent construct (comment/string delimiters)
141    // These must be checked before the generic punctuation rule below.
142    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
143    if scope_lower.starts_with("punctuation.definition.comment") {
144        return Some(HighlightCategory::Comment);
145    }
146    if scope_lower.starts_with("punctuation.definition.string") {
147        return Some(HighlightCategory::String);
148    }
149
150    // Operators (keyword.operator only)
151    if scope_lower.starts_with("keyword.operator") {
152        return Some(HighlightCategory::Operator);
153    }
154
155    // Punctuation brackets ({, }, (, ), [, ], <, >)
156    // Covers punctuation.section.*, punctuation.bracket.*,
157    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
158    if scope_lower.starts_with("punctuation.section")
159        || scope_lower.starts_with("punctuation.bracket")
160        || scope_lower.starts_with("punctuation.definition.array")
161        || scope_lower.starts_with("punctuation.definition.block")
162        || scope_lower.starts_with("punctuation.definition.brackets")
163        || scope_lower.starts_with("punctuation.definition.group")
164        || scope_lower.starts_with("punctuation.definition.inline-table")
165        || scope_lower.starts_with("punctuation.definition.section")
166        || scope_lower.starts_with("punctuation.definition.table")
167        || scope_lower.starts_with("punctuation.definition.tag")
168    {
169        return Some(HighlightCategory::PunctuationBracket);
170    }
171
172    // Punctuation delimiters (;, ,, .)
173    if scope_lower.starts_with("punctuation.separator")
174        || scope_lower.starts_with("punctuation.terminator")
175        || scope_lower.starts_with("punctuation.accessor")
176    {
177        return Some(HighlightCategory::PunctuationDelimiter);
178    }
179
180    // Functions
181    if scope_lower.starts_with("entity.name.function")
182        || scope_lower.starts_with("support.function")
183        || scope_lower.starts_with("meta.function-call")
184        || scope_lower.starts_with("variable.function")
185    {
186        return Some(HighlightCategory::Function);
187    }
188
189    // Types
190    if scope_lower.starts_with("entity.name.type")
191        || scope_lower.starts_with("entity.name.class")
192        || scope_lower.starts_with("entity.name.struct")
193        || scope_lower.starts_with("entity.name.enum")
194        || scope_lower.starts_with("entity.name.interface")
195        || scope_lower.starts_with("entity.name.trait")
196        || scope_lower.starts_with("support.type")
197        || scope_lower.starts_with("support.class")
198        || scope_lower.starts_with("storage.type")
199    {
200        return Some(HighlightCategory::Type);
201    }
202
203    // Storage modifiers (pub, static, const as keywords)
204    if scope_lower.starts_with("storage.modifier") {
205        return Some(HighlightCategory::Keyword);
206    }
207
208    // Constants and numbers
209    if scope_lower.starts_with("constant.numeric")
210        || scope_lower.starts_with("constant.language.boolean")
211    {
212        return Some(HighlightCategory::Number);
213    }
214    if scope_lower.starts_with("constant") {
215        return Some(HighlightCategory::Constant);
216    }
217
218    // Variables
219    if scope_lower.starts_with("variable.parameter")
220        || scope_lower.starts_with("variable.other")
221        || scope_lower.starts_with("variable.language")
222    {
223        return Some(HighlightCategory::Variable);
224    }
225
226    // Properties / object keys
227    if scope_lower.starts_with("entity.name.tag")
228        || scope_lower.starts_with("support.other.property")
229        || scope_lower.starts_with("meta.object-literal.key")
230        || scope_lower.starts_with("variable.other.property")
231        || scope_lower.starts_with("variable.other.object.property")
232    {
233        return Some(HighlightCategory::Property);
234    }
235
236    // Attributes (decorators, annotations)
237    if scope_lower.starts_with("entity.other.attribute")
238        || scope_lower.starts_with("meta.attribute")
239        || scope_lower.starts_with("entity.name.decorator")
240    {
241        return Some(HighlightCategory::Attribute);
242    }
243
244    // Generic variable fallback
245    if scope_lower.starts_with("variable") {
246        return Some(HighlightCategory::Variable);
247    }
248
249    None
250}
251
/// Unified highlighting engine supporting multiple backends.
///
/// Exactly one backend is active per value; the default is [`HighlightEngine::None`].
/// Both backend payloads are boxed, so the enum itself stays pointer-sized
/// regardless of how large the engines are.
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighting (built-in languages)
    TreeSitter(Box<Highlighter>),
    /// TextMate grammar based highlighting
    TextMate(Box<TextMateEngine>),
    /// No highlighting available
    #[default]
    None,
}
263
/// TextMate highlighting engine. See module docs for the cache design.
pub struct TextMateEngine {
    // Shared syntect syntax definitions; handed to `parse_line` on every line.
    syntax_set: Arc<SyntaxSet>,
    // Index of the active syntax inside `syntax_set.syntaxes()`.
    syntax_index: usize,
    // Buffer positions of the checkpoint anchors; shifted on every
    // insert/delete notification so they track the text they were taken at.
    checkpoint_markers: MarkerList,
    // Parser snapshot saved for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    // Lowest byte position touched by an edit notification that has not been
    // handled yet; `highlight_viewport` takes it at the start of each call.
    dirty_from: Option<usize>,
    // Span cache from the most recent parse, if any.
    cache: Option<TextMateCache>,
    // Compared against `buffer.len()` in `highlight_viewport`; a mismatch
    // disables the cache-hit and forward-extension paths.
    last_buffer_len: usize,
    // Tree-sitter language exposed through `language()` for non-highlighting features.
    ts_language: Option<Language>,
    // Performance counters, see `HighlightStats`.
    stats: HighlightStats,
    // Scope→Category memo. Syntect Scope atoms are append-only-interned
    // globally, so entries never need invalidation.
    scope_category_cache: HashMap<syntect::parsing::Scope, Option<HighlightCategory>>,
}
280
/// Counters for monitoring highlighting performance in tests.
///
/// All counters start at zero (`Default`) and are cumulative until
/// `TextMateEngine::reset_stats` replaces them with a fresh default value.
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Number of bytes parsed by syntect (total across all highlight_viewport calls).
    pub bytes_parsed: usize,
    /// Number of highlight_viewport calls that hit the span cache.
    pub cache_hits: usize,
    /// Number of highlight_viewport calls that missed the cache and re-parsed.
    pub cache_misses: usize,
    /// Number of checkpoint states updated during convergence.
    pub checkpoints_updated: usize,
    /// Number of times convergence was detected (state matched existing checkpoint).
    pub convergences: usize,
}
295
/// Cached highlight spans plus the bookkeeping needed to reuse or extend them.
#[derive(Debug, Clone)]
struct TextMateCache {
    // Byte range of the buffer this cache claims to cover. `range.end` is
    // adjusted by the insert/delete notifications.
    range: Range<usize>,
    // Category-carrying spans in buffer byte offsets; shifted in place on edits.
    spans: Vec<CachedSpan>,
    // Parse state at `range.end`; powers forward extension. None when the
    // last mutation didn't end at `range.end`.
    tail_state: Option<(syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
}
304
/// One cached highlight span: a byte range and its category. Colors are
/// resolved against the theme only when the span is handed to the renderer.
#[derive(Debug, Clone)]
struct CachedSpan {
    // Byte range in buffer coordinates.
    range: Range<usize>,
    // Highlight category the range was classified as.
    category: crate::primitives::highlighter::HighlightCategory,
}
310
/// Small/large file threshold (whole-file cache vs viewport window), 1 MiB.
/// Buffers at or below this size are parsed in full; larger ones use a
/// window around the viewport.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Distance between checkpoint anchors, in bytes. Smaller = faster
/// convergence on edit.
const CHECKPOINT_INTERVAL: usize = 256;

/// Per-pass cap on partial-update parsing past `dirty_pos` (64 KiB). Bounds
/// work for pathological edits whose effect doesn't converge.
const CONVERGENCE_BUDGET: usize = 64 * 1024;
320
/// Byte position one past the end of the line that starts at `pos`.
///
/// A line is terminated by `\n`, by `\r\n` (both bytes are consumed), or by a
/// lone `\r`. When no terminator follows `pos` the result is
/// `content_bytes.len()` (the streaming tail). A `pos` beyond the end of the
/// slice is returned unchanged.
fn find_line_end(content_bytes: &[u8], pos: usize) -> usize {
    // Out-of-range start: nothing to scan.
    let Some(rest) = content_bytes.get(pos..) else {
        return pos;
    };

    match rest.iter().position(|&byte| byte == b'\n' || byte == b'\r') {
        // No terminator before the end of the buffer.
        None => content_bytes.len(),
        Some(relative) => {
            let terminator = pos + relative;
            let is_crlf = content_bytes[terminator] == b'\r'
                && content_bytes.get(terminator + 1) == Some(&b'\n');
            terminator + if is_crlf { 2 } else { 1 }
        }
    }
}
342
/// UTF-8-decoded line ready to feed `state.parse_line`.
struct PreparedLine {
    /// What `parse_line` sees: line content always terminated by `\n`,
    /// EXCEPT for the buffer's final partial line where no `\n` has
    /// arrived yet (caller must not commit cache state past such a
    /// line — see `extend_cache_forward`).
    line_for_syntect: String,
    /// Byte length of the line excluding `\r` / `\n` terminator.
    line_content_len: usize,
    /// Whether the original line ended with `\n`. False for the streaming
    /// tail, and also for a line whose terminator is a lone `\r`.
    ends_with_newline: bool,
}
356
357/// Slice the line starting at `pos` from `content_bytes` and prepare
358/// it for `parse_line`. Returns `(line_end, line_byte_len, prepared)`:
359/// callers always advance by `line_byte_len` to `line_end`; `prepared`
360/// is `None` only when the line wasn't valid UTF-8 (skip & continue).
361fn prepare_line_at(content_bytes: &[u8], pos: usize) -> (usize, usize, Option<PreparedLine>) {
362    let line_end = find_line_end(content_bytes, pos);
363    let line_bytes = &content_bytes[pos..line_end];
364    let line_byte_len = line_bytes.len();
365    let prepared = std::str::from_utf8(line_bytes).ok().map(|line_str| {
366        let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
367        let ends_with_newline = line_str.ends_with('\n');
368        let is_streaming_tail = line_end == content_bytes.len() && !ends_with_newline;
369        let line_for_syntect = if is_streaming_tail {
370            line_content.to_string()
371        } else {
372            format!("{}\n", line_content)
373        };
374        PreparedLine {
375            line_for_syntect,
376            line_content_len: line_content.len(),
377            ends_with_newline,
378        }
379    });
380    (line_end, line_byte_len, prepared)
381}
382
383impl TextMateEngine {
384    /// Create a new TextMate engine for the given syntax
385    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
386        Self {
387            syntax_set,
388            syntax_index,
389            checkpoint_markers: MarkerList::new(),
390            checkpoint_states: HashMap::new(),
391            dirty_from: None,
392            cache: None,
393            last_buffer_len: 0,
394            ts_language: None,
395            stats: HighlightStats::default(),
396            scope_category_cache: HashMap::new(),
397        }
398    }
399
400    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
401    pub fn with_language(
402        syntax_set: Arc<SyntaxSet>,
403        syntax_index: usize,
404        ts_language: Option<Language>,
405    ) -> Self {
406        Self {
407            syntax_set,
408            syntax_index,
409            checkpoint_markers: MarkerList::new(),
410            checkpoint_states: HashMap::new(),
411            dirty_from: None,
412            cache: None,
413            last_buffer_len: 0,
414            ts_language,
415            stats: HighlightStats::default(),
416            scope_category_cache: HashMap::new(),
417        }
418    }
419
    /// Get performance stats for testing and diagnostics.
    ///
    /// Returns a borrow of the engine's live counters; clone it to keep a snapshot.
    pub fn stats(&self) -> &HighlightStats {
        &self.stats
    }
424
    /// Reset performance counters by replacing them with a default
    /// `HighlightStats` value.
    pub fn reset_stats(&mut self) {
        self.stats = HighlightStats::default();
    }
429
    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
    ///
    /// `None` when the engine was built without one (e.g. via `new`).
    pub fn language(&self) -> Option<&Language> {
        self.ts_language.as_ref()
    }
434
435    /// Buffer-insert notification. Shifts span offsets in place and marks
436    /// the cache dirty so the partial-update path runs on next render.
437    pub fn notify_insert(&mut self, position: usize, length: usize) {
438        self.checkpoint_markers.adjust_for_insert(position, length);
439        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
440        if let Some(cache) = &mut self.cache {
441            for span in &mut cache.spans {
442                if span.range.start >= position {
443                    span.range.start += length;
444                    span.range.end += length;
445                } else if span.range.end > position {
446                    span.range.end += length;
447                }
448            }
449            if cache.range.end >= position {
450                cache.range.end += length;
451                if position < cache.range.end {
452                    cache.tail_state = None;
453                }
454            }
455        }
456    }
457
458    /// Buffer-delete notification. Mirror of `notify_insert`.
459    pub fn notify_delete(&mut self, position: usize, length: usize) {
460        self.checkpoint_markers.adjust_for_delete(position, length);
461        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
462        if let Some(cache) = &mut self.cache {
463            let delete_end = position + length;
464            cache.spans.retain_mut(|span| {
465                if span.range.start >= delete_end {
466                    span.range.start -= length;
467                    span.range.end -= length;
468                    true
469                } else if span.range.end <= position {
470                    true
471                } else if span.range.start >= position && span.range.end <= delete_end {
472                    false
473                } else {
474                    if span.range.start < position {
475                        span.range.end = position.min(span.range.end);
476                    } else {
477                        span.range.start = position;
478                        span.range.end = position + span.range.end.saturating_sub(delete_end);
479                    }
480                    span.range.start < span.range.end
481                }
482            });
483            if cache.range.end > delete_end {
484                cache.range.end -= length;
485            } else if cache.range.end > position {
486                cache.range.end = position;
487            }
488            if position < cache.range.end {
489                cache.tail_state = None;
490            }
491        }
492    }
493
494    /// Create a checkpoint at `current_offset` carrying the supplied
495    /// parse state, unless one already exists within half an interval
496    /// (which would shadow it). Callers gate on
497    /// `bytes_since_checkpoint >= CHECKPOINT_INTERVAL` to control
498    /// spacing.
499    fn maybe_create_checkpoint(
500        &mut self,
501        current_offset: usize,
502        state: &syntect::parsing::ParseState,
503        current_scopes: &syntect::parsing::ScopeStack,
504    ) {
505        let nearby = self.checkpoint_markers.query_range(
506            current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
507            current_offset + CHECKPOINT_INTERVAL / 2,
508        );
509        if nearby.is_empty() {
510            let marker_id = self.checkpoint_markers.create(current_offset, true);
511            self.checkpoint_states
512                .insert(marker_id, (state.clone(), current_scopes.clone()));
513        }
514    }
515
516    /// Drive `state.parse_line(prepared.line_for_syntect)` and emit one
517    /// span per category-carrying byte range via `on_span(start, end,
518    /// category)`. Returns `false` when `parse_line` errored — caller
519    /// should advance past the line and continue (state may have been
520    /// mutated mid-parse but is left as-is, matching prior behaviour).
521    ///
522    /// Span emission is in two passes: the op iterator emits the
523    /// segment between consecutive ops with the scope-stack-active
524    /// category, and a trailing segment covers `[syntect_offset,
525    /// line_content_len)` when the final op didn't reach end-of-line.
526    fn parse_line_into_spans(
527        &mut self,
528        state: &mut syntect::parsing::ParseState,
529        current_scopes: &mut syntect::parsing::ScopeStack,
530        prepared: &PreparedLine,
531        current_offset: usize,
532        mut on_span: impl FnMut(usize, usize, HighlightCategory),
533    ) -> bool {
534        let ops = match state.parse_line(&prepared.line_for_syntect, &self.syntax_set) {
535            Ok(ops) => ops,
536            Err(_) => return false,
537        };
538
539        let line_content_len = prepared.line_content_len;
540        let mut syntect_offset = 0;
541
542        for (op_offset, op) in ops {
543            let clamped_op_offset = op_offset.min(line_content_len);
544            if clamped_op_offset > syntect_offset {
545                if let Some(category) = self.scope_stack_to_category(current_scopes) {
546                    on_span(
547                        current_offset + syntect_offset,
548                        current_offset + clamped_op_offset,
549                        category,
550                    );
551                }
552            }
553            syntect_offset = clamped_op_offset;
554            #[allow(clippy::let_underscore_must_use)]
555            let _ = current_scopes.apply(&op);
556        }
557
558        if syntect_offset < line_content_len {
559            if let Some(category) = self.scope_stack_to_category(current_scopes) {
560                on_span(
561                    current_offset + syntect_offset,
562                    current_offset + line_content_len,
563                    category,
564                );
565            }
566        }
567        true
568    }
569
    /// Test-only: inspect the cache commit point (range.end) and
    /// whether tail_state is populated. The cache must commit at a
    /// newline boundary — anything past that risks streaming
    /// forward-extension picking up where partial-line state poisoned
    /// the parser, see `test_partial_trailing_line_not_committed_to_cache`.
    ///
    /// Returns `(0, false)` when there is no cache.
    #[cfg(test)]
    pub fn cache_commit_for_test(&self) -> (usize, bool) {
        match &self.cache {
            Some(c) => (c.range.end, c.tail_state.is_some()),
            None => (0, false),
        }
    }

    /// Highlight the visible viewport. Path selection is documented in the
    /// module-level docs ("TextMate cache design").
    ///
    /// `viewport_start..viewport_end` is the visible byte range. For buffers
    /// of at most `MAX_PARSE_BYTES` the parse range is the whole buffer;
    /// otherwise it is the viewport widened by `context_bytes` on each side
    /// (clamped to the buffer). `theme` is used to resolve span colors.
    pub fn highlight_viewport(
        &mut self,
        buffer: &Buffer,
        viewport_start: usize,
        viewport_end: usize,
        theme: &Theme,
        context_bytes: usize,
    ) -> Vec<HighlightSpan> {
        let buf_len = buffer.len();
        let (desired_parse_start, parse_end) = if buf_len <= MAX_PARSE_BYTES {
            (0, buf_len)
        } else {
            let s = viewport_start.saturating_sub(context_bytes);
            let e = (viewport_end + context_bytes).min(buf_len);
            (s, e)
        };

        // Consume the pending-edit marker; the branches below that do not
        // resolve it put it back.
        let dirty = self.dirty_from.take();
        // Despite the name, this only checks that the cached range contains
        // the START of the parse range; coverage of the end is checked
        // separately per path.
        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
        });
        let exact_cache_hit = cache_covers_viewport
            && dirty.is_none()
            && self.last_buffer_len == buffer.len()
            && self
                .cache
                .as_ref()
                .is_some_and(|c| c.range.end >= parse_end);

        // Cache hit.
        if exact_cache_hit {
            self.stats.cache_hits += 1;
            return self.filter_cached_spans(viewport_start, viewport_end, theme);
        }

        // Forward extension.
        if dirty.is_none()
            && cache_covers_viewport
            && self.last_buffer_len == buffer.len()
            && self
                .cache
                .as_ref()
                .is_some_and(|c| c.range.end < parse_end && c.tail_state.is_some())
        {
            return self.extend_cache_forward(
                buffer,
                parse_end,
                viewport_start,
                viewport_end,
                theme,
            );
        }

        // Partial update.
        if cache_covers_viewport && dirty.is_some() {
            if let Some(dirty_pos) = dirty {
                if dirty_pos < parse_end {
                    // NOTE(review): when this returns `None` the dirty marker
                    // is not restored before falling through to `full_parse`
                    // — presumably the full parse rebuilds the cache; confirm.
                    if let Some(result) = self.try_partial_update(
                        buffer,
                        dirty_pos,
                        desired_parse_start,
                        parse_end,
                        viewport_start,
                        viewport_end,
                        theme,
                    ) {
                        return result;
                    }
                } else {
                    // Dirty region past viewport: cached spans are still valid.
                    // The marker is restored so a later call still sees the edit.
                    self.dirty_from = Some(dirty_pos);
                    self.stats.cache_hits += 1;
                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
                }
            }
        } else if let Some(d) = dirty {
            // Cache does not reach the parse range: keep the edit pending.
            self.dirty_from = Some(d);
        }

        // Cold start / fallback.
        self.full_parse(
            buffer,
            desired_parse_start,
            parse_end,
            viewport_start,
            viewport_end,
            theme,
            context_bytes,
        )
    }
675
676    /// Filter cached spans for the viewport and resolve colors.
677    fn filter_cached_spans(
678        &self,
679        viewport_start: usize,
680        viewport_end: usize,
681        theme: &Theme,
682    ) -> Vec<HighlightSpan> {
683        let cache = self.cache.as_ref().unwrap();
684        cache
685            .spans
686            .iter()
687            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
688            .map(|span| HighlightSpan {
689                range: span.range.clone(),
690                color: highlight_color(span.category, theme),
691                bg: highlight_bg(span.category, theme),
692                category: Some(span.category),
693            })
694            .collect()
695    }
696
697    /// Partial update path. Returns `Some` whenever an anchor was available,
698    /// even on budget hit or EOF (see post-loop classification). `None` only
699    /// when no checkpoint anchor reaches the dirty point.
700    #[allow(clippy::too_many_arguments)]
701    fn try_partial_update(
702        &mut self,
703        buffer: &Buffer,
704        dirty_pos: usize,
705        desired_parse_start: usize,
706        parse_end: usize,
707        viewport_start: usize,
708        viewport_end: usize,
709        theme: &Theme,
710    ) -> Option<Vec<HighlightSpan>> {
711        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
712
713        // Find checkpoint before the dirty point (bounded search)
714        let (actual_start, mut state, mut current_scopes) = {
715            let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
716            let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
717            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
718            if let Some((id, cp_pos, _)) = nearest {
719                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
720                    (cp_pos, s.clone(), sc.clone())
721                } else {
722                    return None; // orphan, fall back
723                }
724            } else if parse_end <= MAX_PARSE_BYTES {
725                (
726                    0,
727                    syntect::parsing::ParseState::new(syntax),
728                    syntect::parsing::ScopeStack::new(),
729                )
730            } else {
731                return None; // large file, no nearby checkpoint, fall back
732            }
733        };
734
735        // Get markers from dirty point forward for convergence checking
736        let mut markers_ahead: Vec<(MarkerId, usize)> = self
737            .checkpoint_markers
738            .query_range(dirty_pos, parse_end)
739            .into_iter()
740            .map(|(id, start, _)| (id, start))
741            .collect();
742        markers_ahead.sort_by_key(|(_, pos)| *pos);
743        let mut marker_idx = 0;
744
745        // Parse from actual_start to parse_end, looking for convergence
746        let content_end = parse_end.min(buffer.len());
747        if actual_start >= content_end {
748            return None;
749        }
750        let content = buffer.slice_bytes(actual_start..content_end);
751        let content_str = match std::str::from_utf8(&content) {
752            Ok(s) => s,
753            Err(_) => return None,
754        };
755
756        let mut new_spans = Vec::new();
757        let content_bytes = content_str.as_bytes();
758        let mut pos = 0;
759        let mut current_offset = actual_start;
760        let mut converged_at: Option<usize> = None;
761        let mut budget_hit_at: Option<usize> = None;
762        let mut bytes_since_checkpoint: usize = 0;
763
764        while pos < content_bytes.len() {
765            // Create checkpoints in new territory
766            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
767                self.maybe_create_checkpoint(current_offset, &state, &current_scopes);
768                bytes_since_checkpoint = 0;
769            }
770
771            let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
772            // Collect spans for the dirty region
773            let collect_spans =
774                current_offset + line_byte_len > desired_parse_start.max(actual_start);
775            if let Some(prepared) = prepared {
776                let _ = self.parse_line_into_spans(
777                    &mut state,
778                    &mut current_scopes,
779                    &prepared,
780                    current_offset,
781                    |byte_start, byte_end, category| {
782                        if !collect_spans {
783                            return;
784                        }
785                        let clamped_start = byte_start.max(actual_start);
786                        if clamped_start < byte_end {
787                            new_spans.push(CachedSpan {
788                                range: clamped_start..byte_end,
789                                category,
790                            });
791                        }
792                    },
793                );
794            }
795
796            pos = line_end;
797            current_offset += line_byte_len;
798            bytes_since_checkpoint += line_byte_len;
799
800            // Check convergence at checkpoint markers
801            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
802            {
803                let (marker_id, _) = markers_ahead[marker_idx];
804                marker_idx += 1;
805                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
806                    if *stored == (state.clone(), current_scopes.clone()) {
807                        self.stats.convergences += 1;
808                        converged_at = Some(current_offset);
809                        break;
810                    }
811                }
812                self.stats.checkpoints_updated += 1;
813                self.checkpoint_states
814                    .insert(marker_id, (state.clone(), current_scopes.clone()));
815            }
816
817            if converged_at.is_some() {
818                break;
819            }
820
821            // Bound work per pass: pathological edits (e.g. unclosed `/*`
822            // re-scoping the rest of the file) can never converge. Stop here
823            // and resume from `current_offset` on the next render.
824            if current_offset.saturating_sub(dirty_pos) >= CONVERGENCE_BUDGET {
825                budget_hit_at = Some(current_offset);
826                break;
827            }
828        }
829
830        self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
831
832        // Splice classification: converged → clear dirty; budget hit → keep
833        // dirty for next pass; EOF → clear dirty.
834        let (splice_end, dirty_after) = if let Some(c) = converged_at {
835            (c, None)
836        } else if let Some(b) = budget_hit_at {
837            (b, Some(b))
838        } else {
839            (current_offset, None)
840        };
841
842        self.stats.cache_misses += 1; // partial update counts as a miss
843
844        Self::merge_adjacent_spans(&mut new_spans);
845
846        if let Some(cache) = &mut self.cache {
847            let splice_start = actual_start;
848            cache
849                .spans
850                .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
851            cache.spans.extend(new_spans);
852            cache.spans.sort_by_key(|s| s.range.start);
853            Self::merge_adjacent_spans(&mut cache.spans);
854            if splice_end > cache.range.end {
855                cache.range.end = splice_end;
856            }
857            cache.tail_state = None;
858        }
859
860        self.last_buffer_len = buffer.len();
861        self.dirty_from = dirty_after;
862
863        Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
864    }
865
866    /// Forward extension path (see module docs). Caller checks the cache
867    /// exists, has a `tail_state`, has no dirty edits, and `cache.range.end
868    /// < parse_end`.
869    fn extend_cache_forward(
870        &mut self,
871        buffer: &Buffer,
872        parse_end: usize,
873        viewport_start: usize,
874        viewport_end: usize,
875        theme: &Theme,
876    ) -> Vec<HighlightSpan> {
877        self.stats.cache_misses += 1;
878        let buf_len = buffer.len();
879        let parse_end = parse_end.min(buf_len);
880
881        let (extension_start, mut state, mut current_scopes) = {
882            let cache = self
883                .cache
884                .as_ref()
885                .expect("extend_cache_forward: cache must exist");
886            let (s, sc) = cache
887                .tail_state
888                .as_ref()
889                .expect("extend_cache_forward: tail_state must exist")
890                .clone();
891            (cache.range.end, s, sc)
892        };
893
894        if parse_end <= extension_start {
895            return self.filter_cached_spans(viewport_start, viewport_end, theme);
896        }
897
898        let content = buffer.slice_bytes(extension_start..parse_end);
899        let content_str = match std::str::from_utf8(&content) {
900            Ok(s) => s,
901            Err(_) => return self.filter_cached_spans(viewport_start, viewport_end, theme),
902        };
903
904        let mut new_spans = Vec::new();
905        let content_bytes = content_str.as_bytes();
906        let mut pos = 0;
907        let mut current_offset = extension_start;
908        let mut bytes_since_checkpoint: usize = 0;
909        // Snapshot of the last newline-aligned cache commit point. We never
910        // commit parse state for a partial trailing line: with a streaming
911        // grammar like syntect's `Diff` (line-anchored `^\+.*` etc.) the
912        // state at end-of-input has already popped `markup.inserted`, so
913        // resuming from there parses the rest of the same line in
914        // `source.diff` with no scope — bytes of the line streamed in
915        // later get default editor bg, producing the dark-bar artifact
916        // inside `+` lines. Re-parsing the trailing partial line on every
917        // refresh costs at most one extra `parse_line` and is correct.
918        let mut safe_offset = extension_start;
919        let mut safe_state = state.clone();
920        let mut safe_scopes = current_scopes.clone();
921
922        while pos < content_bytes.len() {
923            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
924                self.maybe_create_checkpoint(current_offset, &state, &current_scopes);
925                bytes_since_checkpoint = 0;
926            }
927
928            let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
929            let mut newline_terminated = false;
930            if let Some(prepared) = prepared {
931                let parse_ok = self.parse_line_into_spans(
932                    &mut state,
933                    &mut current_scopes,
934                    &prepared,
935                    current_offset,
936                    |byte_start, byte_end, category| {
937                        new_spans.push(CachedSpan {
938                            range: byte_start..byte_end,
939                            category,
940                        });
941                    },
942                );
943                if parse_ok {
944                    newline_terminated = prepared.ends_with_newline;
945                }
946            }
947
948            pos = line_end;
949            current_offset += line_byte_len;
950            bytes_since_checkpoint += line_byte_len;
951
952            if newline_terminated {
953                safe_offset = current_offset;
954                safe_state = state.clone();
955                safe_scopes = current_scopes.clone();
956            }
957        }
958
959        self.stats.bytes_parsed += parse_end - extension_start;
960
961        Self::merge_adjacent_spans(&mut new_spans);
962
963        // Split spans into safe (fully before the trailing partial line,
964        // cacheable) and unsafe (overlap the partial line, render-only).
965        // Unsafe spans are returned in this pass so the partial line is
966        // still highlighted, but won't be cached — they'll be recomputed
967        // on the next refresh once more bytes (and a newline) stream in.
968        let (safe_spans, unsafe_spans): (Vec<_>, Vec<_>) = new_spans
969            .into_iter()
970            .partition(|s| s.range.end <= safe_offset);
971
972        let cache = self
973            .cache
974            .as_mut()
975            .expect("extend_cache_forward: cache must still exist");
976        cache.spans.extend(safe_spans);
977        Self::merge_adjacent_spans(&mut cache.spans);
978        cache.range.end = safe_offset;
979        cache.tail_state = Some((safe_state, safe_scopes));
980        self.last_buffer_len = buf_len;
981
982        let mut result = self.filter_cached_spans(viewport_start, viewport_end, theme);
983        result.extend(
984            unsafe_spans
985                .into_iter()
986                .filter(|s| s.range.start < viewport_end && s.range.end > viewport_start)
987                .map(|s| HighlightSpan {
988                    range: s.range,
989                    color: highlight_color(s.category, theme),
990                    bg: highlight_bg(s.category, theme),
991                    category: Some(s.category),
992                }),
993        );
994        result
995    }
996
997    /// Full re-parse from desired_parse_start to parse_end. Used on cold start
998    /// or when partial update fails (no convergence).
999    #[allow(clippy::too_many_arguments)]
1000    fn full_parse(
1001        &mut self,
1002        buffer: &Buffer,
1003        desired_parse_start: usize,
1004        parse_end: usize,
1005        viewport_start: usize,
1006        viewport_end: usize,
1007        theme: &Theme,
1008        _context_bytes: usize,
1009    ) -> Vec<HighlightSpan> {
1010        self.stats.cache_misses += 1;
1011        self.dirty_from = None; // consumed
1012
1013        if parse_end <= desired_parse_start {
1014            return Vec::new();
1015        }
1016
1017        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
1018        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
1019            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
1020
1021        let content = buffer.slice_bytes(actual_start..parse_end);
1022        let content_str = match std::str::from_utf8(&content) {
1023            Ok(s) => s,
1024            Err(_) => return Vec::new(),
1025        };
1026
1027        let mut spans = Vec::new();
1028        let content_bytes = content_str.as_bytes();
1029        let mut pos = 0;
1030        let mut current_offset = actual_start;
1031        let mut bytes_since_checkpoint: usize = 0;
1032        // See `extend_cache_forward` for rationale: never commit cache
1033        // state past the last newline. `safe_offset` ends up == parse_end
1034        // for buffers that end on `\n` (no behaviour change), and at the
1035        // start of the trailing partial line otherwise so the next
1036        // refresh re-parses it from scratch.
1037        let mut safe_offset = actual_start;
1038        let mut safe_state = state.clone();
1039        let mut safe_scopes = current_scopes.clone();
1040
1041        while pos < content_bytes.len() {
1042            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
1043                self.maybe_create_checkpoint(current_offset, &state, &current_scopes);
1044                bytes_since_checkpoint = 0;
1045            }
1046
1047            let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
1048            // Skip span collection for lines that ended before the viewport's
1049            // desired_parse_start — we still need to drive `parse_line` for
1050            // state continuity, but their spans wouldn't be returned anyway.
1051            let collect_spans = current_offset + line_byte_len > desired_parse_start;
1052            let mut newline_terminated = false;
1053            if let Some(prepared) = prepared {
1054                let parse_ok = self.parse_line_into_spans(
1055                    &mut state,
1056                    &mut current_scopes,
1057                    &prepared,
1058                    current_offset,
1059                    |byte_start, byte_end, category| {
1060                        if !collect_spans {
1061                            return;
1062                        }
1063                        let clamped_start = byte_start.max(desired_parse_start);
1064                        if clamped_start < byte_end {
1065                            spans.push(CachedSpan {
1066                                range: clamped_start..byte_end,
1067                                category,
1068                            });
1069                        }
1070                    },
1071                );
1072                if parse_ok {
1073                    newline_terminated = prepared.ends_with_newline;
1074                }
1075            }
1076
1077            pos = line_end;
1078            current_offset += line_byte_len;
1079            bytes_since_checkpoint += line_byte_len;
1080
1081            if newline_terminated {
1082                safe_offset = current_offset;
1083                safe_state = state.clone();
1084                safe_scopes = current_scopes.clone();
1085            }
1086
1087            // Update checkpoint states as we pass them. Done after the
1088            // line is parsed (state now reflects end-of-line) so a
1089            // checkpoint placed at the line's start position carries
1090            // the state at that position, ready to feed the next line.
1091            let markers_here: Vec<(MarkerId, usize)> = self
1092                .checkpoint_markers
1093                .query_range(current_offset.saturating_sub(line_byte_len), current_offset)
1094                .into_iter()
1095                .map(|(id, start, _)| (id, start))
1096                .collect();
1097            for (marker_id, _) in markers_here {
1098                self.checkpoint_states
1099                    .insert(marker_id, (state.clone(), current_scopes.clone()));
1100            }
1101        }
1102
1103        self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
1104
1105        Self::merge_adjacent_spans(&mut spans);
1106
1107        // Cache only the prefix up to the last newline. Spans straddling
1108        // or past the trailing partial line are returned for THIS render
1109        // pass (so the partial line is highlighted now), but excluded
1110        // from the cache — the next refresh re-parses them from
1111        // `safe_state` once more bytes have streamed in.
1112        let cache_range_end = safe_offset.max(desired_parse_start);
1113        let cached_spans: Vec<CachedSpan> = spans
1114            .iter()
1115            .filter(|s| s.range.end <= cache_range_end)
1116            .cloned()
1117            .collect();
1118
1119        self.cache = Some(TextMateCache {
1120            range: desired_parse_start..cache_range_end,
1121            spans: cached_spans,
1122            tail_state: Some((safe_state, safe_scopes)),
1123        });
1124        self.last_buffer_len = buffer.len();
1125
1126        spans
1127            .into_iter()
1128            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
1129            .map(|span| {
1130                let cat = span.category;
1131                HighlightSpan {
1132                    range: span.range,
1133                    color: highlight_color(cat, theme),
1134                    bg: highlight_bg(cat, theme),
1135                    category: Some(cat),
1136                }
1137            })
1138            .collect()
1139    }
1140
1141    /// Find the best point to resume parsing from for the viewport.
1142    fn find_parse_resume_point(
1143        &self,
1144        desired_start: usize,
1145        parse_end: usize,
1146        syntax: &syntect::parsing::SyntaxReference,
1147    ) -> (
1148        usize,
1149        syntect::parsing::ParseState,
1150        syntect::parsing::ScopeStack,
1151        bool,
1152    ) {
1153        use syntect::parsing::{ParseState, ScopeStack};
1154
1155        // Look for a checkpoint near the desired start. For large files, only
1156        // consider checkpoints that are within MAX_PARSE_BYTES of desired_start
1157        // to avoid parsing hundreds of MB from a distant checkpoint.
1158        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
1159        let markers = self
1160            .checkpoint_markers
1161            .query_range(search_start, desired_start + 1);
1162        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
1163
1164        if let Some((id, cp_pos, _)) = nearest {
1165            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
1166                return (cp_pos, s.clone(), sc.clone(), true);
1167            }
1168        }
1169
1170        if parse_end <= MAX_PARSE_BYTES {
1171            // File is small enough to parse from byte 0
1172            (0, ParseState::new(syntax), ScopeStack::new(), true)
1173        } else {
1174            // Large file, no nearby checkpoint — start fresh from desired_start.
1175            // Still create checkpoints so future visits to this region can resume.
1176            (
1177                desired_start,
1178                ParseState::new(syntax),
1179                ScopeStack::new(),
1180                true,
1181            )
1182        }
1183    }
1184
1185    /// Map scope stack to highlight category, memoising per-scope lookups.
1186    /// `scope.build_string()` is the costly step; the cache hides it after
1187    /// each scope atom has been seen once.
1188    fn scope_stack_to_category(
1189        &mut self,
1190        scopes: &syntect::parsing::ScopeStack,
1191    ) -> Option<HighlightCategory> {
1192        for scope in scopes.as_slice().iter().rev() {
1193            let cat = match self.scope_category_cache.get(scope) {
1194                Some(c) => *c,
1195                None => {
1196                    let computed = scope_to_category(&scope.build_string());
1197                    self.scope_category_cache.insert(*scope, computed);
1198                    computed
1199                }
1200            };
1201            if let Some(c) = cat {
1202                return Some(c);
1203            }
1204        }
1205        None
1206    }
1207
1208    /// Merge adjacent spans with same category
1209    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
1210        if spans.len() < 2 {
1211            return;
1212        }
1213
1214        let mut write_idx = 0;
1215        for read_idx in 1..spans.len() {
1216            if spans[write_idx].category == spans[read_idx].category
1217                && spans[write_idx].range.end == spans[read_idx].range.start
1218            {
1219                spans[write_idx].range.end = spans[read_idx].range.end;
1220            } else {
1221                write_idx += 1;
1222                if write_idx != read_idx {
1223                    spans[write_idx] = spans[read_idx].clone();
1224                }
1225            }
1226        }
1227        spans.truncate(write_idx + 1);
1228    }
1229
1230    /// Invalidate span cache for an edited range.
1231    /// Checkpoint positions are handled by notify_insert/notify_delete.
1232    /// The span cache is NOT cleared here — it will be patched (partial update)
1233    /// during the next highlight_viewport call using convergence. Only dirty_from
1234    /// (set by notify_insert/notify_delete) controls re-parsing scope.
1235    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
1236        // Intentionally does NOT clear self.cache.
1237        // The cache will be partially updated in highlight_viewport when
1238        // dirty_from is set. This avoids full re-parses for small edits.
1239    }
1240
1241    /// Invalidate all cache and checkpoints (file reload, language change, etc.)
1242    pub fn invalidate_all(&mut self) {
1243        self.cache = None;
1244        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1245        for id in ids {
1246            self.checkpoint_markers.delete(id);
1247        }
1248        self.checkpoint_states.clear();
1249        self.dirty_from = None;
1250    }
1251
1252    /// Get the highlight category at a byte position from the cache.
1253    ///
1254    /// Returns the category if the position falls within a cached highlight span.
1255    /// The position must be within the last highlighted viewport range for a result.
1256    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1257        let cache = self.cache.as_ref()?;
1258        cache
1259            .spans
1260            .iter()
1261            .find(|span| span.range.start <= position && position < span.range.end)
1262            .map(|span| span.category)
1263    }
1264
1265    /// Get syntax name
1266    pub fn syntax_name(&self) -> &str {
1267        &self.syntax_set.syntaxes()[self.syntax_index].name
1268    }
1269}
1270
1271impl HighlightEngine {
1272    /// Build a highlighting engine for a catalog entry.
1273    ///
1274    /// Single chokepoint for the "prefer syntect, fall back to tree-sitter"
1275    /// logic. Callers that start from a path or a syntax name should resolve
1276    /// the entry through `GrammarRegistry::find_by_path` / `find_by_name` and
1277    /// then call this.
1278    pub fn from_entry(
1279        entry: &crate::primitives::grammar::GrammarEntry,
1280        registry: &GrammarRegistry,
1281    ) -> Self {
1282        let syntax_set = registry.syntax_set_arc();
1283        if let Some(index) = entry.engines.syntect {
1284            return Self::TextMate(Box::new(TextMateEngine::with_language(
1285                syntax_set,
1286                index,
1287                entry.engines.tree_sitter,
1288            )));
1289        }
1290        if let Some(lang) = entry.engines.tree_sitter {
1291            if let Ok(highlighter) = Highlighter::new(lang) {
1292                return Self::TreeSitter(Box::new(highlighter));
1293            }
1294        }
1295        Self::None
1296    }
1297
1298    /// Create a highlighting engine for a file.
1299    ///
1300    /// Thin wrapper around `from_entry` that resolves the path via the catalog.
1301    /// User-config-declared filename/extension mappings are honoured as long as
1302    /// `GrammarRegistry::apply_language_config` has been called on the registry.
1303    /// `first_line` is used for shebang / first-line regex fallback — pass
1304    /// `None` when no content is available.
1305    pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1306        if let Some(entry) = registry.find_by_path(path, first_line) {
1307            return Self::from_entry(entry, registry);
1308        }
1309        Self::None
1310    }
1311
1312    /// Create a highlighting engine for a syntax by name.
1313    ///
1314    /// Thin wrapper around `from_entry` that performs the lookup via
1315    /// `find_by_name`. The catalog entry already knows which tree-sitter
1316    /// `Language` (if any) serves it, so no separate hint is needed.
1317    pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1318        if let Some(entry) = registry.find_by_name(name) {
1319            return Self::from_entry(entry, registry);
1320        }
1321        Self::None
1322    }
1323
1324    /// Highlight the visible viewport
1325    ///
1326    /// `context_bytes` controls how far before/after the viewport to parse for accurate
1327    /// highlighting of multi-line constructs (strings, comments, nested blocks).
1328    pub fn highlight_viewport(
1329        &mut self,
1330        buffer: &Buffer,
1331        viewport_start: usize,
1332        viewport_end: usize,
1333        theme: &Theme,
1334        context_bytes: usize,
1335    ) -> Vec<HighlightSpan> {
1336        match self {
1337            Self::TreeSitter(h) => {
1338                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1339            }
1340            Self::TextMate(h) => {
1341                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1342            }
1343            Self::None => Vec::new(),
1344        }
1345    }
1346
1347    /// Notify the highlighting engine of a buffer insert (for checkpoint position tracking).
1348    pub fn notify_insert(&mut self, position: usize, length: usize) {
1349        if let Self::TextMate(h) = self {
1350            h.notify_insert(position, length);
1351        }
1352    }
1353
1354    /// Notify the highlighting engine of a buffer delete (for checkpoint position tracking).
1355    pub fn notify_delete(&mut self, position: usize, length: usize) {
1356        if let Self::TextMate(h) = self {
1357            h.notify_delete(position, length);
1358        }
1359    }
1360
1361    /// Invalidate cache for an edited range
1362    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1363        match self {
1364            Self::TreeSitter(h) => h.invalidate_range(edit_range),
1365            Self::TextMate(h) => h.invalidate_range(edit_range),
1366            Self::None => {}
1367        }
1368    }
1369
1370    /// Invalidate entire cache
1371    pub fn invalidate_all(&mut self) {
1372        match self {
1373            Self::TreeSitter(h) => h.invalidate_all(),
1374            Self::TextMate(h) => h.invalidate_all(),
1375            Self::None => {}
1376        }
1377    }
1378
1379    /// Track a sequence of bulk edits in the cache.
1380    ///
1381    /// Each edit is `(pos, del_len, ins_len)`. The slice must be sorted in
1382    /// descending position order — the same order `apply_bulk_edits` uses to
1383    /// mutate the buffer — so positions remain valid as the buffer changes.
1384    ///
1385    /// This mirrors the `notify_*` + `invalidate_range` pattern used by
1386    /// single-edit paths. It preserves the TextMate engine's checkpoints and
1387    /// dirty-from anchor (so the next render uses the partial-update path
1388    /// rather than a cold reparse from byte zero) and drops the tree-sitter
1389    /// viewport cache only when an edit overlaps it.
1390    pub fn notify_edits(&mut self, edits: &[(usize, usize, usize)]) {
1391        for &(pos, del_len, ins_len) in edits {
1392            if del_len > 0 {
1393                self.notify_delete(pos, del_len);
1394            }
1395            if ins_len > 0 {
1396                self.notify_insert(pos, ins_len);
1397            }
1398            let edit_end = pos + del_len.max(ins_len);
1399            self.invalidate_range(pos..edit_end);
1400        }
1401    }
1402
1403    /// Check if this engine has highlighting available
1404    pub fn has_highlighting(&self) -> bool {
1405        !matches!(self, Self::None)
1406    }
1407
1408    /// Get a description of the active backend
1409    pub fn backend_name(&self) -> &str {
1410        match self {
1411            Self::TreeSitter(_) => "tree-sitter",
1412            Self::TextMate(_) => "textmate",
1413            Self::None => "none",
1414        }
1415    }
1416
1417    /// Get performance stats (TextMate engine only).
1418    pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1419        if let Self::TextMate(h) = self {
1420            Some(h.stats())
1421        } else {
1422            None
1423        }
1424    }
1425
1426    /// Reset performance counters.
1427    pub fn reset_highlight_stats(&mut self) {
1428        if let Self::TextMate(h) = self {
1429            h.reset_stats();
1430        }
1431    }
1432
1433    /// Get the language/syntax name if available
1434    pub fn syntax_name(&self) -> Option<&str> {
1435        match self {
1436            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
1437            Self::TextMate(h) => Some(h.syntax_name()),
1438            Self::None => None,
1439        }
1440    }
1441
1442    /// Get the highlight category at a byte position from the cache.
1443    ///
1444    /// Returns the category if the position falls within a cached highlight span.
1445    /// Useful for detecting whether the cursor is inside a string, comment, etc.
1446    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1447        match self {
1448            Self::TreeSitter(h) => h.category_at_position(position),
1449            Self::TextMate(h) => h.category_at_position(position),
1450            Self::None => None,
1451        }
1452    }
1453
1454    /// Get the tree-sitter Language for non-highlighting features
1455    /// Returns the language even when using TextMate for highlighting
1456    pub fn language(&self) -> Option<&Language> {
1457        match self {
1458            Self::TreeSitter(h) => Some(h.language()),
1459            Self::TextMate(h) => h.language(),
1460            Self::None => None,
1461        }
1462    }
1463}
1464
1465/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
1466/// Returns spans with byte ranges relative to the input string.
1467///
1468/// This uses TextMate grammars via syntect which provides broader language coverage
1469/// than tree-sitter (~150+ languages vs ~17).
1470pub fn highlight_string(
1471    code: &str,
1472    lang_hint: &str,
1473    registry: &GrammarRegistry,
1474    theme: &Theme,
1475) -> Vec<HighlightSpan> {
1476    use syntect::parsing::{ParseState, ScopeStack};
1477
1478    // Find syntax by language token (handles aliases like "py" -> Python)
1479    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1480        Some(s) => s,
1481        None => return Vec::new(),
1482    };
1483
1484    let syntax_set = registry.syntax_set();
1485    let mut state = ParseState::new(syntax);
1486    let mut spans = Vec::new();
1487    let mut current_scopes = ScopeStack::new();
1488    let mut current_offset = 0;
1489
1490    // Parse line by line
1491    for line in code.split_inclusive('\n') {
1492        let line_start = current_offset;
1493        let line_len = line.len();
1494
1495        // Remove trailing newline for syntect, then add it back
1496        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1497        let line_for_syntect = if line.ends_with('\n') {
1498            format!("{}\n", line_content)
1499        } else {
1500            line_content.to_string()
1501        };
1502
1503        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1504            Ok(ops) => ops,
1505            Err(_) => {
1506                current_offset += line_len;
1507                continue;
1508            }
1509        };
1510
1511        let mut syntect_offset = 0;
1512        let line_content_len = line_content.len();
1513
1514        for (op_offset, op) in ops {
1515            let clamped_op_offset = op_offset.min(line_content_len);
1516            if clamped_op_offset > syntect_offset {
1517                if let Some(category) = scope_stack_to_category(&current_scopes) {
1518                    let byte_start = line_start + syntect_offset;
1519                    let byte_end = line_start + clamped_op_offset;
1520                    if byte_start < byte_end {
1521                        spans.push(HighlightSpan {
1522                            range: byte_start..byte_end,
1523                            color: highlight_color(category, theme),
1524                            bg: highlight_bg(category, theme),
1525                            category: Some(category),
1526                        });
1527                    }
1528                }
1529            }
1530            syntect_offset = clamped_op_offset;
1531            // Scope stack errors are non-fatal for highlighting
1532            #[allow(clippy::let_underscore_must_use)]
1533            let _ = current_scopes.apply(&op);
1534        }
1535
1536        // Handle remaining text on line
1537        if syntect_offset < line_content_len {
1538            if let Some(category) = scope_stack_to_category(&current_scopes) {
1539                let byte_start = line_start + syntect_offset;
1540                let byte_end = line_start + line_content_len;
1541                if byte_start < byte_end {
1542                    spans.push(HighlightSpan {
1543                        range: byte_start..byte_end,
1544                        color: highlight_color(category, theme),
1545                        bg: highlight_bg(category, theme),
1546                        category: Some(category),
1547                    });
1548                }
1549            }
1550        }
1551
1552        current_offset += line_len;
1553    }
1554
1555    // Merge adjacent spans with same color
1556    merge_adjacent_highlight_spans(&mut spans);
1557
1558    spans
1559}
1560
1561/// Map scope stack to highlight category (for highlight_string)
1562fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1563    for scope in scopes.as_slice().iter().rev() {
1564        let scope_str = scope.build_string();
1565        if let Some(cat) = scope_to_category(&scope_str) {
1566            return Some(cat);
1567        }
1568    }
1569    None
1570}
1571
1572/// Merge adjacent spans with same color
1573fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1574    if spans.len() < 2 {
1575        return;
1576    }
1577
1578    let mut write_idx = 0;
1579    for read_idx in 1..spans.len() {
1580        if spans[write_idx].color == spans[read_idx].color
1581            && spans[write_idx].range.end == spans[read_idx].range.start
1582        {
1583            spans[write_idx].range.end = spans[read_idx].range.end;
1584        } else {
1585            write_idx += 1;
1586            if write_idx != read_idx {
1587                spans[write_idx] = spans[read_idx].clone();
1588            }
1589        }
1590    }
1591    spans.truncate(write_idx + 1);
1592}
1593
1594#[cfg(test)]
1595mod tests {
1596    use crate::model::filesystem::StdFileSystem;
1597    use std::sync::Arc;
1598
1599    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1600        Arc::new(StdFileSystem)
1601    }
1602    use super::*;
1603    use crate::view::theme;
1604
1605    #[test]
1606    fn test_highlight_engine_default() {
1607        let engine = HighlightEngine::default();
1608        assert!(!engine.has_highlighting());
1609        assert_eq!(engine.backend_name(), "none");
1610    }
1611
1612    #[test]
1613    fn test_textmate_backend_selection() {
1614        let registry =
1615            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1616
1617        // Languages with TextMate grammars use TextMate for highlighting
1618        let engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1619        assert_eq!(engine.backend_name(), "textmate");
1620        // Tree-sitter language should still be detected for other features
1621        assert!(engine.language().is_some());
1622
1623        let engine = HighlightEngine::for_file(Path::new("test.py"), None, &registry);
1624        assert_eq!(engine.backend_name(), "textmate");
1625        assert!(engine.language().is_some());
1626
1627        // JavaScript is routed to tree-sitter (issue #899: syntect's JS
1628        // grammar bleeds template-literal string state past the closing
1629        // backtick).
1630        let engine = HighlightEngine::for_file(Path::new("test.js"), None, &registry);
1631        assert_eq!(engine.backend_name(), "tree-sitter");
1632        assert!(engine.language().is_some());
1633
1634        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
1635        let engine = HighlightEngine::for_file(Path::new("test.ts"), None, &registry);
1636        assert_eq!(engine.backend_name(), "tree-sitter");
1637        assert!(engine.language().is_some());
1638
1639        let engine = HighlightEngine::for_file(Path::new("test.tsx"), None, &registry);
1640        assert_eq!(engine.backend_name(), "tree-sitter");
1641        assert!(engine.language().is_some());
1642    }
1643
1644    #[test]
1645    fn test_tree_sitter_direct() {
1646        // Verify tree-sitter highlighter can be created directly for Rust
1647        let highlighter = Highlighter::new(Language::Rust);
1648        assert!(highlighter.is_ok());
1649    }
1650
1651    #[test]
1652    fn test_unknown_extension() {
1653        let registry =
1654            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1655
1656        // Unknown extension
1657        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), None, &registry);
1658        // Might be none or might find something via syntect
1659        // Just verify it doesn't panic
1660        let _ = engine.backend_name();
1661    }
1662
1663    #[test]
1664    fn test_highlight_viewport_empty_buffer_no_panic() {
1665        // Regression test: calling highlight_viewport with an empty buffer
1666        // and non-zero viewport range previously caused subtraction overflow panic.
1667        //
1668        // The bug occurred when:
1669        // - buffer is empty (len = 0)
1670        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
1671        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
1672        // - parse_end - parse_start would underflow (0 - positive = overflow)
1673        let registry =
1674            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1675
1676        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1677
1678        // Create empty buffer
1679        let buffer = Buffer::from_str("", 0, test_fs());
1680        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1681
1682        // Test the specific case that triggered the overflow:
1683        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
1684        // 0 - 90 = overflow!
1685        if let HighlightEngine::TextMate(ref mut tm) = engine {
1686            // Small context_bytes so parse_start remains > 0
1687            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1688            assert!(spans.is_empty());
1689        }
1690    }
1691
1692    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
1693    /// This is a regression test for a bug where using str::lines() caused 1-byte
1694    /// offset drift per line because it strips line terminators.
1695    #[test]
1696    fn test_textmate_engine_crlf_byte_offsets() {
1697        let registry =
1698            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1699
1700        let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, &registry);
1701
1702        // Create CRLF content with keywords on each line
1703        // Each "public" keyword should be highlighted at byte positions:
1704        // Line 1: "public" at bytes 0-5
1705        // Line 2: "public" at bytes 8-13 (after "public\r\n" = 8 bytes)
1706        // Line 3: "public" at bytes 16-21 (after two "public\r\n" = 16 bytes)
1707        let content = b"public\r\npublic\r\npublic\r\n";
1708        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1709        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1710
1711        if let HighlightEngine::TextMate(ref mut tm) = engine {
1712            // Highlight the entire content
1713            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1714
1715            // Find spans that cover keyword positions
1716            // The keyword "public" should have spans at these byte ranges:
1717            // Line 1: 0..6
1718            // Line 2: 8..14 (NOT 7..13 which would be the buggy offset)
1719            // Line 3: 16..22 (NOT 14..20 which would be the buggy offset)
1720
1721            eprintln!(
1722                "Spans: {:?}",
1723                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1724            );
1725
1726            // Check that we have spans covering the correct positions
1727            let has_span_at = |start: usize, end: usize| -> bool {
1728                spans
1729                    .iter()
1730                    .any(|s| s.range.start <= start && s.range.end >= end)
1731            };
1732
1733            // Line 1: "public" at bytes 0-6
1734            assert!(
1735                has_span_at(0, 6),
1736                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1737                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1738            );
1739
1740            // Line 2: "public" at bytes 8-14 (after "public\r\n")
1741            // If buggy, would be at 7-13
1742            assert!(
1743                has_span_at(8, 14),
1744                "Should have span covering bytes 8-14 (line 2 'public'). \
1745                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1746                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1747            );
1748
1749            // Line 3: "public" at bytes 16-22 (after two "public\r\n")
1750            // If buggy, would be at 14-20
1751            assert!(
1752                has_span_at(16, 22),
1753                "Should have span covering bytes 16-22 (line 3 'public'). \
1754                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1755                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1756            );
1757        } else {
1758            panic!("Expected TextMate engine for .java file");
1759        }
1760    }
1761
1762    /// When a buffer is parsed with no trailing newline (the streaming
1763    /// case for `git show` output between writes), the engine must not
1764    /// commit cache tail state at the end of the partial trailing line.
1765    /// With syntect's `Diff` grammar (line-anchored `^\+.*` etc.), the
1766    /// state at end-of-input has popped `markup.inserted`, so any
1767    /// follow-up parse from there would see the rest of the line as a
1768    /// new line in `source.diff` and emit no scope — losing the bg
1769    /// inside otherwise-green `+` lines.
1770    ///
1771    /// This test pins the boundary the cache commits at: after parsing
1772    /// a buffer ending mid-line, `cache.range.end` must be the last
1773    /// newline (or `desired_parse_start` if no newline was seen), not
1774    /// the end of the partial line.
1775    #[test]
1776    fn test_partial_trailing_line_not_committed_to_cache() {
1777        let registry =
1778            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1779        let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, &registry);
1780        let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();
1781
1782        // A complete `+` line followed by a partial `+` line (no \n).
1783        let content = "+complete\n+partial";
1784        let buffer = Buffer::from_str(content, 0, test_fs());
1785
1786        if let HighlightEngine::TextMate(ref mut tm) = engine {
1787            let _ = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1788            let (cache_end, has_tail) = tm.cache_commit_for_test();
1789            assert_eq!(
1790                cache_end,
1791                "+complete\n".len(),
1792                "cache should commit at the last newline, not into the partial \
1793                 trailing line — committing past the newline causes streaming \
1794                 forward-extension to parse the line's continuation in the wrong \
1795                 grammar context, losing the diff bg."
1796            );
1797            assert!(has_tail, "tail state should be saved at the safe boundary");
1798        }
1799    }
1800
1801    /// Reproduce: artifacts inside `+` lines whose content contains
1802    /// JS template literals — `\`...\`` with `${}` interpolation.
1803    /// The whole `+` line should be one contiguous Inserted span
1804    /// carrying `theme.diff_add_bg`, with no bg-less holes mid-line.
1805    #[test]
1806    fn test_diff_inserted_line_is_fully_covered() {
1807        let registry =
1808            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1809        let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, &registry);
1810        let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();
1811
1812        let content =
1813            "diff --git a/file.ts b/file.ts\n\
1814             index aaa..bbb 100644\n\
1815             --- a/file.ts\n\
1816             +++ b/file.ts\n\
1817             @@ -1,3 +1,5 @@\n\
1818             +${seen[g.subtree] > 1 ? `**Seen ${seen[g.subtree]}× — likely cross-subtree type seam.**` : \"\"}\n\
1819             +              const k = `${b.fn}::${(b.what || \"\").slice(0, 80)}`;\n";
1820        let buffer = Buffer::from_str(content, 0, test_fs());
1821
1822        if let HighlightEngine::TextMate(ref mut tm) = engine {
1823            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1824
1825            let bytes = content.as_bytes();
1826            let mut line_start = 0;
1827            while line_start < bytes.len() {
1828                let mut line_end = line_start;
1829                while line_end < bytes.len() && bytes[line_end] != b'\n' {
1830                    line_end += 1;
1831                }
1832                if bytes[line_start] == b'+' && !content[line_start..line_end].starts_with("+++") {
1833                    for byte_pos in line_start..line_end {
1834                        let span = spans
1835                            .iter()
1836                            .find(|s| s.range.start <= byte_pos && s.range.end > byte_pos);
1837                        let bg = span.and_then(|s| s.bg);
1838                        assert_eq!(
1839                            bg,
1840                            Some(theme.diff_add_bg),
1841                            "byte {} (`{}`) of `+` line starting at {} should carry diff_add_bg; \
1842                             got span={:?}",
1843                            byte_pos,
1844                            content[byte_pos..byte_pos + 1].escape_debug(),
1845                            line_start,
1846                            span,
1847                        );
1848                    }
1849                }
1850                line_start = line_end + 1;
1851            }
1852        } else {
1853            panic!("Expected TextMate engine for .diff file");
1854        }
1855    }
1856
1857    #[test]
1858    fn test_git_rebase_todo_highlighting() {
1859        let registry =
1860            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1861
1862        // git-rebase-todo files should use the Git Rebase Todo grammar
1863        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), None, &registry);
1864        assert_eq!(engine.backend_name(), "textmate");
1865        assert!(engine.has_highlighting());
1866    }
1867
1868    #[test]
1869    fn test_git_commit_message_highlighting() {
1870        let registry =
1871            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1872
1873        // COMMIT_EDITMSG should use the Git Commit Message grammar
1874        let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), None, &registry);
1875        assert_eq!(engine.backend_name(), "textmate");
1876        assert!(engine.has_highlighting());
1877
1878        // MERGE_MSG should also work
1879        let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), None, &registry);
1880        assert_eq!(engine.backend_name(), "textmate");
1881        assert!(engine.has_highlighting());
1882    }
1883
1884    #[test]
1885    fn test_gitignore_highlighting() {
1886        let registry =
1887            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1888
1889        // .gitignore should use the Gitignore grammar
1890        let engine = HighlightEngine::for_file(Path::new(".gitignore"), None, &registry);
1891        assert_eq!(engine.backend_name(), "textmate");
1892        assert!(engine.has_highlighting());
1893
1894        // .dockerignore should also work
1895        let engine = HighlightEngine::for_file(Path::new(".dockerignore"), None, &registry);
1896        assert_eq!(engine.backend_name(), "textmate");
1897        assert!(engine.has_highlighting());
1898    }
1899
1900    #[test]
1901    fn test_gitconfig_highlighting() {
1902        let registry =
1903            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1904
1905        // .gitconfig should use the Git Config grammar
1906        let engine = HighlightEngine::for_file(Path::new(".gitconfig"), None, &registry);
1907        assert_eq!(engine.backend_name(), "textmate");
1908        assert!(engine.has_highlighting());
1909
1910        // .gitmodules should also work
1911        let engine = HighlightEngine::for_file(Path::new(".gitmodules"), None, &registry);
1912        assert_eq!(engine.backend_name(), "textmate");
1913        assert!(engine.has_highlighting());
1914    }
1915
1916    #[test]
1917    fn test_gitattributes_highlighting() {
1918        let registry =
1919            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1920
1921        // .gitattributes should use the Git Attributes grammar
1922        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), None, &registry);
1923        assert_eq!(engine.backend_name(), "textmate");
1924        assert!(engine.has_highlighting());
1925    }
1926
1927    #[test]
1928    fn test_comment_delimiter_uses_comment_color() {
1929        // Comment delimiters (#, //, /*) should use comment color, not operator
1930        assert_eq!(
1931            scope_to_category("punctuation.definition.comment"),
1932            Some(HighlightCategory::Comment)
1933        );
1934        assert_eq!(
1935            scope_to_category("punctuation.definition.comment.python"),
1936            Some(HighlightCategory::Comment)
1937        );
1938        assert_eq!(
1939            scope_to_category("punctuation.definition.comment.begin"),
1940            Some(HighlightCategory::Comment)
1941        );
1942    }
1943
1944    #[test]
1945    fn test_string_delimiter_uses_string_color() {
1946        // String delimiters (", ', `) should use string color, not operator
1947        assert_eq!(
1948            scope_to_category("punctuation.definition.string.begin"),
1949            Some(HighlightCategory::String)
1950        );
1951        assert_eq!(
1952            scope_to_category("punctuation.definition.string.end"),
1953            Some(HighlightCategory::String)
1954        );
1955    }
1956
1957    #[test]
1958    fn test_punctuation_bracket() {
1959        // punctuation.section (TextMate standard for block delimiters)
1960        assert_eq!(
1961            scope_to_category("punctuation.section"),
1962            Some(HighlightCategory::PunctuationBracket)
1963        );
1964        assert_eq!(
1965            scope_to_category("punctuation.section.block.begin.c"),
1966            Some(HighlightCategory::PunctuationBracket)
1967        );
1968        assert_eq!(
1969            scope_to_category("punctuation.bracket"),
1970            Some(HighlightCategory::PunctuationBracket)
1971        );
1972        // punctuation.definition.* bracket-like scopes from sublime-syntax grammars
1973        assert_eq!(
1974            scope_to_category("punctuation.definition.array.begin.toml"),
1975            Some(HighlightCategory::PunctuationBracket)
1976        );
1977        assert_eq!(
1978            scope_to_category("punctuation.definition.block.code.typst"),
1979            Some(HighlightCategory::PunctuationBracket)
1980        );
1981        assert_eq!(
1982            scope_to_category("punctuation.definition.group.typst"),
1983            Some(HighlightCategory::PunctuationBracket)
1984        );
1985        assert_eq!(
1986            scope_to_category("punctuation.definition.inline-table.begin.toml"),
1987            Some(HighlightCategory::PunctuationBracket)
1988        );
1989        assert_eq!(
1990            scope_to_category("punctuation.definition.tag.end.svelte"),
1991            Some(HighlightCategory::PunctuationBracket)
1992        );
1993    }
1994
1995    #[test]
1996    fn test_punctuation_delimiter() {
1997        assert_eq!(
1998            scope_to_category("punctuation.separator"),
1999            Some(HighlightCategory::PunctuationDelimiter)
2000        );
2001        assert_eq!(
2002            scope_to_category("punctuation.terminator.statement.c"),
2003            Some(HighlightCategory::PunctuationDelimiter)
2004        );
2005        assert_eq!(
2006            scope_to_category("punctuation.accessor"),
2007            Some(HighlightCategory::PunctuationDelimiter)
2008        );
2009    }
2010
2011    /// First parse of a small file populates a whole-file cache; subsequent
2012    /// scrolls anywhere in the file are exact cache hits with no extra parse
2013    /// work.
2014    #[test]
2015    fn test_small_file_scroll_is_cache_hit() {
2016        let registry =
2017            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2018        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2019
2020        let mut content = String::new();
2021        for i in 0..200 {
2022            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2023        }
2024        let buffer = Buffer::from_str(&content, 0, test_fs());
2025        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2026
2027        let HighlightEngine::TextMate(ref mut tm) = engine else {
2028            panic!("expected TextMate engine for .rs");
2029        };
2030
2031        // First call: cold start, full parse.
2032        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2033        let stats_after_first = tm.stats().clone();
2034        assert_eq!(
2035            stats_after_first.cache_hits, 0,
2036            "first call cannot hit cache"
2037        );
2038        assert_eq!(
2039            stats_after_first.cache_misses, 1,
2040            "first call must be a miss"
2041        );
2042
2043        // Scroll anywhere — top, middle, end. All must be cache hits.
2044        let mid = buffer.len() / 2;
2045        let near_end = buffer.len().saturating_sub(200);
2046        let probes = [(0, 200), (mid, mid + 200), (near_end, buffer.len())];
2047        for (vs, ve) in probes {
2048            let _ = tm.highlight_viewport(&buffer, vs, ve, &theme, 10_000);
2049        }
2050
2051        let stats_after_scroll = tm.stats().clone();
2052        assert_eq!(
2053            stats_after_scroll.cache_misses,
2054            1,
2055            "scrolling must not add cache misses (got extra: {})",
2056            stats_after_scroll.cache_misses - 1
2057        );
2058        assert_eq!(
2059            stats_after_scroll.cache_hits, 3,
2060            "all three scroll probes must hit the cache"
2061        );
2062        assert_eq!(
2063            stats_after_scroll.bytes_parsed, stats_after_first.bytes_parsed,
2064            "scrolling must not parse any new bytes"
2065        );
2066    }
2067
2068    /// After a small edit, the next render takes the partial-update path
2069    /// (convergence) and continues to serve cache hits afterwards. Crucially:
2070    /// the partial update parses far fewer bytes than the file is long.
2071    #[test]
2072    fn test_small_file_edit_uses_partial_update() {
2073        let registry =
2074            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2075        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2076
2077        let mut content = String::new();
2078        for i in 0..200 {
2079            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2080        }
2081        let buffer = Buffer::from_str(&content, 0, test_fs());
2082        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2083
2084        let HighlightEngine::TextMate(ref mut tm) = engine else {
2085            panic!("expected TextMate engine for .rs");
2086        };
2087
2088        // Warm cache.
2089        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2090        let bytes_before_edit = tm.stats().bytes_parsed;
2091        let buf_len = buffer.len();
2092        assert!(
2093            buf_len > 4000,
2094            "test needs a buffer larger than the partial-update region"
2095        );
2096
2097        // Simulate an edit deep in the file.
2098        let edit_pos = buf_len / 2;
2099        tm.notify_insert(edit_pos, 1);
2100        // The buffer itself doesn't change here (we test the engine in isolation),
2101        // but notify_insert sets dirty_from and shifts spans, which is what the
2102        // partial-update path consumes.
2103
2104        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2105        let bytes_after_edit = tm.stats().bytes_parsed;
2106        let parsed = bytes_after_edit - bytes_before_edit;
2107
2108        assert!(
2109            parsed < buf_len,
2110            "edit must not trigger a whole-file reparse (parsed {parsed}, file {buf_len})"
2111        );
2112    }
2113
2114    /// Bulk edits (multi-cursor typing, "select word + type letter" replace,
2115    /// search-replace, etc.) must take the same partial-update path as single
2116    /// edits. Regression for #1958: the previous code called `invalidate_all()`
2117    /// after a bulk edit, wiping every checkpoint and forcing a cold reparse
2118    /// from byte zero on the next keystroke.
2119    #[test]
2120    fn test_bulk_edit_uses_partial_update() {
2121        let registry =
2122            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2123        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2124
2125        let mut content = String::new();
2126        for i in 0..200 {
2127            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2128        }
2129        let buffer = Buffer::from_str(&content, 0, test_fs());
2130        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2131
2132        // Warm cache.
2133        let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2134        let bytes_before_edit = match &engine {
2135            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2136            _ => panic!("expected TextMate engine for .rs"),
2137        };
2138        let buf_len = buffer.len();
2139        assert!(
2140            buf_len > 4000,
2141            "test needs a buffer larger than the partial-update region"
2142        );
2143
2144        // Simulate "select a word, type a letter" deep in the file: a single
2145        // bulk edit that deletes 8 bytes and inserts 1 byte at the same
2146        // position. This is exactly the user-facing scenario in #1958.
2147        let edit_pos = buf_len / 2;
2148        let edits = vec![(edit_pos, 8usize, 1usize)];
2149        engine.notify_edits(&edits);
2150
2151        let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2152        let bytes_after_edit = match &engine {
2153            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2154            _ => unreachable!(),
2155        };
2156        let parsed = bytes_after_edit - bytes_before_edit;
2157
2158        assert!(
2159            parsed < buf_len,
2160            "bulk edit must not trigger a whole-file reparse \
2161             (parsed {parsed}, file {buf_len})"
2162        );
2163    }
2164
2165    /// Bulk edits whose positions are all outside the cached viewport must
2166    /// not invalidate the cache at all on the tree-sitter / `Highlighter`
2167    /// path. (TextMate has a richer convergence model, but for both engines
2168    /// the regression to guard against is: "any bulk edit, even a tiny one,
2169    /// destroys the cache and forces a full reparse.")
2170    #[test]
2171    fn test_bulk_edit_outside_cache_keeps_textmate_partial_update() {
2172        let registry =
2173            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2174        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2175
2176        let mut content = String::new();
2177        for i in 0..400 {
2178            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2179        }
2180        let buffer = Buffer::from_str(&content, 0, test_fs());
2181        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2182
2183        // Warm a viewport near the start.
2184        let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
2185        let bytes_before = match &engine {
2186            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2187            _ => panic!("expected TextMate engine for .rs"),
2188        };
2189
2190        // Apply a bulk edit far past the warmed viewport.
2191        let far_pos = buffer.len() - 100;
2192        engine.notify_edits(&[(far_pos, 3, 1)]);
2193
2194        // Re-render the original viewport. The partial-update path must keep
2195        // parsed bytes well below a whole-file reparse.
2196        let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
2197        let bytes_after = match &engine {
2198            HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2199            _ => unreachable!(),
2200        };
2201        let parsed = bytes_after - bytes_before;
2202        let buf_len = buffer.len();
2203        assert!(
2204            parsed < buf_len,
2205            "bulk edit outside the viewport must not force a whole-file \
2206             reparse (parsed {parsed}, file {buf_len})"
2207        );
2208    }
2209
2210    /// Convergence budget caps per-pass work even when the parse state never
2211    /// agrees with any existing checkpoint. Without the cap, a non-converging
2212    /// edit would parse the rest of the file on every keystroke.
2213    #[test]
2214    fn test_partial_update_budget_caps_work() {
2215        let registry =
2216            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2217        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2218
2219        // Build a buffer comfortably larger than CONVERGENCE_BUDGET.
2220        let mut content = String::new();
2221        while content.len() < (CONVERGENCE_BUDGET * 4) {
2222            content.push_str("fn name() { let mut v = 0; v += 1; }\n");
2223        }
2224        let buffer = Buffer::from_str(&content, 0, test_fs());
2225        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2226
2227        let HighlightEngine::TextMate(ref mut tm) = engine else {
2228            panic!("expected TextMate engine for .rs");
2229        };
2230
2231        // Warm cache (whole-file parse).
2232        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2233        // Simulate an edit and force every checkpoint to disagree by clearing
2234        // their stored states. The convergence loop will look at each marker,
2235        // find the slot empty, and never converge.
2236        tm.notify_insert(100, 0);
2237        tm.checkpoint_states.clear();
2238
2239        let bytes_before = tm.stats().bytes_parsed;
2240        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2241        let parsed = tm.stats().bytes_parsed - bytes_before;
2242
2243        // Budget bounds the work to roughly CONVERGENCE_BUDGET past the dirty
2244        // point (plus the prefix back to the resume checkpoint). Allow a small
2245        // overshoot for the line that crossed the budget threshold.
2246        assert!(
2247            parsed <= CONVERGENCE_BUDGET + 4096,
2248            "partial update parsed {parsed}, expected <= {} \
2249             (budget {CONVERGENCE_BUDGET} + slack)",
2250            CONVERGENCE_BUDGET + 4096
2251        );
2252
2253        // Budget hit must leave dirty_from set for follow-up passes.
2254        assert!(
2255            tm.dirty_from.is_some(),
2256            "budget exit must keep dirty_from set"
2257        );
2258    }
2259
2260    /// Large files (above MAX_PARSE_BYTES) keep the existing windowed
2261    /// behaviour: parse range is bounded by ±context_bytes around the
2262    /// viewport, not the whole file.
2263    ///
2264    /// The viewport is placed past `MAX_PARSE_BYTES` so we exercise the
2265    /// "large file, no nearby checkpoint" branch in `find_parse_resume_point`
2266    /// — the symmetric branch that fires when `parse_end <= MAX_PARSE_BYTES`
2267    /// still parses from byte 0 even on big files (pre-existing behaviour,
2268    /// addressed in a later phase).
2269    #[test]
2270    fn test_large_file_uses_windowed_parse() {
2271        let registry =
2272            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2273        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2274
2275        // Build content well past MAX_PARSE_BYTES so we can put the viewport
2276        // beyond it.
2277        let line = "fn long_name_for_padding() { let v = 1; v + 1; }\n";
2278        let bytes_needed = MAX_PARSE_BYTES * 2;
2279        let lines_needed = bytes_needed / line.len() + 100;
2280        let mut content = String::with_capacity(lines_needed * line.len());
2281        for _ in 0..lines_needed {
2282            content.push_str(line);
2283        }
2284        assert!(content.len() > MAX_PARSE_BYTES * 2);
2285        let buffer = Buffer::from_str(&content, 0, test_fs());
2286        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2287
2288        let HighlightEngine::TextMate(ref mut tm) = engine else {
2289            panic!("expected TextMate engine for .rs");
2290        };
2291
2292        // Viewport past MAX_PARSE_BYTES: parse_end > MAX_PARSE_BYTES, so the
2293        // resume-from-byte-0 fallback in find_parse_resume_point doesn't fire.
2294        let context_bytes = 10_000usize;
2295        let viewport_start = MAX_PARSE_BYTES + 200_000;
2296        let viewport_end = viewport_start + 1000;
2297        let _ = tm.highlight_viewport(&buffer, viewport_start, viewport_end, &theme, context_bytes);
2298        let parsed = tm.stats().bytes_parsed;
2299
2300        // Windowed parse covers viewport ± context_bytes plus a tiny prefix
2301        // for the resume anchor. Allow generous slack (4×) but reject
2302        // anything close to whole-file.
2303        let window = (viewport_end - viewport_start) + 2 * context_bytes;
2304        assert!(
2305            parsed <= window * 4,
2306            "large file windowed parse should be ~{window} bytes, got {parsed} \
2307             (file {})",
2308            buffer.len()
2309        );
2310    }
2311
2312    /// Regression for issue #899: a class field initialised with an arrow
2313    /// function that returns a template literal must not bleed string
2314    /// highlighting onto the rest of the class body. The user-reported
2315    /// repro pinned the syntect JavaScript grammar to a string state from
2316    /// the trailing `;` until EOF; the constructor keyword, comments, and
2317    /// the closing `}` were all painted as a string.
2318    #[test]
2319    fn test_javascript_template_literal_does_not_bleed() {
2320        let registry =
2321            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2322        let mut engine = HighlightEngine::for_file(Path::new("repro.js"), None, &registry);
2323
2324        // Reproduction code from issue #899.
2325        let source = "class ExampleClass {\n\
2326                      \texampleFunction = exampleArg => `${exampleArg}`;\n\
2327                      \n\
2328                      \tconstructor() {\n\
2329                      \t\t// constructor body\n\
2330                      \t}\n\
2331                      \n\
2332                      \t/* multiline comment */\n\
2333                      }\n";
2334        let buffer = Buffer::from_str(source, 0, test_fs());
2335        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2336
2337        let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2338
2339        // The `constructor` keyword sits well after the template literal.
2340        // If string state bleeds, this position is reported as String.
2341        let ctor_pos = source.find("constructor").expect("locate constructor");
2342        let ctor_cat = engine.category_at_position(ctor_pos);
2343        assert_ne!(
2344            ctor_cat,
2345            Some(HighlightCategory::String),
2346            "constructor keyword must not inherit string state from earlier \
2347             template literal (got {:?})",
2348            ctor_cat,
2349        );
2350
2351        // The closing brace of the class — the very last non-whitespace char
2352        // — also lives outside any string in correct JS.
2353        let last_brace = source.rfind('}').expect("locate closing brace");
2354        let brace_cat = engine.category_at_position(last_brace);
2355        assert_ne!(
2356            brace_cat,
2357            Some(HighlightCategory::String),
2358            "closing class brace must not be highlighted as string \
2359             (got {:?})",
2360            brace_cat,
2361        );
2362    }
2363
2364    /// The closing `}` of a `${…}` template substitution and the closing
2365    /// backtick of the surrounding template literal must keep template
2366    /// string colouring — not inherit the `@variable` highlight from the
2367    /// substitution's expression. Tree-sitter-highlight emits one
2368    /// HighlightEnd event per started highlight; if the editor's
2369    /// span-flattening logic doesn't pop the inner `@variable` correctly
2370    /// when the substitution closes, the variable colour bleeds across
2371    /// `}` and the trailing `\`` until the next sibling capture (here,
2372    /// the `;` operator).
2373    #[test]
2374    fn test_javascript_template_substitution_closing_tokens_are_string() {
2375        let registry =
2376            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2377        let mut engine = HighlightEngine::for_file(Path::new("tmpl.js"), None, &registry);
2378
2379        // Minimal template literal: `${name}` — wrapped in a statement so
2380        // the parser sees a complete program.
2381        let source = "const x = `${name}`;\n";
2382        let buffer = Buffer::from_str(source, 0, test_fs());
2383        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2384
2385        let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2386
2387        // Locate the closing `}` of the substitution and the closing
2388        // backtick of the template literal.
2389        let close_brace = source
2390            .find("}`")
2391            .expect("locate substitution closing brace");
2392        let close_backtick = close_brace + 1;
2393
2394        // Sanity: the inner identifier `name` is correctly tagged as a
2395        // variable (this guards us against an unrelated regression where
2396        // the entire template gets typed wrong).
2397        let name_pos = source.find("name").expect("locate identifier");
2398        let name_cat = engine.category_at_position(name_pos);
2399        assert_eq!(
2400            name_cat,
2401            Some(HighlightCategory::Variable),
2402            "substitution identifier should be Variable (got {:?})",
2403            name_cat,
2404        );
2405
2406        // The closing `}` and `` ` `` live inside the surrounding
2407        // `template_string` node, so tree-sitter assigns them the
2408        // `@string` capture. They must surface as String here — not
2409        // as Variable (the previous symptom of the bleed) and not as
2410        // None (which would make the editor render them with the
2411        // default foreground colour, equally wrong).
2412        let brace_cat = engine.category_at_position(close_brace);
2413        assert_eq!(
2414            brace_cat,
2415            Some(HighlightCategory::String),
2416            "closing }} of ${{…}} must be String (got {:?})",
2417            brace_cat,
2418        );
2419        let backtick_cat = engine.category_at_position(close_backtick);
2420        assert_eq!(
2421            backtick_cat,
2422            Some(HighlightCategory::String),
2423            "closing backtick of template literal must be String \
2424             (got {:?})",
2425            backtick_cat,
2426        );
2427    }
2428}