Skip to main content

fresh/primitives/
highlight_engine.rs

1//! Unified highlighting engine over syntect (TextMate grammars) and
2//! tree-sitter. Syntect is the default; tree-sitter `Language` is still
3//! detected for non-highlighting features (indentation, semantic highlighting).
4//!
5//! # TextMate cache design
6//!
7//! Syntect's parser is a sequential state machine — it must process bytes
8//! in order from a known parse state to track multi-line constructs and
9//! embedded language transitions. To make scrolling cheap, the engine keeps
10//! a span cache, a `(ParseState, ScopeStack)` snapshot at the cache tail,
11//! and periodic checkpoint anchors to support resume-from-anywhere.
12//!
//! Four render-time paths, gated by what the cache covers:
14//!
15//! - **Cache hit** — cache fully covers the parse range and there's no
16//!   pending edit; filter cached spans for the viewport. Zero parse work.
17//! - **Forward extension** — cache covers the start of the parse range but
18//!   not its end; resume from `tail_state` and parse only the uncovered
19//!   tail bytes. Steady-state scroll path.
20//! - **Partial update** — there's a pending edit; resume from the nearest
21//!   checkpoint before the dirty point and parse forward looking for
22//!   convergence (state matches an existing checkpoint), bounded by a
23//!   per-pass byte budget so pathological edits can't degenerate into
24//!   whole-file reparses.
25//! - **Cold start / fallback** — no cache, or none of the above applies;
26//!   parse the appropriate range from a fresh state or nearest checkpoint.
27//!
28//! For files at or below `MAX_PARSE_BYTES` the parse range is the whole
29//! file, so the cache is whole-file after the first parse and scrolling
30//! becomes filter-only. Larger files use a viewport-centred window of
31//! `±context_bytes` and rely on the forward-extension path to keep
32//! scroll-cost bounded.
33//!
34//! Edits go through `notify_insert` / `notify_delete`, which shift cached
35//! span byte offsets in place, set `dirty_from`, and invalidate `tail_state`
36//! when the edit lies inside the cached range.
37
38use crate::model::buffer::Buffer;
39use crate::model::marker::{MarkerId, MarkerList};
40use crate::primitives::grammar::GrammarRegistry;
41use crate::primitives::highlighter::{
42    highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
43};
44use crate::view::theme::Theme;
45use std::collections::HashMap;
46use std::ops::Range;
47use std::path::Path;
48use std::sync::Arc;
49use syntect::parsing::SyntaxSet;
50
51/// Map TextMate scope to highlight category
52fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
53    let scope_lower = scope.to_lowercase();
54
55    // Comments - highest priority
56    if scope_lower.starts_with("comment") {
57        return Some(HighlightCategory::Comment);
58    }
59
60    // Strings
61    if scope_lower.starts_with("string") {
62        return Some(HighlightCategory::String);
63    }
64
65    // Markdown/markup scopes - handle before generic keyword/punctuation checks
66    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
67    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
68    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
69        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
70    }
71    // Bold: markup.bold
72    if scope_lower.starts_with("markup.bold") {
73        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
74    }
75    // Italic: markup.italic
76    if scope_lower.starts_with("markup.italic") {
77        return Some(HighlightCategory::Variable); // Italic styled like variables
78    }
79    // Inline code and code blocks: markup.raw, markup.inline.raw
80    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
81        return Some(HighlightCategory::String); // Code styled like strings
82    }
83    // Links: markup.underline.link
84    if scope_lower.starts_with("markup.underline.link") {
85        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
86    }
87    // Generic underline (often links)
88    if scope_lower.starts_with("markup.underline") {
89        return Some(HighlightCategory::Function);
90    }
91    // Block quotes: markup.quote
92    if scope_lower.starts_with("markup.quote") {
93        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
94    }
95    // Lists: markup.list
96    if scope_lower.starts_with("markup.list") {
97        return Some(HighlightCategory::Operator); // List markers styled like operators
98    }
99    // Strikethrough: markup.strikethrough
100    if scope_lower.starts_with("markup.strikethrough") {
101        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
102    }
103
104    // Keywords
105    if scope_lower.starts_with("keyword.control")
106        || scope_lower.starts_with("keyword.other")
107        || scope_lower.starts_with("keyword.declaration")
108        || scope_lower.starts_with("keyword")
109    {
110        // keyword.operator should map to Operator, not Keyword
111        if !scope_lower.starts_with("keyword.operator") {
112            return Some(HighlightCategory::Keyword);
113        }
114    }
115
116    // Punctuation that belongs to a parent construct (comment/string delimiters)
117    // These must be checked before the generic punctuation rule below.
118    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
119    if scope_lower.starts_with("punctuation.definition.comment") {
120        return Some(HighlightCategory::Comment);
121    }
122    if scope_lower.starts_with("punctuation.definition.string") {
123        return Some(HighlightCategory::String);
124    }
125
126    // Operators (keyword.operator only)
127    if scope_lower.starts_with("keyword.operator") {
128        return Some(HighlightCategory::Operator);
129    }
130
131    // Punctuation brackets ({, }, (, ), [, ], <, >)
132    // Covers punctuation.section.*, punctuation.bracket.*,
133    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
134    if scope_lower.starts_with("punctuation.section")
135        || scope_lower.starts_with("punctuation.bracket")
136        || scope_lower.starts_with("punctuation.definition.array")
137        || scope_lower.starts_with("punctuation.definition.block")
138        || scope_lower.starts_with("punctuation.definition.brackets")
139        || scope_lower.starts_with("punctuation.definition.group")
140        || scope_lower.starts_with("punctuation.definition.inline-table")
141        || scope_lower.starts_with("punctuation.definition.section")
142        || scope_lower.starts_with("punctuation.definition.table")
143        || scope_lower.starts_with("punctuation.definition.tag")
144    {
145        return Some(HighlightCategory::PunctuationBracket);
146    }
147
148    // Punctuation delimiters (;, ,, .)
149    if scope_lower.starts_with("punctuation.separator")
150        || scope_lower.starts_with("punctuation.terminator")
151        || scope_lower.starts_with("punctuation.accessor")
152    {
153        return Some(HighlightCategory::PunctuationDelimiter);
154    }
155
156    // Functions
157    if scope_lower.starts_with("entity.name.function")
158        || scope_lower.starts_with("support.function")
159        || scope_lower.starts_with("meta.function-call")
160        || scope_lower.starts_with("variable.function")
161    {
162        return Some(HighlightCategory::Function);
163    }
164
165    // Types
166    if scope_lower.starts_with("entity.name.type")
167        || scope_lower.starts_with("entity.name.class")
168        || scope_lower.starts_with("entity.name.struct")
169        || scope_lower.starts_with("entity.name.enum")
170        || scope_lower.starts_with("entity.name.interface")
171        || scope_lower.starts_with("entity.name.trait")
172        || scope_lower.starts_with("support.type")
173        || scope_lower.starts_with("support.class")
174        || scope_lower.starts_with("storage.type")
175    {
176        return Some(HighlightCategory::Type);
177    }
178
179    // Storage modifiers (pub, static, const as keywords)
180    if scope_lower.starts_with("storage.modifier") {
181        return Some(HighlightCategory::Keyword);
182    }
183
184    // Constants and numbers
185    if scope_lower.starts_with("constant.numeric")
186        || scope_lower.starts_with("constant.language.boolean")
187    {
188        return Some(HighlightCategory::Number);
189    }
190    if scope_lower.starts_with("constant") {
191        return Some(HighlightCategory::Constant);
192    }
193
194    // Variables
195    if scope_lower.starts_with("variable.parameter")
196        || scope_lower.starts_with("variable.other")
197        || scope_lower.starts_with("variable.language")
198    {
199        return Some(HighlightCategory::Variable);
200    }
201
202    // Properties / object keys
203    if scope_lower.starts_with("entity.name.tag")
204        || scope_lower.starts_with("support.other.property")
205        || scope_lower.starts_with("meta.object-literal.key")
206        || scope_lower.starts_with("variable.other.property")
207        || scope_lower.starts_with("variable.other.object.property")
208    {
209        return Some(HighlightCategory::Property);
210    }
211
212    // Attributes (decorators, annotations)
213    if scope_lower.starts_with("entity.other.attribute")
214        || scope_lower.starts_with("meta.attribute")
215        || scope_lower.starts_with("entity.name.decorator")
216    {
217        return Some(HighlightCategory::Attribute);
218    }
219
220    // Generic variable fallback
221    if scope_lower.starts_with("variable") {
222        return Some(HighlightCategory::Variable);
223    }
224
225    None
226}
227
/// Unified highlighting engine supporting multiple backends.
///
/// Defaults to [`HighlightEngine::None`]. Both backend payloads are boxed,
/// so the enum itself is pointer-sized plus a discriminant.
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighting (built-in languages)
    TreeSitter(Box<Highlighter>),
    /// TextMate grammar based highlighting
    TextMate(Box<TextMateEngine>),
    /// No highlighting available
    #[default]
    None,
}
239
/// TextMate highlighting engine. See module docs for the cache design.
pub struct TextMateEngine {
    /// Shared syntect syntax set; also handed to `parse_line` on every line.
    syntax_set: Arc<SyntaxSet>,
    /// Index into `syntax_set.syntaxes()` selecting the grammar to parse with.
    syntax_index: usize,
    /// Byte positions of the checkpoint anchors; shifted on every
    /// `notify_insert` / `notify_delete`.
    checkpoint_markers: MarkerList,
    /// Parse state and scope stack recorded for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Lowest byte offset touched by an edit that has not been re-parsed
    /// yet; `None` when there is no pending edit.
    dirty_from: Option<usize>,
    /// Span cache; `None` until something has been parsed.
    cache: Option<TextMateCache>,
    /// Buffer length recorded after a partial update; the cache-hit and
    /// forward-extension paths require it to match the current length.
    last_buffer_len: usize,
    /// Tree-sitter language kept for non-highlighting features.
    ts_language: Option<Language>,
    /// Performance counters, exposed through `stats()`.
    stats: HighlightStats,
    // Scope→Category memo. Syntect Scope atoms are append-only-interned
    // globally, so entries never need invalidation.
    scope_category_cache: HashMap<syntect::parsing::Scope, Option<HighlightCategory>>,
}
256
/// Counters for monitoring highlighting performance in tests.
///
/// All counters are cumulative until `TextMateEngine::reset_stats` is called.
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Number of bytes parsed by syntect (total across all highlight_viewport calls).
    pub bytes_parsed: usize,
    /// Number of highlight_viewport calls answered purely from the span
    /// cache (including the case where a pending edit lies past the parse
    /// range).
    pub cache_hits: usize,
    /// Number of highlight_viewport calls that had to parse; the partial
    /// update and forward-extension paths both count as misses.
    pub cache_misses: usize,
    /// Number of checkpoint states overwritten while looking for convergence.
    pub checkpoints_updated: usize,
    /// Number of times convergence was detected (state matched existing checkpoint).
    pub convergences: usize,
}
271
/// Cached highlight spans for one contiguous byte range of the buffer.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer the cache claims to cover.
    range: Range<usize>,
    /// Categorised spans in buffer byte offsets; shifted in place on edits.
    spans: Vec<CachedSpan>,
    // Parse state at `range.end`; powers forward extension. None when the
    // last mutation didn't end at `range.end`.
    tail_state: Option<(syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
}
280
/// One cached highlight span. The colour is not stored; it is resolved from
/// the theme each time the cache is filtered for a viewport.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range in the buffer covered by this span.
    range: Range<usize>,
    /// Highlight category derived from the scope stack for this range.
    category: crate::primitives::highlighter::HighlightCategory,
}
286
/// Small/large file threshold (whole-file cache vs viewport window), in
/// bytes (1 MiB). Also bounds how far back a partial update searches for a
/// checkpoint before the dirty point.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Distance between checkpoint anchors, in bytes. Smaller = faster
/// convergence on edit. A checkpoint is only considered at the start of a
/// parsed line once at least this many bytes have gone by, and is skipped
/// when another marker already sits within half an interval.
const CHECKPOINT_INTERVAL: usize = 256;

/// Per-pass cap on partial-update parsing past `dirty_pos`, in bytes
/// (64 KiB). Bounds work for pathological edits whose effect doesn't
/// converge.
const CONVERGENCE_BUDGET: usize = 64 * 1024;
296
297impl TextMateEngine {
298    /// Create a new TextMate engine for the given syntax
299    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
300        Self {
301            syntax_set,
302            syntax_index,
303            checkpoint_markers: MarkerList::new(),
304            checkpoint_states: HashMap::new(),
305            dirty_from: None,
306            cache: None,
307            last_buffer_len: 0,
308            ts_language: None,
309            stats: HighlightStats::default(),
310            scope_category_cache: HashMap::new(),
311        }
312    }
313
314    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
315    pub fn with_language(
316        syntax_set: Arc<SyntaxSet>,
317        syntax_index: usize,
318        ts_language: Option<Language>,
319    ) -> Self {
320        Self {
321            syntax_set,
322            syntax_index,
323            checkpoint_markers: MarkerList::new(),
324            checkpoint_states: HashMap::new(),
325            dirty_from: None,
326            cache: None,
327            last_buffer_len: 0,
328            ts_language,
329            stats: HighlightStats::default(),
330            scope_category_cache: HashMap::new(),
331        }
332    }
333
334    /// Get performance stats for testing and diagnostics.
335    pub fn stats(&self) -> &HighlightStats {
336        &self.stats
337    }
338
339    /// Reset performance counters.
340    pub fn reset_stats(&mut self) {
341        self.stats = HighlightStats::default();
342    }
343
344    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
345    pub fn language(&self) -> Option<&Language> {
346        self.ts_language.as_ref()
347    }
348
349    /// Buffer-insert notification. Shifts span offsets in place and marks
350    /// the cache dirty so the partial-update path runs on next render.
351    pub fn notify_insert(&mut self, position: usize, length: usize) {
352        self.checkpoint_markers.adjust_for_insert(position, length);
353        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
354        if let Some(cache) = &mut self.cache {
355            for span in &mut cache.spans {
356                if span.range.start >= position {
357                    span.range.start += length;
358                    span.range.end += length;
359                } else if span.range.end > position {
360                    span.range.end += length;
361                }
362            }
363            if cache.range.end >= position {
364                cache.range.end += length;
365                if position < cache.range.end {
366                    cache.tail_state = None;
367                }
368            }
369        }
370    }
371
372    /// Buffer-delete notification. Mirror of `notify_insert`.
373    pub fn notify_delete(&mut self, position: usize, length: usize) {
374        self.checkpoint_markers.adjust_for_delete(position, length);
375        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
376        if let Some(cache) = &mut self.cache {
377            let delete_end = position + length;
378            cache.spans.retain_mut(|span| {
379                if span.range.start >= delete_end {
380                    span.range.start -= length;
381                    span.range.end -= length;
382                    true
383                } else if span.range.end <= position {
384                    true
385                } else if span.range.start >= position && span.range.end <= delete_end {
386                    false
387                } else {
388                    if span.range.start < position {
389                        span.range.end = position.min(span.range.end);
390                    } else {
391                        span.range.start = position;
392                        span.range.end = position + span.range.end.saturating_sub(delete_end);
393                    }
394                    span.range.start < span.range.end
395                }
396            });
397            if cache.range.end > delete_end {
398                cache.range.end -= length;
399            } else if cache.range.end > position {
400                cache.range.end = position;
401            }
402            if position < cache.range.end {
403                cache.tail_state = None;
404            }
405        }
406    }
407
408    /// Highlight the visible viewport. Path selection is documented in the
409    /// module-level docs ("TextMate cache design").
410    pub fn highlight_viewport(
411        &mut self,
412        buffer: &Buffer,
413        viewport_start: usize,
414        viewport_end: usize,
415        theme: &Theme,
416        context_bytes: usize,
417    ) -> Vec<HighlightSpan> {
418        let buf_len = buffer.len();
419        let (desired_parse_start, parse_end) = if buf_len <= MAX_PARSE_BYTES {
420            (0, buf_len)
421        } else {
422            let s = viewport_start.saturating_sub(context_bytes);
423            let e = (viewport_end + context_bytes).min(buf_len);
424            (s, e)
425        };
426
427        let dirty = self.dirty_from.take();
428        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
429            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
430        });
431        let exact_cache_hit = cache_covers_viewport
432            && dirty.is_none()
433            && self.last_buffer_len == buffer.len()
434            && self
435                .cache
436                .as_ref()
437                .is_some_and(|c| c.range.end >= parse_end);
438
439        // Cache hit.
440        if exact_cache_hit {
441            self.stats.cache_hits += 1;
442            return self.filter_cached_spans(viewport_start, viewport_end, theme);
443        }
444
445        // Forward extension.
446        if dirty.is_none()
447            && cache_covers_viewport
448            && self.last_buffer_len == buffer.len()
449            && self
450                .cache
451                .as_ref()
452                .is_some_and(|c| c.range.end < parse_end && c.tail_state.is_some())
453        {
454            return self.extend_cache_forward(
455                buffer,
456                parse_end,
457                viewport_start,
458                viewport_end,
459                theme,
460            );
461        }
462
463        // Partial update.
464        if cache_covers_viewport && dirty.is_some() {
465            if let Some(dirty_pos) = dirty {
466                if dirty_pos < parse_end {
467                    if let Some(result) = self.try_partial_update(
468                        buffer,
469                        dirty_pos,
470                        desired_parse_start,
471                        parse_end,
472                        viewport_start,
473                        viewport_end,
474                        theme,
475                    ) {
476                        return result;
477                    }
478                } else {
479                    // Dirty region past viewport: cached spans are still valid.
480                    self.dirty_from = Some(dirty_pos);
481                    self.stats.cache_hits += 1;
482                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
483                }
484            }
485        } else if let Some(d) = dirty {
486            self.dirty_from = Some(d);
487        }
488
489        // Cold start / fallback.
490        self.full_parse(
491            buffer,
492            desired_parse_start,
493            parse_end,
494            viewport_start,
495            viewport_end,
496            theme,
497            context_bytes,
498        )
499    }
500
501    /// Filter cached spans for the viewport and resolve colors.
502    fn filter_cached_spans(
503        &self,
504        viewport_start: usize,
505        viewport_end: usize,
506        theme: &Theme,
507    ) -> Vec<HighlightSpan> {
508        let cache = self.cache.as_ref().unwrap();
509        cache
510            .spans
511            .iter()
512            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
513            .map(|span| HighlightSpan {
514                range: span.range.clone(),
515                color: highlight_color(span.category, theme),
516                category: Some(span.category),
517            })
518            .collect()
519    }
520
521    /// Partial update path. Returns `Some` whenever an anchor was available,
522    /// even on budget hit or EOF (see post-loop classification). `None` only
523    /// when no checkpoint anchor reaches the dirty point.
524    #[allow(clippy::too_many_arguments)]
525    fn try_partial_update(
526        &mut self,
527        buffer: &Buffer,
528        dirty_pos: usize,
529        desired_parse_start: usize,
530        parse_end: usize,
531        viewport_start: usize,
532        viewport_end: usize,
533        theme: &Theme,
534    ) -> Option<Vec<HighlightSpan>> {
535        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
536
537        // Find checkpoint before the dirty point (bounded search)
538        let (actual_start, mut state, mut current_scopes) = {
539            let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
540            let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
541            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
542            if let Some((id, cp_pos, _)) = nearest {
543                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
544                    (cp_pos, s.clone(), sc.clone())
545                } else {
546                    return None; // orphan, fall back
547                }
548            } else if parse_end <= MAX_PARSE_BYTES {
549                (
550                    0,
551                    syntect::parsing::ParseState::new(syntax),
552                    syntect::parsing::ScopeStack::new(),
553                )
554            } else {
555                return None; // large file, no nearby checkpoint, fall back
556            }
557        };
558
559        // Get markers from dirty point forward for convergence checking
560        let mut markers_ahead: Vec<(MarkerId, usize)> = self
561            .checkpoint_markers
562            .query_range(dirty_pos, parse_end)
563            .into_iter()
564            .map(|(id, start, _)| (id, start))
565            .collect();
566        markers_ahead.sort_by_key(|(_, pos)| *pos);
567        let mut marker_idx = 0;
568
569        // Parse from actual_start to parse_end, looking for convergence
570        let content_end = parse_end.min(buffer.len());
571        if actual_start >= content_end {
572            return None;
573        }
574        let content = buffer.slice_bytes(actual_start..content_end);
575        let content_str = match std::str::from_utf8(&content) {
576            Ok(s) => s,
577            Err(_) => return None,
578        };
579
580        let mut new_spans = Vec::new();
581        let content_bytes = content_str.as_bytes();
582        let mut pos = 0;
583        let mut current_offset = actual_start;
584        let mut converged_at: Option<usize> = None;
585        let mut budget_hit_at: Option<usize> = None;
586        let mut bytes_since_checkpoint: usize = 0;
587
588        while pos < content_bytes.len() {
589            // Create checkpoints in new territory
590            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
591                let nearby = self.checkpoint_markers.query_range(
592                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
593                    current_offset + CHECKPOINT_INTERVAL / 2,
594                );
595                if nearby.is_empty() {
596                    let marker_id = self.checkpoint_markers.create(current_offset, true);
597                    self.checkpoint_states
598                        .insert(marker_id, (state.clone(), current_scopes.clone()));
599                }
600                bytes_since_checkpoint = 0;
601            }
602
603            let line_start = pos;
604            let mut line_end = pos;
605            while line_end < content_bytes.len() {
606                if content_bytes[line_end] == b'\n' {
607                    line_end += 1;
608                    break;
609                } else if content_bytes[line_end] == b'\r' {
610                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
611                        line_end += 2;
612                    } else {
613                        line_end += 1;
614                    }
615                    break;
616                }
617                line_end += 1;
618            }
619
620            let line_bytes = &content_bytes[line_start..line_end];
621            let actual_line_byte_len = line_bytes.len();
622
623            let line_str = match std::str::from_utf8(line_bytes) {
624                Ok(s) => s,
625                Err(_) => {
626                    pos = line_end;
627                    current_offset += actual_line_byte_len;
628                    bytes_since_checkpoint += actual_line_byte_len;
629                    continue;
630                }
631            };
632
633            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
634            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
635                format!("{}\n", line_content)
636            } else {
637                line_content.to_string()
638            };
639
640            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
641                Ok(ops) => ops,
642                Err(_) => {
643                    pos = line_end;
644                    current_offset += actual_line_byte_len;
645                    bytes_since_checkpoint += actual_line_byte_len;
646                    continue;
647                }
648            };
649
650            // Collect spans for the dirty region
651            let collect_spans =
652                current_offset + actual_line_byte_len > desired_parse_start.max(actual_start);
653            let mut syntect_offset = 0;
654            let line_content_len = line_content.len();
655
656            for (op_offset, op) in ops {
657                let clamped_op_offset = op_offset.min(line_content_len);
658                if collect_spans && clamped_op_offset > syntect_offset {
659                    if let Some(category) = self.scope_stack_to_category(&current_scopes) {
660                        let byte_start = current_offset + syntect_offset;
661                        let byte_end = current_offset + clamped_op_offset;
662                        let clamped_start = byte_start.max(actual_start);
663                        if clamped_start < byte_end {
664                            new_spans.push(CachedSpan {
665                                range: clamped_start..byte_end,
666                                category,
667                            });
668                        }
669                    }
670                }
671                syntect_offset = clamped_op_offset;
672                #[allow(clippy::let_underscore_must_use)]
673                let _ = current_scopes.apply(&op);
674            }
675
676            if collect_spans && syntect_offset < line_content_len {
677                if let Some(category) = self.scope_stack_to_category(&current_scopes) {
678                    let byte_start = current_offset + syntect_offset;
679                    let byte_end = current_offset + line_content_len;
680                    let clamped_start = byte_start.max(actual_start);
681                    if clamped_start < byte_end {
682                        new_spans.push(CachedSpan {
683                            range: clamped_start..byte_end,
684                            category,
685                        });
686                    }
687                }
688            }
689
690            pos = line_end;
691            current_offset += actual_line_byte_len;
692            bytes_since_checkpoint += actual_line_byte_len;
693
694            // Check convergence at checkpoint markers
695            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
696            {
697                let (marker_id, _) = markers_ahead[marker_idx];
698                marker_idx += 1;
699                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
700                    if *stored == (state.clone(), current_scopes.clone()) {
701                        self.stats.convergences += 1;
702                        converged_at = Some(current_offset);
703                        break;
704                    }
705                }
706                self.stats.checkpoints_updated += 1;
707                self.checkpoint_states
708                    .insert(marker_id, (state.clone(), current_scopes.clone()));
709            }
710
711            if converged_at.is_some() {
712                break;
713            }
714
715            // Bound work per pass: pathological edits (e.g. unclosed `/*`
716            // re-scoping the rest of the file) can never converge. Stop here
717            // and resume from `current_offset` on the next render.
718            if current_offset.saturating_sub(dirty_pos) >= CONVERGENCE_BUDGET {
719                budget_hit_at = Some(current_offset);
720                break;
721            }
722        }
723
724        self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
725
726        // Splice classification: converged → clear dirty; budget hit → keep
727        // dirty for next pass; EOF → clear dirty.
728        let (splice_end, dirty_after) = if let Some(c) = converged_at {
729            (c, None)
730        } else if let Some(b) = budget_hit_at {
731            (b, Some(b))
732        } else {
733            (current_offset, None)
734        };
735
736        self.stats.cache_misses += 1; // partial update counts as a miss
737
738        Self::merge_adjacent_spans(&mut new_spans);
739
740        if let Some(cache) = &mut self.cache {
741            let splice_start = actual_start;
742            cache
743                .spans
744                .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
745            cache.spans.extend(new_spans);
746            cache.spans.sort_by_key(|s| s.range.start);
747            Self::merge_adjacent_spans(&mut cache.spans);
748            if splice_end > cache.range.end {
749                cache.range.end = splice_end;
750            }
751            cache.tail_state = None;
752        }
753
754        self.last_buffer_len = buffer.len();
755        self.dirty_from = dirty_after;
756
757        Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
758    }
759
    /// Forward extension path (see module docs): resume parsing from the
    /// cached `tail_state` at `cache.range.end` and parse only the bytes up
    /// to `parse_end` (clamped to the buffer length), appending the new spans
    /// to the cache.
    ///
    /// Caller checks the cache exists, has a `tail_state`, has no dirty
    /// edits, and `cache.range.end < parse_end`.
    ///
    /// Returns the result of `filter_cached_spans` for
    /// `viewport_start..viewport_end`. When there is nothing to extend, or
    /// the uncovered slice is not valid UTF-8, the cache is left untouched
    /// and that same filtered result is returned.
    ///
    /// # Panics
    ///
    /// Panics if the cache or its `tail_state` is missing.
    fn extend_cache_forward(
        &mut self,
        buffer: &Buffer,
        parse_end: usize,
        viewport_start: usize,
        viewport_end: usize,
        theme: &Theme,
    ) -> Vec<HighlightSpan> {
        // Every call is recorded as a cache miss, including the early returns below.
        self.stats.cache_misses += 1;
        let buf_len = buffer.len();
        let parse_end = parse_end.min(buf_len);

        // Clone the tail snapshot inside a block so the shared borrow of
        // `self.cache` ends before the `&mut self` calls further down.
        let (extension_start, mut state, mut current_scopes) = {
            let cache = self
                .cache
                .as_ref()
                .expect("extend_cache_forward: cache must exist");
            let (s, sc) = cache
                .tail_state
                .as_ref()
                .expect("extend_cache_forward: tail_state must exist")
                .clone();
            (cache.range.end, s, sc)
        };

        // Cache already reaches `parse_end`: nothing to parse.
        if parse_end <= extension_start {
            return self.filter_cached_spans(viewport_start, viewport_end, theme);
        }

        let content = buffer.slice_bytes(extension_start..parse_end);
        // NOTE(review): if the slice is not valid UTF-8 (e.g. `parse_end` lands
        // inside a multi-byte character) the whole extension is abandoned and
        // the cache is not advanced — confirm callers align `parse_end`.
        let content_str = match std::str::from_utf8(&content) {
            Ok(s) => s,
            Err(_) => return self.filter_cached_spans(viewport_start, viewport_end, theme),
        };

        let mut new_spans = Vec::new();
        let content_bytes = content_str.as_bytes();
        // `pos` indexes into `content_bytes`; `current_offset` is the matching
        // absolute buffer offset.
        let mut pos = 0;
        let mut current_offset = extension_start;
        let mut bytes_since_checkpoint: usize = 0;

        while pos < content_bytes.len() {
            // Once CHECKPOINT_INTERVAL bytes have been parsed since the last
            // attempt, snapshot the parser state at this line start — unless a
            // marker already exists within half an interval on either side.
            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
                let nearby = self.checkpoint_markers.query_range(
                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
                    current_offset + CHECKPOINT_INTERVAL / 2,
                );
                if nearby.is_empty() {
                    let marker_id = self.checkpoint_markers.create(current_offset, true);
                    self.checkpoint_states
                        .insert(marker_id, (state.clone(), current_scopes.clone()));
                }
                bytes_since_checkpoint = 0;
            }

            // Find the end of the current line. The terminator (`\n`, `\r\n`
            // or a lone `\r`) is included in `line_start..line_end`.
            let line_start = pos;
            let mut line_end = pos;
            while line_end < content_bytes.len() {
                if content_bytes[line_end] == b'\n' {
                    line_end += 1;
                    break;
                } else if content_bytes[line_end] == b'\r' {
                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
                        line_end += 2;
                    } else {
                        line_end += 1;
                    }
                    break;
                }
                line_end += 1;
            }

            let line_bytes = &content_bytes[line_start..line_end];
            let actual_line_byte_len = line_bytes.len();

            // On a decode failure the line is skipped without spans, but all
            // offsets still advance past it.
            let line_str = match std::str::from_utf8(line_bytes) {
                Ok(s) => s,
                Err(_) => {
                    pos = line_end;
                    current_offset += actual_line_byte_len;
                    bytes_since_checkpoint += actual_line_byte_len;
                    continue;
                }
            };

            // Strip the terminator, then hand syntect the text with a single
            // `\n` when this is not the last line of the slice or the line
            // ends in `\n`; otherwise pass the bare text.
            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
                format!("{}\n", line_content)
            } else {
                line_content.to_string()
            };

            // A parse error skips the line the same way as a decode failure.
            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
                Ok(ops) => ops,
                Err(_) => {
                    pos = line_end;
                    current_offset += actual_line_byte_len;
                    bytes_since_checkpoint += actual_line_byte_len;
                    continue;
                }
            };

            // `syntect_offset` is the line-relative position up to which text
            // has already been attributed to a span.
            let mut syntect_offset = 0;
            let line_content_len = line_content.len();

            for (op_offset, op) in ops {
                // Clamp to the stripped line so spans never cover the terminator.
                let clamped_op_offset = op_offset.min(line_content_len);
                // Text before this op belongs to the scope stack as it was
                // before the op is applied.
                if clamped_op_offset > syntect_offset {
                    if let Some(category) = self.scope_stack_to_category(&current_scopes) {
                        let byte_start = current_offset + syntect_offset;
                        let byte_end = current_offset + clamped_op_offset;
                        if byte_start < byte_end {
                            new_spans.push(CachedSpan {
                                range: byte_start..byte_end,
                                category,
                            });
                        }
                    }
                }
                syntect_offset = clamped_op_offset;
                // The result of applying the op is deliberately ignored.
                #[allow(clippy::let_underscore_must_use)]
                let _ = current_scopes.apply(&op);
            }

            // Text after the last op, up to the end of the stripped line.
            if syntect_offset < line_content_len {
                if let Some(category) = self.scope_stack_to_category(&current_scopes) {
                    let byte_start = current_offset + syntect_offset;
                    let byte_end = current_offset + line_content_len;
                    if byte_start < byte_end {
                        new_spans.push(CachedSpan {
                            range: byte_start..byte_end,
                            category,
                        });
                    }
                }
            }

            pos = line_end;
            current_offset += actual_line_byte_len;
            bytes_since_checkpoint += actual_line_byte_len;
        }

        // No underflow: `parse_end > extension_start` was checked above.
        self.stats.bytes_parsed += parse_end - extension_start;

        Self::merge_adjacent_spans(&mut new_spans);

        // Append the new spans, merge once more across the old/new boundary,
        // then advance the cache end and store the new tail snapshot.
        let cache = self
            .cache
            .as_mut()
            .expect("extend_cache_forward: cache must still exist");
        cache.spans.extend(new_spans);
        Self::merge_adjacent_spans(&mut cache.spans);
        cache.range.end = parse_end;
        cache.tail_state = Some((state, current_scopes));
        self.last_buffer_len = buf_len;

        self.filter_cached_spans(viewport_start, viewport_end, theme)
    }
921
922    /// Full re-parse from desired_parse_start to parse_end. Used on cold start
923    /// or when partial update fails (no convergence).
924    #[allow(clippy::too_many_arguments)]
925    fn full_parse(
926        &mut self,
927        buffer: &Buffer,
928        desired_parse_start: usize,
929        parse_end: usize,
930        viewport_start: usize,
931        viewport_end: usize,
932        theme: &Theme,
933        _context_bytes: usize,
934    ) -> Vec<HighlightSpan> {
935        self.stats.cache_misses += 1;
936        self.dirty_from = None; // consumed
937
938        if parse_end <= desired_parse_start {
939            return Vec::new();
940        }
941
942        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
943        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
944            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
945
946        let content = buffer.slice_bytes(actual_start..parse_end);
947        let content_str = match std::str::from_utf8(&content) {
948            Ok(s) => s,
949            Err(_) => return Vec::new(),
950        };
951
952        let mut spans = Vec::new();
953        let content_bytes = content_str.as_bytes();
954        let mut pos = 0;
955        let mut current_offset = actual_start;
956        let mut bytes_since_checkpoint: usize = 0;
957
958        while pos < content_bytes.len() {
959            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
960                let nearby = self.checkpoint_markers.query_range(
961                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
962                    current_offset + CHECKPOINT_INTERVAL / 2,
963                );
964                if nearby.is_empty() {
965                    let marker_id = self.checkpoint_markers.create(current_offset, true);
966                    self.checkpoint_states
967                        .insert(marker_id, (state.clone(), current_scopes.clone()));
968                }
969                bytes_since_checkpoint = 0;
970            }
971
972            let line_start = pos;
973            let mut line_end = pos;
974
975            while line_end < content_bytes.len() {
976                if content_bytes[line_end] == b'\n' {
977                    line_end += 1;
978                    break;
979                } else if content_bytes[line_end] == b'\r' {
980                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
981                        line_end += 2;
982                    } else {
983                        line_end += 1;
984                    }
985                    break;
986                }
987                line_end += 1;
988            }
989
990            let line_bytes = &content_bytes[line_start..line_end];
991            let actual_line_byte_len = line_bytes.len();
992
993            let line_str = match std::str::from_utf8(line_bytes) {
994                Ok(s) => s,
995                Err(_) => {
996                    pos = line_end;
997                    current_offset += actual_line_byte_len;
998                    bytes_since_checkpoint += actual_line_byte_len;
999                    continue;
1000                }
1001            };
1002
1003            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
1004            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
1005                format!("{}\n", line_content)
1006            } else {
1007                line_content.to_string()
1008            };
1009
1010            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
1011                Ok(ops) => ops,
1012                Err(_) => {
1013                    pos = line_end;
1014                    current_offset += actual_line_byte_len;
1015                    bytes_since_checkpoint += actual_line_byte_len;
1016                    continue;
1017                }
1018            };
1019
1020            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
1021            let mut syntect_offset = 0;
1022            let line_content_len = line_content.len();
1023
1024            for (op_offset, op) in ops {
1025                let clamped_op_offset = op_offset.min(line_content_len);
1026                if collect_spans && clamped_op_offset > syntect_offset {
1027                    if let Some(category) = self.scope_stack_to_category(&current_scopes) {
1028                        let byte_start = current_offset + syntect_offset;
1029                        let byte_end = current_offset + clamped_op_offset;
1030                        let clamped_start = byte_start.max(desired_parse_start);
1031                        if clamped_start < byte_end {
1032                            spans.push(CachedSpan {
1033                                range: clamped_start..byte_end,
1034                                category,
1035                            });
1036                        }
1037                    }
1038                }
1039                syntect_offset = clamped_op_offset;
1040                #[allow(clippy::let_underscore_must_use)]
1041                let _ = current_scopes.apply(&op);
1042            }
1043
1044            if collect_spans && syntect_offset < line_content_len {
1045                if let Some(category) = self.scope_stack_to_category(&current_scopes) {
1046                    let byte_start = current_offset + syntect_offset;
1047                    let byte_end = current_offset + line_content_len;
1048                    let clamped_start = byte_start.max(desired_parse_start);
1049                    if clamped_start < byte_end {
1050                        spans.push(CachedSpan {
1051                            range: clamped_start..byte_end,
1052                            category,
1053                        });
1054                    }
1055                }
1056            }
1057
1058            pos = line_end;
1059            current_offset += actual_line_byte_len;
1060            bytes_since_checkpoint += actual_line_byte_len;
1061
1062            // Update checkpoint states as we pass them
1063            let markers_here: Vec<(MarkerId, usize)> = self
1064                .checkpoint_markers
1065                .query_range(
1066                    current_offset.saturating_sub(actual_line_byte_len),
1067                    current_offset,
1068                )
1069                .into_iter()
1070                .map(|(id, start, _)| (id, start))
1071                .collect();
1072            for (marker_id, _) in markers_here {
1073                self.checkpoint_states
1074                    .insert(marker_id, (state.clone(), current_scopes.clone()));
1075            }
1076        }
1077
1078        self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
1079
1080        Self::merge_adjacent_spans(&mut spans);
1081
1082        self.cache = Some(TextMateCache {
1083            range: desired_parse_start..parse_end,
1084            spans: spans.clone(),
1085            tail_state: Some((state, current_scopes)),
1086        });
1087        self.last_buffer_len = buffer.len();
1088
1089        spans
1090            .into_iter()
1091            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
1092            .map(|span| {
1093                let cat = span.category;
1094                HighlightSpan {
1095                    range: span.range,
1096                    color: highlight_color(cat, theme),
1097                    category: Some(cat),
1098                }
1099            })
1100            .collect()
1101    }
1102
1103    /// Find the best point to resume parsing from for the viewport.
1104    fn find_parse_resume_point(
1105        &self,
1106        desired_start: usize,
1107        parse_end: usize,
1108        syntax: &syntect::parsing::SyntaxReference,
1109    ) -> (
1110        usize,
1111        syntect::parsing::ParseState,
1112        syntect::parsing::ScopeStack,
1113        bool,
1114    ) {
1115        use syntect::parsing::{ParseState, ScopeStack};
1116
1117        // Look for a checkpoint near the desired start. For large files, only
1118        // consider checkpoints that are within MAX_PARSE_BYTES of desired_start
1119        // to avoid parsing hundreds of MB from a distant checkpoint.
1120        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
1121        let markers = self
1122            .checkpoint_markers
1123            .query_range(search_start, desired_start + 1);
1124        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
1125
1126        if let Some((id, cp_pos, _)) = nearest {
1127            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
1128                return (cp_pos, s.clone(), sc.clone(), true);
1129            }
1130        }
1131
1132        if parse_end <= MAX_PARSE_BYTES {
1133            // File is small enough to parse from byte 0
1134            (0, ParseState::new(syntax), ScopeStack::new(), true)
1135        } else {
1136            // Large file, no nearby checkpoint — start fresh from desired_start.
1137            // Still create checkpoints so future visits to this region can resume.
1138            (
1139                desired_start,
1140                ParseState::new(syntax),
1141                ScopeStack::new(),
1142                true,
1143            )
1144        }
1145    }
1146
1147    /// Map scope stack to highlight category, memoising per-scope lookups.
1148    /// `scope.build_string()` is the costly step; the cache hides it after
1149    /// each scope atom has been seen once.
1150    fn scope_stack_to_category(
1151        &mut self,
1152        scopes: &syntect::parsing::ScopeStack,
1153    ) -> Option<HighlightCategory> {
1154        for scope in scopes.as_slice().iter().rev() {
1155            let cat = match self.scope_category_cache.get(scope) {
1156                Some(c) => *c,
1157                None => {
1158                    let computed = scope_to_category(&scope.build_string());
1159                    self.scope_category_cache.insert(*scope, computed);
1160                    computed
1161                }
1162            };
1163            if let Some(c) = cat {
1164                return Some(c);
1165            }
1166        }
1167        None
1168    }
1169
1170    /// Merge adjacent spans with same category
1171    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
1172        if spans.len() < 2 {
1173            return;
1174        }
1175
1176        let mut write_idx = 0;
1177        for read_idx in 1..spans.len() {
1178            if spans[write_idx].category == spans[read_idx].category
1179                && spans[write_idx].range.end == spans[read_idx].range.start
1180            {
1181                spans[write_idx].range.end = spans[read_idx].range.end;
1182            } else {
1183                write_idx += 1;
1184                if write_idx != read_idx {
1185                    spans[write_idx] = spans[read_idx].clone();
1186                }
1187            }
1188        }
1189        spans.truncate(write_idx + 1);
1190    }
1191
    /// Hook invoked for an edited range; deliberately a no-op for this engine.
    ///
    /// Checkpoint positions are handled by `notify_insert`/`notify_delete`.
    /// The span cache is NOT cleared here — it will be patched (partial update)
    /// during the next `highlight_viewport` call using convergence. Only
    /// `dirty_from` (set by `notify_insert`/`notify_delete`) controls the
    /// re-parsing scope, so `_edit_range` is ignored.
    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
        // Intentionally does NOT clear self.cache.
        // The cache will be partially updated in highlight_viewport when
        // dirty_from is set. This avoids full re-parses for small edits.
    }
1202
1203    /// Invalidate all cache and checkpoints (file reload, language change, etc.)
1204    pub fn invalidate_all(&mut self) {
1205        self.cache = None;
1206        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1207        for id in ids {
1208            self.checkpoint_markers.delete(id);
1209        }
1210        self.checkpoint_states.clear();
1211        self.dirty_from = None;
1212    }
1213
1214    /// Get the highlight category at a byte position from the cache.
1215    ///
1216    /// Returns the category if the position falls within a cached highlight span.
1217    /// The position must be within the last highlighted viewport range for a result.
1218    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1219        let cache = self.cache.as_ref()?;
1220        cache
1221            .spans
1222            .iter()
1223            .find(|span| span.range.start <= position && position < span.range.end)
1224            .map(|span| span.category)
1225    }
1226
1227    /// Get syntax name
1228    pub fn syntax_name(&self) -> &str {
1229        &self.syntax_set.syntaxes()[self.syntax_index].name
1230    }
1231}
1232
1233impl HighlightEngine {
1234    /// Build a highlighting engine for a catalog entry.
1235    ///
1236    /// Single chokepoint for the "prefer syntect, fall back to tree-sitter"
1237    /// logic. Callers that start from a path or a syntax name should resolve
1238    /// the entry through `GrammarRegistry::find_by_path` / `find_by_name` and
1239    /// then call this.
1240    pub fn from_entry(
1241        entry: &crate::primitives::grammar::GrammarEntry,
1242        registry: &GrammarRegistry,
1243    ) -> Self {
1244        let syntax_set = registry.syntax_set_arc();
1245        if let Some(index) = entry.engines.syntect {
1246            return Self::TextMate(Box::new(TextMateEngine::with_language(
1247                syntax_set,
1248                index,
1249                entry.engines.tree_sitter,
1250            )));
1251        }
1252        if let Some(lang) = entry.engines.tree_sitter {
1253            if let Ok(highlighter) = Highlighter::new(lang) {
1254                return Self::TreeSitter(Box::new(highlighter));
1255            }
1256        }
1257        Self::None
1258    }
1259
1260    /// Create a highlighting engine for a file.
1261    ///
1262    /// Thin wrapper around `from_entry` that resolves the path via the catalog.
1263    /// User-config-declared filename/extension mappings are honoured as long as
1264    /// `GrammarRegistry::apply_language_config` has been called on the registry.
1265    /// `first_line` is used for shebang / first-line regex fallback — pass
1266    /// `None` when no content is available.
1267    pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1268        if let Some(entry) = registry.find_by_path(path, first_line) {
1269            return Self::from_entry(entry, registry);
1270        }
1271        Self::None
1272    }
1273
1274    /// Create a highlighting engine for a syntax by name.
1275    ///
1276    /// Thin wrapper around `from_entry` that performs the lookup via
1277    /// `find_by_name`. The catalog entry already knows which tree-sitter
1278    /// `Language` (if any) serves it, so no separate hint is needed.
1279    pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1280        if let Some(entry) = registry.find_by_name(name) {
1281            return Self::from_entry(entry, registry);
1282        }
1283        Self::None
1284    }
1285
1286    /// Highlight the visible viewport
1287    ///
1288    /// `context_bytes` controls how far before/after the viewport to parse for accurate
1289    /// highlighting of multi-line constructs (strings, comments, nested blocks).
1290    pub fn highlight_viewport(
1291        &mut self,
1292        buffer: &Buffer,
1293        viewport_start: usize,
1294        viewport_end: usize,
1295        theme: &Theme,
1296        context_bytes: usize,
1297    ) -> Vec<HighlightSpan> {
1298        match self {
1299            Self::TreeSitter(h) => {
1300                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1301            }
1302            Self::TextMate(h) => {
1303                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1304            }
1305            Self::None => Vec::new(),
1306        }
1307    }
1308
1309    /// Notify the highlighting engine of a buffer insert (for checkpoint position tracking).
1310    pub fn notify_insert(&mut self, position: usize, length: usize) {
1311        if let Self::TextMate(h) = self {
1312            h.notify_insert(position, length);
1313        }
1314    }
1315
1316    /// Notify the highlighting engine of a buffer delete (for checkpoint position tracking).
1317    pub fn notify_delete(&mut self, position: usize, length: usize) {
1318        if let Self::TextMate(h) = self {
1319            h.notify_delete(position, length);
1320        }
1321    }
1322
1323    /// Invalidate cache for an edited range
1324    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1325        match self {
1326            Self::TreeSitter(h) => h.invalidate_range(edit_range),
1327            Self::TextMate(h) => h.invalidate_range(edit_range),
1328            Self::None => {}
1329        }
1330    }
1331
1332    /// Invalidate entire cache
1333    pub fn invalidate_all(&mut self) {
1334        match self {
1335            Self::TreeSitter(h) => h.invalidate_all(),
1336            Self::TextMate(h) => h.invalidate_all(),
1337            Self::None => {}
1338        }
1339    }
1340
1341    /// Check if this engine has highlighting available
1342    pub fn has_highlighting(&self) -> bool {
1343        !matches!(self, Self::None)
1344    }
1345
1346    /// Get a description of the active backend
1347    pub fn backend_name(&self) -> &str {
1348        match self {
1349            Self::TreeSitter(_) => "tree-sitter",
1350            Self::TextMate(_) => "textmate",
1351            Self::None => "none",
1352        }
1353    }
1354
1355    /// Get performance stats (TextMate engine only).
1356    pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1357        if let Self::TextMate(h) = self {
1358            Some(h.stats())
1359        } else {
1360            None
1361        }
1362    }
1363
1364    /// Reset performance counters.
1365    pub fn reset_highlight_stats(&mut self) {
1366        if let Self::TextMate(h) = self {
1367            h.reset_stats();
1368        }
1369    }
1370
1371    /// Get the language/syntax name if available
1372    pub fn syntax_name(&self) -> Option<&str> {
1373        match self {
1374            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
1375            Self::TextMate(h) => Some(h.syntax_name()),
1376            Self::None => None,
1377        }
1378    }
1379
1380    /// Get the highlight category at a byte position from the cache.
1381    ///
1382    /// Returns the category if the position falls within a cached highlight span.
1383    /// Useful for detecting whether the cursor is inside a string, comment, etc.
1384    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1385        match self {
1386            Self::TreeSitter(h) => h.category_at_position(position),
1387            Self::TextMate(h) => h.category_at_position(position),
1388            Self::None => None,
1389        }
1390    }
1391
1392    /// Get the tree-sitter Language for non-highlighting features
1393    /// Returns the language even when using TextMate for highlighting
1394    pub fn language(&self) -> Option<&Language> {
1395        match self {
1396            Self::TreeSitter(h) => Some(h.language()),
1397            Self::TextMate(h) => h.language(),
1398            Self::None => None,
1399        }
1400    }
1401}
1402
1403/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
1404/// Returns spans with byte ranges relative to the input string.
1405///
1406/// This uses TextMate grammars via syntect which provides broader language coverage
1407/// than tree-sitter (~150+ languages vs ~17).
1408pub fn highlight_string(
1409    code: &str,
1410    lang_hint: &str,
1411    registry: &GrammarRegistry,
1412    theme: &Theme,
1413) -> Vec<HighlightSpan> {
1414    use syntect::parsing::{ParseState, ScopeStack};
1415
1416    // Find syntax by language token (handles aliases like "py" -> Python)
1417    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1418        Some(s) => s,
1419        None => return Vec::new(),
1420    };
1421
1422    let syntax_set = registry.syntax_set();
1423    let mut state = ParseState::new(syntax);
1424    let mut spans = Vec::new();
1425    let mut current_scopes = ScopeStack::new();
1426    let mut current_offset = 0;
1427
1428    // Parse line by line
1429    for line in code.split_inclusive('\n') {
1430        let line_start = current_offset;
1431        let line_len = line.len();
1432
1433        // Remove trailing newline for syntect, then add it back
1434        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1435        let line_for_syntect = if line.ends_with('\n') {
1436            format!("{}\n", line_content)
1437        } else {
1438            line_content.to_string()
1439        };
1440
1441        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1442            Ok(ops) => ops,
1443            Err(_) => {
1444                current_offset += line_len;
1445                continue;
1446            }
1447        };
1448
1449        let mut syntect_offset = 0;
1450        let line_content_len = line_content.len();
1451
1452        for (op_offset, op) in ops {
1453            let clamped_op_offset = op_offset.min(line_content_len);
1454            if clamped_op_offset > syntect_offset {
1455                if let Some(category) = scope_stack_to_category(&current_scopes) {
1456                    let byte_start = line_start + syntect_offset;
1457                    let byte_end = line_start + clamped_op_offset;
1458                    if byte_start < byte_end {
1459                        spans.push(HighlightSpan {
1460                            range: byte_start..byte_end,
1461                            color: highlight_color(category, theme),
1462                            category: Some(category),
1463                        });
1464                    }
1465                }
1466            }
1467            syntect_offset = clamped_op_offset;
1468            // Scope stack errors are non-fatal for highlighting
1469            #[allow(clippy::let_underscore_must_use)]
1470            let _ = current_scopes.apply(&op);
1471        }
1472
1473        // Handle remaining text on line
1474        if syntect_offset < line_content_len {
1475            if let Some(category) = scope_stack_to_category(&current_scopes) {
1476                let byte_start = line_start + syntect_offset;
1477                let byte_end = line_start + line_content_len;
1478                if byte_start < byte_end {
1479                    spans.push(HighlightSpan {
1480                        range: byte_start..byte_end,
1481                        color: highlight_color(category, theme),
1482                        category: Some(category),
1483                    });
1484                }
1485            }
1486        }
1487
1488        current_offset += line_len;
1489    }
1490
1491    // Merge adjacent spans with same color
1492    merge_adjacent_highlight_spans(&mut spans);
1493
1494    spans
1495}
1496
1497/// Map scope stack to highlight category (for highlight_string)
1498fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1499    for scope in scopes.as_slice().iter().rev() {
1500        let scope_str = scope.build_string();
1501        if let Some(cat) = scope_to_category(&scope_str) {
1502            return Some(cat);
1503        }
1504    }
1505    None
1506}
1507
1508/// Merge adjacent spans with same color
1509fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1510    if spans.len() < 2 {
1511        return;
1512    }
1513
1514    let mut write_idx = 0;
1515    for read_idx in 1..spans.len() {
1516        if spans[write_idx].color == spans[read_idx].color
1517            && spans[write_idx].range.end == spans[read_idx].range.start
1518        {
1519            spans[write_idx].range.end = spans[read_idx].range.end;
1520        } else {
1521            write_idx += 1;
1522            if write_idx != read_idx {
1523                spans[write_idx] = spans[read_idx].clone();
1524            }
1525        }
1526    }
1527    spans.truncate(write_idx + 1);
1528}
1529
1530#[cfg(test)]
1531mod tests {
1532    use crate::model::filesystem::StdFileSystem;
1533    use std::sync::Arc;
1534
1535    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1536        Arc::new(StdFileSystem)
1537    }
1538    use super::*;
1539    use crate::view::theme;
1540
1541    #[test]
1542    fn test_highlight_engine_default() {
1543        let engine = HighlightEngine::default();
1544        assert!(!engine.has_highlighting());
1545        assert_eq!(engine.backend_name(), "none");
1546    }
1547
1548    #[test]
1549    fn test_textmate_backend_selection() {
1550        let registry =
1551            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1552
1553        // Languages with TextMate grammars use TextMate for highlighting
1554        let engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1555        assert_eq!(engine.backend_name(), "textmate");
1556        // Tree-sitter language should still be detected for other features
1557        assert!(engine.language().is_some());
1558
1559        let engine = HighlightEngine::for_file(Path::new("test.py"), None, &registry);
1560        assert_eq!(engine.backend_name(), "textmate");
1561        assert!(engine.language().is_some());
1562
1563        let engine = HighlightEngine::for_file(Path::new("test.js"), None, &registry);
1564        assert_eq!(engine.backend_name(), "textmate");
1565        assert!(engine.language().is_some());
1566
1567        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
1568        let engine = HighlightEngine::for_file(Path::new("test.ts"), None, &registry);
1569        assert_eq!(engine.backend_name(), "tree-sitter");
1570        assert!(engine.language().is_some());
1571
1572        let engine = HighlightEngine::for_file(Path::new("test.tsx"), None, &registry);
1573        assert_eq!(engine.backend_name(), "tree-sitter");
1574        assert!(engine.language().is_some());
1575    }
1576
1577    #[test]
1578    fn test_tree_sitter_direct() {
1579        // Verify tree-sitter highlighter can be created directly for Rust
1580        let highlighter = Highlighter::new(Language::Rust);
1581        assert!(highlighter.is_ok());
1582    }
1583
1584    #[test]
1585    fn test_unknown_extension() {
1586        let registry =
1587            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1588
1589        // Unknown extension
1590        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), None, &registry);
1591        // Might be none or might find something via syntect
1592        // Just verify it doesn't panic
1593        let _ = engine.backend_name();
1594    }
1595
1596    #[test]
1597    fn test_highlight_viewport_empty_buffer_no_panic() {
1598        // Regression test: calling highlight_viewport with an empty buffer
1599        // and non-zero viewport range previously caused subtraction overflow panic.
1600        //
1601        // The bug occurred when:
1602        // - buffer is empty (len = 0)
1603        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
1604        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
1605        // - parse_end - parse_start would underflow (0 - positive = overflow)
1606        let registry =
1607            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1608
1609        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1610
1611        // Create empty buffer
1612        let buffer = Buffer::from_str("", 0, test_fs());
1613        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1614
1615        // Test the specific case that triggered the overflow:
1616        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
1617        // 0 - 90 = overflow!
1618        if let HighlightEngine::TextMate(ref mut tm) = engine {
1619            // Small context_bytes so parse_start remains > 0
1620            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1621            assert!(spans.is_empty());
1622        }
1623    }
1624
1625    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
1626    /// This is a regression test for a bug where using str::lines() caused 1-byte
1627    /// offset drift per line because it strips line terminators.
1628    #[test]
1629    fn test_textmate_engine_crlf_byte_offsets() {
1630        let registry =
1631            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1632
1633        let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, &registry);
1634
1635        // Create CRLF content with keywords on each line
1636        // Each "public" keyword should be highlighted at byte positions:
1637        // Line 1: "public" at bytes 0-5
1638        // Line 2: "public" at bytes 8-13 (after "public\r\n" = 8 bytes)
1639        // Line 3: "public" at bytes 16-21 (after two "public\r\n" = 16 bytes)
1640        let content = b"public\r\npublic\r\npublic\r\n";
1641        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1642        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1643
1644        if let HighlightEngine::TextMate(ref mut tm) = engine {
1645            // Highlight the entire content
1646            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1647
1648            // Find spans that cover keyword positions
1649            // The keyword "public" should have spans at these byte ranges:
1650            // Line 1: 0..6
1651            // Line 2: 8..14 (NOT 7..13 which would be the buggy offset)
1652            // Line 3: 16..22 (NOT 14..20 which would be the buggy offset)
1653
1654            eprintln!(
1655                "Spans: {:?}",
1656                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1657            );
1658
1659            // Check that we have spans covering the correct positions
1660            let has_span_at = |start: usize, end: usize| -> bool {
1661                spans
1662                    .iter()
1663                    .any(|s| s.range.start <= start && s.range.end >= end)
1664            };
1665
1666            // Line 1: "public" at bytes 0-6
1667            assert!(
1668                has_span_at(0, 6),
1669                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1670                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1671            );
1672
1673            // Line 2: "public" at bytes 8-14 (after "public\r\n")
1674            // If buggy, would be at 7-13
1675            assert!(
1676                has_span_at(8, 14),
1677                "Should have span covering bytes 8-14 (line 2 'public'). \
1678                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1679                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1680            );
1681
1682            // Line 3: "public" at bytes 16-22 (after two "public\r\n")
1683            // If buggy, would be at 14-20
1684            assert!(
1685                has_span_at(16, 22),
1686                "Should have span covering bytes 16-22 (line 3 'public'). \
1687                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1688                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1689            );
1690        } else {
1691            panic!("Expected TextMate engine for .java file");
1692        }
1693    }
1694
1695    #[test]
1696    fn test_git_rebase_todo_highlighting() {
1697        let registry =
1698            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1699
1700        // git-rebase-todo files should use the Git Rebase Todo grammar
1701        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), None, &registry);
1702        assert_eq!(engine.backend_name(), "textmate");
1703        assert!(engine.has_highlighting());
1704    }
1705
1706    #[test]
1707    fn test_git_commit_message_highlighting() {
1708        let registry =
1709            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1710
1711        // COMMIT_EDITMSG should use the Git Commit Message grammar
1712        let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), None, &registry);
1713        assert_eq!(engine.backend_name(), "textmate");
1714        assert!(engine.has_highlighting());
1715
1716        // MERGE_MSG should also work
1717        let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), None, &registry);
1718        assert_eq!(engine.backend_name(), "textmate");
1719        assert!(engine.has_highlighting());
1720    }
1721
1722    #[test]
1723    fn test_gitignore_highlighting() {
1724        let registry =
1725            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1726
1727        // .gitignore should use the Gitignore grammar
1728        let engine = HighlightEngine::for_file(Path::new(".gitignore"), None, &registry);
1729        assert_eq!(engine.backend_name(), "textmate");
1730        assert!(engine.has_highlighting());
1731
1732        // .dockerignore should also work
1733        let engine = HighlightEngine::for_file(Path::new(".dockerignore"), None, &registry);
1734        assert_eq!(engine.backend_name(), "textmate");
1735        assert!(engine.has_highlighting());
1736    }
1737
1738    #[test]
1739    fn test_gitconfig_highlighting() {
1740        let registry =
1741            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1742
1743        // .gitconfig should use the Git Config grammar
1744        let engine = HighlightEngine::for_file(Path::new(".gitconfig"), None, &registry);
1745        assert_eq!(engine.backend_name(), "textmate");
1746        assert!(engine.has_highlighting());
1747
1748        // .gitmodules should also work
1749        let engine = HighlightEngine::for_file(Path::new(".gitmodules"), None, &registry);
1750        assert_eq!(engine.backend_name(), "textmate");
1751        assert!(engine.has_highlighting());
1752    }
1753
1754    #[test]
1755    fn test_gitattributes_highlighting() {
1756        let registry =
1757            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1758
1759        // .gitattributes should use the Git Attributes grammar
1760        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), None, &registry);
1761        assert_eq!(engine.backend_name(), "textmate");
1762        assert!(engine.has_highlighting());
1763    }
1764
1765    #[test]
1766    fn test_comment_delimiter_uses_comment_color() {
1767        // Comment delimiters (#, //, /*) should use comment color, not operator
1768        assert_eq!(
1769            scope_to_category("punctuation.definition.comment"),
1770            Some(HighlightCategory::Comment)
1771        );
1772        assert_eq!(
1773            scope_to_category("punctuation.definition.comment.python"),
1774            Some(HighlightCategory::Comment)
1775        );
1776        assert_eq!(
1777            scope_to_category("punctuation.definition.comment.begin"),
1778            Some(HighlightCategory::Comment)
1779        );
1780    }
1781
1782    #[test]
1783    fn test_string_delimiter_uses_string_color() {
1784        // String delimiters (", ', `) should use string color, not operator
1785        assert_eq!(
1786            scope_to_category("punctuation.definition.string.begin"),
1787            Some(HighlightCategory::String)
1788        );
1789        assert_eq!(
1790            scope_to_category("punctuation.definition.string.end"),
1791            Some(HighlightCategory::String)
1792        );
1793    }
1794
1795    #[test]
1796    fn test_punctuation_bracket() {
1797        // punctuation.section (TextMate standard for block delimiters)
1798        assert_eq!(
1799            scope_to_category("punctuation.section"),
1800            Some(HighlightCategory::PunctuationBracket)
1801        );
1802        assert_eq!(
1803            scope_to_category("punctuation.section.block.begin.c"),
1804            Some(HighlightCategory::PunctuationBracket)
1805        );
1806        assert_eq!(
1807            scope_to_category("punctuation.bracket"),
1808            Some(HighlightCategory::PunctuationBracket)
1809        );
1810        // punctuation.definition.* bracket-like scopes from sublime-syntax grammars
1811        assert_eq!(
1812            scope_to_category("punctuation.definition.array.begin.toml"),
1813            Some(HighlightCategory::PunctuationBracket)
1814        );
1815        assert_eq!(
1816            scope_to_category("punctuation.definition.block.code.typst"),
1817            Some(HighlightCategory::PunctuationBracket)
1818        );
1819        assert_eq!(
1820            scope_to_category("punctuation.definition.group.typst"),
1821            Some(HighlightCategory::PunctuationBracket)
1822        );
1823        assert_eq!(
1824            scope_to_category("punctuation.definition.inline-table.begin.toml"),
1825            Some(HighlightCategory::PunctuationBracket)
1826        );
1827        assert_eq!(
1828            scope_to_category("punctuation.definition.tag.end.svelte"),
1829            Some(HighlightCategory::PunctuationBracket)
1830        );
1831    }
1832
1833    #[test]
1834    fn test_punctuation_delimiter() {
1835        assert_eq!(
1836            scope_to_category("punctuation.separator"),
1837            Some(HighlightCategory::PunctuationDelimiter)
1838        );
1839        assert_eq!(
1840            scope_to_category("punctuation.terminator.statement.c"),
1841            Some(HighlightCategory::PunctuationDelimiter)
1842        );
1843        assert_eq!(
1844            scope_to_category("punctuation.accessor"),
1845            Some(HighlightCategory::PunctuationDelimiter)
1846        );
1847    }
1848
1849    /// First parse of a small file populates a whole-file cache; subsequent
1850    /// scrolls anywhere in the file are exact cache hits with no extra parse
1851    /// work.
1852    #[test]
1853    fn test_small_file_scroll_is_cache_hit() {
1854        let registry =
1855            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1856        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1857
1858        let mut content = String::new();
1859        for i in 0..200 {
1860            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
1861        }
1862        let buffer = Buffer::from_str(&content, 0, test_fs());
1863        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1864
1865        let HighlightEngine::TextMate(ref mut tm) = engine else {
1866            panic!("expected TextMate engine for .rs");
1867        };
1868
1869        // First call: cold start, full parse.
1870        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
1871        let stats_after_first = tm.stats().clone();
1872        assert_eq!(
1873            stats_after_first.cache_hits, 0,
1874            "first call cannot hit cache"
1875        );
1876        assert_eq!(
1877            stats_after_first.cache_misses, 1,
1878            "first call must be a miss"
1879        );
1880
1881        // Scroll anywhere — top, middle, end. All must be cache hits.
1882        let mid = buffer.len() / 2;
1883        let near_end = buffer.len().saturating_sub(200);
1884        let probes = [(0, 200), (mid, mid + 200), (near_end, buffer.len())];
1885        for (vs, ve) in probes {
1886            let _ = tm.highlight_viewport(&buffer, vs, ve, &theme, 10_000);
1887        }
1888
1889        let stats_after_scroll = tm.stats().clone();
1890        assert_eq!(
1891            stats_after_scroll.cache_misses,
1892            1,
1893            "scrolling must not add cache misses (got extra: {})",
1894            stats_after_scroll.cache_misses - 1
1895        );
1896        assert_eq!(
1897            stats_after_scroll.cache_hits, 3,
1898            "all three scroll probes must hit the cache"
1899        );
1900        assert_eq!(
1901            stats_after_scroll.bytes_parsed, stats_after_first.bytes_parsed,
1902            "scrolling must not parse any new bytes"
1903        );
1904    }
1905
1906    /// After a small edit, the next render takes the partial-update path
1907    /// (convergence) and continues to serve cache hits afterwards. Crucially:
1908    /// the partial update parses far fewer bytes than the file is long.
1909    #[test]
1910    fn test_small_file_edit_uses_partial_update() {
1911        let registry =
1912            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1913        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1914
1915        let mut content = String::new();
1916        for i in 0..200 {
1917            content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
1918        }
1919        let buffer = Buffer::from_str(&content, 0, test_fs());
1920        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1921
1922        let HighlightEngine::TextMate(ref mut tm) = engine else {
1923            panic!("expected TextMate engine for .rs");
1924        };
1925
1926        // Warm cache.
1927        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
1928        let bytes_before_edit = tm.stats().bytes_parsed;
1929        let buf_len = buffer.len();
1930        assert!(
1931            buf_len > 4000,
1932            "test needs a buffer larger than the partial-update region"
1933        );
1934
1935        // Simulate an edit deep in the file.
1936        let edit_pos = buf_len / 2;
1937        tm.notify_insert(edit_pos, 1);
1938        // The buffer itself doesn't change here (we test the engine in isolation),
1939        // but notify_insert sets dirty_from and shifts spans, which is what the
1940        // partial-update path consumes.
1941
1942        let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
1943        let bytes_after_edit = tm.stats().bytes_parsed;
1944        let parsed = bytes_after_edit - bytes_before_edit;
1945
1946        assert!(
1947            parsed < buf_len,
1948            "edit must not trigger a whole-file reparse (parsed {parsed}, file {buf_len})"
1949        );
1950    }
1951
1952    /// Convergence budget caps per-pass work even when the parse state never
1953    /// agrees with any existing checkpoint. Without the cap, a non-converging
1954    /// edit would parse the rest of the file on every keystroke.
1955    #[test]
1956    fn test_partial_update_budget_caps_work() {
1957        let registry =
1958            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1959        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1960
1961        // Build a buffer comfortably larger than CONVERGENCE_BUDGET.
1962        let mut content = String::new();
1963        while content.len() < (CONVERGENCE_BUDGET * 4) {
1964            content.push_str("fn name() { let mut v = 0; v += 1; }\n");
1965        }
1966        let buffer = Buffer::from_str(&content, 0, test_fs());
1967        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1968
1969        let HighlightEngine::TextMate(ref mut tm) = engine else {
1970            panic!("expected TextMate engine for .rs");
1971        };
1972
1973        // Warm cache (whole-file parse).
1974        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
1975        // Simulate an edit and force every checkpoint to disagree by clearing
1976        // their stored states. The convergence loop will look at each marker,
1977        // find the slot empty, and never converge.
1978        tm.notify_insert(100, 0);
1979        tm.checkpoint_states.clear();
1980
1981        let bytes_before = tm.stats().bytes_parsed;
1982        let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
1983        let parsed = tm.stats().bytes_parsed - bytes_before;
1984
1985        // Budget bounds the work to roughly CONVERGENCE_BUDGET past the dirty
1986        // point (plus the prefix back to the resume checkpoint). Allow a small
1987        // overshoot for the line that crossed the budget threshold.
1988        assert!(
1989            parsed <= CONVERGENCE_BUDGET + 4096,
1990            "partial update parsed {parsed}, expected <= {} \
1991             (budget {CONVERGENCE_BUDGET} + slack)",
1992            CONVERGENCE_BUDGET + 4096
1993        );
1994
1995        // Budget hit must leave dirty_from set for follow-up passes.
1996        assert!(
1997            tm.dirty_from.is_some(),
1998            "budget exit must keep dirty_from set"
1999        );
2000    }
2001
2002    /// Large files (above MAX_PARSE_BYTES) keep the existing windowed
2003    /// behaviour: parse range is bounded by ±context_bytes around the
2004    /// viewport, not the whole file.
2005    ///
2006    /// The viewport is placed past `MAX_PARSE_BYTES` so we exercise the
2007    /// "large file, no nearby checkpoint" branch in `find_parse_resume_point`
2008    /// — the symmetric branch that fires when `parse_end <= MAX_PARSE_BYTES`
2009    /// still parses from byte 0 even on big files (pre-existing behaviour,
2010    /// addressed in a later phase).
2011    #[test]
2012    fn test_large_file_uses_windowed_parse() {
2013        let registry =
2014            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2015        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
2016
2017        // Build content well past MAX_PARSE_BYTES so we can put the viewport
2018        // beyond it.
2019        let line = "fn long_name_for_padding() { let v = 1; v + 1; }\n";
2020        let bytes_needed = MAX_PARSE_BYTES * 2;
2021        let lines_needed = bytes_needed / line.len() + 100;
2022        let mut content = String::with_capacity(lines_needed * line.len());
2023        for _ in 0..lines_needed {
2024            content.push_str(line);
2025        }
2026        assert!(content.len() > MAX_PARSE_BYTES * 2);
2027        let buffer = Buffer::from_str(&content, 0, test_fs());
2028        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2029
2030        let HighlightEngine::TextMate(ref mut tm) = engine else {
2031            panic!("expected TextMate engine for .rs");
2032        };
2033
2034        // Viewport past MAX_PARSE_BYTES: parse_end > MAX_PARSE_BYTES, so the
2035        // resume-from-byte-0 fallback in find_parse_resume_point doesn't fire.
2036        let context_bytes = 10_000usize;
2037        let viewport_start = MAX_PARSE_BYTES + 200_000;
2038        let viewport_end = viewport_start + 1000;
2039        let _ = tm.highlight_viewport(&buffer, viewport_start, viewport_end, &theme, context_bytes);
2040        let parsed = tm.stats().bytes_parsed;
2041
2042        // Windowed parse covers viewport ± context_bytes plus a tiny prefix
2043        // for the resume anchor. Allow generous slack (4×) but reject
2044        // anything close to whole-file.
2045        let window = (viewport_end - viewport_start) + 2 * context_bytes;
2046        assert!(
2047            parsed <= window * 4,
2048            "large file windowed parse should be ~{window} bytes, got {parsed} \
2049             (file {})",
2050            buffer.len()
2051        );
2052    }
2053}