Skip to main content

fresh/primitives/
highlight_engine.rs

1//! Unified highlighting engine
2//!
3//! This module provides a unified abstraction over different highlighting backends:
4//! - TextMate grammars via syntect (default for highlighting)
5//! - Tree-sitter (available via explicit preference, also used for non-highlighting features)
6//!
7//! # Backend Selection
8//! By default, syntect/TextMate is used for syntax highlighting because it provides
9//! broader language coverage. Tree-sitter language detection is still performed
10//! to support non-highlighting features like auto-indentation and semantic highlighting.
11//!
12//! # Non-Highlighting Features
13//! Even when using TextMate for highlighting, tree-sitter `Language` is detected
14//! and available via `.language()` for:
15//! - Auto-indentation (via IndentCalculator)
16//! - Semantic highlighting (variable scope tracking)
17//! - Other syntax-aware features
18
19use crate::model::buffer::Buffer;
20use crate::model::marker::{MarkerId, MarkerList};
21use crate::primitives::grammar::GrammarRegistry;
22use crate::primitives::highlighter::{
23    highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
24};
25use crate::view::theme::Theme;
26use std::collections::HashMap;
27use std::ops::Range;
28use std::path::Path;
29use std::sync::Arc;
30use syntect::parsing::SyntaxSet;
31
32/// Map TextMate scope to highlight category
33fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
34    let scope_lower = scope.to_lowercase();
35
36    // Comments - highest priority
37    if scope_lower.starts_with("comment") {
38        return Some(HighlightCategory::Comment);
39    }
40
41    // Strings
42    if scope_lower.starts_with("string") {
43        return Some(HighlightCategory::String);
44    }
45
46    // Markdown/markup scopes - handle before generic keyword/punctuation checks
47    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
48    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
49    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
50        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
51    }
52    // Bold: markup.bold
53    if scope_lower.starts_with("markup.bold") {
54        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
55    }
56    // Italic: markup.italic
57    if scope_lower.starts_with("markup.italic") {
58        return Some(HighlightCategory::Variable); // Italic styled like variables
59    }
60    // Inline code and code blocks: markup.raw, markup.inline.raw
61    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
62        return Some(HighlightCategory::String); // Code styled like strings
63    }
64    // Links: markup.underline.link
65    if scope_lower.starts_with("markup.underline.link") {
66        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
67    }
68    // Generic underline (often links)
69    if scope_lower.starts_with("markup.underline") {
70        return Some(HighlightCategory::Function);
71    }
72    // Block quotes: markup.quote
73    if scope_lower.starts_with("markup.quote") {
74        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
75    }
76    // Lists: markup.list
77    if scope_lower.starts_with("markup.list") {
78        return Some(HighlightCategory::Operator); // List markers styled like operators
79    }
80    // Strikethrough: markup.strikethrough
81    if scope_lower.starts_with("markup.strikethrough") {
82        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
83    }
84
85    // Keywords
86    if scope_lower.starts_with("keyword.control")
87        || scope_lower.starts_with("keyword.other")
88        || scope_lower.starts_with("keyword.declaration")
89        || scope_lower.starts_with("keyword")
90    {
91        // keyword.operator should map to Operator, not Keyword
92        if !scope_lower.starts_with("keyword.operator") {
93            return Some(HighlightCategory::Keyword);
94        }
95    }
96
97    // Punctuation that belongs to a parent construct (comment/string delimiters)
98    // These must be checked before the generic punctuation rule below.
99    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
100    if scope_lower.starts_with("punctuation.definition.comment") {
101        return Some(HighlightCategory::Comment);
102    }
103    if scope_lower.starts_with("punctuation.definition.string") {
104        return Some(HighlightCategory::String);
105    }
106
107    // Operators (keyword.operator only)
108    if scope_lower.starts_with("keyword.operator") {
109        return Some(HighlightCategory::Operator);
110    }
111
112    // Punctuation brackets ({, }, (, ), [, ], <, >)
113    // Covers punctuation.section.*, punctuation.bracket.*,
114    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
115    if scope_lower.starts_with("punctuation.section")
116        || scope_lower.starts_with("punctuation.bracket")
117        || scope_lower.starts_with("punctuation.definition.array")
118        || scope_lower.starts_with("punctuation.definition.block")
119        || scope_lower.starts_with("punctuation.definition.brackets")
120        || scope_lower.starts_with("punctuation.definition.group")
121        || scope_lower.starts_with("punctuation.definition.inline-table")
122        || scope_lower.starts_with("punctuation.definition.section")
123        || scope_lower.starts_with("punctuation.definition.table")
124        || scope_lower.starts_with("punctuation.definition.tag")
125    {
126        return Some(HighlightCategory::PunctuationBracket);
127    }
128
129    // Punctuation delimiters (;, ,, .)
130    if scope_lower.starts_with("punctuation.separator")
131        || scope_lower.starts_with("punctuation.terminator")
132        || scope_lower.starts_with("punctuation.accessor")
133    {
134        return Some(HighlightCategory::PunctuationDelimiter);
135    }
136
137    // Functions
138    if scope_lower.starts_with("entity.name.function")
139        || scope_lower.starts_with("support.function")
140        || scope_lower.starts_with("meta.function-call")
141        || scope_lower.starts_with("variable.function")
142    {
143        return Some(HighlightCategory::Function);
144    }
145
146    // Types
147    if scope_lower.starts_with("entity.name.type")
148        || scope_lower.starts_with("entity.name.class")
149        || scope_lower.starts_with("entity.name.struct")
150        || scope_lower.starts_with("entity.name.enum")
151        || scope_lower.starts_with("entity.name.interface")
152        || scope_lower.starts_with("entity.name.trait")
153        || scope_lower.starts_with("support.type")
154        || scope_lower.starts_with("support.class")
155        || scope_lower.starts_with("storage.type")
156    {
157        return Some(HighlightCategory::Type);
158    }
159
160    // Storage modifiers (pub, static, const as keywords)
161    if scope_lower.starts_with("storage.modifier") {
162        return Some(HighlightCategory::Keyword);
163    }
164
165    // Constants and numbers
166    if scope_lower.starts_with("constant.numeric")
167        || scope_lower.starts_with("constant.language.boolean")
168    {
169        return Some(HighlightCategory::Number);
170    }
171    if scope_lower.starts_with("constant") {
172        return Some(HighlightCategory::Constant);
173    }
174
175    // Variables
176    if scope_lower.starts_with("variable.parameter")
177        || scope_lower.starts_with("variable.other")
178        || scope_lower.starts_with("variable.language")
179    {
180        return Some(HighlightCategory::Variable);
181    }
182
183    // Properties / object keys
184    if scope_lower.starts_with("entity.name.tag")
185        || scope_lower.starts_with("support.other.property")
186        || scope_lower.starts_with("meta.object-literal.key")
187        || scope_lower.starts_with("variable.other.property")
188        || scope_lower.starts_with("variable.other.object.property")
189    {
190        return Some(HighlightCategory::Property);
191    }
192
193    // Attributes (decorators, annotations)
194    if scope_lower.starts_with("entity.other.attribute")
195        || scope_lower.starts_with("meta.attribute")
196        || scope_lower.starts_with("entity.name.decorator")
197    {
198        return Some(HighlightCategory::Attribute);
199    }
200
201    // Generic variable fallback
202    if scope_lower.starts_with("variable") {
203        return Some(HighlightCategory::Variable);
204    }
205
206    None
207}
208
/// Which backend the caller would like to use for syntax highlighting.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HighlighterPreference {
    /// Default choice: highlight with TextMate/syntect.
    /// The tree-sitter language is still detected so that other features
    /// (indentation, semantic highlighting) keep working.
    #[default]
    Auto,
    /// Highlight with tree-sitter (handy for testing and comparing backends).
    TreeSitter,
    /// Ask for the TextMate grammar explicitly; equivalent to `Auto`.
    TextMate,
}
221
222/// Unified highlighting engine supporting multiple backends
223#[derive(Default)]
224pub enum HighlightEngine {
225    /// Tree-sitter based highlighting (built-in languages)
226    TreeSitter(Box<Highlighter>),
227    /// TextMate grammar based highlighting
228    TextMate(Box<TextMateEngine>),
229    /// No highlighting available
230    #[default]
231    None,
232}
233
234/// TextMate highlighting engine with marker-based parse state checkpoints.
235///
236/// Syntect's parser is a sequential state machine that must process text from the
237/// start of the file to correctly track embedded language transitions (e.g. CSS
238/// inside HTML `<style>` tags).
239///
240/// Checkpoint positions are stored as markers in an internal `MarkerList` which
241/// automatically adjusts byte offsets when the buffer is edited. The associated
242/// `ParseState` + `ScopeStack` are stored in a side `HashMap`.
243///
244/// On edit, checkpoint positions auto-adjust and a `dirty_from` marker is set.
245/// On the next render, a convergence walk re-parses from the checkpoint before
246/// the dirty point forward, stopping as soon as the new parse state matches an
247/// existing checkpoint's stored state (VSCode-style convergence). This means
248/// most single-character edits only re-parse 1-2 checkpoints (~500 bytes).
249///
250/// For large files where no checkpoint reaches the viewport, we fall back to a
251/// fresh `ParseState` from `context_bytes` before the viewport.
252pub struct TextMateEngine {
253    syntax_set: Arc<SyntaxSet>,
254    syntax_index: usize,
255    /// Marker-based checkpoint positions. Markers auto-adjust on buffer edits.
256    checkpoint_markers: MarkerList,
257    /// Parse state stored per checkpoint marker.
258    checkpoint_states:
259        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
260    /// Earliest byte offset where an edit may have invalidated parse state.
261    /// Consumed during the next highlight_viewport call.
262    dirty_from: Option<usize>,
263    /// Cached highlight spans for the last rendered viewport.
264    cache: Option<TextMateCache>,
265    last_buffer_len: usize,
266    /// Tree-sitter language for non-highlighting features (indentation, semantic highlighting)
267    ts_language: Option<Language>,
268    /// Performance counters for testing and diagnostics.
269    stats: HighlightStats,
270}
271
/// Performance counters used by tests to observe highlighting work.
#[derive(Clone, Debug, Default)]
pub struct HighlightStats {
    /// Total bytes fed through syntect, summed over every `highlight_viewport` call.
    pub bytes_parsed: usize,
    /// How many `highlight_viewport` calls were answered from the span cache.
    pub cache_hits: usize,
    /// How many `highlight_viewport` calls missed the cache and had to re-parse.
    pub cache_misses: usize,
    /// How many checkpoint states were overwritten while searching for convergence.
    pub checkpoints_updated: usize,
    /// How many times convergence was detected, i.e. the new parse state
    /// matched the state stored at an existing checkpoint.
    pub convergences: usize,
}
286
287#[derive(Debug, Clone)]
288struct TextMateCache {
289    range: Range<usize>,
290    spans: Vec<CachedSpan>,
291}
292
293#[derive(Debug, Clone)]
294struct CachedSpan {
295    range: Range<usize>,
296    category: crate::primitives::highlighter::HighlightCategory,
297}
298
/// Upper bound on the number of bytes parsed in a single operation (1 MiB).
const MAX_PARSE_BYTES: usize = 1 << 20;

/// Distance in bytes between consecutive parse-state checkpoints.
///
/// 256 bytes is roughly 4-8 lines of code. Convergence is tested at each
/// checkpoint, so a smaller interval lets re-parsing stop sooner after an edit.
/// A 200KB file yields about 800 markers, which is well within what
/// MarkerList's O(log n) operations handle comfortably.
const CHECKPOINT_INTERVAL: usize = 256;
307
308impl TextMateEngine {
309    /// Create a new TextMate engine for the given syntax
310    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
311        Self {
312            syntax_set,
313            syntax_index,
314            checkpoint_markers: MarkerList::new(),
315            checkpoint_states: HashMap::new(),
316            dirty_from: None,
317            cache: None,
318            last_buffer_len: 0,
319            ts_language: None,
320            stats: HighlightStats::default(),
321        }
322    }
323
324    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
325    pub fn with_language(
326        syntax_set: Arc<SyntaxSet>,
327        syntax_index: usize,
328        ts_language: Option<Language>,
329    ) -> Self {
330        Self {
331            syntax_set,
332            syntax_index,
333            checkpoint_markers: MarkerList::new(),
334            checkpoint_states: HashMap::new(),
335            dirty_from: None,
336            cache: None,
337            last_buffer_len: 0,
338            ts_language,
339            stats: HighlightStats::default(),
340        }
341    }
342
343    /// Get performance stats for testing and diagnostics.
344    pub fn stats(&self) -> &HighlightStats {
345        &self.stats
346    }
347
348    /// Reset performance counters.
349    pub fn reset_stats(&mut self) {
350        self.stats = HighlightStats::default();
351    }
352
353    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
354    pub fn language(&self) -> Option<&Language> {
355        self.ts_language.as_ref()
356    }
357
358    /// Notify the checkpoint system of a buffer insert. Markers auto-adjust positions.
359    /// Also shifts cached span byte offsets after the insert point so the span cache
360    /// remains valid for the partial-update / convergence path.
361    pub fn notify_insert(&mut self, position: usize, length: usize) {
362        self.checkpoint_markers.adjust_for_insert(position, length);
363        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
364        // Shift cached spans after the insert point
365        if let Some(cache) = &mut self.cache {
366            for span in &mut cache.spans {
367                if span.range.start >= position {
368                    span.range.start += length;
369                    span.range.end += length;
370                } else if span.range.end > position {
371                    // Span straddles the insert point — extend its end
372                    span.range.end += length;
373                }
374            }
375            if cache.range.end >= position {
376                cache.range.end += length;
377            }
378        }
379    }
380
381    /// Notify the checkpoint system of a buffer delete. Markers auto-adjust positions.
382    /// Also adjusts cached span byte offsets after the delete point.
383    pub fn notify_delete(&mut self, position: usize, length: usize) {
384        self.checkpoint_markers.adjust_for_delete(position, length);
385        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
386        // Adjust cached spans after the delete point
387        if let Some(cache) = &mut self.cache {
388            let delete_end = position + length;
389            cache.spans.retain_mut(|span| {
390                if span.range.start >= delete_end {
391                    // Span is entirely after the delete — shift back
392                    span.range.start -= length;
393                    span.range.end -= length;
394                    true
395                } else if span.range.end <= position {
396                    // Span is entirely before the delete — unchanged
397                    true
398                } else if span.range.start >= position && span.range.end <= delete_end {
399                    // Span is entirely within the deleted region — remove it
400                    false
401                } else {
402                    // Span partially overlaps — clamp and adjust
403                    if span.range.start < position {
404                        span.range.end = position.min(span.range.end);
405                    } else {
406                        span.range.start = position;
407                        span.range.end = position + span.range.end.saturating_sub(delete_end);
408                    }
409                    span.range.start < span.range.end
410                }
411            });
412            if cache.range.end > delete_end {
413                cache.range.end -= length;
414            } else if cache.range.end > position {
415                cache.range.end = position;
416            }
417        }
418    }
419
420    /// Highlight the visible viewport range.
421    ///
422    /// If the span cache is valid and there are no dirty edits, returns cached spans.
423    /// If there are dirty edits, re-parses only from the dirty point until convergence
424    /// (parse state matches an existing checkpoint), then splices the new spans into
425    /// the cache. This means most single-character edits only re-parse ~256-512 bytes.
426    pub fn highlight_viewport(
427        &mut self,
428        buffer: &Buffer,
429        viewport_start: usize,
430        viewport_end: usize,
431        theme: &Theme,
432        context_bytes: usize,
433    ) -> Vec<HighlightSpan> {
434        let desired_parse_start = viewport_start.saturating_sub(context_bytes);
435        let parse_end = (viewport_end + context_bytes).min(buffer.len());
436
437        // Check cache state. For a pure cache hit (no dirty edits), we also
438        // require buffer length to match. For partial updates (dirty_from set),
439        // we only need the cache to cover the viewport — the buffer length
440        // changed due to the edit, but we'll splice the dirty region.
441        let dirty = self.dirty_from.take();
442        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
443            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
444        });
445        let exact_cache_hit = cache_covers_viewport
446            && dirty.is_none()
447            && self.last_buffer_len == buffer.len()
448            && self
449                .cache
450                .as_ref()
451                .is_some_and(|c| c.range.end >= parse_end);
452
453        if exact_cache_hit {
454            // Pure cache hit — no dirty edits, cache covers viewport
455            self.stats.cache_hits += 1;
456            return self.filter_cached_spans(viewport_start, viewport_end, theme);
457        }
458
459        if cache_covers_viewport && dirty.is_some() {
460            if let Some(dirty_pos) = dirty {
461                if dirty_pos < parse_end {
462                    // Partial update: re-parse from dirty point until convergence,
463                    // splice new spans into existing cache
464                    if let Some(result) = self.try_partial_update(
465                        buffer,
466                        dirty_pos,
467                        desired_parse_start,
468                        parse_end,
469                        viewport_start,
470                        viewport_end,
471                        theme,
472                    ) {
473                        return result;
474                    }
475                    // Convergence failed within parse range — fall through to full re-parse
476                } else {
477                    // Dirty region beyond viewport — cache is still valid
478                    self.dirty_from = Some(dirty_pos);
479                    self.stats.cache_hits += 1;
480                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
481                }
482            }
483        } else if let Some(d) = dirty {
484            // No usable cache and dirty — put dirty back, will do full parse
485            self.dirty_from = Some(d);
486        }
487
488        // Full re-parse (cold start or convergence failed)
489        self.full_parse(
490            buffer,
491            desired_parse_start,
492            parse_end,
493            viewport_start,
494            viewport_end,
495            theme,
496            context_bytes,
497        )
498    }
499
500    /// Filter cached spans for the viewport and resolve colors.
501    fn filter_cached_spans(
502        &self,
503        viewport_start: usize,
504        viewport_end: usize,
505        theme: &Theme,
506    ) -> Vec<HighlightSpan> {
507        let cache = self.cache.as_ref().unwrap();
508        cache
509            .spans
510            .iter()
511            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
512            .map(|span| HighlightSpan {
513                range: span.range.clone(),
514                color: highlight_color(span.category, theme),
515                category: Some(span.category),
516            })
517            .collect()
518    }
519
520    /// Try to do a partial update: re-parse from the dirty point until convergence,
521    /// then splice new spans into the cache. Returns None if convergence doesn't
522    /// happen within parse_end (caller should fall back to full re-parse).
523    #[allow(clippy::too_many_arguments)]
524    fn try_partial_update(
525        &mut self,
526        buffer: &Buffer,
527        dirty_pos: usize,
528        desired_parse_start: usize,
529        parse_end: usize,
530        viewport_start: usize,
531        viewport_end: usize,
532        theme: &Theme,
533    ) -> Option<Vec<HighlightSpan>> {
534        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
535
536        // Find checkpoint before the dirty point (bounded search)
537        let (actual_start, mut state, mut current_scopes) = {
538            let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
539            let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
540            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
541            if let Some((id, cp_pos, _)) = nearest {
542                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
543                    (cp_pos, s.clone(), sc.clone())
544                } else {
545                    return None; // orphan, fall back
546                }
547            } else if parse_end <= MAX_PARSE_BYTES {
548                (
549                    0,
550                    syntect::parsing::ParseState::new(syntax),
551                    syntect::parsing::ScopeStack::new(),
552                )
553            } else {
554                return None; // large file, no nearby checkpoint, fall back
555            }
556        };
557
558        // Get markers from dirty point forward for convergence checking
559        let mut markers_ahead: Vec<(MarkerId, usize)> = self
560            .checkpoint_markers
561            .query_range(dirty_pos, parse_end)
562            .into_iter()
563            .map(|(id, start, _)| (id, start))
564            .collect();
565        markers_ahead.sort_by_key(|(_, pos)| *pos);
566        let mut marker_idx = 0;
567
568        // Parse from actual_start to parse_end, looking for convergence
569        let content_end = parse_end.min(buffer.len());
570        if actual_start >= content_end {
571            return None;
572        }
573        let content = buffer.slice_bytes(actual_start..content_end);
574        let content_str = match std::str::from_utf8(&content) {
575            Ok(s) => s,
576            Err(_) => return None,
577        };
578
579        let mut new_spans = Vec::new();
580        let content_bytes = content_str.as_bytes();
581        let mut pos = 0;
582        let mut current_offset = actual_start;
583        let mut converged_at: Option<usize> = None;
584        let mut bytes_since_checkpoint: usize = 0;
585
586        while pos < content_bytes.len() {
587            // Create checkpoints in new territory
588            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
589                let nearby = self.checkpoint_markers.query_range(
590                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
591                    current_offset + CHECKPOINT_INTERVAL / 2,
592                );
593                if nearby.is_empty() {
594                    let marker_id = self.checkpoint_markers.create(current_offset, true);
595                    self.checkpoint_states
596                        .insert(marker_id, (state.clone(), current_scopes.clone()));
597                }
598                bytes_since_checkpoint = 0;
599            }
600
601            let line_start = pos;
602            let mut line_end = pos;
603            while line_end < content_bytes.len() {
604                if content_bytes[line_end] == b'\n' {
605                    line_end += 1;
606                    break;
607                } else if content_bytes[line_end] == b'\r' {
608                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
609                        line_end += 2;
610                    } else {
611                        line_end += 1;
612                    }
613                    break;
614                }
615                line_end += 1;
616            }
617
618            let line_bytes = &content_bytes[line_start..line_end];
619            let actual_line_byte_len = line_bytes.len();
620
621            let line_str = match std::str::from_utf8(line_bytes) {
622                Ok(s) => s,
623                Err(_) => {
624                    pos = line_end;
625                    current_offset += actual_line_byte_len;
626                    bytes_since_checkpoint += actual_line_byte_len;
627                    continue;
628                }
629            };
630
631            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
632            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
633                format!("{}\n", line_content)
634            } else {
635                line_content.to_string()
636            };
637
638            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
639                Ok(ops) => ops,
640                Err(_) => {
641                    pos = line_end;
642                    current_offset += actual_line_byte_len;
643                    bytes_since_checkpoint += actual_line_byte_len;
644                    continue;
645                }
646            };
647
648            // Collect spans for the dirty region
649            let collect_spans =
650                current_offset + actual_line_byte_len > desired_parse_start.max(actual_start);
651            let mut syntect_offset = 0;
652            let line_content_len = line_content.len();
653
654            for (op_offset, op) in ops {
655                let clamped_op_offset = op_offset.min(line_content_len);
656                if collect_spans && clamped_op_offset > syntect_offset {
657                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
658                        let byte_start = current_offset + syntect_offset;
659                        let byte_end = current_offset + clamped_op_offset;
660                        let clamped_start = byte_start.max(actual_start);
661                        if clamped_start < byte_end {
662                            new_spans.push(CachedSpan {
663                                range: clamped_start..byte_end,
664                                category,
665                            });
666                        }
667                    }
668                }
669                syntect_offset = clamped_op_offset;
670                #[allow(clippy::let_underscore_must_use)]
671                let _ = current_scopes.apply(&op);
672            }
673
674            if collect_spans && syntect_offset < line_content_len {
675                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
676                    let byte_start = current_offset + syntect_offset;
677                    let byte_end = current_offset + line_content_len;
678                    let clamped_start = byte_start.max(actual_start);
679                    if clamped_start < byte_end {
680                        new_spans.push(CachedSpan {
681                            range: clamped_start..byte_end,
682                            category,
683                        });
684                    }
685                }
686            }
687
688            pos = line_end;
689            current_offset += actual_line_byte_len;
690            bytes_since_checkpoint += actual_line_byte_len;
691
692            // Check convergence at checkpoint markers
693            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
694            {
695                let (marker_id, _) = markers_ahead[marker_idx];
696                marker_idx += 1;
697                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
698                    if *stored == (state.clone(), current_scopes.clone()) {
699                        self.stats.convergences += 1;
700                        converged_at = Some(current_offset);
701                        break;
702                    }
703                }
704                self.stats.checkpoints_updated += 1;
705                self.checkpoint_states
706                    .insert(marker_id, (state.clone(), current_scopes.clone()));
707            }
708
709            if converged_at.is_some() {
710                break;
711            }
712        }
713
714        self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
715
716        let convergence_point = converged_at?; // None → fall back to full parse
717
718        self.stats.cache_misses += 1; // partial update counts as a miss
719
720        // Splice: replace spans in [actual_start..convergence_point] with new_spans,
721        // keep everything outside that range from the existing cache.
722        Self::merge_adjacent_spans(&mut new_spans);
723
724        if let Some(cache) = &mut self.cache {
725            // Remove old spans that overlap the re-parsed region
726            let splice_start = actual_start;
727            let splice_end = convergence_point;
728            cache
729                .spans
730                .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
731            // Insert new spans and re-sort by range start
732            cache.spans.extend(new_spans);
733            cache.spans.sort_by_key(|s| s.range.start);
734            Self::merge_adjacent_spans(&mut cache.spans);
735        }
736
737        self.last_buffer_len = buffer.len();
738
739        Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
740    }
741
742    /// Full re-parse from desired_parse_start to parse_end. Used on cold start
743    /// or when partial update fails (no convergence).
744    #[allow(clippy::too_many_arguments)]
745    fn full_parse(
746        &mut self,
747        buffer: &Buffer,
748        desired_parse_start: usize,
749        parse_end: usize,
750        viewport_start: usize,
751        viewport_end: usize,
752        theme: &Theme,
753        _context_bytes: usize,
754    ) -> Vec<HighlightSpan> {
755        self.stats.cache_misses += 1;
756        self.dirty_from = None; // consumed
757
758        if parse_end <= desired_parse_start {
759            return Vec::new();
760        }
761
762        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
763        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
764            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
765
766        let content = buffer.slice_bytes(actual_start..parse_end);
767        let content_str = match std::str::from_utf8(&content) {
768            Ok(s) => s,
769            Err(_) => return Vec::new(),
770        };
771
772        let mut spans = Vec::new();
773        let content_bytes = content_str.as_bytes();
774        let mut pos = 0;
775        let mut current_offset = actual_start;
776        let mut bytes_since_checkpoint: usize = 0;
777
778        while pos < content_bytes.len() {
779            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
780                let nearby = self.checkpoint_markers.query_range(
781                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
782                    current_offset + CHECKPOINT_INTERVAL / 2,
783                );
784                if nearby.is_empty() {
785                    let marker_id = self.checkpoint_markers.create(current_offset, true);
786                    self.checkpoint_states
787                        .insert(marker_id, (state.clone(), current_scopes.clone()));
788                }
789                bytes_since_checkpoint = 0;
790            }
791
792            let line_start = pos;
793            let mut line_end = pos;
794
795            while line_end < content_bytes.len() {
796                if content_bytes[line_end] == b'\n' {
797                    line_end += 1;
798                    break;
799                } else if content_bytes[line_end] == b'\r' {
800                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
801                        line_end += 2;
802                    } else {
803                        line_end += 1;
804                    }
805                    break;
806                }
807                line_end += 1;
808            }
809
810            let line_bytes = &content_bytes[line_start..line_end];
811            let actual_line_byte_len = line_bytes.len();
812
813            let line_str = match std::str::from_utf8(line_bytes) {
814                Ok(s) => s,
815                Err(_) => {
816                    pos = line_end;
817                    current_offset += actual_line_byte_len;
818                    bytes_since_checkpoint += actual_line_byte_len;
819                    continue;
820                }
821            };
822
823            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
824            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
825                format!("{}\n", line_content)
826            } else {
827                line_content.to_string()
828            };
829
830            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
831                Ok(ops) => ops,
832                Err(_) => {
833                    pos = line_end;
834                    current_offset += actual_line_byte_len;
835                    bytes_since_checkpoint += actual_line_byte_len;
836                    continue;
837                }
838            };
839
840            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
841            let mut syntect_offset = 0;
842            let line_content_len = line_content.len();
843
844            for (op_offset, op) in ops {
845                let clamped_op_offset = op_offset.min(line_content_len);
846                if collect_spans && clamped_op_offset > syntect_offset {
847                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
848                        let byte_start = current_offset + syntect_offset;
849                        let byte_end = current_offset + clamped_op_offset;
850                        let clamped_start = byte_start.max(desired_parse_start);
851                        if clamped_start < byte_end {
852                            spans.push(CachedSpan {
853                                range: clamped_start..byte_end,
854                                category,
855                            });
856                        }
857                    }
858                }
859                syntect_offset = clamped_op_offset;
860                #[allow(clippy::let_underscore_must_use)]
861                let _ = current_scopes.apply(&op);
862            }
863
864            if collect_spans && syntect_offset < line_content_len {
865                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
866                    let byte_start = current_offset + syntect_offset;
867                    let byte_end = current_offset + line_content_len;
868                    let clamped_start = byte_start.max(desired_parse_start);
869                    if clamped_start < byte_end {
870                        spans.push(CachedSpan {
871                            range: clamped_start..byte_end,
872                            category,
873                        });
874                    }
875                }
876            }
877
878            pos = line_end;
879            current_offset += actual_line_byte_len;
880            bytes_since_checkpoint += actual_line_byte_len;
881
882            // Update checkpoint states as we pass them
883            let markers_here: Vec<(MarkerId, usize)> = self
884                .checkpoint_markers
885                .query_range(
886                    current_offset.saturating_sub(actual_line_byte_len),
887                    current_offset,
888                )
889                .into_iter()
890                .map(|(id, start, _)| (id, start))
891                .collect();
892            for (marker_id, _) in markers_here {
893                self.checkpoint_states
894                    .insert(marker_id, (state.clone(), current_scopes.clone()));
895            }
896        }
897
898        self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
899
900        Self::merge_adjacent_spans(&mut spans);
901
902        self.cache = Some(TextMateCache {
903            range: desired_parse_start..parse_end,
904            spans: spans.clone(),
905        });
906        self.last_buffer_len = buffer.len();
907
908        spans
909            .into_iter()
910            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
911            .map(|span| {
912                let cat = span.category;
913                HighlightSpan {
914                    range: span.range,
915                    color: highlight_color(cat, theme),
916                    category: Some(cat),
917                }
918            })
919            .collect()
920    }
921
922    /// Find the best point to resume parsing from for the viewport.
923    fn find_parse_resume_point(
924        &self,
925        desired_start: usize,
926        parse_end: usize,
927        syntax: &syntect::parsing::SyntaxReference,
928    ) -> (
929        usize,
930        syntect::parsing::ParseState,
931        syntect::parsing::ScopeStack,
932        bool,
933    ) {
934        use syntect::parsing::{ParseState, ScopeStack};
935
936        // Look for a checkpoint near the desired start. For large files, only
937        // consider checkpoints that are within MAX_PARSE_BYTES of desired_start
938        // to avoid parsing hundreds of MB from a distant checkpoint.
939        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
940        let markers = self
941            .checkpoint_markers
942            .query_range(search_start, desired_start + 1);
943        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
944
945        if let Some((id, cp_pos, _)) = nearest {
946            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
947                return (cp_pos, s.clone(), sc.clone(), true);
948            }
949        }
950
951        if parse_end <= MAX_PARSE_BYTES {
952            // File is small enough to parse from byte 0
953            (0, ParseState::new(syntax), ScopeStack::new(), true)
954        } else {
955            // Large file, no nearby checkpoint — start fresh from desired_start.
956            // Still create checkpoints so future visits to this region can resume.
957            (
958                desired_start,
959                ParseState::new(syntax),
960                ScopeStack::new(),
961                true,
962            )
963        }
964    }
965
966    /// Map scope stack to highlight category
967    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
968        for scope in scopes.as_slice().iter().rev() {
969            let scope_str = scope.build_string();
970            if let Some(cat) = scope_to_category(&scope_str) {
971                return Some(cat);
972            }
973        }
974        None
975    }
976
977    /// Merge adjacent spans with same category
978    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
979        if spans.len() < 2 {
980            return;
981        }
982
983        let mut write_idx = 0;
984        for read_idx in 1..spans.len() {
985            if spans[write_idx].category == spans[read_idx].category
986                && spans[write_idx].range.end == spans[read_idx].range.start
987            {
988                spans[write_idx].range.end = spans[read_idx].range.end;
989            } else {
990                write_idx += 1;
991                if write_idx != read_idx {
992                    spans[write_idx] = spans[read_idx].clone();
993                }
994            }
995        }
996        spans.truncate(write_idx + 1);
997    }
998
999    /// Invalidate span cache for an edited range.
1000    /// Checkpoint positions are handled by notify_insert/notify_delete.
1001    /// The span cache is NOT cleared here — it will be patched (partial update)
1002    /// during the next highlight_viewport call using convergence. Only dirty_from
1003    /// (set by notify_insert/notify_delete) controls re-parsing scope.
1004    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
1005        // Intentionally does NOT clear self.cache.
1006        // The cache will be partially updated in highlight_viewport when
1007        // dirty_from is set. This avoids full re-parses for small edits.
1008    }
1009
1010    /// Invalidate all cache and checkpoints (file reload, language change, etc.)
1011    pub fn invalidate_all(&mut self) {
1012        self.cache = None;
1013        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1014        for id in ids {
1015            self.checkpoint_markers.delete(id);
1016        }
1017        self.checkpoint_states.clear();
1018        self.dirty_from = None;
1019    }
1020
1021    /// Get the highlight category at a byte position from the cache.
1022    ///
1023    /// Returns the category if the position falls within a cached highlight span.
1024    /// The position must be within the last highlighted viewport range for a result.
1025    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1026        let cache = self.cache.as_ref()?;
1027        cache
1028            .spans
1029            .iter()
1030            .find(|span| span.range.start <= position && position < span.range.end)
1031            .map(|span| span.category)
1032    }
1033
1034    /// Get syntax name
1035    pub fn syntax_name(&self) -> &str {
1036        &self.syntax_set.syntaxes()[self.syntax_index].name
1037    }
1038}
1039
1040impl HighlightEngine {
1041    /// Create a highlighting engine for a file
1042    ///
1043    /// Always uses syntect/TextMate for highlighting, but detects tree-sitter
1044    /// language for other features (indentation, semantic highlighting).
1045    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Self {
1046        Self::for_file_with_preference(path, registry, HighlighterPreference::Auto)
1047    }
1048
1049    /// Create a highlighting engine for a file, using language configuration for detection.
1050    ///
1051    /// This method checks the provided languages configuration for filename and extension
1052    /// matches before falling back to built-in detection. This allows users to configure
1053    /// custom filename patterns (like PKGBUILD for bash) that will be respected for
1054    /// syntax highlighting.
1055    pub fn for_file_with_languages(
1056        path: &Path,
1057        registry: &GrammarRegistry,
1058        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
1059    ) -> Self {
1060        Self::for_file_with_languages_and_preference(
1061            path,
1062            registry,
1063            languages,
1064            HighlighterPreference::Auto,
1065        )
1066    }
1067
1068    /// Create a highlighting engine with explicit preference and language configuration.
1069    pub fn for_file_with_languages_and_preference(
1070        path: &Path,
1071        registry: &GrammarRegistry,
1072        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
1073        preference: HighlighterPreference,
1074    ) -> Self {
1075        match preference {
1076            // Auto now defaults to TextMate for highlighting (syntect has broader coverage)
1077            // but still detects tree-sitter language for indentation/semantic features
1078            HighlighterPreference::Auto | HighlighterPreference::TextMate => {
1079                Self::textmate_for_file_with_languages(path, registry, languages)
1080            }
1081            HighlighterPreference::TreeSitter => {
1082                if let Some(lang) = Language::from_path(path) {
1083                    if let Ok(highlighter) = Highlighter::new(lang) {
1084                        return Self::TreeSitter(Box::new(highlighter));
1085                    }
1086                }
1087                Self::None
1088            }
1089        }
1090    }
1091
1092    /// Create a highlighting engine with explicit preference
1093    pub fn for_file_with_preference(
1094        path: &Path,
1095        registry: &GrammarRegistry,
1096        preference: HighlighterPreference,
1097    ) -> Self {
1098        match preference {
1099            // Auto now defaults to TextMate for highlighting (syntect has broader coverage)
1100            // but still detects tree-sitter language for indentation/semantic features
1101            HighlighterPreference::Auto | HighlighterPreference::TextMate => {
1102                Self::textmate_for_file(path, registry)
1103            }
1104            HighlighterPreference::TreeSitter => {
1105                if let Some(lang) = Language::from_path(path) {
1106                    if let Ok(highlighter) = Highlighter::new(lang) {
1107                        return Self::TreeSitter(Box::new(highlighter));
1108                    }
1109                }
1110                Self::None
1111            }
1112        }
1113    }
1114
1115    /// Create a TextMate engine for a file, falling back to tree-sitter if no TextMate grammar
1116    fn textmate_for_file(path: &Path, registry: &GrammarRegistry) -> Self {
1117        let syntax_set = registry.syntax_set_arc();
1118
1119        // Detect tree-sitter language for non-highlighting features
1120        let ts_language = Language::from_path(path);
1121
1122        // Find syntax by file extension
1123        if let Some(syntax) = registry.find_syntax_for_file(path) {
1124            // Find the index of this syntax in the set
1125            if let Some(index) = syntax_set
1126                .syntaxes()
1127                .iter()
1128                .position(|s| s.name == syntax.name)
1129            {
1130                return Self::TextMate(Box::new(TextMateEngine::with_language(
1131                    syntax_set,
1132                    index,
1133                    ts_language,
1134                )));
1135            }
1136        }
1137
1138        // No TextMate grammar found - fall back to tree-sitter if available
1139        // This handles languages like TypeScript that syntect doesn't include by default
1140        if let Some(lang) = ts_language {
1141            if let Ok(highlighter) = Highlighter::new(lang) {
1142                tracing::debug!(
1143                    "No TextMate grammar for {:?}, falling back to tree-sitter",
1144                    path.extension()
1145                );
1146                return Self::TreeSitter(Box::new(highlighter));
1147            }
1148        }
1149
1150        Self::None
1151    }
1152
1153    /// Create a TextMate engine for a file with language configuration support
1154    fn textmate_for_file_with_languages(
1155        path: &Path,
1156        registry: &GrammarRegistry,
1157        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
1158    ) -> Self {
1159        let syntax_set = registry.syntax_set_arc();
1160
1161        // Detect tree-sitter language for non-highlighting features
1162        let ts_language = Language::from_path(path);
1163
1164        // Find syntax by file extension, checking languages config first
1165        if let Some(syntax) = registry.find_syntax_for_file_with_languages(path, languages) {
1166            // Find the index of this syntax in the set
1167            if let Some(index) = syntax_set
1168                .syntaxes()
1169                .iter()
1170                .position(|s| s.name == syntax.name)
1171            {
1172                return Self::TextMate(Box::new(TextMateEngine::with_language(
1173                    syntax_set,
1174                    index,
1175                    ts_language,
1176                )));
1177            }
1178        }
1179
1180        // No TextMate grammar found - fall back to tree-sitter if available
1181        // This handles languages like TypeScript that syntect doesn't include by default
1182        if let Some(lang) = ts_language {
1183            if let Ok(highlighter) = Highlighter::new(lang) {
1184                tracing::debug!(
1185                    "No TextMate grammar for {:?}, falling back to tree-sitter",
1186                    path.extension()
1187                );
1188                return Self::TreeSitter(Box::new(highlighter));
1189            }
1190        }
1191
1192        Self::None
1193    }
1194
1195    /// Create a highlighting engine for a specific tree-sitter language.
1196    ///
1197    /// This is useful when manually setting the language (e.g., from UI).
1198    /// Uses tree-sitter for the specified language.
1199    pub fn for_language(language: Language) -> Self {
1200        if let Ok(highlighter) = Highlighter::new(language) {
1201            Self::TreeSitter(Box::new(highlighter))
1202        } else {
1203            Self::None
1204        }
1205    }
1206
1207    /// Create a highlighting engine for a syntax by name.
1208    ///
1209    /// This looks up the syntax in the grammar registry and creates a TextMate
1210    /// highlighter for it. This supports all syntect syntaxes (100+) including
1211    /// user-configured grammars.
1212    ///
1213    /// The `ts_language` parameter optionally provides a tree-sitter language
1214    /// for non-highlighting features (indentation, semantic highlighting).
1215    pub fn for_syntax_name(
1216        name: &str,
1217        registry: &GrammarRegistry,
1218        ts_language: Option<Language>,
1219    ) -> Self {
1220        let syntax_set = registry.syntax_set_arc();
1221
1222        if let Some(syntax) = registry.find_syntax_by_name(name) {
1223            // Find the index of this syntax in the set
1224            if let Some(index) = syntax_set
1225                .syntaxes()
1226                .iter()
1227                .position(|s| s.name == syntax.name)
1228            {
1229                return Self::TextMate(Box::new(TextMateEngine::with_language(
1230                    syntax_set,
1231                    index,
1232                    ts_language,
1233                )));
1234            }
1235        }
1236
1237        Self::None
1238    }
1239
1240    /// Highlight the visible viewport
1241    ///
1242    /// `context_bytes` controls how far before/after the viewport to parse for accurate
1243    /// highlighting of multi-line constructs (strings, comments, nested blocks).
1244    pub fn highlight_viewport(
1245        &mut self,
1246        buffer: &Buffer,
1247        viewport_start: usize,
1248        viewport_end: usize,
1249        theme: &Theme,
1250        context_bytes: usize,
1251    ) -> Vec<HighlightSpan> {
1252        match self {
1253            Self::TreeSitter(h) => {
1254                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1255            }
1256            Self::TextMate(h) => {
1257                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1258            }
1259            Self::None => Vec::new(),
1260        }
1261    }
1262
1263    /// Notify the highlighting engine of a buffer insert (for checkpoint position tracking).
1264    pub fn notify_insert(&mut self, position: usize, length: usize) {
1265        if let Self::TextMate(h) = self {
1266            h.notify_insert(position, length);
1267        }
1268    }
1269
1270    /// Notify the highlighting engine of a buffer delete (for checkpoint position tracking).
1271    pub fn notify_delete(&mut self, position: usize, length: usize) {
1272        if let Self::TextMate(h) = self {
1273            h.notify_delete(position, length);
1274        }
1275    }
1276
1277    /// Invalidate cache for an edited range
1278    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1279        match self {
1280            Self::TreeSitter(h) => h.invalidate_range(edit_range),
1281            Self::TextMate(h) => h.invalidate_range(edit_range),
1282            Self::None => {}
1283        }
1284    }
1285
1286    /// Invalidate entire cache
1287    pub fn invalidate_all(&mut self) {
1288        match self {
1289            Self::TreeSitter(h) => h.invalidate_all(),
1290            Self::TextMate(h) => h.invalidate_all(),
1291            Self::None => {}
1292        }
1293    }
1294
1295    /// Check if this engine has highlighting available
1296    pub fn has_highlighting(&self) -> bool {
1297        !matches!(self, Self::None)
1298    }
1299
1300    /// Get a description of the active backend
1301    pub fn backend_name(&self) -> &str {
1302        match self {
1303            Self::TreeSitter(_) => "tree-sitter",
1304            Self::TextMate(_) => "textmate",
1305            Self::None => "none",
1306        }
1307    }
1308
1309    /// Get performance stats (TextMate engine only).
1310    pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1311        if let Self::TextMate(h) = self {
1312            Some(h.stats())
1313        } else {
1314            None
1315        }
1316    }
1317
1318    /// Reset performance counters.
1319    pub fn reset_highlight_stats(&mut self) {
1320        if let Self::TextMate(h) = self {
1321            h.reset_stats();
1322        }
1323    }
1324
1325    /// Get the language/syntax name if available
1326    pub fn syntax_name(&self) -> Option<&str> {
1327        match self {
1328            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
1329            Self::TextMate(h) => Some(h.syntax_name()),
1330            Self::None => None,
1331        }
1332    }
1333
1334    /// Get the highlight category at a byte position from the cache.
1335    ///
1336    /// Returns the category if the position falls within a cached highlight span.
1337    /// Useful for detecting whether the cursor is inside a string, comment, etc.
1338    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1339        match self {
1340            Self::TreeSitter(h) => h.category_at_position(position),
1341            Self::TextMate(h) => h.category_at_position(position),
1342            Self::None => None,
1343        }
1344    }
1345
1346    /// Get the tree-sitter Language for non-highlighting features
1347    /// Returns the language even when using TextMate for highlighting
1348    pub fn language(&self) -> Option<&Language> {
1349        match self {
1350            Self::TreeSitter(h) => Some(h.language()),
1351            Self::TextMate(h) => h.language(),
1352            Self::None => None,
1353        }
1354    }
1355}
1356
1357/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
1358/// Returns spans with byte ranges relative to the input string.
1359///
1360/// This uses TextMate grammars via syntect which provides broader language coverage
1361/// than tree-sitter (~150+ languages vs ~17).
1362pub fn highlight_string(
1363    code: &str,
1364    lang_hint: &str,
1365    registry: &GrammarRegistry,
1366    theme: &Theme,
1367) -> Vec<HighlightSpan> {
1368    use syntect::parsing::{ParseState, ScopeStack};
1369
1370    // Find syntax by language token (handles aliases like "py" -> Python)
1371    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1372        Some(s) => s,
1373        None => return Vec::new(),
1374    };
1375
1376    let syntax_set = registry.syntax_set();
1377    let mut state = ParseState::new(syntax);
1378    let mut spans = Vec::new();
1379    let mut current_scopes = ScopeStack::new();
1380    let mut current_offset = 0;
1381
1382    // Parse line by line
1383    for line in code.split_inclusive('\n') {
1384        let line_start = current_offset;
1385        let line_len = line.len();
1386
1387        // Remove trailing newline for syntect, then add it back
1388        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1389        let line_for_syntect = if line.ends_with('\n') {
1390            format!("{}\n", line_content)
1391        } else {
1392            line_content.to_string()
1393        };
1394
1395        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1396            Ok(ops) => ops,
1397            Err(_) => {
1398                current_offset += line_len;
1399                continue;
1400            }
1401        };
1402
1403        let mut syntect_offset = 0;
1404        let line_content_len = line_content.len();
1405
1406        for (op_offset, op) in ops {
1407            let clamped_op_offset = op_offset.min(line_content_len);
1408            if clamped_op_offset > syntect_offset {
1409                if let Some(category) = scope_stack_to_category(&current_scopes) {
1410                    let byte_start = line_start + syntect_offset;
1411                    let byte_end = line_start + clamped_op_offset;
1412                    if byte_start < byte_end {
1413                        spans.push(HighlightSpan {
1414                            range: byte_start..byte_end,
1415                            color: highlight_color(category, theme),
1416                            category: Some(category),
1417                        });
1418                    }
1419                }
1420            }
1421            syntect_offset = clamped_op_offset;
1422            // Scope stack errors are non-fatal for highlighting
1423            #[allow(clippy::let_underscore_must_use)]
1424            let _ = current_scopes.apply(&op);
1425        }
1426
1427        // Handle remaining text on line
1428        if syntect_offset < line_content_len {
1429            if let Some(category) = scope_stack_to_category(&current_scopes) {
1430                let byte_start = line_start + syntect_offset;
1431                let byte_end = line_start + line_content_len;
1432                if byte_start < byte_end {
1433                    spans.push(HighlightSpan {
1434                        range: byte_start..byte_end,
1435                        color: highlight_color(category, theme),
1436                        category: Some(category),
1437                    });
1438                }
1439            }
1440        }
1441
1442        current_offset += line_len;
1443    }
1444
1445    // Merge adjacent spans with same color
1446    merge_adjacent_highlight_spans(&mut spans);
1447
1448    spans
1449}
1450
1451/// Map scope stack to highlight category (for highlight_string)
1452fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1453    for scope in scopes.as_slice().iter().rev() {
1454        let scope_str = scope.build_string();
1455        if let Some(cat) = scope_to_category(&scope_str) {
1456            return Some(cat);
1457        }
1458    }
1459    None
1460}
1461
1462/// Merge adjacent spans with same color
1463fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1464    if spans.len() < 2 {
1465        return;
1466    }
1467
1468    let mut write_idx = 0;
1469    for read_idx in 1..spans.len() {
1470        if spans[write_idx].color == spans[read_idx].color
1471            && spans[write_idx].range.end == spans[read_idx].range.start
1472        {
1473            spans[write_idx].range.end = spans[read_idx].range.end;
1474        } else {
1475            write_idx += 1;
1476            if write_idx != read_idx {
1477                spans[write_idx] = spans[read_idx].clone();
1478            }
1479        }
1480    }
1481    spans.truncate(write_idx + 1);
1482}
1483
1484#[cfg(test)]
1485mod tests {
1486    use crate::model::filesystem::StdFileSystem;
1487    use std::sync::Arc;
1488
1489    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1490        Arc::new(StdFileSystem)
1491    }
1492    use super::*;
1493    use crate::view::theme;
1494
1495    #[test]
1496    fn test_highlighter_preference_default() {
1497        let pref = HighlighterPreference::default();
1498        assert_eq!(pref, HighlighterPreference::Auto);
1499    }
1500
1501    #[test]
1502    fn test_highlight_engine_default() {
1503        let engine = HighlightEngine::default();
1504        assert!(!engine.has_highlighting());
1505        assert_eq!(engine.backend_name(), "none");
1506    }
1507
1508    #[test]
1509    fn test_textmate_backend_selection() {
1510        let registry =
1511            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1512
1513        // Languages with TextMate grammars use TextMate for highlighting
1514        let engine = HighlightEngine::for_file(Path::new("test.rs"), &registry);
1515        assert_eq!(engine.backend_name(), "textmate");
1516        // Tree-sitter language should still be detected for other features
1517        assert!(engine.language().is_some());
1518
1519        let engine = HighlightEngine::for_file(Path::new("test.py"), &registry);
1520        assert_eq!(engine.backend_name(), "textmate");
1521        assert!(engine.language().is_some());
1522
1523        let engine = HighlightEngine::for_file(Path::new("test.js"), &registry);
1524        assert_eq!(engine.backend_name(), "textmate");
1525        assert!(engine.language().is_some());
1526
1527        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
1528        let engine = HighlightEngine::for_file(Path::new("test.ts"), &registry);
1529        assert_eq!(engine.backend_name(), "tree-sitter");
1530        assert!(engine.language().is_some());
1531
1532        let engine = HighlightEngine::for_file(Path::new("test.tsx"), &registry);
1533        assert_eq!(engine.backend_name(), "tree-sitter");
1534        assert!(engine.language().is_some());
1535    }
1536
1537    #[test]
1538    fn test_tree_sitter_explicit_preference() {
1539        let registry =
1540            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1541
1542        // Force tree-sitter for highlighting
1543        let engine = HighlightEngine::for_file_with_preference(
1544            Path::new("test.rs"),
1545            &registry,
1546            HighlighterPreference::TreeSitter,
1547        );
1548        assert_eq!(engine.backend_name(), "tree-sitter");
1549    }
1550
1551    #[test]
1552    fn test_unknown_extension() {
1553        let registry =
1554            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1555
1556        // Unknown extension
1557        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), &registry);
1558        // Might be none or might find something via syntect
1559        // Just verify it doesn't panic
1560        let _ = engine.backend_name();
1561    }
1562
1563    #[test]
1564    fn test_highlight_viewport_empty_buffer_no_panic() {
1565        // Regression test: calling highlight_viewport with an empty buffer
1566        // and non-zero viewport range previously caused subtraction overflow panic.
1567        //
1568        // The bug occurred when:
1569        // - buffer is empty (len = 0)
1570        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
1571        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
1572        // - parse_end - parse_start would underflow (0 - positive = overflow)
1573        let registry =
1574            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1575
1576        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), &registry);
1577
1578        // Create empty buffer
1579        let buffer = Buffer::from_str("", 0, test_fs());
1580        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1581
1582        // Test the specific case that triggered the overflow:
1583        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
1584        // 0 - 90 = overflow!
1585        if let HighlightEngine::TextMate(ref mut tm) = engine {
1586            // Small context_bytes so parse_start remains > 0
1587            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1588            assert!(spans.is_empty());
1589        }
1590    }
1591
1592    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
1593    /// This is a regression test for a bug where using str::lines() caused 1-byte
1594    /// offset drift per line because it strips line terminators.
1595    #[test]
1596    fn test_textmate_engine_crlf_byte_offsets() {
1597        let registry =
1598            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1599
1600        let mut engine = HighlightEngine::for_file(Path::new("test.java"), &registry);
1601
1602        // Create CRLF content with keywords on each line
1603        // Each "public" keyword should be highlighted at byte positions:
1604        // Line 1: "public" at bytes 0-5
1605        // Line 2: "public" at bytes 8-13 (after "public\r\n" = 8 bytes)
1606        // Line 3: "public" at bytes 16-21 (after two "public\r\n" = 16 bytes)
1607        let content = b"public\r\npublic\r\npublic\r\n";
1608        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1609        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1610
1611        if let HighlightEngine::TextMate(ref mut tm) = engine {
1612            // Highlight the entire content
1613            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1614
1615            // Find spans that cover keyword positions
1616            // The keyword "public" should have spans at these byte ranges:
1617            // Line 1: 0..6
1618            // Line 2: 8..14 (NOT 7..13 which would be the buggy offset)
1619            // Line 3: 16..22 (NOT 14..20 which would be the buggy offset)
1620
1621            eprintln!(
1622                "Spans: {:?}",
1623                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1624            );
1625
1626            // Check that we have spans covering the correct positions
1627            let has_span_at = |start: usize, end: usize| -> bool {
1628                spans
1629                    .iter()
1630                    .any(|s| s.range.start <= start && s.range.end >= end)
1631            };
1632
1633            // Line 1: "public" at bytes 0-6
1634            assert!(
1635                has_span_at(0, 6),
1636                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1637                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1638            );
1639
1640            // Line 2: "public" at bytes 8-14 (after "public\r\n")
1641            // If buggy, would be at 7-13
1642            assert!(
1643                has_span_at(8, 14),
1644                "Should have span covering bytes 8-14 (line 2 'public'). \
1645                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1646                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1647            );
1648
1649            // Line 3: "public" at bytes 16-22 (after two "public\r\n")
1650            // If buggy, would be at 14-20
1651            assert!(
1652                has_span_at(16, 22),
1653                "Should have span covering bytes 16-22 (line 3 'public'). \
1654                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1655                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1656            );
1657        } else {
1658            panic!("Expected TextMate engine for .java file");
1659        }
1660    }
1661
1662    #[test]
1663    fn test_git_rebase_todo_highlighting() {
1664        let registry =
1665            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1666
1667        // git-rebase-todo files should use the Git Rebase Todo grammar
1668        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), &registry);
1669        assert_eq!(engine.backend_name(), "textmate");
1670        assert!(engine.has_highlighting());
1671    }
1672
1673    #[test]
1674    fn test_git_commit_message_highlighting() {
1675        let registry =
1676            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1677
1678        // COMMIT_EDITMSG should use the Git Commit Message grammar
1679        let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), &registry);
1680        assert_eq!(engine.backend_name(), "textmate");
1681        assert!(engine.has_highlighting());
1682
1683        // MERGE_MSG should also work
1684        let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), &registry);
1685        assert_eq!(engine.backend_name(), "textmate");
1686        assert!(engine.has_highlighting());
1687    }
1688
1689    #[test]
1690    fn test_gitignore_highlighting() {
1691        let registry =
1692            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1693
1694        // .gitignore should use the Gitignore grammar
1695        let engine = HighlightEngine::for_file(Path::new(".gitignore"), &registry);
1696        assert_eq!(engine.backend_name(), "textmate");
1697        assert!(engine.has_highlighting());
1698
1699        // .dockerignore should also work
1700        let engine = HighlightEngine::for_file(Path::new(".dockerignore"), &registry);
1701        assert_eq!(engine.backend_name(), "textmate");
1702        assert!(engine.has_highlighting());
1703    }
1704
1705    #[test]
1706    fn test_gitconfig_highlighting() {
1707        let registry =
1708            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1709
1710        // .gitconfig should use the Git Config grammar
1711        let engine = HighlightEngine::for_file(Path::new(".gitconfig"), &registry);
1712        assert_eq!(engine.backend_name(), "textmate");
1713        assert!(engine.has_highlighting());
1714
1715        // .gitmodules should also work
1716        let engine = HighlightEngine::for_file(Path::new(".gitmodules"), &registry);
1717        assert_eq!(engine.backend_name(), "textmate");
1718        assert!(engine.has_highlighting());
1719    }
1720
1721    #[test]
1722    fn test_gitattributes_highlighting() {
1723        let registry =
1724            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1725
1726        // .gitattributes should use the Git Attributes grammar
1727        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), &registry);
1728        assert_eq!(engine.backend_name(), "textmate");
1729        assert!(engine.has_highlighting());
1730    }
1731
1732    #[test]
1733    fn test_comment_delimiter_uses_comment_color() {
1734        // Comment delimiters (#, //, /*) should use comment color, not operator
1735        assert_eq!(
1736            scope_to_category("punctuation.definition.comment"),
1737            Some(HighlightCategory::Comment)
1738        );
1739        assert_eq!(
1740            scope_to_category("punctuation.definition.comment.python"),
1741            Some(HighlightCategory::Comment)
1742        );
1743        assert_eq!(
1744            scope_to_category("punctuation.definition.comment.begin"),
1745            Some(HighlightCategory::Comment)
1746        );
1747    }
1748
1749    #[test]
1750    fn test_string_delimiter_uses_string_color() {
1751        // String delimiters (", ', `) should use string color, not operator
1752        assert_eq!(
1753            scope_to_category("punctuation.definition.string.begin"),
1754            Some(HighlightCategory::String)
1755        );
1756        assert_eq!(
1757            scope_to_category("punctuation.definition.string.end"),
1758            Some(HighlightCategory::String)
1759        );
1760    }
1761
1762    #[test]
1763    fn test_punctuation_bracket() {
1764        // punctuation.section (TextMate standard for block delimiters)
1765        assert_eq!(
1766            scope_to_category("punctuation.section"),
1767            Some(HighlightCategory::PunctuationBracket)
1768        );
1769        assert_eq!(
1770            scope_to_category("punctuation.section.block.begin.c"),
1771            Some(HighlightCategory::PunctuationBracket)
1772        );
1773        assert_eq!(
1774            scope_to_category("punctuation.bracket"),
1775            Some(HighlightCategory::PunctuationBracket)
1776        );
1777        // punctuation.definition.* bracket-like scopes from sublime-syntax grammars
1778        assert_eq!(
1779            scope_to_category("punctuation.definition.array.begin.toml"),
1780            Some(HighlightCategory::PunctuationBracket)
1781        );
1782        assert_eq!(
1783            scope_to_category("punctuation.definition.block.code.typst"),
1784            Some(HighlightCategory::PunctuationBracket)
1785        );
1786        assert_eq!(
1787            scope_to_category("punctuation.definition.group.typst"),
1788            Some(HighlightCategory::PunctuationBracket)
1789        );
1790        assert_eq!(
1791            scope_to_category("punctuation.definition.inline-table.begin.toml"),
1792            Some(HighlightCategory::PunctuationBracket)
1793        );
1794        assert_eq!(
1795            scope_to_category("punctuation.definition.tag.end.svelte"),
1796            Some(HighlightCategory::PunctuationBracket)
1797        );
1798    }
1799
1800    #[test]
1801    fn test_punctuation_delimiter() {
1802        assert_eq!(
1803            scope_to_category("punctuation.separator"),
1804            Some(HighlightCategory::PunctuationDelimiter)
1805        );
1806        assert_eq!(
1807            scope_to_category("punctuation.terminator.statement.c"),
1808            Some(HighlightCategory::PunctuationDelimiter)
1809        );
1810        assert_eq!(
1811            scope_to_category("punctuation.accessor"),
1812            Some(HighlightCategory::PunctuationDelimiter)
1813        );
1814    }
1815}