Skip to main content

fresh/primitives/
highlight_engine.rs

1//! Unified highlighting engine
2//!
3//! This module provides a unified abstraction over different highlighting backends:
4//! - TextMate grammars via syntect (default for highlighting)
5//! - Tree-sitter (available via explicit preference, also used for non-highlighting features)
6//!
7//! # Backend Selection
8//! By default, syntect/TextMate is used for syntax highlighting because it provides
9//! broader language coverage. Tree-sitter language detection is still performed
10//! to support non-highlighting features like auto-indentation and semantic highlighting.
11//!
12//! # Non-Highlighting Features
13//! Even when using TextMate for highlighting, tree-sitter `Language` is detected
14//! and available via `.language()` for:
15//! - Auto-indentation (via IndentCalculator)
16//! - Semantic highlighting (variable scope tracking)
17//! - Other syntax-aware features
18
19use crate::model::buffer::Buffer;
20use crate::model::marker::{MarkerId, MarkerList};
21use crate::primitives::grammar::GrammarRegistry;
22use crate::primitives::highlighter::{
23    highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
24};
25use crate::view::theme::Theme;
26use std::collections::HashMap;
27use std::ops::Range;
28use std::path::Path;
29use std::sync::Arc;
30use syntect::parsing::SyntaxSet;
31
32/// Map TextMate scope to highlight category
33fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
34    let scope_lower = scope.to_lowercase();
35
36    // Comments - highest priority
37    if scope_lower.starts_with("comment") {
38        return Some(HighlightCategory::Comment);
39    }
40
41    // Strings
42    if scope_lower.starts_with("string") {
43        return Some(HighlightCategory::String);
44    }
45
46    // Markdown/markup scopes - handle before generic keyword/punctuation checks
47    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
48    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
49    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
50        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
51    }
52    // Bold: markup.bold
53    if scope_lower.starts_with("markup.bold") {
54        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
55    }
56    // Italic: markup.italic
57    if scope_lower.starts_with("markup.italic") {
58        return Some(HighlightCategory::Variable); // Italic styled like variables
59    }
60    // Inline code and code blocks: markup.raw, markup.inline.raw
61    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
62        return Some(HighlightCategory::String); // Code styled like strings
63    }
64    // Links: markup.underline.link
65    if scope_lower.starts_with("markup.underline.link") {
66        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
67    }
68    // Generic underline (often links)
69    if scope_lower.starts_with("markup.underline") {
70        return Some(HighlightCategory::Function);
71    }
72    // Block quotes: markup.quote
73    if scope_lower.starts_with("markup.quote") {
74        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
75    }
76    // Lists: markup.list
77    if scope_lower.starts_with("markup.list") {
78        return Some(HighlightCategory::Operator); // List markers styled like operators
79    }
80    // Strikethrough: markup.strikethrough
81    if scope_lower.starts_with("markup.strikethrough") {
82        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
83    }
84
85    // Keywords
86    if scope_lower.starts_with("keyword.control")
87        || scope_lower.starts_with("keyword.other")
88        || scope_lower.starts_with("keyword.declaration")
89        || scope_lower.starts_with("keyword")
90    {
91        // keyword.operator should map to Operator, not Keyword
92        if !scope_lower.starts_with("keyword.operator") {
93            return Some(HighlightCategory::Keyword);
94        }
95    }
96
97    // Punctuation that belongs to a parent construct (comment/string delimiters)
98    // These must be checked before the generic punctuation rule below.
99    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
100    if scope_lower.starts_with("punctuation.definition.comment") {
101        return Some(HighlightCategory::Comment);
102    }
103    if scope_lower.starts_with("punctuation.definition.string") {
104        return Some(HighlightCategory::String);
105    }
106
107    // Operators (keyword.operator only)
108    if scope_lower.starts_with("keyword.operator") {
109        return Some(HighlightCategory::Operator);
110    }
111
112    // Punctuation brackets ({, }, (, ), [, ], <, >)
113    // Covers punctuation.section.*, punctuation.bracket.*,
114    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
115    if scope_lower.starts_with("punctuation.section")
116        || scope_lower.starts_with("punctuation.bracket")
117        || scope_lower.starts_with("punctuation.definition.array")
118        || scope_lower.starts_with("punctuation.definition.block")
119        || scope_lower.starts_with("punctuation.definition.brackets")
120        || scope_lower.starts_with("punctuation.definition.group")
121        || scope_lower.starts_with("punctuation.definition.inline-table")
122        || scope_lower.starts_with("punctuation.definition.section")
123        || scope_lower.starts_with("punctuation.definition.table")
124        || scope_lower.starts_with("punctuation.definition.tag")
125    {
126        return Some(HighlightCategory::PunctuationBracket);
127    }
128
129    // Punctuation delimiters (;, ,, .)
130    if scope_lower.starts_with("punctuation.separator")
131        || scope_lower.starts_with("punctuation.terminator")
132        || scope_lower.starts_with("punctuation.accessor")
133    {
134        return Some(HighlightCategory::PunctuationDelimiter);
135    }
136
137    // Functions
138    if scope_lower.starts_with("entity.name.function")
139        || scope_lower.starts_with("support.function")
140        || scope_lower.starts_with("meta.function-call")
141        || scope_lower.starts_with("variable.function")
142    {
143        return Some(HighlightCategory::Function);
144    }
145
146    // Types
147    if scope_lower.starts_with("entity.name.type")
148        || scope_lower.starts_with("entity.name.class")
149        || scope_lower.starts_with("entity.name.struct")
150        || scope_lower.starts_with("entity.name.enum")
151        || scope_lower.starts_with("entity.name.interface")
152        || scope_lower.starts_with("entity.name.trait")
153        || scope_lower.starts_with("support.type")
154        || scope_lower.starts_with("support.class")
155        || scope_lower.starts_with("storage.type")
156    {
157        return Some(HighlightCategory::Type);
158    }
159
160    // Storage modifiers (pub, static, const as keywords)
161    if scope_lower.starts_with("storage.modifier") {
162        return Some(HighlightCategory::Keyword);
163    }
164
165    // Constants and numbers
166    if scope_lower.starts_with("constant.numeric")
167        || scope_lower.starts_with("constant.language.boolean")
168    {
169        return Some(HighlightCategory::Number);
170    }
171    if scope_lower.starts_with("constant") {
172        return Some(HighlightCategory::Constant);
173    }
174
175    // Variables
176    if scope_lower.starts_with("variable.parameter")
177        || scope_lower.starts_with("variable.other")
178        || scope_lower.starts_with("variable.language")
179    {
180        return Some(HighlightCategory::Variable);
181    }
182
183    // Properties / object keys
184    if scope_lower.starts_with("entity.name.tag")
185        || scope_lower.starts_with("support.other.property")
186        || scope_lower.starts_with("meta.object-literal.key")
187        || scope_lower.starts_with("variable.other.property")
188        || scope_lower.starts_with("variable.other.object.property")
189    {
190        return Some(HighlightCategory::Property);
191    }
192
193    // Attributes (decorators, annotations)
194    if scope_lower.starts_with("entity.other.attribute")
195        || scope_lower.starts_with("meta.attribute")
196        || scope_lower.starts_with("entity.name.decorator")
197    {
198        return Some(HighlightCategory::Attribute);
199    }
200
201    // Generic variable fallback
202    if scope_lower.starts_with("variable") {
203        return Some(HighlightCategory::Variable);
204    }
205
206    None
207}
208
/// Unified highlighting engine supporting multiple backends.
///
/// Exactly one backend is active at a time. The default value is
/// [`HighlightEngine::None`], i.e. no highlighting.
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighting (built-in languages)
    TreeSitter(Box<Highlighter>),
    /// TextMate grammar based highlighting
    TextMate(Box<TextMateEngine>),
    /// No highlighting available
    #[default]
    None,
}
220
/// TextMate highlighting engine with marker-based parse state checkpoints.
///
/// Syntect's parser is a sequential state machine that must process text from the
/// start of the file to correctly track embedded language transitions (e.g. CSS
/// inside HTML `<style>` tags).
///
/// Checkpoint positions are stored as markers in an internal `MarkerList` which
/// automatically adjusts byte offsets when the buffer is edited. The associated
/// `ParseState` + `ScopeStack` are stored in a side `HashMap`.
///
/// On edit, checkpoint positions auto-adjust and a `dirty_from` marker is set.
/// On the next render, a convergence walk re-parses from the checkpoint before
/// the dirty point forward, stopping as soon as the new parse state matches an
/// existing checkpoint's stored state (VSCode-style convergence). This means
/// most single-character edits only re-parse 1-2 checkpoints (~500 bytes).
///
/// For large files where no checkpoint reaches the viewport, we fall back to a
/// fresh `ParseState` from `context_bytes` before the viewport.
pub struct TextMateEngine {
    /// Shared syntax definitions; handed to syntect when parsing each line.
    syntax_set: Arc<SyntaxSet>,
    /// Index into `syntax_set.syntaxes()` selecting the grammar used by this engine.
    syntax_index: usize,
    /// Marker-based checkpoint positions. Markers auto-adjust on buffer edits.
    checkpoint_markers: MarkerList,
    /// Parse state stored per checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Earliest byte offset where an edit may have invalidated parse state.
    /// Consumed during the next highlight_viewport call.
    dirty_from: Option<usize>,
    /// Cached highlight spans for the last rendered viewport.
    cache: Option<TextMateCache>,
    /// Buffer length recorded when the span cache was last brought up to date
    /// (e.g. after a partial update). A pure cache hit requires the current
    /// buffer length to equal this value.
    last_buffer_len: usize,
    /// Tree-sitter language for non-highlighting features (indentation, semantic highlighting)
    ts_language: Option<Language>,
    /// Performance counters for testing and diagnostics.
    stats: HighlightStats,
}
258
/// Counters for monitoring highlighting performance in tests.
///
/// All counters are cumulative until `TextMateEngine::reset_stats` is called.
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Number of bytes parsed by syntect (total across all highlight_viewport calls).
    pub bytes_parsed: usize,
    /// Number of highlight_viewport calls that hit the span cache. This includes
    /// calls where an edit is pending but lies at or beyond the end of the parse range.
    pub cache_hits: usize,
    /// Number of highlight_viewport calls that missed the cache and re-parsed.
    /// A successful partial (convergence) update is also counted as a miss.
    pub cache_misses: usize,
    /// Number of checkpoint states updated during convergence.
    pub checkpoints_updated: usize,
    /// Number of times convergence was detected (state matched existing checkpoint).
    pub convergences: usize,
}
273
/// Span cache for the most recently highlighted region of the buffer.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer that `spans` is considered to cover.
    range: Range<usize>,
    /// Cached spans; the partial-update path keeps these sorted by start offset.
    spans: Vec<CachedSpan>,
}
279
/// A theme-independent highlight span: the color is resolved from `category`
/// only when the span is handed out to the renderer.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range in the buffer covered by this span.
    range: Range<usize>,
    /// Highlight category assigned to the text in `range`.
    category: crate::primitives::highlighter::HighlightCategory,
}
285
/// Maximum bytes to parse in a single operation.
///
/// `try_partial_update` also uses this as the limit for how far back it looks
/// for a checkpoint before a dirty position, and as the threshold below which
/// it may start parsing from the beginning of the buffer when no checkpoint exists.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Interval between parse state checkpoints (in bytes).
/// 256 bytes ≈ every 4-8 lines of code. Convergence checks happen at each
/// checkpoint, so smaller intervals mean faster convergence after edits.
/// A 200KB file produces ~800 markers — well within MarkerList's O(log n) range.
const CHECKPOINT_INTERVAL: usize = 256;
294
295impl TextMateEngine {
296    /// Create a new TextMate engine for the given syntax
297    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
298        Self {
299            syntax_set,
300            syntax_index,
301            checkpoint_markers: MarkerList::new(),
302            checkpoint_states: HashMap::new(),
303            dirty_from: None,
304            cache: None,
305            last_buffer_len: 0,
306            ts_language: None,
307            stats: HighlightStats::default(),
308        }
309    }
310
311    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
312    pub fn with_language(
313        syntax_set: Arc<SyntaxSet>,
314        syntax_index: usize,
315        ts_language: Option<Language>,
316    ) -> Self {
317        Self {
318            syntax_set,
319            syntax_index,
320            checkpoint_markers: MarkerList::new(),
321            checkpoint_states: HashMap::new(),
322            dirty_from: None,
323            cache: None,
324            last_buffer_len: 0,
325            ts_language,
326            stats: HighlightStats::default(),
327        }
328    }
329
    /// Get performance stats for testing and diagnostics.
    ///
    /// Returns a borrow of the engine's cumulative counters.
    pub fn stats(&self) -> &HighlightStats {
        &self.stats
    }
334
335    /// Reset performance counters.
336    pub fn reset_stats(&mut self) {
337        self.stats = HighlightStats::default();
338    }
339
    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
    ///
    /// Returns `None` when the engine was created without a tree-sitter language.
    pub fn language(&self) -> Option<&Language> {
        self.ts_language.as_ref()
    }
344
345    /// Notify the checkpoint system of a buffer insert. Markers auto-adjust positions.
346    /// Also shifts cached span byte offsets after the insert point so the span cache
347    /// remains valid for the partial-update / convergence path.
348    pub fn notify_insert(&mut self, position: usize, length: usize) {
349        self.checkpoint_markers.adjust_for_insert(position, length);
350        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
351        // Shift cached spans after the insert point
352        if let Some(cache) = &mut self.cache {
353            for span in &mut cache.spans {
354                if span.range.start >= position {
355                    span.range.start += length;
356                    span.range.end += length;
357                } else if span.range.end > position {
358                    // Span straddles the insert point — extend its end
359                    span.range.end += length;
360                }
361            }
362            if cache.range.end >= position {
363                cache.range.end += length;
364            }
365        }
366    }
367
368    /// Notify the checkpoint system of a buffer delete. Markers auto-adjust positions.
369    /// Also adjusts cached span byte offsets after the delete point.
370    pub fn notify_delete(&mut self, position: usize, length: usize) {
371        self.checkpoint_markers.adjust_for_delete(position, length);
372        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
373        // Adjust cached spans after the delete point
374        if let Some(cache) = &mut self.cache {
375            let delete_end = position + length;
376            cache.spans.retain_mut(|span| {
377                if span.range.start >= delete_end {
378                    // Span is entirely after the delete — shift back
379                    span.range.start -= length;
380                    span.range.end -= length;
381                    true
382                } else if span.range.end <= position {
383                    // Span is entirely before the delete — unchanged
384                    true
385                } else if span.range.start >= position && span.range.end <= delete_end {
386                    // Span is entirely within the deleted region — remove it
387                    false
388                } else {
389                    // Span partially overlaps — clamp and adjust
390                    if span.range.start < position {
391                        span.range.end = position.min(span.range.end);
392                    } else {
393                        span.range.start = position;
394                        span.range.end = position + span.range.end.saturating_sub(delete_end);
395                    }
396                    span.range.start < span.range.end
397                }
398            });
399            if cache.range.end > delete_end {
400                cache.range.end -= length;
401            } else if cache.range.end > position {
402                cache.range.end = position;
403            }
404        }
405    }
406
407    /// Highlight the visible viewport range.
408    ///
409    /// If the span cache is valid and there are no dirty edits, returns cached spans.
410    /// If there are dirty edits, re-parses only from the dirty point until convergence
411    /// (parse state matches an existing checkpoint), then splices the new spans into
412    /// the cache. This means most single-character edits only re-parse ~256-512 bytes.
413    pub fn highlight_viewport(
414        &mut self,
415        buffer: &Buffer,
416        viewport_start: usize,
417        viewport_end: usize,
418        theme: &Theme,
419        context_bytes: usize,
420    ) -> Vec<HighlightSpan> {
421        let desired_parse_start = viewport_start.saturating_sub(context_bytes);
422        let parse_end = (viewport_end + context_bytes).min(buffer.len());
423
424        // Check cache state. For a pure cache hit (no dirty edits), we also
425        // require buffer length to match. For partial updates (dirty_from set),
426        // we only need the cache to cover the viewport — the buffer length
427        // changed due to the edit, but we'll splice the dirty region.
428        let dirty = self.dirty_from.take();
429        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
430            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
431        });
432        let exact_cache_hit = cache_covers_viewport
433            && dirty.is_none()
434            && self.last_buffer_len == buffer.len()
435            && self
436                .cache
437                .as_ref()
438                .is_some_and(|c| c.range.end >= parse_end);
439
440        if exact_cache_hit {
441            // Pure cache hit — no dirty edits, cache covers viewport
442            self.stats.cache_hits += 1;
443            return self.filter_cached_spans(viewport_start, viewport_end, theme);
444        }
445
446        if cache_covers_viewport && dirty.is_some() {
447            if let Some(dirty_pos) = dirty {
448                if dirty_pos < parse_end {
449                    // Partial update: re-parse from dirty point until convergence,
450                    // splice new spans into existing cache
451                    if let Some(result) = self.try_partial_update(
452                        buffer,
453                        dirty_pos,
454                        desired_parse_start,
455                        parse_end,
456                        viewport_start,
457                        viewport_end,
458                        theme,
459                    ) {
460                        return result;
461                    }
462                    // Convergence failed within parse range — fall through to full re-parse
463                } else {
464                    // Dirty region beyond viewport — cache is still valid
465                    self.dirty_from = Some(dirty_pos);
466                    self.stats.cache_hits += 1;
467                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
468                }
469            }
470        } else if let Some(d) = dirty {
471            // No usable cache and dirty — put dirty back, will do full parse
472            self.dirty_from = Some(d);
473        }
474
475        // Full re-parse (cold start or convergence failed)
476        self.full_parse(
477            buffer,
478            desired_parse_start,
479            parse_end,
480            viewport_start,
481            viewport_end,
482            theme,
483            context_bytes,
484        )
485    }
486
487    /// Filter cached spans for the viewport and resolve colors.
488    fn filter_cached_spans(
489        &self,
490        viewport_start: usize,
491        viewport_end: usize,
492        theme: &Theme,
493    ) -> Vec<HighlightSpan> {
494        let cache = self.cache.as_ref().unwrap();
495        cache
496            .spans
497            .iter()
498            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
499            .map(|span| HighlightSpan {
500                range: span.range.clone(),
501                color: highlight_color(span.category, theme),
502                category: Some(span.category),
503            })
504            .collect()
505    }
506
507    /// Try to do a partial update: re-parse from the dirty point until convergence,
508    /// then splice new spans into the cache. Returns None if convergence doesn't
509    /// happen within parse_end (caller should fall back to full re-parse).
510    #[allow(clippy::too_many_arguments)]
511    fn try_partial_update(
512        &mut self,
513        buffer: &Buffer,
514        dirty_pos: usize,
515        desired_parse_start: usize,
516        parse_end: usize,
517        viewport_start: usize,
518        viewport_end: usize,
519        theme: &Theme,
520    ) -> Option<Vec<HighlightSpan>> {
521        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
522
523        // Find checkpoint before the dirty point (bounded search)
524        let (actual_start, mut state, mut current_scopes) = {
525            let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
526            let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
527            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
528            if let Some((id, cp_pos, _)) = nearest {
529                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
530                    (cp_pos, s.clone(), sc.clone())
531                } else {
532                    return None; // orphan, fall back
533                }
534            } else if parse_end <= MAX_PARSE_BYTES {
535                (
536                    0,
537                    syntect::parsing::ParseState::new(syntax),
538                    syntect::parsing::ScopeStack::new(),
539                )
540            } else {
541                return None; // large file, no nearby checkpoint, fall back
542            }
543        };
544
545        // Get markers from dirty point forward for convergence checking
546        let mut markers_ahead: Vec<(MarkerId, usize)> = self
547            .checkpoint_markers
548            .query_range(dirty_pos, parse_end)
549            .into_iter()
550            .map(|(id, start, _)| (id, start))
551            .collect();
552        markers_ahead.sort_by_key(|(_, pos)| *pos);
553        let mut marker_idx = 0;
554
555        // Parse from actual_start to parse_end, looking for convergence
556        let content_end = parse_end.min(buffer.len());
557        if actual_start >= content_end {
558            return None;
559        }
560        let content = buffer.slice_bytes(actual_start..content_end);
561        let content_str = match std::str::from_utf8(&content) {
562            Ok(s) => s,
563            Err(_) => return None,
564        };
565
566        let mut new_spans = Vec::new();
567        let content_bytes = content_str.as_bytes();
568        let mut pos = 0;
569        let mut current_offset = actual_start;
570        let mut converged_at: Option<usize> = None;
571        let mut bytes_since_checkpoint: usize = 0;
572
573        while pos < content_bytes.len() {
574            // Create checkpoints in new territory
575            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
576                let nearby = self.checkpoint_markers.query_range(
577                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
578                    current_offset + CHECKPOINT_INTERVAL / 2,
579                );
580                if nearby.is_empty() {
581                    let marker_id = self.checkpoint_markers.create(current_offset, true);
582                    self.checkpoint_states
583                        .insert(marker_id, (state.clone(), current_scopes.clone()));
584                }
585                bytes_since_checkpoint = 0;
586            }
587
588            let line_start = pos;
589            let mut line_end = pos;
590            while line_end < content_bytes.len() {
591                if content_bytes[line_end] == b'\n' {
592                    line_end += 1;
593                    break;
594                } else if content_bytes[line_end] == b'\r' {
595                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
596                        line_end += 2;
597                    } else {
598                        line_end += 1;
599                    }
600                    break;
601                }
602                line_end += 1;
603            }
604
605            let line_bytes = &content_bytes[line_start..line_end];
606            let actual_line_byte_len = line_bytes.len();
607
608            let line_str = match std::str::from_utf8(line_bytes) {
609                Ok(s) => s,
610                Err(_) => {
611                    pos = line_end;
612                    current_offset += actual_line_byte_len;
613                    bytes_since_checkpoint += actual_line_byte_len;
614                    continue;
615                }
616            };
617
618            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
619            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
620                format!("{}\n", line_content)
621            } else {
622                line_content.to_string()
623            };
624
625            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
626                Ok(ops) => ops,
627                Err(_) => {
628                    pos = line_end;
629                    current_offset += actual_line_byte_len;
630                    bytes_since_checkpoint += actual_line_byte_len;
631                    continue;
632                }
633            };
634
635            // Collect spans for the dirty region
636            let collect_spans =
637                current_offset + actual_line_byte_len > desired_parse_start.max(actual_start);
638            let mut syntect_offset = 0;
639            let line_content_len = line_content.len();
640
641            for (op_offset, op) in ops {
642                let clamped_op_offset = op_offset.min(line_content_len);
643                if collect_spans && clamped_op_offset > syntect_offset {
644                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
645                        let byte_start = current_offset + syntect_offset;
646                        let byte_end = current_offset + clamped_op_offset;
647                        let clamped_start = byte_start.max(actual_start);
648                        if clamped_start < byte_end {
649                            new_spans.push(CachedSpan {
650                                range: clamped_start..byte_end,
651                                category,
652                            });
653                        }
654                    }
655                }
656                syntect_offset = clamped_op_offset;
657                #[allow(clippy::let_underscore_must_use)]
658                let _ = current_scopes.apply(&op);
659            }
660
661            if collect_spans && syntect_offset < line_content_len {
662                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
663                    let byte_start = current_offset + syntect_offset;
664                    let byte_end = current_offset + line_content_len;
665                    let clamped_start = byte_start.max(actual_start);
666                    if clamped_start < byte_end {
667                        new_spans.push(CachedSpan {
668                            range: clamped_start..byte_end,
669                            category,
670                        });
671                    }
672                }
673            }
674
675            pos = line_end;
676            current_offset += actual_line_byte_len;
677            bytes_since_checkpoint += actual_line_byte_len;
678
679            // Check convergence at checkpoint markers
680            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
681            {
682                let (marker_id, _) = markers_ahead[marker_idx];
683                marker_idx += 1;
684                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
685                    if *stored == (state.clone(), current_scopes.clone()) {
686                        self.stats.convergences += 1;
687                        converged_at = Some(current_offset);
688                        break;
689                    }
690                }
691                self.stats.checkpoints_updated += 1;
692                self.checkpoint_states
693                    .insert(marker_id, (state.clone(), current_scopes.clone()));
694            }
695
696            if converged_at.is_some() {
697                break;
698            }
699        }
700
701        self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
702
703        let convergence_point = converged_at?; // None → fall back to full parse
704
705        self.stats.cache_misses += 1; // partial update counts as a miss
706
707        // Splice: replace spans in [actual_start..convergence_point] with new_spans,
708        // keep everything outside that range from the existing cache.
709        Self::merge_adjacent_spans(&mut new_spans);
710
711        if let Some(cache) = &mut self.cache {
712            // Remove old spans that overlap the re-parsed region
713            let splice_start = actual_start;
714            let splice_end = convergence_point;
715            cache
716                .spans
717                .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
718            // Insert new spans and re-sort by range start
719            cache.spans.extend(new_spans);
720            cache.spans.sort_by_key(|s| s.range.start);
721            Self::merge_adjacent_spans(&mut cache.spans);
722        }
723
724        self.last_buffer_len = buffer.len();
725
726        Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
727    }
728
729    /// Full re-parse from desired_parse_start to parse_end. Used on cold start
730    /// or when partial update fails (no convergence).
731    #[allow(clippy::too_many_arguments)]
732    fn full_parse(
733        &mut self,
734        buffer: &Buffer,
735        desired_parse_start: usize,
736        parse_end: usize,
737        viewport_start: usize,
738        viewport_end: usize,
739        theme: &Theme,
740        _context_bytes: usize,
741    ) -> Vec<HighlightSpan> {
742        self.stats.cache_misses += 1;
743        self.dirty_from = None; // consumed
744
745        if parse_end <= desired_parse_start {
746            return Vec::new();
747        }
748
749        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
750        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
751            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
752
753        let content = buffer.slice_bytes(actual_start..parse_end);
754        let content_str = match std::str::from_utf8(&content) {
755            Ok(s) => s,
756            Err(_) => return Vec::new(),
757        };
758
759        let mut spans = Vec::new();
760        let content_bytes = content_str.as_bytes();
761        let mut pos = 0;
762        let mut current_offset = actual_start;
763        let mut bytes_since_checkpoint: usize = 0;
764
765        while pos < content_bytes.len() {
766            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
767                let nearby = self.checkpoint_markers.query_range(
768                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
769                    current_offset + CHECKPOINT_INTERVAL / 2,
770                );
771                if nearby.is_empty() {
772                    let marker_id = self.checkpoint_markers.create(current_offset, true);
773                    self.checkpoint_states
774                        .insert(marker_id, (state.clone(), current_scopes.clone()));
775                }
776                bytes_since_checkpoint = 0;
777            }
778
779            let line_start = pos;
780            let mut line_end = pos;
781
782            while line_end < content_bytes.len() {
783                if content_bytes[line_end] == b'\n' {
784                    line_end += 1;
785                    break;
786                } else if content_bytes[line_end] == b'\r' {
787                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
788                        line_end += 2;
789                    } else {
790                        line_end += 1;
791                    }
792                    break;
793                }
794                line_end += 1;
795            }
796
797            let line_bytes = &content_bytes[line_start..line_end];
798            let actual_line_byte_len = line_bytes.len();
799
800            let line_str = match std::str::from_utf8(line_bytes) {
801                Ok(s) => s,
802                Err(_) => {
803                    pos = line_end;
804                    current_offset += actual_line_byte_len;
805                    bytes_since_checkpoint += actual_line_byte_len;
806                    continue;
807                }
808            };
809
810            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
811            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
812                format!("{}\n", line_content)
813            } else {
814                line_content.to_string()
815            };
816
817            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
818                Ok(ops) => ops,
819                Err(_) => {
820                    pos = line_end;
821                    current_offset += actual_line_byte_len;
822                    bytes_since_checkpoint += actual_line_byte_len;
823                    continue;
824                }
825            };
826
827            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
828            let mut syntect_offset = 0;
829            let line_content_len = line_content.len();
830
831            for (op_offset, op) in ops {
832                let clamped_op_offset = op_offset.min(line_content_len);
833                if collect_spans && clamped_op_offset > syntect_offset {
834                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
835                        let byte_start = current_offset + syntect_offset;
836                        let byte_end = current_offset + clamped_op_offset;
837                        let clamped_start = byte_start.max(desired_parse_start);
838                        if clamped_start < byte_end {
839                            spans.push(CachedSpan {
840                                range: clamped_start..byte_end,
841                                category,
842                            });
843                        }
844                    }
845                }
846                syntect_offset = clamped_op_offset;
847                #[allow(clippy::let_underscore_must_use)]
848                let _ = current_scopes.apply(&op);
849            }
850
851            if collect_spans && syntect_offset < line_content_len {
852                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
853                    let byte_start = current_offset + syntect_offset;
854                    let byte_end = current_offset + line_content_len;
855                    let clamped_start = byte_start.max(desired_parse_start);
856                    if clamped_start < byte_end {
857                        spans.push(CachedSpan {
858                            range: clamped_start..byte_end,
859                            category,
860                        });
861                    }
862                }
863            }
864
865            pos = line_end;
866            current_offset += actual_line_byte_len;
867            bytes_since_checkpoint += actual_line_byte_len;
868
869            // Update checkpoint states as we pass them
870            let markers_here: Vec<(MarkerId, usize)> = self
871                .checkpoint_markers
872                .query_range(
873                    current_offset.saturating_sub(actual_line_byte_len),
874                    current_offset,
875                )
876                .into_iter()
877                .map(|(id, start, _)| (id, start))
878                .collect();
879            for (marker_id, _) in markers_here {
880                self.checkpoint_states
881                    .insert(marker_id, (state.clone(), current_scopes.clone()));
882            }
883        }
884
885        self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
886
887        Self::merge_adjacent_spans(&mut spans);
888
889        self.cache = Some(TextMateCache {
890            range: desired_parse_start..parse_end,
891            spans: spans.clone(),
892        });
893        self.last_buffer_len = buffer.len();
894
895        spans
896            .into_iter()
897            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
898            .map(|span| {
899                let cat = span.category;
900                HighlightSpan {
901                    range: span.range,
902                    color: highlight_color(cat, theme),
903                    category: Some(cat),
904                }
905            })
906            .collect()
907    }
908
909    /// Find the best point to resume parsing from for the viewport.
910    fn find_parse_resume_point(
911        &self,
912        desired_start: usize,
913        parse_end: usize,
914        syntax: &syntect::parsing::SyntaxReference,
915    ) -> (
916        usize,
917        syntect::parsing::ParseState,
918        syntect::parsing::ScopeStack,
919        bool,
920    ) {
921        use syntect::parsing::{ParseState, ScopeStack};
922
923        // Look for a checkpoint near the desired start. For large files, only
924        // consider checkpoints that are within MAX_PARSE_BYTES of desired_start
925        // to avoid parsing hundreds of MB from a distant checkpoint.
926        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
927        let markers = self
928            .checkpoint_markers
929            .query_range(search_start, desired_start + 1);
930        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
931
932        if let Some((id, cp_pos, _)) = nearest {
933            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
934                return (cp_pos, s.clone(), sc.clone(), true);
935            }
936        }
937
938        if parse_end <= MAX_PARSE_BYTES {
939            // File is small enough to parse from byte 0
940            (0, ParseState::new(syntax), ScopeStack::new(), true)
941        } else {
942            // Large file, no nearby checkpoint — start fresh from desired_start.
943            // Still create checkpoints so future visits to this region can resume.
944            (
945                desired_start,
946                ParseState::new(syntax),
947                ScopeStack::new(),
948                true,
949            )
950        }
951    }
952
953    /// Map scope stack to highlight category
954    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
955        for scope in scopes.as_slice().iter().rev() {
956            let scope_str = scope.build_string();
957            if let Some(cat) = scope_to_category(&scope_str) {
958                return Some(cat);
959            }
960        }
961        None
962    }
963
964    /// Merge adjacent spans with same category
965    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
966        if spans.len() < 2 {
967            return;
968        }
969
970        let mut write_idx = 0;
971        for read_idx in 1..spans.len() {
972            if spans[write_idx].category == spans[read_idx].category
973                && spans[write_idx].range.end == spans[read_idx].range.start
974            {
975                spans[write_idx].range.end = spans[read_idx].range.end;
976            } else {
977                write_idx += 1;
978                if write_idx != read_idx {
979                    spans[write_idx] = spans[read_idx].clone();
980                }
981            }
982        }
983        spans.truncate(write_idx + 1);
984    }
985
986    /// Invalidate span cache for an edited range.
987    /// Checkpoint positions are handled by notify_insert/notify_delete.
988    /// The span cache is NOT cleared here — it will be patched (partial update)
989    /// during the next highlight_viewport call using convergence. Only dirty_from
990    /// (set by notify_insert/notify_delete) controls re-parsing scope.
991    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
992        // Intentionally does NOT clear self.cache.
993        // The cache will be partially updated in highlight_viewport when
994        // dirty_from is set. This avoids full re-parses for small edits.
995    }
996
997    /// Invalidate all cache and checkpoints (file reload, language change, etc.)
998    pub fn invalidate_all(&mut self) {
999        self.cache = None;
1000        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1001        for id in ids {
1002            self.checkpoint_markers.delete(id);
1003        }
1004        self.checkpoint_states.clear();
1005        self.dirty_from = None;
1006    }
1007
1008    /// Get the highlight category at a byte position from the cache.
1009    ///
1010    /// Returns the category if the position falls within a cached highlight span.
1011    /// The position must be within the last highlighted viewport range for a result.
1012    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1013        let cache = self.cache.as_ref()?;
1014        cache
1015            .spans
1016            .iter()
1017            .find(|span| span.range.start <= position && position < span.range.end)
1018            .map(|span| span.category)
1019    }
1020
1021    /// Get syntax name
1022    pub fn syntax_name(&self) -> &str {
1023        &self.syntax_set.syntaxes()[self.syntax_index].name
1024    }
1025}
1026
1027impl HighlightEngine {
1028    /// Build a highlighting engine for a catalog entry.
1029    ///
1030    /// Single chokepoint for the "prefer syntect, fall back to tree-sitter"
1031    /// logic. Callers that start from a path or a syntax name should resolve
1032    /// the entry through `GrammarRegistry::find_by_path` / `find_by_name` and
1033    /// then call this.
1034    pub fn from_entry(
1035        entry: &crate::primitives::grammar::GrammarEntry,
1036        registry: &GrammarRegistry,
1037    ) -> Self {
1038        let syntax_set = registry.syntax_set_arc();
1039        if let Some(index) = entry.engines.syntect {
1040            return Self::TextMate(Box::new(TextMateEngine::with_language(
1041                syntax_set,
1042                index,
1043                entry.engines.tree_sitter,
1044            )));
1045        }
1046        if let Some(lang) = entry.engines.tree_sitter {
1047            if let Ok(highlighter) = Highlighter::new(lang) {
1048                return Self::TreeSitter(Box::new(highlighter));
1049            }
1050        }
1051        Self::None
1052    }
1053
1054    /// Create a highlighting engine for a file.
1055    ///
1056    /// Thin wrapper around `from_entry` that resolves the path via the catalog.
1057    /// User-config-declared filename/extension mappings are honoured as long as
1058    /// `GrammarRegistry::apply_language_config` has been called on the registry.
1059    /// `first_line` is used for shebang / first-line regex fallback — pass
1060    /// `None` when no content is available.
1061    pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1062        if let Some(entry) = registry.find_by_path(path, first_line) {
1063            return Self::from_entry(entry, registry);
1064        }
1065        Self::None
1066    }
1067
1068    /// Create a highlighting engine for a syntax by name.
1069    ///
1070    /// Thin wrapper around `from_entry` that performs the lookup via
1071    /// `find_by_name`. The catalog entry already knows which tree-sitter
1072    /// `Language` (if any) serves it, so no separate hint is needed.
1073    pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1074        if let Some(entry) = registry.find_by_name(name) {
1075            return Self::from_entry(entry, registry);
1076        }
1077        Self::None
1078    }
1079
1080    /// Highlight the visible viewport
1081    ///
1082    /// `context_bytes` controls how far before/after the viewport to parse for accurate
1083    /// highlighting of multi-line constructs (strings, comments, nested blocks).
1084    pub fn highlight_viewport(
1085        &mut self,
1086        buffer: &Buffer,
1087        viewport_start: usize,
1088        viewport_end: usize,
1089        theme: &Theme,
1090        context_bytes: usize,
1091    ) -> Vec<HighlightSpan> {
1092        match self {
1093            Self::TreeSitter(h) => {
1094                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1095            }
1096            Self::TextMate(h) => {
1097                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1098            }
1099            Self::None => Vec::new(),
1100        }
1101    }
1102
1103    /// Notify the highlighting engine of a buffer insert (for checkpoint position tracking).
1104    pub fn notify_insert(&mut self, position: usize, length: usize) {
1105        if let Self::TextMate(h) = self {
1106            h.notify_insert(position, length);
1107        }
1108    }
1109
1110    /// Notify the highlighting engine of a buffer delete (for checkpoint position tracking).
1111    pub fn notify_delete(&mut self, position: usize, length: usize) {
1112        if let Self::TextMate(h) = self {
1113            h.notify_delete(position, length);
1114        }
1115    }
1116
1117    /// Invalidate cache for an edited range
1118    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1119        match self {
1120            Self::TreeSitter(h) => h.invalidate_range(edit_range),
1121            Self::TextMate(h) => h.invalidate_range(edit_range),
1122            Self::None => {}
1123        }
1124    }
1125
1126    /// Invalidate entire cache
1127    pub fn invalidate_all(&mut self) {
1128        match self {
1129            Self::TreeSitter(h) => h.invalidate_all(),
1130            Self::TextMate(h) => h.invalidate_all(),
1131            Self::None => {}
1132        }
1133    }
1134
1135    /// Check if this engine has highlighting available
1136    pub fn has_highlighting(&self) -> bool {
1137        !matches!(self, Self::None)
1138    }
1139
1140    /// Get a description of the active backend
1141    pub fn backend_name(&self) -> &str {
1142        match self {
1143            Self::TreeSitter(_) => "tree-sitter",
1144            Self::TextMate(_) => "textmate",
1145            Self::None => "none",
1146        }
1147    }
1148
1149    /// Get performance stats (TextMate engine only).
1150    pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1151        if let Self::TextMate(h) = self {
1152            Some(h.stats())
1153        } else {
1154            None
1155        }
1156    }
1157
1158    /// Reset performance counters.
1159    pub fn reset_highlight_stats(&mut self) {
1160        if let Self::TextMate(h) = self {
1161            h.reset_stats();
1162        }
1163    }
1164
1165    /// Get the language/syntax name if available
1166    pub fn syntax_name(&self) -> Option<&str> {
1167        match self {
1168            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
1169            Self::TextMate(h) => Some(h.syntax_name()),
1170            Self::None => None,
1171        }
1172    }
1173
1174    /// Get the highlight category at a byte position from the cache.
1175    ///
1176    /// Returns the category if the position falls within a cached highlight span.
1177    /// Useful for detecting whether the cursor is inside a string, comment, etc.
1178    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1179        match self {
1180            Self::TreeSitter(h) => h.category_at_position(position),
1181            Self::TextMate(h) => h.category_at_position(position),
1182            Self::None => None,
1183        }
1184    }
1185
1186    /// Get the tree-sitter Language for non-highlighting features
1187    /// Returns the language even when using TextMate for highlighting
1188    pub fn language(&self) -> Option<&Language> {
1189        match self {
1190            Self::TreeSitter(h) => Some(h.language()),
1191            Self::TextMate(h) => h.language(),
1192            Self::None => None,
1193        }
1194    }
1195}
1196
1197/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
1198/// Returns spans with byte ranges relative to the input string.
1199///
1200/// This uses TextMate grammars via syntect which provides broader language coverage
1201/// than tree-sitter (~150+ languages vs ~17).
1202pub fn highlight_string(
1203    code: &str,
1204    lang_hint: &str,
1205    registry: &GrammarRegistry,
1206    theme: &Theme,
1207) -> Vec<HighlightSpan> {
1208    use syntect::parsing::{ParseState, ScopeStack};
1209
1210    // Find syntax by language token (handles aliases like "py" -> Python)
1211    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1212        Some(s) => s,
1213        None => return Vec::new(),
1214    };
1215
1216    let syntax_set = registry.syntax_set();
1217    let mut state = ParseState::new(syntax);
1218    let mut spans = Vec::new();
1219    let mut current_scopes = ScopeStack::new();
1220    let mut current_offset = 0;
1221
1222    // Parse line by line
1223    for line in code.split_inclusive('\n') {
1224        let line_start = current_offset;
1225        let line_len = line.len();
1226
1227        // Remove trailing newline for syntect, then add it back
1228        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1229        let line_for_syntect = if line.ends_with('\n') {
1230            format!("{}\n", line_content)
1231        } else {
1232            line_content.to_string()
1233        };
1234
1235        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1236            Ok(ops) => ops,
1237            Err(_) => {
1238                current_offset += line_len;
1239                continue;
1240            }
1241        };
1242
1243        let mut syntect_offset = 0;
1244        let line_content_len = line_content.len();
1245
1246        for (op_offset, op) in ops {
1247            let clamped_op_offset = op_offset.min(line_content_len);
1248            if clamped_op_offset > syntect_offset {
1249                if let Some(category) = scope_stack_to_category(&current_scopes) {
1250                    let byte_start = line_start + syntect_offset;
1251                    let byte_end = line_start + clamped_op_offset;
1252                    if byte_start < byte_end {
1253                        spans.push(HighlightSpan {
1254                            range: byte_start..byte_end,
1255                            color: highlight_color(category, theme),
1256                            category: Some(category),
1257                        });
1258                    }
1259                }
1260            }
1261            syntect_offset = clamped_op_offset;
1262            // Scope stack errors are non-fatal for highlighting
1263            #[allow(clippy::let_underscore_must_use)]
1264            let _ = current_scopes.apply(&op);
1265        }
1266
1267        // Handle remaining text on line
1268        if syntect_offset < line_content_len {
1269            if let Some(category) = scope_stack_to_category(&current_scopes) {
1270                let byte_start = line_start + syntect_offset;
1271                let byte_end = line_start + line_content_len;
1272                if byte_start < byte_end {
1273                    spans.push(HighlightSpan {
1274                        range: byte_start..byte_end,
1275                        color: highlight_color(category, theme),
1276                        category: Some(category),
1277                    });
1278                }
1279            }
1280        }
1281
1282        current_offset += line_len;
1283    }
1284
1285    // Merge adjacent spans with same color
1286    merge_adjacent_highlight_spans(&mut spans);
1287
1288    spans
1289}
1290
1291/// Map scope stack to highlight category (for highlight_string)
1292fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1293    for scope in scopes.as_slice().iter().rev() {
1294        let scope_str = scope.build_string();
1295        if let Some(cat) = scope_to_category(&scope_str) {
1296            return Some(cat);
1297        }
1298    }
1299    None
1300}
1301
1302/// Merge adjacent spans with same color
1303fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1304    if spans.len() < 2 {
1305        return;
1306    }
1307
1308    let mut write_idx = 0;
1309    for read_idx in 1..spans.len() {
1310        if spans[write_idx].color == spans[read_idx].color
1311            && spans[write_idx].range.end == spans[read_idx].range.start
1312        {
1313            spans[write_idx].range.end = spans[read_idx].range.end;
1314        } else {
1315            write_idx += 1;
1316            if write_idx != read_idx {
1317                spans[write_idx] = spans[read_idx].clone();
1318            }
1319        }
1320    }
1321    spans.truncate(write_idx + 1);
1322}
1323
1324#[cfg(test)]
1325mod tests {
1326    use crate::model::filesystem::StdFileSystem;
1327    use std::sync::Arc;
1328
1329    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1330        Arc::new(StdFileSystem)
1331    }
1332    use super::*;
1333    use crate::view::theme;
1334
1335    #[test]
1336    fn test_highlight_engine_default() {
1337        let engine = HighlightEngine::default();
1338        assert!(!engine.has_highlighting());
1339        assert_eq!(engine.backend_name(), "none");
1340    }
1341
1342    #[test]
1343    fn test_textmate_backend_selection() {
1344        let registry =
1345            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1346
1347        // Languages with TextMate grammars use TextMate for highlighting
1348        let engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1349        assert_eq!(engine.backend_name(), "textmate");
1350        // Tree-sitter language should still be detected for other features
1351        assert!(engine.language().is_some());
1352
1353        let engine = HighlightEngine::for_file(Path::new("test.py"), None, &registry);
1354        assert_eq!(engine.backend_name(), "textmate");
1355        assert!(engine.language().is_some());
1356
1357        let engine = HighlightEngine::for_file(Path::new("test.js"), None, &registry);
1358        assert_eq!(engine.backend_name(), "textmate");
1359        assert!(engine.language().is_some());
1360
1361        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
1362        let engine = HighlightEngine::for_file(Path::new("test.ts"), None, &registry);
1363        assert_eq!(engine.backend_name(), "tree-sitter");
1364        assert!(engine.language().is_some());
1365
1366        let engine = HighlightEngine::for_file(Path::new("test.tsx"), None, &registry);
1367        assert_eq!(engine.backend_name(), "tree-sitter");
1368        assert!(engine.language().is_some());
1369    }
1370
1371    #[test]
1372    fn test_tree_sitter_direct() {
1373        // Verify tree-sitter highlighter can be created directly for Rust
1374        let highlighter = Highlighter::new(Language::Rust);
1375        assert!(highlighter.is_ok());
1376    }
1377
1378    #[test]
1379    fn test_unknown_extension() {
1380        let registry =
1381            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1382
1383        // Unknown extension
1384        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), None, &registry);
1385        // Might be none or might find something via syntect
1386        // Just verify it doesn't panic
1387        let _ = engine.backend_name();
1388    }
1389
1390    #[test]
1391    fn test_highlight_viewport_empty_buffer_no_panic() {
1392        // Regression test: calling highlight_viewport with an empty buffer
1393        // and non-zero viewport range previously caused subtraction overflow panic.
1394        //
1395        // The bug occurred when:
1396        // - buffer is empty (len = 0)
1397        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
1398        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
1399        // - parse_end - parse_start would underflow (0 - positive = overflow)
1400        let registry =
1401            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1402
1403        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);
1404
1405        // Create empty buffer
1406        let buffer = Buffer::from_str("", 0, test_fs());
1407        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1408
1409        // Test the specific case that triggered the overflow:
1410        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
1411        // 0 - 90 = overflow!
1412        if let HighlightEngine::TextMate(ref mut tm) = engine {
1413            // Small context_bytes so parse_start remains > 0
1414            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1415            assert!(spans.is_empty());
1416        }
1417    }
1418
1419    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
1420    /// This is a regression test for a bug where using str::lines() caused 1-byte
1421    /// offset drift per line because it strips line terminators.
1422    #[test]
1423    fn test_textmate_engine_crlf_byte_offsets() {
1424        let registry =
1425            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1426
1427        let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, &registry);
1428
1429        // Create CRLF content with keywords on each line
1430        // Each "public" keyword should be highlighted at byte positions:
1431        // Line 1: "public" at bytes 0-5
1432        // Line 2: "public" at bytes 8-13 (after "public\r\n" = 8 bytes)
1433        // Line 3: "public" at bytes 16-21 (after two "public\r\n" = 16 bytes)
1434        let content = b"public\r\npublic\r\npublic\r\n";
1435        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1436        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1437
1438        if let HighlightEngine::TextMate(ref mut tm) = engine {
1439            // Highlight the entire content
1440            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1441
1442            // Find spans that cover keyword positions
1443            // The keyword "public" should have spans at these byte ranges:
1444            // Line 1: 0..6
1445            // Line 2: 8..14 (NOT 7..13 which would be the buggy offset)
1446            // Line 3: 16..22 (NOT 14..20 which would be the buggy offset)
1447
1448            eprintln!(
1449                "Spans: {:?}",
1450                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1451            );
1452
1453            // Check that we have spans covering the correct positions
1454            let has_span_at = |start: usize, end: usize| -> bool {
1455                spans
1456                    .iter()
1457                    .any(|s| s.range.start <= start && s.range.end >= end)
1458            };
1459
1460            // Line 1: "public" at bytes 0-6
1461            assert!(
1462                has_span_at(0, 6),
1463                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1464                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1465            );
1466
1467            // Line 2: "public" at bytes 8-14 (after "public\r\n")
1468            // If buggy, would be at 7-13
1469            assert!(
1470                has_span_at(8, 14),
1471                "Should have span covering bytes 8-14 (line 2 'public'). \
1472                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1473                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1474            );
1475
1476            // Line 3: "public" at bytes 16-22 (after two "public\r\n")
1477            // If buggy, would be at 14-20
1478            assert!(
1479                has_span_at(16, 22),
1480                "Should have span covering bytes 16-22 (line 3 'public'). \
1481                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1482                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1483            );
1484        } else {
1485            panic!("Expected TextMate engine for .java file");
1486        }
1487    }
1488
1489    #[test]
1490    fn test_git_rebase_todo_highlighting() {
1491        let registry =
1492            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1493
1494        // git-rebase-todo files should use the Git Rebase Todo grammar
1495        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), None, &registry);
1496        assert_eq!(engine.backend_name(), "textmate");
1497        assert!(engine.has_highlighting());
1498    }
1499
1500    #[test]
1501    fn test_git_commit_message_highlighting() {
1502        let registry =
1503            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1504
1505        // COMMIT_EDITMSG should use the Git Commit Message grammar
1506        let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), None, &registry);
1507        assert_eq!(engine.backend_name(), "textmate");
1508        assert!(engine.has_highlighting());
1509
1510        // MERGE_MSG should also work
1511        let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), None, &registry);
1512        assert_eq!(engine.backend_name(), "textmate");
1513        assert!(engine.has_highlighting());
1514    }
1515
1516    #[test]
1517    fn test_gitignore_highlighting() {
1518        let registry =
1519            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1520
1521        // .gitignore should use the Gitignore grammar
1522        let engine = HighlightEngine::for_file(Path::new(".gitignore"), None, &registry);
1523        assert_eq!(engine.backend_name(), "textmate");
1524        assert!(engine.has_highlighting());
1525
1526        // .dockerignore should also work
1527        let engine = HighlightEngine::for_file(Path::new(".dockerignore"), None, &registry);
1528        assert_eq!(engine.backend_name(), "textmate");
1529        assert!(engine.has_highlighting());
1530    }
1531
1532    #[test]
1533    fn test_gitconfig_highlighting() {
1534        let registry =
1535            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1536
1537        // .gitconfig should use the Git Config grammar
1538        let engine = HighlightEngine::for_file(Path::new(".gitconfig"), None, &registry);
1539        assert_eq!(engine.backend_name(), "textmate");
1540        assert!(engine.has_highlighting());
1541
1542        // .gitmodules should also work
1543        let engine = HighlightEngine::for_file(Path::new(".gitmodules"), None, &registry);
1544        assert_eq!(engine.backend_name(), "textmate");
1545        assert!(engine.has_highlighting());
1546    }
1547
1548    #[test]
1549    fn test_gitattributes_highlighting() {
1550        let registry =
1551            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1552
1553        // .gitattributes should use the Git Attributes grammar
1554        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), None, &registry);
1555        assert_eq!(engine.backend_name(), "textmate");
1556        assert!(engine.has_highlighting());
1557    }
1558
1559    #[test]
1560    fn test_comment_delimiter_uses_comment_color() {
1561        // Comment delimiters (#, //, /*) should use comment color, not operator
1562        assert_eq!(
1563            scope_to_category("punctuation.definition.comment"),
1564            Some(HighlightCategory::Comment)
1565        );
1566        assert_eq!(
1567            scope_to_category("punctuation.definition.comment.python"),
1568            Some(HighlightCategory::Comment)
1569        );
1570        assert_eq!(
1571            scope_to_category("punctuation.definition.comment.begin"),
1572            Some(HighlightCategory::Comment)
1573        );
1574    }
1575
1576    #[test]
1577    fn test_string_delimiter_uses_string_color() {
1578        // String delimiters (", ', `) should use string color, not operator
1579        assert_eq!(
1580            scope_to_category("punctuation.definition.string.begin"),
1581            Some(HighlightCategory::String)
1582        );
1583        assert_eq!(
1584            scope_to_category("punctuation.definition.string.end"),
1585            Some(HighlightCategory::String)
1586        );
1587    }
1588
1589    #[test]
1590    fn test_punctuation_bracket() {
1591        // punctuation.section (TextMate standard for block delimiters)
1592        assert_eq!(
1593            scope_to_category("punctuation.section"),
1594            Some(HighlightCategory::PunctuationBracket)
1595        );
1596        assert_eq!(
1597            scope_to_category("punctuation.section.block.begin.c"),
1598            Some(HighlightCategory::PunctuationBracket)
1599        );
1600        assert_eq!(
1601            scope_to_category("punctuation.bracket"),
1602            Some(HighlightCategory::PunctuationBracket)
1603        );
1604        // punctuation.definition.* bracket-like scopes from sublime-syntax grammars
1605        assert_eq!(
1606            scope_to_category("punctuation.definition.array.begin.toml"),
1607            Some(HighlightCategory::PunctuationBracket)
1608        );
1609        assert_eq!(
1610            scope_to_category("punctuation.definition.block.code.typst"),
1611            Some(HighlightCategory::PunctuationBracket)
1612        );
1613        assert_eq!(
1614            scope_to_category("punctuation.definition.group.typst"),
1615            Some(HighlightCategory::PunctuationBracket)
1616        );
1617        assert_eq!(
1618            scope_to_category("punctuation.definition.inline-table.begin.toml"),
1619            Some(HighlightCategory::PunctuationBracket)
1620        );
1621        assert_eq!(
1622            scope_to_category("punctuation.definition.tag.end.svelte"),
1623            Some(HighlightCategory::PunctuationBracket)
1624        );
1625    }
1626
1627    #[test]
1628    fn test_punctuation_delimiter() {
1629        assert_eq!(
1630            scope_to_category("punctuation.separator"),
1631            Some(HighlightCategory::PunctuationDelimiter)
1632        );
1633        assert_eq!(
1634            scope_to_category("punctuation.terminator.statement.c"),
1635            Some(HighlightCategory::PunctuationDelimiter)
1636        );
1637        assert_eq!(
1638            scope_to_category("punctuation.accessor"),
1639            Some(HighlightCategory::PunctuationDelimiter)
1640        );
1641    }
1642}