Skip to main content

fresh/primitives/
highlight_engine.rs

//! Unified highlighting engine
//!
//! This module provides a unified abstraction over different highlighting backends:
//! - TextMate grammars via syntect (default for highlighting)
//! - Tree-sitter (available via explicit preference, also used for non-highlighting features)
//!
//! # Backend Selection
//! By default, syntect/TextMate is used for syntax highlighting because it provides
//! broader language coverage. Tree-sitter language detection is still performed
//! to support non-highlighting features like auto-indentation and semantic highlighting.
//!
//! # Non-Highlighting Features
//! Even when using TextMate for highlighting, the tree-sitter `Language` is detected
//! and available via `.language()` for:
//! - Auto-indentation (via IndentCalculator)
//! - Semantic highlighting (variable scope tracking)
//! - Other syntax-aware features
18
19use crate::model::buffer::Buffer;
20use crate::primitives::grammar::GrammarRegistry;
21use crate::primitives::highlighter::{
22    highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
23};
24use crate::view::theme::Theme;
25use std::ops::Range;
26use std::path::Path;
27use std::sync::Arc;
28use syntect::parsing::SyntaxSet;
29
30/// Map TextMate scope to highlight category
31fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
32    let scope_lower = scope.to_lowercase();
33
34    // Comments - highest priority
35    if scope_lower.starts_with("comment") {
36        return Some(HighlightCategory::Comment);
37    }
38
39    // Strings
40    if scope_lower.starts_with("string") {
41        return Some(HighlightCategory::String);
42    }
43
44    // Markdown/markup scopes - handle before generic keyword/punctuation checks
45    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
46    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
47    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
48        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
49    }
50    // Bold: markup.bold
51    if scope_lower.starts_with("markup.bold") {
52        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
53    }
54    // Italic: markup.italic
55    if scope_lower.starts_with("markup.italic") {
56        return Some(HighlightCategory::Variable); // Italic styled like variables
57    }
58    // Inline code and code blocks: markup.raw, markup.inline.raw
59    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
60        return Some(HighlightCategory::String); // Code styled like strings
61    }
62    // Links: markup.underline.link
63    if scope_lower.starts_with("markup.underline.link") {
64        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
65    }
66    // Generic underline (often links)
67    if scope_lower.starts_with("markup.underline") {
68        return Some(HighlightCategory::Function);
69    }
70    // Block quotes: markup.quote
71    if scope_lower.starts_with("markup.quote") {
72        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
73    }
74    // Lists: markup.list
75    if scope_lower.starts_with("markup.list") {
76        return Some(HighlightCategory::Operator); // List markers styled like operators
77    }
78    // Strikethrough: markup.strikethrough
79    if scope_lower.starts_with("markup.strikethrough") {
80        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
81    }
82
83    // Keywords
84    if scope_lower.starts_with("keyword.control")
85        || scope_lower.starts_with("keyword.other")
86        || scope_lower.starts_with("keyword.declaration")
87        || scope_lower.starts_with("keyword")
88    {
89        // keyword.operator should map to Operator, not Keyword
90        if !scope_lower.starts_with("keyword.operator") {
91            return Some(HighlightCategory::Keyword);
92        }
93    }
94
95    // Punctuation that belongs to a parent construct (comment/string delimiters)
96    // These must be checked before the generic punctuation rule below.
97    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
98    if scope_lower.starts_with("punctuation.definition.comment") {
99        return Some(HighlightCategory::Comment);
100    }
101    if scope_lower.starts_with("punctuation.definition.string") {
102        return Some(HighlightCategory::String);
103    }
104
105    // Operators (including keyword.operator)
106    if scope_lower.starts_with("keyword.operator") || scope_lower.starts_with("punctuation") {
107        return Some(HighlightCategory::Operator);
108    }
109
110    // Functions
111    if scope_lower.starts_with("entity.name.function")
112        || scope_lower.starts_with("support.function")
113        || scope_lower.starts_with("meta.function-call")
114        || scope_lower.starts_with("variable.function")
115    {
116        return Some(HighlightCategory::Function);
117    }
118
119    // Types
120    if scope_lower.starts_with("entity.name.type")
121        || scope_lower.starts_with("entity.name.class")
122        || scope_lower.starts_with("entity.name.struct")
123        || scope_lower.starts_with("entity.name.enum")
124        || scope_lower.starts_with("entity.name.interface")
125        || scope_lower.starts_with("entity.name.trait")
126        || scope_lower.starts_with("support.type")
127        || scope_lower.starts_with("support.class")
128        || scope_lower.starts_with("storage.type")
129    {
130        return Some(HighlightCategory::Type);
131    }
132
133    // Storage modifiers (pub, static, const as keywords)
134    if scope_lower.starts_with("storage.modifier") {
135        return Some(HighlightCategory::Keyword);
136    }
137
138    // Constants and numbers
139    if scope_lower.starts_with("constant.numeric")
140        || scope_lower.starts_with("constant.language.boolean")
141    {
142        return Some(HighlightCategory::Number);
143    }
144    if scope_lower.starts_with("constant") {
145        return Some(HighlightCategory::Constant);
146    }
147
148    // Variables
149    if scope_lower.starts_with("variable.parameter")
150        || scope_lower.starts_with("variable.other")
151        || scope_lower.starts_with("variable.language")
152    {
153        return Some(HighlightCategory::Variable);
154    }
155
156    // Properties / object keys
157    if scope_lower.starts_with("entity.name.tag")
158        || scope_lower.starts_with("support.other.property")
159        || scope_lower.starts_with("meta.object-literal.key")
160        || scope_lower.starts_with("variable.other.property")
161        || scope_lower.starts_with("variable.other.object.property")
162    {
163        return Some(HighlightCategory::Property);
164    }
165
166    // Attributes (decorators, annotations)
167    if scope_lower.starts_with("entity.other.attribute")
168        || scope_lower.starts_with("meta.attribute")
169        || scope_lower.starts_with("entity.name.decorator")
170    {
171        return Some(HighlightCategory::Attribute);
172    }
173
174    // Generic variable fallback
175    if scope_lower.starts_with("variable") {
176        return Some(HighlightCategory::Variable);
177    }
178
179    None
180}
181
/// Which highlighting backend the caller would like to use.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HighlighterPreference {
    /// Default choice: highlight with TextMate/syntect.
    /// The tree-sitter language is still detected for other features
    /// (indentation, semantic highlighting).
    #[default]
    Auto,
    /// Force tree-sitter highlighting (useful for testing/comparison).
    TreeSitter,
    /// Explicitly request a TextMate grammar (same as `Auto`).
    TextMate,
}
194
195/// Unified highlighting engine supporting multiple backends
196#[derive(Default)]
197pub enum HighlightEngine {
198    /// Tree-sitter based highlighting (built-in languages)
199    TreeSitter(Box<Highlighter>),
200    /// TextMate grammar based highlighting
201    TextMate(Box<TextMateEngine>),
202    /// No highlighting available
203    #[default]
204    None,
205}
206
207/// TextMate highlighting engine wrapper
208///
209/// This struct handles the lifetime complexities of syntect by storing
210/// the syntax set and using indices rather than references.
211pub struct TextMateEngine {
212    syntax_set: Arc<SyntaxSet>,
213    syntax_index: usize,
214    cache: Option<TextMateCache>,
215    last_buffer_len: usize,
216    /// Tree-sitter language for non-highlighting features (indentation, semantic highlighting)
217    /// Even when using syntect for highlighting, we track the language for other features
218    ts_language: Option<Language>,
219}
220
221#[derive(Debug, Clone)]
222struct TextMateCache {
223    range: Range<usize>,
224    spans: Vec<CachedSpan>,
225}
226
227#[derive(Debug, Clone)]
228struct CachedSpan {
229    range: Range<usize>,
230    category: crate::primitives::highlighter::HighlightCategory,
231}
232
/// Upper bound (1 MiB) on the number of bytes parsed in a single operation.
const MAX_PARSE_BYTES: usize = 1 << 20;
235
236impl TextMateEngine {
237    /// Create a new TextMate engine for the given syntax
238    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
239        Self {
240            syntax_set,
241            syntax_index,
242            cache: None,
243            last_buffer_len: 0,
244            ts_language: None,
245        }
246    }
247
248    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
249    pub fn with_language(
250        syntax_set: Arc<SyntaxSet>,
251        syntax_index: usize,
252        ts_language: Option<Language>,
253    ) -> Self {
254        Self {
255            syntax_set,
256            syntax_index,
257            cache: None,
258            last_buffer_len: 0,
259            ts_language,
260        }
261    }
262
263    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
264    pub fn language(&self) -> Option<&Language> {
265        self.ts_language.as_ref()
266    }
267
268    /// Highlight the visible viewport range
269    ///
270    /// `context_bytes` controls how far before/after the viewport to parse for accurate
271    /// highlighting of multi-line constructs (strings, comments, nested blocks).
272    pub fn highlight_viewport(
273        &mut self,
274        buffer: &Buffer,
275        viewport_start: usize,
276        viewport_end: usize,
277        theme: &Theme,
278        context_bytes: usize,
279    ) -> Vec<HighlightSpan> {
280        use syntect::parsing::{ParseState, ScopeStack};
281
282        // Check cache validity
283        if let Some(cache) = &self.cache {
284            if cache.range.start <= viewport_start
285                && cache.range.end >= viewport_end
286                && self.last_buffer_len == buffer.len()
287            {
288                return cache
289                    .spans
290                    .iter()
291                    .filter(|span| {
292                        span.range.start < viewport_end && span.range.end > viewport_start
293                    })
294                    .map(|span| HighlightSpan {
295                        range: span.range.clone(),
296                        color: highlight_color(span.category, theme),
297                        category: Some(span.category),
298                    })
299                    .collect();
300            }
301        }
302
303        // Cache miss - parse viewport region
304        let parse_start = viewport_start.saturating_sub(context_bytes);
305        let parse_end = (viewport_end + context_bytes).min(buffer.len());
306
307        if parse_end <= parse_start || parse_end - parse_start > MAX_PARSE_BYTES {
308            return Vec::new();
309        }
310
311        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
312        let mut state = ParseState::new(syntax);
313        let mut spans = Vec::new();
314
315        // Get content
316        let content = buffer.slice_bytes(parse_start..parse_end);
317        let content_str = match std::str::from_utf8(&content) {
318            Ok(s) => s,
319            Err(_) => return Vec::new(),
320        };
321
322        // Parse line by line - manually track line boundaries to handle CRLF correctly
323        // str::lines() strips both \n and \r\n, losing the distinction
324        let content_bytes = content_str.as_bytes();
325        let mut pos = 0;
326        let mut current_offset = parse_start;
327        let mut current_scopes = ScopeStack::new();
328
329        while pos < content_bytes.len() {
330            let line_start = pos;
331            let mut line_end = pos;
332
333            // Scan for line ending (find \n or \r\n or end of content)
334            while line_end < content_bytes.len() {
335                if content_bytes[line_end] == b'\n' {
336                    line_end += 1;
337                    break;
338                } else if content_bytes[line_end] == b'\r' {
339                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
340                        line_end += 2; // CRLF
341                    } else {
342                        line_end += 1; // CR only
343                    }
344                    break;
345                }
346                line_end += 1;
347            }
348
349            // Get the line content and actual byte length
350            let line_bytes = &content_bytes[line_start..line_end];
351            let actual_line_byte_len = line_bytes.len();
352
353            // Create line string for syntect - strip CR if present, ensure single \n
354            let line_str = match std::str::from_utf8(line_bytes) {
355                Ok(s) => s,
356                Err(_) => {
357                    pos = line_end;
358                    current_offset += actual_line_byte_len;
359                    continue;
360                }
361            };
362
363            // Remove trailing \r\n or \n, then add single \n for syntect
364            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
365            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
366                format!("{}\n", line_content)
367            } else {
368                line_content.to_string()
369            };
370
371            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
372                Ok(ops) => ops,
373                Err(_) => {
374                    pos = line_end;
375                    current_offset += actual_line_byte_len;
376                    continue;
377                }
378            };
379
380            // Convert operations to spans
381            // Note: syntect offsets are relative to line_for_syntect, but we need
382            // to map them to the actual buffer positions
383            let mut syntect_offset = 0;
384            let line_content_len = line_content.len();
385
386            for (op_offset, op) in ops {
387                // Handle any text before this operation (but only within content, not newline)
388                let clamped_op_offset = op_offset.min(line_content_len);
389                if clamped_op_offset > syntect_offset {
390                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
391                        let byte_start = current_offset + syntect_offset;
392                        let byte_end = current_offset + clamped_op_offset;
393                        if byte_start < byte_end {
394                            spans.push(CachedSpan {
395                                range: byte_start..byte_end,
396                                category,
397                            });
398                        }
399                    }
400                }
401                syntect_offset = clamped_op_offset;
402
403                // Scope stack errors are non-fatal for highlighting
404                #[allow(clippy::let_underscore_must_use)]
405                let _ = current_scopes.apply(&op);
406            }
407
408            // Handle remaining text on line (content only, not line ending)
409            if syntect_offset < line_content_len {
410                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
411                    let byte_start = current_offset + syntect_offset;
412                    let byte_end = current_offset + line_content_len;
413                    if byte_start < byte_end {
414                        spans.push(CachedSpan {
415                            range: byte_start..byte_end,
416                            category,
417                        });
418                    }
419                }
420            }
421
422            // Advance by actual byte length (including real line terminator)
423            pos = line_end;
424            current_offset += actual_line_byte_len;
425        }
426
427        // Merge adjacent spans
428        Self::merge_adjacent_spans(&mut spans);
429
430        // Update cache
431        self.cache = Some(TextMateCache {
432            range: parse_start..parse_end,
433            spans: spans.clone(),
434        });
435        self.last_buffer_len = buffer.len();
436
437        // Filter and resolve colors
438        spans
439            .into_iter()
440            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
441            .map(|span| {
442                let cat = span.category;
443                HighlightSpan {
444                    range: span.range,
445                    color: highlight_color(cat, theme),
446                    category: Some(cat),
447                }
448            })
449            .collect()
450    }
451
452    /// Map scope stack to highlight category
453    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
454        for scope in scopes.as_slice().iter().rev() {
455            let scope_str = scope.build_string();
456            if let Some(cat) = scope_to_category(&scope_str) {
457                return Some(cat);
458            }
459        }
460        None
461    }
462
463    /// Merge adjacent spans with same category
464    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
465        if spans.len() < 2 {
466            return;
467        }
468
469        let mut write_idx = 0;
470        for read_idx in 1..spans.len() {
471            if spans[write_idx].category == spans[read_idx].category
472                && spans[write_idx].range.end == spans[read_idx].range.start
473            {
474                spans[write_idx].range.end = spans[read_idx].range.end;
475            } else {
476                write_idx += 1;
477                if write_idx != read_idx {
478                    spans[write_idx] = spans[read_idx].clone();
479                }
480            }
481        }
482        spans.truncate(write_idx + 1);
483    }
484
485    /// Invalidate cache for edited range
486    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
487        if let Some(cache) = &self.cache {
488            if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
489                self.cache = None;
490            }
491        }
492    }
493
494    /// Invalidate all cache
495    pub fn invalidate_all(&mut self) {
496        self.cache = None;
497    }
498
499    /// Get the highlight category at a byte position from the cache.
500    ///
501    /// Returns the category if the position falls within a cached highlight span.
502    /// The position must be within the last highlighted viewport range for a result.
503    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
504        let cache = self.cache.as_ref()?;
505        cache
506            .spans
507            .iter()
508            .find(|span| span.range.start <= position && position < span.range.end)
509            .map(|span| span.category)
510    }
511
512    /// Get syntax name
513    pub fn syntax_name(&self) -> &str {
514        &self.syntax_set.syntaxes()[self.syntax_index].name
515    }
516}
517
518impl HighlightEngine {
519    /// Create a highlighting engine for a file
520    ///
521    /// Always uses syntect/TextMate for highlighting, but detects tree-sitter
522    /// language for other features (indentation, semantic highlighting).
523    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Self {
524        Self::for_file_with_preference(path, registry, HighlighterPreference::Auto)
525    }
526
527    /// Create a highlighting engine for a file, using language configuration for detection.
528    ///
529    /// This method checks the provided languages configuration for filename and extension
530    /// matches before falling back to built-in detection. This allows users to configure
531    /// custom filename patterns (like PKGBUILD for bash) that will be respected for
532    /// syntax highlighting.
533    pub fn for_file_with_languages(
534        path: &Path,
535        registry: &GrammarRegistry,
536        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
537    ) -> Self {
538        Self::for_file_with_languages_and_preference(
539            path,
540            registry,
541            languages,
542            HighlighterPreference::Auto,
543        )
544    }
545
546    /// Create a highlighting engine with explicit preference and language configuration.
547    pub fn for_file_with_languages_and_preference(
548        path: &Path,
549        registry: &GrammarRegistry,
550        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
551        preference: HighlighterPreference,
552    ) -> Self {
553        match preference {
554            // Auto now defaults to TextMate for highlighting (syntect has broader coverage)
555            // but still detects tree-sitter language for indentation/semantic features
556            HighlighterPreference::Auto | HighlighterPreference::TextMate => {
557                Self::textmate_for_file_with_languages(path, registry, languages)
558            }
559            HighlighterPreference::TreeSitter => {
560                if let Some(lang) = Language::from_path(path) {
561                    if let Ok(highlighter) = Highlighter::new(lang) {
562                        return Self::TreeSitter(Box::new(highlighter));
563                    }
564                }
565                Self::None
566            }
567        }
568    }
569
570    /// Create a highlighting engine with explicit preference
571    pub fn for_file_with_preference(
572        path: &Path,
573        registry: &GrammarRegistry,
574        preference: HighlighterPreference,
575    ) -> Self {
576        match preference {
577            // Auto now defaults to TextMate for highlighting (syntect has broader coverage)
578            // but still detects tree-sitter language for indentation/semantic features
579            HighlighterPreference::Auto | HighlighterPreference::TextMate => {
580                Self::textmate_for_file(path, registry)
581            }
582            HighlighterPreference::TreeSitter => {
583                if let Some(lang) = Language::from_path(path) {
584                    if let Ok(highlighter) = Highlighter::new(lang) {
585                        return Self::TreeSitter(Box::new(highlighter));
586                    }
587                }
588                Self::None
589            }
590        }
591    }
592
593    /// Create a TextMate engine for a file, falling back to tree-sitter if no TextMate grammar
594    fn textmate_for_file(path: &Path, registry: &GrammarRegistry) -> Self {
595        let syntax_set = registry.syntax_set_arc();
596
597        // Detect tree-sitter language for non-highlighting features
598        let ts_language = Language::from_path(path);
599
600        // Find syntax by file extension
601        if let Some(syntax) = registry.find_syntax_for_file(path) {
602            // Find the index of this syntax in the set
603            if let Some(index) = syntax_set
604                .syntaxes()
605                .iter()
606                .position(|s| s.name == syntax.name)
607            {
608                return Self::TextMate(Box::new(TextMateEngine::with_language(
609                    syntax_set,
610                    index,
611                    ts_language,
612                )));
613            }
614        }
615
616        // No TextMate grammar found - fall back to tree-sitter if available
617        // This handles languages like TypeScript that syntect doesn't include by default
618        if let Some(lang) = ts_language {
619            if let Ok(highlighter) = Highlighter::new(lang) {
620                tracing::debug!(
621                    "No TextMate grammar for {:?}, falling back to tree-sitter",
622                    path.extension()
623                );
624                return Self::TreeSitter(Box::new(highlighter));
625            }
626        }
627
628        Self::None
629    }
630
631    /// Create a TextMate engine for a file with language configuration support
632    fn textmate_for_file_with_languages(
633        path: &Path,
634        registry: &GrammarRegistry,
635        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
636    ) -> Self {
637        let syntax_set = registry.syntax_set_arc();
638
639        // Detect tree-sitter language for non-highlighting features
640        let ts_language = Language::from_path(path);
641
642        // Find syntax by file extension, checking languages config first
643        if let Some(syntax) = registry.find_syntax_for_file_with_languages(path, languages) {
644            // Find the index of this syntax in the set
645            if let Some(index) = syntax_set
646                .syntaxes()
647                .iter()
648                .position(|s| s.name == syntax.name)
649            {
650                return Self::TextMate(Box::new(TextMateEngine::with_language(
651                    syntax_set,
652                    index,
653                    ts_language,
654                )));
655            }
656        }
657
658        // No TextMate grammar found - fall back to tree-sitter if available
659        // This handles languages like TypeScript that syntect doesn't include by default
660        if let Some(lang) = ts_language {
661            if let Ok(highlighter) = Highlighter::new(lang) {
662                tracing::debug!(
663                    "No TextMate grammar for {:?}, falling back to tree-sitter",
664                    path.extension()
665                );
666                return Self::TreeSitter(Box::new(highlighter));
667            }
668        }
669
670        Self::None
671    }
672
673    /// Create a highlighting engine for a specific tree-sitter language.
674    ///
675    /// This is useful when manually setting the language (e.g., from UI).
676    /// Uses tree-sitter for the specified language.
677    pub fn for_language(language: Language) -> Self {
678        if let Ok(highlighter) = Highlighter::new(language) {
679            Self::TreeSitter(Box::new(highlighter))
680        } else {
681            Self::None
682        }
683    }
684
685    /// Create a highlighting engine for a syntax by name.
686    ///
687    /// This looks up the syntax in the grammar registry and creates a TextMate
688    /// highlighter for it. This supports all syntect syntaxes (100+) including
689    /// user-configured grammars.
690    ///
691    /// The `ts_language` parameter optionally provides a tree-sitter language
692    /// for non-highlighting features (indentation, semantic highlighting).
693    pub fn for_syntax_name(
694        name: &str,
695        registry: &GrammarRegistry,
696        ts_language: Option<Language>,
697    ) -> Self {
698        let syntax_set = registry.syntax_set_arc();
699
700        if let Some(syntax) = registry.find_syntax_by_name(name) {
701            // Find the index of this syntax in the set
702            if let Some(index) = syntax_set
703                .syntaxes()
704                .iter()
705                .position(|s| s.name == syntax.name)
706            {
707                return Self::TextMate(Box::new(TextMateEngine::with_language(
708                    syntax_set,
709                    index,
710                    ts_language,
711                )));
712            }
713        }
714
715        Self::None
716    }
717
718    /// Highlight the visible viewport
719    ///
720    /// `context_bytes` controls how far before/after the viewport to parse for accurate
721    /// highlighting of multi-line constructs (strings, comments, nested blocks).
722    pub fn highlight_viewport(
723        &mut self,
724        buffer: &Buffer,
725        viewport_start: usize,
726        viewport_end: usize,
727        theme: &Theme,
728        context_bytes: usize,
729    ) -> Vec<HighlightSpan> {
730        match self {
731            Self::TreeSitter(h) => {
732                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
733            }
734            Self::TextMate(h) => {
735                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
736            }
737            Self::None => Vec::new(),
738        }
739    }
740
741    /// Invalidate cache for an edited range
742    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
743        match self {
744            Self::TreeSitter(h) => h.invalidate_range(edit_range),
745            Self::TextMate(h) => h.invalidate_range(edit_range),
746            Self::None => {}
747        }
748    }
749
750    /// Invalidate entire cache
751    pub fn invalidate_all(&mut self) {
752        match self {
753            Self::TreeSitter(h) => h.invalidate_all(),
754            Self::TextMate(h) => h.invalidate_all(),
755            Self::None => {}
756        }
757    }
758
759    /// Check if this engine has highlighting available
760    pub fn has_highlighting(&self) -> bool {
761        !matches!(self, Self::None)
762    }
763
764    /// Get a description of the active backend
765    pub fn backend_name(&self) -> &str {
766        match self {
767            Self::TreeSitter(_) => "tree-sitter",
768            Self::TextMate(_) => "textmate",
769            Self::None => "none",
770        }
771    }
772
773    /// Get the language/syntax name if available
774    pub fn syntax_name(&self) -> Option<&str> {
775        match self {
776            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
777            Self::TextMate(h) => Some(h.syntax_name()),
778            Self::None => None,
779        }
780    }
781
782    /// Get the highlight category at a byte position from the cache.
783    ///
784    /// Returns the category if the position falls within a cached highlight span.
785    /// Useful for detecting whether the cursor is inside a string, comment, etc.
786    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
787        match self {
788            Self::TreeSitter(h) => h.category_at_position(position),
789            Self::TextMate(h) => h.category_at_position(position),
790            Self::None => None,
791        }
792    }
793
794    /// Get the tree-sitter Language for non-highlighting features
795    /// Returns the language even when using TextMate for highlighting
796    pub fn language(&self) -> Option<&Language> {
797        match self {
798            Self::TreeSitter(h) => Some(h.language()),
799            Self::TextMate(h) => h.language(),
800            Self::None => None,
801        }
802    }
803}
804
805/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
806/// Returns spans with byte ranges relative to the input string.
807///
808/// This uses TextMate grammars via syntect which provides broader language coverage
809/// than tree-sitter (~150+ languages vs ~17).
810pub fn highlight_string(
811    code: &str,
812    lang_hint: &str,
813    registry: &GrammarRegistry,
814    theme: &Theme,
815) -> Vec<HighlightSpan> {
816    use syntect::parsing::{ParseState, ScopeStack};
817
818    // Find syntax by language token (handles aliases like "py" -> Python)
819    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
820        Some(s) => s,
821        None => return Vec::new(),
822    };
823
824    let syntax_set = registry.syntax_set();
825    let mut state = ParseState::new(syntax);
826    let mut spans = Vec::new();
827    let mut current_scopes = ScopeStack::new();
828    let mut current_offset = 0;
829
830    // Parse line by line
831    for line in code.split_inclusive('\n') {
832        let line_start = current_offset;
833        let line_len = line.len();
834
835        // Remove trailing newline for syntect, then add it back
836        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
837        let line_for_syntect = if line.ends_with('\n') {
838            format!("{}\n", line_content)
839        } else {
840            line_content.to_string()
841        };
842
843        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
844            Ok(ops) => ops,
845            Err(_) => {
846                current_offset += line_len;
847                continue;
848            }
849        };
850
851        let mut syntect_offset = 0;
852        let line_content_len = line_content.len();
853
854        for (op_offset, op) in ops {
855            let clamped_op_offset = op_offset.min(line_content_len);
856            if clamped_op_offset > syntect_offset {
857                if let Some(category) = scope_stack_to_category(&current_scopes) {
858                    let byte_start = line_start + syntect_offset;
859                    let byte_end = line_start + clamped_op_offset;
860                    if byte_start < byte_end {
861                        spans.push(HighlightSpan {
862                            range: byte_start..byte_end,
863                            color: highlight_color(category, theme),
864                            category: Some(category),
865                        });
866                    }
867                }
868            }
869            syntect_offset = clamped_op_offset;
870            // Scope stack errors are non-fatal for highlighting
871            #[allow(clippy::let_underscore_must_use)]
872            let _ = current_scopes.apply(&op);
873        }
874
875        // Handle remaining text on line
876        if syntect_offset < line_content_len {
877            if let Some(category) = scope_stack_to_category(&current_scopes) {
878                let byte_start = line_start + syntect_offset;
879                let byte_end = line_start + line_content_len;
880                if byte_start < byte_end {
881                    spans.push(HighlightSpan {
882                        range: byte_start..byte_end,
883                        color: highlight_color(category, theme),
884                        category: Some(category),
885                    });
886                }
887            }
888        }
889
890        current_offset += line_len;
891    }
892
893    // Merge adjacent spans with same color
894    merge_adjacent_highlight_spans(&mut spans);
895
896    spans
897}
898
899/// Map scope stack to highlight category (for highlight_string)
900fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
901    for scope in scopes.as_slice().iter().rev() {
902        let scope_str = scope.build_string();
903        if let Some(cat) = scope_to_category(&scope_str) {
904            return Some(cat);
905        }
906    }
907    None
908}
909
910/// Merge adjacent spans with same color
911fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
912    if spans.len() < 2 {
913        return;
914    }
915
916    let mut write_idx = 0;
917    for read_idx in 1..spans.len() {
918        if spans[write_idx].color == spans[read_idx].color
919            && spans[write_idx].range.end == spans[read_idx].range.start
920        {
921            spans[write_idx].range.end = spans[read_idx].range.end;
922        } else {
923            write_idx += 1;
924            if write_idx != read_idx {
925                spans[write_idx] = spans[read_idx].clone();
926            }
927        }
928    }
929    spans.truncate(write_idx + 1);
930}
931
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::filesystem::{FileSystem, StdFileSystem};
    use crate::primitives::grammar::LocalGrammarLoader;
    use crate::view::theme;
    use std::sync::Arc;

    /// Filesystem handle handed to the buffers created by these tests.
    fn test_fs() -> Arc<dyn FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }

    /// Assert that `engine` (created for `file_name`) highlights via TextMate.
    #[track_caller]
    fn assert_textmate_highlighting(engine: &HighlightEngine, file_name: &str) {
        assert_eq!(
            engine.backend_name(),
            "textmate",
            "unexpected backend for {}",
            file_name
        );
        assert!(
            engine.has_highlighting(),
            "expected highlighting for {}",
            file_name
        );
    }

    #[test]
    fn test_highlighter_preference_default() {
        assert_eq!(
            HighlighterPreference::default(),
            HighlighterPreference::Auto
        );
    }

    #[test]
    fn test_highlight_engine_default() {
        let engine = HighlightEngine::default();
        assert!(!engine.has_highlighting());
        assert_eq!(engine.backend_name(), "none");
    }

    #[test]
    fn test_textmate_backend_selection() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // Languages with TextMate grammars use TextMate for highlighting, while
        // the tree-sitter language is still detected for other features.
        for file_name in ["test.rs", "test.py", "test.js"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_eq!(
                engine.backend_name(),
                "textmate",
                "unexpected backend for {}",
                file_name
            );
            assert!(
                engine.language().is_some(),
                "missing tree-sitter language for {}",
                file_name
            );
        }

        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
        for file_name in ["test.ts", "test.tsx"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_eq!(
                engine.backend_name(),
                "tree-sitter",
                "unexpected backend for {}",
                file_name
            );
            assert!(
                engine.language().is_some(),
                "missing tree-sitter language for {}",
                file_name
            );
        }
    }

    #[test]
    fn test_tree_sitter_explicit_preference() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // Force tree-sitter for highlighting
        let engine = HighlightEngine::for_file_with_preference(
            Path::new("test.rs"),
            &registry,
            HighlighterPreference::TreeSitter,
        );
        assert_eq!(engine.backend_name(), "tree-sitter");
    }

    #[test]
    fn test_unknown_extension() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // The result might be "none" or something syntect happened to match;
        // the only requirement is that nothing panics.
        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), &registry);
        let _ = engine.backend_name();
    }

    #[test]
    fn test_highlight_viewport_empty_buffer_no_panic() {
        // Regression test: calling highlight_viewport with an empty buffer
        // and non-zero viewport range previously caused subtraction overflow panic.
        //
        // The bug occurred when:
        // - buffer is empty (len = 0)
        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
        // - parse_end - parse_start would underflow (0 - positive = overflow)
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());
        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), &registry);

        let buffer = Buffer::from_str("", 0, test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        // The specific case that triggered the overflow:
        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
        if let HighlightEngine::TextMate(ref mut tm) = engine {
            // Small context_bytes so parse_start remains > 0
            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
            assert!(spans.is_empty());
        } else {
            // Without this branch the regression test would pass vacuously.
            panic!("Expected TextMate engine for .rs file");
        }
    }

    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
    /// This is a regression test for a bug where using str::lines() caused 1-byte
    /// offset drift per line because it strips line terminators.
    #[test]
    fn test_textmate_engine_crlf_byte_offsets() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());
        let mut engine = HighlightEngine::for_file(Path::new("test.java"), &registry);

        // Three CRLF-terminated lines of 8 bytes each ("public" + "\r\n"), so the
        // keyword occupies bytes 0..6, 8..14 and 16..22. With the drift bug the
        // later keywords were reported at 7..13 and 14..20 instead.
        let content = b"public\r\npublic\r\npublic\r\n";
        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        if let HighlightEngine::TextMate(ref mut tm) = engine {
            // Highlight the entire content
            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
            let ranges: Vec<_> = spans.iter().map(|s| &s.range).collect();

            let expected = [(0usize, 6usize), (8, 14), (16, 22)];
            for (index, &(start, end)) in expected.iter().enumerate() {
                let covered = spans
                    .iter()
                    .any(|s| s.range.start <= start && s.range.end >= end);
                assert!(
                    covered,
                    "Should have span covering bytes {}-{} (line {} 'public'). \
                     A miss on lines 2-3 means CRLF offset drift is occurring. Spans: {:?}",
                    start,
                    end,
                    index + 1,
                    ranges
                );
            }
        } else {
            panic!("Expected TextMate engine for .java file");
        }
    }

    #[test]
    fn test_git_rebase_todo_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // git-rebase-todo files should use the Git Rebase Todo grammar
        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), &registry);
        assert_textmate_highlighting(&engine, "git-rebase-todo");
    }

    #[test]
    fn test_git_commit_message_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // COMMIT_EDITMSG should use the Git Commit Message grammar; MERGE_MSG
        // should also work.
        for file_name in ["COMMIT_EDITMSG", "MERGE_MSG"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_textmate_highlighting(&engine, file_name);
        }
    }

    #[test]
    fn test_gitignore_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // .gitignore should use the Gitignore grammar; .dockerignore should
        // also work.
        for file_name in [".gitignore", ".dockerignore"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_textmate_highlighting(&engine, file_name);
        }
    }

    #[test]
    fn test_gitconfig_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // .gitconfig should use the Git Config grammar; .gitmodules should
        // also work.
        for file_name in [".gitconfig", ".gitmodules"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_textmate_highlighting(&engine, file_name);
        }
    }

    #[test]
    fn test_gitattributes_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // .gitattributes should use the Git Attributes grammar
        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), &registry);
        assert_textmate_highlighting(&engine, ".gitattributes");
    }

    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        // Comment delimiters (#, //, /*) should use comment color, not operator
        for scope in [
            "punctuation.definition.comment",
            "punctuation.definition.comment.python",
            "punctuation.definition.comment.begin",
        ] {
            assert_eq!(
                scope_to_category(scope),
                Some(HighlightCategory::Comment),
                "scope {}",
                scope
            );
        }
    }

    #[test]
    fn test_string_delimiter_uses_string_color() {
        // String delimiters (", ', `) should use string color, not operator
        for scope in [
            "punctuation.definition.string.begin",
            "punctuation.definition.string.end",
        ] {
            assert_eq!(
                scope_to_category(scope),
                Some(HighlightCategory::String),
                "scope {}",
                scope
            );
        }
    }

    #[test]
    fn test_other_punctuation_still_operator() {
        // Other punctuation (brackets, delimiters) should still be operator
        for scope in ["punctuation.separator", "punctuation.section"] {
            assert_eq!(
                scope_to_category(scope),
                Some(HighlightCategory::Operator),
                "scope {}",
                scope
            );
        }
    }
}