Skip to main content

fresh/primitives/
highlight_engine.rs

1//! Unified highlighting engine
2//!
3//! This module provides a unified abstraction over different highlighting backends:
4//! - TextMate grammars via syntect (default for highlighting)
5//! - Tree-sitter (available via explicit preference, also used for non-highlighting features)
6//!
7//! # Backend Selection
8//! By default, syntect/TextMate is used for syntax highlighting because it provides
9//! broader language coverage. Tree-sitter language detection is still performed
10//! to support non-highlighting features like auto-indentation and semantic highlighting.
11//!
12//! # Non-Highlighting Features
13//! Even when using TextMate for highlighting, tree-sitter `Language` is detected
14//! and available via `.language()` for:
15//! - Auto-indentation (via IndentCalculator)
16//! - Semantic highlighting (variable scope tracking)
17//! - Other syntax-aware features
18
19use crate::model::buffer::Buffer;
20use crate::primitives::grammar::GrammarRegistry;
21use crate::primitives::highlighter::{
22    highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
23};
24use crate::view::theme::Theme;
25use std::ops::Range;
26use std::path::Path;
27use std::sync::Arc;
28use syntect::parsing::SyntaxSet;
29
30/// Map TextMate scope to highlight category
31fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
32    let scope_lower = scope.to_lowercase();
33
34    // Comments - highest priority
35    if scope_lower.starts_with("comment") {
36        return Some(HighlightCategory::Comment);
37    }
38
39    // Strings
40    if scope_lower.starts_with("string") {
41        return Some(HighlightCategory::String);
42    }
43
44    // Markdown/markup scopes - handle before generic keyword/punctuation checks
45    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
46    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
47    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
48        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
49    }
50    // Bold: markup.bold
51    if scope_lower.starts_with("markup.bold") {
52        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
53    }
54    // Italic: markup.italic
55    if scope_lower.starts_with("markup.italic") {
56        return Some(HighlightCategory::Variable); // Italic styled like variables
57    }
58    // Inline code and code blocks: markup.raw, markup.inline.raw
59    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
60        return Some(HighlightCategory::String); // Code styled like strings
61    }
62    // Links: markup.underline.link
63    if scope_lower.starts_with("markup.underline.link") {
64        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
65    }
66    // Generic underline (often links)
67    if scope_lower.starts_with("markup.underline") {
68        return Some(HighlightCategory::Function);
69    }
70    // Block quotes: markup.quote
71    if scope_lower.starts_with("markup.quote") {
72        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
73    }
74    // Lists: markup.list
75    if scope_lower.starts_with("markup.list") {
76        return Some(HighlightCategory::Operator); // List markers styled like operators
77    }
78    // Strikethrough: markup.strikethrough
79    if scope_lower.starts_with("markup.strikethrough") {
80        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
81    }
82
83    // Keywords
84    if scope_lower.starts_with("keyword.control")
85        || scope_lower.starts_with("keyword.other")
86        || scope_lower.starts_with("keyword.declaration")
87        || scope_lower.starts_with("keyword")
88    {
89        // keyword.operator should map to Operator, not Keyword
90        if !scope_lower.starts_with("keyword.operator") {
91            return Some(HighlightCategory::Keyword);
92        }
93    }
94
95    // Operators (including keyword.operator)
96    if scope_lower.starts_with("keyword.operator") || scope_lower.starts_with("punctuation") {
97        return Some(HighlightCategory::Operator);
98    }
99
100    // Functions
101    if scope_lower.starts_with("entity.name.function")
102        || scope_lower.starts_with("support.function")
103        || scope_lower.starts_with("meta.function-call")
104        || scope_lower.starts_with("variable.function")
105    {
106        return Some(HighlightCategory::Function);
107    }
108
109    // Types
110    if scope_lower.starts_with("entity.name.type")
111        || scope_lower.starts_with("entity.name.class")
112        || scope_lower.starts_with("entity.name.struct")
113        || scope_lower.starts_with("entity.name.enum")
114        || scope_lower.starts_with("entity.name.interface")
115        || scope_lower.starts_with("entity.name.trait")
116        || scope_lower.starts_with("support.type")
117        || scope_lower.starts_with("support.class")
118        || scope_lower.starts_with("storage.type")
119    {
120        return Some(HighlightCategory::Type);
121    }
122
123    // Storage modifiers (pub, static, const as keywords)
124    if scope_lower.starts_with("storage.modifier") {
125        return Some(HighlightCategory::Keyword);
126    }
127
128    // Constants and numbers
129    if scope_lower.starts_with("constant.numeric")
130        || scope_lower.starts_with("constant.language.boolean")
131    {
132        return Some(HighlightCategory::Number);
133    }
134    if scope_lower.starts_with("constant") {
135        return Some(HighlightCategory::Constant);
136    }
137
138    // Variables
139    if scope_lower.starts_with("variable.parameter")
140        || scope_lower.starts_with("variable.other")
141        || scope_lower.starts_with("variable.language")
142    {
143        return Some(HighlightCategory::Variable);
144    }
145
146    // Properties / object keys
147    if scope_lower.starts_with("entity.name.tag")
148        || scope_lower.starts_with("support.other.property")
149        || scope_lower.starts_with("meta.object-literal.key")
150        || scope_lower.starts_with("variable.other.property")
151        || scope_lower.starts_with("variable.other.object.property")
152    {
153        return Some(HighlightCategory::Property);
154    }
155
156    // Attributes (decorators, annotations)
157    if scope_lower.starts_with("entity.other.attribute")
158        || scope_lower.starts_with("meta.attribute")
159        || scope_lower.starts_with("entity.name.decorator")
160    {
161        return Some(HighlightCategory::Attribute);
162    }
163
164    // Generic variable fallback
165    if scope_lower.starts_with("variable") {
166        return Some(HighlightCategory::Variable);
167    }
168
169    None
170}
171
/// Preference for which highlighting backend to use.
///
/// The preference is consumed by the `HighlightEngine::for_file_*` constructors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HighlighterPreference {
    /// Use TextMate/syntect for highlighting (default).
    /// Tree-sitter language is still detected for other features (indentation, semantic highlighting).
    /// If no TextMate grammar is found, the constructors fall back to tree-sitter.
    #[default]
    Auto,
    /// Force tree-sitter for highlighting (useful for testing/comparison).
    /// Produces no highlighting when no tree-sitter language is detected for the path.
    TreeSitter,
    /// Explicitly use TextMate grammar (handled exactly like `Auto`).
    TextMate,
}
184
/// Unified highlighting engine supporting multiple backends.
///
/// Each variant wraps one backend; the methods on this enum forward to
/// whichever backend is active. The default value is [`HighlightEngine::None`].
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighting (built-in languages)
    TreeSitter(Box<Highlighter>),
    /// TextMate grammar based highlighting
    TextMate(Box<TextMateEngine>),
    /// No highlighting available; every highlight request yields no spans
    #[default]
    None,
}
196
/// TextMate highlighting engine wrapper
///
/// This struct handles the lifetime complexities of syntect by storing
/// the syntax set and using indices rather than references.
pub struct TextMateEngine {
    /// Shared set of syntect syntax definitions the engine parses with.
    syntax_set: Arc<SyntaxSet>,
    /// Position of the active syntax within `syntax_set.syntaxes()`.
    syntax_index: usize,
    /// Spans produced by the most recent parse, if still valid.
    cache: Option<TextMateCache>,
    /// Buffer length recorded when `cache` was filled; a different length
    /// means the cached byte offsets can no longer be trusted.
    last_buffer_len: usize,
    /// Tree-sitter language for non-highlighting features (indentation, semantic highlighting)
    /// Even when using syntect for highlighting, we track the language for other features
    ts_language: Option<Language>,
}
210
/// Result of the most recent parse, kept so repeated requests for a viewport
/// inside the same region do not re-run the parser.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Buffer byte range that was parsed (viewport plus surrounding context).
    range: Range<usize>,
    /// Merged category spans found inside `range`.
    spans: Vec<CachedSpan>,
}
216
/// A highlighted region stored by category rather than color; the color is
/// looked up from the theme each time spans are handed out.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte offsets into the buffer (not relative to the parsed region).
    range: Range<usize>,
    /// Highlight category assigned to the bytes in `range`.
    category: crate::primitives::highlighter::HighlightCategory,
}
222
/// Upper bound (1 MiB) on the number of bytes parsed by one highlighting pass.
///
/// A viewport whose padded parse range is larger than this is not highlighted.
const MAX_PARSE_BYTES: usize = 1 << 20;
225
226impl TextMateEngine {
227    /// Create a new TextMate engine for the given syntax
228    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
229        Self {
230            syntax_set,
231            syntax_index,
232            cache: None,
233            last_buffer_len: 0,
234            ts_language: None,
235        }
236    }
237
238    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
239    pub fn with_language(
240        syntax_set: Arc<SyntaxSet>,
241        syntax_index: usize,
242        ts_language: Option<Language>,
243    ) -> Self {
244        Self {
245            syntax_set,
246            syntax_index,
247            cache: None,
248            last_buffer_len: 0,
249            ts_language,
250        }
251    }
252
253    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
254    pub fn language(&self) -> Option<&Language> {
255        self.ts_language.as_ref()
256    }
257
258    /// Highlight the visible viewport range
259    ///
260    /// `context_bytes` controls how far before/after the viewport to parse for accurate
261    /// highlighting of multi-line constructs (strings, comments, nested blocks).
262    pub fn highlight_viewport(
263        &mut self,
264        buffer: &Buffer,
265        viewport_start: usize,
266        viewport_end: usize,
267        theme: &Theme,
268        context_bytes: usize,
269    ) -> Vec<HighlightSpan> {
270        use syntect::parsing::{ParseState, ScopeStack};
271
272        // Check cache validity
273        if let Some(cache) = &self.cache {
274            if cache.range.start <= viewport_start
275                && cache.range.end >= viewport_end
276                && self.last_buffer_len == buffer.len()
277            {
278                return cache
279                    .spans
280                    .iter()
281                    .filter(|span| {
282                        span.range.start < viewport_end && span.range.end > viewport_start
283                    })
284                    .map(|span| HighlightSpan {
285                        range: span.range.clone(),
286                        color: highlight_color(span.category, theme),
287                    })
288                    .collect();
289            }
290        }
291
292        // Cache miss - parse viewport region
293        let parse_start = viewport_start.saturating_sub(context_bytes);
294        let parse_end = (viewport_end + context_bytes).min(buffer.len());
295
296        if parse_end <= parse_start || parse_end - parse_start > MAX_PARSE_BYTES {
297            return Vec::new();
298        }
299
300        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
301        let mut state = ParseState::new(syntax);
302        let mut spans = Vec::new();
303
304        // Get content
305        let content = buffer.slice_bytes(parse_start..parse_end);
306        let content_str = match std::str::from_utf8(&content) {
307            Ok(s) => s,
308            Err(_) => return Vec::new(),
309        };
310
311        // Parse line by line - manually track line boundaries to handle CRLF correctly
312        // str::lines() strips both \n and \r\n, losing the distinction
313        let content_bytes = content_str.as_bytes();
314        let mut pos = 0;
315        let mut current_offset = parse_start;
316        let mut current_scopes = ScopeStack::new();
317
318        while pos < content_bytes.len() {
319            let line_start = pos;
320            let mut line_end = pos;
321
322            // Scan for line ending (find \n or \r\n or end of content)
323            while line_end < content_bytes.len() {
324                if content_bytes[line_end] == b'\n' {
325                    line_end += 1;
326                    break;
327                } else if content_bytes[line_end] == b'\r' {
328                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
329                        line_end += 2; // CRLF
330                    } else {
331                        line_end += 1; // CR only
332                    }
333                    break;
334                }
335                line_end += 1;
336            }
337
338            // Get the line content and actual byte length
339            let line_bytes = &content_bytes[line_start..line_end];
340            let actual_line_byte_len = line_bytes.len();
341
342            // Create line string for syntect - strip CR if present, ensure single \n
343            let line_str = match std::str::from_utf8(line_bytes) {
344                Ok(s) => s,
345                Err(_) => {
346                    pos = line_end;
347                    current_offset += actual_line_byte_len;
348                    continue;
349                }
350            };
351
352            // Remove trailing \r\n or \n, then add single \n for syntect
353            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
354            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
355                format!("{}\n", line_content)
356            } else {
357                line_content.to_string()
358            };
359
360            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
361                Ok(ops) => ops,
362                Err(_) => {
363                    pos = line_end;
364                    current_offset += actual_line_byte_len;
365                    continue;
366                }
367            };
368
369            // Convert operations to spans
370            // Note: syntect offsets are relative to line_for_syntect, but we need
371            // to map them to the actual buffer positions
372            let mut syntect_offset = 0;
373            let line_content_len = line_content.len();
374
375            for (op_offset, op) in ops {
376                // Handle any text before this operation (but only within content, not newline)
377                let clamped_op_offset = op_offset.min(line_content_len);
378                if clamped_op_offset > syntect_offset {
379                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
380                        let byte_start = current_offset + syntect_offset;
381                        let byte_end = current_offset + clamped_op_offset;
382                        if byte_start < byte_end {
383                            spans.push(CachedSpan {
384                                range: byte_start..byte_end,
385                                category,
386                            });
387                        }
388                    }
389                }
390                syntect_offset = clamped_op_offset;
391
392                let _ = current_scopes.apply(&op);
393            }
394
395            // Handle remaining text on line (content only, not line ending)
396            if syntect_offset < line_content_len {
397                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
398                    let byte_start = current_offset + syntect_offset;
399                    let byte_end = current_offset + line_content_len;
400                    if byte_start < byte_end {
401                        spans.push(CachedSpan {
402                            range: byte_start..byte_end,
403                            category,
404                        });
405                    }
406                }
407            }
408
409            // Advance by actual byte length (including real line terminator)
410            pos = line_end;
411            current_offset += actual_line_byte_len;
412        }
413
414        // Merge adjacent spans
415        Self::merge_adjacent_spans(&mut spans);
416
417        // Update cache
418        self.cache = Some(TextMateCache {
419            range: parse_start..parse_end,
420            spans: spans.clone(),
421        });
422        self.last_buffer_len = buffer.len();
423
424        // Filter and resolve colors
425        spans
426            .into_iter()
427            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
428            .map(|span| HighlightSpan {
429                range: span.range,
430                color: highlight_color(span.category, theme),
431            })
432            .collect()
433    }
434
435    /// Map scope stack to highlight category
436    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
437        for scope in scopes.as_slice().iter().rev() {
438            let scope_str = scope.build_string();
439            if let Some(cat) = scope_to_category(&scope_str) {
440                return Some(cat);
441            }
442        }
443        None
444    }
445
446    /// Merge adjacent spans with same category
447    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
448        if spans.len() < 2 {
449            return;
450        }
451
452        let mut write_idx = 0;
453        for read_idx in 1..spans.len() {
454            if spans[write_idx].category == spans[read_idx].category
455                && spans[write_idx].range.end == spans[read_idx].range.start
456            {
457                spans[write_idx].range.end = spans[read_idx].range.end;
458            } else {
459                write_idx += 1;
460                if write_idx != read_idx {
461                    spans[write_idx] = spans[read_idx].clone();
462                }
463            }
464        }
465        spans.truncate(write_idx + 1);
466    }
467
468    /// Invalidate cache for edited range
469    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
470        if let Some(cache) = &self.cache {
471            if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
472                self.cache = None;
473            }
474        }
475    }
476
477    /// Invalidate all cache
478    pub fn invalidate_all(&mut self) {
479        self.cache = None;
480    }
481
482    /// Get syntax name
483    pub fn syntax_name(&self) -> &str {
484        &self.syntax_set.syntaxes()[self.syntax_index].name
485    }
486}
487
488impl HighlightEngine {
489    /// Create a highlighting engine for a file
490    ///
491    /// Always uses syntect/TextMate for highlighting, but detects tree-sitter
492    /// language for other features (indentation, semantic highlighting).
493    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Self {
494        Self::for_file_with_preference(path, registry, HighlighterPreference::Auto)
495    }
496
497    /// Create a highlighting engine for a file, using language configuration for detection.
498    ///
499    /// This method checks the provided languages configuration for filename and extension
500    /// matches before falling back to built-in detection. This allows users to configure
501    /// custom filename patterns (like PKGBUILD for bash) that will be respected for
502    /// syntax highlighting.
503    pub fn for_file_with_languages(
504        path: &Path,
505        registry: &GrammarRegistry,
506        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
507    ) -> Self {
508        Self::for_file_with_languages_and_preference(
509            path,
510            registry,
511            languages,
512            HighlighterPreference::Auto,
513        )
514    }
515
516    /// Create a highlighting engine with explicit preference and language configuration.
517    pub fn for_file_with_languages_and_preference(
518        path: &Path,
519        registry: &GrammarRegistry,
520        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
521        preference: HighlighterPreference,
522    ) -> Self {
523        match preference {
524            // Auto now defaults to TextMate for highlighting (syntect has broader coverage)
525            // but still detects tree-sitter language for indentation/semantic features
526            HighlighterPreference::Auto | HighlighterPreference::TextMate => {
527                Self::textmate_for_file_with_languages(path, registry, languages)
528            }
529            HighlighterPreference::TreeSitter => {
530                if let Some(lang) = Language::from_path(path) {
531                    if let Ok(highlighter) = Highlighter::new(lang) {
532                        return Self::TreeSitter(Box::new(highlighter));
533                    }
534                }
535                Self::None
536            }
537        }
538    }
539
540    /// Create a highlighting engine with explicit preference
541    pub fn for_file_with_preference(
542        path: &Path,
543        registry: &GrammarRegistry,
544        preference: HighlighterPreference,
545    ) -> Self {
546        match preference {
547            // Auto now defaults to TextMate for highlighting (syntect has broader coverage)
548            // but still detects tree-sitter language for indentation/semantic features
549            HighlighterPreference::Auto | HighlighterPreference::TextMate => {
550                Self::textmate_for_file(path, registry)
551            }
552            HighlighterPreference::TreeSitter => {
553                if let Some(lang) = Language::from_path(path) {
554                    if let Ok(highlighter) = Highlighter::new(lang) {
555                        return Self::TreeSitter(Box::new(highlighter));
556                    }
557                }
558                Self::None
559            }
560        }
561    }
562
563    /// Create a TextMate engine for a file, falling back to tree-sitter if no TextMate grammar
564    fn textmate_for_file(path: &Path, registry: &GrammarRegistry) -> Self {
565        let syntax_set = registry.syntax_set_arc();
566
567        // Detect tree-sitter language for non-highlighting features
568        let ts_language = Language::from_path(path);
569
570        // Find syntax by file extension
571        if let Some(syntax) = registry.find_syntax_for_file(path) {
572            // Find the index of this syntax in the set
573            if let Some(index) = syntax_set
574                .syntaxes()
575                .iter()
576                .position(|s| s.name == syntax.name)
577            {
578                return Self::TextMate(Box::new(TextMateEngine::with_language(
579                    syntax_set,
580                    index,
581                    ts_language,
582                )));
583            }
584        }
585
586        // No TextMate grammar found - fall back to tree-sitter if available
587        // This handles languages like TypeScript that syntect doesn't include by default
588        if let Some(lang) = ts_language {
589            if let Ok(highlighter) = Highlighter::new(lang) {
590                tracing::debug!(
591                    "No TextMate grammar for {:?}, falling back to tree-sitter",
592                    path.extension()
593                );
594                return Self::TreeSitter(Box::new(highlighter));
595            }
596        }
597
598        Self::None
599    }
600
601    /// Create a TextMate engine for a file with language configuration support
602    fn textmate_for_file_with_languages(
603        path: &Path,
604        registry: &GrammarRegistry,
605        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
606    ) -> Self {
607        let syntax_set = registry.syntax_set_arc();
608
609        // Detect tree-sitter language for non-highlighting features
610        let ts_language = Language::from_path(path);
611
612        // Find syntax by file extension, checking languages config first
613        if let Some(syntax) = registry.find_syntax_for_file_with_languages(path, languages) {
614            // Find the index of this syntax in the set
615            if let Some(index) = syntax_set
616                .syntaxes()
617                .iter()
618                .position(|s| s.name == syntax.name)
619            {
620                return Self::TextMate(Box::new(TextMateEngine::with_language(
621                    syntax_set,
622                    index,
623                    ts_language,
624                )));
625            }
626        }
627
628        // No TextMate grammar found - fall back to tree-sitter if available
629        // This handles languages like TypeScript that syntect doesn't include by default
630        if let Some(lang) = ts_language {
631            if let Ok(highlighter) = Highlighter::new(lang) {
632                tracing::debug!(
633                    "No TextMate grammar for {:?}, falling back to tree-sitter",
634                    path.extension()
635                );
636                return Self::TreeSitter(Box::new(highlighter));
637            }
638        }
639
640        Self::None
641    }
642
643    /// Create a highlighting engine for a specific tree-sitter language.
644    ///
645    /// This is useful when manually setting the language (e.g., from UI).
646    /// Uses tree-sitter for the specified language.
647    pub fn for_language(language: Language) -> Self {
648        if let Ok(highlighter) = Highlighter::new(language) {
649            Self::TreeSitter(Box::new(highlighter))
650        } else {
651            Self::None
652        }
653    }
654
655    /// Create a highlighting engine for a syntax by name.
656    ///
657    /// This looks up the syntax in the grammar registry and creates a TextMate
658    /// highlighter for it. This supports all syntect syntaxes (100+) including
659    /// user-configured grammars.
660    ///
661    /// The `ts_language` parameter optionally provides a tree-sitter language
662    /// for non-highlighting features (indentation, semantic highlighting).
663    pub fn for_syntax_name(
664        name: &str,
665        registry: &GrammarRegistry,
666        ts_language: Option<Language>,
667    ) -> Self {
668        let syntax_set = registry.syntax_set_arc();
669
670        if let Some(syntax) = registry.find_syntax_by_name(name) {
671            // Find the index of this syntax in the set
672            if let Some(index) = syntax_set
673                .syntaxes()
674                .iter()
675                .position(|s| s.name == syntax.name)
676            {
677                return Self::TextMate(Box::new(TextMateEngine::with_language(
678                    syntax_set,
679                    index,
680                    ts_language,
681                )));
682            }
683        }
684
685        Self::None
686    }
687
688    /// Highlight the visible viewport
689    ///
690    /// `context_bytes` controls how far before/after the viewport to parse for accurate
691    /// highlighting of multi-line constructs (strings, comments, nested blocks).
692    pub fn highlight_viewport(
693        &mut self,
694        buffer: &Buffer,
695        viewport_start: usize,
696        viewport_end: usize,
697        theme: &Theme,
698        context_bytes: usize,
699    ) -> Vec<HighlightSpan> {
700        match self {
701            Self::TreeSitter(h) => {
702                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
703            }
704            Self::TextMate(h) => {
705                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
706            }
707            Self::None => Vec::new(),
708        }
709    }
710
711    /// Invalidate cache for an edited range
712    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
713        match self {
714            Self::TreeSitter(h) => h.invalidate_range(edit_range),
715            Self::TextMate(h) => h.invalidate_range(edit_range),
716            Self::None => {}
717        }
718    }
719
720    /// Invalidate entire cache
721    pub fn invalidate_all(&mut self) {
722        match self {
723            Self::TreeSitter(h) => h.invalidate_all(),
724            Self::TextMate(h) => h.invalidate_all(),
725            Self::None => {}
726        }
727    }
728
729    /// Check if this engine has highlighting available
730    pub fn has_highlighting(&self) -> bool {
731        !matches!(self, Self::None)
732    }
733
734    /// Get a description of the active backend
735    pub fn backend_name(&self) -> &str {
736        match self {
737            Self::TreeSitter(_) => "tree-sitter",
738            Self::TextMate(_) => "textmate",
739            Self::None => "none",
740        }
741    }
742
743    /// Get the language/syntax name if available
744    pub fn syntax_name(&self) -> Option<&str> {
745        match self {
746            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
747            Self::TextMate(h) => Some(h.syntax_name()),
748            Self::None => None,
749        }
750    }
751
752    /// Get the tree-sitter Language for non-highlighting features
753    /// Returns the language even when using TextMate for highlighting
754    pub fn language(&self) -> Option<&Language> {
755        match self {
756            Self::TreeSitter(h) => Some(h.language()),
757            Self::TextMate(h) => h.language(),
758            Self::None => None,
759        }
760    }
761}
762
763/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
764/// Returns spans with byte ranges relative to the input string.
765///
766/// This uses TextMate grammars via syntect which provides broader language coverage
767/// than tree-sitter (~150+ languages vs ~17).
768pub fn highlight_string(
769    code: &str,
770    lang_hint: &str,
771    registry: &GrammarRegistry,
772    theme: &Theme,
773) -> Vec<HighlightSpan> {
774    use syntect::parsing::{ParseState, ScopeStack};
775
776    // Find syntax by language token (handles aliases like "py" -> Python)
777    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
778        Some(s) => s,
779        None => return Vec::new(),
780    };
781
782    let syntax_set = registry.syntax_set();
783    let mut state = ParseState::new(syntax);
784    let mut spans = Vec::new();
785    let mut current_scopes = ScopeStack::new();
786    let mut current_offset = 0;
787
788    // Parse line by line
789    for line in code.split_inclusive('\n') {
790        let line_start = current_offset;
791        let line_len = line.len();
792
793        // Remove trailing newline for syntect, then add it back
794        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
795        let line_for_syntect = if line.ends_with('\n') {
796            format!("{}\n", line_content)
797        } else {
798            line_content.to_string()
799        };
800
801        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
802            Ok(ops) => ops,
803            Err(_) => {
804                current_offset += line_len;
805                continue;
806            }
807        };
808
809        let mut syntect_offset = 0;
810        let line_content_len = line_content.len();
811
812        for (op_offset, op) in ops {
813            let clamped_op_offset = op_offset.min(line_content_len);
814            if clamped_op_offset > syntect_offset {
815                if let Some(category) = scope_stack_to_category(&current_scopes) {
816                    let byte_start = line_start + syntect_offset;
817                    let byte_end = line_start + clamped_op_offset;
818                    if byte_start < byte_end {
819                        spans.push(HighlightSpan {
820                            range: byte_start..byte_end,
821                            color: highlight_color(category, theme),
822                        });
823                    }
824                }
825            }
826            syntect_offset = clamped_op_offset;
827            let _ = current_scopes.apply(&op);
828        }
829
830        // Handle remaining text on line
831        if syntect_offset < line_content_len {
832            if let Some(category) = scope_stack_to_category(&current_scopes) {
833                let byte_start = line_start + syntect_offset;
834                let byte_end = line_start + line_content_len;
835                if byte_start < byte_end {
836                    spans.push(HighlightSpan {
837                        range: byte_start..byte_end,
838                        color: highlight_color(category, theme),
839                    });
840                }
841            }
842        }
843
844        current_offset += line_len;
845    }
846
847    // Merge adjacent spans with same color
848    merge_adjacent_highlight_spans(&mut spans);
849
850    spans
851}
852
853/// Map scope stack to highlight category (for highlight_string)
854fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
855    for scope in scopes.as_slice().iter().rev() {
856        let scope_str = scope.build_string();
857        if let Some(cat) = scope_to_category(&scope_str) {
858            return Some(cat);
859        }
860    }
861    None
862}
863
864/// Merge adjacent spans with same color
865fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
866    if spans.len() < 2 {
867        return;
868    }
869
870    let mut write_idx = 0;
871    for read_idx in 1..spans.len() {
872        if spans[write_idx].color == spans[read_idx].color
873            && spans[write_idx].range.end == spans[read_idx].range.start
874        {
875            spans[write_idx].range.end = spans[read_idx].range.end;
876        } else {
877            write_idx += 1;
878            if write_idx != read_idx {
879                spans[write_idx] = spans[read_idx].clone();
880            }
881        }
882    }
883    spans.truncate(write_idx + 1);
884}
885
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::filesystem::{FileSystem, StdFileSystem};
    use crate::primitives::grammar::LocalGrammarLoader;
    use crate::view::theme;
    use std::sync::Arc;

    /// Shared `StdFileSystem` handle used when constructing test buffers.
    fn test_fs() -> Arc<dyn FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }

    /// Asserts that `engine` (built for `file_name`) uses the TextMate backend
    /// and reports highlighting support. `file_name` only labels failures.
    #[track_caller]
    fn assert_textmate_highlighting(engine: &HighlightEngine, file_name: &str) {
        assert_eq!(
            engine.backend_name(),
            "textmate",
            "unexpected backend for {}",
            file_name
        );
        assert!(
            engine.has_highlighting(),
            "expected highlighting for {}",
            file_name
        );
    }

    #[test]
    fn test_highlighter_preference_default() {
        let pref = HighlighterPreference::default();
        assert_eq!(pref, HighlighterPreference::Auto);
    }

    #[test]
    fn test_highlight_engine_default() {
        let engine = HighlightEngine::default();
        assert!(!engine.has_highlighting());
        assert_eq!(engine.backend_name(), "none");
    }

    #[test]
    fn test_textmate_backend_selection() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // Languages with TextMate grammars use TextMate for highlighting.
        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default).
        let expected_backends = [
            ("test.rs", "textmate"),
            ("test.py", "textmate"),
            ("test.js", "textmate"),
            ("test.ts", "tree-sitter"),
            ("test.tsx", "tree-sitter"),
        ];

        for (file_name, backend) in expected_backends {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_eq!(
                engine.backend_name(),
                backend,
                "unexpected backend for {}",
                file_name
            );
            // Tree-sitter language should still be detected for other features
            assert!(
                engine.language().is_some(),
                "expected a detected language for {}",
                file_name
            );
        }
    }

    #[test]
    fn test_tree_sitter_explicit_preference() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // Force tree-sitter for highlighting
        let engine = HighlightEngine::for_file_with_preference(
            Path::new("test.rs"),
            &registry,
            HighlighterPreference::TreeSitter,
        );
        assert_eq!(engine.backend_name(), "tree-sitter");
    }

    #[test]
    fn test_unknown_extension() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // Unknown extension: might be none or might find something via syntect.
        // Just verify it doesn't panic.
        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), &registry);
        let _ = engine.backend_name();
    }

    #[test]
    fn test_highlight_viewport_empty_buffer_no_panic() {
        // Regression test: calling highlight_viewport with an empty buffer
        // and non-zero viewport range previously caused subtraction overflow panic.
        //
        // The bug occurred when:
        // - buffer is empty (len = 0)
        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
        // - parse_end - parse_start would underflow (0 - positive = overflow)
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());
        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), &registry);

        // Create empty buffer
        let buffer = Buffer::from_str("", 0, test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        // Fail loudly if the engine is not TextMate; otherwise the regression
        // check below would be skipped and the test would pass vacuously.
        let tm = match engine {
            HighlightEngine::TextMate(ref mut tm) => tm,
            _ => panic!("Expected TextMate engine for .rs file"),
        };

        // The specific case that triggered the overflow:
        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
        // 0 - 90 = overflow!
        // (Small context_bytes so parse_start remains > 0.)
        let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
        assert!(spans.is_empty());
    }

    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
    /// This is a regression test for a bug where using str::lines() caused 1-byte
    /// offset drift per line because it strips line terminators.
    #[test]
    fn test_textmate_engine_crlf_byte_offsets() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());
        let mut engine = HighlightEngine::for_file(Path::new("test.java"), &registry);

        // CRLF content with a keyword on each line; every "public\r\n" is 8 bytes.
        let content = b"public\r\npublic\r\npublic\r\n";
        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        let tm = match engine {
            HighlightEngine::TextMate(ref mut tm) => tm,
            _ => panic!("Expected TextMate engine for .java file"),
        };

        // Highlight the entire content
        let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
        let ranges: Vec<_> = spans.iter().map(|s| &s.range).collect();

        // (line number, start, end) of each "public" keyword as a half-open
        // byte range. With the drift bug, line 2 would be reported at 7..13
        // and line 3 at 14..20.
        let expected: [(usize, usize, usize); 3] = [(1, 0, 6), (2, 8, 14), (3, 16, 22)];

        for (line_no, start, end) in expected {
            assert!(
                ranges.iter().any(|r| r.start <= start && r.end >= end),
                "Should have span covering bytes {}-{} (line {} 'public'). \
                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
                start,
                end,
                line_no,
                ranges
            );
        }
    }

    #[test]
    fn test_git_rebase_todo_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // git-rebase-todo files should use the Git Rebase Todo grammar
        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), &registry);
        assert_textmate_highlighting(&engine, "git-rebase-todo");
    }

    #[test]
    fn test_git_commit_message_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // COMMIT_EDITMSG should use the Git Commit Message grammar;
        // MERGE_MSG should also work.
        for file_name in ["COMMIT_EDITMSG", "MERGE_MSG"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_textmate_highlighting(&engine, file_name);
        }
    }

    #[test]
    fn test_gitignore_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // .gitignore should use the Gitignore grammar;
        // .dockerignore should also work.
        for file_name in [".gitignore", ".dockerignore"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_textmate_highlighting(&engine, file_name);
        }
    }

    #[test]
    fn test_gitconfig_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // .gitconfig should use the Git Config grammar;
        // .gitmodules should also work.
        for file_name in [".gitconfig", ".gitmodules"] {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry);
            assert_textmate_highlighting(&engine, file_name);
        }
    }

    #[test]
    fn test_gitattributes_highlighting() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::new());

        // .gitattributes should use the Git Attributes grammar
        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), &registry);
        assert_textmate_highlighting(&engine, ".gitattributes");
    }
}