Skip to main content

fresh/primitives/
highlight_engine.rs

//! Unified highlighting engine
//!
//! This module provides a unified abstraction over different highlighting backends:
//! - TextMate grammars via syntect (default for highlighting)
//! - Tree-sitter (available via explicit preference, also used for non-highlighting features)
//!
//! # Backend Selection
//! By default, syntect/TextMate is used for syntax highlighting because it provides
//! broader language coverage. Tree-sitter language detection is still performed
//! to support non-highlighting features like auto-indentation and semantic highlighting.
//!
//! # Non-Highlighting Features
//! Even when using TextMate for highlighting, the tree-sitter `Language` is detected
//! and available via `.language()` for:
//! - Auto-indentation (via IndentCalculator)
//! - Semantic highlighting (variable scope tracking)
//! - Other syntax-aware features
18
19use crate::model::buffer::Buffer;
20use crate::model::marker::{MarkerId, MarkerList};
21use crate::primitives::grammar::GrammarRegistry;
22use crate::primitives::highlighter::{
23    highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
24};
25use crate::view::theme::Theme;
26use std::collections::HashMap;
27use std::ops::Range;
28use std::path::Path;
29use std::sync::Arc;
30use syntect::parsing::SyntaxSet;
31
32/// Map TextMate scope to highlight category
33fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
34    let scope_lower = scope.to_lowercase();
35
36    // Comments - highest priority
37    if scope_lower.starts_with("comment") {
38        return Some(HighlightCategory::Comment);
39    }
40
41    // Strings
42    if scope_lower.starts_with("string") {
43        return Some(HighlightCategory::String);
44    }
45
46    // Markdown/markup scopes - handle before generic keyword/punctuation checks
47    // See: https://macromates.com/manual/en/language_grammars (TextMate scope naming)
48    // Headings: markup.heading and entity.name.section (used by syntect's markdown grammar)
49    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
50        return Some(HighlightCategory::Keyword); // Headers styled like keywords (bold, prominent)
51    }
52    // Bold: markup.bold
53    if scope_lower.starts_with("markup.bold") {
54        return Some(HighlightCategory::Constant); // Bold styled like constants (bright)
55    }
56    // Italic: markup.italic
57    if scope_lower.starts_with("markup.italic") {
58        return Some(HighlightCategory::Variable); // Italic styled like variables
59    }
60    // Inline code and code blocks: markup.raw, markup.inline.raw
61    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
62        return Some(HighlightCategory::String); // Code styled like strings
63    }
64    // Links: markup.underline.link
65    if scope_lower.starts_with("markup.underline.link") {
66        return Some(HighlightCategory::Function); // Links styled like functions (distinct color)
67    }
68    // Generic underline (often links)
69    if scope_lower.starts_with("markup.underline") {
70        return Some(HighlightCategory::Function);
71    }
72    // Block quotes: markup.quote
73    if scope_lower.starts_with("markup.quote") {
74        return Some(HighlightCategory::Comment); // Quotes styled like comments (subdued)
75    }
76    // Lists: markup.list
77    if scope_lower.starts_with("markup.list") {
78        return Some(HighlightCategory::Operator); // List markers styled like operators
79    }
80    // Strikethrough: markup.strikethrough
81    if scope_lower.starts_with("markup.strikethrough") {
82        return Some(HighlightCategory::Comment); // Strikethrough styled subdued
83    }
84
85    // Keywords
86    if scope_lower.starts_with("keyword.control")
87        || scope_lower.starts_with("keyword.other")
88        || scope_lower.starts_with("keyword.declaration")
89        || scope_lower.starts_with("keyword")
90    {
91        // keyword.operator should map to Operator, not Keyword
92        if !scope_lower.starts_with("keyword.operator") {
93            return Some(HighlightCategory::Keyword);
94        }
95    }
96
97    // Punctuation that belongs to a parent construct (comment/string delimiters)
98    // These must be checked before the generic punctuation rule below.
99    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
100    if scope_lower.starts_with("punctuation.definition.comment") {
101        return Some(HighlightCategory::Comment);
102    }
103    if scope_lower.starts_with("punctuation.definition.string") {
104        return Some(HighlightCategory::String);
105    }
106
107    // Operators (keyword.operator only)
108    if scope_lower.starts_with("keyword.operator") {
109        return Some(HighlightCategory::Operator);
110    }
111
112    // Punctuation brackets ({, }, (, ), [, ], <, >)
113    // Covers punctuation.section.*, punctuation.bracket.*,
114    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
115    if scope_lower.starts_with("punctuation.section")
116        || scope_lower.starts_with("punctuation.bracket")
117        || scope_lower.starts_with("punctuation.definition.array")
118        || scope_lower.starts_with("punctuation.definition.block")
119        || scope_lower.starts_with("punctuation.definition.brackets")
120        || scope_lower.starts_with("punctuation.definition.group")
121        || scope_lower.starts_with("punctuation.definition.inline-table")
122        || scope_lower.starts_with("punctuation.definition.section")
123        || scope_lower.starts_with("punctuation.definition.table")
124        || scope_lower.starts_with("punctuation.definition.tag")
125    {
126        return Some(HighlightCategory::PunctuationBracket);
127    }
128
129    // Punctuation delimiters (;, ,, .)
130    if scope_lower.starts_with("punctuation.separator")
131        || scope_lower.starts_with("punctuation.terminator")
132        || scope_lower.starts_with("punctuation.accessor")
133    {
134        return Some(HighlightCategory::PunctuationDelimiter);
135    }
136
137    // Functions
138    if scope_lower.starts_with("entity.name.function")
139        || scope_lower.starts_with("support.function")
140        || scope_lower.starts_with("meta.function-call")
141        || scope_lower.starts_with("variable.function")
142    {
143        return Some(HighlightCategory::Function);
144    }
145
146    // Types
147    if scope_lower.starts_with("entity.name.type")
148        || scope_lower.starts_with("entity.name.class")
149        || scope_lower.starts_with("entity.name.struct")
150        || scope_lower.starts_with("entity.name.enum")
151        || scope_lower.starts_with("entity.name.interface")
152        || scope_lower.starts_with("entity.name.trait")
153        || scope_lower.starts_with("support.type")
154        || scope_lower.starts_with("support.class")
155        || scope_lower.starts_with("storage.type")
156    {
157        return Some(HighlightCategory::Type);
158    }
159
160    // Storage modifiers (pub, static, const as keywords)
161    if scope_lower.starts_with("storage.modifier") {
162        return Some(HighlightCategory::Keyword);
163    }
164
165    // Constants and numbers
166    if scope_lower.starts_with("constant.numeric")
167        || scope_lower.starts_with("constant.language.boolean")
168    {
169        return Some(HighlightCategory::Number);
170    }
171    if scope_lower.starts_with("constant") {
172        return Some(HighlightCategory::Constant);
173    }
174
175    // Variables
176    if scope_lower.starts_with("variable.parameter")
177        || scope_lower.starts_with("variable.other")
178        || scope_lower.starts_with("variable.language")
179    {
180        return Some(HighlightCategory::Variable);
181    }
182
183    // Properties / object keys
184    if scope_lower.starts_with("entity.name.tag")
185        || scope_lower.starts_with("support.other.property")
186        || scope_lower.starts_with("meta.object-literal.key")
187        || scope_lower.starts_with("variable.other.property")
188        || scope_lower.starts_with("variable.other.object.property")
189    {
190        return Some(HighlightCategory::Property);
191    }
192
193    // Attributes (decorators, annotations)
194    if scope_lower.starts_with("entity.other.attribute")
195        || scope_lower.starts_with("meta.attribute")
196        || scope_lower.starts_with("entity.name.decorator")
197    {
198        return Some(HighlightCategory::Attribute);
199    }
200
201    // Generic variable fallback
202    if scope_lower.starts_with("variable") {
203        return Some(HighlightCategory::Variable);
204    }
205
206    None
207}
208
/// Unified highlighting engine supporting multiple backends.
///
/// Exactly one backend is active at a time; the backend state is held behind
/// a `Box`. The default value is [`HighlightEngine::None`].
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighting (built-in languages)
    TreeSitter(Box<Highlighter>),
    /// TextMate grammar based highlighting
    TextMate(Box<TextMateEngine>),
    /// No highlighting available
    #[default]
    None,
}
220
/// TextMate highlighting engine with marker-based parse state checkpoints.
///
/// Syntect's parser is a sequential state machine that must process text from the
/// start of the file to correctly track embedded language transitions (e.g. CSS
/// inside HTML `<style>` tags).
///
/// Checkpoint positions are stored as markers in an internal `MarkerList` which
/// automatically adjusts byte offsets when the buffer is edited. The associated
/// `ParseState` + `ScopeStack` are stored in a side `HashMap`.
///
/// On edit, checkpoint positions auto-adjust and a `dirty_from` marker is set.
/// On the next render, a convergence walk re-parses from the checkpoint before
/// the dirty point forward, stopping as soon as the new parse state matches an
/// existing checkpoint's stored state (VSCode-style convergence). This means
/// most single-character edits only re-parse 1-2 checkpoints (~500 bytes).
///
/// For large files where no checkpoint reaches the viewport, we fall back to a
/// fresh `ParseState` from `context_bytes` before the viewport.
pub struct TextMateEngine {
    /// Shared set of syntect syntax definitions used for parsing.
    syntax_set: Arc<SyntaxSet>,
    /// Index into `syntax_set.syntaxes()` selecting the syntax this engine parses with.
    syntax_index: usize,
    /// Marker-based checkpoint positions. Markers auto-adjust on buffer edits.
    checkpoint_markers: MarkerList,
    /// Parse state stored per checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Earliest byte offset where an edit may have invalidated parse state.
    /// Consumed during the next highlight_viewport call.
    dirty_from: Option<usize>,
    /// Cached highlight spans for the last rendered viewport.
    cache: Option<TextMateCache>,
    /// Buffer length recorded alongside the span cache; a pure cache hit
    /// requires the current buffer length to equal this value.
    last_buffer_len: usize,
    /// Tree-sitter language for non-highlighting features (indentation, semantic highlighting)
    ts_language: Option<Language>,
    /// Performance counters for testing and diagnostics.
    stats: HighlightStats,
}
258
/// Performance counters exposed so tests and diagnostics can observe how much
/// work the highlighter performed. All counters start at zero.
#[derive(Clone, Debug, Default)]
pub struct HighlightStats {
    /// Total bytes fed through syntect, summed over every highlight_viewport call.
    pub bytes_parsed: usize,
    /// How many highlight_viewport calls were answered from the span cache.
    pub cache_hits: usize,
    /// How many highlight_viewport calls missed the span cache and had to re-parse.
    pub cache_misses: usize,
    /// How many checkpoint states were rewritten while walking towards convergence.
    pub checkpoints_updated: usize,
    /// How many times a re-parse converged (its state matched an existing checkpoint).
    pub convergences: usize,
}
273
/// Span cache for the most recently highlighted region.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer that the cached spans cover.
    range: Range<usize>,
    /// Cached spans; theme colors are resolved when the spans are read back.
    spans: Vec<CachedSpan>,
}
279
280#[derive(Debug, Clone)]
281struct CachedSpan {
282    range: Range<usize>,
283    category: crate::primitives::highlighter::HighlightCategory,
284}
285
/// Upper bound (1 MiB) on the number of bytes parsed in a single operation.
const MAX_PARSE_BYTES: usize = 1 << 20;

/// Spacing, in bytes, between parse state checkpoints.
/// 256 bytes ≈ every 4-8 lines of code. Convergence is tested at each
/// checkpoint, so a smaller interval lets a re-parse converge sooner after an edit.
/// A 200KB file produces ~800 markers — well within MarkerList's O(log n) range.
const CHECKPOINT_INTERVAL: usize = 1 << 8;
294
295impl TextMateEngine {
296    /// Create a new TextMate engine for the given syntax
297    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
298        Self {
299            syntax_set,
300            syntax_index,
301            checkpoint_markers: MarkerList::new(),
302            checkpoint_states: HashMap::new(),
303            dirty_from: None,
304            cache: None,
305            last_buffer_len: 0,
306            ts_language: None,
307            stats: HighlightStats::default(),
308        }
309    }
310
311    /// Create a new TextMate engine with a tree-sitter language for non-highlighting features
312    pub fn with_language(
313        syntax_set: Arc<SyntaxSet>,
314        syntax_index: usize,
315        ts_language: Option<Language>,
316    ) -> Self {
317        Self {
318            syntax_set,
319            syntax_index,
320            checkpoint_markers: MarkerList::new(),
321            checkpoint_states: HashMap::new(),
322            dirty_from: None,
323            cache: None,
324            last_buffer_len: 0,
325            ts_language,
326            stats: HighlightStats::default(),
327        }
328    }
329
    /// Get performance stats for testing and diagnostics.
    ///
    /// Returns a shared reference to the engine's counters; they keep
    /// accumulating until `reset_stats` is called.
    pub fn stats(&self) -> &HighlightStats {
        &self.stats
    }
334
335    /// Reset performance counters.
336    pub fn reset_stats(&mut self) {
337        self.stats = HighlightStats::default();
338    }
339
    /// Get the tree-sitter language (for indentation, semantic highlighting, etc.)
    ///
    /// Returns `None` when the engine was created without a tree-sitter language.
    pub fn language(&self) -> Option<&Language> {
        self.ts_language.as_ref()
    }
344
345    /// Notify the checkpoint system of a buffer insert. Markers auto-adjust positions.
346    /// Also shifts cached span byte offsets after the insert point so the span cache
347    /// remains valid for the partial-update / convergence path.
348    pub fn notify_insert(&mut self, position: usize, length: usize) {
349        self.checkpoint_markers.adjust_for_insert(position, length);
350        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
351        // Shift cached spans after the insert point
352        if let Some(cache) = &mut self.cache {
353            for span in &mut cache.spans {
354                if span.range.start >= position {
355                    span.range.start += length;
356                    span.range.end += length;
357                } else if span.range.end > position {
358                    // Span straddles the insert point — extend its end
359                    span.range.end += length;
360                }
361            }
362            if cache.range.end >= position {
363                cache.range.end += length;
364            }
365        }
366    }
367
368    /// Notify the checkpoint system of a buffer delete. Markers auto-adjust positions.
369    /// Also adjusts cached span byte offsets after the delete point.
370    pub fn notify_delete(&mut self, position: usize, length: usize) {
371        self.checkpoint_markers.adjust_for_delete(position, length);
372        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
373        // Adjust cached spans after the delete point
374        if let Some(cache) = &mut self.cache {
375            let delete_end = position + length;
376            cache.spans.retain_mut(|span| {
377                if span.range.start >= delete_end {
378                    // Span is entirely after the delete — shift back
379                    span.range.start -= length;
380                    span.range.end -= length;
381                    true
382                } else if span.range.end <= position {
383                    // Span is entirely before the delete — unchanged
384                    true
385                } else if span.range.start >= position && span.range.end <= delete_end {
386                    // Span is entirely within the deleted region — remove it
387                    false
388                } else {
389                    // Span partially overlaps — clamp and adjust
390                    if span.range.start < position {
391                        span.range.end = position.min(span.range.end);
392                    } else {
393                        span.range.start = position;
394                        span.range.end = position + span.range.end.saturating_sub(delete_end);
395                    }
396                    span.range.start < span.range.end
397                }
398            });
399            if cache.range.end > delete_end {
400                cache.range.end -= length;
401            } else if cache.range.end > position {
402                cache.range.end = position;
403            }
404        }
405    }
406
407    /// Highlight the visible viewport range.
408    ///
409    /// If the span cache is valid and there are no dirty edits, returns cached spans.
410    /// If there are dirty edits, re-parses only from the dirty point until convergence
411    /// (parse state matches an existing checkpoint), then splices the new spans into
412    /// the cache. This means most single-character edits only re-parse ~256-512 bytes.
413    pub fn highlight_viewport(
414        &mut self,
415        buffer: &Buffer,
416        viewport_start: usize,
417        viewport_end: usize,
418        theme: &Theme,
419        context_bytes: usize,
420    ) -> Vec<HighlightSpan> {
421        let desired_parse_start = viewport_start.saturating_sub(context_bytes);
422        let parse_end = (viewport_end + context_bytes).min(buffer.len());
423
424        // Check cache state. For a pure cache hit (no dirty edits), we also
425        // require buffer length to match. For partial updates (dirty_from set),
426        // we only need the cache to cover the viewport — the buffer length
427        // changed due to the edit, but we'll splice the dirty region.
428        let dirty = self.dirty_from.take();
429        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
430            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
431        });
432        let exact_cache_hit = cache_covers_viewport
433            && dirty.is_none()
434            && self.last_buffer_len == buffer.len()
435            && self
436                .cache
437                .as_ref()
438                .is_some_and(|c| c.range.end >= parse_end);
439
440        if exact_cache_hit {
441            // Pure cache hit — no dirty edits, cache covers viewport
442            self.stats.cache_hits += 1;
443            return self.filter_cached_spans(viewport_start, viewport_end, theme);
444        }
445
446        if cache_covers_viewport && dirty.is_some() {
447            if let Some(dirty_pos) = dirty {
448                if dirty_pos < parse_end {
449                    // Partial update: re-parse from dirty point until convergence,
450                    // splice new spans into existing cache
451                    if let Some(result) = self.try_partial_update(
452                        buffer,
453                        dirty_pos,
454                        desired_parse_start,
455                        parse_end,
456                        viewport_start,
457                        viewport_end,
458                        theme,
459                    ) {
460                        return result;
461                    }
462                    // Convergence failed within parse range — fall through to full re-parse
463                } else {
464                    // Dirty region beyond viewport — cache is still valid
465                    self.dirty_from = Some(dirty_pos);
466                    self.stats.cache_hits += 1;
467                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
468                }
469            }
470        } else if let Some(d) = dirty {
471            // No usable cache and dirty — put dirty back, will do full parse
472            self.dirty_from = Some(d);
473        }
474
475        // Full re-parse (cold start or convergence failed)
476        self.full_parse(
477            buffer,
478            desired_parse_start,
479            parse_end,
480            viewport_start,
481            viewport_end,
482            theme,
483            context_bytes,
484        )
485    }
486
487    /// Filter cached spans for the viewport and resolve colors.
488    fn filter_cached_spans(
489        &self,
490        viewport_start: usize,
491        viewport_end: usize,
492        theme: &Theme,
493    ) -> Vec<HighlightSpan> {
494        let cache = self.cache.as_ref().unwrap();
495        cache
496            .spans
497            .iter()
498            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
499            .map(|span| HighlightSpan {
500                range: span.range.clone(),
501                color: highlight_color(span.category, theme),
502                category: Some(span.category),
503            })
504            .collect()
505    }
506
507    /// Try to do a partial update: re-parse from the dirty point until convergence,
508    /// then splice new spans into the cache. Returns None if convergence doesn't
509    /// happen within parse_end (caller should fall back to full re-parse).
510    #[allow(clippy::too_many_arguments)]
511    fn try_partial_update(
512        &mut self,
513        buffer: &Buffer,
514        dirty_pos: usize,
515        desired_parse_start: usize,
516        parse_end: usize,
517        viewport_start: usize,
518        viewport_end: usize,
519        theme: &Theme,
520    ) -> Option<Vec<HighlightSpan>> {
521        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
522
523        // Find checkpoint before the dirty point (bounded search)
524        let (actual_start, mut state, mut current_scopes) = {
525            let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
526            let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
527            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
528            if let Some((id, cp_pos, _)) = nearest {
529                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
530                    (cp_pos, s.clone(), sc.clone())
531                } else {
532                    return None; // orphan, fall back
533                }
534            } else if parse_end <= MAX_PARSE_BYTES {
535                (
536                    0,
537                    syntect::parsing::ParseState::new(syntax),
538                    syntect::parsing::ScopeStack::new(),
539                )
540            } else {
541                return None; // large file, no nearby checkpoint, fall back
542            }
543        };
544
545        // Get markers from dirty point forward for convergence checking
546        let mut markers_ahead: Vec<(MarkerId, usize)> = self
547            .checkpoint_markers
548            .query_range(dirty_pos, parse_end)
549            .into_iter()
550            .map(|(id, start, _)| (id, start))
551            .collect();
552        markers_ahead.sort_by_key(|(_, pos)| *pos);
553        let mut marker_idx = 0;
554
555        // Parse from actual_start to parse_end, looking for convergence
556        let content_end = parse_end.min(buffer.len());
557        if actual_start >= content_end {
558            return None;
559        }
560        let content = buffer.slice_bytes(actual_start..content_end);
561        let content_str = match std::str::from_utf8(&content) {
562            Ok(s) => s,
563            Err(_) => return None,
564        };
565
566        let mut new_spans = Vec::new();
567        let content_bytes = content_str.as_bytes();
568        let mut pos = 0;
569        let mut current_offset = actual_start;
570        let mut converged_at: Option<usize> = None;
571        let mut bytes_since_checkpoint: usize = 0;
572
573        while pos < content_bytes.len() {
574            // Create checkpoints in new territory
575            if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
576                let nearby = self.checkpoint_markers.query_range(
577                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
578                    current_offset + CHECKPOINT_INTERVAL / 2,
579                );
580                if nearby.is_empty() {
581                    let marker_id = self.checkpoint_markers.create(current_offset, true);
582                    self.checkpoint_states
583                        .insert(marker_id, (state.clone(), current_scopes.clone()));
584                }
585                bytes_since_checkpoint = 0;
586            }
587
588            let line_start = pos;
589            let mut line_end = pos;
590            while line_end < content_bytes.len() {
591                if content_bytes[line_end] == b'\n' {
592                    line_end += 1;
593                    break;
594                } else if content_bytes[line_end] == b'\r' {
595                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
596                        line_end += 2;
597                    } else {
598                        line_end += 1;
599                    }
600                    break;
601                }
602                line_end += 1;
603            }
604
605            let line_bytes = &content_bytes[line_start..line_end];
606            let actual_line_byte_len = line_bytes.len();
607
608            let line_str = match std::str::from_utf8(line_bytes) {
609                Ok(s) => s,
610                Err(_) => {
611                    pos = line_end;
612                    current_offset += actual_line_byte_len;
613                    bytes_since_checkpoint += actual_line_byte_len;
614                    continue;
615                }
616            };
617
618            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
619            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
620                format!("{}\n", line_content)
621            } else {
622                line_content.to_string()
623            };
624
625            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
626                Ok(ops) => ops,
627                Err(_) => {
628                    pos = line_end;
629                    current_offset += actual_line_byte_len;
630                    bytes_since_checkpoint += actual_line_byte_len;
631                    continue;
632                }
633            };
634
635            // Collect spans for the dirty region
636            let collect_spans =
637                current_offset + actual_line_byte_len > desired_parse_start.max(actual_start);
638            let mut syntect_offset = 0;
639            let line_content_len = line_content.len();
640
641            for (op_offset, op) in ops {
642                let clamped_op_offset = op_offset.min(line_content_len);
643                if collect_spans && clamped_op_offset > syntect_offset {
644                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
645                        let byte_start = current_offset + syntect_offset;
646                        let byte_end = current_offset + clamped_op_offset;
647                        let clamped_start = byte_start.max(actual_start);
648                        if clamped_start < byte_end {
649                            new_spans.push(CachedSpan {
650                                range: clamped_start..byte_end,
651                                category,
652                            });
653                        }
654                    }
655                }
656                syntect_offset = clamped_op_offset;
657                #[allow(clippy::let_underscore_must_use)]
658                let _ = current_scopes.apply(&op);
659            }
660
661            if collect_spans && syntect_offset < line_content_len {
662                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
663                    let byte_start = current_offset + syntect_offset;
664                    let byte_end = current_offset + line_content_len;
665                    let clamped_start = byte_start.max(actual_start);
666                    if clamped_start < byte_end {
667                        new_spans.push(CachedSpan {
668                            range: clamped_start..byte_end,
669                            category,
670                        });
671                    }
672                }
673            }
674
675            pos = line_end;
676            current_offset += actual_line_byte_len;
677            bytes_since_checkpoint += actual_line_byte_len;
678
679            // Check convergence at checkpoint markers
680            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
681            {
682                let (marker_id, _) = markers_ahead[marker_idx];
683                marker_idx += 1;
684                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
685                    if *stored == (state.clone(), current_scopes.clone()) {
686                        self.stats.convergences += 1;
687                        converged_at = Some(current_offset);
688                        break;
689                    }
690                }
691                self.stats.checkpoints_updated += 1;
692                self.checkpoint_states
693                    .insert(marker_id, (state.clone(), current_scopes.clone()));
694            }
695
696            if converged_at.is_some() {
697                break;
698            }
699        }
700
701        self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
702
703        let convergence_point = converged_at?; // None → fall back to full parse
704
705        self.stats.cache_misses += 1; // partial update counts as a miss
706
707        // Splice: replace spans in [actual_start..convergence_point] with new_spans,
708        // keep everything outside that range from the existing cache.
709        Self::merge_adjacent_spans(&mut new_spans);
710
711        if let Some(cache) = &mut self.cache {
712            // Remove old spans that overlap the re-parsed region
713            let splice_start = actual_start;
714            let splice_end = convergence_point;
715            cache
716                .spans
717                .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
718            // Insert new spans and re-sort by range start
719            cache.spans.extend(new_spans);
720            cache.spans.sort_by_key(|s| s.range.start);
721            Self::merge_adjacent_spans(&mut cache.spans);
722        }
723
724        self.last_buffer_len = buffer.len();
725
726        Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
727    }
728
729    /// Full re-parse from desired_parse_start to parse_end. Used on cold start
730    /// or when partial update fails (no convergence).
731    #[allow(clippy::too_many_arguments)]
732    fn full_parse(
733        &mut self,
734        buffer: &Buffer,
735        desired_parse_start: usize,
736        parse_end: usize,
737        viewport_start: usize,
738        viewport_end: usize,
739        theme: &Theme,
740        _context_bytes: usize,
741    ) -> Vec<HighlightSpan> {
742        self.stats.cache_misses += 1;
743        self.dirty_from = None; // consumed
744
745        if parse_end <= desired_parse_start {
746            return Vec::new();
747        }
748
749        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
750        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
751            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
752
753        let content = buffer.slice_bytes(actual_start..parse_end);
754        let content_str = match std::str::from_utf8(&content) {
755            Ok(s) => s,
756            Err(_) => return Vec::new(),
757        };
758
759        let mut spans = Vec::new();
760        let content_bytes = content_str.as_bytes();
761        let mut pos = 0;
762        let mut current_offset = actual_start;
763        let mut bytes_since_checkpoint: usize = 0;
764
765        while pos < content_bytes.len() {
766            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
767                let nearby = self.checkpoint_markers.query_range(
768                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
769                    current_offset + CHECKPOINT_INTERVAL / 2,
770                );
771                if nearby.is_empty() {
772                    let marker_id = self.checkpoint_markers.create(current_offset, true);
773                    self.checkpoint_states
774                        .insert(marker_id, (state.clone(), current_scopes.clone()));
775                }
776                bytes_since_checkpoint = 0;
777            }
778
779            let line_start = pos;
780            let mut line_end = pos;
781
782            while line_end < content_bytes.len() {
783                if content_bytes[line_end] == b'\n' {
784                    line_end += 1;
785                    break;
786                } else if content_bytes[line_end] == b'\r' {
787                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
788                        line_end += 2;
789                    } else {
790                        line_end += 1;
791                    }
792                    break;
793                }
794                line_end += 1;
795            }
796
797            let line_bytes = &content_bytes[line_start..line_end];
798            let actual_line_byte_len = line_bytes.len();
799
800            let line_str = match std::str::from_utf8(line_bytes) {
801                Ok(s) => s,
802                Err(_) => {
803                    pos = line_end;
804                    current_offset += actual_line_byte_len;
805                    bytes_since_checkpoint += actual_line_byte_len;
806                    continue;
807                }
808            };
809
810            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
811            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
812                format!("{}\n", line_content)
813            } else {
814                line_content.to_string()
815            };
816
817            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
818                Ok(ops) => ops,
819                Err(_) => {
820                    pos = line_end;
821                    current_offset += actual_line_byte_len;
822                    bytes_since_checkpoint += actual_line_byte_len;
823                    continue;
824                }
825            };
826
827            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
828            let mut syntect_offset = 0;
829            let line_content_len = line_content.len();
830
831            for (op_offset, op) in ops {
832                let clamped_op_offset = op_offset.min(line_content_len);
833                if collect_spans && clamped_op_offset > syntect_offset {
834                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
835                        let byte_start = current_offset + syntect_offset;
836                        let byte_end = current_offset + clamped_op_offset;
837                        let clamped_start = byte_start.max(desired_parse_start);
838                        if clamped_start < byte_end {
839                            spans.push(CachedSpan {
840                                range: clamped_start..byte_end,
841                                category,
842                            });
843                        }
844                    }
845                }
846                syntect_offset = clamped_op_offset;
847                #[allow(clippy::let_underscore_must_use)]
848                let _ = current_scopes.apply(&op);
849            }
850
851            if collect_spans && syntect_offset < line_content_len {
852                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
853                    let byte_start = current_offset + syntect_offset;
854                    let byte_end = current_offset + line_content_len;
855                    let clamped_start = byte_start.max(desired_parse_start);
856                    if clamped_start < byte_end {
857                        spans.push(CachedSpan {
858                            range: clamped_start..byte_end,
859                            category,
860                        });
861                    }
862                }
863            }
864
865            pos = line_end;
866            current_offset += actual_line_byte_len;
867            bytes_since_checkpoint += actual_line_byte_len;
868
869            // Update checkpoint states as we pass them
870            let markers_here: Vec<(MarkerId, usize)> = self
871                .checkpoint_markers
872                .query_range(
873                    current_offset.saturating_sub(actual_line_byte_len),
874                    current_offset,
875                )
876                .into_iter()
877                .map(|(id, start, _)| (id, start))
878                .collect();
879            for (marker_id, _) in markers_here {
880                self.checkpoint_states
881                    .insert(marker_id, (state.clone(), current_scopes.clone()));
882            }
883        }
884
885        self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
886
887        Self::merge_adjacent_spans(&mut spans);
888
889        self.cache = Some(TextMateCache {
890            range: desired_parse_start..parse_end,
891            spans: spans.clone(),
892        });
893        self.last_buffer_len = buffer.len();
894
895        spans
896            .into_iter()
897            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
898            .map(|span| {
899                let cat = span.category;
900                HighlightSpan {
901                    range: span.range,
902                    color: highlight_color(cat, theme),
903                    category: Some(cat),
904                }
905            })
906            .collect()
907    }
908
909    /// Find the best point to resume parsing from for the viewport.
910    fn find_parse_resume_point(
911        &self,
912        desired_start: usize,
913        parse_end: usize,
914        syntax: &syntect::parsing::SyntaxReference,
915    ) -> (
916        usize,
917        syntect::parsing::ParseState,
918        syntect::parsing::ScopeStack,
919        bool,
920    ) {
921        use syntect::parsing::{ParseState, ScopeStack};
922
923        // Look for a checkpoint near the desired start. For large files, only
924        // consider checkpoints that are within MAX_PARSE_BYTES of desired_start
925        // to avoid parsing hundreds of MB from a distant checkpoint.
926        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
927        let markers = self
928            .checkpoint_markers
929            .query_range(search_start, desired_start + 1);
930        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
931
932        if let Some((id, cp_pos, _)) = nearest {
933            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
934                return (cp_pos, s.clone(), sc.clone(), true);
935            }
936        }
937
938        if parse_end <= MAX_PARSE_BYTES {
939            // File is small enough to parse from byte 0
940            (0, ParseState::new(syntax), ScopeStack::new(), true)
941        } else {
942            // Large file, no nearby checkpoint — start fresh from desired_start.
943            // Still create checkpoints so future visits to this region can resume.
944            (
945                desired_start,
946                ParseState::new(syntax),
947                ScopeStack::new(),
948                true,
949            )
950        }
951    }
952
953    /// Map scope stack to highlight category
954    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
955        for scope in scopes.as_slice().iter().rev() {
956            let scope_str = scope.build_string();
957            if let Some(cat) = scope_to_category(&scope_str) {
958                return Some(cat);
959            }
960        }
961        None
962    }
963
964    /// Merge adjacent spans with same category
965    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
966        if spans.len() < 2 {
967            return;
968        }
969
970        let mut write_idx = 0;
971        for read_idx in 1..spans.len() {
972            if spans[write_idx].category == spans[read_idx].category
973                && spans[write_idx].range.end == spans[read_idx].range.start
974            {
975                spans[write_idx].range.end = spans[read_idx].range.end;
976            } else {
977                write_idx += 1;
978                if write_idx != read_idx {
979                    spans[write_idx] = spans[read_idx].clone();
980                }
981            }
982        }
983        spans.truncate(write_idx + 1);
984    }
985
986    /// Invalidate span cache for an edited range.
987    /// Checkpoint positions are handled by notify_insert/notify_delete.
988    /// The span cache is NOT cleared here — it will be patched (partial update)
989    /// during the next highlight_viewport call using convergence. Only dirty_from
990    /// (set by notify_insert/notify_delete) controls re-parsing scope.
991    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
992        // Intentionally does NOT clear self.cache.
993        // The cache will be partially updated in highlight_viewport when
994        // dirty_from is set. This avoids full re-parses for small edits.
995    }
996
997    /// Invalidate all cache and checkpoints (file reload, language change, etc.)
998    pub fn invalidate_all(&mut self) {
999        self.cache = None;
1000        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1001        for id in ids {
1002            self.checkpoint_markers.delete(id);
1003        }
1004        self.checkpoint_states.clear();
1005        self.dirty_from = None;
1006    }
1007
1008    /// Get the highlight category at a byte position from the cache.
1009    ///
1010    /// Returns the category if the position falls within a cached highlight span.
1011    /// The position must be within the last highlighted viewport range for a result.
1012    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1013        let cache = self.cache.as_ref()?;
1014        cache
1015            .spans
1016            .iter()
1017            .find(|span| span.range.start <= position && position < span.range.end)
1018            .map(|span| span.category)
1019    }
1020
1021    /// Get syntax name
1022    pub fn syntax_name(&self) -> &str {
1023        &self.syntax_set.syntaxes()[self.syntax_index].name
1024    }
1025}
1026
1027/// Find the index of a syntax by name in a syntax set.
1028fn syntax_index(syntax_set: &syntect::parsing::SyntaxSet, name: &str) -> Option<usize> {
1029    syntax_set.syntaxes().iter().position(|s| s.name == name)
1030}
1031
1032impl HighlightEngine {
1033    /// Create a highlighting engine for a file.
1034    ///
1035    /// Uses TextMate/syntect for highlighting (broadest language coverage), falling
1036    /// back to tree-sitter for languages syntect lacks (e.g. TypeScript). Also
1037    /// detects tree-sitter language for non-highlighting features (indentation,
1038    /// semantic highlighting).
1039    ///
1040    /// If `languages` is provided, user-configured filename/extension mappings are
1041    /// checked before built-in detection.
1042    pub fn for_file(
1043        path: &Path,
1044        registry: &GrammarRegistry,
1045        languages: Option<&std::collections::HashMap<String, crate::config::LanguageConfig>>,
1046    ) -> Self {
1047        let syntax_set = registry.syntax_set_arc();
1048        let ts_language = Language::from_path(path);
1049
1050        // Find syntax, checking user language config first if provided
1051        let syntax = if let Some(langs) = languages {
1052            registry.find_syntax_for_file_with_languages(path, langs)
1053        } else {
1054            registry.find_syntax_for_file(path)
1055        };
1056
1057        if let Some(syntax) = syntax {
1058            if let Some(index) = syntax_index(&syntax_set, &syntax.name) {
1059                return Self::TextMate(Box::new(TextMateEngine::with_language(
1060                    syntax_set,
1061                    index,
1062                    ts_language,
1063                )));
1064            }
1065        }
1066
1067        // No TextMate grammar found - fall back to tree-sitter if available
1068        // This handles languages like TypeScript that syntect doesn't include by default
1069        if let Some(lang) = ts_language {
1070            if let Ok(highlighter) = Highlighter::new(lang) {
1071                tracing::debug!(
1072                    "No TextMate grammar for {:?}, falling back to tree-sitter",
1073                    path.extension()
1074                );
1075                return Self::TreeSitter(Box::new(highlighter));
1076            }
1077        }
1078
1079        Self::None
1080    }
1081
1082    /// Create a highlighting engine for a syntax by name.
1083    ///
1084    /// This looks up the syntax in the grammar registry and creates a TextMate
1085    /// highlighter for it. This supports all syntect syntaxes (100+) including
1086    /// user-configured grammars.
1087    ///
1088    /// The `ts_language` parameter optionally provides a tree-sitter language
1089    /// for non-highlighting features (indentation, semantic highlighting).
1090    pub fn for_syntax_name(
1091        name: &str,
1092        registry: &GrammarRegistry,
1093        ts_language: Option<Language>,
1094    ) -> Self {
1095        let syntax_set = registry.syntax_set_arc();
1096
1097        if let Some(syntax) = registry.find_syntax_by_name(name) {
1098            if let Some(index) = syntax_index(&syntax_set, &syntax.name) {
1099                return Self::TextMate(Box::new(TextMateEngine::with_language(
1100                    syntax_set,
1101                    index,
1102                    ts_language,
1103                )));
1104            }
1105        }
1106
1107        Self::None
1108    }
1109
1110    /// Highlight the visible viewport
1111    ///
1112    /// `context_bytes` controls how far before/after the viewport to parse for accurate
1113    /// highlighting of multi-line constructs (strings, comments, nested blocks).
1114    pub fn highlight_viewport(
1115        &mut self,
1116        buffer: &Buffer,
1117        viewport_start: usize,
1118        viewport_end: usize,
1119        theme: &Theme,
1120        context_bytes: usize,
1121    ) -> Vec<HighlightSpan> {
1122        match self {
1123            Self::TreeSitter(h) => {
1124                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1125            }
1126            Self::TextMate(h) => {
1127                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1128            }
1129            Self::None => Vec::new(),
1130        }
1131    }
1132
1133    /// Notify the highlighting engine of a buffer insert (for checkpoint position tracking).
1134    pub fn notify_insert(&mut self, position: usize, length: usize) {
1135        if let Self::TextMate(h) = self {
1136            h.notify_insert(position, length);
1137        }
1138    }
1139
1140    /// Notify the highlighting engine of a buffer delete (for checkpoint position tracking).
1141    pub fn notify_delete(&mut self, position: usize, length: usize) {
1142        if let Self::TextMate(h) = self {
1143            h.notify_delete(position, length);
1144        }
1145    }
1146
1147    /// Invalidate cache for an edited range
1148    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1149        match self {
1150            Self::TreeSitter(h) => h.invalidate_range(edit_range),
1151            Self::TextMate(h) => h.invalidate_range(edit_range),
1152            Self::None => {}
1153        }
1154    }
1155
1156    /// Invalidate entire cache
1157    pub fn invalidate_all(&mut self) {
1158        match self {
1159            Self::TreeSitter(h) => h.invalidate_all(),
1160            Self::TextMate(h) => h.invalidate_all(),
1161            Self::None => {}
1162        }
1163    }
1164
1165    /// Check if this engine has highlighting available
1166    pub fn has_highlighting(&self) -> bool {
1167        !matches!(self, Self::None)
1168    }
1169
1170    /// Get a description of the active backend
1171    pub fn backend_name(&self) -> &str {
1172        match self {
1173            Self::TreeSitter(_) => "tree-sitter",
1174            Self::TextMate(_) => "textmate",
1175            Self::None => "none",
1176        }
1177    }
1178
1179    /// Get performance stats (TextMate engine only).
1180    pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1181        if let Self::TextMate(h) = self {
1182            Some(h.stats())
1183        } else {
1184            None
1185        }
1186    }
1187
1188    /// Reset performance counters.
1189    pub fn reset_highlight_stats(&mut self) {
1190        if let Self::TextMate(h) = self {
1191            h.reset_stats();
1192        }
1193    }
1194
1195    /// Get the language/syntax name if available
1196    pub fn syntax_name(&self) -> Option<&str> {
1197        match self {
1198            Self::TreeSitter(_) => None, // Tree-sitter doesn't expose name easily
1199            Self::TextMate(h) => Some(h.syntax_name()),
1200            Self::None => None,
1201        }
1202    }
1203
1204    /// Get the highlight category at a byte position from the cache.
1205    ///
1206    /// Returns the category if the position falls within a cached highlight span.
1207    /// Useful for detecting whether the cursor is inside a string, comment, etc.
1208    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1209        match self {
1210            Self::TreeSitter(h) => h.category_at_position(position),
1211            Self::TextMate(h) => h.category_at_position(position),
1212            Self::None => None,
1213        }
1214    }
1215
1216    /// Get the tree-sitter Language for non-highlighting features
1217    /// Returns the language even when using TextMate for highlighting
1218    pub fn language(&self) -> Option<&Language> {
1219        match self {
1220            Self::TreeSitter(h) => Some(h.language()),
1221            Self::TextMate(h) => h.language(),
1222            Self::None => None,
1223        }
1224    }
1225}
1226
1227/// Highlight a code string using syntect (for markdown code blocks, hover popups, etc.)
1228/// Returns spans with byte ranges relative to the input string.
1229///
1230/// This uses TextMate grammars via syntect which provides broader language coverage
1231/// than tree-sitter (~150+ languages vs ~17).
1232pub fn highlight_string(
1233    code: &str,
1234    lang_hint: &str,
1235    registry: &GrammarRegistry,
1236    theme: &Theme,
1237) -> Vec<HighlightSpan> {
1238    use syntect::parsing::{ParseState, ScopeStack};
1239
1240    // Find syntax by language token (handles aliases like "py" -> Python)
1241    let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1242        Some(s) => s,
1243        None => return Vec::new(),
1244    };
1245
1246    let syntax_set = registry.syntax_set();
1247    let mut state = ParseState::new(syntax);
1248    let mut spans = Vec::new();
1249    let mut current_scopes = ScopeStack::new();
1250    let mut current_offset = 0;
1251
1252    // Parse line by line
1253    for line in code.split_inclusive('\n') {
1254        let line_start = current_offset;
1255        let line_len = line.len();
1256
1257        // Remove trailing newline for syntect, then add it back
1258        let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1259        let line_for_syntect = if line.ends_with('\n') {
1260            format!("{}\n", line_content)
1261        } else {
1262            line_content.to_string()
1263        };
1264
1265        let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1266            Ok(ops) => ops,
1267            Err(_) => {
1268                current_offset += line_len;
1269                continue;
1270            }
1271        };
1272
1273        let mut syntect_offset = 0;
1274        let line_content_len = line_content.len();
1275
1276        for (op_offset, op) in ops {
1277            let clamped_op_offset = op_offset.min(line_content_len);
1278            if clamped_op_offset > syntect_offset {
1279                if let Some(category) = scope_stack_to_category(&current_scopes) {
1280                    let byte_start = line_start + syntect_offset;
1281                    let byte_end = line_start + clamped_op_offset;
1282                    if byte_start < byte_end {
1283                        spans.push(HighlightSpan {
1284                            range: byte_start..byte_end,
1285                            color: highlight_color(category, theme),
1286                            category: Some(category),
1287                        });
1288                    }
1289                }
1290            }
1291            syntect_offset = clamped_op_offset;
1292            // Scope stack errors are non-fatal for highlighting
1293            #[allow(clippy::let_underscore_must_use)]
1294            let _ = current_scopes.apply(&op);
1295        }
1296
1297        // Handle remaining text on line
1298        if syntect_offset < line_content_len {
1299            if let Some(category) = scope_stack_to_category(&current_scopes) {
1300                let byte_start = line_start + syntect_offset;
1301                let byte_end = line_start + line_content_len;
1302                if byte_start < byte_end {
1303                    spans.push(HighlightSpan {
1304                        range: byte_start..byte_end,
1305                        color: highlight_color(category, theme),
1306                        category: Some(category),
1307                    });
1308                }
1309            }
1310        }
1311
1312        current_offset += line_len;
1313    }
1314
1315    // Merge adjacent spans with same color
1316    merge_adjacent_highlight_spans(&mut spans);
1317
1318    spans
1319}
1320
1321/// Map scope stack to highlight category (for highlight_string)
1322fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1323    for scope in scopes.as_slice().iter().rev() {
1324        let scope_str = scope.build_string();
1325        if let Some(cat) = scope_to_category(&scope_str) {
1326            return Some(cat);
1327        }
1328    }
1329    None
1330}
1331
1332/// Merge adjacent spans with same color
1333fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1334    if spans.len() < 2 {
1335        return;
1336    }
1337
1338    let mut write_idx = 0;
1339    for read_idx in 1..spans.len() {
1340        if spans[write_idx].color == spans[read_idx].color
1341            && spans[write_idx].range.end == spans[read_idx].range.start
1342        {
1343            spans[write_idx].range.end = spans[read_idx].range.end;
1344        } else {
1345            write_idx += 1;
1346            if write_idx != read_idx {
1347                spans[write_idx] = spans[read_idx].clone();
1348            }
1349        }
1350    }
1351    spans.truncate(write_idx + 1);
1352}
1353
1354#[cfg(test)]
1355mod tests {
1356    use crate::model::filesystem::StdFileSystem;
1357    use std::sync::Arc;
1358
1359    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1360        Arc::new(StdFileSystem)
1361    }
1362    use super::*;
1363    use crate::view::theme;
1364
1365    #[test]
1366    fn test_highlight_engine_default() {
1367        let engine = HighlightEngine::default();
1368        assert!(!engine.has_highlighting());
1369        assert_eq!(engine.backend_name(), "none");
1370    }
1371
1372    #[test]
1373    fn test_textmate_backend_selection() {
1374        let registry =
1375            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1376
1377        // Languages with TextMate grammars use TextMate for highlighting
1378        let engine = HighlightEngine::for_file(Path::new("test.rs"), &registry, None);
1379        assert_eq!(engine.backend_name(), "textmate");
1380        // Tree-sitter language should still be detected for other features
1381        assert!(engine.language().is_some());
1382
1383        let engine = HighlightEngine::for_file(Path::new("test.py"), &registry, None);
1384        assert_eq!(engine.backend_name(), "textmate");
1385        assert!(engine.language().is_some());
1386
1387        let engine = HighlightEngine::for_file(Path::new("test.js"), &registry, None);
1388        assert_eq!(engine.backend_name(), "textmate");
1389        assert!(engine.language().is_some());
1390
1391        // TypeScript falls back to tree-sitter (syntect doesn't include TS by default)
1392        let engine = HighlightEngine::for_file(Path::new("test.ts"), &registry, None);
1393        assert_eq!(engine.backend_name(), "tree-sitter");
1394        assert!(engine.language().is_some());
1395
1396        let engine = HighlightEngine::for_file(Path::new("test.tsx"), &registry, None);
1397        assert_eq!(engine.backend_name(), "tree-sitter");
1398        assert!(engine.language().is_some());
1399    }
1400
1401    #[test]
1402    fn test_tree_sitter_direct() {
1403        // Verify tree-sitter highlighter can be created directly for Rust
1404        let highlighter = Highlighter::new(Language::Rust);
1405        assert!(highlighter.is_ok());
1406    }
1407
1408    #[test]
1409    fn test_unknown_extension() {
1410        let registry =
1411            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1412
1413        // Unknown extension
1414        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), &registry, None);
1415        // Might be none or might find something via syntect
1416        // Just verify it doesn't panic
1417        let _ = engine.backend_name();
1418    }
1419
1420    #[test]
1421    fn test_highlight_viewport_empty_buffer_no_panic() {
1422        // Regression test: calling highlight_viewport with an empty buffer
1423        // and non-zero viewport range previously caused subtraction overflow panic.
1424        //
1425        // The bug occurred when:
1426        // - buffer is empty (len = 0)
1427        // - viewport_start > context_bytes (so parse_start > 0 after saturating_sub)
1428        // - parse_end = min(viewport_end + context_bytes, buffer.len()) = 0
1429        // - parse_end - parse_start would underflow (0 - positive = overflow)
1430        let registry =
1431            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1432
1433        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), &registry, None);
1434
1435        // Create empty buffer
1436        let buffer = Buffer::from_str("", 0, test_fs());
1437        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1438
1439        // Test the specific case that triggered the overflow:
1440        // viewport_start=100, context_bytes=10 => parse_start=90, parse_end=0
1441        // 0 - 90 = overflow!
1442        if let HighlightEngine::TextMate(ref mut tm) = engine {
1443            // Small context_bytes so parse_start remains > 0
1444            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1445            assert!(spans.is_empty());
1446        }
1447    }
1448
1449    /// Test that TextMateEngine produces correct byte offsets for CRLF content.
1450    /// This is a regression test for a bug where using str::lines() caused 1-byte
1451    /// offset drift per line because it strips line terminators.
1452    #[test]
1453    fn test_textmate_engine_crlf_byte_offsets() {
1454        let registry =
1455            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1456
1457        let mut engine = HighlightEngine::for_file(Path::new("test.java"), &registry, None);
1458
1459        // Create CRLF content with keywords on each line
1460        // Each "public" keyword should be highlighted at byte positions:
1461        // Line 1: "public" at bytes 0-5
1462        // Line 2: "public" at bytes 8-13 (after "public\r\n" = 8 bytes)
1463        // Line 3: "public" at bytes 16-21 (after two "public\r\n" = 16 bytes)
1464        let content = b"public\r\npublic\r\npublic\r\n";
1465        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1466        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1467
1468        if let HighlightEngine::TextMate(ref mut tm) = engine {
1469            // Highlight the entire content
1470            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1471
1472            // Find spans that cover keyword positions
1473            // The keyword "public" should have spans at these byte ranges:
1474            // Line 1: 0..6
1475            // Line 2: 8..14 (NOT 7..13 which would be the buggy offset)
1476            // Line 3: 16..22 (NOT 14..20 which would be the buggy offset)
1477
1478            eprintln!(
1479                "Spans: {:?}",
1480                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1481            );
1482
1483            // Check that we have spans covering the correct positions
1484            let has_span_at = |start: usize, end: usize| -> bool {
1485                spans
1486                    .iter()
1487                    .any(|s| s.range.start <= start && s.range.end >= end)
1488            };
1489
1490            // Line 1: "public" at bytes 0-6
1491            assert!(
1492                has_span_at(0, 6),
1493                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1494                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1495            );
1496
1497            // Line 2: "public" at bytes 8-14 (after "public\r\n")
1498            // If buggy, would be at 7-13
1499            assert!(
1500                has_span_at(8, 14),
1501                "Should have span covering bytes 8-14 (line 2 'public'). \
1502                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1503                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1504            );
1505
1506            // Line 3: "public" at bytes 16-22 (after two "public\r\n")
1507            // If buggy, would be at 14-20
1508            assert!(
1509                has_span_at(16, 22),
1510                "Should have span covering bytes 16-22 (line 3 'public'). \
1511                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1512                spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1513            );
1514        } else {
1515            panic!("Expected TextMate engine for .java file");
1516        }
1517    }
1518
1519    #[test]
1520    fn test_git_rebase_todo_highlighting() {
1521        let registry =
1522            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1523
1524        // git-rebase-todo files should use the Git Rebase Todo grammar
1525        let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), &registry, None);
1526        assert_eq!(engine.backend_name(), "textmate");
1527        assert!(engine.has_highlighting());
1528    }
1529
1530    #[test]
1531    fn test_git_commit_message_highlighting() {
1532        let registry =
1533            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1534
1535        // COMMIT_EDITMSG should use the Git Commit Message grammar
1536        let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), &registry, None);
1537        assert_eq!(engine.backend_name(), "textmate");
1538        assert!(engine.has_highlighting());
1539
1540        // MERGE_MSG should also work
1541        let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), &registry, None);
1542        assert_eq!(engine.backend_name(), "textmate");
1543        assert!(engine.has_highlighting());
1544    }
1545
1546    #[test]
1547    fn test_gitignore_highlighting() {
1548        let registry =
1549            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1550
1551        // .gitignore should use the Gitignore grammar
1552        let engine = HighlightEngine::for_file(Path::new(".gitignore"), &registry, None);
1553        assert_eq!(engine.backend_name(), "textmate");
1554        assert!(engine.has_highlighting());
1555
1556        // .dockerignore should also work
1557        let engine = HighlightEngine::for_file(Path::new(".dockerignore"), &registry, None);
1558        assert_eq!(engine.backend_name(), "textmate");
1559        assert!(engine.has_highlighting());
1560    }
1561
1562    #[test]
1563    fn test_gitconfig_highlighting() {
1564        let registry =
1565            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1566
1567        // .gitconfig should use the Git Config grammar
1568        let engine = HighlightEngine::for_file(Path::new(".gitconfig"), &registry, None);
1569        assert_eq!(engine.backend_name(), "textmate");
1570        assert!(engine.has_highlighting());
1571
1572        // .gitmodules should also work
1573        let engine = HighlightEngine::for_file(Path::new(".gitmodules"), &registry, None);
1574        assert_eq!(engine.backend_name(), "textmate");
1575        assert!(engine.has_highlighting());
1576    }
1577
1578    #[test]
1579    fn test_gitattributes_highlighting() {
1580        let registry =
1581            GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1582
1583        // .gitattributes should use the Git Attributes grammar
1584        let engine = HighlightEngine::for_file(Path::new(".gitattributes"), &registry, None);
1585        assert_eq!(engine.backend_name(), "textmate");
1586        assert!(engine.has_highlighting());
1587    }
1588
1589    #[test]
1590    fn test_comment_delimiter_uses_comment_color() {
1591        // Comment delimiters (#, //, /*) should use comment color, not operator
1592        assert_eq!(
1593            scope_to_category("punctuation.definition.comment"),
1594            Some(HighlightCategory::Comment)
1595        );
1596        assert_eq!(
1597            scope_to_category("punctuation.definition.comment.python"),
1598            Some(HighlightCategory::Comment)
1599        );
1600        assert_eq!(
1601            scope_to_category("punctuation.definition.comment.begin"),
1602            Some(HighlightCategory::Comment)
1603        );
1604    }
1605
1606    #[test]
1607    fn test_string_delimiter_uses_string_color() {
1608        // String delimiters (", ', `) should use string color, not operator
1609        assert_eq!(
1610            scope_to_category("punctuation.definition.string.begin"),
1611            Some(HighlightCategory::String)
1612        );
1613        assert_eq!(
1614            scope_to_category("punctuation.definition.string.end"),
1615            Some(HighlightCategory::String)
1616        );
1617    }
1618
1619    #[test]
1620    fn test_punctuation_bracket() {
1621        // punctuation.section (TextMate standard for block delimiters)
1622        assert_eq!(
1623            scope_to_category("punctuation.section"),
1624            Some(HighlightCategory::PunctuationBracket)
1625        );
1626        assert_eq!(
1627            scope_to_category("punctuation.section.block.begin.c"),
1628            Some(HighlightCategory::PunctuationBracket)
1629        );
1630        assert_eq!(
1631            scope_to_category("punctuation.bracket"),
1632            Some(HighlightCategory::PunctuationBracket)
1633        );
1634        // punctuation.definition.* bracket-like scopes from sublime-syntax grammars
1635        assert_eq!(
1636            scope_to_category("punctuation.definition.array.begin.toml"),
1637            Some(HighlightCategory::PunctuationBracket)
1638        );
1639        assert_eq!(
1640            scope_to_category("punctuation.definition.block.code.typst"),
1641            Some(HighlightCategory::PunctuationBracket)
1642        );
1643        assert_eq!(
1644            scope_to_category("punctuation.definition.group.typst"),
1645            Some(HighlightCategory::PunctuationBracket)
1646        );
1647        assert_eq!(
1648            scope_to_category("punctuation.definition.inline-table.begin.toml"),
1649            Some(HighlightCategory::PunctuationBracket)
1650        );
1651        assert_eq!(
1652            scope_to_category("punctuation.definition.tag.end.svelte"),
1653            Some(HighlightCategory::PunctuationBracket)
1654        );
1655    }
1656
1657    #[test]
1658    fn test_punctuation_delimiter() {
1659        assert_eq!(
1660            scope_to_category("punctuation.separator"),
1661            Some(HighlightCategory::PunctuationDelimiter)
1662        );
1663        assert_eq!(
1664            scope_to_category("punctuation.terminator.statement.c"),
1665            Some(HighlightCategory::PunctuationDelimiter)
1666        );
1667        assert_eq!(
1668            scope_to_category("punctuation.accessor"),
1669            Some(HighlightCategory::PunctuationDelimiter)
1670        );
1671    }
1672}