Skip to main content

fresh/primitives/
textmate_engine.rs

//! TextMate-based syntax highlighting engine (WASM-compatible)
//!
//! This module provides syntax highlighting using syntect's TextMate grammar engine.
//! It's completely WASM-compatible as syntect can use pure-Rust regex (fancy-regex).
//!
//! # Features
//!
//! - Syntax highlighting for 100+ languages via TextMate grammars
//! - Viewport-based highlighting with caching for performance
//! - No tree-sitter or native code dependencies
11
12use crate::model::buffer::Buffer;
13use crate::primitives::grammar::GrammarRegistry;
14use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
15use crate::view::theme::Theme;
16use std::ops::Range;
17use std::path::Path;
18use std::sync::Arc;
19use syntect::parsing::SyntaxSet;
20
/// Maximum bytes to parse in a single operation (1 MiB).
///
/// `TextMateEngine::highlight_viewport` returns no spans at all, rather than a
/// truncated parse, when the viewport plus its surrounding context exceeds this size.
const MAX_PARSE_BYTES: usize = 1024 * 1024;
23
/// TextMate highlighting engine
///
/// Uses syntect for TextMate grammar-based syntax highlighting.
/// This is WASM-compatible when syntect uses the `fancy-regex` feature.
///
/// The engine keeps the result of its most recent parse so that repeated requests for
/// the same region of an unchanged buffer do not re-run the grammar.
pub struct TextMateEngine {
    /// Shared syntax set; handed to syntect on every parsed line.
    syntax_set: Arc<SyntaxSet>,
    /// Index into `syntax_set.syntaxes()` of the syntax this engine highlights with.
    syntax_index: usize,
    /// Spans from the most recent parse; `None` until the first parse and after
    /// invalidation.
    cache: Option<TextMateCache>,
    /// Buffer length recorded when `cache` was filled; a different length on the next
    /// request is treated as a cache miss.
    last_buffer_len: usize,
}
34
/// Result of one parse pass, kept so later viewport requests can be answered without
/// re-parsing.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer that was parsed (viewport plus context).
    range: Range<usize>,
    /// Spans found in `range`, after merging adjacent spans of the same category.
    /// Colors are not stored here; they are resolved against the theme on lookup.
    spans: Vec<CachedSpan>,
}
40
/// A theme-independent highlighted region.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range of the span, expressed as absolute offsets into the buffer.
    range: Range<usize>,
    /// Highlight category the span was classified as.
    category: HighlightCategory,
}
46
47impl TextMateEngine {
48    /// Create a new TextMate engine for the given syntax
49    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
50        Self {
51            syntax_set,
52            syntax_index,
53            cache: None,
54            last_buffer_len: 0,
55        }
56    }
57
58    /// Create a TextMate engine for a file path
59    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
60        let syntax_set = registry.syntax_set_arc();
61
62        // Find syntax by file extension
63        let syntax = registry.find_syntax_for_file(path)?;
64
65        // Find the index of this syntax in the set
66        let index = syntax_set
67            .syntaxes()
68            .iter()
69            .position(|s| s.name == syntax.name)?;
70
71        Some(Self::new(syntax_set, index))
72    }
73
74    /// Highlight the visible viewport range
75    ///
76    /// `context_bytes` controls how far before/after the viewport to parse for accurate
77    /// highlighting of multi-line constructs (strings, comments, nested blocks).
78    pub fn highlight_viewport(
79        &mut self,
80        buffer: &Buffer,
81        viewport_start: usize,
82        viewport_end: usize,
83        theme: &Theme,
84        context_bytes: usize,
85    ) -> Vec<HighlightSpan> {
86        use syntect::parsing::{ParseState, ScopeStack};
87
88        // Check cache validity
89        if let Some(cache) = &self.cache {
90            if cache.range.start <= viewport_start
91                && cache.range.end >= viewport_end
92                && self.last_buffer_len == buffer.len()
93            {
94                return cache
95                    .spans
96                    .iter()
97                    .filter(|span| {
98                        span.range.start < viewport_end && span.range.end > viewport_start
99                    })
100                    .map(|span| HighlightSpan {
101                        range: span.range.clone(),
102                        color: highlight_color(span.category, theme),
103                        category: Some(span.category),
104                    })
105                    .collect();
106            }
107        }
108
109        // Cache miss - parse viewport region
110        let parse_start = viewport_start.saturating_sub(context_bytes);
111        let parse_end = (viewport_end + context_bytes).min(buffer.len());
112
113        if parse_end <= parse_start || parse_end - parse_start > MAX_PARSE_BYTES {
114            return Vec::new();
115        }
116
117        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
118        let mut state = ParseState::new(syntax);
119        let mut spans = Vec::new();
120
121        // Get content
122        let content = buffer.slice_bytes(parse_start..parse_end);
123        let content_str = match std::str::from_utf8(&content) {
124            Ok(s) => s,
125            Err(_) => return Vec::new(),
126        };
127
128        // Parse line by line
129        let content_bytes = content_str.as_bytes();
130        let mut pos = 0;
131        let mut current_offset = parse_start;
132        let mut current_scopes = ScopeStack::new();
133
134        while pos < content_bytes.len() {
135            let line_start = pos;
136            let mut line_end = pos;
137
138            // Scan for line ending
139            while line_end < content_bytes.len() {
140                if content_bytes[line_end] == b'\n' {
141                    line_end += 1;
142                    break;
143                } else if content_bytes[line_end] == b'\r' {
144                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
145                        line_end += 2; // CRLF
146                    } else {
147                        line_end += 1; // CR only
148                    }
149                    break;
150                }
151                line_end += 1;
152            }
153
154            let line_bytes = &content_bytes[line_start..line_end];
155            let actual_line_byte_len = line_bytes.len();
156
157            let line_str = match std::str::from_utf8(line_bytes) {
158                Ok(s) => s,
159                Err(_) => {
160                    pos = line_end;
161                    current_offset += actual_line_byte_len;
162                    continue;
163                }
164            };
165
166            // Prepare line for syntect
167            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
168            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
169                format!("{}\n", line_content)
170            } else {
171                line_content.to_string()
172            };
173
174            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
175                Ok(ops) => ops,
176                Err(_) => {
177                    pos = line_end;
178                    current_offset += actual_line_byte_len;
179                    continue;
180                }
181            };
182
183            // Convert operations to spans
184            let mut syntect_offset = 0;
185            let line_content_len = line_content.len();
186
187            for (op_offset, op) in ops {
188                let clamped_op_offset = op_offset.min(line_content_len);
189                if clamped_op_offset > syntect_offset {
190                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
191                        let byte_start = current_offset + syntect_offset;
192                        let byte_end = current_offset + clamped_op_offset;
193                        if byte_start < byte_end {
194                            spans.push(CachedSpan {
195                                range: byte_start..byte_end,
196                                category,
197                            });
198                        }
199                    }
200                }
201                syntect_offset = clamped_op_offset;
202                // Scope stack errors are non-fatal for highlighting
203                #[allow(clippy::let_underscore_must_use)]
204                let _ = current_scopes.apply(&op);
205            }
206
207            // Handle remaining text on line
208            if syntect_offset < line_content_len {
209                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
210                    let byte_start = current_offset + syntect_offset;
211                    let byte_end = current_offset + line_content_len;
212                    if byte_start < byte_end {
213                        spans.push(CachedSpan {
214                            range: byte_start..byte_end,
215                            category,
216                        });
217                    }
218                }
219            }
220
221            pos = line_end;
222            current_offset += actual_line_byte_len;
223        }
224
225        // Merge adjacent spans
226        Self::merge_adjacent_spans(&mut spans);
227
228        // Update cache
229        self.cache = Some(TextMateCache {
230            range: parse_start..parse_end,
231            spans: spans.clone(),
232        });
233        self.last_buffer_len = buffer.len();
234
235        // Filter and resolve colors
236        spans
237            .into_iter()
238            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
239            .map(|span| {
240                let cat = span.category;
241                HighlightSpan {
242                    range: span.range,
243                    color: highlight_color(cat, theme),
244                    category: Some(cat),
245                }
246            })
247            .collect()
248    }
249
250    /// Map scope stack to highlight category
251    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
252        for scope in scopes.as_slice().iter().rev() {
253            let scope_str = scope.build_string();
254            if let Some(cat) = scope_to_category(&scope_str) {
255                return Some(cat);
256            }
257        }
258        None
259    }
260
261    /// Merge adjacent spans with same category
262    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
263        if spans.len() < 2 {
264            return;
265        }
266
267        let mut write_idx = 0;
268        for read_idx in 1..spans.len() {
269            if spans[write_idx].category == spans[read_idx].category
270                && spans[write_idx].range.end == spans[read_idx].range.start
271            {
272                spans[write_idx].range.end = spans[read_idx].range.end;
273            } else {
274                write_idx += 1;
275                if write_idx != read_idx {
276                    spans[write_idx] = spans[read_idx].clone();
277                }
278            }
279        }
280        spans.truncate(write_idx + 1);
281    }
282
283    /// Invalidate cache for edited range
284    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
285        if let Some(cache) = &self.cache {
286            if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
287                self.cache = None;
288            }
289        }
290    }
291
292    /// Invalidate all cache
293    pub fn invalidate_all(&mut self) {
294        self.cache = None;
295    }
296
297    /// Get syntax name
298    pub fn syntax_name(&self) -> &str {
299        &self.syntax_set.syntaxes()[self.syntax_index].name
300    }
301}
302
303/// Map TextMate scope to highlight category
304fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
305    let scope_lower = scope.to_lowercase();
306
307    // Comments - highest priority
308    if scope_lower.starts_with("comment") {
309        return Some(HighlightCategory::Comment);
310    }
311
312    // Strings
313    if scope_lower.starts_with("string") {
314        return Some(HighlightCategory::String);
315    }
316
317    // Markdown/markup scopes
318    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
319        return Some(HighlightCategory::Keyword);
320    }
321    if scope_lower.starts_with("markup.bold") {
322        return Some(HighlightCategory::Constant);
323    }
324    if scope_lower.starts_with("markup.italic") {
325        return Some(HighlightCategory::Variable);
326    }
327    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
328        return Some(HighlightCategory::String);
329    }
330    if scope_lower.starts_with("markup.underline.link")
331        || scope_lower.starts_with("markup.underline")
332    {
333        return Some(HighlightCategory::Function);
334    }
335    if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
336        return Some(HighlightCategory::Comment);
337    }
338    if scope_lower.starts_with("markup.list") {
339        return Some(HighlightCategory::Operator);
340    }
341
342    // Keywords (but not keyword.operator)
343    if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
344        return Some(HighlightCategory::Keyword);
345    }
346
347    // Punctuation that belongs to a parent construct (comment/string delimiters)
348    // These must be checked before the generic punctuation rule below.
349    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
350    if scope_lower.starts_with("punctuation.definition.comment") {
351        return Some(HighlightCategory::Comment);
352    }
353    if scope_lower.starts_with("punctuation.definition.string") {
354        return Some(HighlightCategory::String);
355    }
356
357    // Operators
358    if scope_lower.starts_with("keyword.operator") || scope_lower.starts_with("punctuation") {
359        return Some(HighlightCategory::Operator);
360    }
361
362    // Functions
363    if scope_lower.starts_with("entity.name.function")
364        || scope_lower.starts_with("meta.function-call")
365        || scope_lower.starts_with("support.function")
366    {
367        return Some(HighlightCategory::Function);
368    }
369
370    // Types
371    if scope_lower.starts_with("entity.name.type")
372        || scope_lower.starts_with("storage.type")
373        || scope_lower.starts_with("support.type")
374        || scope_lower.starts_with("entity.name.class")
375    {
376        return Some(HighlightCategory::Type);
377    }
378
379    // Constants and numbers
380    if scope_lower.starts_with("constant.numeric")
381        || scope_lower.starts_with("constant.language")
382        || scope_lower.starts_with("constant.character")
383    {
384        return Some(HighlightCategory::Constant);
385    }
386    if scope_lower.starts_with("constant") {
387        return Some(HighlightCategory::Constant);
388    }
389
390    // Variables and parameters
391    if scope_lower.starts_with("variable.parameter") {
392        return Some(HighlightCategory::Variable);
393    }
394    if scope_lower.starts_with("variable") {
395        return Some(HighlightCategory::Variable);
396    }
397
398    // Storage modifiers (pub, static, const, etc.)
399    if scope_lower.starts_with("storage.modifier") {
400        return Some(HighlightCategory::Keyword);
401    }
402
403    // Entity names (catch-all for other named things)
404    if scope_lower.starts_with("entity.name") {
405        return Some(HighlightCategory::Function);
406    }
407
408    None
409}
410
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each scope in `cases` maps to the category paired with it.
    fn assert_categories(cases: &[(&str, HighlightCategory)]) {
        for &(scope, expected) in cases {
            assert_eq!(
                scope_to_category(scope),
                Some(expected),
                "scope: {}",
                scope
            );
        }
    }

    #[test]
    fn test_scope_to_category() {
        assert_categories(&[
            ("comment.line", HighlightCategory::Comment),
            ("string.quoted", HighlightCategory::String),
            ("keyword.control", HighlightCategory::Keyword),
            ("keyword.operator", HighlightCategory::Operator),
            ("entity.name.function", HighlightCategory::Function),
            ("constant.numeric", HighlightCategory::Constant),
            ("variable.parameter", HighlightCategory::Variable),
        ]);
    }

    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        // Comment delimiters (#, //, /*) should use comment color, not operator
        assert_categories(&[
            ("punctuation.definition.comment", HighlightCategory::Comment),
            (
                "punctuation.definition.comment.python",
                HighlightCategory::Comment,
            ),
            (
                "punctuation.definition.comment.begin",
                HighlightCategory::Comment,
            ),
        ]);
    }

    #[test]
    fn test_string_delimiter_uses_string_color() {
        // String delimiters (", ', `) should use string color, not operator
        assert_categories(&[
            (
                "punctuation.definition.string.begin",
                HighlightCategory::String,
            ),
            (
                "punctuation.definition.string.end",
                HighlightCategory::String,
            ),
        ]);
    }
}
476}