Skip to main content

fresh/primitives/
textmate_engine.rs

1//! TextMate-based syntax highlighting engine (WASM-compatible)
2//!
3//! This module provides syntax highlighting using syntect's TextMate grammar engine.
4//! It's completely WASM-compatible as syntect can use pure-Rust regex (fancy-regex).
5//!
6//! # Features
7//!
8//! - Syntax highlighting for 100+ languages via TextMate grammars
9//! - Viewport-based highlighting with caching for performance
10//! - No tree-sitter or native code dependencies
11
12use crate::model::buffer::Buffer;
13use crate::primitives::grammar::GrammarRegistry;
14use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
15use crate::view::theme::Theme;
16use std::ops::Range;
17use std::path::Path;
18use std::sync::Arc;
19use syntect::parsing::SyntaxSet;
20
/// Maximum bytes to parse in a single operation (1 MiB).
///
/// `TextMateEngine::highlight_viewport` returns no spans at all, rather than
/// parsing, when the viewport plus its surrounding context exceeds this size.
const MAX_PARSE_BYTES: usize = 1024 * 1024;
23
/// TextMate highlighting engine
///
/// Uses syntect for TextMate grammar-based syntax highlighting.
/// This is WASM-compatible when syntect uses the `fancy-regex` feature.
///
/// An engine is bound to a single syntax and remembers the result of its most
/// recent parse so repeated requests for the same region can skip parsing.
pub struct TextMateEngine {
    /// Shared syntax set; supplies the syntax definitions used while parsing.
    syntax_set: Arc<SyntaxSet>,
    /// Index of the active syntax within `syntax_set.syntaxes()`.
    syntax_index: usize,
    /// Spans from the most recent parse; `None` before the first parse and
    /// after an invalidation.
    cache: Option<TextMateCache>,
    /// Buffer length recorded when `cache` was stored; a different length
    /// means the cache no longer matches the buffer.
    last_buffer_len: usize,
}
34
/// Result of one parse, kept so later viewport requests can reuse it.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Buffer byte range that was parsed (viewport plus surrounding context).
    range: Range<usize>,
    /// Categorized spans found in `range`, in buffer byte offsets.
    spans: Vec<CachedSpan>,
}
40
/// A highlighted region stored by category rather than by color, so the color
/// can be resolved against whichever theme is passed in at lookup time.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Buffer byte range covered by this span.
    range: Range<usize>,
    /// Highlight category assigned to the span.
    category: HighlightCategory,
}
46
47impl TextMateEngine {
48    /// Create a new TextMate engine for the given syntax
49    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
50        Self {
51            syntax_set,
52            syntax_index,
53            cache: None,
54            last_buffer_len: 0,
55        }
56    }
57
58    /// Create a TextMate engine for a file path
59    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
60        let syntax_set = registry.syntax_set_arc();
61
62        // Find syntax by file extension
63        let syntax = registry.find_syntax_for_file(path)?;
64
65        // Find the index of this syntax in the set
66        let index = syntax_set
67            .syntaxes()
68            .iter()
69            .position(|s| s.name == syntax.name)?;
70
71        Some(Self::new(syntax_set, index))
72    }
73
74    /// Highlight the visible viewport range
75    ///
76    /// `context_bytes` controls how far before/after the viewport to parse for accurate
77    /// highlighting of multi-line constructs (strings, comments, nested blocks).
78    pub fn highlight_viewport(
79        &mut self,
80        buffer: &Buffer,
81        viewport_start: usize,
82        viewport_end: usize,
83        theme: &Theme,
84        context_bytes: usize,
85    ) -> Vec<HighlightSpan> {
86        use syntect::parsing::{ParseState, ScopeStack};
87
88        // Check cache validity
89        if let Some(cache) = &self.cache {
90            if cache.range.start <= viewport_start
91                && cache.range.end >= viewport_end
92                && self.last_buffer_len == buffer.len()
93            {
94                return cache
95                    .spans
96                    .iter()
97                    .filter(|span| {
98                        span.range.start < viewport_end && span.range.end > viewport_start
99                    })
100                    .map(|span| HighlightSpan {
101                        range: span.range.clone(),
102                        color: highlight_color(span.category, theme),
103                    })
104                    .collect();
105            }
106        }
107
108        // Cache miss - parse viewport region
109        let parse_start = viewport_start.saturating_sub(context_bytes);
110        let parse_end = (viewport_end + context_bytes).min(buffer.len());
111
112        if parse_end <= parse_start || parse_end - parse_start > MAX_PARSE_BYTES {
113            return Vec::new();
114        }
115
116        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
117        let mut state = ParseState::new(syntax);
118        let mut spans = Vec::new();
119
120        // Get content
121        let content = buffer.slice_bytes(parse_start..parse_end);
122        let content_str = match std::str::from_utf8(&content) {
123            Ok(s) => s,
124            Err(_) => return Vec::new(),
125        };
126
127        // Parse line by line
128        let content_bytes = content_str.as_bytes();
129        let mut pos = 0;
130        let mut current_offset = parse_start;
131        let mut current_scopes = ScopeStack::new();
132
133        while pos < content_bytes.len() {
134            let line_start = pos;
135            let mut line_end = pos;
136
137            // Scan for line ending
138            while line_end < content_bytes.len() {
139                if content_bytes[line_end] == b'\n' {
140                    line_end += 1;
141                    break;
142                } else if content_bytes[line_end] == b'\r' {
143                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
144                        line_end += 2; // CRLF
145                    } else {
146                        line_end += 1; // CR only
147                    }
148                    break;
149                }
150                line_end += 1;
151            }
152
153            let line_bytes = &content_bytes[line_start..line_end];
154            let actual_line_byte_len = line_bytes.len();
155
156            let line_str = match std::str::from_utf8(line_bytes) {
157                Ok(s) => s,
158                Err(_) => {
159                    pos = line_end;
160                    current_offset += actual_line_byte_len;
161                    continue;
162                }
163            };
164
165            // Prepare line for syntect
166            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
167            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
168                format!("{}\n", line_content)
169            } else {
170                line_content.to_string()
171            };
172
173            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
174                Ok(ops) => ops,
175                Err(_) => {
176                    pos = line_end;
177                    current_offset += actual_line_byte_len;
178                    continue;
179                }
180            };
181
182            // Convert operations to spans
183            let mut syntect_offset = 0;
184            let line_content_len = line_content.len();
185
186            for (op_offset, op) in ops {
187                let clamped_op_offset = op_offset.min(line_content_len);
188                if clamped_op_offset > syntect_offset {
189                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
190                        let byte_start = current_offset + syntect_offset;
191                        let byte_end = current_offset + clamped_op_offset;
192                        if byte_start < byte_end {
193                            spans.push(CachedSpan {
194                                range: byte_start..byte_end,
195                                category,
196                            });
197                        }
198                    }
199                }
200                syntect_offset = clamped_op_offset;
201                let _ = current_scopes.apply(&op);
202            }
203
204            // Handle remaining text on line
205            if syntect_offset < line_content_len {
206                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
207                    let byte_start = current_offset + syntect_offset;
208                    let byte_end = current_offset + line_content_len;
209                    if byte_start < byte_end {
210                        spans.push(CachedSpan {
211                            range: byte_start..byte_end,
212                            category,
213                        });
214                    }
215                }
216            }
217
218            pos = line_end;
219            current_offset += actual_line_byte_len;
220        }
221
222        // Merge adjacent spans
223        Self::merge_adjacent_spans(&mut spans);
224
225        // Update cache
226        self.cache = Some(TextMateCache {
227            range: parse_start..parse_end,
228            spans: spans.clone(),
229        });
230        self.last_buffer_len = buffer.len();
231
232        // Filter and resolve colors
233        spans
234            .into_iter()
235            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
236            .map(|span| HighlightSpan {
237                range: span.range,
238                color: highlight_color(span.category, theme),
239            })
240            .collect()
241    }
242
243    /// Map scope stack to highlight category
244    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
245        for scope in scopes.as_slice().iter().rev() {
246            let scope_str = scope.build_string();
247            if let Some(cat) = scope_to_category(&scope_str) {
248                return Some(cat);
249            }
250        }
251        None
252    }
253
254    /// Merge adjacent spans with same category
255    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
256        if spans.len() < 2 {
257            return;
258        }
259
260        let mut write_idx = 0;
261        for read_idx in 1..spans.len() {
262            if spans[write_idx].category == spans[read_idx].category
263                && spans[write_idx].range.end == spans[read_idx].range.start
264            {
265                spans[write_idx].range.end = spans[read_idx].range.end;
266            } else {
267                write_idx += 1;
268                if write_idx != read_idx {
269                    spans[write_idx] = spans[read_idx].clone();
270                }
271            }
272        }
273        spans.truncate(write_idx + 1);
274    }
275
276    /// Invalidate cache for edited range
277    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
278        if let Some(cache) = &self.cache {
279            if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
280                self.cache = None;
281            }
282        }
283    }
284
285    /// Invalidate all cache
286    pub fn invalidate_all(&mut self) {
287        self.cache = None;
288    }
289
290    /// Get syntax name
291    pub fn syntax_name(&self) -> &str {
292        &self.syntax_set.syntaxes()[self.syntax_index].name
293    }
294}
295
296/// Map TextMate scope to highlight category
297fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
298    let scope_lower = scope.to_lowercase();
299
300    // Comments - highest priority
301    if scope_lower.starts_with("comment") {
302        return Some(HighlightCategory::Comment);
303    }
304
305    // Strings
306    if scope_lower.starts_with("string") {
307        return Some(HighlightCategory::String);
308    }
309
310    // Markdown/markup scopes
311    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
312        return Some(HighlightCategory::Keyword);
313    }
314    if scope_lower.starts_with("markup.bold") {
315        return Some(HighlightCategory::Constant);
316    }
317    if scope_lower.starts_with("markup.italic") {
318        return Some(HighlightCategory::Variable);
319    }
320    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
321        return Some(HighlightCategory::String);
322    }
323    if scope_lower.starts_with("markup.underline.link")
324        || scope_lower.starts_with("markup.underline")
325    {
326        return Some(HighlightCategory::Function);
327    }
328    if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
329        return Some(HighlightCategory::Comment);
330    }
331    if scope_lower.starts_with("markup.list") {
332        return Some(HighlightCategory::Operator);
333    }
334
335    // Keywords (but not keyword.operator)
336    if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
337        return Some(HighlightCategory::Keyword);
338    }
339
340    // Operators
341    if scope_lower.starts_with("keyword.operator") || scope_lower.starts_with("punctuation") {
342        return Some(HighlightCategory::Operator);
343    }
344
345    // Functions
346    if scope_lower.starts_with("entity.name.function")
347        || scope_lower.starts_with("meta.function-call")
348        || scope_lower.starts_with("support.function")
349    {
350        return Some(HighlightCategory::Function);
351    }
352
353    // Types
354    if scope_lower.starts_with("entity.name.type")
355        || scope_lower.starts_with("storage.type")
356        || scope_lower.starts_with("support.type")
357        || scope_lower.starts_with("entity.name.class")
358    {
359        return Some(HighlightCategory::Type);
360    }
361
362    // Constants and numbers
363    if scope_lower.starts_with("constant.numeric")
364        || scope_lower.starts_with("constant.language")
365        || scope_lower.starts_with("constant.character")
366    {
367        return Some(HighlightCategory::Constant);
368    }
369    if scope_lower.starts_with("constant") {
370        return Some(HighlightCategory::Constant);
371    }
372
373    // Variables and parameters
374    if scope_lower.starts_with("variable.parameter") {
375        return Some(HighlightCategory::Variable);
376    }
377    if scope_lower.starts_with("variable") {
378        return Some(HighlightCategory::Variable);
379    }
380
381    // Storage modifiers (pub, static, const, etc.)
382    if scope_lower.starts_with("storage.modifier") {
383        return Some(HighlightCategory::Keyword);
384    }
385
386    // Entity names (catch-all for other named things)
387    if scope_lower.starts_with("entity.name") {
388        return Some(HighlightCategory::Function);
389    }
390
391    None
392}
393
#[cfg(test)]
mod tests {
    use super::*;

    /// One representative scope per category must map to that category.
    #[test]
    fn test_scope_to_category() {
        let expectations = [
            ("comment.line", HighlightCategory::Comment),
            ("string.quoted", HighlightCategory::String),
            ("keyword.control", HighlightCategory::Keyword),
            ("keyword.operator", HighlightCategory::Operator),
            ("entity.name.function", HighlightCategory::Function),
            ("constant.numeric", HighlightCategory::Constant),
            ("variable.parameter", HighlightCategory::Variable),
        ];

        for &(scope, expected) in &expectations {
            assert_eq!(scope_to_category(scope), Some(expected), "scope: {}", scope);
        }
    }
}