Skip to main content

fresh/primitives/
textmate_engine.rs

//! TextMate-based syntax highlighting engine (WASM-compatible)
//!
//! This module provides syntax highlighting using syntect's TextMate grammar engine.
//! It is fully WASM-compatible because syntect can use a pure-Rust regex engine (fancy-regex).
//!
//! # Features
//!
//! - Syntax highlighting for 100+ languages via TextMate grammars
//! - Viewport-based highlighting with caching for performance
//! - No tree-sitter or native code dependencies
11
12use crate::model::buffer::Buffer;
13use crate::model::marker::{MarkerId, MarkerList};
14use crate::primitives::grammar::GrammarRegistry;
15use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
16use crate::view::theme::Theme;
17use std::collections::HashMap;
18use std::ops::Range;
19use std::path::Path;
20use std::sync::Arc;
21use syntect::parsing::SyntaxSet;
22
/// Size (1 MiB) of the look-back window searched for a checkpoint to resume
/// from. Parse requests that end at or below this offset restart from the
/// beginning of the buffer when no checkpoint is available.
const MAX_PARSE_BYTES: usize = 1 << 20;

/// Minimum number of bytes parsed between two checkpoint attempts.
/// A checkpoint is only recorded when no other marker lies within half this
/// distance on either side.
const CHECKPOINT_INTERVAL: usize = 256;
28
29/// TextMate highlighting engine (WASM-compatible)
30///
31/// Marker-based checkpoint system identical to the runtime engine in
32/// `highlight_engine.rs`. See that file for detailed documentation.
33pub struct TextMateEngine {
34    syntax_set: Arc<SyntaxSet>,
35    syntax_index: usize,
36    checkpoint_markers: MarkerList,
37    checkpoint_states:
38        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
39    dirty_from: Option<usize>,
40    cache: Option<TextMateCache>,
41    last_buffer_len: usize,
42}
43
44#[derive(Debug, Clone)]
45struct TextMateCache {
46    range: Range<usize>,
47    spans: Vec<CachedSpan>,
48}
49
50#[derive(Debug, Clone)]
51struct CachedSpan {
52    range: Range<usize>,
53    category: HighlightCategory,
54}
55
56impl TextMateEngine {
57    /// Create a new TextMate engine for the given syntax
58    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
59        Self {
60            syntax_set,
61            syntax_index,
62            checkpoint_markers: MarkerList::new(),
63            checkpoint_states: HashMap::new(),
64            dirty_from: None,
65            cache: None,
66            last_buffer_len: 0,
67        }
68    }
69
70    /// Create a TextMate engine for a file path.
71    ///
72    /// Purely metadata-based: resolves the grammar by filename/extension via
73    /// the catalog. Shebang / first-line detection is not applied here —
74    /// callers with buffer content should go through
75    /// `DetectedLanguage::from_path`, which handles that fallback.
76    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
77        let syntax_set = registry.syntax_set_arc();
78        let syntax = registry.find_syntax_for_file(path)?;
79
80        // Find the index of this syntax in the set
81        let index = syntax_set
82            .syntaxes()
83            .iter()
84            .position(|s| s.name == syntax.name)?;
85
86        Some(Self::new(syntax_set, index))
87    }
88
89    pub fn notify_insert(&mut self, position: usize, length: usize) {
90        self.checkpoint_markers.adjust_for_insert(position, length);
91        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
92    }
93
94    pub fn notify_delete(&mut self, position: usize, length: usize) {
95        self.checkpoint_markers.adjust_for_delete(position, length);
96        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
97    }
98
99    /// Highlight the visible viewport range. See runtime engine for detailed docs.
100    pub fn highlight_viewport(
101        &mut self,
102        buffer: &Buffer,
103        viewport_start: usize,
104        viewport_end: usize,
105        theme: &Theme,
106        context_bytes: usize,
107    ) -> Vec<HighlightSpan> {
108        if let Some(cache) = &self.cache {
109            if cache.range.start <= viewport_start
110                && cache.range.end >= viewport_end
111                && self.last_buffer_len == buffer.len()
112            {
113                return cache
114                    .spans
115                    .iter()
116                    .filter(|span| {
117                        span.range.start < viewport_end && span.range.end > viewport_start
118                    })
119                    .map(|span| HighlightSpan {
120                        range: span.range.clone(),
121                        color: highlight_color(span.category, theme),
122                        bg: None,
123                        category: Some(span.category),
124                    })
125                    .collect();
126            }
127        }
128
129        let desired_parse_start = viewport_start.saturating_sub(context_bytes);
130        let parse_end = (viewport_end + context_bytes).min(buffer.len());
131        if parse_end <= desired_parse_start {
132            return Vec::new();
133        }
134
135        if let Some(dirty) = self.dirty_from {
136            if dirty < parse_end {
137                self.run_convergence_walk(buffer, parse_end);
138            }
139        }
140
141        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
142        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
143            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
144
145        let content = buffer.slice_bytes(actual_start..parse_end);
146        let content_str = match std::str::from_utf8(&content) {
147            Ok(s) => s,
148            Err(_) => return Vec::new(),
149        };
150
151        let mut spans = Vec::new();
152        let content_bytes = content_str.as_bytes();
153        let mut pos = 0;
154        let mut current_offset = actual_start;
155        let mut bytes_since_checkpoint: usize = 0;
156
157        while pos < content_bytes.len() {
158            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
159                let nearby = self.checkpoint_markers.query_range(
160                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
161                    current_offset + CHECKPOINT_INTERVAL / 2,
162                );
163                if nearby.is_empty() {
164                    let marker_id = self.checkpoint_markers.create(current_offset, true);
165                    self.checkpoint_states
166                        .insert(marker_id, (state.clone(), current_scopes.clone()));
167                }
168                bytes_since_checkpoint = 0;
169            }
170
171            let mut line_end = pos;
172            while line_end < content_bytes.len() {
173                if content_bytes[line_end] == b'\n' {
174                    line_end += 1;
175                    break;
176                } else if content_bytes[line_end] == b'\r' {
177                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
178                        line_end += 2;
179                    } else {
180                        line_end += 1;
181                    }
182                    break;
183                }
184                line_end += 1;
185            }
186
187            let line_bytes = &content_bytes[pos..line_end];
188            let actual_line_byte_len = line_bytes.len();
189
190            let line_str = match std::str::from_utf8(line_bytes) {
191                Ok(s) => s,
192                Err(_) => {
193                    pos = line_end;
194                    current_offset += actual_line_byte_len;
195                    bytes_since_checkpoint += actual_line_byte_len;
196                    continue;
197                }
198            };
199
200            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
201            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
202                format!("{}\n", line_content)
203            } else {
204                line_content.to_string()
205            };
206
207            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
208                Ok(ops) => ops,
209                Err(_) => {
210                    pos = line_end;
211                    current_offset += actual_line_byte_len;
212                    bytes_since_checkpoint += actual_line_byte_len;
213                    continue;
214                }
215            };
216
217            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
218            let mut syntect_offset = 0;
219            let line_content_len = line_content.len();
220
221            for (op_offset, op) in ops {
222                let clamped_op_offset = op_offset.min(line_content_len);
223                if collect_spans && clamped_op_offset > syntect_offset {
224                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
225                        let byte_start = current_offset + syntect_offset;
226                        let byte_end = current_offset + clamped_op_offset;
227                        let clamped_start = byte_start.max(desired_parse_start);
228                        if clamped_start < byte_end {
229                            spans.push(CachedSpan {
230                                range: clamped_start..byte_end,
231                                category,
232                            });
233                        }
234                    }
235                }
236                syntect_offset = clamped_op_offset;
237                #[allow(clippy::let_underscore_must_use)]
238                let _ = current_scopes.apply(&op);
239            }
240
241            if collect_spans && syntect_offset < line_content_len {
242                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
243                    let byte_start = current_offset + syntect_offset;
244                    let byte_end = current_offset + line_content_len;
245                    let clamped_start = byte_start.max(desired_parse_start);
246                    if clamped_start < byte_end {
247                        spans.push(CachedSpan {
248                            range: clamped_start..byte_end,
249                            category,
250                        });
251                    }
252                }
253            }
254
255            pos = line_end;
256            current_offset += actual_line_byte_len;
257            bytes_since_checkpoint += actual_line_byte_len;
258        }
259
260        Self::merge_adjacent_spans(&mut spans);
261
262        self.cache = Some(TextMateCache {
263            range: desired_parse_start..parse_end,
264            spans: spans.clone(),
265        });
266        self.last_buffer_len = buffer.len();
267
268        spans
269            .into_iter()
270            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
271            .map(|span| {
272                let cat = span.category;
273                HighlightSpan {
274                    range: span.range,
275                    color: highlight_color(cat, theme),
276                    bg: None,
277                    category: Some(cat),
278                }
279            })
280            .collect()
281    }
282
283    fn run_convergence_walk(&mut self, buffer: &Buffer, walk_end: usize) {
284        let dirty = match self.dirty_from.take() {
285            Some(d) => d,
286            None => return,
287        };
288
289        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
290
291        let (resume_pos, mut state, mut current_scopes) = {
292            let search_start = dirty.saturating_sub(MAX_PARSE_BYTES);
293            let markers = self.checkpoint_markers.query_range(search_start, dirty);
294            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
295            if let Some((id, cp_pos, _)) = nearest {
296                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
297                    (cp_pos, s.clone(), sc.clone())
298                } else {
299                    self.checkpoint_markers.delete(id);
300                    (
301                        0,
302                        syntect::parsing::ParseState::new(syntax),
303                        syntect::parsing::ScopeStack::new(),
304                    )
305                }
306            } else if walk_end <= MAX_PARSE_BYTES {
307                (
308                    0,
309                    syntect::parsing::ParseState::new(syntax),
310                    syntect::parsing::ScopeStack::new(),
311                )
312            } else {
313                self.dirty_from = Some(dirty);
314                return;
315            }
316        };
317
318        let mut markers_ahead: Vec<(MarkerId, usize)> = self
319            .checkpoint_markers
320            .query_range(dirty, walk_end)
321            .into_iter()
322            .map(|(id, start, _)| (id, start))
323            .collect();
324        markers_ahead.sort_by_key(|(_, pos)| *pos);
325
326        if markers_ahead.is_empty() {
327            return;
328        }
329
330        let content_end = walk_end.min(buffer.len());
331        if resume_pos >= content_end {
332            return;
333        }
334        let content = buffer.slice_bytes(resume_pos..content_end);
335        let content_str = match std::str::from_utf8(&content) {
336            Ok(s) => s,
337            Err(_) => return,
338        };
339
340        let content_bytes = content_str.as_bytes();
341        let mut pos = 0;
342        let mut current_offset = resume_pos;
343        let mut marker_idx = 0;
344
345        while pos < content_bytes.len() && marker_idx < markers_ahead.len() {
346            let mut line_end = pos;
347            while line_end < content_bytes.len() {
348                if content_bytes[line_end] == b'\n' {
349                    line_end += 1;
350                    break;
351                } else if content_bytes[line_end] == b'\r' {
352                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
353                        line_end += 2;
354                    } else {
355                        line_end += 1;
356                    }
357                    break;
358                }
359                line_end += 1;
360            }
361
362            let line_bytes = &content_bytes[pos..line_end];
363            let actual_line_byte_len = line_bytes.len();
364
365            let line_str = match std::str::from_utf8(line_bytes) {
366                Ok(s) => s,
367                Err(_) => {
368                    pos = line_end;
369                    current_offset += actual_line_byte_len;
370                    continue;
371                }
372            };
373
374            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
375            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
376                format!("{}\n", line_content)
377            } else {
378                line_content.to_string()
379            };
380
381            if let Ok(ops) = state.parse_line(&line_for_syntect, &self.syntax_set) {
382                for (_op_offset, op) in ops {
383                    #[allow(clippy::let_underscore_must_use)]
384                    let _ = current_scopes.apply(&op);
385                }
386            }
387
388            pos = line_end;
389            current_offset += actual_line_byte_len;
390
391            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
392            {
393                let (marker_id, _) = markers_ahead[marker_idx];
394                marker_idx += 1;
395
396                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
397                    if state == stored.0 && current_scopes == stored.1 {
398                        return;
399                    }
400                }
401                self.checkpoint_states
402                    .insert(marker_id, (state.clone(), current_scopes.clone()));
403            }
404        }
405
406        if marker_idx < markers_ahead.len() {
407            self.dirty_from = Some(markers_ahead[marker_idx].1);
408        }
409    }
410
411    fn find_parse_resume_point(
412        &self,
413        desired_start: usize,
414        parse_end: usize,
415        syntax: &syntect::parsing::SyntaxReference,
416    ) -> (
417        usize,
418        syntect::parsing::ParseState,
419        syntect::parsing::ScopeStack,
420        bool,
421    ) {
422        use syntect::parsing::{ParseState, ScopeStack};
423
424        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
425        let markers = self
426            .checkpoint_markers
427            .query_range(search_start, desired_start + 1);
428        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
429
430        if let Some((id, cp_pos, _)) = nearest {
431            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
432                return (cp_pos, s.clone(), sc.clone(), true);
433            }
434        }
435        if parse_end <= MAX_PARSE_BYTES {
436            (0, ParseState::new(syntax), ScopeStack::new(), true)
437        } else {
438            (
439                desired_start,
440                ParseState::new(syntax),
441                ScopeStack::new(),
442                true,
443            )
444        }
445    }
446
447    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
448        for scope in scopes.as_slice().iter().rev() {
449            let scope_str = scope.build_string();
450            if let Some(cat) = scope_to_category(&scope_str) {
451                return Some(cat);
452            }
453        }
454        None
455    }
456
457    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
458        if spans.len() < 2 {
459            return;
460        }
461        let mut write_idx = 0;
462        for read_idx in 1..spans.len() {
463            if spans[write_idx].category == spans[read_idx].category
464                && spans[write_idx].range.end == spans[read_idx].range.start
465            {
466                spans[write_idx].range.end = spans[read_idx].range.end;
467            } else {
468                write_idx += 1;
469                if write_idx != read_idx {
470                    spans[write_idx] = spans[read_idx].clone();
471                }
472            }
473        }
474        spans.truncate(write_idx + 1);
475    }
476
477    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
478        if let Some(cache) = &self.cache {
479            if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
480                self.cache = None;
481            }
482        }
483    }
484
485    pub fn invalidate_all(&mut self) {
486        self.cache = None;
487        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
488        for id in ids {
489            self.checkpoint_markers.delete(id);
490        }
491        self.checkpoint_states.clear();
492        self.dirty_from = None;
493    }
494
495    pub fn syntax_name(&self) -> &str {
496        &self.syntax_set.syntaxes()[self.syntax_index].name
497    }
498}
499
500/// Map TextMate scope to highlight category
501fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
502    let scope_lower = scope.to_lowercase();
503
504    // Comments - highest priority
505    if scope_lower.starts_with("comment") {
506        return Some(HighlightCategory::Comment);
507    }
508
509    // Strings
510    if scope_lower.starts_with("string") {
511        return Some(HighlightCategory::String);
512    }
513
514    // Markdown/markup scopes
515    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
516        return Some(HighlightCategory::Keyword);
517    }
518    if scope_lower.starts_with("markup.bold") {
519        return Some(HighlightCategory::Constant);
520    }
521    if scope_lower.starts_with("markup.italic") {
522        return Some(HighlightCategory::Variable);
523    }
524    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
525        return Some(HighlightCategory::String);
526    }
527    if scope_lower.starts_with("markup.underline.link")
528        || scope_lower.starts_with("markup.underline")
529    {
530        return Some(HighlightCategory::Function);
531    }
532    if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
533        return Some(HighlightCategory::Comment);
534    }
535    if scope_lower.starts_with("markup.list") {
536        return Some(HighlightCategory::Operator);
537    }
538    // Diff markup: inserted/deleted lines
539    if scope_lower.starts_with("markup.inserted") {
540        return Some(HighlightCategory::String); // green
541    }
542    if scope_lower.starts_with("markup.deleted") {
543        return Some(HighlightCategory::Keyword); // red/magenta
544    }
545    // Diff metadata (range info like @@ -1,5 +1,6 @@)
546    if scope_lower.starts_with("meta.diff.range")
547        || scope_lower.starts_with("meta.diff.header")
548        || scope_lower.starts_with("meta.diff.index")
549    {
550        return Some(HighlightCategory::Function); // cyan/yellow
551    }
552    // Diff from-file/to-file headers (--- a/file, +++ b/file)
553    if scope_lower.starts_with("punctuation.definition.from-file")
554        || scope_lower.starts_with("punctuation.definition.to-file")
555    {
556        return Some(HighlightCategory::Type); // type color
557    }
558
559    // Keywords (but not keyword.operator)
560    if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
561        return Some(HighlightCategory::Keyword);
562    }
563
564    // Punctuation that belongs to a parent construct (comment/string delimiters)
565    // These must be checked before the generic punctuation rule below.
566    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
567    if scope_lower.starts_with("punctuation.definition.comment") {
568        return Some(HighlightCategory::Comment);
569    }
570    if scope_lower.starts_with("punctuation.definition.string") {
571        return Some(HighlightCategory::String);
572    }
573
574    // Operators (keyword.operator only)
575    if scope_lower.starts_with("keyword.operator") {
576        return Some(HighlightCategory::Operator);
577    }
578
579    // Punctuation brackets ({, }, (, ), [, ], <, >)
580    // Covers punctuation.section.*, punctuation.bracket.*,
581    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
582    if scope_lower.starts_with("punctuation.section")
583        || scope_lower.starts_with("punctuation.bracket")
584        || scope_lower.starts_with("punctuation.definition.array")
585        || scope_lower.starts_with("punctuation.definition.block")
586        || scope_lower.starts_with("punctuation.definition.brackets")
587        || scope_lower.starts_with("punctuation.definition.group")
588        || scope_lower.starts_with("punctuation.definition.inline-table")
589        || scope_lower.starts_with("punctuation.definition.section")
590        || scope_lower.starts_with("punctuation.definition.table")
591        || scope_lower.starts_with("punctuation.definition.tag")
592    {
593        return Some(HighlightCategory::PunctuationBracket);
594    }
595
596    // Punctuation delimiters (;, ,, .)
597    if scope_lower.starts_with("punctuation.separator")
598        || scope_lower.starts_with("punctuation.terminator")
599        || scope_lower.starts_with("punctuation.accessor")
600    {
601        return Some(HighlightCategory::PunctuationDelimiter);
602    }
603
604    // Functions
605    if scope_lower.starts_with("entity.name.function")
606        || scope_lower.starts_with("meta.function-call")
607        || scope_lower.starts_with("support.function")
608    {
609        return Some(HighlightCategory::Function);
610    }
611
612    // Types
613    if scope_lower.starts_with("entity.name.type")
614        || scope_lower.starts_with("storage.type")
615        || scope_lower.starts_with("support.type")
616        || scope_lower.starts_with("entity.name.class")
617    {
618        return Some(HighlightCategory::Type);
619    }
620
621    // Constants and numbers
622    if scope_lower.starts_with("constant.numeric")
623        || scope_lower.starts_with("constant.language")
624        || scope_lower.starts_with("constant.character")
625    {
626        return Some(HighlightCategory::Constant);
627    }
628    if scope_lower.starts_with("constant") {
629        return Some(HighlightCategory::Constant);
630    }
631
632    // Variables and parameters
633    if scope_lower.starts_with("variable.parameter") {
634        return Some(HighlightCategory::Variable);
635    }
636    if scope_lower.starts_with("variable") {
637        return Some(HighlightCategory::Variable);
638    }
639
640    // Storage modifiers (pub, static, const, etc.)
641    if scope_lower.starts_with("storage.modifier") {
642        return Some(HighlightCategory::Keyword);
643    }
644
645    // Entity names (catch-all for other named things)
646    if scope_lower.starts_with("entity.name") {
647        return Some(HighlightCategory::Function);
648    }
649
650    None
651}
652
#[cfg(test)]
mod tests {
    use super::*;

    /// Check every `(scope, expected category)` pair. The scope is part of
    /// the compared tuple so a failure shows which entry was wrong.
    fn assert_scope_categories(cases: &[(&str, HighlightCategory)]) {
        for &(scope, expected) in cases {
            assert_eq!((scope, scope_to_category(scope)), (scope, Some(expected)));
        }
    }

    #[test]
    fn test_scope_to_category() {
        assert_scope_categories(&[
            ("comment.line", HighlightCategory::Comment),
            ("string.quoted", HighlightCategory::String),
            ("keyword.control", HighlightCategory::Keyword),
            ("keyword.operator", HighlightCategory::Operator),
            ("entity.name.function", HighlightCategory::Function),
            ("constant.numeric", HighlightCategory::Constant),
            ("variable.parameter", HighlightCategory::Variable),
        ]);
    }

    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        // Comment delimiters (#, //, /*) should use comment color, not operator
        assert_scope_categories(&[
            ("punctuation.definition.comment", HighlightCategory::Comment),
            (
                "punctuation.definition.comment.python",
                HighlightCategory::Comment,
            ),
            (
                "punctuation.definition.comment.begin",
                HighlightCategory::Comment,
            ),
        ]);
    }

    #[test]
    fn test_string_delimiter_uses_string_color() {
        // String delimiters (", ', `) should use string color, not operator
        assert_scope_categories(&[
            (
                "punctuation.definition.string.begin",
                HighlightCategory::String,
            ),
            (
                "punctuation.definition.string.end",
                HighlightCategory::String,
            ),
        ]);
    }

    #[test]
    fn test_diff_scopes_produce_categories() {
        // Diff-specific scopes should map to categories
        assert_scope_categories(&[
            ("markup.inserted", HighlightCategory::String),
            ("markup.inserted.diff", HighlightCategory::String),
            ("markup.deleted", HighlightCategory::Keyword),
            ("markup.deleted.diff", HighlightCategory::Keyword),
            ("meta.diff.range", HighlightCategory::Function),
            ("meta.diff.header", HighlightCategory::Function),
        ]);
    }

    #[test]
    fn test_diff_parsing_produces_scopes() {
        use syntect::parsing::{ParseState, ScopeStack, SyntaxSet};

        let ss = SyntaxSet::load_defaults_newlines();
        let syntax = ss
            .find_syntax_by_extension("diff")
            .expect("Diff syntax should exist");
        let mut state = ParseState::new(syntax);

        let lines = [
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,3 +1,4 @@\n",
            " unchanged\n",
            "-removed line\n",
            "+added line\n",
        ];

        let mut found_inserted = false;
        let mut found_deleted = false;
        let mut found_range = false;
        let mut scopes = ScopeStack::new();

        for line in &lines {
            let ops = state.parse_line(line, &ss).unwrap();
            for (_offset, op) in &ops {
                scopes.apply(op).unwrap();
                // Flatten the whole stack so a match at any depth counts.
                let stack_names: Vec<String> =
                    scopes.as_slice().iter().map(|s| s.build_string()).collect();
                let scope_str = stack_names.join(" ");
                found_inserted |= scope_str.contains("markup.inserted");
                found_deleted |= scope_str.contains("markup.deleted");
                found_range |= scope_str.contains("meta.diff");
            }
        }

        eprintln!(
            "found_inserted={}, found_deleted={}, found_range={}",
            found_inserted, found_deleted, found_range
        );
        assert!(
            found_inserted || found_deleted || found_range,
            "Diff grammar should produce markup.inserted, markup.deleted, or meta.diff scopes"
        );
    }
}
803}