Skip to main content

fresh/primitives/
textmate_engine.rs

1//! TextMate-based syntax highlighting engine (WASM-compatible)
2//!
3//! This module provides syntax highlighting using syntect's TextMate grammar engine.
4//! It's completely WASM-compatible as syntect can use pure-Rust regex (fancy-regex).
5//!
6//! # Features
7//!
8//! - Syntax highlighting for 100+ languages via TextMate grammars
9//! - Viewport-based highlighting with caching for performance
10//! - No tree-sitter or native code dependencies
11
12use crate::model::buffer::Buffer;
13use crate::model::marker::{MarkerId, MarkerList};
14use crate::primitives::grammar::GrammarRegistry;
15use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
16use crate::view::theme::Theme;
17use std::collections::HashMap;
18use std::ops::Range;
19use std::path::Path;
20use std::sync::Arc;
21use syntect::parsing::SyntaxSet;
22
/// Upper bound (in bytes) on how far the engine searches back for a checkpoint,
/// and the largest end offset for which it will parse from the start of the buffer.
const MAX_PARSE_BYTES: usize = 1 << 20;

/// Number of parsed bytes after which a new parse-state checkpoint is considered.
const CHECKPOINT_INTERVAL: usize = 1 << 8;
28
29/// TextMate highlighting engine (WASM-compatible)
30///
31/// Marker-based checkpoint system identical to the runtime engine in
32/// `highlight_engine.rs`. See that file for detailed documentation.
///
/// Parser state is saved at checkpoint markers so that highlighting a viewport can
/// resume from a nearby checkpoint instead of always parsing from the start of the buffer.
33pub struct TextMateEngine {
    /// Shared set of syntax definitions; also handed to syntect when parsing lines.
34    syntax_set: Arc<SyntaxSet>,
    /// Index of the active syntax within `syntax_set.syntaxes()`.
35    syntax_index: usize,
    /// Buffer positions of the checkpoints; adjusted on every insert/delete notification.
36    checkpoint_markers: MarkerList,
    /// Parser state and scope stack saved for each checkpoint marker.
37    checkpoint_states:
38        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Lowest edited position reported since checkpoints were last re-validated, if any.
39    dirty_from: Option<usize>,
    /// Spans from the most recent parse, reused while they still cover the viewport.
40    cache: Option<TextMateCache>,
    /// Buffer length at the time `cache` was filled; a different length bypasses the cache.
41    last_buffer_len: usize,
42}
43
/// Result of the most recent viewport parse, kept so repeated requests for the same
/// region can be answered without re-parsing.
44#[derive(Debug, Clone)]
45struct TextMateCache {
    /// Byte range of the buffer that `spans` was computed for.
46    range: Range<usize>,
    /// Merged highlight spans found inside `range`, in increasing buffer order.
47    spans: Vec<CachedSpan>,
48}
49
/// A theme-independent highlight span; the colour is resolved from `category`
/// only when the span is handed out as a `HighlightSpan`.
50#[derive(Debug, Clone)]
51struct CachedSpan {
    /// Byte range in the buffer covered by this span.
52    range: Range<usize>,
    /// Highlight category derived from the scope stack active over `range`.
53    category: HighlightCategory,
54}
55
56impl TextMateEngine {
57    /// Create a new TextMate engine for the given syntax
58    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
59        Self {
60            syntax_set,
61            syntax_index,
62            checkpoint_markers: MarkerList::new(),
63            checkpoint_states: HashMap::new(),
64            dirty_from: None,
65            cache: None,
66            last_buffer_len: 0,
67        }
68    }
69
70    /// Create a TextMate engine for a file path
71    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
72        let syntax_set = registry.syntax_set_arc();
73
74        // Find syntax by file extension
75        let syntax = registry.find_syntax_for_file(path)?;
76
77        // Find the index of this syntax in the set
78        let index = syntax_set
79            .syntaxes()
80            .iter()
81            .position(|s| s.name == syntax.name)?;
82
83        Some(Self::new(syntax_set, index))
84    }
85
86    pub fn notify_insert(&mut self, position: usize, length: usize) {
87        self.checkpoint_markers.adjust_for_insert(position, length);
88        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
89    }
90
91    pub fn notify_delete(&mut self, position: usize, length: usize) {
92        self.checkpoint_markers.adjust_for_delete(position, length);
93        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
94    }
95
96    /// Highlight the visible viewport range. See runtime engine for detailed docs.
97    pub fn highlight_viewport(
98        &mut self,
99        buffer: &Buffer,
100        viewport_start: usize,
101        viewport_end: usize,
102        theme: &Theme,
103        context_bytes: usize,
104    ) -> Vec<HighlightSpan> {
105        if let Some(cache) = &self.cache {
106            if cache.range.start <= viewport_start
107                && cache.range.end >= viewport_end
108                && self.last_buffer_len == buffer.len()
109            {
110                return cache
111                    .spans
112                    .iter()
113                    .filter(|span| {
114                        span.range.start < viewport_end && span.range.end > viewport_start
115                    })
116                    .map(|span| HighlightSpan {
117                        range: span.range.clone(),
118                        color: highlight_color(span.category, theme),
119                        category: Some(span.category),
120                    })
121                    .collect();
122            }
123        }
124
125        let desired_parse_start = viewport_start.saturating_sub(context_bytes);
126        let parse_end = (viewport_end + context_bytes).min(buffer.len());
127        if parse_end <= desired_parse_start {
128            return Vec::new();
129        }
130
131        if let Some(dirty) = self.dirty_from {
132            if dirty < parse_end {
133                self.run_convergence_walk(buffer, parse_end);
134            }
135        }
136
137        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
138        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
139            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
140
141        let content = buffer.slice_bytes(actual_start..parse_end);
142        let content_str = match std::str::from_utf8(&content) {
143            Ok(s) => s,
144            Err(_) => return Vec::new(),
145        };
146
147        let mut spans = Vec::new();
148        let content_bytes = content_str.as_bytes();
149        let mut pos = 0;
150        let mut current_offset = actual_start;
151        let mut bytes_since_checkpoint: usize = 0;
152
153        while pos < content_bytes.len() {
154            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
155                let nearby = self.checkpoint_markers.query_range(
156                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
157                    current_offset + CHECKPOINT_INTERVAL / 2,
158                );
159                if nearby.is_empty() {
160                    let marker_id = self.checkpoint_markers.create(current_offset, true);
161                    self.checkpoint_states
162                        .insert(marker_id, (state.clone(), current_scopes.clone()));
163                }
164                bytes_since_checkpoint = 0;
165            }
166
167            let mut line_end = pos;
168            while line_end < content_bytes.len() {
169                if content_bytes[line_end] == b'\n' {
170                    line_end += 1;
171                    break;
172                } else if content_bytes[line_end] == b'\r' {
173                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
174                        line_end += 2;
175                    } else {
176                        line_end += 1;
177                    }
178                    break;
179                }
180                line_end += 1;
181            }
182
183            let line_bytes = &content_bytes[pos..line_end];
184            let actual_line_byte_len = line_bytes.len();
185
186            let line_str = match std::str::from_utf8(line_bytes) {
187                Ok(s) => s,
188                Err(_) => {
189                    pos = line_end;
190                    current_offset += actual_line_byte_len;
191                    bytes_since_checkpoint += actual_line_byte_len;
192                    continue;
193                }
194            };
195
196            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
197            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
198                format!("{}\n", line_content)
199            } else {
200                line_content.to_string()
201            };
202
203            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
204                Ok(ops) => ops,
205                Err(_) => {
206                    pos = line_end;
207                    current_offset += actual_line_byte_len;
208                    bytes_since_checkpoint += actual_line_byte_len;
209                    continue;
210                }
211            };
212
213            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
214            let mut syntect_offset = 0;
215            let line_content_len = line_content.len();
216
217            for (op_offset, op) in ops {
218                let clamped_op_offset = op_offset.min(line_content_len);
219                if collect_spans && clamped_op_offset > syntect_offset {
220                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
221                        let byte_start = current_offset + syntect_offset;
222                        let byte_end = current_offset + clamped_op_offset;
223                        let clamped_start = byte_start.max(desired_parse_start);
224                        if clamped_start < byte_end {
225                            spans.push(CachedSpan {
226                                range: clamped_start..byte_end,
227                                category,
228                            });
229                        }
230                    }
231                }
232                syntect_offset = clamped_op_offset;
233                #[allow(clippy::let_underscore_must_use)]
234                let _ = current_scopes.apply(&op);
235            }
236
237            if collect_spans && syntect_offset < line_content_len {
238                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
239                    let byte_start = current_offset + syntect_offset;
240                    let byte_end = current_offset + line_content_len;
241                    let clamped_start = byte_start.max(desired_parse_start);
242                    if clamped_start < byte_end {
243                        spans.push(CachedSpan {
244                            range: clamped_start..byte_end,
245                            category,
246                        });
247                    }
248                }
249            }
250
251            pos = line_end;
252            current_offset += actual_line_byte_len;
253            bytes_since_checkpoint += actual_line_byte_len;
254        }
255
256        Self::merge_adjacent_spans(&mut spans);
257
258        self.cache = Some(TextMateCache {
259            range: desired_parse_start..parse_end,
260            spans: spans.clone(),
261        });
262        self.last_buffer_len = buffer.len();
263
264        spans
265            .into_iter()
266            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
267            .map(|span| {
268                let cat = span.category;
269                HighlightSpan {
270                    range: span.range,
271                    color: highlight_color(cat, theme),
272                    category: Some(cat),
273                }
274            })
275            .collect()
276    }
277
278    fn run_convergence_walk(&mut self, buffer: &Buffer, walk_end: usize) {
279        let dirty = match self.dirty_from.take() {
280            Some(d) => d,
281            None => return,
282        };
283
284        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
285
286        let (resume_pos, mut state, mut current_scopes) = {
287            let search_start = dirty.saturating_sub(MAX_PARSE_BYTES);
288            let markers = self.checkpoint_markers.query_range(search_start, dirty);
289            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
290            if let Some((id, cp_pos, _)) = nearest {
291                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
292                    (cp_pos, s.clone(), sc.clone())
293                } else {
294                    self.checkpoint_markers.delete(id);
295                    (
296                        0,
297                        syntect::parsing::ParseState::new(syntax),
298                        syntect::parsing::ScopeStack::new(),
299                    )
300                }
301            } else if walk_end <= MAX_PARSE_BYTES {
302                (
303                    0,
304                    syntect::parsing::ParseState::new(syntax),
305                    syntect::parsing::ScopeStack::new(),
306                )
307            } else {
308                self.dirty_from = Some(dirty);
309                return;
310            }
311        };
312
313        let mut markers_ahead: Vec<(MarkerId, usize)> = self
314            .checkpoint_markers
315            .query_range(dirty, walk_end)
316            .into_iter()
317            .map(|(id, start, _)| (id, start))
318            .collect();
319        markers_ahead.sort_by_key(|(_, pos)| *pos);
320
321        if markers_ahead.is_empty() {
322            return;
323        }
324
325        let content_end = walk_end.min(buffer.len());
326        if resume_pos >= content_end {
327            return;
328        }
329        let content = buffer.slice_bytes(resume_pos..content_end);
330        let content_str = match std::str::from_utf8(&content) {
331            Ok(s) => s,
332            Err(_) => return,
333        };
334
335        let content_bytes = content_str.as_bytes();
336        let mut pos = 0;
337        let mut current_offset = resume_pos;
338        let mut marker_idx = 0;
339
340        while pos < content_bytes.len() && marker_idx < markers_ahead.len() {
341            let mut line_end = pos;
342            while line_end < content_bytes.len() {
343                if content_bytes[line_end] == b'\n' {
344                    line_end += 1;
345                    break;
346                } else if content_bytes[line_end] == b'\r' {
347                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
348                        line_end += 2;
349                    } else {
350                        line_end += 1;
351                    }
352                    break;
353                }
354                line_end += 1;
355            }
356
357            let line_bytes = &content_bytes[pos..line_end];
358            let actual_line_byte_len = line_bytes.len();
359
360            let line_str = match std::str::from_utf8(line_bytes) {
361                Ok(s) => s,
362                Err(_) => {
363                    pos = line_end;
364                    current_offset += actual_line_byte_len;
365                    continue;
366                }
367            };
368
369            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
370            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
371                format!("{}\n", line_content)
372            } else {
373                line_content.to_string()
374            };
375
376            if let Ok(ops) = state.parse_line(&line_for_syntect, &self.syntax_set) {
377                for (_op_offset, op) in ops {
378                    #[allow(clippy::let_underscore_must_use)]
379                    let _ = current_scopes.apply(&op);
380                }
381            }
382
383            pos = line_end;
384            current_offset += actual_line_byte_len;
385
386            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
387            {
388                let (marker_id, _) = markers_ahead[marker_idx];
389                marker_idx += 1;
390
391                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
392                    if state == stored.0 && current_scopes == stored.1 {
393                        return;
394                    }
395                }
396                self.checkpoint_states
397                    .insert(marker_id, (state.clone(), current_scopes.clone()));
398            }
399        }
400
401        if marker_idx < markers_ahead.len() {
402            self.dirty_from = Some(markers_ahead[marker_idx].1);
403        }
404    }
405
406    fn find_parse_resume_point(
407        &self,
408        desired_start: usize,
409        parse_end: usize,
410        syntax: &syntect::parsing::SyntaxReference,
411    ) -> (
412        usize,
413        syntect::parsing::ParseState,
414        syntect::parsing::ScopeStack,
415        bool,
416    ) {
417        use syntect::parsing::{ParseState, ScopeStack};
418
419        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
420        let markers = self
421            .checkpoint_markers
422            .query_range(search_start, desired_start + 1);
423        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
424
425        if let Some((id, cp_pos, _)) = nearest {
426            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
427                return (cp_pos, s.clone(), sc.clone(), true);
428            }
429        }
430        if parse_end <= MAX_PARSE_BYTES {
431            (0, ParseState::new(syntax), ScopeStack::new(), true)
432        } else {
433            (
434                desired_start,
435                ParseState::new(syntax),
436                ScopeStack::new(),
437                true,
438            )
439        }
440    }
441
442    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
443        for scope in scopes.as_slice().iter().rev() {
444            let scope_str = scope.build_string();
445            if let Some(cat) = scope_to_category(&scope_str) {
446                return Some(cat);
447            }
448        }
449        None
450    }
451
452    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
453        if spans.len() < 2 {
454            return;
455        }
456        let mut write_idx = 0;
457        for read_idx in 1..spans.len() {
458            if spans[write_idx].category == spans[read_idx].category
459                && spans[write_idx].range.end == spans[read_idx].range.start
460            {
461                spans[write_idx].range.end = spans[read_idx].range.end;
462            } else {
463                write_idx += 1;
464                if write_idx != read_idx {
465                    spans[write_idx] = spans[read_idx].clone();
466                }
467            }
468        }
469        spans.truncate(write_idx + 1);
470    }
471
472    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
473        if let Some(cache) = &self.cache {
474            if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
475                self.cache = None;
476            }
477        }
478    }
479
480    pub fn invalidate_all(&mut self) {
481        self.cache = None;
482        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
483        for id in ids {
484            self.checkpoint_markers.delete(id);
485        }
486        self.checkpoint_states.clear();
487        self.dirty_from = None;
488    }
489
490    pub fn syntax_name(&self) -> &str {
491        &self.syntax_set.syntaxes()[self.syntax_index].name
492    }
493}
494
495/// Map TextMate scope to highlight category
496fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
497    let scope_lower = scope.to_lowercase();
498
499    // Comments - highest priority
500    if scope_lower.starts_with("comment") {
501        return Some(HighlightCategory::Comment);
502    }
503
504    // Strings
505    if scope_lower.starts_with("string") {
506        return Some(HighlightCategory::String);
507    }
508
509    // Markdown/markup scopes
510    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
511        return Some(HighlightCategory::Keyword);
512    }
513    if scope_lower.starts_with("markup.bold") {
514        return Some(HighlightCategory::Constant);
515    }
516    if scope_lower.starts_with("markup.italic") {
517        return Some(HighlightCategory::Variable);
518    }
519    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
520        return Some(HighlightCategory::String);
521    }
522    if scope_lower.starts_with("markup.underline.link")
523        || scope_lower.starts_with("markup.underline")
524    {
525        return Some(HighlightCategory::Function);
526    }
527    if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
528        return Some(HighlightCategory::Comment);
529    }
530    if scope_lower.starts_with("markup.list") {
531        return Some(HighlightCategory::Operator);
532    }
533    // Diff markup: inserted/deleted lines
534    if scope_lower.starts_with("markup.inserted") {
535        return Some(HighlightCategory::String); // green
536    }
537    if scope_lower.starts_with("markup.deleted") {
538        return Some(HighlightCategory::Keyword); // red/magenta
539    }
540    // Diff metadata (range info like @@ -1,5 +1,6 @@)
541    if scope_lower.starts_with("meta.diff.range")
542        || scope_lower.starts_with("meta.diff.header")
543        || scope_lower.starts_with("meta.diff.index")
544    {
545        return Some(HighlightCategory::Function); // cyan/yellow
546    }
547    // Diff from-file/to-file headers (--- a/file, +++ b/file)
548    if scope_lower.starts_with("punctuation.definition.from-file")
549        || scope_lower.starts_with("punctuation.definition.to-file")
550    {
551        return Some(HighlightCategory::Type); // type color
552    }
553
554    // Keywords (but not keyword.operator)
555    if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
556        return Some(HighlightCategory::Keyword);
557    }
558
559    // Punctuation that belongs to a parent construct (comment/string delimiters)
560    // These must be checked before the generic punctuation rule below.
561    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
562    if scope_lower.starts_with("punctuation.definition.comment") {
563        return Some(HighlightCategory::Comment);
564    }
565    if scope_lower.starts_with("punctuation.definition.string") {
566        return Some(HighlightCategory::String);
567    }
568
569    // Operators (keyword.operator only)
570    if scope_lower.starts_with("keyword.operator") {
571        return Some(HighlightCategory::Operator);
572    }
573
574    // Punctuation brackets ({, }, (, ), [, ], <, >)
575    // Covers punctuation.section.*, punctuation.bracket.*,
576    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
577    if scope_lower.starts_with("punctuation.section")
578        || scope_lower.starts_with("punctuation.bracket")
579        || scope_lower.starts_with("punctuation.definition.array")
580        || scope_lower.starts_with("punctuation.definition.block")
581        || scope_lower.starts_with("punctuation.definition.brackets")
582        || scope_lower.starts_with("punctuation.definition.group")
583        || scope_lower.starts_with("punctuation.definition.inline-table")
584        || scope_lower.starts_with("punctuation.definition.section")
585        || scope_lower.starts_with("punctuation.definition.table")
586        || scope_lower.starts_with("punctuation.definition.tag")
587    {
588        return Some(HighlightCategory::PunctuationBracket);
589    }
590
591    // Punctuation delimiters (;, ,, .)
592    if scope_lower.starts_with("punctuation.separator")
593        || scope_lower.starts_with("punctuation.terminator")
594        || scope_lower.starts_with("punctuation.accessor")
595    {
596        return Some(HighlightCategory::PunctuationDelimiter);
597    }
598
599    // Functions
600    if scope_lower.starts_with("entity.name.function")
601        || scope_lower.starts_with("meta.function-call")
602        || scope_lower.starts_with("support.function")
603    {
604        return Some(HighlightCategory::Function);
605    }
606
607    // Types
608    if scope_lower.starts_with("entity.name.type")
609        || scope_lower.starts_with("storage.type")
610        || scope_lower.starts_with("support.type")
611        || scope_lower.starts_with("entity.name.class")
612    {
613        return Some(HighlightCategory::Type);
614    }
615
616    // Constants and numbers
617    if scope_lower.starts_with("constant.numeric")
618        || scope_lower.starts_with("constant.language")
619        || scope_lower.starts_with("constant.character")
620    {
621        return Some(HighlightCategory::Constant);
622    }
623    if scope_lower.starts_with("constant") {
624        return Some(HighlightCategory::Constant);
625    }
626
627    // Variables and parameters
628    if scope_lower.starts_with("variable.parameter") {
629        return Some(HighlightCategory::Variable);
630    }
631    if scope_lower.starts_with("variable") {
632        return Some(HighlightCategory::Variable);
633    }
634
635    // Storage modifiers (pub, static, const, etc.)
636    if scope_lower.starts_with("storage.modifier") {
637        return Some(HighlightCategory::Keyword);
638    }
639
640    // Entity names (catch-all for other named things)
641    if scope_lower.starts_with("entity.name") {
642        return Some(HighlightCategory::Function);
643    }
644
645    None
646}
647
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `scope` maps to `expected`; failures are reported at the call site.
    #[track_caller]
    fn expect_category(scope: &str, expected: HighlightCategory) {
        assert_eq!(scope_to_category(scope), Some(expected));
    }

    #[test]
    fn test_scope_to_category() {
        expect_category("comment.line", HighlightCategory::Comment);
        expect_category("string.quoted", HighlightCategory::String);
        expect_category("keyword.control", HighlightCategory::Keyword);
        expect_category("keyword.operator", HighlightCategory::Operator);
        expect_category("entity.name.function", HighlightCategory::Function);
        expect_category("constant.numeric", HighlightCategory::Constant);
        expect_category("variable.parameter", HighlightCategory::Variable);
    }

    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        // Comment delimiters (#, //, /*) take the comment colour, not the operator one.
        expect_category("punctuation.definition.comment", HighlightCategory::Comment);
        expect_category(
            "punctuation.definition.comment.python",
            HighlightCategory::Comment,
        );
        expect_category(
            "punctuation.definition.comment.begin",
            HighlightCategory::Comment,
        );
    }

    #[test]
    fn test_string_delimiter_uses_string_color() {
        // String delimiters (", ', `) take the string colour, not the operator one.
        expect_category(
            "punctuation.definition.string.begin",
            HighlightCategory::String,
        );
        expect_category(
            "punctuation.definition.string.end",
            HighlightCategory::String,
        );
    }

    #[test]
    fn test_diff_scopes_produce_categories() {
        // Every diff-specific scope must map to some category.
        expect_category("markup.inserted", HighlightCategory::String);
        expect_category("markup.inserted.diff", HighlightCategory::String);
        expect_category("markup.deleted", HighlightCategory::Keyword);
        expect_category("markup.deleted.diff", HighlightCategory::Keyword);
        expect_category("meta.diff.range", HighlightCategory::Function);
        expect_category("meta.diff.header", HighlightCategory::Function);
    }

    #[test]
    fn test_diff_parsing_produces_scopes() {
        use syntect::parsing::{ParseState, ScopeStack, SyntaxSet};

        let ss = SyntaxSet::load_defaults_newlines();
        let syntax = ss
            .find_syntax_by_extension("diff")
            .expect("Diff syntax should exist");
        let mut state = ParseState::new(syntax);
        let mut scopes = ScopeStack::new();

        let lines = [
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,3 +1,4 @@\n",
            " unchanged\n",
            "-removed line\n",
            "+added line\n",
        ];

        let (mut found_inserted, mut found_deleted, mut found_range) = (false, false, false);

        for line in lines.iter().copied() {
            let ops = state.parse_line(line, &ss).unwrap();
            for (_, op) in &ops {
                scopes.apply(op).unwrap();
                // Inspect the whole stack after each op, joined into one string.
                let stack = scopes
                    .as_slice()
                    .iter()
                    .map(|s| s.build_string())
                    .collect::<Vec<_>>()
                    .join(" ");
                found_inserted |= stack.contains("markup.inserted");
                found_deleted |= stack.contains("markup.deleted");
                found_range |= stack.contains("meta.diff");
            }
        }

        eprintln!(
            "found_inserted={}, found_deleted={}, found_range={}",
            found_inserted, found_deleted, found_range
        );
        assert!(
            found_inserted || found_deleted || found_range,
            "Diff grammar should produce markup.inserted, markup.deleted, or meta.diff scopes"
        );
    }
}