Skip to main content

fresh/primitives/
textmate_engine.rs

//! TextMate-based syntax highlighting engine (WASM-compatible)
//!
//! This module provides syntax highlighting using syntect's TextMate grammar engine.
//! It is fully WASM-compatible because syntect can use a pure-Rust regex engine (fancy-regex).
//!
//! # Features
//!
//! - Syntax highlighting for 100+ languages via TextMate grammars
//! - Viewport-based highlighting with caching for performance
//! - No tree-sitter or native code dependencies
11
12use crate::model::buffer::Buffer;
13use crate::model::marker::{MarkerId, MarkerList};
14use crate::primitives::grammar::GrammarRegistry;
15use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
16use crate::view::theme::Theme;
17use std::collections::HashMap;
18use std::ops::Range;
19use std::path::Path;
20use std::sync::Arc;
21use syntect::parsing::SyntaxSet;
22
23/// Maximum bytes to parse in a single operation
24const MAX_PARSE_BYTES: usize = 1024 * 1024;
25
26/// Interval between parse state checkpoints (in bytes).
27const CHECKPOINT_INTERVAL: usize = 256;
28
29/// TextMate highlighting engine (WASM-compatible)
30///
31/// Marker-based checkpoint system identical to the runtime engine in
32/// `highlight_engine.rs`. See that file for detailed documentation.
33pub struct TextMateEngine {
34    syntax_set: Arc<SyntaxSet>,
35    syntax_index: usize,
36    checkpoint_markers: MarkerList,
37    checkpoint_states:
38        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
39    dirty_from: Option<usize>,
40    cache: Option<TextMateCache>,
41    last_buffer_len: usize,
42}
43
44#[derive(Debug, Clone)]
45struct TextMateCache {
46    range: Range<usize>,
47    spans: Vec<CachedSpan>,
48}
49
50#[derive(Debug, Clone)]
51struct CachedSpan {
52    range: Range<usize>,
53    category: HighlightCategory,
54}
55
56impl TextMateEngine {
57    /// Create a new TextMate engine for the given syntax
58    pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
59        Self {
60            syntax_set,
61            syntax_index,
62            checkpoint_markers: MarkerList::new(),
63            checkpoint_states: HashMap::new(),
64            dirty_from: None,
65            cache: None,
66            last_buffer_len: 0,
67        }
68    }
69
70    /// Create a TextMate engine for a file path.
71    ///
72    /// Purely metadata-based: resolves the grammar by filename/extension via
73    /// the catalog. Shebang / first-line detection is not applied here —
74    /// callers with buffer content should go through
75    /// `DetectedLanguage::from_path`, which handles that fallback.
76    pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
77        let syntax_set = registry.syntax_set_arc();
78        let syntax = registry.find_syntax_for_file(path)?;
79
80        // Find the index of this syntax in the set
81        let index = syntax_set
82            .syntaxes()
83            .iter()
84            .position(|s| s.name == syntax.name)?;
85
86        Some(Self::new(syntax_set, index))
87    }
88
89    pub fn notify_insert(&mut self, position: usize, length: usize) {
90        self.checkpoint_markers.adjust_for_insert(position, length);
91        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
92    }
93
94    pub fn notify_delete(&mut self, position: usize, length: usize) {
95        self.checkpoint_markers.adjust_for_delete(position, length);
96        self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
97    }
98
99    /// Highlight the visible viewport range. See runtime engine for detailed docs.
100    pub fn highlight_viewport(
101        &mut self,
102        buffer: &Buffer,
103        viewport_start: usize,
104        viewport_end: usize,
105        theme: &Theme,
106        context_bytes: usize,
107    ) -> Vec<HighlightSpan> {
108        if let Some(cache) = &self.cache {
109            if cache.range.start <= viewport_start
110                && cache.range.end >= viewport_end
111                && self.last_buffer_len == buffer.len()
112            {
113                return cache
114                    .spans
115                    .iter()
116                    .filter(|span| {
117                        span.range.start < viewport_end && span.range.end > viewport_start
118                    })
119                    .map(|span| HighlightSpan {
120                        range: span.range.clone(),
121                        color: highlight_color(span.category, theme),
122                        category: Some(span.category),
123                    })
124                    .collect();
125            }
126        }
127
128        let desired_parse_start = viewport_start.saturating_sub(context_bytes);
129        let parse_end = (viewport_end + context_bytes).min(buffer.len());
130        if parse_end <= desired_parse_start {
131            return Vec::new();
132        }
133
134        if let Some(dirty) = self.dirty_from {
135            if dirty < parse_end {
136                self.run_convergence_walk(buffer, parse_end);
137            }
138        }
139
140        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
141        let (actual_start, mut state, mut current_scopes, create_checkpoints) =
142            self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
143
144        let content = buffer.slice_bytes(actual_start..parse_end);
145        let content_str = match std::str::from_utf8(&content) {
146            Ok(s) => s,
147            Err(_) => return Vec::new(),
148        };
149
150        let mut spans = Vec::new();
151        let content_bytes = content_str.as_bytes();
152        let mut pos = 0;
153        let mut current_offset = actual_start;
154        let mut bytes_since_checkpoint: usize = 0;
155
156        while pos < content_bytes.len() {
157            if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
158                let nearby = self.checkpoint_markers.query_range(
159                    current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
160                    current_offset + CHECKPOINT_INTERVAL / 2,
161                );
162                if nearby.is_empty() {
163                    let marker_id = self.checkpoint_markers.create(current_offset, true);
164                    self.checkpoint_states
165                        .insert(marker_id, (state.clone(), current_scopes.clone()));
166                }
167                bytes_since_checkpoint = 0;
168            }
169
170            let mut line_end = pos;
171            while line_end < content_bytes.len() {
172                if content_bytes[line_end] == b'\n' {
173                    line_end += 1;
174                    break;
175                } else if content_bytes[line_end] == b'\r' {
176                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
177                        line_end += 2;
178                    } else {
179                        line_end += 1;
180                    }
181                    break;
182                }
183                line_end += 1;
184            }
185
186            let line_bytes = &content_bytes[pos..line_end];
187            let actual_line_byte_len = line_bytes.len();
188
189            let line_str = match std::str::from_utf8(line_bytes) {
190                Ok(s) => s,
191                Err(_) => {
192                    pos = line_end;
193                    current_offset += actual_line_byte_len;
194                    bytes_since_checkpoint += actual_line_byte_len;
195                    continue;
196                }
197            };
198
199            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
200            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
201                format!("{}\n", line_content)
202            } else {
203                line_content.to_string()
204            };
205
206            let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
207                Ok(ops) => ops,
208                Err(_) => {
209                    pos = line_end;
210                    current_offset += actual_line_byte_len;
211                    bytes_since_checkpoint += actual_line_byte_len;
212                    continue;
213                }
214            };
215
216            let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
217            let mut syntect_offset = 0;
218            let line_content_len = line_content.len();
219
220            for (op_offset, op) in ops {
221                let clamped_op_offset = op_offset.min(line_content_len);
222                if collect_spans && clamped_op_offset > syntect_offset {
223                    if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
224                        let byte_start = current_offset + syntect_offset;
225                        let byte_end = current_offset + clamped_op_offset;
226                        let clamped_start = byte_start.max(desired_parse_start);
227                        if clamped_start < byte_end {
228                            spans.push(CachedSpan {
229                                range: clamped_start..byte_end,
230                                category,
231                            });
232                        }
233                    }
234                }
235                syntect_offset = clamped_op_offset;
236                #[allow(clippy::let_underscore_must_use)]
237                let _ = current_scopes.apply(&op);
238            }
239
240            if collect_spans && syntect_offset < line_content_len {
241                if let Some(category) = Self::scope_stack_to_category(&current_scopes) {
242                    let byte_start = current_offset + syntect_offset;
243                    let byte_end = current_offset + line_content_len;
244                    let clamped_start = byte_start.max(desired_parse_start);
245                    if clamped_start < byte_end {
246                        spans.push(CachedSpan {
247                            range: clamped_start..byte_end,
248                            category,
249                        });
250                    }
251                }
252            }
253
254            pos = line_end;
255            current_offset += actual_line_byte_len;
256            bytes_since_checkpoint += actual_line_byte_len;
257        }
258
259        Self::merge_adjacent_spans(&mut spans);
260
261        self.cache = Some(TextMateCache {
262            range: desired_parse_start..parse_end,
263            spans: spans.clone(),
264        });
265        self.last_buffer_len = buffer.len();
266
267        spans
268            .into_iter()
269            .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
270            .map(|span| {
271                let cat = span.category;
272                HighlightSpan {
273                    range: span.range,
274                    color: highlight_color(cat, theme),
275                    category: Some(cat),
276                }
277            })
278            .collect()
279    }
280
281    fn run_convergence_walk(&mut self, buffer: &Buffer, walk_end: usize) {
282        let dirty = match self.dirty_from.take() {
283            Some(d) => d,
284            None => return,
285        };
286
287        let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
288
289        let (resume_pos, mut state, mut current_scopes) = {
290            let search_start = dirty.saturating_sub(MAX_PARSE_BYTES);
291            let markers = self.checkpoint_markers.query_range(search_start, dirty);
292            let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
293            if let Some((id, cp_pos, _)) = nearest {
294                if let Some((s, sc)) = self.checkpoint_states.get(&id) {
295                    (cp_pos, s.clone(), sc.clone())
296                } else {
297                    self.checkpoint_markers.delete(id);
298                    (
299                        0,
300                        syntect::parsing::ParseState::new(syntax),
301                        syntect::parsing::ScopeStack::new(),
302                    )
303                }
304            } else if walk_end <= MAX_PARSE_BYTES {
305                (
306                    0,
307                    syntect::parsing::ParseState::new(syntax),
308                    syntect::parsing::ScopeStack::new(),
309                )
310            } else {
311                self.dirty_from = Some(dirty);
312                return;
313            }
314        };
315
316        let mut markers_ahead: Vec<(MarkerId, usize)> = self
317            .checkpoint_markers
318            .query_range(dirty, walk_end)
319            .into_iter()
320            .map(|(id, start, _)| (id, start))
321            .collect();
322        markers_ahead.sort_by_key(|(_, pos)| *pos);
323
324        if markers_ahead.is_empty() {
325            return;
326        }
327
328        let content_end = walk_end.min(buffer.len());
329        if resume_pos >= content_end {
330            return;
331        }
332        let content = buffer.slice_bytes(resume_pos..content_end);
333        let content_str = match std::str::from_utf8(&content) {
334            Ok(s) => s,
335            Err(_) => return,
336        };
337
338        let content_bytes = content_str.as_bytes();
339        let mut pos = 0;
340        let mut current_offset = resume_pos;
341        let mut marker_idx = 0;
342
343        while pos < content_bytes.len() && marker_idx < markers_ahead.len() {
344            let mut line_end = pos;
345            while line_end < content_bytes.len() {
346                if content_bytes[line_end] == b'\n' {
347                    line_end += 1;
348                    break;
349                } else if content_bytes[line_end] == b'\r' {
350                    if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
351                        line_end += 2;
352                    } else {
353                        line_end += 1;
354                    }
355                    break;
356                }
357                line_end += 1;
358            }
359
360            let line_bytes = &content_bytes[pos..line_end];
361            let actual_line_byte_len = line_bytes.len();
362
363            let line_str = match std::str::from_utf8(line_bytes) {
364                Ok(s) => s,
365                Err(_) => {
366                    pos = line_end;
367                    current_offset += actual_line_byte_len;
368                    continue;
369                }
370            };
371
372            let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
373            let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
374                format!("{}\n", line_content)
375            } else {
376                line_content.to_string()
377            };
378
379            if let Ok(ops) = state.parse_line(&line_for_syntect, &self.syntax_set) {
380                for (_op_offset, op) in ops {
381                    #[allow(clippy::let_underscore_must_use)]
382                    let _ = current_scopes.apply(&op);
383                }
384            }
385
386            pos = line_end;
387            current_offset += actual_line_byte_len;
388
389            while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
390            {
391                let (marker_id, _) = markers_ahead[marker_idx];
392                marker_idx += 1;
393
394                if let Some(stored) = self.checkpoint_states.get(&marker_id) {
395                    if state == stored.0 && current_scopes == stored.1 {
396                        return;
397                    }
398                }
399                self.checkpoint_states
400                    .insert(marker_id, (state.clone(), current_scopes.clone()));
401            }
402        }
403
404        if marker_idx < markers_ahead.len() {
405            self.dirty_from = Some(markers_ahead[marker_idx].1);
406        }
407    }
408
409    fn find_parse_resume_point(
410        &self,
411        desired_start: usize,
412        parse_end: usize,
413        syntax: &syntect::parsing::SyntaxReference,
414    ) -> (
415        usize,
416        syntect::parsing::ParseState,
417        syntect::parsing::ScopeStack,
418        bool,
419    ) {
420        use syntect::parsing::{ParseState, ScopeStack};
421
422        let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
423        let markers = self
424            .checkpoint_markers
425            .query_range(search_start, desired_start + 1);
426        let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
427
428        if let Some((id, cp_pos, _)) = nearest {
429            if let Some((s, sc)) = self.checkpoint_states.get(&id) {
430                return (cp_pos, s.clone(), sc.clone(), true);
431            }
432        }
433        if parse_end <= MAX_PARSE_BYTES {
434            (0, ParseState::new(syntax), ScopeStack::new(), true)
435        } else {
436            (
437                desired_start,
438                ParseState::new(syntax),
439                ScopeStack::new(),
440                true,
441            )
442        }
443    }
444
445    fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
446        for scope in scopes.as_slice().iter().rev() {
447            let scope_str = scope.build_string();
448            if let Some(cat) = scope_to_category(&scope_str) {
449                return Some(cat);
450            }
451        }
452        None
453    }
454
455    fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
456        if spans.len() < 2 {
457            return;
458        }
459        let mut write_idx = 0;
460        for read_idx in 1..spans.len() {
461            if spans[write_idx].category == spans[read_idx].category
462                && spans[write_idx].range.end == spans[read_idx].range.start
463            {
464                spans[write_idx].range.end = spans[read_idx].range.end;
465            } else {
466                write_idx += 1;
467                if write_idx != read_idx {
468                    spans[write_idx] = spans[read_idx].clone();
469                }
470            }
471        }
472        spans.truncate(write_idx + 1);
473    }
474
475    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
476        if let Some(cache) = &self.cache {
477            if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
478                self.cache = None;
479            }
480        }
481    }
482
483    pub fn invalidate_all(&mut self) {
484        self.cache = None;
485        let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
486        for id in ids {
487            self.checkpoint_markers.delete(id);
488        }
489        self.checkpoint_states.clear();
490        self.dirty_from = None;
491    }
492
493    pub fn syntax_name(&self) -> &str {
494        &self.syntax_set.syntaxes()[self.syntax_index].name
495    }
496}
497
498/// Map TextMate scope to highlight category
499fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
500    let scope_lower = scope.to_lowercase();
501
502    // Comments - highest priority
503    if scope_lower.starts_with("comment") {
504        return Some(HighlightCategory::Comment);
505    }
506
507    // Strings
508    if scope_lower.starts_with("string") {
509        return Some(HighlightCategory::String);
510    }
511
512    // Markdown/markup scopes
513    if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
514        return Some(HighlightCategory::Keyword);
515    }
516    if scope_lower.starts_with("markup.bold") {
517        return Some(HighlightCategory::Constant);
518    }
519    if scope_lower.starts_with("markup.italic") {
520        return Some(HighlightCategory::Variable);
521    }
522    if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
523        return Some(HighlightCategory::String);
524    }
525    if scope_lower.starts_with("markup.underline.link")
526        || scope_lower.starts_with("markup.underline")
527    {
528        return Some(HighlightCategory::Function);
529    }
530    if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
531        return Some(HighlightCategory::Comment);
532    }
533    if scope_lower.starts_with("markup.list") {
534        return Some(HighlightCategory::Operator);
535    }
536    // Diff markup: inserted/deleted lines
537    if scope_lower.starts_with("markup.inserted") {
538        return Some(HighlightCategory::String); // green
539    }
540    if scope_lower.starts_with("markup.deleted") {
541        return Some(HighlightCategory::Keyword); // red/magenta
542    }
543    // Diff metadata (range info like @@ -1,5 +1,6 @@)
544    if scope_lower.starts_with("meta.diff.range")
545        || scope_lower.starts_with("meta.diff.header")
546        || scope_lower.starts_with("meta.diff.index")
547    {
548        return Some(HighlightCategory::Function); // cyan/yellow
549    }
550    // Diff from-file/to-file headers (--- a/file, +++ b/file)
551    if scope_lower.starts_with("punctuation.definition.from-file")
552        || scope_lower.starts_with("punctuation.definition.to-file")
553    {
554        return Some(HighlightCategory::Type); // type color
555    }
556
557    // Keywords (but not keyword.operator)
558    if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
559        return Some(HighlightCategory::Keyword);
560    }
561
562    // Punctuation that belongs to a parent construct (comment/string delimiters)
563    // These must be checked before the generic punctuation rule below.
564    // TextMate grammars assign e.g. `punctuation.definition.comment` to # // /* etc.
565    if scope_lower.starts_with("punctuation.definition.comment") {
566        return Some(HighlightCategory::Comment);
567    }
568    if scope_lower.starts_with("punctuation.definition.string") {
569        return Some(HighlightCategory::String);
570    }
571
572    // Operators (keyword.operator only)
573    if scope_lower.starts_with("keyword.operator") {
574        return Some(HighlightCategory::Operator);
575    }
576
577    // Punctuation brackets ({, }, (, ), [, ], <, >)
578    // Covers punctuation.section.*, punctuation.bracket.*,
579    // and punctuation.definition.{array,block,brackets,group,inline-table,section,table,tag}
580    if scope_lower.starts_with("punctuation.section")
581        || scope_lower.starts_with("punctuation.bracket")
582        || scope_lower.starts_with("punctuation.definition.array")
583        || scope_lower.starts_with("punctuation.definition.block")
584        || scope_lower.starts_with("punctuation.definition.brackets")
585        || scope_lower.starts_with("punctuation.definition.group")
586        || scope_lower.starts_with("punctuation.definition.inline-table")
587        || scope_lower.starts_with("punctuation.definition.section")
588        || scope_lower.starts_with("punctuation.definition.table")
589        || scope_lower.starts_with("punctuation.definition.tag")
590    {
591        return Some(HighlightCategory::PunctuationBracket);
592    }
593
594    // Punctuation delimiters (;, ,, .)
595    if scope_lower.starts_with("punctuation.separator")
596        || scope_lower.starts_with("punctuation.terminator")
597        || scope_lower.starts_with("punctuation.accessor")
598    {
599        return Some(HighlightCategory::PunctuationDelimiter);
600    }
601
602    // Functions
603    if scope_lower.starts_with("entity.name.function")
604        || scope_lower.starts_with("meta.function-call")
605        || scope_lower.starts_with("support.function")
606    {
607        return Some(HighlightCategory::Function);
608    }
609
610    // Types
611    if scope_lower.starts_with("entity.name.type")
612        || scope_lower.starts_with("storage.type")
613        || scope_lower.starts_with("support.type")
614        || scope_lower.starts_with("entity.name.class")
615    {
616        return Some(HighlightCategory::Type);
617    }
618
619    // Constants and numbers
620    if scope_lower.starts_with("constant.numeric")
621        || scope_lower.starts_with("constant.language")
622        || scope_lower.starts_with("constant.character")
623    {
624        return Some(HighlightCategory::Constant);
625    }
626    if scope_lower.starts_with("constant") {
627        return Some(HighlightCategory::Constant);
628    }
629
630    // Variables and parameters
631    if scope_lower.starts_with("variable.parameter") {
632        return Some(HighlightCategory::Variable);
633    }
634    if scope_lower.starts_with("variable") {
635        return Some(HighlightCategory::Variable);
636    }
637
638    // Storage modifiers (pub, static, const, etc.)
639    if scope_lower.starts_with("storage.modifier") {
640        return Some(HighlightCategory::Keyword);
641    }
642
643    // Entity names (catch-all for other named things)
644    if scope_lower.starts_with("entity.name") {
645        return Some(HighlightCategory::Function);
646    }
647
648    None
649}
650
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `scope` maps to `expected`, reporting the caller's line on failure.
    #[track_caller]
    fn expect_category(scope: &str, expected: HighlightCategory) {
        assert_eq!(scope_to_category(scope), Some(expected));
    }

    #[test]
    fn test_scope_to_category() {
        expect_category("comment.line", HighlightCategory::Comment);
        expect_category("string.quoted", HighlightCategory::String);
        expect_category("keyword.control", HighlightCategory::Keyword);
        expect_category("keyword.operator", HighlightCategory::Operator);
        expect_category("entity.name.function", HighlightCategory::Function);
        expect_category("constant.numeric", HighlightCategory::Constant);
        expect_category("variable.parameter", HighlightCategory::Variable);
    }

    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        // Comment delimiters (#, //, /*) take the comment colour, not the operator colour.
        for scope in [
            "punctuation.definition.comment",
            "punctuation.definition.comment.python",
            "punctuation.definition.comment.begin",
        ]
        .iter()
        {
            expect_category(scope, HighlightCategory::Comment);
        }
    }

    #[test]
    fn test_string_delimiter_uses_string_color() {
        // String delimiters (", ', `) take the string colour, not the operator colour.
        for scope in [
            "punctuation.definition.string.begin",
            "punctuation.definition.string.end",
        ]
        .iter()
        {
            expect_category(scope, HighlightCategory::String);
        }
    }

    #[test]
    fn test_diff_scopes_produce_categories() {
        // Diff-specific scopes must map to a category.
        expect_category("markup.inserted", HighlightCategory::String);
        expect_category("markup.inserted.diff", HighlightCategory::String);
        expect_category("markup.deleted", HighlightCategory::Keyword);
        expect_category("markup.deleted.diff", HighlightCategory::Keyword);
        expect_category("meta.diff.range", HighlightCategory::Function);
        expect_category("meta.diff.header", HighlightCategory::Function);
    }

    #[test]
    fn test_diff_parsing_produces_scopes() {
        use syntect::parsing::{ParseState, ScopeStack, SyntaxSet};

        let ss = SyntaxSet::load_defaults_newlines();
        let syntax = ss
            .find_syntax_by_extension("diff")
            .expect("Diff syntax should exist");
        let mut state = ParseState::new(syntax);
        let mut scopes = ScopeStack::new();

        let lines = [
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,3 +1,4 @@\n",
            " unchanged\n",
            "-removed line\n",
            "+added line\n",
        ];

        let mut found_inserted = false;
        let mut found_deleted = false;
        let mut found_range = false;

        for line in &lines {
            let ops = state.parse_line(line, &ss).unwrap();
            for (_, op) in &ops {
                scopes.apply(op).unwrap();
                // Inspect the whole stack after every operation.
                let stack_text = scopes
                    .as_slice()
                    .iter()
                    .map(|s| s.build_string())
                    .collect::<Vec<_>>()
                    .join(" ");
                found_inserted |= stack_text.contains("markup.inserted");
                found_deleted |= stack_text.contains("markup.deleted");
                found_range |= stack_text.contains("meta.diff");
            }
        }

        eprintln!(
            "found_inserted={}, found_deleted={}, found_range={}",
            found_inserted, found_deleted, found_range
        );
        assert!(
            found_inserted || found_deleted || found_range,
            "Diff grammar should produce markup.inserted, markup.deleted, or meta.diff scopes"
        );
    }
}