// neco_syntax_textmate/lib.rs

1use std::ops::Range;
2use std::path::Path;
3
4use syntect::dumps::from_uncompressed_data;
5use syntect::parsing::{ParseState, ScopeStack, SyntaxDefinition, SyntaxReference, SyntaxSet};
6
/// Classifies a token span into a stable highlighting category.
///
/// Produced by [`scope_to_token_kind`] from TextMate scope names and carried
/// by [`TokenSpan`] values returned from `SyntaxHighlighter::tokenize_line`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// Matches language keywords and storage scopes.
    Keyword,
    /// Matches named types and type-like support scopes.
    Type,
    /// Matches function and method names.
    Function,
    /// Matches string literal content.
    String,
    /// Matches numeric literals.
    Number,
    /// Matches comment text.
    Comment,
    /// Matches operator scopes before generic keywords.
    Operator,
    /// Matches punctuation and delimiter scopes.
    Punctuation,
    /// Matches variable-like scopes.
    Variable,
    /// Matches non-numeric constant scopes.
    Constant,
    /// Matches tag names such as HTML elements.
    Tag,
    /// Matches attribute and property names.
    Attribute,
    /// Matches escape sequences inside literals.
    Escape,
    /// Fallback when no supported scope prefix matches.
    Plain,
}
40
/// Describes one byte range and its token kind within a single input line.
///
/// Ranges are byte offsets (not character indices) into the original line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Byte range inside the line passed to `SyntaxHighlighter::tokenize_line`.
    pub range: Range<usize>,
    /// Highlighting category inferred for `range`.
    pub kind: TokenKind,
}
49
/// Reports why loading an external grammar file failed.
///
/// I/O errors convert automatically via the `From<std::io::Error>` impl, so
/// `?` can be used on filesystem calls inside `GrammarSet::load_grammar`.
#[non_exhaustive]
#[derive(Debug)]
pub enum GrammarLoadError {
    /// Reading the grammar file from disk failed.
    Io(std::io::Error),
    /// Parsing the loaded grammar text failed.
    Parse(String),
}
59
60impl From<std::io::Error> for GrammarLoadError {
61    fn from(error: std::io::Error) -> Self {
62        Self::Io(error)
63    }
64}
65
66impl std::fmt::Display for GrammarLoadError {
67    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68        match self {
69            Self::Io(e) => write!(f, "I/O error: {e}"),
70            Self::Parse(msg) => write!(f, "parse error: {msg}"),
71        }
72    }
73}
74
75impl std::error::Error for GrammarLoadError {
76    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
77        match self {
78            Self::Io(e) => Some(e),
79            Self::Parse(_) => None,
80        }
81    }
82}
83
84/// Maps a TextMate scope string to the closest supported `TokenKind`.
85pub fn scope_to_token_kind(scope: &str) -> TokenKind {
86    if scope.starts_with("keyword.operator") {
87        TokenKind::Operator
88    } else if scope.starts_with("keyword") || scope.starts_with("storage") {
89        TokenKind::Keyword
90    } else if scope.starts_with("entity.name.function") || scope.starts_with("support.function") {
91        TokenKind::Function
92    } else if scope.starts_with("entity.name.type")
93        || scope.starts_with("entity.name.class")
94        || scope.starts_with("entity.name.struct")
95        || scope.starts_with("entity.name.enum")
96        || scope.starts_with("support.type")
97        || scope.starts_with("support.class")
98    {
99        TokenKind::Type
100    } else if scope.starts_with("entity.name.tag") {
101        TokenKind::Tag
102    } else if scope.starts_with("entity.other.attribute-name") {
103        TokenKind::Attribute
104    } else if scope.starts_with("string") {
105        TokenKind::String
106    } else if scope.starts_with("constant.character.escape") {
107        TokenKind::Escape
108    } else if scope.starts_with("constant.numeric") {
109        TokenKind::Number
110    } else if scope.starts_with("constant") {
111        TokenKind::Constant
112    } else if scope.starts_with("comment") {
113        TokenKind::Comment
114    } else if scope.starts_with("variable") {
115        TokenKind::Variable
116    } else if scope.starts_with("punctuation") {
117        TokenKind::Punctuation
118    } else {
119        TokenKind::Plain
120    }
121}
122
123fn resolve_language_alias<'a>(
124    syntax_set: &'a SyntaxSet,
125    language: &str,
126) -> Option<&'a SyntaxReference> {
127    syntax_set
128        .find_syntax_by_token(language)
129        .or_else(|| match language {
130            "TypeScript" | "typescript" | "ts" | "tsx" => {
131                syntax_set.find_syntax_by_token("JavaScript")
132            }
133            "KDL" | "kdl" => syntax_set.find_syntax_by_token("KDL2"),
134            _ => None,
135        })
136}
137
/// Returns the user-facing language name for a syntax that may have been
/// resolved through an alias (JavaScript grammar serving `.ts`/`.tsx`,
/// KDL2 grammar serving `.kdl`); otherwise the syntax's own name.
fn display_language_name<'a>(extension: &str, syntax_name: &'a str) -> &'a str {
    if syntax_name == "JavaScript" && (extension == "ts" || extension == "tsx") {
        "TypeScript"
    } else if syntax_name == "KDL2" && extension == "kdl" {
        "KDL"
    } else {
        syntax_name
    }
}
145
#[derive(Debug, Clone)]
/// Owns the loaded grammar set used to resolve languages and build highlighters.
pub struct GrammarSet {
    // Immutable except through `load_grammar`, which replaces it wholesale.
    syntax_set: SyntaxSet,
}
151
152impl GrammarSet {
153    /// Returns the bundled grammar set and panics only if embedded data is invalid.
154    pub fn default_set() -> Self {
155        let data = include_bytes!(concat!(env!("OUT_DIR"), "/syntaxes.packdump"));
156        let syntax_set = from_uncompressed_data(data).expect("embedded syntax dump must be valid");
157        Self { syntax_set }
158    }
159
160    /// Loads one grammar file and restores the previous set if reading or parsing fails.
161    pub fn load_grammar(&mut self, path: &Path) -> Result<(), GrammarLoadError> {
162        let original = std::mem::take(&mut self.syntax_set);
163        let result = (|| {
164            let content = std::fs::read_to_string(path)?;
165            let name = path.file_stem().and_then(|stem| stem.to_str());
166            let syntax_definition = SyntaxDefinition::load_from_str(&content, true, name)
167                .map_err(|error| GrammarLoadError::Parse(error.to_string()))?;
168            let mut builder = original.clone().into_builder();
169            builder.add(syntax_definition);
170            Ok(builder.build())
171        })();
172
173        match result {
174            Ok(syntax_set) => {
175                self.syntax_set = syntax_set;
176                Ok(())
177            }
178            Err(error) => {
179                self.syntax_set = original;
180                Err(error)
181            }
182        }
183    }
184
185    /// Returns the display language name for an extension when a matching grammar exists.
186    pub fn detect_language<'a>(&'a self, extension: &str) -> Option<&'a str> {
187        match extension {
188            "ts" | "tsx" if self.syntax_set.find_syntax_by_token("JavaScript").is_some() => {
189                return Some("TypeScript");
190            }
191            "kdl" if self.syntax_set.find_syntax_by_token("KDL2").is_some() => {
192                return Some("KDL");
193            }
194            _ => {}
195        }
196
197        self.syntax_set
198            .find_syntax_by_extension(extension)
199            .map(|syntax| display_language_name(extension, syntax.name.as_str()))
200    }
201
202    /// Lists all loaded language names in their stored display form.
203    pub fn languages(&self) -> Vec<&str> {
204        self.syntax_set
205            .syntaxes()
206            .iter()
207            .map(|syntax| syntax.name.as_str())
208            .collect()
209    }
210}
211
#[derive(Debug, Clone)]
/// Tokenizes lines for one language while preserving multi-line parse state.
pub struct SyntaxHighlighter {
    // Owned copy of the grammar set so the highlighter is self-contained.
    syntax_set: SyntaxSet,
    // Continuation state carried across `tokenize_line` calls.
    parse_state: ParseState,
    // Scope context at the current parse position.
    scope_stack: ScopeStack,
    // Syntax name used to re-resolve the syntax in `reset`.
    language_name: String,
}
220
221impl SyntaxHighlighter {
222    /// Creates a highlighter for `language`, or returns `None` when no loaded grammar matches it.
223    pub fn new(grammar_set: &GrammarSet, language: &str) -> Option<Self> {
224        let syntax_set = grammar_set.syntax_set.clone();
225        let syntax = resolve_language_alias(&syntax_set, language)?;
226        let language_name = syntax.name.clone();
227
228        Some(Self {
229            parse_state: ParseState::new(syntax),
230            scope_stack: ScopeStack::new(),
231            syntax_set,
232            language_name,
233        })
234    }
235
236    /// Tokenizes one line, updates continuation state, and falls back to a plain full-line span on parse errors.
237    pub fn tokenize_line(&mut self, line: &str) -> Vec<TokenSpan> {
238        let ops = match self.parse_state.parse_line(line, &self.syntax_set) {
239            Ok(ops) => ops,
240            Err(_) => {
241                return vec![TokenSpan {
242                    range: 0..line.len(),
243                    kind: TokenKind::Plain,
244                }];
245            }
246        };
247
248        let mut spans = Vec::new();
249        let mut last_index = 0;
250
251        for (index, op) in &ops {
252            let index = (*index).min(line.len());
253            if index > last_index {
254                let kind = self.current_token_kind();
255                Self::push_span(&mut spans, last_index..index, kind);
256            }
257
258            if self.scope_stack.apply(op).is_err() {
259                return vec![TokenSpan {
260                    range: 0..line.len(),
261                    kind: TokenKind::Plain,
262                }];
263            }
264            last_index = index;
265        }
266
267        if last_index < line.len() {
268            let kind = self.current_token_kind();
269            Self::push_span(&mut spans, last_index..line.len(), kind);
270        }
271
272        spans
273    }
274
275    /// Clears parser state so the next tokenized line is treated as a new file start.
276    pub fn reset(&mut self) {
277        let syntax = self
278            .syntax_set
279            .find_syntax_by_token(&self.language_name)
280            .expect("syntax must exist in owned syntax set");
281        self.parse_state = ParseState::new(syntax);
282        self.scope_stack = ScopeStack::new();
283    }
284
285    fn current_token_kind(&self) -> TokenKind {
286        self.scope_stack
287            .as_slice()
288            .iter()
289            .rev()
290            .map(|scope| scope_to_token_kind(&scope.build_string()))
291            .find(|kind| *kind != TokenKind::Plain)
292            .unwrap_or(TokenKind::Plain)
293    }
294
295    fn push_span(spans: &mut Vec<TokenSpan>, range: Range<usize>, kind: TokenKind) {
296        if range.is_empty() {
297            return;
298        }
299
300        if let Some(previous) = spans.last_mut() {
301            if previous.kind == kind && previous.range.end == range.start {
302                previous.range.end = range.end;
303                return;
304            }
305        }
306
307        spans.push(TokenSpan { range, kind });
308    }
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    use std::error::Error;
    use std::fs;
    use std::time::{SystemTime, UNIX_EPOCH};

    // Returns true when any span in `spans` carries `kind`.
    fn has_token(spans: &[TokenSpan], kind: TokenKind) -> bool {
        spans.iter().any(|span| span.kind == kind)
    }

    // Collects the slices of `line` covered by spans of the given `kind`.
    fn token_text<'a>(line: &'a str, spans: &[TokenSpan], kind: TokenKind) -> Vec<&'a str> {
        spans
            .iter()
            .filter(|span| span.kind == kind)
            .map(|span| &line[span.range.clone()])
            .collect()
    }

    // Builds a nanosecond-stamped temp path so concurrently running tests
    // never collide on the same grammar file.
    fn unique_temp_path(name: &str) -> std::path::PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time must be after unix epoch")
            .as_nanos();
        std::env::temp_dir().join(format!(
            "neco-syntax-textmate-{name}-{nanos}.sublime-syntax"
        ))
    }

    #[test]
    fn scope_to_token_kind_covers_supported_variants_and_edges() {
        // One representative scope per supported kind, plus empty/unknown edges.
        let cases = [
            ("keyword.control.rust", TokenKind::Keyword),
            ("storage.type.function.rust", TokenKind::Keyword),
            ("entity.name.function.rust", TokenKind::Function),
            ("support.function.builtin.python", TokenKind::Function),
            ("entity.name.type.struct.rust", TokenKind::Type),
            ("entity.name.class.typescript", TokenKind::Type),
            ("entity.name.struct.rust", TokenKind::Type),
            ("entity.name.enum.rust", TokenKind::Type),
            ("support.type.primitive.ts", TokenKind::Type),
            ("support.class.python", TokenKind::Type),
            ("entity.name.tag.html", TokenKind::Tag),
            ("entity.other.attribute-name.html", TokenKind::Attribute),
            ("string.quoted.double", TokenKind::String),
            ("constant.character.escape.rust", TokenKind::Escape),
            ("constant.numeric.decimal", TokenKind::Number),
            ("constant.language.boolean", TokenKind::Constant),
            ("comment.line.double-slash", TokenKind::Comment),
            ("variable.parameter.function", TokenKind::Variable),
            ("punctuation.section.block.begin", TokenKind::Punctuation),
            ("", TokenKind::Plain),
            ("meta.embedded.unknown", TokenKind::Plain),
        ];

        for (scope, expected) in cases {
            assert_eq!(scope_to_token_kind(scope), expected, "scope={scope}");
        }
    }

    #[test]
    fn scope_to_token_kind_prefers_operator_before_keyword() {
        assert_eq!(
            scope_to_token_kind("keyword.operator.assignment"),
            TokenKind::Operator
        );
    }

    #[test]
    fn grammar_set_default_set_finds_default_and_bundled_languages() {
        let grammar_set = GrammarSet::default_set();

        assert_eq!(grammar_set.detect_language("rs"), Some("Rust"));
        assert_eq!(grammar_set.detect_language("ts"), Some("TypeScript"));
        assert_eq!(grammar_set.detect_language("json"), Some("JSON"));
        assert_eq!(grammar_set.detect_language("py"), Some("Python"));
        assert_eq!(grammar_set.detect_language("kdl"), Some("KDL"));
        assert_eq!(grammar_set.detect_language("fish"), Some("Fish"));
        assert_eq!(grammar_set.detect_language("nix"), Some("Nix"));
        assert_eq!(grammar_set.detect_language("typ"), Some("Typst"));
        assert_eq!(grammar_set.detect_language("pkl"), Some("Pkl"));
        assert_eq!(grammar_set.detect_language("mojo"), Some("Mojo"));
        assert_eq!(grammar_set.detect_language("does-not-exist"), None);
        assert!(!grammar_set.languages().is_empty());
    }

    #[test]
    fn syntax_highlighter_tokenizes_rust_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Rust").expect("Rust syntax must exist");

        let line1 = "fn main() {\n";
        let line2 = "    let x = 42;\n";
        let line3 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"fn"));
        assert!(has_token(&spans1, TokenKind::Function));
        assert!(token_text(line1, &spans1, TokenKind::Function).contains(&"main"));
        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(has_token(&spans2, TokenKind::Keyword));
        assert!(token_text(line2, &spans2, TokenKind::Keyword).contains(&"let"));
        assert!(has_token(&spans2, TokenKind::Number));
        assert!(token_text(line2, &spans2, TokenKind::Number).contains(&"42"));
        assert!(has_token(&spans2, TokenKind::Operator));
        assert!(token_text(line2, &spans2, TokenKind::Operator).contains(&"="));
        assert!(has_token(&spans3, TokenKind::Punctuation));
    }

    #[test]
    fn syntax_highlighter_tokenizes_typescript_key_tokens() {
        // TypeScript is served by the JavaScript grammar via the alias table.
        let grammar_set = GrammarSet::default_set();
        let mut highlighter = SyntaxHighlighter::new(&grammar_set, "TypeScript")
            .expect("TypeScript syntax must exist");

        let line1 = "const foo = hello;\n";
        let line2 = "console.log(foo);\n";
        let line3 = "let n = 1;\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"const"));
        assert!(has_token(&spans1, TokenKind::Operator));
        assert!(token_text(line1, &spans1, TokenKind::Operator).contains(&"="));
        assert!(has_token(&spans2, TokenKind::Function));
        assert!(token_text(line2, &spans2, TokenKind::Function).contains(&"log"));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"let"));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"1"));
    }

    #[test]
    fn syntax_highlighter_tokenizes_python_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Python").expect("Python syntax must exist");

        let line1 = "def greet(name):\n";
        let line2 = "    print(f\"Hello {name}\")\n";
        let line3 = "    return 7\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"def"));
        assert!(has_token(&spans1, TokenKind::Variable));
        assert!(token_text(line1, &spans1, TokenKind::Variable).contains(&"name"));
        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(has_token(&spans2, TokenKind::Function));
        assert!(token_text(line2, &spans2, TokenKind::Function).contains(&"print"));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("Hello")));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"return"));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"7"));
    }

    #[test]
    fn syntax_highlighter_tokenizes_json_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "JSON").expect("JSON syntax must exist");

        let line1 = "{\"key\": \"value\",\n";
        let line2 = " \"num\": 42\n";
        let line3 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::String));
        assert!(token_text(line1, &spans1, TokenKind::String)
            .iter()
            .any(|text| text.contains("key")));
        assert!(token_text(line1, &spans1, TokenKind::String)
            .iter()
            .any(|text| text.contains("value")));
        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(token_text(line1, &spans1, TokenKind::Punctuation)
            .iter()
            .any(|text| text.contains('{')));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("num")));
        assert!(has_token(&spans2, TokenKind::Number));
        assert!(token_text(line2, &spans2, TokenKind::Number).contains(&"42"));
        assert!(has_token(&spans3, TokenKind::Punctuation));
        assert!(token_text(line3, &spans3, TokenKind::Punctuation)
            .iter()
            .any(|text| text.contains('}')));
    }

    #[test]
    fn syntax_highlighter_tokenizes_kdl_key_tokens() {
        // "KDL" resolves to the bundled KDL2 grammar via the alias table.
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "KDL").expect("KDL syntax must exist");

        let line1 = "node \"arg\" key=42\n";
        let line2 = "// comment\n";
        let line3 = "{\n";
        let line4 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);
        let spans4 = highlighter.tokenize_line(line4);

        assert!(has_token(&spans1, TokenKind::String));
        assert!(token_text(line1, &spans1, TokenKind::String)
            .iter()
            .any(|text| text.contains("arg")));
        assert!(has_token(&spans2, TokenKind::Comment));
        assert!(token_text(line2, &spans2, TokenKind::Comment)
            .iter()
            .any(|text| text.contains("comment")));
        assert!(has_token(&spans3, TokenKind::Punctuation));
        assert!(has_token(&spans4, TokenKind::Punctuation));
    }

    #[test]
    fn syntax_highlighter_tokenizes_fish_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Fish").expect("Fish syntax must exist");

        let line1 = "function greet\n";
        let line2 = "    echo \"Hello\"\n";
        let line3 = "end\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"function"));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("Hello")));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"end"));
    }

    #[test]
    fn syntax_highlighter_tokenizes_typst_key_tokens() {
        // Typst markup scopes are loosely asserted; the number literal is the
        // one token kind the grammar classifies reliably here.
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Typst").expect("Typst syntax must exist");

        let line1 = "= Heading\n";
        let line2 = "#let x = 42\n";
        let line3 = "Some text\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(!spans1.is_empty() || !spans2.is_empty() || !spans3.is_empty());
        assert!(has_token(&spans2, TokenKind::Number));
        assert!(token_text(line2, &spans2, TokenKind::Number).contains(&"42"));
    }

    #[test]
    fn syntax_highlighter_tokenizes_nix_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Nix").expect("Nix syntax must exist");

        let line1 = "{ pkgs ? import <nixpkgs> {} }:\n";
        let line2 = "pkgs.mkShell {\n";
        let line3 = "  buildInputs = [ pkgs.hello ];\n";
        let line4 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);
        let spans4 = highlighter.tokenize_line(line4);

        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(token_text(line1, &spans1, TokenKind::Punctuation).contains(&"{"));
        // Identifier classification varies between grammar versions, so accept
        // a keyword or variable token anywhere in the first three lines.
        assert!(
            has_token(&spans1, TokenKind::Keyword)
                || has_token(&spans1, TokenKind::Variable)
                || has_token(&spans2, TokenKind::Keyword)
                || has_token(&spans2, TokenKind::Variable)
                || has_token(&spans3, TokenKind::Keyword)
                || has_token(&spans3, TokenKind::Variable)
        );
        assert!(has_token(&spans4, TokenKind::Punctuation));
        assert!(token_text(line4, &spans4, TokenKind::Punctuation).contains(&"}"));
    }

    #[test]
    fn syntax_highlighter_tokenizes_pkl_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Pkl").expect("Pkl syntax must exist");

        let line1 = "module MyConfig\n";
        let line2 = "name: String = \"hello\"\n";
        let line3 = "if (count > 0) 42 else 0\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(!spans1.is_empty());
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("hello")));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"if"));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"42"));
    }

    #[test]
    fn syntax_highlighter_tokenizes_mojo_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Mojo").expect("Mojo syntax must exist");

        let line1 = "fn greet(name: String):\n";
        let line2 = "    print(\"Hello\")\n";
        let line3 = "    let x = 42\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"fn"));
        assert!(has_token(&spans2, TokenKind::Function));
        assert!(token_text(line2, &spans2, TokenKind::Function).contains(&"print"));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("Hello")));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"42"));
    }

    #[test]
    fn syntax_highlighter_reset_restores_initial_state() {
        // Tokenizing the same two lines before and after `reset` must yield
        // identical spans if reset truly restores the start-of-file state.
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Rust").expect("Rust syntax must exist");

        let prior_line = "fn main() {\n";
        let target_line = "    let x = 42;\n";

        let _ = highlighter.tokenize_line(prior_line);
        let before_reset = highlighter.tokenize_line(target_line);
        highlighter.reset();
        let _ = highlighter.tokenize_line(prior_line);
        let after_reset = highlighter.tokenize_line(target_line);

        assert_eq!(before_reset, after_reset);
    }

    #[test]
    fn syntax_highlighter_accepts_lowercase_language_aliases() {
        let grammar_set = GrammarSet::default_set();

        assert!(
            SyntaxHighlighter::new(&grammar_set, "typescript").is_some(),
            "lowercase 'typescript' must resolve via alias"
        );
        assert!(
            SyntaxHighlighter::new(&grammar_set, "kdl").is_some(),
            "lowercase 'kdl' must resolve via alias"
        );
        assert!(
            SyntaxHighlighter::new(&grammar_set, "rust").is_some(),
            "lowercase 'rust' must resolve via case-insensitive name match"
        );
    }

    #[test]
    fn load_grammar_succeeds_for_valid_syntax_file() {
        let path = unique_temp_path("valid");
        fs::write(
            &path,
            r#"name: MiniSyntax
file_extensions: [mini]
scope: source.mini
contexts:
  main:
    - match: '\b(todo)\b'
      scope: keyword.control.mini
"#,
        )
        .expect("temporary syntax file must be writable");

        let mut grammar_set = GrammarSet::default_set();
        let result = grammar_set.load_grammar(&path);

        assert!(result.is_ok());
        assert_eq!(grammar_set.detect_language("mini"), Some("MiniSyntax"));

        let _ = fs::remove_file(path);
    }

    #[test]
    fn load_grammar_returns_parse_error_for_invalid_syntax_file() {
        let path = unique_temp_path("invalid");
        fs::write(&path, "not: valid: yaml: [")
            .expect("temporary invalid syntax file must be writable");

        let mut grammar_set = GrammarSet::default_set();
        let result = grammar_set.load_grammar(&path);

        assert!(matches!(result, Err(GrammarLoadError::Parse(_))));

        let _ = fs::remove_file(path);
    }

    #[test]
    fn load_grammar_returns_io_error_for_missing_file() {
        // The unique path is never created, so the read must fail with Io.
        let path = unique_temp_path("missing");
        let mut grammar_set = GrammarSet::default_set();

        let result = grammar_set.load_grammar(&path);

        assert!(matches!(result, Err(GrammarLoadError::Io(_))));
    }

    #[test]
    fn grammar_load_error_formats_and_exposes_sources() {
        let io_error = GrammarLoadError::Io(std::io::Error::other("disk"));
        assert_eq!(io_error.to_string(), "I/O error: disk");
        assert_eq!(
            io_error
                .source()
                .expect("I/O variant must expose its source")
                .to_string(),
            "disk"
        );

        let parse_error = GrammarLoadError::Parse("bad syntax".to_string());
        assert_eq!(parse_error.to_string(), "parse error: bad syntax");
        assert!(parse_error.source().is_none());
    }
}
763}