1use std::ops::Range;
2use std::path::Path;
3
4use syntect::dumps::from_uncompressed_data;
5use syntect::parsing::{ParseState, ScopeStack, SyntaxDefinition, SyntaxReference, SyntaxSet};
6
/// Coarse token classification produced by [`SyntaxHighlighter`].
///
/// Scopes from TextMate grammars are mapped onto these kinds by
/// [`scope_to_token_kind`]; `Plain` is the fallback for anything that has
/// no more specific mapping.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// Language keywords and storage modifiers (`fn`, `let`, `class`, ...).
    Keyword,
    /// Type, class, struct, and enum names.
    Type,
    /// Function and method names (definitions and builtin calls).
    Function,
    /// String literals.
    String,
    /// Numeric literals.
    Number,
    /// Line and block comments.
    Comment,
    /// Operators (`=`, `+`, `&&`, ...).
    Operator,
    /// Brackets, braces, separators, and other punctuation.
    Punctuation,
    /// Variable and parameter names.
    Variable,
    /// Non-numeric constants (`true`, `None`, ...).
    Constant,
    /// Markup tag names (HTML/XML).
    Tag,
    /// Markup attribute names.
    Attribute,
    /// Escape sequences inside strings (`\n`, `\t`, ...).
    Escape,
    /// Unclassified text.
    Plain,
}
40
/// A classified byte range within a single tokenized line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Byte range into the source line (`start..end`, end-exclusive).
    pub range: Range<usize>,
    /// Classification of the text covered by `range`.
    pub kind: TokenKind,
}
49
/// Error returned by [`GrammarSet::load_grammar`].
#[non_exhaustive]
#[derive(Debug)]
pub enum GrammarLoadError {
    /// The grammar file could not be read.
    Io(std::io::Error),
    /// The file was read but is not a valid syntax definition; the
    /// payload is the stringified parser error.
    Parse(String),
}
59
// Lets `?` convert I/O failures (e.g. in `load_grammar`) into
// `GrammarLoadError::Io` automatically.
impl From<std::io::Error> for GrammarLoadError {
    fn from(error: std::io::Error) -> Self {
        Self::Io(error)
    }
}
65
66impl std::fmt::Display for GrammarLoadError {
67 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68 match self {
69 Self::Io(e) => write!(f, "I/O error: {e}"),
70 Self::Parse(msg) => write!(f, "parse error: {msg}"),
71 }
72 }
73}
74
75impl std::error::Error for GrammarLoadError {
76 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
77 match self {
78 Self::Io(e) => Some(e),
79 Self::Parse(_) => None,
80 }
81 }
82}
83
84pub fn scope_to_token_kind(scope: &str) -> TokenKind {
86 if scope.starts_with("keyword.operator") {
87 TokenKind::Operator
88 } else if scope.starts_with("keyword") || scope.starts_with("storage") {
89 TokenKind::Keyword
90 } else if scope.starts_with("entity.name.function") || scope.starts_with("support.function") {
91 TokenKind::Function
92 } else if scope.starts_with("entity.name.type")
93 || scope.starts_with("entity.name.class")
94 || scope.starts_with("entity.name.struct")
95 || scope.starts_with("entity.name.enum")
96 || scope.starts_with("support.type")
97 || scope.starts_with("support.class")
98 {
99 TokenKind::Type
100 } else if scope.starts_with("entity.name.tag") {
101 TokenKind::Tag
102 } else if scope.starts_with("entity.other.attribute-name") {
103 TokenKind::Attribute
104 } else if scope.starts_with("string") {
105 TokenKind::String
106 } else if scope.starts_with("constant.character.escape") {
107 TokenKind::Escape
108 } else if scope.starts_with("constant.numeric") {
109 TokenKind::Number
110 } else if scope.starts_with("constant") {
111 TokenKind::Constant
112 } else if scope.starts_with("comment") {
113 TokenKind::Comment
114 } else if scope.starts_with("variable") {
115 TokenKind::Variable
116 } else if scope.starts_with("punctuation") {
117 TokenKind::Punctuation
118 } else {
119 TokenKind::Plain
120 }
121}
122
123fn resolve_language_alias<'a>(
124 syntax_set: &'a SyntaxSet,
125 language: &str,
126) -> Option<&'a SyntaxReference> {
127 syntax_set
128 .find_syntax_by_token(language)
129 .or_else(|| match language {
130 "TypeScript" | "typescript" | "ts" | "tsx" => {
131 syntax_set.find_syntax_by_token("JavaScript")
132 }
133 "KDL" | "kdl" => syntax_set.find_syntax_by_token("KDL2"),
134 _ => None,
135 })
136}
137
/// Maps a stand-in grammar name back to the user-facing language name for
/// the given file extension; all other names pass through unchanged.
fn display_language_name<'a>(extension: &str, syntax_name: &'a str) -> &'a str {
    if syntax_name == "JavaScript" && (extension == "ts" || extension == "tsx") {
        "TypeScript"
    } else if syntax_name == "KDL2" && extension == "kdl" {
        "KDL"
    } else {
        syntax_name
    }
}
145
/// A collection of loaded TextMate grammars, backed by a syntect
/// [`SyntaxSet`]. Starts from the dump embedded at build time and can be
/// extended with user-supplied `.sublime-syntax` files.
#[derive(Debug, Clone)]
pub struct GrammarSet {
    // The merged, linked syntect syntax set; rebuilt on every grammar load.
    syntax_set: SyntaxSet,
}
151
152impl GrammarSet {
153 pub fn default_set() -> Self {
155 let data = include_bytes!(concat!(env!("OUT_DIR"), "/syntaxes.packdump"));
156 let syntax_set = from_uncompressed_data(data).expect("embedded syntax dump must be valid");
157 Self { syntax_set }
158 }
159
160 pub fn load_grammar(&mut self, path: &Path) -> Result<(), GrammarLoadError> {
162 let original = std::mem::take(&mut self.syntax_set);
163 let result = (|| {
164 let content = std::fs::read_to_string(path)?;
165 let name = path.file_stem().and_then(|stem| stem.to_str());
166 let syntax_definition = SyntaxDefinition::load_from_str(&content, true, name)
167 .map_err(|error| GrammarLoadError::Parse(error.to_string()))?;
168 let mut builder = original.clone().into_builder();
169 builder.add(syntax_definition);
170 Ok(builder.build())
171 })();
172
173 match result {
174 Ok(syntax_set) => {
175 self.syntax_set = syntax_set;
176 Ok(())
177 }
178 Err(error) => {
179 self.syntax_set = original;
180 Err(error)
181 }
182 }
183 }
184
185 pub fn detect_language<'a>(&'a self, extension: &str) -> Option<&'a str> {
187 match extension {
188 "ts" | "tsx" if self.syntax_set.find_syntax_by_token("JavaScript").is_some() => {
189 return Some("TypeScript");
190 }
191 "kdl" if self.syntax_set.find_syntax_by_token("KDL2").is_some() => {
192 return Some("KDL");
193 }
194 _ => {}
195 }
196
197 self.syntax_set
198 .find_syntax_by_extension(extension)
199 .map(|syntax| display_language_name(extension, syntax.name.as_str()))
200 }
201
202 pub fn languages(&self) -> Vec<&str> {
204 self.syntax_set
205 .syntaxes()
206 .iter()
207 .map(|syntax| syntax.name.as_str())
208 .collect()
209 }
210}
211
/// Stateful line-by-line tokenizer for a single language.
///
/// Feed lines in document order via `tokenize_line`; call `reset` before
/// re-highlighting from the top, since parse state carries across lines
/// (e.g. for block comments and multi-line strings).
#[derive(Debug, Clone)]
pub struct SyntaxHighlighter {
    // Owned copy of the grammar set, so the highlighter is self-contained.
    syntax_set: SyntaxSet,
    // syntect parser state carried from one line to the next.
    parse_state: ParseState,
    // Scope stack mirroring the parser's position, used to classify spans.
    scope_stack: ScopeStack,
    // Canonical syntax name (after alias resolution), used by `reset`.
    language_name: String,
}
220
impl SyntaxHighlighter {
    /// Creates a highlighter for `language` (name, extension, or alias such
    /// as `"ts"`), or `None` when no grammar in `grammar_set` matches.
    pub fn new(grammar_set: &GrammarSet, language: &str) -> Option<Self> {
        let syntax_set = grammar_set.syntax_set.clone();
        let syntax = resolve_language_alias(&syntax_set, language)?;
        // Store the resolved grammar's real name (not the caller's alias)
        // so `reset` can find the same syntax again.
        let language_name = syntax.name.clone();

        Some(Self {
            parse_state: ParseState::new(syntax),
            scope_stack: ScopeStack::new(),
            syntax_set,
            language_name,
        })
    }

    /// Tokenizes the next line, advancing internal parse state.
    ///
    /// Returns merged, non-overlapping spans in order. On any parse or
    /// scope error the whole line is returned as a single `Plain` span
    /// rather than propagating the error.
    pub fn tokenize_line(&mut self, line: &str) -> Vec<TokenSpan> {
        let ops = match self.parse_state.parse_line(line, &self.syntax_set) {
            Ok(ops) => ops,
            Err(_) => {
                return vec![TokenSpan {
                    range: 0..line.len(),
                    kind: TokenKind::Plain,
                }];
            }
        };

        let mut spans = Vec::new();
        let mut last_index = 0;

        // Each op is (byte offset, scope change). The text between the
        // previous offset and this one is classified by the scope stack as
        // it stood BEFORE this op is applied — so the span must be pushed
        // before `apply`.
        for (index, op) in &ops {
            // Defensive clamp: offsets should never exceed the line length.
            let index = (*index).min(line.len());
            if index > last_index {
                let kind = self.current_token_kind();
                Self::push_span(&mut spans, last_index..index, kind);
            }

            if self.scope_stack.apply(op).is_err() {
                return vec![TokenSpan {
                    range: 0..line.len(),
                    kind: TokenKind::Plain,
                }];
            }
            last_index = index;
        }

        // Trailing text after the last scope op.
        if last_index < line.len() {
            let kind = self.current_token_kind();
            Self::push_span(&mut spans, last_index..line.len(), kind);
        }

        spans
    }

    /// Restores the highlighter to its initial (top-of-document) state.
    pub fn reset(&mut self) {
        // `language_name` was taken from this very syntax set in `new`, so
        // the lookup cannot fail.
        let syntax = self
            .syntax_set
            .find_syntax_by_token(&self.language_name)
            .expect("syntax must exist in owned syntax set");
        self.parse_state = ParseState::new(syntax);
        self.scope_stack = ScopeStack::new();
    }

    /// Classifies the current position: the innermost (topmost) scope with
    /// a non-`Plain` mapping wins; otherwise `Plain`.
    fn current_token_kind(&self) -> TokenKind {
        self.scope_stack
            .as_slice()
            .iter()
            .rev()
            .map(|scope| scope_to_token_kind(&scope.build_string()))
            .find(|kind| *kind != TokenKind::Plain)
            .unwrap_or(TokenKind::Plain)
    }

    /// Appends `range` to `spans`, merging it into the previous span when
    /// both kind and position are contiguous; empty ranges are dropped.
    fn push_span(spans: &mut Vec<TokenSpan>, range: Range<usize>, kind: TokenKind) {
        if range.is_empty() {
            return;
        }

        if let Some(previous) = spans.last_mut() {
            if previous.kind == kind && previous.range.end == range.start {
                previous.range.end = range.end;
                return;
            }
        }

        spans.push(TokenSpan { range, kind });
    }
}
310
311#[cfg(test)]
312mod tests {
313 use super::*;
314
315 use std::error::Error;
316 use std::fs;
317 use std::time::{SystemTime, UNIX_EPOCH};
318
319 fn has_token(spans: &[TokenSpan], kind: TokenKind) -> bool {
320 spans.iter().any(|span| span.kind == kind)
321 }
322
323 fn token_text<'a>(line: &'a str, spans: &[TokenSpan], kind: TokenKind) -> Vec<&'a str> {
324 spans
325 .iter()
326 .filter(|span| span.kind == kind)
327 .map(|span| &line[span.range.clone()])
328 .collect()
329 }
330
331 fn unique_temp_path(name: &str) -> std::path::PathBuf {
332 let nanos = SystemTime::now()
333 .duration_since(UNIX_EPOCH)
334 .expect("system time must be after unix epoch")
335 .as_nanos();
336 std::env::temp_dir().join(format!(
337 "neco-syntax-textmate-{name}-{nanos}.sublime-syntax"
338 ))
339 }
340
    // Table-driven check that every supported scope prefix maps to the
    // expected kind, including the empty and unknown-scope edge cases.
    #[test]
    fn scope_to_token_kind_covers_supported_variants_and_edges() {
        let cases = [
            ("keyword.control.rust", TokenKind::Keyword),
            ("storage.type.function.rust", TokenKind::Keyword),
            ("entity.name.function.rust", TokenKind::Function),
            ("support.function.builtin.python", TokenKind::Function),
            ("entity.name.type.struct.rust", TokenKind::Type),
            ("entity.name.class.typescript", TokenKind::Type),
            ("entity.name.struct.rust", TokenKind::Type),
            ("entity.name.enum.rust", TokenKind::Type),
            ("support.type.primitive.ts", TokenKind::Type),
            ("support.class.python", TokenKind::Type),
            ("entity.name.tag.html", TokenKind::Tag),
            ("entity.other.attribute-name.html", TokenKind::Attribute),
            ("string.quoted.double", TokenKind::String),
            ("constant.character.escape.rust", TokenKind::Escape),
            ("constant.numeric.decimal", TokenKind::Number),
            ("constant.language.boolean", TokenKind::Constant),
            ("comment.line.double-slash", TokenKind::Comment),
            ("variable.parameter.function", TokenKind::Variable),
            ("punctuation.section.block.begin", TokenKind::Punctuation),
            ("", TokenKind::Plain),
            ("meta.embedded.unknown", TokenKind::Plain),
        ];

        for (scope, expected) in cases {
            assert_eq!(scope_to_token_kind(scope), expected, "scope={scope}");
        }
    }

    // "keyword.operator.*" must win over the broader "keyword" prefix.
    #[test]
    fn scope_to_token_kind_prefers_operator_before_keyword() {
        assert_eq!(
            scope_to_token_kind("keyword.operator.assignment"),
            TokenKind::Operator
        );
    }
379
    // The embedded dump must expose both syntect's default languages and
    // the extra bundled grammars, reported under their user-facing names.
    #[test]
    fn grammar_set_default_set_finds_default_and_bundled_languages() {
        let grammar_set = GrammarSet::default_set();

        assert_eq!(grammar_set.detect_language("rs"), Some("Rust"));
        assert_eq!(grammar_set.detect_language("ts"), Some("TypeScript"));
        assert_eq!(grammar_set.detect_language("json"), Some("JSON"));
        assert_eq!(grammar_set.detect_language("py"), Some("Python"));
        assert_eq!(grammar_set.detect_language("kdl"), Some("KDL"));
        assert_eq!(grammar_set.detect_language("fish"), Some("Fish"));
        assert_eq!(grammar_set.detect_language("nix"), Some("Nix"));
        assert_eq!(grammar_set.detect_language("typ"), Some("Typst"));
        assert_eq!(grammar_set.detect_language("pkl"), Some("Pkl"));
        assert_eq!(grammar_set.detect_language("mojo"), Some("Mojo"));
        assert_eq!(grammar_set.detect_language("does-not-exist"), None);
        assert!(!grammar_set.languages().is_empty());
    }
397
    // Rust: keyword/function/number/operator/punctuation spans must appear
    // at the expected tokens across a small three-line program.
    #[test]
    fn syntax_highlighter_tokenizes_rust_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Rust").expect("Rust syntax must exist");

        let line1 = "fn main() {\n";
        let line2 = "    let x = 42;\n";
        let line3 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"fn"));
        assert!(has_token(&spans1, TokenKind::Function));
        assert!(token_text(line1, &spans1, TokenKind::Function).contains(&"main"));
        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(has_token(&spans2, TokenKind::Keyword));
        assert!(token_text(line2, &spans2, TokenKind::Keyword).contains(&"let"));
        assert!(has_token(&spans2, TokenKind::Number));
        assert!(token_text(line2, &spans2, TokenKind::Number).contains(&"42"));
        assert!(has_token(&spans2, TokenKind::Operator));
        assert!(token_text(line2, &spans2, TokenKind::Operator).contains(&"="));
        assert!(has_token(&spans3, TokenKind::Punctuation));
    }

    // TypeScript resolves via the JavaScript grammar alias; key tokens must
    // still be classified correctly.
    #[test]
    fn syntax_highlighter_tokenizes_typescript_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter = SyntaxHighlighter::new(&grammar_set, "TypeScript")
            .expect("TypeScript syntax must exist");

        let line1 = "const foo = hello;\n";
        let line2 = "console.log(foo);\n";
        let line3 = "let n = 1;\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"const"));
        assert!(has_token(&spans1, TokenKind::Operator));
        assert!(token_text(line1, &spans1, TokenKind::Operator).contains(&"="));
        assert!(has_token(&spans2, TokenKind::Function));
        assert!(token_text(line2, &spans2, TokenKind::Function).contains(&"log"));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"let"));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"1"));
    }

    // Python: keywords, parameters, builtins, f-strings, and numbers.
    #[test]
    fn syntax_highlighter_tokenizes_python_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Python").expect("Python syntax must exist");

        let line1 = "def greet(name):\n";
        let line2 = "    print(f\"Hello {name}\")\n";
        let line3 = "    return 7\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"def"));
        assert!(has_token(&spans1, TokenKind::Variable));
        assert!(token_text(line1, &spans1, TokenKind::Variable).contains(&"name"));
        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(has_token(&spans2, TokenKind::Function));
        assert!(token_text(line2, &spans2, TokenKind::Function).contains(&"print"));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("Hello")));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"return"));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"7"));
    }
479
    // JSON: string keys/values, numbers, and brace punctuation.
    #[test]
    fn syntax_highlighter_tokenizes_json_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "JSON").expect("JSON syntax must exist");

        let line1 = "{\"key\": \"value\",\n";
        let line2 = "  \"num\": 42\n";
        let line3 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::String));
        assert!(token_text(line1, &spans1, TokenKind::String)
            .iter()
            .any(|text| text.contains("key")));
        assert!(token_text(line1, &spans1, TokenKind::String)
            .iter()
            .any(|text| text.contains("value")));
        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(token_text(line1, &spans1, TokenKind::Punctuation)
            .iter()
            .any(|text| text.contains('{')));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("num")));
        assert!(has_token(&spans2, TokenKind::Number));
        assert!(token_text(line2, &spans2, TokenKind::Number).contains(&"42"));
        assert!(has_token(&spans3, TokenKind::Punctuation));
        assert!(token_text(line3, &spans3, TokenKind::Punctuation)
            .iter()
            .any(|text| text.contains('}')));
    }

    // KDL resolves via the KDL2 grammar alias; strings, comments, and
    // braces must be classified.
    #[test]
    fn syntax_highlighter_tokenizes_kdl_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "KDL").expect("KDL syntax must exist");

        let line1 = "node \"arg\" key=42\n";
        let line2 = "// comment\n";
        let line3 = "{\n";
        let line4 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);
        let spans4 = highlighter.tokenize_line(line4);

        assert!(has_token(&spans1, TokenKind::String));
        assert!(token_text(line1, &spans1, TokenKind::String)
            .iter()
            .any(|text| text.contains("arg")));
        assert!(has_token(&spans2, TokenKind::Comment));
        assert!(token_text(line2, &spans2, TokenKind::Comment)
            .iter()
            .any(|text| text.contains("comment")));
        assert!(has_token(&spans3, TokenKind::Punctuation));
        assert!(has_token(&spans4, TokenKind::Punctuation));
    }

    // Fish: function/end keywords and quoted strings.
    #[test]
    fn syntax_highlighter_tokenizes_fish_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Fish").expect("Fish syntax must exist");

        let line1 = "function greet\n";
        let line2 = "    echo \"Hello\"\n";
        let line3 = "end\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"function"));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("Hello")));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"end"));
    }
565
    // Typst: a loose smoke test (grammar details vary), but numbers in
    // code mode must be classified.
    #[test]
    fn syntax_highlighter_tokenizes_typst_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Typst").expect("Typst syntax must exist");

        let line1 = "= Heading\n";
        let line2 = "#let x = 42\n";
        let line3 = "Some text\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(!spans1.is_empty() || !spans2.is_empty() || !spans3.is_empty());
        assert!(has_token(&spans2, TokenKind::Number));
        assert!(token_text(line2, &spans2, TokenKind::Number).contains(&"42"));
    }

    // Nix: braces must be punctuation; identifier classification is
    // grammar-dependent, so accept keyword or variable on any line.
    #[test]
    fn syntax_highlighter_tokenizes_nix_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Nix").expect("Nix syntax must exist");

        let line1 = "{ pkgs ? import <nixpkgs> {} }:\n";
        let line2 = "pkgs.mkShell {\n";
        let line3 = "  buildInputs = [ pkgs.hello ];\n";
        let line4 = "}\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);
        let spans4 = highlighter.tokenize_line(line4);

        assert!(has_token(&spans1, TokenKind::Punctuation));
        assert!(token_text(line1, &spans1, TokenKind::Punctuation).contains(&"{"));
        assert!(
            has_token(&spans1, TokenKind::Keyword)
                || has_token(&spans1, TokenKind::Variable)
                || has_token(&spans2, TokenKind::Keyword)
                || has_token(&spans2, TokenKind::Variable)
                || has_token(&spans3, TokenKind::Keyword)
                || has_token(&spans3, TokenKind::Variable)
        );
        assert!(has_token(&spans4, TokenKind::Punctuation));
        assert!(token_text(line4, &spans4, TokenKind::Punctuation).contains(&"}"));
    }

    // Pkl: strings, keywords, and numbers in a small config snippet.
    #[test]
    fn syntax_highlighter_tokenizes_pkl_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Pkl").expect("Pkl syntax must exist");

        let line1 = "module MyConfig\n";
        let line2 = "name: String = \"hello\"\n";
        let line3 = "if (count > 0) 42 else 0\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(!spans1.is_empty());
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("hello")));
        assert!(has_token(&spans3, TokenKind::Keyword));
        assert!(token_text(line3, &spans3, TokenKind::Keyword).contains(&"if"));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"42"));
    }

    // Mojo: fn keyword, builtin calls, strings, and numbers.
    #[test]
    fn syntax_highlighter_tokenizes_mojo_key_tokens() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Mojo").expect("Mojo syntax must exist");

        let line1 = "fn greet(name: String):\n";
        let line2 = "    print(\"Hello\")\n";
        let line3 = "    let x = 42\n";
        let spans1 = highlighter.tokenize_line(line1);
        let spans2 = highlighter.tokenize_line(line2);
        let spans3 = highlighter.tokenize_line(line3);

        assert!(has_token(&spans1, TokenKind::Keyword));
        assert!(token_text(line1, &spans1, TokenKind::Keyword).contains(&"fn"));
        assert!(has_token(&spans2, TokenKind::Function));
        assert!(token_text(line2, &spans2, TokenKind::Function).contains(&"print"));
        assert!(has_token(&spans2, TokenKind::String));
        assert!(token_text(line2, &spans2, TokenKind::String)
            .iter()
            .any(|text| text.contains("Hello")));
        assert!(has_token(&spans3, TokenKind::Number));
        assert!(token_text(line3, &spans3, TokenKind::Number).contains(&"42"));
    }
661
    // After `reset`, replaying the same prefix lines must reproduce the
    // same spans, proving cross-line parse state was fully cleared.
    #[test]
    fn syntax_highlighter_reset_restores_initial_state() {
        let grammar_set = GrammarSet::default_set();
        let mut highlighter =
            SyntaxHighlighter::new(&grammar_set, "Rust").expect("Rust syntax must exist");

        let prior_line = "fn main() {\n";
        let target_line = "    let x = 42;\n";

        let _ = highlighter.tokenize_line(prior_line);
        let before_reset = highlighter.tokenize_line(target_line);
        highlighter.reset();
        let _ = highlighter.tokenize_line(prior_line);
        let after_reset = highlighter.tokenize_line(target_line);

        assert_eq!(before_reset, after_reset);
    }

    // Lowercase language tokens must resolve, via either the explicit
    // alias table or syntect's case-insensitive token matching.
    #[test]
    fn syntax_highlighter_accepts_lowercase_language_aliases() {
        let grammar_set = GrammarSet::default_set();

        assert!(
            SyntaxHighlighter::new(&grammar_set, "typescript").is_some(),
            "lowercase 'typescript' must resolve via alias"
        );
        assert!(
            SyntaxHighlighter::new(&grammar_set, "kdl").is_some(),
            "lowercase 'kdl' must resolve via alias"
        );
        assert!(
            SyntaxHighlighter::new(&grammar_set, "rust").is_some(),
            "lowercase 'rust' must resolve via case-insensitive name match"
        );
    }
697
    // Loading a well-formed .sublime-syntax file must succeed and make the
    // new extension detectable under the grammar's declared name.
    #[test]
    fn load_grammar_succeeds_for_valid_syntax_file() {
        let path = unique_temp_path("valid");
        fs::write(
            &path,
            r#"name: MiniSyntax
file_extensions: [mini]
scope: source.mini
contexts:
  main:
    - match: '\b(todo)\b'
      scope: keyword.control.mini
"#,
        )
        .expect("temporary syntax file must be writable");

        let mut grammar_set = GrammarSet::default_set();
        let result = grammar_set.load_grammar(&path);

        assert!(result.is_ok());
        assert_eq!(grammar_set.detect_language("mini"), Some("MiniSyntax"));

        let _ = fs::remove_file(path);
    }

    // A file that exists but is not valid YAML must surface as Parse, not Io.
    #[test]
    fn load_grammar_returns_parse_error_for_invalid_syntax_file() {
        let path = unique_temp_path("invalid");
        fs::write(&path, "not: valid: yaml: [")
            .expect("temporary invalid syntax file must be writable");

        let mut grammar_set = GrammarSet::default_set();
        let result = grammar_set.load_grammar(&path);

        assert!(matches!(result, Err(GrammarLoadError::Parse(_))));

        let _ = fs::remove_file(path);
    }

    // A missing file must surface as Io.
    #[test]
    fn load_grammar_returns_io_error_for_missing_file() {
        let path = unique_temp_path("missing");
        let mut grammar_set = GrammarSet::default_set();

        let result = grammar_set.load_grammar(&path);

        assert!(matches!(result, Err(GrammarLoadError::Io(_))));
    }

    // Display formatting and Error::source for both error variants.
    #[test]
    fn grammar_load_error_formats_and_exposes_sources() {
        let io_error = GrammarLoadError::Io(std::io::Error::other("disk"));
        assert_eq!(io_error.to_string(), "I/O error: disk");
        assert_eq!(
            io_error
                .source()
                .expect("I/O variant must expose its source")
                .to_string(),
            "disk"
        );

        let parse_error = GrammarLoadError::Parse("bad syntax".to_string());
        assert_eq!(parse_error.to_string(), "parse error: bad syntax");
        assert!(parse_error.source().is_none());
    }
763}