1use serde::Deserialize;
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
/// TOML grammar (`.sublime-syntax` text), embedded into the binary at compile time.
12pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");
14
/// Odin grammar (`.sublime-syntax` text), embedded into the binary at compile time.
15pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");
18
/// Zig grammar (`.sublime-syntax` text), embedded into the binary at compile time.
19pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");
21
/// Git rebase todo-list grammar (`.sublime-syntax` text), embedded at compile time.
22pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");
24
/// Git commit message grammar (`.sublime-syntax` text), embedded at compile time.
25pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");
27
/// Gitignore grammar (`.sublime-syntax` text), embedded at compile time.
28pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");
30
/// Git config grammar (`.sublime-syntax` text), embedded at compile time.
31pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");
33
/// Git attributes grammar (`.sublime-syntax` text), embedded at compile time.
34pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");
36
/// Holds the syntect syntax set together with the lookup tables used to map
/// file extensions and exact file names onto syntax scopes.
37pub struct GrammarRegistry {
 /// Shared syntax set that every lookup method queries.
42 syntax_set: Arc<SyntaxSet>,
 /// File extension -> scope string. Consulted before syntect's own
 /// extension lookup in `find_syntax_for_file`.
44 user_extensions: HashMap<String, String>,
 /// Exact file name -> scope string (see `build_filename_scopes`).
46 filename_scopes: HashMap<String, String>,
 /// `(language, grammar file path, extensions)` for each grammar file that
 /// `with_additional_grammars` loaded, kept so the files can be reloaded
 /// when the syntax set is rebuilt.
48 loaded_grammar_paths: Vec<(String, PathBuf, Vec<String>)>,
50}
51
52impl GrammarRegistry {
53 pub fn new(
58 syntax_set: SyntaxSet,
59 user_extensions: HashMap<String, String>,
60 filename_scopes: HashMap<String, String>,
61 ) -> Self {
62 Self {
63 syntax_set: Arc::new(syntax_set),
64 user_extensions,
65 filename_scopes,
66 loaded_grammar_paths: Vec::new(),
67 }
68 }
69
70 pub fn empty() -> Arc<Self> {
72 let mut builder = SyntaxSetBuilder::new();
73 builder.add_plain_text_syntax();
74 Arc::new(Self {
75 syntax_set: Arc::new(builder.build()),
76 user_extensions: HashMap::new(),
77 filename_scopes: HashMap::new(),
78 loaded_grammar_paths: Vec::new(),
79 })
80 }
81
/// Builds the table that maps well-known extensionless / dot file names to
/// the scope string of the syntax that should highlight them.
///
/// Each file name appears in exactly one group, so every key is inserted once.
pub fn build_filename_scopes() -> HashMap<String, String> {
    // One row per scope: (scope string, file names that should use it).
    const SCOPE_TABLE: &[(&str, &[&str])] = &[
        (
            "source.shell.bash",
            &[
                ".zshrc",
                ".zprofile",
                ".zshenv",
                ".zlogin",
                ".zlogout",
                ".bash_aliases",
                "PKGBUILD",
                "APKBUILD",
            ],
        ),
        ("source.git-rebase-todo", &["git-rebase-todo"]),
        (
            "source.git-commit",
            &["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"],
        ),
        (
            "source.gitignore",
            &[".gitignore", ".dockerignore", ".npmignore", ".hgignore"],
        ),
        ("source.gitconfig", &[".gitconfig", ".gitmodules"]),
        ("source.gitattributes", &[".gitattributes"]),
    ];

    SCOPE_TABLE
        .iter()
        .flat_map(|&(scope, filenames)| {
            filenames
                .iter()
                .map(move |&filename| (filename.to_string(), scope.to_string()))
        })
        .collect()
}
131
132 pub fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
134 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
136 Ok(syntax) => {
137 builder.add(syntax);
138 tracing::debug!("Loaded embedded TOML grammar");
139 }
140 Err(e) => {
141 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
142 }
143 }
144
145 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
147 Ok(syntax) => {
148 builder.add(syntax);
149 tracing::debug!("Loaded embedded Odin grammar");
150 }
151 Err(e) => {
152 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
153 }
154 }
155
156 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
158 Ok(syntax) => {
159 builder.add(syntax);
160 tracing::debug!("Loaded embedded Zig grammar");
161 }
162 Err(e) => {
163 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
164 }
165 }
166
167 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
169 Ok(syntax) => {
170 builder.add(syntax);
171 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
172 }
173 Err(e) => {
174 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
175 }
176 }
177
178 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
180 {
181 Ok(syntax) => {
182 builder.add(syntax);
183 tracing::debug!("Loaded embedded Git Commit Message grammar");
184 }
185 Err(e) => {
186 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
187 }
188 }
189
190 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
192 Ok(syntax) => {
193 builder.add(syntax);
194 tracing::debug!("Loaded embedded Gitignore grammar");
195 }
196 Err(e) => {
197 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
198 }
199 }
200
201 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
203 Ok(syntax) => {
204 builder.add(syntax);
205 tracing::debug!("Loaded embedded Git Config grammar");
206 }
207 Err(e) => {
208 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
209 }
210 }
211
212 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
214 Ok(syntax) => {
215 builder.add(syntax);
216 tracing::debug!("Loaded embedded Git Attributes grammar");
217 }
218 Err(e) => {
219 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
220 }
221 }
222 }
223
224 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
232 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
234 if let Some(scope) = self.user_extensions.get(ext) {
236 tracing::info!("[SYNTAX DEBUG] find_syntax_for_file: found ext '{}' in user_extensions -> scope '{}'", ext, scope);
237 if let Some(syntax) = syntect::parsing::Scope::new(scope)
238 .ok()
239 .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
240 {
241 tracing::info!(
242 "[SYNTAX DEBUG] find_syntax_for_file: found syntax by scope: {}",
243 syntax.name
244 );
245 return Some(syntax);
246 } else {
247 tracing::info!(
248 "[SYNTAX DEBUG] find_syntax_for_file: scope '{}' not found in syntax_set",
249 scope
250 );
251 }
252 } else {
253 tracing::info!(
254 "[SYNTAX DEBUG] find_syntax_for_file: ext '{}' NOT in user_extensions",
255 ext
256 );
257 }
258
259 if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
261 tracing::info!(
262 "[SYNTAX DEBUG] find_syntax_for_file: found by syntect extension: {}",
263 syntax.name
264 );
265 return Some(syntax);
266 }
267 }
268
269 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
271 if let Some(scope) = self.filename_scopes.get(filename) {
272 if let Some(syntax) = syntect::parsing::Scope::new(scope)
273 .ok()
274 .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
275 {
276 return Some(syntax);
277 }
278 }
279 }
280
281 if let Ok(Some(syntax)) = self.syntax_set.find_syntax_for_file(path) {
284 return Some(syntax);
285 }
286
287 tracing::info!(
288 "[SYNTAX DEBUG] find_syntax_for_file: no syntax found for {:?}",
289 path
290 );
291 None
292 }
293
294 pub fn find_syntax_for_file_with_languages(
306 &self,
307 path: &Path,
308 languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
309 ) -> Option<&SyntaxReference> {
310 let extension = path.extension().and_then(|e| e.to_str());
311 tracing::info!(
312 "[SYNTAX DEBUG] find_syntax_for_file_with_languages: path={:?}, ext={:?}, languages_config_keys={:?}",
313 path,
314 extension,
315 languages.keys().collect::<Vec<_>>()
316 );
317
318 if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
320 for (lang_name, lang_config) in languages.iter() {
321 if lang_config.filenames.iter().any(|f| f == filename) {
322 tracing::info!(
323 "[SYNTAX DEBUG] filename match: {} -> grammar '{}'",
324 lang_name,
325 lang_config.grammar
326 );
327 if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
329 tracing::info!(
330 "[SYNTAX DEBUG] found syntax by grammar name: {}",
331 syntax.name
332 );
333 return Some(syntax);
334 }
335 if !lang_config.extensions.is_empty() {
338 if let Some(ext) = lang_config.extensions.first() {
339 if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
340 tracing::info!(
341 "[SYNTAX DEBUG] found syntax by extension fallback: {}",
342 syntax.name
343 );
344 return Some(syntax);
345 }
346 }
347 }
348 }
349 }
350 }
351
352 if let Some(extension) = extension {
354 for (lang_name, lang_config) in languages.iter() {
355 if lang_config.extensions.iter().any(|ext| ext == extension) {
356 tracing::info!(
357 "[SYNTAX DEBUG] extension match in config: ext={}, lang={}, grammar='{}'",
358 extension,
359 lang_name,
360 lang_config.grammar
361 );
362 if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
364 tracing::info!(
365 "[SYNTAX DEBUG] found syntax by grammar name: {}",
366 syntax.name
367 );
368 return Some(syntax);
369 } else {
370 tracing::info!(
371 "[SYNTAX DEBUG] grammar name '{}' not found in registry",
372 lang_config.grammar
373 );
374 }
375 }
376 }
377 }
378
379 tracing::info!("[SYNTAX DEBUG] falling back to find_syntax_for_file");
381 let result = self.find_syntax_for_file(path);
382 tracing::info!(
383 "[SYNTAX DEBUG] find_syntax_for_file result: {:?}",
384 result.map(|s| &s.name)
385 );
386 result
387 }
388
389 pub fn find_syntax_by_first_line(&self, first_line: &str) -> Option<&SyntaxReference> {
393 self.syntax_set.find_syntax_by_first_line(first_line)
394 }
395
396 pub fn find_syntax_by_scope(&self, scope: &str) -> Option<&SyntaxReference> {
398 let scope = syntect::parsing::Scope::new(scope).ok()?;
399 self.syntax_set.find_syntax_by_scope(scope)
400 }
401
402 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
407 if let Some(syntax) = self.syntax_set.find_syntax_by_name(name) {
409 return Some(syntax);
410 }
411 let name_lower = name.to_lowercase();
413 self.syntax_set
414 .syntaxes()
415 .iter()
416 .find(|s| s.name.to_lowercase() == name_lower)
417 }
418
419 pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
421 &self.syntax_set
422 }
423
424 pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
426 Arc::clone(&self.syntax_set)
427 }
428
429 pub fn available_syntaxes(&self) -> Vec<&str> {
431 self.syntax_set
432 .syntaxes()
433 .iter()
434 .map(|s| s.name.as_str())
435 .collect()
436 }
437
438 pub fn user_extensions_debug(&self) -> String {
440 format!("{:?}", self.user_extensions.keys().collect::<Vec<_>>())
441 }
442
443 pub fn has_syntax_for_extension(&self, ext: &str) -> bool {
445 if self.user_extensions.contains_key(ext) {
446 return true;
447 }
448
449 let dummy_path = PathBuf::from(format!("file.{}", ext));
451 self.syntax_set
452 .find_syntax_for_file(&dummy_path)
453 .ok()
454 .flatten()
455 .is_some()
456 }
457
458 pub fn user_extensions(&self) -> &HashMap<String, String> {
460 &self.user_extensions
461 }
462
463 pub fn filename_scopes(&self) -> &HashMap<String, String> {
465 &self.filename_scopes
466 }
467
468 pub fn with_additional_grammars(
480 base: &GrammarRegistry,
481 additional: &[(String, PathBuf, Vec<String>)],
482 ) -> Option<Self> {
483 tracing::info!(
484 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars, base has {} user_extensions, {} previously loaded grammars",
485 additional.len(),
486 base.user_extensions.len(),
487 base.loaded_grammar_paths.len()
488 );
489
490 let defaults = SyntaxSet::load_defaults_newlines();
492 let mut builder = defaults.into_builder();
493 Self::add_embedded_grammars(&mut builder);
494
495 let mut user_extensions = HashMap::new();
497
498 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
500
501 for (language, path, extensions) in &base.loaded_grammar_paths {
503 tracing::info!(
504 "[SYNTAX DEBUG] reloading existing grammar: lang='{}', path={:?}",
505 language,
506 path
507 );
508 match Self::load_grammar_file(path) {
509 Ok(syntax) => {
510 let scope = syntax.scope.to_string();
511 builder.add(syntax);
512 for ext in extensions {
513 user_extensions.insert(ext.clone(), scope.clone());
514 }
515 }
516 Err(e) => {
517 tracing::warn!(
518 "Failed to reload grammar for '{}' from {:?}: {}",
519 language,
520 path,
521 e
522 );
523 }
524 }
525 }
526
527 for (language, path, extensions) in additional {
529 tracing::info!(
530 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
531 language,
532 path,
533 extensions
534 );
535 match Self::load_grammar_file(path) {
536 Ok(syntax) => {
537 let scope = syntax.scope.to_string();
538 tracing::info!(
539 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
540 syntax.name,
541 scope
542 );
543 builder.add(syntax);
544 tracing::info!(
545 "Loaded grammar for '{}' from {:?} with extensions {:?}",
546 language,
547 path,
548 extensions
549 );
550 for ext in extensions {
552 user_extensions.insert(ext.clone(), scope.clone());
553 }
554 loaded_grammar_paths.push((language.clone(), path.clone(), extensions.clone()));
556 }
557 Err(e) => {
558 tracing::warn!(
559 "Failed to load grammar for '{}' from {:?}: {}",
560 language,
561 path,
562 e
563 );
564 }
565 }
566 }
567
568 Some(Self {
569 syntax_set: Arc::new(builder.build()),
570 user_extensions,
571 filename_scopes: base.filename_scopes.clone(),
572 loaded_grammar_paths,
573 })
574 }
575
576 fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
582 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
583
584 match ext {
585 "sublime-syntax" => {
586 let content = std::fs::read_to_string(path)
587 .map_err(|e| format!("Failed to read file: {}", e))?;
588 SyntaxDefinition::load_from_str(
589 &content,
590 true,
591 path.file_stem().and_then(|s| s.to_str()),
592 )
593 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
594 }
595 _ => Err(format!(
596 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
597 ext
598 )),
599 }
600 }
601}
602
603impl Default for GrammarRegistry {
604 fn default() -> Self {
605 let defaults = SyntaxSet::load_defaults_newlines();
607 let mut builder = defaults.into_builder();
608 Self::add_embedded_grammars(&mut builder);
609 let syntax_set = builder.build();
610 let filename_scopes = Self::build_filename_scopes();
611
612 Self::new(syntax_set, HashMap::new(), filename_scopes)
613 }
614}
615
/// Deserializable package manifest; only the `contributes` section is modeled.
// NOTE(review): field names resemble a VS Code extension `package.json` — confirm the source format.
616#[derive(Debug, Deserialize)]
619pub struct PackageManifest {
 /// Contribution section; `None` when the key is missing from the input.
620 #[serde(default)]
621 pub contributes: Option<Contributes>,
622}
623
/// Language and grammar contributions declared by a package manifest.
624#[derive(Debug, Deserialize, Default)]
625pub struct Contributes {
 /// Declared languages; empty when the key is missing from the input.
626 #[serde(default)]
627 pub languages: Vec<LanguageContribution>,
 /// Declared grammars; empty when the key is missing from the input.
628 #[serde(default)]
629 pub grammars: Vec<GrammarContribution>,
630}
631
/// A single language entry from the manifest's contributions.
632#[derive(Debug, Deserialize)]
633pub struct LanguageContribution {
 /// Language identifier (required).
634 pub id: String,
 /// Extensions associated with the language; empty when the key is missing.
 // NOTE(review): whether entries include a leading dot is not visible here — confirm against the consumer.
635 #[serde(default)]
636 pub extensions: Vec<String>,
637}
638
/// A single grammar entry from the manifest's contributions. All fields are required.
639#[derive(Debug, Deserialize)]
640pub struct GrammarContribution {
 /// Language this grammar is declared for.
 // NOTE(review): presumably matches a `LanguageContribution::id` — confirm.
641 pub language: String,
 /// Scope name of the grammar; read from the `scopeName` key.
642 #[serde(rename = "scopeName")]
643 pub scope_name: String,
 /// Path to the grammar file, as written in the manifest.
644 pub path: String,
645}
646
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `filename` resolves to a syntax whose name mentions
    /// "bash" or "shell" (case-insensitively).
    fn assert_detected_as_shell(registry: &GrammarRegistry, filename: &str) {
        let result = registry.find_syntax_for_file(Path::new(filename));
        assert!(
            result.is_some(),
            "{} should be detected as a syntax",
            filename
        );

        let syntax = result.unwrap();
        let lowered_name = syntax.name.to_lowercase();
        assert!(
            lowered_name.contains("bash") || lowered_name.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }

    #[test]
    fn test_empty_registry() {
        // Even the empty registry carries the plain-text syntax.
        let registry = GrammarRegistry::empty();
        let names = registry.available_syntaxes();
        assert!(!names.is_empty());
    }

    #[test]
    fn test_default_registry() {
        let registry = GrammarRegistry::default();
        let names = registry.available_syntaxes();
        assert!(!names.is_empty());
    }

    #[test]
    fn test_find_syntax_for_common_extensions() {
        let registry = GrammarRegistry::default();

        // (file name, whether a syntax is expected to be found)
        let expectations = [
            ("test.py", true),
            ("test.rs", true),
            ("test.js", true),
            ("test.json", true),
            ("test.md", true),
            ("test.html", true),
            ("test.css", true),
            ("test.unknown_extension_xyz", false),
        ];

        for &(filename, should_exist) in expectations.iter() {
            let found = registry.find_syntax_for_file(Path::new(filename)).is_some();
            assert_eq!(
                found, should_exist,
                "Expected {:?} for {}",
                should_exist, filename
            );
        }
    }

    #[test]
    fn test_syntax_set_arc() {
        // Both handles must point at the same allocation.
        let registry = GrammarRegistry::default();
        let first = registry.syntax_set_arc();
        let second = registry.syntax_set_arc();
        assert!(Arc::ptr_eq(&first, &second));
    }

    #[test]
    fn test_shell_dotfiles_detection() {
        let registry = GrammarRegistry::default();

        for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
            assert_detected_as_shell(&registry, filename);
        }
    }

    #[test]
    fn test_pkgbuild_detection() {
        let registry = GrammarRegistry::default();

        for filename in ["PKGBUILD", "APKBUILD"] {
            assert_detected_as_shell(&registry, filename);
        }
    }
}