fresh/primitives/grammar/
types.rs

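//! Pure grammar registry types that perform no grammar-loading I/O.
//!
//! This module contains the `GrammarRegistry` struct and all syntax lookup methods.
//! Grammars are embedded at compile time or supplied pre-built, so building and
//! querying the registry does not require filesystem access. This enables WASM
//! compatibility and easier testing. One caveat: `find_syntax_for_file` falls back
//! to syntect's file detection as a last resort, which may read the file's first line.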
5
6use serde::Deserialize;
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
// Every grammar below is a `.sublime-syntax` definition compiled into the
// binary with `include_str!` (paths are relative to this source file) and is
// registered by `GrammarRegistry::add_embedded_grammars`.

/// Embedded TOML grammar (syntect doesn't include one)
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar (syntect doesn't include one)
/// From: https://github.com/Tetralux/sublime-odin (MIT License)
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar (syntect doesn't include one)
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded Git Rebase Todo grammar for interactive rebase
/// (matched to `git-rebase-todo` files by the default filename mappings)
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded Gitignore grammar for .gitignore and similar files
/// (the default filename mappings also route .dockerignore, .npmignore and .hgignore here)
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded Git Config grammar for .gitconfig, .gitmodules
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded Git Attributes grammar for .gitattributes
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");
36
/// Registry of all available TextMate grammars.
///
/// This struct holds the compiled syntax set and provides lookup methods.
/// It does not perform I/O directly - use `GrammarLoader` for loading grammars.
///
/// Besides the syntax set itself, the registry carries two lookup tables that
/// map an extension or an exact file name to a scope name; the scope is then
/// resolved against the syntax set at query time.
pub struct GrammarRegistry {
    /// Combined syntax set (built-in + embedded + user grammars).
    /// Held in an `Arc` so it can be handed out via `syntax_set_arc` without copying.
    syntax_set: Arc<SyntaxSet>,
    /// Extension -> scope name mapping for user grammars (takes priority
    /// over the syntax set's own extension lookup in `find_syntax_for_file`)
    user_extensions: HashMap<String, String>,
    /// Filename -> scope name mapping for dotfiles and special files
    /// (keys are exact file names such as `.zshrc` or `PKGBUILD`)
    filename_scopes: HashMap<String, String>,
}
49
50impl GrammarRegistry {
51    /// Create a new GrammarRegistry from pre-built components.
52    ///
53    /// This is typically called by `GrammarLoader` implementations after
54    /// loading grammars from various sources.
55    pub fn new(
56        syntax_set: SyntaxSet,
57        user_extensions: HashMap<String, String>,
58        filename_scopes: HashMap<String, String>,
59    ) -> Self {
60        Self {
61            syntax_set: Arc::new(syntax_set),
62            user_extensions,
63            filename_scopes,
64        }
65    }
66
67    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
68    pub fn empty() -> Arc<Self> {
69        let mut builder = SyntaxSetBuilder::new();
70        builder.add_plain_text_syntax();
71        Arc::new(Self {
72            syntax_set: Arc::new(builder.build()),
73            user_extensions: HashMap::new(),
74            filename_scopes: HashMap::new(),
75        })
76    }
77
78    /// Build the default filename -> scope mappings for dotfiles and special files.
79    pub fn build_filename_scopes() -> HashMap<String, String> {
80        let mut map = HashMap::new();
81
82        // Shell configuration files -> Bash/Shell script scope
83        let shell_scope = "source.shell.bash".to_string();
84        for filename in [
85            ".zshrc",
86            ".zprofile",
87            ".zshenv",
88            ".zlogin",
89            ".zlogout",
90            ".bash_aliases",
91            // .bashrc and .bash_profile are already recognized by syntect
92            // Common shell script files without extensions
93            "PKGBUILD",
94            "APKBUILD",
95        ] {
96            map.insert(filename.to_string(), shell_scope.clone());
97        }
98
99        // Git rebase todo files
100        let git_rebase_scope = "source.git-rebase-todo".to_string();
101        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
102
103        // Git commit message files
104        let git_commit_scope = "source.git-commit".to_string();
105        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
106            map.insert(filename.to_string(), git_commit_scope.clone());
107        }
108
109        // Gitignore and similar files
110        let gitignore_scope = "source.gitignore".to_string();
111        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
112            map.insert(filename.to_string(), gitignore_scope.clone());
113        }
114
115        // Git config files
116        let gitconfig_scope = "source.gitconfig".to_string();
117        for filename in [".gitconfig", ".gitmodules"] {
118            map.insert(filename.to_string(), gitconfig_scope.clone());
119        }
120
121        // Git attributes files
122        let gitattributes_scope = "source.gitattributes".to_string();
123        map.insert(".gitattributes".to_string(), gitattributes_scope);
124
125        map
126    }
127
128    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
129    pub fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
130        // TOML grammar
131        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
132            Ok(syntax) => {
133                builder.add(syntax);
134                tracing::debug!("Loaded embedded TOML grammar");
135            }
136            Err(e) => {
137                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
138            }
139        }
140
141        // Odin grammar
142        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
143            Ok(syntax) => {
144                builder.add(syntax);
145                tracing::debug!("Loaded embedded Odin grammar");
146            }
147            Err(e) => {
148                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
149            }
150        }
151
152        // Zig grammar
153        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
154            Ok(syntax) => {
155                builder.add(syntax);
156                tracing::debug!("Loaded embedded Zig grammar");
157            }
158            Err(e) => {
159                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
160            }
161        }
162
163        // Git Rebase Todo grammar
164        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
165            Ok(syntax) => {
166                builder.add(syntax);
167                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
168            }
169            Err(e) => {
170                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
171            }
172        }
173
174        // Git Commit Message grammar
175        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
176        {
177            Ok(syntax) => {
178                builder.add(syntax);
179                tracing::debug!("Loaded embedded Git Commit Message grammar");
180            }
181            Err(e) => {
182                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
183            }
184        }
185
186        // Gitignore grammar
187        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
188            Ok(syntax) => {
189                builder.add(syntax);
190                tracing::debug!("Loaded embedded Gitignore grammar");
191            }
192            Err(e) => {
193                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
194            }
195        }
196
197        // Git Config grammar
198        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
199            Ok(syntax) => {
200                builder.add(syntax);
201                tracing::debug!("Loaded embedded Git Config grammar");
202            }
203            Err(e) => {
204                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
205            }
206        }
207
208        // Git Attributes grammar
209        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
210            Ok(syntax) => {
211                builder.add(syntax);
212                tracing::debug!("Loaded embedded Git Attributes grammar");
213            }
214            Err(e) => {
215                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
216            }
217        }
218    }
219
220    /// Find syntax for a file by path/extension/filename.
221    ///
222    /// Checks in order:
223    /// 1. User-configured grammar extensions (by scope)
224    /// 2. By extension (includes built-in + embedded grammars)
225    /// 3. By filename (custom dotfile mappings like .zshrc)
226    /// 4. By filename via syntect (handles Makefile, .bashrc, etc.)
227    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
228        // Try extension-based lookup first
229        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
230            // Check user grammars first (higher priority)
231            if let Some(scope) = self.user_extensions.get(ext) {
232                if let Some(syntax) = syntect::parsing::Scope::new(scope)
233                    .ok()
234                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
235                {
236                    return Some(syntax);
237                }
238            }
239
240            // Try extension lookup (includes embedded grammars like TOML)
241            if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
242                return Some(syntax);
243            }
244        }
245
246        // Try filename-based lookup for dotfiles and special files
247        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
248            if let Some(scope) = self.filename_scopes.get(filename) {
249                if let Some(syntax) = syntect::parsing::Scope::new(scope)
250                    .ok()
251                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
252                {
253                    return Some(syntax);
254                }
255            }
256        }
257
258        // Try syntect's full file detection (handles special filenames like Makefile)
259        // This may do I/O for first-line detection, but handles many cases
260        if let Ok(Some(syntax)) = self.syntax_set.find_syntax_for_file(path) {
261            return Some(syntax);
262        }
263
264        None
265    }
266
267    /// Find syntax for a file, checking user-configured languages first.
268    ///
269    /// This method extends `find_syntax_for_file` by first checking the provided
270    /// languages configuration for filename and extension matches. This allows
271    /// users to configure custom filename patterns (like PKGBUILD for bash) that
272    /// will be respected for syntax highlighting.
273    ///
274    /// Checks in order:
275    /// 1. User-configured language filenames from config
276    /// 2. User-configured language extensions from config
277    /// 3. Falls back to `find_syntax_for_file` for built-in detection
278    pub fn find_syntax_for_file_with_languages(
279        &self,
280        path: &Path,
281        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
282    ) -> Option<&SyntaxReference> {
283        // Try filename match from languages config first
284        if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
285            for lang_config in languages.values() {
286                if lang_config.filenames.iter().any(|f| f == filename) {
287                    // Found a match - try to find syntax by grammar name
288                    if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
289                        return Some(syntax);
290                    }
291                    // Also try finding by extension if grammar name didn't work
292                    // (some grammars are named differently)
293                    if !lang_config.extensions.is_empty() {
294                        if let Some(ext) = lang_config.extensions.first() {
295                            if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
296                                return Some(syntax);
297                            }
298                        }
299                    }
300                }
301            }
302        }
303
304        // Try extension match from languages config
305        if let Some(extension) = path.extension().and_then(|e| e.to_str()) {
306            for lang_config in languages.values() {
307                if lang_config.extensions.iter().any(|ext| ext == extension) {
308                    // Found a match - try to find syntax by grammar name
309                    if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
310                        return Some(syntax);
311                    }
312                }
313            }
314        }
315
316        // Fall back to built-in detection
317        self.find_syntax_for_file(path)
318    }
319
320    /// Find syntax by first line content (shebang, mode line, etc.)
321    ///
322    /// Use this when you have the file content but path-based detection failed.
323    pub fn find_syntax_by_first_line(&self, first_line: &str) -> Option<&SyntaxReference> {
324        self.syntax_set.find_syntax_by_first_line(first_line)
325    }
326
327    /// Find syntax by scope name
328    pub fn find_syntax_by_scope(&self, scope: &str) -> Option<&SyntaxReference> {
329        let scope = syntect::parsing::Scope::new(scope).ok()?;
330        self.syntax_set.find_syntax_by_scope(scope)
331    }
332
333    /// Find syntax by name (case-insensitive)
334    ///
335    /// This allows config files to use lowercase grammar names like "go" while
336    /// matching syntect's actual names like "Go".
337    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
338        // Try exact match first
339        if let Some(syntax) = self.syntax_set.find_syntax_by_name(name) {
340            return Some(syntax);
341        }
342        // Fall back to case-insensitive match
343        let name_lower = name.to_lowercase();
344        self.syntax_set
345            .syntaxes()
346            .iter()
347            .find(|s| s.name.to_lowercase() == name_lower)
348    }
349
350    /// Get the underlying syntax set
351    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
352        &self.syntax_set
353    }
354
355    /// Get a clone of the Arc for sharing
356    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
357        Arc::clone(&self.syntax_set)
358    }
359
360    /// List all available syntax names
361    pub fn available_syntaxes(&self) -> Vec<&str> {
362        self.syntax_set
363            .syntaxes()
364            .iter()
365            .map(|s| s.name.as_str())
366            .collect()
367    }
368
369    /// Check if a syntax is available for an extension
370    pub fn has_syntax_for_extension(&self, ext: &str) -> bool {
371        if self.user_extensions.contains_key(ext) {
372            return true;
373        }
374
375        // Check built-in syntaxes
376        let dummy_path = PathBuf::from(format!("file.{}", ext));
377        self.syntax_set
378            .find_syntax_for_file(&dummy_path)
379            .ok()
380            .flatten()
381            .is_some()
382    }
383
384    /// Get the user extensions mapping (extension -> scope name)
385    pub fn user_extensions(&self) -> &HashMap<String, String> {
386        &self.user_extensions
387    }
388
389    /// Get the filename scopes mapping (filename -> scope name)
390    pub fn filename_scopes(&self) -> &HashMap<String, String> {
391        &self.filename_scopes
392    }
393}
394
395impl Default for GrammarRegistry {
396    fn default() -> Self {
397        // Create with defaults and embedded grammars only (no user grammars)
398        let defaults = SyntaxSet::load_defaults_newlines();
399        let mut builder = defaults.into_builder();
400        Self::add_embedded_grammars(&mut builder);
401        let syntax_set = builder.build();
402        let filename_scopes = Self::build_filename_scopes();
403
404        Self::new(syntax_set, HashMap::new(), filename_scopes)
405    }
406}
407
408// VSCode package.json structures for parsing grammar manifests
409
/// The subset of a VSCode extension `package.json` needed to discover
/// grammars: only the `contributes` section is read.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// The manifest's `contributes` section; `None` when the key is absent.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
415
/// The `contributes` section of a manifest, limited to language and
/// grammar declarations. Either list is empty when its key is absent.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Declared languages (id plus associated file extensions).
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Declared grammars (language, scope name and grammar file path).
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
423
/// One language declaration from a manifest's `contributes.languages` list.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier (required by the deserializer).
    pub id: String,
    /// File extensions associated with the language; empty when absent.
    // NOTE(review): presumably stored exactly as written in the manifest
    // (VSCode writes them with a leading dot) — confirm whether callers strip it.
    #[serde(default)]
    pub extensions: Vec<String>,
}
430
431#[derive(Debug, Deserialize)]
432pub struct GrammarContribution {
433    pub language: String,
434    #[serde(rename = "scopeName")]
435    pub scope_name: String,
436    pub path: String,
437}
438
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `filename` resolves to a syntax whose name mentions
    /// "bash" or "shell" (case-insensitively).
    fn assert_detected_as_shell(registry: &GrammarRegistry, filename: &str) {
        let result = registry.find_syntax_for_file(Path::new(filename));
        assert!(
            result.is_some(),
            "{} should be detected as a syntax",
            filename
        );
        let syntax = result.unwrap();
        // Should be detected as Bash/Shell
        assert!(
            syntax.name.to_lowercase().contains("bash")
                || syntax.name.to_lowercase().contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }

    #[test]
    fn test_empty_registry() {
        // Even the empty registry carries the plain-text syntax.
        let names = GrammarRegistry::empty().available_syntaxes().len();
        assert!(names != 0);
    }

    #[test]
    fn test_default_registry() {
        // The default registry ships the built-in syntaxes.
        let registry = GrammarRegistry::default();
        let names = registry.available_syntaxes();
        assert!(!names.is_empty());
    }

    #[test]
    fn test_find_syntax_for_common_extensions() {
        let registry = GrammarRegistry::default();

        // (file name, whether a syntax is expected) for extensions syntect should support
        let expectations = [
            ("test.py", true),
            ("test.rs", true),
            ("test.js", true),
            ("test.json", true),
            ("test.md", true),
            ("test.html", true),
            ("test.css", true),
            ("test.unknown_extension_xyz", false),
        ];

        for (filename, should_exist) in expectations {
            let found = registry.find_syntax_for_file(Path::new(filename)).is_some();
            assert_eq!(
                found, should_exist,
                "Expected {:?} for {}",
                should_exist, filename
            );
        }
    }

    #[test]
    fn test_syntax_set_arc() {
        let registry = GrammarRegistry::default();
        let first = registry.syntax_set_arc();
        let second = registry.syntax_set_arc();
        // Both handles must share one allocation.
        assert!(Arc::ptr_eq(&first, &second));
    }

    #[test]
    fn test_shell_dotfiles_detection() {
        let registry = GrammarRegistry::default();

        // All these should be detected as shell scripts
        for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
            assert_detected_as_shell(&registry, filename);
        }
    }

    #[test]
    fn test_pkgbuild_detection() {
        let registry = GrammarRegistry::default();

        // PKGBUILD and APKBUILD should be detected as shell scripts
        for filename in ["PKGBUILD", "APKBUILD"] {
            assert_detected_as_shell(&registry, filename);
        }
    }
}