Skip to main content

fresh/primitives/grammar/
loader.rs

1//! Grammar loading with I/O abstraction.
2//!
3//! This module provides the `GrammarLoader` trait for loading grammars from various sources,
4//! and `LocalGrammarLoader` as the default filesystem-based implementation.
5
6use std::collections::HashMap;
7use std::io;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10
11use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
12
13use super::types::{GrammarRegistry, PackageManifest};
14
/// I/O abstraction used while assembling the grammar registry.
///
/// All filesystem-like access performed through this trait can be swapped
/// out, which makes it possible to:
/// - substitute an in-memory mock in tests,
/// - back the loader with `fetch` in WASM builds,
/// - serve grammars from other sources (network, embedded data, ...).
///
/// Implementors must be `Send + Sync`.
pub trait GrammarLoader: Send + Sync {
    /// Directory holding user-installed grammars, or `None` when there is none.
    fn grammars_dir(&self) -> Option<PathBuf>;

    /// Directory holding language packages installed via the package manager,
    /// or `None` when there is none.
    fn languages_packages_dir(&self) -> Option<PathBuf>;

    /// Return the full contents of the file at `path` as a `String`.
    fn read_file(&self, path: &Path) -> io::Result<String>;

    /// Return the paths of the entries contained in the directory `path`.
    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>>;

    /// Report whether `path` exists.
    fn exists(&self, path: &Path) -> bool;

    /// Report whether `path` is a directory.
    fn is_dir(&self, path: &Path) -> bool;
}
40
/// Default [`GrammarLoader`] implementation backed by the local filesystem.
///
/// The loader is rooted at an optional configuration directory. When no
/// directory is set, it reports no grammar or language-package directories.
#[derive(Debug, Clone)]
pub struct LocalGrammarLoader {
    /// Root configuration directory; `None` means "embedded grammars only".
    config_dir: Option<PathBuf>,
}

impl LocalGrammarLoader {
    /// Create a `LocalGrammarLoader` rooted at the given config directory.
    pub fn new(config_dir: PathBuf) -> Self {
        Self {
            config_dir: Some(config_dir),
        }
    }

    /// Create a `LocalGrammarLoader` with no config directory (embedded grammars only).
    pub fn embedded_only() -> Self {
        Self { config_dir: None }
    }
}
59
60impl GrammarLoader for LocalGrammarLoader {
61    fn grammars_dir(&self) -> Option<PathBuf> {
62        self.config_dir.as_ref().map(|p| p.join("grammars"))
63    }
64
65    fn languages_packages_dir(&self) -> Option<PathBuf> {
66        self.config_dir
67            .as_ref()
68            .map(|p| p.join("languages/packages"))
69    }
70
71    fn read_file(&self, path: &Path) -> io::Result<String> {
72        std::fs::read_to_string(path)
73    }
74
75    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
76        let mut entries = Vec::new();
77        for entry in std::fs::read_dir(path)? {
78            entries.push(entry?.path());
79        }
80        Ok(entries)
81    }
82
83    fn exists(&self, path: &Path) -> bool {
84        path.exists()
85    }
86
87    fn is_dir(&self, path: &Path) -> bool {
88        path.is_dir()
89    }
90}
91
92// Builder/factory methods that use GrammarLoader
93impl GrammarRegistry {
94    /// Load grammar registry using a GrammarLoader.
95    ///
96    /// This loads:
97    /// 1. Built-in syntect grammars
98    /// 2. Embedded grammars (TOML, Odin, etc.)
99    /// 3. User-installed grammars from ~/.config/fresh/grammars/
100    /// 4. Language pack grammars from ~/.config/fresh/languages/packages/
101    pub fn load(loader: &dyn GrammarLoader) -> Self {
102        let mut user_extensions = HashMap::new();
103
104        // Start with syntect defaults, convert to builder to add more
105        let defaults = SyntaxSet::load_defaults_newlines();
106        let mut builder = defaults.into_builder();
107
108        // Add embedded grammars (TOML, etc.)
109        Self::add_embedded_grammars(&mut builder);
110
111        // Add user grammars from ~/.config/fresh/grammars/
112        if let Some(grammars_dir) = loader.grammars_dir() {
113            if loader.exists(&grammars_dir) {
114                load_user_grammars(loader, &grammars_dir, &mut builder, &mut user_extensions);
115            }
116        }
117
118        // Add language pack grammars from ~/.config/fresh/languages/packages/
119        if let Some(packages_dir) = loader.languages_packages_dir() {
120            if loader.exists(&packages_dir) {
121                load_language_pack_grammars(
122                    loader,
123                    &packages_dir,
124                    &mut builder,
125                    &mut user_extensions,
126                );
127            }
128        }
129
130        let syntax_set = builder.build();
131        let filename_scopes = Self::build_filename_scopes();
132
133        tracing::info!(
134            "Loaded {} syntaxes, {} user extension mappings, {} filename mappings",
135            syntax_set.syntaxes().len(),
136            user_extensions.len(),
137            filename_scopes.len()
138        );
139
140        Self::new(syntax_set, user_extensions, filename_scopes)
141    }
142
143    /// Create a fully-loaded grammar registry for the editor.
144    /// Uses LocalGrammarLoader to load grammars from the filesystem.
145    pub fn for_editor(config_dir: std::path::PathBuf) -> Arc<Self> {
146        Arc::new(Self::load(&LocalGrammarLoader::new(config_dir)))
147    }
148
149    /// Get the grammars directory path for the given config directory.
150    pub fn grammars_directory(config_dir: &std::path::Path) -> PathBuf {
151        config_dir.join("grammars")
152    }
153}
154
155/// Load user grammars from a directory using the provided loader.
156fn load_user_grammars(
157    loader: &dyn GrammarLoader,
158    dir: &Path,
159    builder: &mut SyntaxSetBuilder,
160    user_extensions: &mut HashMap<String, String>,
161) {
162    // Iterate through subdirectories looking for package.json or direct grammar files
163    let entries = match loader.read_dir(dir) {
164        Ok(entries) => entries,
165        Err(e) => {
166            tracing::warn!("Failed to read grammars directory {:?}: {}", dir, e);
167            return;
168        }
169    };
170
171    for path in entries {
172        if !loader.is_dir(&path) {
173            continue;
174        }
175
176        // Check for package.json (VSCode extension format)
177        let manifest_path = path.join("package.json");
178        if loader.exists(&manifest_path) {
179            if let Ok(manifest) = parse_package_json(loader, &manifest_path) {
180                process_manifest(loader, &path, manifest, builder, user_extensions);
181            }
182            continue;
183        }
184
185        // Check for direct grammar files
186        let mut found_any = false;
187        load_direct_grammar(loader, &path, builder, &mut found_any);
188    }
189}
190
191/// Parse a VSCode package.json manifest using the loader.
192fn parse_package_json(loader: &dyn GrammarLoader, path: &Path) -> Result<PackageManifest, String> {
193    let content = loader
194        .read_file(path)
195        .map_err(|e| format!("Failed to read file: {}", e))?;
196
197    serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))
198}
199
200/// Process a package manifest and load its grammars.
201fn process_manifest(
202    loader: &dyn GrammarLoader,
203    package_dir: &Path,
204    manifest: PackageManifest,
205    builder: &mut SyntaxSetBuilder,
206    user_extensions: &mut HashMap<String, String>,
207) {
208    let contributes = match manifest.contributes {
209        Some(c) => c,
210        None => return,
211    };
212
213    // Build language ID -> extensions mapping
214    let mut lang_extensions: HashMap<String, Vec<String>> = HashMap::new();
215    for lang in &contributes.languages {
216        lang_extensions.insert(lang.id.clone(), lang.extensions.clone());
217    }
218
219    // Process each grammar
220    for grammar in &contributes.grammars {
221        let grammar_path = package_dir.join(&grammar.path);
222
223        if !loader.exists(&grammar_path) {
224            tracing::warn!("Grammar file not found: {:?}", grammar_path);
225            continue;
226        }
227
228        // Try to load the grammar
229        let grammar_dir = grammar_path.parent().unwrap_or(package_dir);
230        if let Err(e) = builder.add_from_folder(grammar_dir, false) {
231            tracing::warn!("Failed to load grammar {:?}: {}", grammar_path, e);
232            continue;
233        }
234
235        tracing::info!(
236            "Loaded grammar {} from {:?}",
237            grammar.scope_name,
238            grammar_path
239        );
240
241        // Map extensions to scope name
242        if let Some(extensions) = lang_extensions.get(&grammar.language) {
243            for ext in extensions {
244                let ext_clean = ext.trim_start_matches('.');
245                user_extensions.insert(ext_clean.to_string(), grammar.scope_name.clone());
246                tracing::debug!("Mapped extension .{} to {}", ext_clean, grammar.scope_name);
247            }
248        }
249    }
250}
251
252/// Load a grammar directly from a .sublime-syntax or .tmLanguage file.
253fn load_direct_grammar(
254    loader: &dyn GrammarLoader,
255    dir: &Path,
256    builder: &mut SyntaxSetBuilder,
257    found_any: &mut bool,
258) {
259    // Look for .sublime-syntax or .tmLanguage files
260    let entries = match loader.read_dir(dir) {
261        Ok(e) => e,
262        Err(_) => return,
263    };
264
265    for path in entries {
266        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
267
268        if file_name.ends_with(".tmLanguage") || file_name.ends_with(".sublime-syntax") {
269            if let Err(e) = builder.add_from_folder(dir, false) {
270                tracing::warn!("Failed to load grammar from {:?}: {}", dir, e);
271            } else {
272                tracing::info!("Loaded grammar from {:?}", dir);
273                *found_any = true;
274            }
275            break;
276        }
277    }
278}
279
280/// Fresh-specific language pack manifest format
281#[derive(Debug, serde::Deserialize)]
282struct FreshPackageManifest {
283    name: String,
284    #[serde(default)]
285    fresh: Option<FreshConfig>,
286}
287
288#[derive(Debug, serde::Deserialize)]
289struct FreshConfig {
290    #[serde(default)]
291    grammar: Option<FreshGrammarConfig>,
292}
293
294#[derive(Debug, serde::Deserialize)]
295struct FreshGrammarConfig {
296    file: String,
297    #[serde(default)]
298    extensions: Vec<String>,
299}
300
301/// Load grammars from Fresh language packages (installed via pkg manager).
302///
303/// These packages use a Fresh-specific package.json format with:
304/// ```json
305/// {
306///   "name": "hare",
307///   "fresh": {
308///     "grammar": {
309///       "file": "grammars/Hare.sublime-syntax",
310///       "extensions": ["ha"]
311///     }
312///   }
313/// }
314/// ```
315fn load_language_pack_grammars(
316    loader: &dyn GrammarLoader,
317    packages_dir: &Path,
318    builder: &mut SyntaxSetBuilder,
319    user_extensions: &mut HashMap<String, String>,
320) {
321    let entries = match loader.read_dir(packages_dir) {
322        Ok(entries) => entries,
323        Err(e) => {
324            tracing::debug!(
325                "Failed to read language packages directory {:?}: {}",
326                packages_dir,
327                e
328            );
329            return;
330        }
331    };
332
333    for package_path in entries {
334        if !loader.is_dir(&package_path) {
335            continue;
336        }
337
338        let manifest_path = package_path.join("package.json");
339        if !loader.exists(&manifest_path) {
340            continue;
341        }
342
343        // Try to parse as Fresh language pack format
344        let content = match loader.read_file(&manifest_path) {
345            Ok(c) => c,
346            Err(e) => {
347                tracing::debug!("Failed to read {:?}: {}", manifest_path, e);
348                continue;
349            }
350        };
351
352        let manifest: FreshPackageManifest = match serde_json::from_str(&content) {
353            Ok(m) => m,
354            Err(e) => {
355                tracing::debug!("Failed to parse {:?}: {}", manifest_path, e);
356                continue;
357            }
358        };
359
360        // Check for Fresh grammar config
361        let grammar_config = match manifest.fresh.and_then(|f| f.grammar) {
362            Some(g) => g,
363            None => continue,
364        };
365
366        let grammar_path = package_path.join(&grammar_config.file);
367        if !loader.exists(&grammar_path) {
368            tracing::warn!(
369                "Grammar file not found for language pack '{}': {:?}",
370                manifest.name,
371                grammar_path
372            );
373            continue;
374        }
375
376        // Load the grammar file
377        let content = match loader.read_file(&grammar_path) {
378            Ok(c) => c,
379            Err(e) => {
380                tracing::warn!("Failed to read grammar file {:?}: {}", grammar_path, e);
381                continue;
382            }
383        };
384
385        // Parse and add the syntax
386        match syntect::parsing::SyntaxDefinition::load_from_str(
387            &content,
388            true,
389            grammar_path.file_stem().and_then(|s| s.to_str()),
390        ) {
391            Ok(syntax) => {
392                let scope = syntax.scope.to_string();
393                tracing::info!(
394                    "Loaded language pack grammar '{}' from {:?} (scope: {}, extensions: {:?})",
395                    manifest.name,
396                    grammar_path,
397                    scope,
398                    grammar_config.extensions
399                );
400                builder.add(syntax);
401
402                // Map extensions to scope
403                for ext in &grammar_config.extensions {
404                    let ext_clean = ext.trim_start_matches('.');
405                    user_extensions.insert(ext_clean.to_string(), scope.clone());
406                }
407            }
408            Err(e) => {
409                tracing::warn!(
410                    "Failed to parse grammar for language pack '{}': {}",
411                    manifest.name,
412                    e
413                );
414            }
415        }
416    }
417}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// In-memory `GrammarLoader` used by the tests below.
    struct MockGrammarLoader {
        grammars_dir: Option<PathBuf>,
        files: HashMap<PathBuf, String>,
        dirs: HashMap<PathBuf, Vec<PathBuf>>,
    }

    impl MockGrammarLoader {
        /// An empty loader: no grammars directory, no files, no directories.
        fn new() -> Self {
            MockGrammarLoader {
                grammars_dir: None,
                files: HashMap::new(),
                dirs: HashMap::new(),
            }
        }

        /// Builder-style setter for the grammars directory.
        #[allow(dead_code)]
        fn with_grammars_dir(self, dir: PathBuf) -> Self {
            Self {
                grammars_dir: Some(dir),
                ..self
            }
        }
    }

    impl GrammarLoader for MockGrammarLoader {
        fn grammars_dir(&self) -> Option<PathBuf> {
            self.grammars_dir.clone()
        }

        fn languages_packages_dir(&self) -> Option<PathBuf> {
            // Not used in current tests
            None
        }

        fn read_file(&self, path: &Path) -> io::Result<String> {
            match self.files.get(path) {
                Some(contents) => Ok(contents.clone()),
                None => Err(io::Error::new(io::ErrorKind::NotFound, "File not found")),
            }
        }

        fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
            match self.dirs.get(path) {
                Some(children) => Ok(children.clone()),
                None => Err(io::Error::new(
                    io::ErrorKind::NotFound,
                    "Directory not found",
                )),
            }
        }

        fn exists(&self, path: &Path) -> bool {
            self.dirs.contains_key(path) || self.files.contains_key(path)
        }

        fn is_dir(&self, path: &Path) -> bool {
            self.dirs.contains_key(path)
        }
    }

    #[test]
    fn test_mock_loader_no_grammars() {
        let registry = GrammarRegistry::load(&MockGrammarLoader::new());

        // Built-in syntaxes must be present even without any user grammars.
        assert!(!registry.available_syntaxes().is_empty());
    }

    #[test]
    fn test_local_loader_grammars_dir() {
        let temp_dir = tempfile::tempdir().unwrap();
        let config_dir = temp_dir.path().to_path_buf();
        let loader = LocalGrammarLoader::new(config_dir.clone());

        // The loader must report the `grammars` subdirectory of the config dir.
        assert_eq!(loader.grammars_dir(), Some(config_dir.join("grammars")));
    }

    #[test]
    fn test_for_editor() {
        let temp_dir = tempfile::tempdir().unwrap();
        let registry = GrammarRegistry::for_editor(temp_dir.path().to_path_buf());

        // Built-in syntaxes must be present.
        assert!(!registry.available_syntaxes().is_empty());
    }

    #[test]
    fn test_find_syntax_with_custom_languages_config() {
        let temp_dir = tempfile::tempdir().unwrap();
        let registry = GrammarRegistry::for_editor(temp_dir.path().to_path_buf());

        // Custom languages config that maps `*.myext` files and the
        // `CUSTOMBUILD` file name to bash.
        let bash_config = crate::config::LanguageConfig {
            extensions: vec!["myext".to_string()],
            filenames: vec!["CUSTOMBUILD".to_string()],
            grammar: "Bourne Again Shell (bash)".to_string(),
            comment_prefix: Some("#".to_string()),
            auto_indent: true,
            auto_close: None,
            auto_surround: None,
            highlighter: crate::config::HighlighterPreference::Auto,
            textmate_grammar: None,
            show_whitespace_tabs: true,
            use_tabs: false,
            tab_size: None,
            formatter: None,
            format_on_save: false,
            on_save: vec![],
        };
        let mut languages = std::collections::HashMap::new();
        languages.insert("bash".to_string(), bash_config);

        // Both the custom file name and the custom extension must resolve
        // to a shell/bash syntax through the languages config.
        for file_name in ["CUSTOMBUILD", "script.myext"].iter().copied() {
            let result =
                registry.find_syntax_for_file_with_languages(Path::new(file_name), &languages);
            assert!(
                result.is_some(),
                "{} should be detected via languages config",
                file_name
            );

            let syntax = result.unwrap();
            let lowered = syntax.name.to_lowercase();
            assert!(
                lowered.contains("bash") || lowered.contains("shell"),
                "{} should be detected as shell/bash, got: {}",
                file_name,
                syntax.name
            );
        }
    }

    #[test]
    fn test_list_all_syntaxes() {
        let temp_dir = tempfile::tempdir().unwrap();
        let registry = GrammarRegistry::for_editor(temp_dir.path().to_path_buf());
        let syntax_set = registry.syntax_set();

        // (name, extensions) pairs, ordered by name for a stable dump.
        let mut syntaxes: Vec<_> = syntax_set
            .syntaxes()
            .iter()
            .map(|syntax| (syntax.name.as_str(), syntax.file_extensions.clone()))
            .collect();
        syntaxes.sort_by_key(|entry| entry.0);

        println!("\n=== Available Syntaxes ({} total) ===", syntaxes.len());
        for (name, exts) in &syntaxes {
            println!("  {} -> {:?}", name, exts);
        }

        // Check TypeScript specifically
        println!("\n=== TypeScript Check ===");
        let ts_syntax = syntax_set.find_syntax_by_extension("ts");
        let tsx_syntax = syntax_set.find_syntax_by_extension("tsx");
        println!("  .ts  -> {:?}", ts_syntax.map(|s| &s.name));
        println!("  .tsx -> {:?}", tsx_syntax.map(|s| &s.name));

        // Mainly an information dump; the only requirement is a non-empty syntax list.
        assert!(!syntaxes.is_empty());
    }
}