use serde::Deserialize;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
// Text of the bundled TOML `.sublime-syntax` grammar, embedded at compile time via `include_str!`.
const TOML_GRAMMAR: &str = include_str!("../grammars/toml.sublime-syntax");
/// Collection of syntax grammars plus the lookup tables used to choose one for a file.
pub struct GrammarRegistry {
/// The built syntax set, held in an `Arc` so it can be handed out without copying.
syntax_set: Arc<SyntaxSet>,
/// File extension (stored without a leading dot) -> grammar scope name, filled from
/// user grammar package manifests.
user_extensions: HashMap<String, String>,
/// Exact file name -> grammar scope name, for files recognised by name rather than extension.
filename_scopes: HashMap<String, String>,
}
impl GrammarRegistry {
/// Loads the full registry (see [`GrammarRegistry::load`]) and wraps it in an [`Arc`].
pub fn for_editor() -> Arc<Self> {
    let registry = Self::load();
    Arc::new(registry)
}
/// Builds a registry from syntect's default syntaxes, the embedded grammars and any
/// user grammars found in [`GrammarRegistry::grammars_directory`].
///
/// A missing user grammar directory is not an error; it is simply skipped.
pub fn load() -> Self {
    // Start from the stock syntect set and extend it.
    let mut builder = SyntaxSet::load_defaults_newlines().into_builder();
    Self::add_embedded_grammars(&mut builder);

    // Extension mappings are only ever contributed by user grammar packages.
    let mut user_extensions = HashMap::new();
    if let Some(user_dir) = Self::grammars_directory().filter(|dir| dir.exists()) {
        Self::load_user_grammars_into(&user_dir, &mut builder, &mut user_extensions);
    }

    let filename_scopes = Self::build_filename_scopes();
    let syntax_set = builder.build();

    tracing::info!(
        "Loaded {} syntaxes, {} user extension mappings, {} filename mappings",
        syntax_set.syntaxes().len(),
        user_extensions.len(),
        filename_scopes.len()
    );

    Self {
        syntax_set: Arc::new(syntax_set),
        user_extensions,
        filename_scopes,
    }
}
/// Creates a registry that contains only the plain-text syntax and no extension or
/// file name mappings.
pub fn empty() -> Arc<Self> {
    let plain_text_only = {
        let mut builder = SyntaxSetBuilder::new();
        builder.add_plain_text_syntax();
        builder.build()
    };
    let registry = Self {
        syntax_set: Arc::new(plain_text_only),
        user_extensions: HashMap::new(),
        filename_scopes: HashMap::new(),
    };
    Arc::new(registry)
}
/// Returns `<config dir>/fresh/grammars`, or `None` when `dirs::config_dir()` yields nothing.
///
/// The directory is not created and is not checked for existence here.
pub fn grammars_directory() -> Option<PathBuf> {
    let config_root = dirs::config_dir()?;
    Some(config_root.join("fresh/grammars"))
}
/// Adds the grammars that are compiled into the binary (currently TOML) to `builder`.
///
/// A grammar that fails to parse is logged and skipped; it never aborts loading.
fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
    let parsed = SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML"));
    let toml_syntax = match parsed {
        Ok(definition) => definition,
        Err(e) => {
            tracing::warn!("Failed to load embedded TOML grammar: {}", e);
            return;
        }
    };
    builder.add(toml_syntax);
    tracing::debug!("Loaded embedded TOML grammar");
}
/// Builds the table of well-known file names that have no useful extension, mapping
/// each of them to the scope name of the grammar that should highlight it.
fn build_filename_scopes() -> HashMap<String, String> {
    const SHELL_SCOPE: &str = "source.shell.bash";
    // Shell start-up files and shell-syntax build recipes.
    const SHELL_FILENAMES: [&str; 8] = [
        ".zshrc",
        ".zprofile",
        ".zshenv",
        ".zlogin",
        ".zlogout",
        ".bash_aliases",
        "PKGBUILD",
        "APKBUILD",
    ];

    SHELL_FILENAMES
        .iter()
        .map(|name| ((*name).to_owned(), SHELL_SCOPE.to_owned()))
        .collect()
}
/// Scans every sub-directory of `dir` for a grammar package and loads it into `builder`.
///
/// A sub-directory that contains a `package.json` is handled through its manifest
/// (which may also add entries to `user_extensions`); any other sub-directory is
/// probed for grammar files directly. Problems are logged and the offending
/// directory is skipped, so one broken package never prevents the others from loading.
fn load_user_grammars_into(
    dir: &Path,
    builder: &mut SyntaxSetBuilder,
    user_extensions: &mut HashMap<String, String>,
) {
    let entries = match std::fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(e) => {
            tracing::warn!("Failed to read grammars directory {:?}: {}", dir, e);
            return;
        }
    };

    for entry in entries.flatten() {
        let path = entry.path();
        if !path.is_dir() {
            continue;
        }

        let manifest_path = path.join("package.json");
        if manifest_path.exists() {
            match Self::parse_package_json(&manifest_path) {
                Ok(manifest) => {
                    Self::process_manifest(&path, manifest, builder, user_extensions);
                }
                // Report the broken manifest instead of dropping the package silently.
                Err(e) => {
                    tracing::warn!("Skipping grammar package {:?}: {}", manifest_path, e);
                }
            }
            // A directory with a manifest is never probed for loose grammar files.
            continue;
        }

        let mut found_any = false;
        Self::load_direct_grammar(&path, builder, user_extensions, &mut found_any);
        if !found_any {
            tracing::debug!("No grammar loaded from {:?}", path);
        }
    }
}
/// Loads a grammar folder that has no `package.json` manifest.
///
/// The folder is handed to `SyntaxSetBuilder::add_from_folder` when it directly
/// contains a file named `*.tmLanguage.json`, `*.tmLanguage` or `*.sublime-syntax`.
/// `found_any` is set to `true` only when at least one syntax was actually added:
/// syntect's folder loader ingests `.sublime-syntax` files only, so a folder holding
/// nothing but TextMate grammars is reported as a warning rather than as a success.
///
/// `_user_extensions` is accepted for signature parity with the manifest path and
/// is not modified.
fn load_direct_grammar(
    dir: &Path,
    builder: &mut SyntaxSetBuilder,
    _user_extensions: &mut HashMap<String, String>,
    found_any: &mut bool,
) {
    const GRAMMAR_SUFFIXES: [&str; 3] = [".tmLanguage.json", ".tmLanguage", ".sublime-syntax"];

    let entries = match std::fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(e) => {
            tracing::warn!("Failed to read grammar directory {:?}: {}", dir, e);
            return;
        }
    };

    let has_grammar_file = entries.flatten().any(|entry| {
        entry.file_name().to_str().map_or(false, |name| {
            GRAMMAR_SUFFIXES.iter().any(|suffix| name.ends_with(*suffix))
        })
    });
    if !has_grammar_file {
        return;
    }

    let syntaxes_before = builder.syntaxes().len();
    // `true`: compile for lines that include their trailing newline, matching the
    // default set (`load_defaults_newlines`) and the embedded grammars in this registry.
    match builder.add_from_folder(dir, true) {
        Err(e) => {
            tracing::warn!("Failed to load grammar from {:?}: {}", dir, e);
        }
        Ok(()) if builder.syntaxes().len() > syntaxes_before => {
            tracing::info!("Loaded grammar from {:?}", dir);
            *found_any = true;
        }
        Ok(()) => {
            tracing::warn!(
                "No loadable .sublime-syntax grammar found in {:?}; TextMate grammars are not loaded from folders",
                dir
            );
        }
    }
}
/// Reads `path` and deserializes it as a [`PackageManifest`].
///
/// # Errors
/// Returns a human-readable message when the file cannot be read or is not valid
/// JSON for the manifest structure.
fn parse_package_json(path: &Path) -> Result<PackageManifest, String> {
    let content = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(e) => return Err(format!("Failed to read file: {}", e)),
    };
    match serde_json::from_str::<PackageManifest>(&content) {
        Ok(manifest) => Ok(manifest),
        Err(e) => Err(format!("Failed to parse JSON: {}", e)),
    }
}
fn process_manifest(
package_dir: &Path,
manifest: PackageManifest,
builder: &mut SyntaxSetBuilder,
user_extensions: &mut HashMap<String, String>,
) {
let contributes = match manifest.contributes {
Some(c) => c,
None => return,
};
let mut lang_extensions: HashMap<String, Vec<String>> = HashMap::new();
for lang in &contributes.languages {
lang_extensions.insert(lang.id.clone(), lang.extensions.clone());
}
for grammar in &contributes.grammars {
let grammar_path = package_dir.join(&grammar.path);
if !grammar_path.exists() {
tracing::warn!("Grammar file not found: {:?}", grammar_path);
continue;
}
let grammar_dir = grammar_path.parent().unwrap_or(package_dir);
if let Err(e) = builder.add_from_folder(grammar_dir, false) {
tracing::warn!("Failed to load grammar {:?}: {}", grammar_path, e);
continue;
}
tracing::info!(
"Loaded grammar {} from {:?}",
grammar.scope_name,
grammar_path
);
if let Some(extensions) = lang_extensions.get(&grammar.language) {
for ext in extensions {
let ext_clean = ext.trim_start_matches('.');
user_extensions.insert(ext_clean.to_string(), grammar.scope_name.clone());
tracing::debug!("Mapped extension .{} to {}", ext_clean, grammar.scope_name);
}
}
}
}
pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
if let Some(scope) = self.user_extensions.get(ext) {
if let Some(syntax) = syntect::parsing::Scope::new(scope)
.ok()
.and_then(|s| self.syntax_set.find_syntax_by_scope(s))
{
return Some(syntax);
}
}
if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
return Some(syntax);
}
}
if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
if let Some(scope) = self.filename_scopes.get(filename) {
if let Some(syntax) = syntect::parsing::Scope::new(scope)
.ok()
.and_then(|s| self.syntax_set.find_syntax_by_scope(s))
{
return Some(syntax);
}
}
}
if let Ok(Some(syntax)) = self.syntax_set.find_syntax_for_file(path) {
return Some(syntax);
}
None
}
/// Picks a syntax for `path`, giving the user's language configuration priority over
/// the built-in detection in [`GrammarRegistry::find_syntax_for_file`].
///
/// Lookup order:
/// 1. languages whose `filenames` contain the exact file name - by grammar name,
///    then by the language's first configured extension;
/// 2. languages whose `extensions` contain the file's extension - by grammar name;
/// 3. the built-in detection.
///
/// Languages are visited in ascending name order, so when several entries claim the
/// same file name or extension the result does not depend on `HashMap` iteration
/// order and is the same on every run.
pub fn find_syntax_for_file_with_languages(
    &self,
    path: &Path,
    languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
) -> Option<&SyntaxReference> {
    let mut ordered: Vec<_> = languages.iter().collect();
    ordered.sort_unstable_by(|a, b| a.0.cmp(b.0));

    if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
        for (_, lang_config) in &ordered {
            if !lang_config.filenames.iter().any(|f| f == filename) {
                continue;
            }
            if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
                return Some(syntax);
            }
            // The grammar name is unknown: fall back to the first configured extension.
            let by_first_extension = lang_config
                .extensions
                .first()
                .and_then(|ext| self.syntax_set.find_syntax_by_extension(ext));
            if by_first_extension.is_some() {
                return by_first_extension;
            }
        }
    }

    if let Some(extension) = path.extension().and_then(|e| e.to_str()) {
        for (_, lang_config) in &ordered {
            if !lang_config.extensions.iter().any(|ext| ext == extension) {
                continue;
            }
            if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
                return Some(syntax);
            }
        }
    }

    self.find_syntax_for_file(path)
}
pub fn find_syntax_by_first_line(&self, first_line: &str) -> Option<&SyntaxReference> {
self.syntax_set.find_syntax_by_first_line(first_line)
}
/// Looks up a syntax by scope name; returns `None` when `scope` cannot be parsed as a
/// scope or no syntax in the set uses it.
pub fn find_syntax_by_scope(&self, scope: &str) -> Option<&SyntaxReference> {
    match syntect::parsing::Scope::new(scope) {
        Ok(parsed) => self.syntax_set.find_syntax_by_scope(parsed),
        Err(_) => None,
    }
}
pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
self.syntax_set.find_syntax_by_name(name)
}
pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
&self.syntax_set
}
/// Returns a new handle to the same shared syntax set (the set itself is not copied).
pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
    self.syntax_set.clone()
}
/// Lists the name of every syntax in the set, in the set's own order.
pub fn available_syntaxes(&self) -> Vec<&str> {
    let definitions = self.syntax_set.syntaxes();
    let mut names = Vec::with_capacity(definitions.len());
    for definition in definitions {
        names.push(definition.name.as_str());
    }
    names
}
pub fn has_syntax_for_extension(&self, ext: &str) -> bool {
if self.user_extensions.contains_key(ext) {
return true;
}
let dummy_path = PathBuf::from(format!("file.{}", ext));
self.syntax_set
.find_syntax_for_file(&dummy_path)
.ok()
.flatten()
.is_some()
}
}
impl Default for GrammarRegistry {
fn default() -> Self {
Self::load()
}
}
/// The part of a grammar package's `package.json` that this module reads.
#[derive(Debug, Deserialize)]
struct PackageManifest {
/// The `contributes` section; `None` when the manifest has none.
#[serde(default)]
contributes: Option<Contributes>,
}
/// Contents of a manifest's `contributes` section; both lists are empty when absent.
#[derive(Debug, Deserialize, Default)]
struct Contributes {
/// Declared languages, used to find the file extensions belonging to a language id.
#[serde(default)]
languages: Vec<LanguageContribution>,
/// Declared grammar files.
#[serde(default)]
grammars: Vec<GrammarContribution>,
}
/// One entry of `contributes.languages`.
#[derive(Debug, Deserialize)]
struct LanguageContribution {
/// Language identifier; compared against `GrammarContribution::language`.
id: String,
/// File extensions of the language; a leading dot is stripped before they are stored.
#[serde(default)]
extensions: Vec<String>,
}
/// One entry of `contributes.grammars`.
#[derive(Debug, Deserialize)]
struct GrammarContribution {
    /// Id of the language this grammar highlights.
    ///
    /// Defaults to an empty string when the entry has no `language` key, so that a
    /// single grammar without a language does not make the whole manifest fail to
    /// deserialize. Such a grammar simply gets no extension mapping.
    #[serde(default)]
    language: String,
    /// Scope name of the grammar (`scopeName` in the manifest).
    #[serde(rename = "scopeName")]
    scope_name: String,
    /// Path of the grammar file, joined onto the package directory.
    path: String,
}
#[cfg(test)]
mod tests {
use super::*;
/// A loaded registry always exposes at least one syntax.
#[test]
fn test_registry_creation() {
    let registry = GrammarRegistry::load();
    let syntax_count = registry.available_syntaxes().len();
    assert!(syntax_count > 0);
}
/// Common source file extensions resolve to a syntax; a made-up extension does not.
#[test]
fn test_find_syntax_for_common_extensions() {
    let registry = GrammarRegistry::load();
    let expectations: [(&str, bool); 8] = [
        ("test.py", true),
        ("test.rs", true),
        ("test.js", true),
        ("test.json", true),
        ("test.md", true),
        ("test.html", true),
        ("test.css", true),
        ("test.unknown_extension_xyz", false),
    ];

    for &(filename, should_exist) in expectations.iter() {
        let found = registry.find_syntax_for_file(Path::new(filename)).is_some();
        assert_eq!(
            found, should_exist,
            "Expected {:?} for {}",
            should_exist, filename
        );
    }
}
/// Every handle returned by `syntax_set_arc` points at the same allocation.
#[test]
fn test_syntax_set_arc() {
    let registry = GrammarRegistry::load();
    let (first, second) = (registry.syntax_set_arc(), registry.syntax_set_arc());
    assert!(Arc::ptr_eq(&first, &second));
}
/// Shell start-up dotfiles are recognised by file name and highlighted as shell/bash.
#[test]
fn test_shell_dotfiles_detection() {
    let registry = GrammarRegistry::load();

    for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
        let syntax = match registry.find_syntax_for_file(Path::new(filename)) {
            Some(syntax) => syntax,
            None => panic!("{} should be detected as a syntax", filename),
        };
        let lowered = syntax.name.to_lowercase();
        let looks_like_shell = lowered.contains("bash") || lowered.contains("shell");
        assert!(
            looks_like_shell,
            "{} should be detected as shell/bash, got: {}",
            filename, syntax.name
        );
    }
}
/// `PKGBUILD` and `APKBUILD` are recognised by file name and highlighted as shell/bash.
#[test]
fn test_pkgbuild_detection() {
    let registry = GrammarRegistry::load();
    let build_recipes = ["PKGBUILD", "APKBUILD"];

    for filename in build_recipes {
        let detected = registry.find_syntax_for_file(Path::new(filename));
        assert!(
            detected.is_some(),
            "{} should be detected as a syntax",
            filename
        );

        let syntax = detected.unwrap();
        let name_lower = syntax.name.to_lowercase();
        assert!(
            ["bash", "shell"].iter().any(|word| name_lower.contains(*word)),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }
}
/// A user language configuration can route both a custom file name and a custom
/// extension to an existing grammar.
#[test]
fn test_find_syntax_with_custom_languages_config() {
    let registry = GrammarRegistry::load();

    let bash_config = crate::config::LanguageConfig {
        extensions: vec!["myext".to_string()],
        filenames: vec!["CUSTOMBUILD".to_string()],
        grammar: "Bourne Again Shell (bash)".to_string(),
        comment_prefix: Some("#".to_string()),
        auto_indent: true,
        highlighter: crate::config::HighlighterPreference::Auto,
        textmate_grammar: None,
        show_whitespace_tabs: true,
        use_tabs: false,
        tab_size: None,
        formatter: None,
        format_on_save: false,
        on_save: vec![],
    };
    let mut languages = std::collections::HashMap::new();
    languages.insert("bash".to_string(), bash_config);

    // First the configured file name, then the configured extension.
    for name in ["CUSTOMBUILD", "script.myext"] {
        let result = registry.find_syntax_for_file_with_languages(Path::new(name), &languages);
        assert!(
            result.is_some(),
            "{} should be detected via languages config",
            name
        );

        let syntax = result.unwrap();
        let lowered = syntax.name.to_lowercase();
        assert!(
            lowered.contains("bash") || lowered.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            name,
            syntax.name
        );
    }
}
/// Prints every available syntax with its extensions (visible with `--nocapture`),
/// reports what `.ts`/`.tsx` resolve to, and checks that the set is not empty.
#[test]
fn test_list_all_syntaxes() {
    let registry = GrammarRegistry::load();
    let syntax_set = registry.syntax_set();

    // Borrow the extension lists; they are only printed, so no copy is needed.
    let mut syntaxes: Vec<_> = syntax_set
        .syntaxes()
        .iter()
        .map(|s| (s.name.as_str(), &s.file_extensions))
        .collect();
    syntaxes.sort_by(|a, b| a.0.cmp(b.0));

    println!("\n=== Available Syntaxes ({} total) ===", syntaxes.len());
    for (name, exts) in &syntaxes {
        println!(" {} -> {:?}", name, exts);
    }

    println!("\n=== TypeScript Check ===");
    let ts_syntax = syntax_set.find_syntax_by_extension("ts");
    let tsx_syntax = syntax_set.find_syntax_by_extension("tsx");
    println!(" .ts -> {:?}", ts_syntax.map(|s| &s.name));
    println!(" .tsx -> {:?}", tsx_syntax.map(|s| &s.name));

    assert!(!syntaxes.is_empty());
}
}