use crate::Language;
use std::collections::HashMap;
use std::path::Path;
use std::sync::{OnceLock, RwLock};
/// Every language registered so far, in registration order.
static LANGUAGES: RwLock<Vec<&'static dyn Language>> = RwLock::new(Vec::new());
/// One-shot guard ensuring the built-in languages are registered at most once.
static INITIALIZED: OnceLock<()> = OnceLock::new();
/// Extension -> language lookup, built once from `LANGUAGES` on first use.
static EXTENSION_MAP: OnceLock<HashMap<&'static str, &'static dyn Language>> = OnceLock::new();
/// Grammar name -> language lookup, built once from `LANGUAGES` on first use.
static GRAMMAR_MAP: OnceLock<HashMap<&'static str, &'static dyn Language>> = OnceLock::new();
/// Appends `lang` to the global language registry.
///
/// The extension and grammar lookup maps are each built once, on their first
/// use, from whatever the registry holds at that moment. A language registered
/// after that point still shows up in `supported_languages()` but is not found
/// by `support_for_extension` / `support_for_grammar`.
///
/// # Panics
///
/// Panics if the registry lock is poisoned.
pub fn register(lang: &'static dyn Language) {
    let mut registry = LANGUAGES.write().unwrap();
    registry.push(lang);
}
/// Registers every built-in language whose `lang-*` Cargo feature is enabled.
///
/// The body runs at most once per process (guarded by `INITIALIZED`); later
/// calls are no-ops. Languages are pushed onto the registry in the order they
/// appear below. That order matters: the lookup maps are filled by iterating
/// the registry front to back, so when two languages claim the same extension
/// or grammar name, the one registered later wins.
fn init_builtin() {
INITIALIZED.get_or_init(|| {
#[cfg(feature = "lang-python")]
register(&crate::python::Python);
#[cfg(feature = "lang-rust")]
register(&crate::rust::Rust);
#[cfg(feature = "lang-javascript")]
register(&crate::javascript::JavaScript);
// The TypeScript feature contributes two registry entries: TypeScript and Tsx.
#[cfg(feature = "lang-typescript")]
{
register(&crate::typescript::TypeScript);
register(&crate::typescript::Tsx);
}
#[cfg(feature = "lang-go")]
register(&crate::go::Go);
#[cfg(feature = "lang-java")]
register(&crate::java::Java);
#[cfg(feature = "lang-kotlin")]
register(&crate::kotlin::Kotlin);
#[cfg(feature = "lang-csharp")]
register(&crate::csharp::CSharp);
#[cfg(feature = "lang-swift")]
register(&crate::swift::Swift);
#[cfg(feature = "lang-php")]
register(&crate::php::Php);
#[cfg(feature = "lang-dockerfile")]
register(&crate::dockerfile::Dockerfile);
#[cfg(feature = "lang-c")]
register(&crate::c::C);
#[cfg(feature = "lang-cpp")]
register(&crate::cpp::Cpp);
#[cfg(feature = "lang-ruby")]
register(&crate::ruby::Ruby);
#[cfg(feature = "lang-scala")]
register(&crate::scala::Scala);
#[cfg(feature = "lang-vue")]
register(&crate::vue::Vue);
#[cfg(feature = "lang-markdown")]
register(&crate::markdown::Markdown);
#[cfg(feature = "lang-json")]
register(&crate::json::Json);
#[cfg(feature = "lang-yaml")]
register(&crate::yaml::Yaml);
#[cfg(feature = "lang-toml")]
register(&crate::toml::Toml);
#[cfg(feature = "lang-html")]
register(&crate::html::Html);
#[cfg(feature = "lang-css")]
register(&crate::css::Css);
#[cfg(feature = "lang-bash")]
register(&crate::bash::Bash);
#[cfg(feature = "lang-lua")]
register(&crate::lua::Lua);
#[cfg(feature = "lang-zig")]
register(&crate::zig::Zig);
#[cfg(feature = "lang-elixir")]
register(&crate::elixir::Elixir);
#[cfg(feature = "lang-erlang")]
register(&crate::erlang::Erlang);
#[cfg(feature = "lang-dart")]
register(&crate::dart::Dart);
#[cfg(feature = "lang-fsharp")]
register(&crate::fsharp::FSharp);
#[cfg(feature = "lang-sql")]
register(&crate::sql::Sql);
#[cfg(feature = "lang-graphql")]
register(&crate::graphql::GraphQL);
#[cfg(feature = "lang-hcl")]
register(&crate::hcl::Hcl);
#[cfg(feature = "lang-scss")]
register(&crate::scss::Scss);
#[cfg(feature = "lang-svelte")]
register(&crate::svelte::Svelte);
#[cfg(feature = "lang-xml")]
register(&crate::xml::Xml);
#[cfg(feature = "lang-clojure")]
register(&crate::clojure::Clojure);
#[cfg(feature = "lang-haskell")]
register(&crate::haskell::Haskell);
#[cfg(feature = "lang-ocaml")]
register(&crate::ocaml::OCaml);
#[cfg(feature = "lang-nix")]
register(&crate::nix::Nix);
#[cfg(feature = "lang-perl")]
register(&crate::perl::Perl);
#[cfg(feature = "lang-r")]
register(&crate::r::R);
#[cfg(feature = "lang-julia")]
register(&crate::julia::Julia);
#[cfg(feature = "lang-elm")]
register(&crate::elm::Elm);
#[cfg(feature = "lang-cmake")]
register(&crate::cmake::CMake);
#[cfg(feature = "lang-vim")]
register(&crate::vim::Vim);
#[cfg(feature = "lang-awk")]
register(&crate::awk::Awk);
#[cfg(feature = "lang-fish")]
register(&crate::fish::Fish);
#[cfg(feature = "lang-jq")]
register(&crate::jq::Jq);
#[cfg(feature = "lang-powershell")]
register(&crate::powershell::PowerShell);
#[cfg(feature = "lang-zsh")]
register(&crate::zsh::Zsh);
#[cfg(feature = "lang-groovy")]
register(&crate::groovy::Groovy);
#[cfg(feature = "lang-glsl")]
register(&crate::glsl::Glsl);
#[cfg(feature = "lang-hlsl")]
register(&crate::hlsl::Hlsl);
#[cfg(feature = "lang-commonlisp")]
register(&crate::commonlisp::CommonLisp);
#[cfg(feature = "lang-elisp")]
register(&crate::elisp::Elisp);
#[cfg(feature = "lang-gleam")]
register(&crate::gleam::Gleam);
#[cfg(feature = "lang-scheme")]
register(&crate::scheme::Scheme);
#[cfg(feature = "lang-ini")]
register(&crate::ini::Ini);
#[cfg(feature = "lang-diff")]
register(&crate::diff::Diff);
#[cfg(feature = "lang-dot")]
register(&crate::dot::Dot);
#[cfg(feature = "lang-kdl")]
register(&crate::kdl::Kdl);
#[cfg(feature = "lang-ada")]
register(&crate::ada::Ada);
#[cfg(feature = "lang-agda")]
register(&crate::agda::Agda);
#[cfg(feature = "lang-d")]
register(&crate::d::D);
#[cfg(feature = "lang-matlab")]
register(&crate::matlab::Matlab);
#[cfg(feature = "lang-meson")]
register(&crate::meson::Meson);
#[cfg(feature = "lang-nginx")]
register(&crate::nginx::Nginx);
#[cfg(feature = "lang-prolog")]
register(&crate::prolog::Prolog);
#[cfg(feature = "lang-batch")]
register(&crate::batch::Batch);
#[cfg(feature = "lang-asm")]
register(&crate::asm::Asm);
#[cfg(feature = "lang-objc")]
register(&crate::objc::ObjC);
#[cfg(feature = "lang-typst")]
register(&crate::typst::Typst);
#[cfg(feature = "lang-asciidoc")]
register(&crate::asciidoc::AsciiDoc);
#[cfg(feature = "lang-vb")]
register(&crate::vb::VB);
#[cfg(feature = "lang-idris")]
register(&crate::idris::Idris);
#[cfg(feature = "lang-rescript")]
register(&crate::rescript::ReScript);
#[cfg(feature = "lang-lean")]
register(&crate::lean::Lean);
#[cfg(feature = "lang-caddy")]
register(&crate::caddy::Caddy);
#[cfg(feature = "lang-capnp")]
register(&crate::capnp::Capnp);
#[cfg(feature = "lang-devicetree")]
register(&crate::devicetree::DeviceTree);
#[cfg(feature = "lang-jinja2")]
register(&crate::jinja2::Jinja2);
#[cfg(feature = "lang-ninja")]
register(&crate::ninja::Ninja);
#[cfg(feature = "lang-postscript")]
register(&crate::postscript::PostScript);
#[cfg(feature = "lang-query")]
register(&crate::query::Query);
#[cfg(feature = "lang-ron")]
register(&crate::ron::Ron);
#[cfg(feature = "lang-sparql")]
register(&crate::sparql::Sparql);
#[cfg(feature = "lang-sshconfig")]
register(&crate::sshconfig::SshConfig);
#[cfg(feature = "lang-starlark")]
register(&crate::starlark::Starlark);
#[cfg(feature = "lang-textproto")]
register(&crate::textproto::TextProto);
#[cfg(feature = "lang-thrift")]
register(&crate::thrift::Thrift);
#[cfg(feature = "lang-tlaplus")]
register(&crate::tlaplus::TlaPlus);
#[cfg(feature = "lang-uiua")]
register(&crate::uiua::Uiua);
#[cfg(feature = "lang-verilog")]
register(&crate::verilog::Verilog);
#[cfg(feature = "lang-vhdl")]
register(&crate::vhdl::Vhdl);
#[cfg(feature = "lang-wit")]
register(&crate::wit::Wit);
#[cfg(feature = "lang-x86asm")]
register(&crate::x86asm::X86Asm);
#[cfg(feature = "lang-yuri")]
register(&crate::yuri::Yuri);
});
}
/// Returns the process-wide extension -> language table, building it on first use.
///
/// Built-in languages are registered first. The table is then filled by walking
/// the registry in order, so a later-registered language replaces an earlier one
/// that declares the same extension. Once built, the table is never refreshed.
///
/// # Panics
///
/// Panics if the registry lock is poisoned.
fn extension_map() -> &'static HashMap<&'static str, &'static dyn Language> {
    init_builtin();
    EXTENSION_MAP.get_or_init(|| {
        let registry = LANGUAGES.read().unwrap();
        let mut by_extension = HashMap::new();
        for &language in registry.iter() {
            for &extension in language.extensions() {
                by_extension.insert(extension, language);
            }
        }
        by_extension
    })
}
/// Returns the process-wide grammar-name -> language table, building it on first use.
///
/// Built-in languages are registered first. Entries are collected in registry
/// order, so when two languages report the same grammar name the one registered
/// later is kept. Once built, the table is never refreshed.
///
/// # Panics
///
/// Panics if the registry lock is poisoned.
fn grammar_map() -> &'static HashMap<&'static str, &'static dyn Language> {
    init_builtin();
    GRAMMAR_MAP.get_or_init(|| {
        let registry = LANGUAGES.read().unwrap();
        registry
            .iter()
            .map(|&language| (language.grammar_name(), language))
            .collect()
    })
}
/// Looks up the language registered for a file extension.
///
/// `ext` is tried exactly as given first; only if that misses is it lowercased
/// and tried again. Returns `None` when neither form is registered.
pub fn support_for_extension(ext: &str) -> Option<&'static dyn Language> {
    let by_extension = extension_map();
    if let Some(&language) = by_extension.get(ext) {
        return Some(language);
    }
    // Fall back to a lowercased lookup; the allocation only happens on a miss.
    by_extension.get(ext.to_lowercase().as_str()).copied()
}
/// Looks up the language registered under the given grammar name.
///
/// The match is exact (no case folding). Returns `None` for an unknown name.
pub fn support_for_grammar(grammar: &str) -> Option<&'static dyn Language> {
    let language = grammar_map().get(grammar)?;
    Some(*language)
}
pub fn support_for_path(path: &Path) -> Option<&'static dyn Language> {
path.extension()
.and_then(|e| e.to_str())
.and_then(support_for_extension)
}
/// Returns a snapshot of every registered language, in registration order.
///
/// Built-in languages are registered first if they have not been already.
///
/// # Panics
///
/// Panics if the registry lock is poisoned.
pub fn supported_languages() -> Vec<&'static dyn Language> {
    init_builtin();
    let registry = LANGUAGES.read().unwrap();
    registry.to_vec()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::GrammarLoader;

    /// Prints every named node kind (excluding `_`-prefixed ones) of one grammar.
    ///
    /// The grammar is chosen with the `DUMP_GRAMMAR` environment variable and
    /// defaults to `python`. Ignored by default; run explicitly when needed.
    #[test]
    #[ignore]
    fn dump_node_kinds() {
        let loader = GrammarLoader::new();
        let grammar_name = match std::env::var("DUMP_GRAMMAR") {
            Ok(name) => name,
            Err(_) => "python".to_string(),
        };
        let ts_lang = loader.get(&grammar_name).expect("grammar not found");
        println!("\n=== Valid node kinds for '{}' ===\n", grammar_name);
        for id in 0..ts_lang.node_kind_count() as u16 {
            let Some(kind) = ts_lang.node_kind_for_id(id) else {
                continue;
            };
            if ts_lang.node_kind_is_named(id) && !kind.starts_with('_') {
                println!("{}", kind);
            }
        }
    }

    /// Checks that every node kind listed by a language's trait methods is
    /// known to its grammar, either as a named or as an unnamed kind.
    ///
    /// Languages whose grammar cannot be loaded are skipped.
    #[test]
    fn validate_node_kinds() {
        let loader = GrammarLoader::new();
        let mut errors: Vec<String> = Vec::new();
        for lang in supported_languages() {
            let Some(ts_lang) = loader.get(lang.grammar_name()) else {
                continue;
            };
            let kind_lists: [(&str, &[&str]); 9] = [
                ("container_kinds", lang.container_kinds()),
                ("function_kinds", lang.function_kinds()),
                ("type_kinds", lang.type_kinds()),
                ("import_kinds", lang.import_kinds()),
                ("public_symbol_kinds", lang.public_symbol_kinds()),
                ("scope_creating_kinds", lang.scope_creating_kinds()),
                ("control_flow_kinds", lang.control_flow_kinds()),
                ("complexity_nodes", lang.complexity_nodes()),
                ("nesting_nodes", lang.nesting_nodes()),
            ];
            for (method, kinds) in kind_lists {
                for kind in kinds {
                    // An id of 0 means "not found"; try named first, then unnamed.
                    let known = ts_lang.id_for_node_kind(kind, true) != 0
                        || ts_lang.id_for_node_kind(kind, false) != 0;
                    if !known {
                        errors.push(format!(
                            "{}: {}() contains invalid node kind '{}'",
                            lang.name(),
                            method,
                            kind
                        ));
                    }
                }
            }
        }
        assert!(
            errors.is_empty(),
            "Found {} invalid node kinds:\n{}",
            errors.len(),
            errors.join("\n")
        );
    }

    /// Prints, per language, the grammar node kinds that look relevant (their
    /// name contains one of the patterns below) but appear in none of the
    /// language's kind lists. Ignored by default; purely informational.
    #[test]
    #[ignore]
    fn cross_check_node_kinds() {
        use std::collections::HashSet;

        let loader = GrammarLoader::new();
        let patterns = [
            "statement",
            "expression",
            "definition",
            "declaration",
            "clause",
            "block",
            "body",
            "import",
            "export",
            "function",
            "method",
            "class",
            "struct",
            "enum",
            "interface",
            "trait",
            "module",
            "type",
            "return",
            "if",
            "else",
            "for",
            "while",
            "loop",
            "match",
            "case",
            "try",
            "catch",
            "except",
            "throw",
            "raise",
            "with",
            "async",
            "await",
            "yield",
            "lambda",
            "comprehension",
            "generator",
            "operator",
        ];
        for lang in supported_languages() {
            let grammar_name = lang.grammar_name();
            let Some(ts_lang) = loader.get(grammar_name) else {
                continue;
            };

            // Union of every kind the language already references.
            let kind_lists: [&[&str]; 9] = [
                lang.container_kinds(),
                lang.function_kinds(),
                lang.type_kinds(),
                lang.import_kinds(),
                lang.public_symbol_kinds(),
                lang.scope_creating_kinds(),
                lang.control_flow_kinds(),
                lang.complexity_nodes(),
                lang.nesting_nodes(),
            ];
            let used_kinds: HashSet<&str> = kind_lists
                .iter()
                .flat_map(|kinds| kinds.iter().copied())
                .collect();

            let mut unused_interesting: Vec<&str> = (0..ts_lang.node_kind_count() as u16)
                .filter_map(|id| {
                    let kind = ts_lang.node_kind_for_id(id)?;
                    (ts_lang.node_kind_is_named(id) && !kind.starts_with('_')).then_some(kind)
                })
                .filter(|kind| {
                    if used_kinds.contains(*kind) {
                        return false;
                    }
                    let lower = kind.to_lowercase();
                    patterns.iter().any(|p| lower.contains(p))
                })
                .collect();
            unused_interesting.sort();

            if unused_interesting.is_empty() {
                continue;
            }
            println!(
                "\n=== {} ({}) - {} potentially useful unused kinds ===",
                lang.name(),
                grammar_name,
                unused_interesting.len()
            );
            for kind in &unused_interesting {
                println!("  {}", kind);
            }
        }
    }
}
/// Audits a language's list of "documented but intentionally unused" node kinds.
///
/// The grammar for `lang` is loaded and its named node kinds (excluding those
/// whose name starts with `_`) are compared against:
///
/// * the kinds the language already references through its kind-list methods
///   (`container_kinds`, `function_kinds`, `type_kinds`, `import_kinds`,
///   `public_symbol_kinds`, `scope_creating_kinds`, `control_flow_kinds`,
///   `complexity_nodes`, `nesting_nodes`), and
/// * `documented_unused`, the kinds the caller has explicitly documented as
///   deliberately not used.
///
/// # Errors
///
/// Returns `Err` with a human-readable, multi-line report when:
///
/// * the grammar named by `lang.grammar_name()` cannot be loaded,
/// * a documented kind does not exist in the grammar,
/// * a documented kind is in fact used by one of the kind-list methods, or
/// * a grammar kind whose lowercased name contains one of the "interesting"
///   patterns is neither used nor documented.
///
/// Errors about documented kinds come first, in `documented_unused` order. The
/// "neither used nor documented" errors follow, sorted by kind name, so the
/// report is identical from run to run.
pub fn validate_unused_kinds_audit(
    lang: &dyn Language,
    documented_unused: &[&str],
) -> Result<(), String> {
    use crate::GrammarLoader;
    use std::collections::HashSet;

    // Substrings that mark a node kind as likely relevant to analysis.
    const INTERESTING_PATTERNS: &[&str] = &[
        "statement",
        "expression",
        "definition",
        "declaration",
        "clause",
        "block",
        "body",
        "import",
        "export",
        "function",
        "method",
        "class",
        "struct",
        "enum",
        "interface",
        "trait",
        "module",
        "type",
        "return",
        "if",
        "else",
        "for",
        "while",
        "loop",
        "match",
        "case",
        "try",
        "catch",
        "except",
        "throw",
        "raise",
        "with",
        "async",
        "await",
        "yield",
        "lambda",
        "comprehension",
        "generator",
        "operator",
    ];

    let loader = GrammarLoader::new();
    let ts_lang = loader
        .get(lang.grammar_name())
        .ok_or_else(|| format!("Grammar '{}' not found", lang.grammar_name()))?;

    // Union of every kind the language references through its kind lists.
    let kind_lists: [&[&str]; 9] = [
        lang.container_kinds(),
        lang.function_kinds(),
        lang.type_kinds(),
        lang.import_kinds(),
        lang.public_symbol_kinds(),
        lang.scope_creating_kinds(),
        lang.control_flow_kinds(),
        lang.complexity_nodes(),
        lang.nesting_nodes(),
    ];
    let used_kinds: HashSet<&str> = kind_lists
        .iter()
        .flat_map(|kinds| kinds.iter().copied())
        .collect();
    let documented_set: HashSet<&str> = documented_unused.iter().copied().collect();

    // Named grammar kinds, skipping `_`-prefixed ones.
    let mut grammar_kinds: HashSet<&str> = HashSet::new();
    let count = ts_lang.node_kind_count();
    for id in 0..count as u16 {
        if let Some(kind) = ts_lang.node_kind_for_id(id) {
            if ts_lang.node_kind_is_named(id) && !kind.starts_with('_') {
                grammar_kinds.insert(kind);
            }
        }
    }

    let mut errors: Vec<String> = Vec::new();

    // Each documented kind must exist in the grammar and must not be in use.
    for kind in documented_unused {
        if !grammar_kinds.contains(*kind) {
            errors.push(format!(
                "Documented kind '{}' doesn't exist in grammar",
                kind
            ));
        }
        if used_kinds.contains(*kind) {
            errors.push(format!(
                "Documented kind '{}' is actually used in trait methods",
                kind
            ));
        }
    }

    // Interesting-looking grammar kinds must be either used or documented.
    // `grammar_kinds` is a HashSet with arbitrary iteration order, so the
    // offenders are sorted to keep the report stable across runs.
    let mut undocumented: Vec<&str> = grammar_kinds
        .iter()
        .copied()
        .filter(|kind| !used_kinds.contains(*kind) && !documented_set.contains(*kind))
        .filter(|kind| {
            let lower = kind.to_lowercase();
            INTERESTING_PATTERNS.iter().any(|p| lower.contains(p))
        })
        .collect();
    undocumented.sort_unstable();
    errors.extend(undocumented.into_iter().map(|kind| {
        format!(
            "Potentially useful kind '{}' is neither used nor documented",
            kind
        )
    }));

    if errors.is_empty() {
        Ok(())
    } else {
        Err(format!(
            "{} validation errors:\n  - {}",
            errors.len(),
            errors.join("\n  - ")
        ))
    }
}