1use crate::Language;
4use std::collections::HashMap;
5use std::path::Path;
6use std::sync::{OnceLock, RwLock};
7
/// Every registered language, in registration order.
///
/// Appended to by `register`; read when the lookup tables below are built and
/// when `supported_languages` takes its snapshot.
static LANGUAGES: RwLock<Vec<&'static dyn Language>> = RwLock::new(Vec::new());
/// One-shot guard used by `init_builtin` so the built-in registrations run only once.
static INITIALIZED: OnceLock<()> = OnceLock::new();

/// File extension -> language table, built from `LANGUAGES` on first use.
/// Being a `OnceLock`, it is not rebuilt once it has been populated.
static EXTENSION_MAP: OnceLock<HashMap<&'static str, &'static dyn Language>> = OnceLock::new();

/// Grammar name -> language table, built from `LANGUAGES` on first use.
/// Being a `OnceLock`, it is not rebuilt once it has been populated.
static GRAMMAR_MAP: OnceLock<HashMap<&'static str, &'static dyn Language>> = OnceLock::new();
17
18pub fn register(lang: &'static dyn Language) {
21 LANGUAGES
22 .write()
23 .unwrap_or_else(|e| e.into_inner())
24 .push(lang);
25}
26
27fn init_builtin() {
29 INITIALIZED.get_or_init(|| {
30 #[cfg(feature = "lang-python")]
31 register(&crate::python::Python);
32 #[cfg(feature = "lang-rust")]
33 register(&crate::rust::Rust);
34 #[cfg(feature = "lang-javascript")]
35 register(&crate::javascript::JavaScript);
36 #[cfg(feature = "lang-typescript")]
37 {
38 register(&crate::typescript::TypeScript);
39 register(&crate::typescript::Tsx);
40 }
41 #[cfg(feature = "lang-go")]
42 register(&crate::go::Go);
43 #[cfg(feature = "lang-java")]
44 register(&crate::java::Java);
45 #[cfg(feature = "lang-kotlin")]
46 register(&crate::kotlin::Kotlin);
47 #[cfg(feature = "lang-csharp")]
48 register(&crate::csharp::CSharp);
49 #[cfg(feature = "lang-swift")]
50 register(&crate::swift::Swift);
51 #[cfg(feature = "lang-php")]
52 register(&crate::php::Php);
53 #[cfg(feature = "lang-dockerfile")]
54 register(&crate::dockerfile::Dockerfile);
55 #[cfg(feature = "lang-c")]
56 register(&crate::c::C);
57 #[cfg(feature = "lang-cpp")]
58 register(&crate::cpp::Cpp);
59 #[cfg(feature = "lang-ruby")]
60 register(&crate::ruby::Ruby);
61 #[cfg(feature = "lang-scala")]
62 register(&crate::scala::Scala);
63 #[cfg(feature = "lang-vue")]
64 register(&crate::vue::Vue);
65 #[cfg(feature = "lang-markdown")]
66 register(&crate::markdown::Markdown);
67 #[cfg(feature = "lang-json")]
68 register(&crate::json::Json);
69 #[cfg(feature = "lang-yaml")]
70 register(&crate::yaml::Yaml);
71 #[cfg(feature = "lang-toml")]
72 register(&crate::toml::Toml);
73 #[cfg(feature = "lang-html")]
74 register(&crate::html::Html);
75 #[cfg(feature = "lang-css")]
76 register(&crate::css::Css);
77 #[cfg(feature = "lang-bash")]
78 register(&crate::bash::Bash);
79 #[cfg(feature = "lang-lua")]
80 register(&crate::lua::Lua);
81 #[cfg(feature = "lang-zig")]
82 register(&crate::zig::Zig);
83 #[cfg(feature = "lang-elixir")]
84 register(&crate::elixir::Elixir);
85 #[cfg(feature = "lang-erlang")]
86 register(&crate::erlang::Erlang);
87 #[cfg(feature = "lang-dart")]
88 register(&crate::dart::Dart);
89 #[cfg(feature = "lang-fsharp")]
90 register(&crate::fsharp::FSharp);
91 #[cfg(feature = "lang-sql")]
92 register(&crate::sql::Sql);
93 #[cfg(feature = "lang-graphql")]
94 register(&crate::graphql::GraphQL);
95 #[cfg(feature = "lang-hcl")]
96 register(&crate::hcl::Hcl);
97 #[cfg(feature = "lang-scss")]
98 register(&crate::scss::Scss);
99 #[cfg(feature = "lang-svelte")]
100 register(&crate::svelte::Svelte);
101 #[cfg(feature = "lang-xml")]
102 register(&crate::xml::Xml);
103 #[cfg(feature = "lang-clojure")]
104 register(&crate::clojure::Clojure);
105 #[cfg(feature = "lang-haskell")]
106 register(&crate::haskell::Haskell);
107 #[cfg(feature = "lang-ocaml")]
108 register(&crate::ocaml::OCaml);
109 #[cfg(feature = "lang-nix")]
110 register(&crate::nix::Nix);
111 #[cfg(feature = "lang-perl")]
112 register(&crate::perl::Perl);
113 #[cfg(feature = "lang-r")]
114 register(&crate::r::R);
115 #[cfg(feature = "lang-julia")]
116 register(&crate::julia::Julia);
117 #[cfg(feature = "lang-elm")]
118 register(&crate::elm::Elm);
119 #[cfg(feature = "lang-cmake")]
120 register(&crate::cmake::CMake);
121 #[cfg(feature = "lang-vim")]
122 register(&crate::vim::Vim);
123 #[cfg(feature = "lang-awk")]
124 register(&crate::awk::Awk);
125 #[cfg(feature = "lang-fish")]
126 register(&crate::fish::Fish);
127 #[cfg(feature = "lang-jq")]
128 register(&crate::jq::Jq);
129 #[cfg(feature = "lang-powershell")]
130 register(&crate::powershell::PowerShell);
131 #[cfg(feature = "lang-zsh")]
132 register(&crate::zsh::Zsh);
133 #[cfg(feature = "lang-groovy")]
134 register(&crate::groovy::Groovy);
135 #[cfg(feature = "lang-glsl")]
136 register(&crate::glsl::Glsl);
137 #[cfg(feature = "lang-hlsl")]
138 register(&crate::hlsl::Hlsl);
139 #[cfg(feature = "lang-commonlisp")]
140 register(&crate::commonlisp::CommonLisp);
141 #[cfg(feature = "lang-elisp")]
142 register(&crate::elisp::Elisp);
143 #[cfg(feature = "lang-gleam")]
144 register(&crate::gleam::Gleam);
145 #[cfg(feature = "lang-ini")]
146 register(&crate::ini::Ini);
147 #[cfg(feature = "lang-diff")]
148 register(&crate::diff::Diff);
149 #[cfg(feature = "lang-dot")]
150 register(&crate::dot::Dot);
151 #[cfg(feature = "lang-kdl")]
152 register(&crate::kdl::Kdl);
153 #[cfg(feature = "lang-ada")]
154 register(&crate::ada::Ada);
155 #[cfg(feature = "lang-agda")]
156 register(&crate::agda::Agda);
157 #[cfg(feature = "lang-d")]
158 register(&crate::d::D);
159 #[cfg(feature = "lang-matlab")]
160 register(&crate::matlab::Matlab);
161 #[cfg(feature = "lang-meson")]
162 register(&crate::meson::Meson);
163 #[cfg(feature = "lang-nginx")]
164 register(&crate::nginx::Nginx);
165 #[cfg(feature = "lang-prolog")]
166 register(&crate::prolog::Prolog);
167 #[cfg(feature = "lang-batch")]
168 register(&crate::batch::Batch);
169 #[cfg(feature = "lang-asm")]
170 register(&crate::asm::Asm);
171 #[cfg(feature = "lang-objc")]
172 register(&crate::objc::ObjC);
173 #[cfg(feature = "lang-typst")]
174 register(&crate::typst::Typst);
175 #[cfg(feature = "lang-asciidoc")]
176 register(&crate::asciidoc::AsciiDoc);
177 #[cfg(feature = "lang-vb")]
178 register(&crate::vb::VB);
179 #[cfg(feature = "lang-idris")]
180 register(&crate::idris::Idris);
181 #[cfg(feature = "lang-rescript")]
182 register(&crate::rescript::ReScript);
183 #[cfg(feature = "lang-lean")]
184 register(&crate::lean::Lean);
185 #[cfg(feature = "lang-caddy")]
186 register(&crate::caddy::Caddy);
187 #[cfg(feature = "lang-capnp")]
188 register(&crate::capnp::Capnp);
189 #[cfg(feature = "lang-devicetree")]
190 register(&crate::devicetree::DeviceTree);
191 #[cfg(feature = "lang-jinja2")]
192 register(&crate::jinja2::Jinja2);
193 #[cfg(feature = "lang-ninja")]
194 register(&crate::ninja::Ninja);
195 #[cfg(feature = "lang-postscript")]
196 register(&crate::postscript::PostScript);
197 #[cfg(feature = "lang-query")]
198 register(&crate::query::Query);
199 #[cfg(feature = "lang-scheme")]
201 register(&crate::scheme::Scheme);
202 #[cfg(feature = "lang-ron")]
203 register(&crate::ron::Ron);
204 #[cfg(feature = "lang-sparql")]
205 register(&crate::sparql::Sparql);
206 #[cfg(feature = "lang-sshconfig")]
207 register(&crate::sshconfig::SshConfig);
208 #[cfg(feature = "lang-starlark")]
209 register(&crate::starlark::Starlark);
210 #[cfg(feature = "lang-textproto")]
211 register(&crate::textproto::TextProto);
212 #[cfg(feature = "lang-thrift")]
213 register(&crate::thrift::Thrift);
214 #[cfg(feature = "lang-tlaplus")]
215 register(&crate::tlaplus::TlaPlus);
216 #[cfg(feature = "lang-uiua")]
217 register(&crate::uiua::Uiua);
218 #[cfg(feature = "lang-verilog")]
219 register(&crate::verilog::Verilog);
220 #[cfg(feature = "lang-vhdl")]
221 register(&crate::vhdl::Vhdl);
222 #[cfg(feature = "lang-wit")]
223 register(&crate::wit::Wit);
224 #[cfg(feature = "lang-x86asm")]
225 register(&crate::x86asm::X86Asm);
226 #[cfg(feature = "lang-yuri")]
227 register(&crate::yuri::Yuri);
228 });
229}
230
231fn extension_map() -> &'static HashMap<&'static str, &'static dyn Language> {
232 init_builtin();
233 EXTENSION_MAP.get_or_init(|| {
234 let mut map = HashMap::new();
235 let langs = LANGUAGES.read().unwrap_or_else(|e| e.into_inner());
236 for lang in langs.iter() {
237 for ext in lang.extensions() {
238 map.insert(*ext, *lang);
239 }
240 }
241 map
242 })
243}
244
245fn grammar_map() -> &'static HashMap<&'static str, &'static dyn Language> {
246 init_builtin();
247 GRAMMAR_MAP.get_or_init(|| {
248 let mut map = HashMap::new();
249 let langs = LANGUAGES.read().unwrap_or_else(|e| e.into_inner());
250 for lang in langs.iter() {
251 map.insert(lang.grammar_name(), *lang);
252 }
253 map
254 })
255}
256
257pub fn support_for_extension(ext: &str) -> Option<&'static dyn Language> {
261 extension_map()
262 .get(ext)
263 .or_else(|| extension_map().get(ext.to_lowercase().as_str()))
264 .copied()
265}
266
267pub fn support_for_grammar(grammar: &str) -> Option<&'static dyn Language> {
271 grammar_map().get(grammar).copied()
272}
273
274pub fn support_for_path(path: &Path) -> Option<&'static dyn Language> {
279 path.extension()
280 .and_then(|e| e.to_str())
281 .and_then(support_for_extension)
282}
283
284pub fn is_test_path(path: &Path) -> bool {
289 let lang = match support_for_path(path) {
290 Some(l) => l,
291 None => return false,
292 };
293 let globs = lang.test_file_globs();
294 if globs.is_empty() {
295 return false;
296 }
297 let mut builder = globset::GlobSetBuilder::new();
298 for g in globs {
299 if let Ok(glob) = globset::Glob::new(g) {
300 builder.add(glob);
301 }
302 }
303 let Ok(set) = builder.build() else {
304 return false;
305 };
306 set.is_match(path)
307}
308
309pub fn test_file_globs_for_path(path: &Path) -> &'static [&'static str] {
311 support_for_path(path)
312 .map(|lang| lang.test_file_globs())
313 .unwrap_or(&[])
314}
315
316pub fn supported_languages() -> Vec<&'static dyn Language> {
318 init_builtin();
319 LANGUAGES.read().unwrap_or_else(|e| e.into_inner()).clone()
320}
321
322pub fn is_programming_language(path: &Path) -> bool {
331 let lang = match support_for_path(path) {
332 Some(l) => l,
333 None => return false,
334 };
335
336 let caps = normalize_language_meta::capabilities_for(lang.name());
337 caps.executable
338}
339
340pub fn validate_unused_kinds_audit(
348 lang: &dyn Language,
349 documented_unused: &[&str],
350) -> Result<(), String> {
351 use crate::GrammarLoader;
352 use crate::grammar_loader::GrammarLoadError;
353 use std::collections::HashSet;
354
355 let loader = GrammarLoader::new();
356 let ts_lang = match loader.get(lang.grammar_name()) {
357 Ok(l) => l,
358 Err(GrammarLoadError::NotFound(_)) => return Ok(()),
362 Err(e) => return Err(format!("Grammar '{}' not found: {e}", lang.grammar_name())),
363 };
364
365 let interesting_patterns = [
367 "statement",
368 "expression",
369 "definition",
370 "declaration",
371 "clause",
372 "block",
373 "body",
374 "import",
375 "export",
376 "function",
377 "method",
378 "class",
379 "struct",
380 "enum",
381 "interface",
382 "trait",
383 "module",
384 "type",
385 "return",
386 "if",
387 "else",
388 "for",
389 "while",
390 "loop",
391 "match",
392 "case",
393 "try",
394 "catch",
395 "except",
396 "throw",
397 "raise",
398 "with",
399 "async",
400 "await",
401 "yield",
402 "lambda",
403 "comprehension",
404 "generator",
405 "operator",
406 ];
407
408 let tags_kinds: HashSet<String> = {
410 let mut kinds = HashSet::new();
411 if let Some(tags_content) = loader.get_tags(lang.grammar_name()) {
412 for line in tags_content.lines() {
415 let trimmed = line.trim_start();
416 if trimmed.starts_with('(')
417 && !trimmed.starts_with(";;")
418 && !trimmed.starts_with(";")
419 {
420 let inner = &trimmed[1..];
422 let kind_name: String = inner
423 .chars()
424 .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
425 .collect();
426 if !kind_name.is_empty() && !kind_name.starts_with('@') {
427 kinds.insert(kind_name);
428 }
429 }
430 }
431 }
432 kinds
433 };
434
435 let documented_set: HashSet<&str> = documented_unused.iter().copied().collect();
436
437 let mut grammar_kinds: HashSet<&str> = HashSet::new();
439 let count = ts_lang.node_kind_count();
440 for id in 0..count as u16 {
441 if let Some(kind) = ts_lang.node_kind_for_id(id) {
442 let named = ts_lang.node_kind_is_named(id);
443 if named && !kind.starts_with('_') {
444 grammar_kinds.insert(kind);
445 }
446 }
447 }
448
449 let mut errors: Vec<String> = Vec::new();
450
451 for kind in documented_unused {
453 if !grammar_kinds.contains(*kind) {
454 errors.push(format!(
455 "Documented kind '{}' doesn't exist in grammar",
456 kind
457 ));
458 }
459 if tags_kinds.contains(*kind) {
461 errors.push(format!(
462 "Documented kind '{}' is actually used in tags.scm",
463 kind
464 ));
465 }
466 }
467
468 for kind in &grammar_kinds {
470 let lower = kind.to_lowercase();
471 let is_interesting = interesting_patterns.iter().any(|p| lower.contains(p));
472
473 if is_interesting && !tags_kinds.contains(*kind) && !documented_set.contains(*kind) {
474 errors.push(format!(
475 "Potentially useful kind '{}' is neither used nor documented",
476 kind
477 ));
478 }
479 }
480
481 if errors.is_empty() {
482 Ok(())
483 } else {
484 Err(format!(
485 "{} validation errors:\n - {}",
486 errors.len(),
487 errors.join("\n - ")
488 ))
489 }
490}
491
#[cfg(test)]
mod tests {
    use super::*;
    use crate::GrammarLoader;

    /// Prints every named, non-hidden node kind of a single grammar.
    ///
    /// The grammar is taken from the `DUMP_GRAMMAR` environment variable and
    /// falls back to `python`. Ignored by default; run it explicitly.
    #[test]
    #[ignore]
    fn dump_node_kinds() {
        let grammar_name = std::env::var("DUMP_GRAMMAR").unwrap_or_else(|_| "python".to_string());
        let loader = GrammarLoader::new();

        let ts_lang = loader.get(&grammar_name).expect("grammar not found");

        println!("\n=== Valid node kinds for '{}' ===\n", grammar_name);
        for id in 0..ts_lang.node_kind_count() as u16 {
            match ts_lang.node_kind_for_id(id) {
                Some(kind) if ts_lang.node_kind_is_named(id) && !kind.starts_with('_') => {
                    println!("{}", kind);
                }
                _ => {}
            }
        }
    }

    /// Currently has no body, so it passes trivially.
    #[test]
    fn validate_node_kinds() {}

    /// Prints, for each supported language whose grammar loads, the named node
    /// kinds whose names look "interesting". Ignored by default.
    #[test]
    #[ignore]
    fn cross_check_node_kinds() {
        use std::collections::HashSet;

        // Substrings that mark a node kind as worth reporting.
        let interesting_patterns = [
            "statement",
            "expression",
            "definition",
            "declaration",
            "clause",
            "block",
            "body",
            "import",
            "export",
            "function",
            "method",
            "class",
            "struct",
            "enum",
            "interface",
            "trait",
            "module",
            "type",
            "return",
            "if",
            "else",
            "for",
            "while",
            "loop",
            "match",
            "case",
            "try",
            "catch",
            "except",
            "throw",
            "raise",
            "with",
            "async",
            "await",
            "yield",
            "lambda",
            "comprehension",
            "generator",
            "operator",
        ];

        let loader = GrammarLoader::new();

        for lang in supported_languages() {
            let grammar_name = lang.grammar_name();
            // Languages whose grammar cannot be loaded are skipped.
            let Ok(ts_lang) = loader.get(grammar_name) else {
                continue;
            };

            // Nothing is recorded as used here, so this set stays empty and
            // the "unused" filter below lets every kind through.
            let used_kinds: HashSet<&str> = HashSet::new();

            let mut unused_interesting: Vec<&str> = (0..ts_lang.node_kind_count() as u16)
                .filter_map(|id| {
                    let kind = ts_lang.node_kind_for_id(id)?;
                    (ts_lang.node_kind_is_named(id) && !kind.starts_with('_')).then_some(kind)
                })
                .filter(|kind| !used_kinds.contains(*kind))
                .filter(|kind| {
                    let lower = kind.to_lowercase();
                    interesting_patterns.iter().any(|p| lower.contains(p))
                })
                .collect();

            unused_interesting.sort();

            if unused_interesting.is_empty() {
                continue;
            }

            println!(
                "\n=== {} ({}) - {} potentially useful unused kinds ===",
                lang.name(),
                grammar_name,
                unused_interesting.len()
            );
            for kind in &unused_interesting {
                println!(" {}", kind);
            }
        }
    }
}