1use crate::Language;
4use std::collections::HashMap;
5use std::path::Path;
6use std::sync::{OnceLock, RwLock};
7
8static LANGUAGES: RwLock<Vec<&'static dyn Language>> = RwLock::new(Vec::new());
10static INITIALIZED: OnceLock<()> = OnceLock::new();
11
12static EXTENSION_MAP: OnceLock<HashMap<&'static str, &'static dyn Language>> = OnceLock::new();
14
15static GRAMMAR_MAP: OnceLock<HashMap<&'static str, &'static dyn Language>> = OnceLock::new();
17
18pub fn register(lang: &'static dyn Language) {
21 LANGUAGES.write().unwrap().push(lang);
22}
23
24fn init_builtin() {
26 INITIALIZED.get_or_init(|| {
27 #[cfg(feature = "lang-python")]
28 register(&crate::python::Python);
29 #[cfg(feature = "lang-rust")]
30 register(&crate::rust::Rust);
31 #[cfg(feature = "lang-javascript")]
32 register(&crate::javascript::JavaScript);
33 #[cfg(feature = "lang-typescript")]
34 {
35 register(&crate::typescript::TypeScript);
36 register(&crate::typescript::Tsx);
37 }
38 #[cfg(feature = "lang-go")]
39 register(&crate::go::Go);
40 #[cfg(feature = "lang-java")]
41 register(&crate::java::Java);
42 #[cfg(feature = "lang-kotlin")]
43 register(&crate::kotlin::Kotlin);
44 #[cfg(feature = "lang-csharp")]
45 register(&crate::csharp::CSharp);
46 #[cfg(feature = "lang-swift")]
47 register(&crate::swift::Swift);
48 #[cfg(feature = "lang-php")]
49 register(&crate::php::Php);
50 #[cfg(feature = "lang-dockerfile")]
51 register(&crate::dockerfile::Dockerfile);
52 #[cfg(feature = "lang-c")]
53 register(&crate::c::C);
54 #[cfg(feature = "lang-cpp")]
55 register(&crate::cpp::Cpp);
56 #[cfg(feature = "lang-ruby")]
57 register(&crate::ruby::Ruby);
58 #[cfg(feature = "lang-scala")]
59 register(&crate::scala::Scala);
60 #[cfg(feature = "lang-vue")]
61 register(&crate::vue::Vue);
62 #[cfg(feature = "lang-markdown")]
63 register(&crate::markdown::Markdown);
64 #[cfg(feature = "lang-json")]
65 register(&crate::json::Json);
66 #[cfg(feature = "lang-yaml")]
67 register(&crate::yaml::Yaml);
68 #[cfg(feature = "lang-toml")]
69 register(&crate::toml::Toml);
70 #[cfg(feature = "lang-html")]
71 register(&crate::html::Html);
72 #[cfg(feature = "lang-css")]
73 register(&crate::css::Css);
74 #[cfg(feature = "lang-bash")]
75 register(&crate::bash::Bash);
76 #[cfg(feature = "lang-lua")]
77 register(&crate::lua::Lua);
78 #[cfg(feature = "lang-zig")]
79 register(&crate::zig::Zig);
80 #[cfg(feature = "lang-elixir")]
81 register(&crate::elixir::Elixir);
82 #[cfg(feature = "lang-erlang")]
83 register(&crate::erlang::Erlang);
84 #[cfg(feature = "lang-dart")]
85 register(&crate::dart::Dart);
86 #[cfg(feature = "lang-fsharp")]
87 register(&crate::fsharp::FSharp);
88 #[cfg(feature = "lang-sql")]
89 register(&crate::sql::Sql);
90 #[cfg(feature = "lang-graphql")]
91 register(&crate::graphql::GraphQL);
92 #[cfg(feature = "lang-hcl")]
93 register(&crate::hcl::Hcl);
94 #[cfg(feature = "lang-scss")]
95 register(&crate::scss::Scss);
96 #[cfg(feature = "lang-svelte")]
97 register(&crate::svelte::Svelte);
98 #[cfg(feature = "lang-xml")]
99 register(&crate::xml::Xml);
100 #[cfg(feature = "lang-clojure")]
101 register(&crate::clojure::Clojure);
102 #[cfg(feature = "lang-haskell")]
103 register(&crate::haskell::Haskell);
104 #[cfg(feature = "lang-ocaml")]
105 register(&crate::ocaml::OCaml);
106 #[cfg(feature = "lang-nix")]
107 register(&crate::nix::Nix);
108 #[cfg(feature = "lang-perl")]
109 register(&crate::perl::Perl);
110 #[cfg(feature = "lang-r")]
111 register(&crate::r::R);
112 #[cfg(feature = "lang-julia")]
113 register(&crate::julia::Julia);
114 #[cfg(feature = "lang-elm")]
115 register(&crate::elm::Elm);
116 #[cfg(feature = "lang-cmake")]
117 register(&crate::cmake::CMake);
118 #[cfg(feature = "lang-vim")]
119 register(&crate::vim::Vim);
120 #[cfg(feature = "lang-awk")]
121 register(&crate::awk::Awk);
122 #[cfg(feature = "lang-fish")]
123 register(&crate::fish::Fish);
124 #[cfg(feature = "lang-jq")]
125 register(&crate::jq::Jq);
126 #[cfg(feature = "lang-powershell")]
127 register(&crate::powershell::PowerShell);
128 #[cfg(feature = "lang-zsh")]
129 register(&crate::zsh::Zsh);
130 #[cfg(feature = "lang-groovy")]
131 register(&crate::groovy::Groovy);
132 #[cfg(feature = "lang-glsl")]
133 register(&crate::glsl::Glsl);
134 #[cfg(feature = "lang-hlsl")]
135 register(&crate::hlsl::Hlsl);
136 #[cfg(feature = "lang-commonlisp")]
137 register(&crate::commonlisp::CommonLisp);
138 #[cfg(feature = "lang-elisp")]
139 register(&crate::elisp::Elisp);
140 #[cfg(feature = "lang-gleam")]
141 register(&crate::gleam::Gleam);
142 #[cfg(feature = "lang-scheme")]
143 register(&crate::scheme::Scheme);
144 #[cfg(feature = "lang-ini")]
145 register(&crate::ini::Ini);
146 #[cfg(feature = "lang-diff")]
147 register(&crate::diff::Diff);
148 #[cfg(feature = "lang-dot")]
149 register(&crate::dot::Dot);
150 #[cfg(feature = "lang-kdl")]
151 register(&crate::kdl::Kdl);
152 #[cfg(feature = "lang-ada")]
153 register(&crate::ada::Ada);
154 #[cfg(feature = "lang-agda")]
155 register(&crate::agda::Agda);
156 #[cfg(feature = "lang-d")]
157 register(&crate::d::D);
158 #[cfg(feature = "lang-matlab")]
159 register(&crate::matlab::Matlab);
160 #[cfg(feature = "lang-meson")]
161 register(&crate::meson::Meson);
162 #[cfg(feature = "lang-nginx")]
163 register(&crate::nginx::Nginx);
164 #[cfg(feature = "lang-prolog")]
165 register(&crate::prolog::Prolog);
166 #[cfg(feature = "lang-batch")]
167 register(&crate::batch::Batch);
168 #[cfg(feature = "lang-asm")]
169 register(&crate::asm::Asm);
170 #[cfg(feature = "lang-objc")]
171 register(&crate::objc::ObjC);
172 #[cfg(feature = "lang-typst")]
173 register(&crate::typst::Typst);
174 #[cfg(feature = "lang-asciidoc")]
175 register(&crate::asciidoc::AsciiDoc);
176 #[cfg(feature = "lang-vb")]
177 register(&crate::vb::VB);
178 #[cfg(feature = "lang-idris")]
179 register(&crate::idris::Idris);
180 #[cfg(feature = "lang-rescript")]
181 register(&crate::rescript::ReScript);
182 #[cfg(feature = "lang-lean")]
183 register(&crate::lean::Lean);
184 #[cfg(feature = "lang-caddy")]
185 register(&crate::caddy::Caddy);
186 #[cfg(feature = "lang-capnp")]
187 register(&crate::capnp::Capnp);
188 #[cfg(feature = "lang-devicetree")]
189 register(&crate::devicetree::DeviceTree);
190 #[cfg(feature = "lang-jinja2")]
191 register(&crate::jinja2::Jinja2);
192 #[cfg(feature = "lang-ninja")]
193 register(&crate::ninja::Ninja);
194 #[cfg(feature = "lang-postscript")]
195 register(&crate::postscript::PostScript);
196 #[cfg(feature = "lang-query")]
197 register(&crate::query::Query);
198 #[cfg(feature = "lang-ron")]
199 register(&crate::ron::Ron);
200 #[cfg(feature = "lang-sparql")]
201 register(&crate::sparql::Sparql);
202 #[cfg(feature = "lang-sshconfig")]
203 register(&crate::sshconfig::SshConfig);
204 #[cfg(feature = "lang-starlark")]
205 register(&crate::starlark::Starlark);
206 #[cfg(feature = "lang-textproto")]
207 register(&crate::textproto::TextProto);
208 #[cfg(feature = "lang-thrift")]
209 register(&crate::thrift::Thrift);
210 #[cfg(feature = "lang-tlaplus")]
211 register(&crate::tlaplus::TlaPlus);
212 #[cfg(feature = "lang-uiua")]
213 register(&crate::uiua::Uiua);
214 #[cfg(feature = "lang-verilog")]
215 register(&crate::verilog::Verilog);
216 #[cfg(feature = "lang-vhdl")]
217 register(&crate::vhdl::Vhdl);
218 #[cfg(feature = "lang-wit")]
219 register(&crate::wit::Wit);
220 #[cfg(feature = "lang-x86asm")]
221 register(&crate::x86asm::X86Asm);
222 #[cfg(feature = "lang-yuri")]
223 register(&crate::yuri::Yuri);
224 });
225}
226
227fn extension_map() -> &'static HashMap<&'static str, &'static dyn Language> {
228 init_builtin();
229 EXTENSION_MAP.get_or_init(|| {
230 let mut map = HashMap::new();
231 let langs = LANGUAGES.read().unwrap();
232 for lang in langs.iter() {
233 for ext in lang.extensions() {
234 map.insert(*ext, *lang);
235 }
236 }
237 map
238 })
239}
240
241fn grammar_map() -> &'static HashMap<&'static str, &'static dyn Language> {
242 init_builtin();
243 GRAMMAR_MAP.get_or_init(|| {
244 let mut map = HashMap::new();
245 let langs = LANGUAGES.read().unwrap();
246 for lang in langs.iter() {
247 map.insert(lang.grammar_name(), *lang);
248 }
249 map
250 })
251}
252
253pub fn support_for_extension(ext: &str) -> Option<&'static dyn Language> {
257 extension_map()
258 .get(ext)
259 .or_else(|| extension_map().get(ext.to_lowercase().as_str()))
260 .copied()
261}
262
263pub fn support_for_grammar(grammar: &str) -> Option<&'static dyn Language> {
267 grammar_map().get(grammar).copied()
268}
269
270pub fn support_for_path(path: &Path) -> Option<&'static dyn Language> {
275 path.extension()
276 .and_then(|e| e.to_str())
277 .and_then(support_for_extension)
278}
279
280pub fn supported_languages() -> Vec<&'static dyn Language> {
282 init_builtin();
283 LANGUAGES.read().unwrap().clone()
284}
285
#[cfg(test)]
mod tests {
    use super::*;
    use crate::GrammarLoader;

    /// Debug helper: prints every named node kind of one grammar whose name
    /// does not start with `_`.
    ///
    /// The grammar is taken from the `DUMP_GRAMMAR` environment variable and
    /// defaults to `python`. Ignored by default.
    #[test]
    #[ignore]
    fn dump_node_kinds() {
        let loader = GrammarLoader::new();
        let grammar_name = match std::env::var("DUMP_GRAMMAR") {
            Ok(name) => name,
            Err(_) => "python".to_string(),
        };

        let ts_lang = loader.get(&grammar_name).expect("grammar not found");

        println!("\n=== Valid node kinds for '{}' ===\n", grammar_name);
        let kind_count = ts_lang.node_kind_count() as u16;
        (0..kind_count)
            .filter_map(|id| ts_lang.node_kind_for_id(id).map(|kind| (id, kind)))
            .filter(|&(id, kind)| ts_lang.node_kind_is_named(id) && !kind.starts_with('_'))
            .for_each(|(_, kind)| println!("{}", kind));
    }

    /// Checks that every node kind returned by a language's kind-list methods
    /// is known to its grammar, either as a named or as an unnamed kind.
    ///
    /// Languages whose grammar cannot be loaded are skipped.
    #[test]
    fn validate_node_kinds() {
        let loader = GrammarLoader::new();
        let mut problems: Vec<String> = Vec::new();

        for lang in supported_languages() {
            let Some(ts_lang) = loader.get(lang.grammar_name()) else {
                continue;
            };

            let kind_lists: [(&str, &[&str]); 9] = [
                ("container_kinds", lang.container_kinds()),
                ("function_kinds", lang.function_kinds()),
                ("type_kinds", lang.type_kinds()),
                ("import_kinds", lang.import_kinds()),
                ("public_symbol_kinds", lang.public_symbol_kinds()),
                ("scope_creating_kinds", lang.scope_creating_kinds()),
                ("control_flow_kinds", lang.control_flow_kinds()),
                ("complexity_nodes", lang.complexity_nodes()),
                ("nesting_nodes", lang.nesting_nodes()),
            ];

            for (method, kinds) in kind_lists {
                for kind in kinds {
                    // An id of 0 means the lookup failed; the unnamed lookup is
                    // only attempted after the named one has failed.
                    let known = ts_lang.id_for_node_kind(kind, true) != 0
                        || ts_lang.id_for_node_kind(kind, false) != 0;
                    if !known {
                        problems.push(format!(
                            "{}: {}() contains invalid node kind '{}'",
                            lang.name(),
                            method,
                            kind
                        ));
                    }
                }
            }
        }

        assert!(
            problems.is_empty(),
            "Found {} invalid node kinds:\n{}",
            problems.len(),
            problems.join("\n")
        );
    }

    /// Debug helper: for each language, prints the grammar's named node kinds
    /// that match an "interesting" substring but are not returned by any of
    /// the language's kind-list methods.
    ///
    /// Ignored by default.
    #[test]
    #[ignore]
    fn cross_check_node_kinds() {
        use std::collections::HashSet;

        let loader = GrammarLoader::new();

        // Substrings that mark a node kind as worth a look.
        let interesting_patterns = [
            "statement",
            "expression",
            "definition",
            "declaration",
            "clause",
            "block",
            "body",
            "import",
            "export",
            "function",
            "method",
            "class",
            "struct",
            "enum",
            "interface",
            "trait",
            "module",
            "type",
            "return",
            "if",
            "else",
            "for",
            "while",
            "loop",
            "match",
            "case",
            "try",
            "catch",
            "except",
            "throw",
            "raise",
            "with",
            "async",
            "await",
            "yield",
            "lambda",
            "comprehension",
            "generator",
            "operator",
        ];

        for lang in supported_languages() {
            let grammar_name = lang.grammar_name();
            let Some(ts_lang) = loader.get(grammar_name) else {
                continue;
            };

            // Everything the language already references through its trait methods.
            let mut used_kinds: HashSet<&str> = HashSet::new();
            for kinds in [
                lang.container_kinds(),
                lang.function_kinds(),
                lang.type_kinds(),
                lang.import_kinds(),
                lang.public_symbol_kinds(),
                lang.scope_creating_kinds(),
                lang.control_flow_kinds(),
                lang.complexity_nodes(),
                lang.nesting_nodes(),
            ] {
                used_kinds.extend(kinds.iter().copied());
            }

            let kind_count = ts_lang.node_kind_count() as u16;
            let mut unused_interesting: Vec<&str> = (0..kind_count)
                .filter_map(|id| ts_lang.node_kind_for_id(id).map(|kind| (id, kind)))
                .filter(|&(id, kind)| ts_lang.node_kind_is_named(id) && !kind.starts_with('_'))
                .map(|(_, kind)| kind)
                .filter(|kind| !used_kinds.contains(*kind))
                .filter(|kind| {
                    let lower = kind.to_lowercase();
                    interesting_patterns.iter().any(|p| lower.contains(p))
                })
                .collect();
            unused_interesting.sort();

            if unused_interesting.is_empty() {
                continue;
            }

            println!(
                "\n=== {} ({}) - {} potentially useful unused kinds ===",
                lang.name(),
                grammar_name,
                unused_interesting.len()
            );
            for kind in &unused_interesting {
                println!(" {}", kind);
            }
        }
    }
}
501
502pub fn validate_unused_kinds_audit(
510 lang: &dyn Language,
511 documented_unused: &[&str],
512) -> Result<(), String> {
513 use crate::GrammarLoader;
514 use std::collections::HashSet;
515
516 let loader = GrammarLoader::new();
517 let ts_lang = loader
518 .get(lang.grammar_name())
519 .ok_or_else(|| format!("Grammar '{}' not found", lang.grammar_name()))?;
520
521 let interesting_patterns = [
523 "statement",
524 "expression",
525 "definition",
526 "declaration",
527 "clause",
528 "block",
529 "body",
530 "import",
531 "export",
532 "function",
533 "method",
534 "class",
535 "struct",
536 "enum",
537 "interface",
538 "trait",
539 "module",
540 "type",
541 "return",
542 "if",
543 "else",
544 "for",
545 "while",
546 "loop",
547 "match",
548 "case",
549 "try",
550 "catch",
551 "except",
552 "throw",
553 "raise",
554 "with",
555 "async",
556 "await",
557 "yield",
558 "lambda",
559 "comprehension",
560 "generator",
561 "operator",
562 ];
563
564 let mut used_kinds: HashSet<&str> = HashSet::new();
566 for kind in lang.container_kinds() {
567 used_kinds.insert(kind);
568 }
569 for kind in lang.function_kinds() {
570 used_kinds.insert(kind);
571 }
572 for kind in lang.type_kinds() {
573 used_kinds.insert(kind);
574 }
575 for kind in lang.import_kinds() {
576 used_kinds.insert(kind);
577 }
578 for kind in lang.public_symbol_kinds() {
579 used_kinds.insert(kind);
580 }
581 for kind in lang.scope_creating_kinds() {
582 used_kinds.insert(kind);
583 }
584 for kind in lang.control_flow_kinds() {
585 used_kinds.insert(kind);
586 }
587 for kind in lang.complexity_nodes() {
588 used_kinds.insert(kind);
589 }
590 for kind in lang.nesting_nodes() {
591 used_kinds.insert(kind);
592 }
593
594 let documented_set: HashSet<&str> = documented_unused.iter().copied().collect();
595
596 let mut grammar_kinds: HashSet<&str> = HashSet::new();
598 let count = ts_lang.node_kind_count();
599 for id in 0..count as u16 {
600 if let Some(kind) = ts_lang.node_kind_for_id(id) {
601 let named = ts_lang.node_kind_is_named(id);
602 if named && !kind.starts_with('_') {
603 grammar_kinds.insert(kind);
604 }
605 }
606 }
607
608 let mut errors: Vec<String> = Vec::new();
609
610 for kind in documented_unused {
612 if !grammar_kinds.contains(*kind) {
613 errors.push(format!(
614 "Documented kind '{}' doesn't exist in grammar",
615 kind
616 ));
617 }
618 if used_kinds.contains(*kind) {
620 errors.push(format!(
621 "Documented kind '{}' is actually used in trait methods",
622 kind
623 ));
624 }
625 }
626
627 for kind in &grammar_kinds {
629 let lower = kind.to_lowercase();
630 let is_interesting = interesting_patterns.iter().any(|p| lower.contains(p));
631
632 if is_interesting && !used_kinds.contains(*kind) && !documented_set.contains(*kind) {
633 errors.push(format!(
634 "Potentially useful kind '{}' is neither used nor documented",
635 kind
636 ));
637 }
638 }
639
640 if errors.is_empty() {
641 Ok(())
642 } else {
643 Err(format!(
644 "{} validation errors:\n - {}",
645 errors.len(),
646 errors.join("\n - ")
647 ))
648 }
649}