use std::path::Path;
use std::sync::Arc;
use panproto_schema::{AbstractSchema, DecoratedSchema, Schema};
use rustc_hash::FxHashMap;
use crate::error::ParseError;
use crate::layout_policy::LayoutPolicy;
use crate::theory_extract::ExtractedTheoryMeta;
/// A language backend that converts between raw source bytes and [`Schema`]
/// values for a single protocol.
///
/// [`ParserRegistry`] stores implementations behind `Arc<dyn AstParser>`,
/// keyed by [`AstParser::protocol_name`], and routes files to them using
/// [`AstParser::supported_extensions`].
pub trait AstParser: Send + Sync {
    /// Name of the protocol this parser handles.
    ///
    /// The registry uses this value as the parser's lookup key.
    fn protocol_name(&self) -> &str;

    /// Builds a [`Schema`] from `source`.
    ///
    /// `file_path` is supplied as a plain string; what an implementation does
    /// with it is up to that implementation.
    fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError>;

    /// Serializes `schema` back to bytes.
    fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError>;

    /// File extensions claimed by this parser.
    ///
    /// The registry matches these against `Path::extension`, which carries no
    /// leading dot.
    fn supported_extensions(&self) -> &[&str];

    /// Theory metadata associated with this parser.
    fn theory_meta(&self) -> &ExtractedTheoryMeta;

    /// Pretty-emits `schema` using `FormatPolicy::default()`.
    ///
    /// This is a convenience wrapper around
    /// [`AstParser::emit_pretty_with_policy`].
    fn emit_pretty(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
        let default_policy = crate::emit_pretty::FormatPolicy::default();
        self.emit_pretty_with_policy(schema, &default_policy)
    }

    /// Pretty-emits `schema` under an explicit formatting `policy`.
    ///
    /// The provided fallback does no emitting: it always returns
    /// `ParseError::EmitFailed` naming this parser's protocol. Implementations
    /// that support pretty-printing override it.
    fn emit_pretty_with_policy(
        &self,
        schema: &Schema,
        policy: &crate::emit_pretty::FormatPolicy,
    ) -> Result<Vec<u8>, ParseError> {
        // The fallback has no use for its inputs; mark them as deliberately ignored.
        let _ = schema;
        let _ = policy;

        let protocol = self.protocol_name();
        Err(ParseError::EmitFailed {
            protocol: protocol.to_owned(),
            reason: format!("emit_pretty_with_policy not implemented for protocol '{protocol}'"),
        })
    }
}
/// Registry of [`AstParser`] implementations, addressable either by protocol
/// name or by file extension.
pub struct ParserRegistry {
/// Parsers keyed by the name each one reports from `protocol_name()`.
parsers: FxHashMap<String, Arc<dyn AstParser>>,
/// File extension -> protocol name, filled from each parser's
/// `supported_extensions()` at registration time.
extension_map: FxHashMap<String, String>,
}
impl ParserRegistry {
/// Creates a registry and, when the `grammars` feature is enabled, registers
/// a parser for every grammar returned by `panproto_grammars::grammars()`.
///
/// A grammar whose parser cannot be constructed is skipped instead of
/// aborting construction; debug builds print a warning to stderr for each
/// skipped grammar. Without the `grammars` feature the registry starts empty.
#[must_use]
pub fn new() -> Self {
    // Without the `grammars` feature the population loop below is compiled
    // out and `registry` is never mutated, which would trip `unused_mut`.
    #[cfg_attr(not(feature = "grammars"), allow(unused_mut))]
    let mut registry = Self {
        parsers: FxHashMap::default(),
        extension_map: FxHashMap::default(),
    };

    #[cfg(feature = "grammars")]
    for grammar in panproto_grammars::grammars() {
        let config = crate::languages::walker_configs::walker_config_for(grammar.name);
        match crate::languages::common::LanguageParser::from_language_with_grammar_json(
            grammar.name,
            grammar.extensions.to_vec(),
            grammar.language,
            grammar.node_types,
            grammar.tags_query,
            config,
            grammar.grammar_json,
        ) {
            Ok(p) => registry.register(Box::new(p)),
            Err(err) => {
                // Keeps `err` "used" in release builds, where the warning
                // below is compiled out.
                let _ = err;
                #[cfg(debug_assertions)]
                eprintln!(
                    "warning: grammar '{}' theory extraction failed: {err}",
                    grammar.name
                );
            }
        }
    }

    registry
}
/// Adds `parser` to the registry under the name it reports from
/// `protocol_name()`.
///
/// Every extension in `supported_extensions()` is pointed at that name,
/// replacing any mapping an earlier parser held for the same extension. A
/// parser already registered under the same name is replaced; extension
/// mappings left by the replaced parser are not removed here.
///
/// The parser is also handed to `crate::decorate::register_layout_enricher`.
pub fn register(&mut self, parser: Box<dyn AstParser>) {
    let shared: Arc<dyn AstParser> = Arc::from(parser);
    let protocol = shared.protocol_name().to_owned();

    self.extension_map.extend(
        shared
            .supported_extensions()
            .iter()
            .map(|ext| ((*ext).to_owned(), protocol.clone())),
    );

    crate::decorate::register_layout_enricher(Arc::clone(&shared));
    self.parsers.insert(protocol, shared);
}
/// Builds a parser for an externally supplied tree-sitter grammar and
/// registers it under `name`.
///
/// All borrowed inputs must be `'static`. Callers holding owned buffers can
/// use [`Self::register_external_grammar_owned`] instead. The walker
/// configuration is looked up by `name`.
///
/// # Errors
///
/// Propagates the error from
/// `LanguageParser::from_language_with_grammar_json`; in that case nothing is
/// registered.
pub fn register_external_grammar(
    &mut self,
    name: &'static str,
    extensions: Vec<&'static str>,
    language: tree_sitter::Language,
    node_types_json: &'static [u8],
    tags_query: Option<&'static str>,
    grammar_json: Option<&'static [u8]>,
) -> Result<(), crate::error::ParseError> {
    use crate::languages::common::LanguageParser;
    use crate::languages::walker_configs::walker_config_for;

    let boxed = Box::new(LanguageParser::from_language_with_grammar_json(
        name,
        extensions,
        language,
        node_types_json,
        tags_query,
        walker_config_for(name),
        grammar_json,
    )?);
    self.register(boxed);
    Ok(())
}
/// Owned-input variant of [`Self::register_external_grammar`].
///
/// Each owned buffer (`name`, every extension, `node_types_json`, and the
/// optional `tags_query` / `grammar_json`) is converted to a `'static`
/// borrow with `Box::leak` and then forwarded unchanged.
///
/// The leaked allocations are never reclaimed by this method, including when
/// registration fails, so every call permanently retains the memory of its
/// inputs.
///
/// # Errors
///
/// Returns whatever [`Self::register_external_grammar`] returns.
pub fn register_external_grammar_owned(
    &mut self,
    name: String,
    extensions: Vec<String>,
    language: tree_sitter::Language,
    node_types_json: Vec<u8>,
    tags_query: Option<String>,
    grammar_json: Option<Vec<u8>>,
) -> Result<(), crate::error::ParseError> {
    /// Leaks a string so that it can be borrowed for `'static`.
    fn leak_str(owned: String) -> &'static str {
        Box::leak(owned.into_boxed_str())
    }

    /// Leaks a byte buffer so that it can be borrowed for `'static`.
    fn leak_bytes(owned: Vec<u8>) -> &'static [u8] {
        Box::leak(owned.into_boxed_slice())
    }

    self.register_external_grammar(
        leak_str(name),
        extensions.into_iter().map(leak_str).collect(),
        language,
        leak_bytes(node_types_json),
        tags_query.map(leak_str),
        grammar_json.map(leak_bytes),
    )
}
/// Removes the parser registered under `name`, along with every extension
/// mapping that points at it.
///
/// Returns `true` if a parser was removed and `false` if none was registered
/// under `name`, in which case the extension map is left untouched. Only the
/// registry's own maps are modified.
pub fn unregister(&mut self, name: &str) -> bool {
    if self.parsers.remove(name).is_none() {
        return false;
    }
    self.extension_map.retain(|_, owner| owner.as_str() != name);
    true
}
pub fn override_grammar(
&mut self,
name: String,
extensions: Vec<String>,
language: tree_sitter::Language,
node_types_json: Vec<u8>,
tags_query: Option<String>,
grammar_json: Option<Vec<u8>>,
) -> Result<(), crate::error::ParseError> {
self.unregister(&name);
self.register_external_grammar_owned(
name,
extensions,
language,
node_types_json,
tags_query,
grammar_json,
)
}
/// Maps `path` to a protocol name based on its file extension.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when no registered parser claims it. The lookup is an
/// exact, case-sensitive match against the registered extensions.
#[must_use]
pub fn detect_language(&self, path: &Path) -> Option<&str> {
    let ext = path.extension()?.to_str()?;
    self.extension_map.get(ext).map(String::as_str)
}
/// Parses `content` with the parser selected by the extension of `path`.
///
/// The parser receives the displayed form of `path` as its file path.
///
/// # Errors
///
/// Returns `ParseError::UnknownLanguage` carrying the path's extension (or
/// an empty string when there is none or it is not valid UTF-8) if no parser
/// claims it. Otherwise returns whatever [`Self::parse_with_protocol`]
/// returns.
pub fn parse_file(&self, path: &Path, content: &[u8]) -> Result<Schema, ParseError> {
    match self.detect_language(path) {
        Some(protocol) => {
            let shown_path = path.display().to_string();
            self.parse_with_protocol(protocol, content, &shown_path)
        }
        None => Err(ParseError::UnknownLanguage {
            extension: path
                .extension()
                .and_then(std::ffi::OsStr::to_str)
                .unwrap_or("")
                .to_owned(),
        }),
    }
}
/// Parses `content` with the parser registered under `protocol`.
///
/// `file_path` is forwarded to the parser unchanged.
///
/// # Errors
///
/// Returns `ParseError::UnknownLanguage` (with `protocol` in the `extension`
/// field) if no such parser is registered; otherwise returns the parser's
/// own result.
pub fn parse_with_protocol(
    &self,
    protocol: &str,
    content: &[u8],
    file_path: &str,
) -> Result<Schema, ParseError> {
    match self.parsers.get(protocol) {
        Some(parser) => parser.parse(content, file_path),
        None => Err(ParseError::UnknownLanguage {
            extension: protocol.to_owned(),
        }),
    }
}
/// Emits `schema` as bytes using the parser registered under `protocol`.
///
/// # Errors
///
/// Returns `ParseError::UnknownLanguage` (with `protocol` in the `extension`
/// field) if no such parser is registered; otherwise returns the result of
/// the parser's `emit`.
pub fn emit_with_protocol(
    &self,
    protocol: &str,
    schema: &Schema,
) -> Result<Vec<u8>, ParseError> {
    match self.parsers.get(protocol) {
        Some(parser) => parser.emit(schema),
        None => Err(ParseError::UnknownLanguage {
            extension: protocol.to_owned(),
        }),
    }
}
/// Pretty-emits `schema` using the parser registered under `protocol`.
///
/// # Errors
///
/// Returns `ParseError::UnknownLanguage` (with `protocol` in the `extension`
/// field) if no such parser is registered; otherwise returns the result of
/// the parser's `emit_pretty`.
pub fn emit_pretty_with_protocol(
    &self,
    protocol: &str,
    schema: &Schema,
) -> Result<Vec<u8>, ParseError> {
    self.parsers.get(protocol).map_or_else(
        || {
            Err(ParseError::UnknownLanguage {
                extension: protocol.to_owned(),
            })
        },
        |parser| parser.emit_pretty(schema),
    )
}
/// Classifies the emit support available for `protocol`.
///
/// * `Unsupported`: no parser is registered under `protocol`.
/// * `Verified`: a parser is registered and `protocol` appears in
///   `VERIFIED_EMIT_PROTOCOLS`.
/// * `Generic`: a parser is registered but `protocol` is not in that list.
///
/// The list is probed with a binary search and therefore has to stay sorted.
#[must_use]
pub fn emit_verification_status(&self, protocol: &str) -> EmitVerificationStatus {
    if self.parsers.contains_key(protocol) {
        match VERIFIED_EMIT_PROTOCOLS.binary_search(&protocol) {
            Ok(_) => EmitVerificationStatus::Verified,
            Err(_) => EmitVerificationStatus::Generic,
        }
    } else {
        EmitVerificationStatus::Unsupported
    }
}
}
/// Classification returned by [`ParserRegistry::emit_verification_status`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmitVerificationStatus {
/// A parser is registered for the protocol and the protocol is listed in
/// `VERIFIED_EMIT_PROTOCOLS`.
Verified,
/// A parser is registered for the protocol, but the protocol is not listed
/// in `VERIFIED_EMIT_PROTOCOLS`.
Generic,
/// No parser is registered under the protocol name.
Unsupported,
}
/// Protocol names for which `ParserRegistry::emit_verification_status`
/// reports `EmitVerificationStatus::Verified`.
///
/// INVARIANT: the entries must stay in ascending byte order (the ordering of
/// `str`), because the lookup uses `binary_search`. An entry inserted out of
/// order would be silently missed. Note that `_` sorts before every lowercase
/// letter and digits sort before letters, e.g. `git_rebase` < `gitattributes`
/// and `re2c` < `readline`.
const VERIFIED_EMIT_PROTOCOLS: &[&str] = &[
"abc",
"actionscript",
"ada",
"agda",
"al",
"angular",
"apex",
"arduino",
"asciidoc",
"asm",
"astro",
"awk",
"bash",
"bass",
"batch",
"beancount",
"bibtex",
"bicep",
"bitbake",
"blade",
"brightscript",
"bsl",
"bugs",
"c",
"caddy",
"cairo",
"capnp",
"cedar",
"cedarschema",
"chatito",
"chuck",
"circom",
"clarity",
"clojure",
"cmake",
"cobol",
"commonlisp",
"cooklang",
"corn",
"cpon",
"cpp",
"crystal",
"csharp",
"csound",
"css",
"csv",
"cuda",
"cue",
"cylc",
"d",
"dart",
"desktop",
"devicetree",
"diff",
"djot",
"dockerfile",
"dot",
"doxygen",
"dtd",
"earthfile",
"ebnf",
"editorconfig",
"eds",
"eex",
"elisp",
"elixir",
"elm",
"elsa",
"embedded_template",
"enforce",
"erlang",
"facility",
"faust",
"fennel",
"fidl",
"firrtl",
"fish",
"foam",
"forth",
"fortran",
"fsharp",
"fsharp_signature",
"func",
"gdscript",
"git_config",
"git_rebase",
"gitattributes",
"gitcommit",
"gitignore",
"gleam",
"glicol",
"glsl",
"gn",
"go",
"godot_resource",
"gomod",
"gosum",
"graphql",
"groovy",
"gstlaunch",
"hack",
"hare",
"haskell",
"haxe",
"hcl",
"heex",
"hlsl",
"html",
"http",
"hurl",
"hyprlang",
"idris",
"ini",
"ispc",
"jags",
"janet",
"java",
"javascript",
"jinja2",
"jq",
"jsdoc",
"json",
"jsonnet",
"julia",
"just",
"kconfig",
"kdl",
"kotlin",
"latex",
"lean",
"ledger",
"lilypond",
"linkerscript",
"liquid",
"llvm",
"lua",
"luadoc",
"luap",
"luau",
"magik",
"make",
"markdown",
"markdown_inline",
"matlab",
"mermaid",
"meson",
"mojo",
"netlinx",
"nginx",
"nickel",
"nim",
"ninja",
"nix",
"norg",
"nqc",
"nushell",
"objc",
"ocaml",
"ocaml_interface",
"odin",
"org",
"pascal",
"pem",
"perl",
"pgn",
"php",
"pkl",
"po",
"pony",
"postscript",
"powershell",
"printf",
"prisma",
"prolog",
"promql",
"properties",
"protobuf",
"psv",
"pug",
"puppet",
"purescript",
"pymanifest",
"python",
"ql",
"qml",
"qmldir",
"query",
"qvr",
"r",
"racket",
"re2c",
"readline",
"regex",
"rego",
"requirements",
"rescript",
"robot",
"ron",
"rst",
"ruby",
"rust",
"scala",
"scheme",
"scss",
"smali",
"smithy",
"solidity",
"sparql",
"sql",
"squirrel",
"ssh_config",
"stan",
"stanfunctions",
"starlark",
"strudel_mini",
"supercollider",
"svelte",
"swift",
"tablegen",
"tcl",
"teal",
"templ",
"terraform",
"textproto",
"thrift",
"tidal_mini",
"tlaplus",
"tmux",
"toml",
"tsv",
"tsx",
"turtle",
"twig",
"typescript",
"typst",
"udev",
"ungrammar",
"uxntal",
"v",
"vb",
"verilog",
"vhdl",
"vim",
"vimdoc",
"vue",
"wast",
"wat",
"wgsl",
"wit",
"xcompose",
"xml",
"yaml",
"yuck",
"zig",
"zsh",
];
impl ParserRegistry {
/// Decorates `abstract_schema` under `policy` using the parser registered
/// for `protocol`.
///
/// The work is delegated to `crate::decorate::decorate_with_parser`.
///
/// # Errors
///
/// Returns `ParseError::UnknownLanguage` (with `protocol` in the `extension`
/// field) if no such parser is registered; otherwise returns the result of
/// the delegate.
pub fn decorate(
    &self,
    protocol: &str,
    abstract_schema: &AbstractSchema,
    policy: &LayoutPolicy,
) -> Result<DecoratedSchema, ParseError> {
    match self.parsers.get(protocol) {
        Some(parser) => {
            crate::decorate::decorate_with_parser(parser.as_ref(), abstract_schema, policy)
        }
        None => Err(ParseError::UnknownLanguage {
            extension: protocol.to_owned(),
        }),
    }
}
/// Pretty-emits `abstract_schema` under `policy` using the parser registered
/// for `protocol`.
///
/// Before emitting, the schema's own protocol is checked against `protocol`.
///
/// # Errors
///
/// * `ParseError::UnknownLanguage` if no parser is registered for
///   `protocol` (this is checked first).
/// * `ParseError::SchemaConstruction` if the schema carries a different
///   protocol.
/// * Otherwise, whatever the parser's `emit_pretty_with_policy` returns.
pub fn pretty_with_protocol(
    &self,
    protocol: &str,
    abstract_schema: &AbstractSchema,
    policy: &LayoutPolicy,
) -> Result<Vec<u8>, ParseError> {
    let parser = match self.parsers.get(protocol) {
        Some(found) => found,
        None => {
            return Err(ParseError::UnknownLanguage {
                extension: protocol.to_owned(),
            });
        }
    };

    let schema = abstract_schema.as_schema();
    check_protocol_match(protocol, schema, "pretty_with_protocol")?;
    parser.emit_pretty_with_policy(schema, policy)
}
/// Builds the parse/emit protolens for `protocol` under `policy`.
///
/// The registry is consulted only to confirm that `protocol` is registered;
/// the protolens itself is produced by
/// `crate::parse_emit_protolens::parse_emit_protolens` from the protocol name
/// and the policy.
///
/// # Errors
///
/// Returns `ParseError::UnknownLanguage` (with `protocol` in the `extension`
/// field) if no parser is registered under `protocol`.
pub fn parse_emit_protolens(
    &self,
    protocol: &str,
    policy: &LayoutPolicy,
) -> Result<panproto_lens::Protolens, ParseError> {
    if self.parsers.contains_key(protocol) {
        Ok(crate::parse_emit_protolens::parse_emit_protolens(
            protocol, policy,
        ))
    } else {
        Err(ParseError::UnknownLanguage {
            extension: protocol.to_owned(),
        })
    }
}
/// Returns the theory metadata of the parser registered under `protocol`,
/// or `None` if no such parser is registered.
#[must_use]
pub fn theory_meta(&self, protocol: &str) -> Option<&ExtractedTheoryMeta> {
    let parser = self.parsers.get(protocol)?;
    Some(parser.theory_meta())
}
/// Iterates over the names of all registered protocols.
///
/// The names come straight from the keys of a hash map, so the iteration
/// order is unspecified.
pub fn protocol_names(&self) -> impl Iterator<Item = &str> {
self.parsers.keys().map(String::as_str)
}
/// Returns `true` if a parser is registered under `protocol`.
#[must_use]
pub fn has_parser(&self, protocol: &str) -> bool {
self.parsers.contains_key(protocol)
}
/// Returns the number of registered parsers (one per protocol name).
#[must_use]
pub fn len(&self) -> usize {
self.parsers.len()
}
/// Returns `true` if no parser is registered.
#[must_use]
pub fn is_empty(&self) -> bool {
self.parsers.is_empty()
}
}
impl Default for ParserRegistry {
/// Equivalent to [`ParserRegistry::new`].
fn default() -> Self {
Self::new()
}
}
/// Verifies that `schema` carries the protocol the caller asked for.
///
/// `operation` names the calling entry point and is used as the prefix of
/// the error message.
///
/// # Errors
///
/// Returns `ParseError::SchemaConstruction` describing both protocol names
/// when `schema.protocol` differs from `expected`.
fn check_protocol_match(
    expected: &str,
    schema: &Schema,
    operation: &'static str,
) -> Result<(), ParseError> {
    if schema.protocol != expected {
        let reason = format!(
            "{operation}: protocol mismatch — registry called with '{expected}' but \
             schema carries protocol '{}'",
            schema.protocol,
        );
        return Err(ParseError::SchemaConstruction { reason });
    }
    Ok(())
}