panproto_parse/registry.rs
1//! Parser registry mapping protocol names to full-AST parser implementations.
2
3use std::path::Path;
4
5use panproto_schema::Schema;
6use rustc_hash::FxHashMap;
7
8use crate::error::ParseError;
9use crate::theory_extract::ExtractedTheoryMeta;
10
11/// A full-AST parser and emitter for a specific programming language.
12///
13/// Each implementation wraps a tree-sitter grammar and its auto-derived theory,
14/// providing parse (source → Schema) and emit (Schema → source) operations.
15pub trait AstParser: Send + Sync {
16 /// The panproto protocol name (e.g. `"typescript"`, `"python"`).
17 fn protocol_name(&self) -> &str;
18
19 /// Parse source code into a full-AST [`Schema`].
20 ///
21 /// # Errors
22 ///
23 /// Returns [`ParseError`] if tree-sitter parsing fails or schema construction fails.
24 fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError>;
25
26 /// Emit a [`Schema`] back to source code bytes.
27 ///
28 /// The emitter walks the schema graph top-down, using formatting constraints
29 /// (comment, indent, blank-lines-before) to reproduce the original formatting.
30 ///
31 /// # Errors
32 ///
33 /// Returns [`ParseError::EmitFailed`] if emission fails.
34 fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError>;
35
36 /// File extensions this parser handles (e.g. `["ts", "tsx"]`).
37 fn supported_extensions(&self) -> &[&str];
38
39 /// The auto-derived theory metadata for this language.
40 fn theory_meta(&self) -> &ExtractedTheoryMeta;
41}
42
43/// Registry of all full-AST parsers, keyed by protocol name.
44///
45/// Provides language detection by file extension and dispatches parse/emit
46/// operations to the appropriate language parser.
47pub struct ParserRegistry {
48 /// Parsers keyed by protocol name.
49 parsers: FxHashMap<String, Box<dyn AstParser>>,
50 /// Extension → protocol name mapping.
51 extension_map: FxHashMap<String, String>,
52}
53
54impl ParserRegistry {
55 /// Create a new registry populated with all enabled language parsers.
56 ///
57 /// With the `grammars` feature (default), this populates the registry from
58 /// `panproto-grammars`, which provides up to 248 tree-sitter languages.
59 /// Without the `grammars` feature, this returns an empty registry; call
60 /// [`register`](Self::register) to add parsers manually using individual
61 /// grammar crates.
62 #[must_use]
63 pub fn new() -> Self {
64 let mut registry = Self {
65 parsers: FxHashMap::default(),
66 extension_map: FxHashMap::default(),
67 };
68
69 #[cfg(feature = "grammars")]
70 for grammar in panproto_grammars::grammars() {
71 let config = crate::languages::walker_configs::walker_config_for(grammar.name);
72 match crate::languages::common::LanguageParser::from_language(
73 grammar.name,
74 grammar.extensions.to_vec(),
75 grammar.language,
76 grammar.node_types,
77 config,
78 ) {
79 Ok(p) => registry.register(Box::new(p)),
80 Err(err) => {
81 #[cfg(debug_assertions)]
82 eprintln!(
83 "warning: grammar '{}' theory extraction failed: {err}",
84 grammar.name
85 );
86 }
87 }
88 }
89
90 registry
91 }
92
93 /// Register a parser implementation.
94 pub fn register(&mut self, parser: Box<dyn AstParser>) {
95 let name = parser.protocol_name().to_owned();
96 for ext in parser.supported_extensions() {
97 self.extension_map.insert((*ext).to_owned(), name.clone());
98 }
99 self.parsers.insert(name, parser);
100 }
101
102 /// Detect the language protocol for a file path by its extension.
103 ///
104 /// Returns `None` if the extension is not recognized (caller should
105 /// fall back to the `raw_file` protocol).
106 #[must_use]
107 pub fn detect_language(&self, path: &Path) -> Option<&str> {
108 path.extension()
109 .and_then(|ext| ext.to_str())
110 .and_then(|ext| self.extension_map.get(ext))
111 .map(String::as_str)
112 }
113
114 /// Parse a file by detecting its language from the file path.
115 ///
116 /// # Errors
117 ///
118 /// Returns [`ParseError::UnknownLanguage`] if the file extension is not recognized.
119 /// Returns other [`ParseError`] variants if parsing fails.
120 pub fn parse_file(&self, path: &Path, content: &[u8]) -> Result<Schema, ParseError> {
121 let protocol = self
122 .detect_language(path)
123 .ok_or_else(|| ParseError::UnknownLanguage {
124 extension: path
125 .extension()
126 .and_then(|e| e.to_str())
127 .unwrap_or("")
128 .to_owned(),
129 })?;
130
131 self.parse_with_protocol(protocol, content, &path.display().to_string())
132 }
133
134 /// Parse source code with a specific protocol name.
135 ///
136 /// # Errors
137 ///
138 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
139 pub fn parse_with_protocol(
140 &self,
141 protocol: &str,
142 content: &[u8],
143 file_path: &str,
144 ) -> Result<Schema, ParseError> {
145 let parser = self
146 .parsers
147 .get(protocol)
148 .ok_or_else(|| ParseError::UnknownLanguage {
149 extension: protocol.to_owned(),
150 })?;
151
152 parser.parse(content, file_path)
153 }
154
155 /// Emit a schema back to source code bytes using the specified protocol.
156 ///
157 /// # Errors
158 ///
159 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
160 pub fn emit_with_protocol(
161 &self,
162 protocol: &str,
163 schema: &Schema,
164 ) -> Result<Vec<u8>, ParseError> {
165 let parser = self
166 .parsers
167 .get(protocol)
168 .ok_or_else(|| ParseError::UnknownLanguage {
169 extension: protocol.to_owned(),
170 })?;
171
172 parser.emit(schema)
173 }
174
175 /// Get the theory metadata for a specific protocol.
176 #[must_use]
177 pub fn theory_meta(&self, protocol: &str) -> Option<&ExtractedTheoryMeta> {
178 self.parsers.get(protocol).map(|p| p.theory_meta())
179 }
180
181 /// List all registered protocol names.
182 pub fn protocol_names(&self) -> impl Iterator<Item = &str> {
183 self.parsers.keys().map(String::as_str)
184 }
185
186 /// Get the number of registered parsers.
187 #[must_use]
188 pub fn len(&self) -> usize {
189 self.parsers.len()
190 }
191
192 /// Check if the registry is empty.
193 #[must_use]
194 pub fn is_empty(&self) -> bool {
195 self.parsers.is_empty()
196 }
197}
198
199impl Default for ParserRegistry {
200 fn default() -> Self {
201 Self::new()
202 }
203}