panproto_parse/registry.rs
1//! Parser registry mapping protocol names to full-AST parser implementations.
2
3use std::path::Path;
4
5use panproto_schema::Schema;
6use rustc_hash::FxHashMap;
7
8use crate::error::ParseError;
9use crate::theory_extract::ExtractedTheoryMeta;
10
11/// A full-AST parser and emitter for a specific programming language.
12///
13/// Each implementation wraps a tree-sitter grammar and its auto-derived theory,
14/// providing parse (source → Schema) and emit (Schema → source) operations.
15pub trait AstParser: Send + Sync {
16 /// The panproto protocol name (e.g. `"typescript"`, `"python"`).
17 fn protocol_name(&self) -> &str;
18
19 /// Parse source code into a full-AST [`Schema`].
20 ///
21 /// # Errors
22 ///
23 /// Returns [`ParseError`] if tree-sitter parsing fails or schema construction fails.
24 fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError>;
25
26 /// Emit a [`Schema`] back to source code bytes.
27 ///
28 /// The emitter walks the schema graph top-down, using formatting constraints
29 /// (comment, indent, blank-lines-before) to reproduce the original formatting.
30 ///
31 /// # Errors
32 ///
33 /// Returns [`ParseError::EmitFailed`] if emission fails.
34 fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError>;
35
36 /// File extensions this parser handles (e.g. `["ts", "tsx"]`).
37 fn supported_extensions(&self) -> &[&str];
38
39 /// The auto-derived theory metadata for this language.
40 fn theory_meta(&self) -> &ExtractedTheoryMeta;
41
42 /// Render a by-construction [`Schema`] (one with no parse-recovered
43 /// byte positions or interstitials) to source bytes.
44 ///
45 /// Unlike [`emit`](Self::emit), which reconstructs source from
46 /// byte-position fragments stored on the schema during `parse`,
47 /// `emit_pretty` walks tree-sitter `grammar.json` production rules
48 /// to render schemas built from scratch via `SchemaBuilder`.
49 ///
50 /// # Errors
51 ///
52 /// Returns [`ParseError::EmitFailed`] when the language has no
53 /// vendored `grammar.json`, when a vertex's kind is not a grammar
54 /// rule, or when a required field has no corresponding schema edge.
55 fn emit_pretty(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
56 let _ = schema;
57 Err(ParseError::EmitFailed {
58 protocol: self.protocol_name().to_owned(),
59 reason: format!(
60 "emit_pretty not implemented for protocol '{}'",
61 self.protocol_name()
62 ),
63 })
64 }
65}
66
67/// Registry of all full-AST parsers, keyed by protocol name.
68///
69/// Provides language detection by file extension and dispatches parse/emit
70/// operations to the appropriate language parser.
71pub struct ParserRegistry {
72 /// Parsers keyed by protocol name.
73 parsers: FxHashMap<String, Box<dyn AstParser>>,
74 /// Extension → protocol name mapping.
75 extension_map: FxHashMap<String, String>,
76}
77
78impl ParserRegistry {
79 /// Create a new registry populated with all enabled language parsers.
80 ///
81 /// With the `grammars` feature (default), this populates the registry from
82 /// `panproto-grammars`, which provides up to 248 tree-sitter languages.
83 /// Without the `grammars` feature, this returns an empty registry; call
84 /// [`register`](Self::register) to add parsers manually using individual
85 /// grammar crates.
86 #[must_use]
87 pub fn new() -> Self {
88 let mut registry = Self {
89 parsers: FxHashMap::default(),
90 extension_map: FxHashMap::default(),
91 };
92
93 #[cfg(feature = "grammars")]
94 for grammar in panproto_grammars::grammars() {
95 let config = crate::languages::walker_configs::walker_config_for(grammar.name);
96 match crate::languages::common::LanguageParser::from_language_with_grammar_json(
97 grammar.name,
98 grammar.extensions.to_vec(),
99 grammar.language,
100 grammar.node_types,
101 grammar.tags_query,
102 config,
103 grammar.grammar_json,
104 ) {
105 Ok(p) => registry.register(Box::new(p)),
106 Err(err) => {
107 let _ = err;
108 #[cfg(debug_assertions)]
109 eprintln!(
110 "warning: grammar '{}' theory extraction failed: {err}",
111 grammar.name
112 );
113 }
114 }
115 }
116
117 registry
118 }
119
120 /// Register a parser implementation.
121 pub fn register(&mut self, parser: Box<dyn AstParser>) {
122 let name = parser.protocol_name().to_owned();
123 for ext in parser.supported_extensions() {
124 self.extension_map.insert((*ext).to_owned(), name.clone());
125 }
126 self.parsers.insert(name, parser);
127 }
128
129 /// Detect the language protocol for a file path by its extension.
130 ///
131 /// Returns `None` if the extension is not recognized (caller should
132 /// fall back to the `raw_file` protocol).
133 #[must_use]
134 pub fn detect_language(&self, path: &Path) -> Option<&str> {
135 path.extension()
136 .and_then(|ext| ext.to_str())
137 .and_then(|ext| self.extension_map.get(ext))
138 .map(String::as_str)
139 }
140
141 /// Parse a file by detecting its language from the file path.
142 ///
143 /// # Errors
144 ///
145 /// Returns [`ParseError::UnknownLanguage`] if the file extension is not recognized.
146 /// Returns other [`ParseError`] variants if parsing fails.
147 pub fn parse_file(&self, path: &Path, content: &[u8]) -> Result<Schema, ParseError> {
148 let protocol = self
149 .detect_language(path)
150 .ok_or_else(|| ParseError::UnknownLanguage {
151 extension: path
152 .extension()
153 .and_then(|e| e.to_str())
154 .unwrap_or("")
155 .to_owned(),
156 })?;
157
158 self.parse_with_protocol(protocol, content, &path.display().to_string())
159 }
160
161 /// Parse source code with a specific protocol name.
162 ///
163 /// # Errors
164 ///
165 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
166 pub fn parse_with_protocol(
167 &self,
168 protocol: &str,
169 content: &[u8],
170 file_path: &str,
171 ) -> Result<Schema, ParseError> {
172 let parser = self
173 .parsers
174 .get(protocol)
175 .ok_or_else(|| ParseError::UnknownLanguage {
176 extension: protocol.to_owned(),
177 })?;
178
179 parser.parse(content, file_path)
180 }
181
182 /// Emit a schema back to source code bytes using the specified protocol.
183 ///
184 /// # Errors
185 ///
186 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
187 pub fn emit_with_protocol(
188 &self,
189 protocol: &str,
190 schema: &Schema,
191 ) -> Result<Vec<u8>, ParseError> {
192 let parser = self
193 .parsers
194 .get(protocol)
195 .ok_or_else(|| ParseError::UnknownLanguage {
196 extension: protocol.to_owned(),
197 })?;
198
199 parser.emit(schema)
200 }
201
202 /// Render a by-construction schema using the named protocol.
203 ///
204 /// # Errors
205 ///
206 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not
207 /// registered, or [`ParseError::EmitFailed`] from the underlying
208 /// parser's `emit_pretty`.
209 pub fn emit_pretty_with_protocol(
210 &self,
211 protocol: &str,
212 schema: &Schema,
213 ) -> Result<Vec<u8>, ParseError> {
214 let parser = self
215 .parsers
216 .get(protocol)
217 .ok_or_else(|| ParseError::UnknownLanguage {
218 extension: protocol.to_owned(),
219 })?;
220
221 parser.emit_pretty(schema)
222 }
223
224 /// Get the theory metadata for a specific protocol.
225 #[must_use]
226 pub fn theory_meta(&self, protocol: &str) -> Option<&ExtractedTheoryMeta> {
227 self.parsers.get(protocol).map(|p| p.theory_meta())
228 }
229
230 /// List all registered protocol names.
231 pub fn protocol_names(&self) -> impl Iterator<Item = &str> {
232 self.parsers.keys().map(String::as_str)
233 }
234
235 /// Get the number of registered parsers.
236 #[must_use]
237 pub fn len(&self) -> usize {
238 self.parsers.len()
239 }
240
241 /// Check if the registry is empty.
242 #[must_use]
243 pub fn is_empty(&self) -> bool {
244 self.parsers.is_empty()
245 }
246}
247
248impl Default for ParserRegistry {
249 fn default() -> Self {
250 Self::new()
251 }
252}