Skip to main content

panproto_parse/
registry.rs

1//! Parser registry mapping protocol names to full-AST parser implementations.
2
3use std::path::Path;
4
5use panproto_schema::Schema;
6use rustc_hash::FxHashMap;
7
8use crate::error::ParseError;
9use crate::theory_extract::ExtractedTheoryMeta;
10
11/// A full-AST parser and emitter for a specific programming language.
12///
13/// Each implementation wraps a tree-sitter grammar and its auto-derived theory,
14/// providing parse (source → Schema) and emit (Schema → source) operations.
15pub trait AstParser: Send + Sync {
16    /// The panproto protocol name (e.g. `"typescript"`, `"python"`).
17    fn protocol_name(&self) -> &str;
18
19    /// Parse source code into a full-AST [`Schema`].
20    ///
21    /// # Errors
22    ///
23    /// Returns [`ParseError`] if tree-sitter parsing fails or schema construction fails.
24    fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError>;
25
26    /// Emit a [`Schema`] back to source code bytes.
27    ///
28    /// The emitter walks the schema graph top-down, using formatting constraints
29    /// (comment, indent, blank-lines-before) to reproduce the original formatting.
30    ///
31    /// # Errors
32    ///
33    /// Returns [`ParseError::EmitFailed`] if emission fails.
34    fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError>;
35
36    /// File extensions this parser handles (e.g. `["ts", "tsx"]`).
37    fn supported_extensions(&self) -> &[&str];
38
39    /// The auto-derived theory metadata for this language.
40    fn theory_meta(&self) -> &ExtractedTheoryMeta;
41}
42
43/// Registry of all full-AST parsers, keyed by protocol name.
44///
45/// Provides language detection by file extension and dispatches parse/emit
46/// operations to the appropriate language parser.
47pub struct ParserRegistry {
48    /// Parsers keyed by protocol name.
49    parsers: FxHashMap<String, Box<dyn AstParser>>,
50    /// Extension → protocol name mapping.
51    extension_map: FxHashMap<String, String>,
52}
53
54impl ParserRegistry {
55    /// Create a new registry populated with all enabled language parsers.
56    ///
57    /// With the `grammars` feature (default), this populates the registry from
58    /// `panproto-grammars`, which provides up to 248 tree-sitter languages.
59    /// Without the `grammars` feature, this returns an empty registry; call
60    /// [`register`](Self::register) to add parsers manually using individual
61    /// grammar crates.
62    #[must_use]
63    pub fn new() -> Self {
64        let mut registry = Self {
65            parsers: FxHashMap::default(),
66            extension_map: FxHashMap::default(),
67        };
68
69        #[cfg(feature = "grammars")]
70        for grammar in panproto_grammars::grammars() {
71            let config = crate::languages::walker_configs::walker_config_for(grammar.name);
72            match crate::languages::common::LanguageParser::from_language(
73                grammar.name,
74                grammar.extensions.to_vec(),
75                grammar.language,
76                grammar.node_types,
77                grammar.tags_query,
78                config,
79            ) {
80                Ok(p) => registry.register(Box::new(p)),
81                Err(err) => {
82                    #[cfg(debug_assertions)]
83                    eprintln!(
84                        "warning: grammar '{}' theory extraction failed: {err}",
85                        grammar.name
86                    );
87                }
88            }
89        }
90
91        registry
92    }
93
94    /// Register a parser implementation.
95    pub fn register(&mut self, parser: Box<dyn AstParser>) {
96        let name = parser.protocol_name().to_owned();
97        for ext in parser.supported_extensions() {
98            self.extension_map.insert((*ext).to_owned(), name.clone());
99        }
100        self.parsers.insert(name, parser);
101    }
102
103    /// Detect the language protocol for a file path by its extension.
104    ///
105    /// Returns `None` if the extension is not recognized (caller should
106    /// fall back to the `raw_file` protocol).
107    #[must_use]
108    pub fn detect_language(&self, path: &Path) -> Option<&str> {
109        path.extension()
110            .and_then(|ext| ext.to_str())
111            .and_then(|ext| self.extension_map.get(ext))
112            .map(String::as_str)
113    }
114
115    /// Parse a file by detecting its language from the file path.
116    ///
117    /// # Errors
118    ///
119    /// Returns [`ParseError::UnknownLanguage`] if the file extension is not recognized.
120    /// Returns other [`ParseError`] variants if parsing fails.
121    pub fn parse_file(&self, path: &Path, content: &[u8]) -> Result<Schema, ParseError> {
122        let protocol = self
123            .detect_language(path)
124            .ok_or_else(|| ParseError::UnknownLanguage {
125                extension: path
126                    .extension()
127                    .and_then(|e| e.to_str())
128                    .unwrap_or("")
129                    .to_owned(),
130            })?;
131
132        self.parse_with_protocol(protocol, content, &path.display().to_string())
133    }
134
135    /// Parse source code with a specific protocol name.
136    ///
137    /// # Errors
138    ///
139    /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
140    pub fn parse_with_protocol(
141        &self,
142        protocol: &str,
143        content: &[u8],
144        file_path: &str,
145    ) -> Result<Schema, ParseError> {
146        let parser = self
147            .parsers
148            .get(protocol)
149            .ok_or_else(|| ParseError::UnknownLanguage {
150                extension: protocol.to_owned(),
151            })?;
152
153        parser.parse(content, file_path)
154    }
155
156    /// Emit a schema back to source code bytes using the specified protocol.
157    ///
158    /// # Errors
159    ///
160    /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
161    pub fn emit_with_protocol(
162        &self,
163        protocol: &str,
164        schema: &Schema,
165    ) -> Result<Vec<u8>, ParseError> {
166        let parser = self
167            .parsers
168            .get(protocol)
169            .ok_or_else(|| ParseError::UnknownLanguage {
170                extension: protocol.to_owned(),
171            })?;
172
173        parser.emit(schema)
174    }
175
176    /// Get the theory metadata for a specific protocol.
177    #[must_use]
178    pub fn theory_meta(&self, protocol: &str) -> Option<&ExtractedTheoryMeta> {
179        self.parsers.get(protocol).map(|p| p.theory_meta())
180    }
181
182    /// List all registered protocol names.
183    pub fn protocol_names(&self) -> impl Iterator<Item = &str> {
184        self.parsers.keys().map(String::as_str)
185    }
186
187    /// Get the number of registered parsers.
188    #[must_use]
189    pub fn len(&self) -> usize {
190        self.parsers.len()
191    }
192
193    /// Check if the registry is empty.
194    #[must_use]
195    pub fn is_empty(&self) -> bool {
196        self.parsers.is_empty()
197    }
198}
199
200impl Default for ParserRegistry {
201    fn default() -> Self {
202        Self::new()
203    }
204}