Skip to main content

panproto_parse/
registry.rs

1//! Parser registry mapping protocol names to full-AST parser implementations.
2
3use std::path::Path;
4
5use panproto_schema::Schema;
6use rustc_hash::FxHashMap;
7
8use crate::error::ParseError;
9use crate::theory_extract::ExtractedTheoryMeta;
10
11/// A full-AST parser and emitter for a specific programming language.
12///
13/// Each implementation wraps a tree-sitter grammar and its auto-derived theory,
14/// providing parse (source → Schema) and emit (Schema → source) operations.
15pub trait AstParser: Send + Sync {
16    /// The panproto protocol name (e.g. `"typescript"`, `"python"`).
17    fn protocol_name(&self) -> &str;
18
19    /// Parse source code into a full-AST [`Schema`].
20    ///
21    /// # Errors
22    ///
23    /// Returns [`ParseError`] if tree-sitter parsing fails or schema construction fails.
24    fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError>;
25
26    /// Emit a [`Schema`] back to source code bytes.
27    ///
28    /// The emitter walks the schema graph top-down, using formatting constraints
29    /// (comment, indent, blank-lines-before) to reproduce the original formatting.
30    ///
31    /// # Errors
32    ///
33    /// Returns [`ParseError::EmitFailed`] if emission fails.
34    fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError>;
35
36    /// File extensions this parser handles (e.g. `["ts", "tsx"]`).
37    fn supported_extensions(&self) -> &[&str];
38
39    /// The auto-derived theory metadata for this language.
40    fn theory_meta(&self) -> &ExtractedTheoryMeta;
41}
42
43/// Registry of all full-AST parsers, keyed by protocol name.
44///
45/// Provides language detection by file extension and dispatches parse/emit
46/// operations to the appropriate language parser.
47pub struct ParserRegistry {
48    /// Parsers keyed by protocol name.
49    parsers: FxHashMap<String, Box<dyn AstParser>>,
50    /// Extension → protocol name mapping.
51    extension_map: FxHashMap<String, String>,
52}
53
54impl ParserRegistry {
55    /// Create a new registry populated with all built-in language parsers.
56    #[must_use]
57    pub fn new() -> Self {
58        let mut registry = Self {
59            parsers: FxHashMap::default(),
60            extension_map: FxHashMap::default(),
61        };
62
63        // Register all 10 language parsers.
64        registry.register(Box::new(
65            crate::languages::typescript::TypeScriptParser::new(),
66        ));
67        registry.register(Box::new(crate::languages::typescript::TsxParser::new()));
68        registry.register(Box::new(crate::languages::python::PythonParser::new()));
69        registry.register(Box::new(crate::languages::rust_lang::RustParser::new()));
70        registry.register(Box::new(crate::languages::java::JavaParser::new()));
71        registry.register(Box::new(crate::languages::go_lang::GoParser::new()));
72        registry.register(Box::new(crate::languages::swift::SwiftParser::new()));
73        registry.register(Box::new(crate::languages::kotlin::KotlinParser::new()));
74        registry.register(Box::new(crate::languages::csharp::CSharpParser::new()));
75        registry.register(Box::new(crate::languages::c_lang::CParser::new()));
76        registry.register(Box::new(crate::languages::cpp::CppParser::new()));
77
78        registry
79    }
80
81    /// Register a parser implementation.
82    pub fn register(&mut self, parser: Box<dyn AstParser>) {
83        let name = parser.protocol_name().to_owned();
84        for ext in parser.supported_extensions() {
85            self.extension_map.insert((*ext).to_owned(), name.clone());
86        }
87        self.parsers.insert(name, parser);
88    }
89
90    /// Detect the language protocol for a file path by its extension.
91    ///
92    /// Returns `None` if the extension is not recognized (caller should
93    /// fall back to the `raw_file` protocol).
94    #[must_use]
95    pub fn detect_language(&self, path: &Path) -> Option<&str> {
96        path.extension()
97            .and_then(|ext| ext.to_str())
98            .and_then(|ext| self.extension_map.get(ext))
99            .map(String::as_str)
100    }
101
102    /// Parse a file by detecting its language from the file path.
103    ///
104    /// # Errors
105    ///
106    /// Returns [`ParseError::UnknownLanguage`] if the file extension is not recognized.
107    /// Returns other [`ParseError`] variants if parsing fails.
108    pub fn parse_file(&self, path: &Path, content: &[u8]) -> Result<Schema, ParseError> {
109        let protocol = self
110            .detect_language(path)
111            .ok_or_else(|| ParseError::UnknownLanguage {
112                extension: path
113                    .extension()
114                    .and_then(|e| e.to_str())
115                    .unwrap_or("")
116                    .to_owned(),
117            })?;
118
119        self.parse_with_protocol(protocol, content, &path.display().to_string())
120    }
121
122    /// Parse source code with a specific protocol name.
123    ///
124    /// # Errors
125    ///
126    /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
127    pub fn parse_with_protocol(
128        &self,
129        protocol: &str,
130        content: &[u8],
131        file_path: &str,
132    ) -> Result<Schema, ParseError> {
133        let parser = self
134            .parsers
135            .get(protocol)
136            .ok_or_else(|| ParseError::UnknownLanguage {
137                extension: protocol.to_owned(),
138            })?;
139
140        parser.parse(content, file_path)
141    }
142
143    /// Emit a schema back to source code bytes using the specified protocol.
144    ///
145    /// # Errors
146    ///
147    /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
148    pub fn emit_with_protocol(
149        &self,
150        protocol: &str,
151        schema: &Schema,
152    ) -> Result<Vec<u8>, ParseError> {
153        let parser = self
154            .parsers
155            .get(protocol)
156            .ok_or_else(|| ParseError::UnknownLanguage {
157                extension: protocol.to_owned(),
158            })?;
159
160        parser.emit(schema)
161    }
162
163    /// Get the theory metadata for a specific protocol.
164    #[must_use]
165    pub fn theory_meta(&self, protocol: &str) -> Option<&ExtractedTheoryMeta> {
166        self.parsers.get(protocol).map(|p| p.theory_meta())
167    }
168
169    /// List all registered protocol names.
170    pub fn protocol_names(&self) -> impl Iterator<Item = &str> {
171        self.parsers.keys().map(String::as_str)
172    }
173
174    /// Get the number of registered parsers.
175    #[must_use]
176    pub fn len(&self) -> usize {
177        self.parsers.len()
178    }
179
180    /// Check if the registry is empty.
181    #[must_use]
182    pub fn is_empty(&self) -> bool {
183        self.parsers.is_empty()
184    }
185}
186
187impl Default for ParserRegistry {
188    fn default() -> Self {
189        Self::new()
190    }
191}