Skip to main content

octorus/syntax/
parser_pool.rs

1//! Parser pool for efficient tree-sitter parser management.
2//!
3//! Parsers are relatively heavy objects (~200KB each), so we maintain a pool
4//! to reuse them across multiple files rather than creating new ones for each file.
5//!
6//! Also caches compiled queries, which are expensive to create (they require
7//! parsing the query string and building internal data structures).
8
9use std::collections::hash_map::Entry;
10use std::collections::HashMap;
11use tree_sitter::{Parser, Query};
12
13use crate::language::SupportedLanguage;
14
/// Pool of tree-sitter parsers and compiled queries, one per language.
///
/// Parsers and queries are lazily created on first use and reused for subsequent operations.
/// This avoids the overhead of creating new parsers/compiling queries for each file.
///
/// Both maps start empty and are keyed by [`SupportedLanguage`]; an entry is only
/// stored after the parser was configured (or the query compiled) successfully.
pub struct ParserPool {
    /// Cached parsers, each already configured with its language's grammar
    parsers: HashMap<SupportedLanguage, Parser>,
    /// Cached compiled queries for highlight queries
    queries: HashMap<SupportedLanguage, Query>,
}
24
25impl Default for ParserPool {
26    fn default() -> Self {
27        Self::new()
28    }
29}
30
31impl ParserPool {
32    /// Create a new empty parser pool.
33    pub fn new() -> Self {
34        Self {
35            parsers: HashMap::new(),
36            queries: HashMap::new(),
37        }
38    }
39
40    /// Get or create a compiled highlight query for the given language.
41    ///
42    /// Queries are cached to avoid recompilation overhead on each use.
43    /// This is particularly important for injection processing (e.g., Svelte)
44    /// where multiple queries are needed per file.
45    pub fn get_or_create_query(&mut self, lang: SupportedLanguage) -> Option<&Query> {
46        if let Entry::Vacant(e) = self.queries.entry(lang) {
47            let ts_language = lang.ts_language();
48            let query_source = lang.highlights_query();
49            let query = Query::new(&ts_language, query_source).ok()?;
50            e.insert(query);
51        }
52
53        self.queries.get(&lang)
54    }
55
56    /// Get or create a parser for the given file extension.
57    ///
58    /// Returns `None` if the extension is not supported by tree-sitter.
59    pub fn get_or_create(&mut self, ext: &str) -> Option<&mut Parser> {
60        let lang = SupportedLanguage::from_extension(ext)?;
61
62        if let Entry::Vacant(e) = self.parsers.entry(lang) {
63            let ts_language = lang.ts_language();
64            let mut parser = Parser::new();
65            if parser.set_language(&ts_language).is_err() {
66                return None;
67            }
68            e.insert(parser);
69        }
70
71        self.parsers.get_mut(&lang)
72    }
73
74    /// Check if tree-sitter supports the given file extension.
75    pub fn supports_extension(ext: &str) -> bool {
76        SupportedLanguage::is_supported(ext)
77    }
78}
79
// Unit tests for `ParserPool`: parser creation per file extension, extension
// support checks, and the highlight-query cache.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parser_pool_rust() {
        let mut pool = ParserPool::new();
        let parser = pool.get_or_create("rs");
        assert!(parser.is_some(), "Should create Rust parser");

        // Second call for the same extension should also succeed
        // (only availability is asserted here, not parser identity)
        let parser2 = pool.get_or_create("rs");
        assert!(parser2.is_some(), "Should reuse Rust parser");
    }

    #[test]
    fn test_parser_pool_typescript() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("ts").is_some());
        assert!(pool.get_or_create("tsx").is_some());
    }

    #[test]
    fn test_parser_pool_javascript() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("js").is_some());
        assert!(pool.get_or_create("jsx").is_some());
    }

    #[test]
    fn test_parser_pool_go() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("go").is_some());
    }

    #[test]
    fn test_parser_pool_python() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("py").is_some());
    }

    #[test]
    fn test_parser_pool_ruby() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("rb").is_some());
        assert!(pool.get_or_create("rake").is_some());
        assert!(pool.get_or_create("gemspec").is_some());
    }

    #[test]
    fn test_parser_pool_zig() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("zig").is_some());
    }

    #[test]
    fn test_parser_pool_c() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("c").is_some());
        // .h files are treated as C
        assert!(pool.get_or_create("h").is_some());
    }

    #[test]
    fn test_parser_pool_cpp() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("cpp").is_some());
        assert!(pool.get_or_create("cc").is_some());
        assert!(pool.get_or_create("cxx").is_some());
        assert!(pool.get_or_create("hpp").is_some());
        assert!(pool.get_or_create("hxx").is_some());
    }

    #[test]
    fn test_parser_pool_java() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("java").is_some());
    }

    #[test]
    fn test_parser_pool_csharp() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("cs").is_some());
    }

    #[test]
    fn test_parser_pool_lua() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("lua").is_some());
    }

    #[test]
    fn test_parser_pool_bash() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("sh").is_some());
        assert!(pool.get_or_create("bash").is_some());
        assert!(pool.get_or_create("zsh").is_some());
    }

    #[test]
    fn test_parser_pool_php() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("php").is_some());
    }

    #[test]
    fn test_parser_pool_swift() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("swift").is_some());
    }

    #[test]
    fn test_parser_pool_haskell() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("hs").is_some());
        assert!(pool.get_or_create("lhs").is_some());
    }

    // NOTE(review): a previous comment here said Svelte falls back to syntect.
    // That is out of date for this pool: test_parser_pool_svelte below expects
    // a tree-sitter parser for the `svelte` extension.

    #[test]
    fn test_parser_pool_moonbit() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("mbt").is_some());
    }

    #[test]
    fn test_parser_pool_svelte() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("svelte").is_some());
    }

    #[test]
    fn test_parser_pool_vue() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("vue").is_some());
    }

    #[test]
    fn test_parser_pool_markdown() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("md").is_some());
        assert!(pool.get_or_create("markdown").is_some());
    }

    #[test]
    fn test_parser_pool_markdown_inline() {
        let mut pool = ParserPool::new();
        assert!(pool.get_or_create("md_inline").is_some());
    }

    #[test]
    fn test_parser_pool_unsupported() {
        let mut pool = ParserPool::new();
        // Vue is supported as of Phase 3c (see test_parser_pool_vue), so it is
        // not asserted here; YAML and TOML are expected to yield no parser.
        assert!(pool.get_or_create("yaml").is_none());
        assert!(pool.get_or_create("toml").is_none());
    }

    #[test]
    fn test_supports_extension() {
        // Original languages
        assert!(ParserPool::supports_extension("rs"));
        assert!(ParserPool::supports_extension("ts"));
        assert!(ParserPool::supports_extension("tsx"));
        assert!(ParserPool::supports_extension("js"));
        assert!(ParserPool::supports_extension("jsx"));
        assert!(ParserPool::supports_extension("go"));
        assert!(ParserPool::supports_extension("py"));

        // Phase 1 languages
        assert!(ParserPool::supports_extension("lua"));
        assert!(ParserPool::supports_extension("sh"));
        assert!(ParserPool::supports_extension("php"));
        assert!(ParserPool::supports_extension("swift"));
        assert!(ParserPool::supports_extension("hs"));

        // Phase 3: Svelte is now supported
        assert!(ParserPool::supports_extension("svelte"));
        // Phase 3c: Vue is now supported
        assert!(ParserPool::supports_extension("vue"));

        // Phase 4: Markdown
        assert!(ParserPool::supports_extension("md"));

        // Unsupported extension must be rejected
        assert!(!ParserPool::supports_extension("yaml"));
    }

    #[test]
    fn test_parser_can_parse() {
        let mut pool = ParserPool::new();
        let parser = pool.get_or_create("rs").unwrap();

        let code = "fn main() { println!(\"Hello\"); }";
        let tree = parser.parse(code, None);
        assert!(tree.is_some(), "Should parse Rust code");
    }

    #[test]
    fn test_query_cache_creates_query() {
        let mut pool = ParserPool::new();
        let query = pool.get_or_create_query(SupportedLanguage::Rust);
        assert!(query.is_some(), "Should create Rust highlight query");
    }

    #[test]
    fn test_query_cache_reuses_query() {
        let mut pool = ParserPool::new();

        // First call creates the query
        let query1 = pool.get_or_create_query(SupportedLanguage::TypeScript);
        assert!(query1.is_some());

        // Second call should be served from the cache
        // (only availability is asserted here, not query identity)
        let query2 = pool.get_or_create_query(SupportedLanguage::TypeScript);
        assert!(query2.is_some());

        // Verify cache is populated
        assert!(
            pool.queries.contains_key(&SupportedLanguage::TypeScript),
            "Query should be cached"
        );
    }

    #[test]
    fn test_query_cache_multiple_languages() {
        let mut pool = ParserPool::new();

        // Create queries for multiple languages
        assert!(pool.get_or_create_query(SupportedLanguage::Rust).is_some());
        assert!(pool
            .get_or_create_query(SupportedLanguage::TypeScript)
            .is_some());
        assert!(pool
            .get_or_create_query(SupportedLanguage::JavaScript)
            .is_some());
        assert!(pool.get_or_create_query(SupportedLanguage::Css).is_some());

        // All should be cached, one entry per distinct language
        assert_eq!(pool.queries.len(), 4);
    }

    #[test]
    fn test_query_has_capture_names() {
        let mut pool = ParserPool::new();
        let query = pool.get_or_create_query(SupportedLanguage::Rust).unwrap();

        // Query should have capture names for highlighting
        let capture_names = query.capture_names();
        assert!(!capture_names.is_empty(), "Query should have capture names");
    }

    #[test]
    fn test_query_cache_markdown() {
        let mut pool = ParserPool::new();
        let query = pool.get_or_create_query(SupportedLanguage::Markdown);
        assert!(
            query.is_some(),
            "Should compile Markdown block highlight query"
        );
    }

    #[test]
    fn test_query_cache_markdown_inline() {
        let mut pool = ParserPool::new();
        let query = pool.get_or_create_query(SupportedLanguage::MarkdownInline);
        assert!(
            query.is_some(),
            "Should compile MarkdownInline highlight query"
        );
    }

    #[test]
    fn test_markdown_parser_can_parse() {
        let mut pool = ParserPool::new();
        let parser = pool.get_or_create("md").unwrap();

        let code = "# Hello\n\nSome **bold** text.\n";
        let tree = parser.parse(code, None);
        assert!(tree.is_some(), "Should parse Markdown code");
    }

    #[test]
    fn test_supports_extension_markdown() {
        assert!(ParserPool::supports_extension("md"));
        assert!(ParserPool::supports_extension("markdown"));
        assert!(ParserPool::supports_extension("md_inline"));
    }
}