Skip to main content

tree_sitter_language_pack/
parse.rs

1use std::cell::RefCell;
2
3use ahash::AHashMap;
4
5use crate::Error;
6
7thread_local! {
8    static PARSER_CACHE: RefCell<AHashMap<String, tree_sitter::Parser>> = RefCell::new(AHashMap::new());
9}
10
11/// Parse source code with the named language, returning the syntax tree.
12///
13/// Uses the global registry to look up the language by name.
14/// Caches parsers per-thread so repeated calls for the same language avoid
15/// re-creating the parser.
16///
17/// # Examples
18///
19/// ```no_run
20/// let tree = tree_sitter_language_pack::parse::parse_string("python", b"def hello(): pass").unwrap();
21/// assert_eq!(tree.root_node().kind(), "module");
22/// ```
23pub fn parse_string(language: &str, source: &[u8]) -> Result<tree_sitter::Tree, Error> {
24    PARSER_CACHE.with(|cache| {
25        let mut cache = cache.borrow_mut();
26        if let Some(parser) = cache.get_mut(language) {
27            return parser.parse(source, None).ok_or(Error::ParseFailed);
28        }
29        let language_obj = crate::get_language(language)?;
30        let mut parser = tree_sitter::Parser::new();
31        parser
32            .set_language(&language_obj)
33            .map_err(|e| Error::ParserSetup(format!("{e}")))?;
34        let tree = parser.parse(source, None).ok_or(Error::ParseFailed)?;
35        cache.insert(language.to_string(), parser);
36        Ok(tree)
37    })
38}
39
40/// Parse source code with a pre-loaded `Language`, using the thread-local parser cache.
41///
42/// Avoids a redundant registry lookup when the caller already has the `Language`
43/// (e.g., from `CompiledExtraction` or `LanguageRegistry::get_language`).
44pub(crate) fn parse_with_language(
45    language_name: &str,
46    language: &tree_sitter::Language,
47    source: &[u8],
48) -> Result<tree_sitter::Tree, Error> {
49    PARSER_CACHE.with(|cache| {
50        let mut cache = cache.borrow_mut();
51        if let Some(parser) = cache.get_mut(language_name) {
52            return parser.parse(source, None).ok_or(Error::ParseFailed);
53        }
54        let mut parser = tree_sitter::Parser::new();
55        parser
56            .set_language(language)
57            .map_err(|e| Error::ParserSetup(format!("{e}")))?;
58        let tree = parser.parse(source, None).ok_or(Error::ParseFailed)?;
59        cache.insert(language_name.to_string(), parser);
60        Ok(tree)
61    })
62}
63
/// Number of parsers currently held in the calling thread's cache.
///
/// Compiled only for tests, which use it to observe cache growth and reuse.
#[cfg(test)]
pub(crate) fn cached_parser_count_for_tests() -> usize {
    PARSER_CACHE.with(|cell| {
        let parsers = cell.borrow();
        parsers.len()
    })
}
68
69/// Check whether any node in the tree matches the given type name.
70///
71/// Performs a depth-first traversal using `TreeCursor`.
72pub fn tree_contains_node_type(tree: &tree_sitter::Tree, node_type: &str) -> bool {
73    let mut cursor = tree.walk();
74    traverse_with_cursor(&mut cursor, |node| node.kind() == node_type)
75}
76
77/// Check whether the tree contains any ERROR or MISSING nodes.
78///
79/// Useful for determining if the parse was clean or had syntax errors.
80pub fn tree_has_error_nodes(tree: &tree_sitter::Tree) -> bool {
81    let mut cursor = tree.walk();
82    traverse_with_cursor(&mut cursor, |node| node.is_error() || node.is_missing())
83}
84
85/// Return the S-expression representation of the entire tree.
86///
87/// This is the standard tree-sitter debug format, useful for logging,
88/// snapshot testing, and debugging grammars.
89pub fn tree_to_sexp(tree: &tree_sitter::Tree) -> String {
90    tree.root_node().to_sexp()
91}
92
93/// Count the number of ERROR and MISSING nodes in the tree.
94///
95/// Returns 0 for a clean parse.
96pub fn tree_error_count(tree: &tree_sitter::Tree) -> usize {
97    let mut count = 0;
98    let mut cursor = tree.walk();
99    traverse_with_cursor(&mut cursor, |node| {
100        if node.is_error() || node.is_missing() {
101            count += 1;
102        }
103        false // never short-circuit, visit all nodes
104    });
105    count
106}
107
108/// Depth-first traversal with a cursor, calling `predicate` on each node.
109///
110/// Returns `true` as soon as the predicate returns `true` (short-circuit).
111/// Returns `false` if no node matches.
112pub(crate) fn traverse_with_cursor(
113    cursor: &mut tree_sitter::TreeCursor,
114    mut predicate: impl FnMut(tree_sitter::Node) -> bool,
115) -> bool {
116    loop {
117        if predicate(cursor.node()) {
118            return true;
119        }
120        if cursor.goto_first_child() {
121            continue;
122        }
123        loop {
124            if cursor.goto_next_sibling() {
125                break;
126            }
127            if !cursor.goto_parent() {
128                return false;
129            }
130        }
131    }
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when no languages are available, in which case the
    /// language-dependent tests return early instead of failing.
    fn skip_if_no_languages() -> bool {
        crate::available_languages().is_empty()
    }

    // NOTE: the parser cache is thread-local. The assertions below deliberately
    // avoid assuming the cache is empty when a test starts, because a test
    // harness that runs several tests on the same thread would leave entries
    // behind from earlier tests.

    #[test]
    fn test_parse_string_success() {
        if skip_if_no_languages() {
            return;
        }
        let langs = crate::available_languages();
        let first = &langs[0];
        let tree = parse_string(first, b"x");
        assert!(tree.is_ok(), "parse_string should succeed for '{first}'");
    }

    #[test]
    fn test_parse_string_reuses_thread_local_parser_cache() {
        if skip_if_no_languages() {
            return;
        }
        let langs = crate::available_languages();
        let first = &langs[0];
        let before = cached_parser_count_for_tests();
        let _ = parse_string(first, b"x").unwrap();
        let after_first = cached_parser_count_for_tests();
        let _ = parse_string(first, b"y").unwrap();
        let after_second = cached_parser_count_for_tests();
        assert!(after_first >= before);
        // A successful parse always leaves a parser for that language in the cache.
        assert!(after_first >= 1, "successful parse should leave a cached parser");
        assert_eq!(after_second, after_first);
    }

    #[test]
    fn test_parse_string_invalid_language() {
        let result = parse_string("nonexistent_xyz", b"x");
        assert!(result.is_err());
    }

    #[test]
    fn test_tree_to_sexp() {
        if skip_if_no_languages() {
            return;
        }
        let langs = crate::available_languages();
        let tree = parse_string(&langs[0], b"x").unwrap();
        let sexp = tree_to_sexp(&tree);
        assert!(!sexp.is_empty());
    }

    #[test]
    fn test_tree_contains_node_type() {
        if skip_if_no_languages() {
            return;
        }
        let langs = crate::available_languages();
        let tree = parse_string(&langs[0], b"x").unwrap();
        let root_kind = tree.root_node().kind().to_string();
        assert!(tree_contains_node_type(&tree, &root_kind));
        assert!(!tree_contains_node_type(&tree, "nonexistent_node_type_xyz"));
    }

    #[test]
    fn test_tree_has_error_nodes_clean() {
        if skip_if_no_languages() {
            return;
        }
        // Most parsers handle single-token inputs without error
        let langs = crate::available_languages();
        let tree = parse_string(&langs[0], b"x").unwrap();
        // Just verify it runs without panic; result depends on grammar
        let _ = tree_has_error_nodes(&tree);
    }

    #[test]
    fn test_tree_error_count() {
        if skip_if_no_languages() {
            return;
        }
        let langs = crate::available_languages();
        let tree = parse_string(&langs[0], b"x").unwrap();
        let count = tree_error_count(&tree);
        // Just verify it returns a reasonable number
        assert!(count < 1000);
    }

    #[test]
    fn test_parse_with_language_reuses_cache() {
        if skip_if_no_languages() {
            return;
        }
        let langs = crate::available_languages();
        let first = &langs[0];
        let lang = crate::get_language(first).unwrap();
        let _ = parse_with_language(first, &lang, b"x").unwrap();
        let after_first = cached_parser_count_for_tests();
        let _ = parse_with_language(first, &lang, b"y").unwrap();
        let after_second = cached_parser_count_for_tests();
        // A successful parse always leaves a parser for that language in the cache.
        assert!(after_first >= 1, "successful parse should leave a cached parser");
        assert_eq!(after_first, after_second, "second call should reuse cached parser");
    }

    #[test]
    fn test_parse_with_language_and_parse_string_share_cache() {
        if skip_if_no_languages() {
            return;
        }
        let langs = crate::available_languages();
        let first = &langs[0];
        let lang = crate::get_language(first).unwrap();
        let _ = parse_string(first, b"x").unwrap();
        let after_parse_string = cached_parser_count_for_tests();
        let _ = parse_with_language(first, &lang, b"y").unwrap();
        let after_parse_with_lang = cached_parser_count_for_tests();
        assert_eq!(
            after_parse_string, after_parse_with_lang,
            "parse_string and parse_with_language should share the same cache"
        );
    }

    #[test]
    fn test_different_languages_get_separate_cache_entries() {
        let langs = crate::available_languages();
        if langs.len() < 2 {
            return;
        }
        let before = cached_parser_count_for_tests();
        let _ = parse_string(&langs[0], b"x").unwrap();
        let _ = parse_string(&langs[1], b"x").unwrap();
        let after = cached_parser_count_for_tests();
        // Entries are never removed, so the cache cannot shrink.
        assert!(after >= before, "parser cache should never shrink");
        // Check the absolute size rather than `before + 2`: either language may
        // already be cached on this thread, in which case the delta is below two
        // even though each language has its own entry.
        assert!(
            after >= 2,
            "different languages should create separate cache entries"
        );
    }
}