Skip to main content

garbage_code_hunter/treesitter/
engine.rs

1use std::collections::HashMap;
2use std::path::Path;
3use std::sync::Mutex;
4
5use crate::language::Language;
6
7use super::parsers;
8
9/// A tree-sitter parsing engine that supports multiple languages.
10///
11/// Manages a cache of language-specific parsers and provides
12/// a unified interface for parsing source files.
13/// Uses a Mutex for interior mutability so parsing works with `&self`
14/// and is thread-safe (required by rayon-based parallel analysis).
15pub struct TreeSitterEngine {
16    parsers: Mutex<HashMap<Language, tree_sitter::Parser>>,
17}
18
19impl Default for TreeSitterEngine {
20    fn default() -> Self {
21        Self::new()
22    }
23}
24
25impl TreeSitterEngine {
26    /// Create a new engine with no pre-loaded parsers.
27    pub fn new() -> Self {
28        Self {
29            parsers: Mutex::new(HashMap::new()),
30        }
31    }
32
33    /// Lock the parsers map, returning None if the mutex is poisoned.
34    fn lock_parsers(
35        &self,
36    ) -> Option<std::sync::MutexGuard<'_, HashMap<Language, tree_sitter::Parser>>> {
37        self.parsers.lock().ok()
38    }
39
40    /// Ensure a parser is available for the given language.
41    /// Returns true if the parser was successfully loaded (or already exists).
42    pub fn ensure_parser(&self, lang: Language) -> bool {
43        let mut parsers = match self.lock_parsers() {
44            Some(p) => p,
45            None => return false,
46        };
47        if parsers.contains_key(&lang) {
48            return true;
49        }
50        let grammar = match parsers::get_grammar(lang) {
51            Some(g) => g,
52            None => return false,
53        };
54        let mut parser = tree_sitter::Parser::new();
55        if parser.set_language(&grammar).is_err() {
56            return false;
57        }
58        parsers.insert(lang, parser);
59        true
60    }
61
62    /// Parse source code for the given language.
63    /// Returns None if the language has no loaded parser or parsing fails.
64    pub fn parse(&self, lang: Language, content: &str) -> Option<tree_sitter::Tree> {
65        if !self.ensure_parser(lang) {
66            return None;
67        }
68        let mut parsers = self.lock_parsers()?;
69        parsers.get_mut(&lang).and_then(|p| p.parse(content, None))
70    }
71
72    /// Parse a file, detecting language from its path.
73    /// Returns None if the language is unsupported or parsing fails.
74    pub fn parse_file(&self, path: &Path, content: &str) -> Option<ParsedFile> {
75        let lang = Language::from_path(path);
76        if lang == Language::Unknown {
77            return None;
78        }
79        let tree = self.parse(lang, content)?;
80        Some(ParsedFile {
81            path: path.to_path_buf(),
82            content: content.to_string(),
83            tree,
84            language: lang,
85        })
86    }
87
88    /// Check if a parser can handle the given language.
89    pub fn can_parse(&self, lang: Language) -> bool {
90        self.ensure_parser(lang)
91    }
92}
93
94/// A source file that has been parsed by tree-sitter.
95#[derive(Debug, Clone)]
96pub struct ParsedFile {
97    pub path: std::path::PathBuf,
98    pub content: String,
99    pub tree: tree_sitter::Tree,
100    pub language: Language,
101}
102
103impl ParsedFile {
104    /// Get the root node of the syntax tree.
105    pub fn root_node(&self) -> tree_sitter::Node<'_> {
106        self.tree.root_node()
107    }
108
109    /// Get source text for a given node range.
110    pub fn node_text(&self, node: tree_sitter::Node) -> &str {
111        let start = node.start_byte();
112        let end = node.end_byte();
113        &self.content[start..end]
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Objective: Verify Rust file parsing produces a valid tree
    /// Invariants: Root node should be "source_file" with children
    #[test]
    fn test_parse_rust_source() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = 42; }";
        let tree = engine
            .parse(Language::Rust, code)
            .expect("Rust parsing should succeed");
        let root = tree.root_node();
        assert_eq!(root.kind(), "source_file");
        assert!(root.child_count() > 0, "Root should have child nodes");
    }

    /// Objective: Verify parsing invalid code still produces a tree (error recovery)
    /// Invariants: Tree should exist and report a syntax error
    #[test]
    fn test_parse_invalid_rust() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = ; }";
        let tree = engine
            .parse(Language::Rust, code)
            .expect("Should still produce a tree with error recovery");
        // Assert the documented invariant, not just that a tree exists.
        assert!(
            tree.root_node().has_error(),
            "Recovered tree should report a syntax error"
        );
    }

    /// Objective: Verify unsupported language returns None
    /// Invariants: Unknown language should not crash, just return None
    #[test]
    fn test_unsupported_language() {
        let engine = TreeSitterEngine::new();
        let result = engine.parse(Language::Unknown, "some code");
        assert!(result.is_none(), "Unknown language should return None");
    }

    /// Objective: Verify parse_file from path detects language correctly
    /// Invariants: .rs file should parse as Rust and the root node spans the whole input
    #[test]
    fn test_parse_file_by_path() {
        let engine = TreeSitterEngine::new();
        let code = "fn add(a: i32, b: i32) -> i32 { a + b }";
        let parsed = engine
            .parse_file(Path::new("test.rs"), code)
            .expect("Should parse .rs file");
        assert_eq!(parsed.language, Language::Rust);
        assert_eq!(parsed.node_text(parsed.root_node()), code);
    }
}