Skip to main content

garbage_code_hunter/treesitter/
query.rs

1use streaming_iterator::StreamingIterator;
2
3use crate::analyzer::Severity;
4use crate::language::Language;
5use std::cell::RefCell;
6use std::collections::HashMap;
7
8use super::engine::ParsedFile;
9
10/// A tree-sitter query based rule definition.
11///
12/// This is the primary building block for tree-sitter rules.
13/// Each rule specifies a query pattern (in tree-sitter query syntax),
14/// the languages it applies to, and a handler that converts
15/// query matches into `CodeIssue`s.
16pub struct QueryRule {
17    /// Unique rule identifier (e.g. "single-letter-variable").
18    pub name: &'static str,
19
20    /// Languages this rule applies to.
21    pub languages: &'static [Language],
22
23    /// The tree-sitter query pattern string.
24    /// Uses standard tree-sitter query syntax with named captures.
25    pub pattern: &'static str,
26
27    /// Default severity when match is found.
28    pub severity: Severity,
29
30    /// Custom handler to produce issues from a match.
31    /// If None, a default handler is used (one issue per match at the capture node).
32    pub handler: Option<QueryHandler>,
33
34    /// Whether to skip test files.
35    pub skips_test_files: bool,
36}
37
38/// Function signature for custom query match handlers.
39pub type QueryHandler =
40    fn(file: &ParsedFile, captures: &[QueryCapture], match_index: usize) -> Vec<IssueCandidate>;
41
42/// A single named capture from a tree-sitter query match.
43#[derive(Debug, Clone)]
44pub struct QueryCapture<'a> {
45    /// The capture name from the query pattern (e.g. "ident" from `(identifier) @ident`).
46    pub name: String,
47
48    /// The matched syntax node.
49    pub node: tree_sitter::Node<'a>,
50
51    /// Source text of the matched node.
52    pub text: &'a str,
53}
54
55/// An issue candidate produced by a query rule handler.
56#[derive(Debug, Clone)]
57pub struct IssueCandidate {
58    pub line: usize,
59    pub column: usize,
60    pub message: String,
61    pub severity: Severity,
62}
63
64thread_local! {
65    static QUERY_CACHE: RefCell<HashMap<(Language, String), tree_sitter::Query>> =
66        RefCell::new(HashMap::new());
67}
68
69/// Execute a tree-sitter query against a parsed file and collect captures.
70///
71/// Returns a list of capture groups, one per query match.
72/// Each group contains all named captures for that match.
73/// Uses a thread-local cache to avoid recompiling the same query pattern.
74pub fn collect_captures<'a>(
75    file: &'a ParsedFile,
76    pattern: &str,
77) -> Result<Vec<Vec<QueryCapture<'a>>>, String> {
78    QUERY_CACHE.with(|cache| {
79        let mut cache = cache.borrow_mut();
80        let key = (file.language, pattern.to_string());
81        if !cache.contains_key(&key) {
82            let grammar = super::parsers::get_grammar(file.language).ok_or_else(|| {
83                format!(
84                    "No tree-sitter grammar available for {}",
85                    file.language.display_name()
86                )
87            })?;
88            let query = tree_sitter::Query::new(&grammar, pattern)
89                .map_err(|e| format!("Failed to create query: {}", e))?;
90            cache.insert(key.clone(), query);
91        }
92        let query = cache
93            .get(&key)
94            .ok_or_else(|| "Query cache miss for pattern".to_string())?;
95
96        let mut cursor = tree_sitter::QueryCursor::new();
97        let root = file.root_node();
98        let mut matches = cursor.matches(query, root, file.content.as_bytes());
99
100        let capture_names: Vec<String> = query
101            .capture_names()
102            .iter()
103            .map(|s| s.to_string())
104            .collect();
105        let mut result = Vec::new();
106
107        while let Some(match_) = matches.next() {
108            let captures: Vec<QueryCapture> = match_
109                .captures
110                .iter()
111                .map(|capture| {
112                    let name_idx = capture.index as usize;
113                    let name = capture_names.get(name_idx).cloned().unwrap_or_else(|| {
114                        tracing::warn!(
115                            "capture index {} out of bounds (max {}); using 'unknown'",
116                            name_idx,
117                            capture_names.len()
118                        );
119                        "unknown".to_string()
120                    });
121                    let node = capture.node;
122                    let start = node.start_byte();
123                    let end = node.end_byte();
124                    QueryCapture {
125                        name,
126                        node,
127                        text: &file.content[start..end],
128                    }
129                })
130                .collect();
131            result.push(captures);
132        }
133
134        Ok(result)
135    })
136}
137
138/// Execute a merged multi-pattern query in a single cursor traversal.
139///
140/// Concatenates all patterns with newlines and runs one query pass.
141/// Capture names must be unique across all patterns (use prefixed names).
142pub fn collect_captures_multi<'a>(
143    file: &'a ParsedFile,
144    patterns: &[&str],
145) -> Result<Vec<Vec<QueryCapture<'a>>>, String> {
146    let merged = patterns.join("\n");
147    collect_captures(file, &merged)
148}
149
150/// Run a `QueryRule` against a parsed file and produce issues.
151pub fn run_query_rule(file: &ParsedFile, rule: &QueryRule) -> Vec<IssueCandidate> {
152    let captures_group = match collect_captures(file, rule.pattern) {
153        Ok(groups) => groups,
154        Err(e) => {
155            tracing::warn!("Query rule '{}' error: {}", rule.name, e);
156            return vec![];
157        }
158    };
159
160    let mut results = Vec::new();
161
162    for (match_index, captures) in captures_group.iter().enumerate() {
163        if let Some(handler) = rule.handler {
164            results.extend(handler(file, captures, match_index));
165        } else {
166            // Default handler: use the first capture's location
167            if let Some(first) = captures.first() {
168                results.push(IssueCandidate {
169                    line: first.node.start_position().row + 1,
170                    column: first.node.start_position().column + 1,
171                    message: format!("{} detected", rule.name),
172                    severity: rule.severity.clone(),
173                });
174            }
175        }
176    }
177
178    results
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use crate::treesitter::TreeSitterEngine;

    /// Objective: Verify basic query matching works for Rust source
    /// Invariants: Query for identifiers should find all identifier nodes
    #[test]
    fn test_collect_captures_basic() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = 42; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let captures = collect_captures(&file, "(identifier) @id").expect("Query should succeed");
        assert!(!captures.is_empty(), "Should find at least one identifier");
        // Should find: main, x
        assert_eq!(captures.len(), 2, "Should find 2 identifiers: main, x");
    }

    /// Objective: Verify single-letter variable detection via query
    /// Invariants: Pattern matching single-character identifiers should catch them
    #[test]
    fn test_single_letter_variable_query() {
        let engine = TreeSitterEngine::new();

        let code = "fn compute() { let a = 1; let bb = 2; let ccc = 3; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        // Match `let` bindings with single-letter patterns
        let pattern = "
            (let_declaration
                pattern: (identifier) @var
                (#match? @var \"^[a-z]$\")
            )
        ";
        let captures = collect_captures(&file, pattern).expect("Query should succeed");

        // The `let` pattern: `let a = ...; let bb = ...; let ccc = ...;`
        // Only `a` should match (single letter)
        assert_eq!(
            captures.len(),
            1,
            "Only 'a' should match single-letter pattern"
        );
        // Fail loudly if the match carries no capture, rather than silently
        // skipping the text assertion.
        let first = captures
            .first()
            .and_then(|c| c.first())
            .expect("The match should contain the @var capture");
        assert_eq!(first.text, "a", "Should capture 'a'");
    }

    /// Objective: Verify invalid query returns an error
    /// Invariants: Malformed query pattern should not panic
    #[test]
    fn test_invalid_query_returns_error() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() {}";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let result = collect_captures(&file, "(nonexistent_node) @x");
        // Unknown node type in query should be an error
        assert!(result.is_err(), "Query with unknown node type should error");
    }

    /// Objective: Verify QueryRule default handler produces issues
    /// Invariants: A QueryRule with no custom handler should still produce issues
    #[test]
    fn test_query_rule_default_handler() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = 1; let y = 2; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let rule = QueryRule {
            name: "single-letter-var",
            languages: &[Language::Rust],
            pattern: "
                (let_declaration
                    pattern: (identifier) @var
                    (#match? @var \"^[a-z]$\")
                )
            ",
            severity: Severity::Spicy,
            handler: None,
            skips_test_files: false,
        };

        let issues = run_query_rule(&file, &rule);
        assert_eq!(issues.len(), 2, "Should find 2 single-letter variables");
        assert_eq!(issues[0].message, "single-letter-var detected");
    }
}
277}