garbage_code_hunter/treesitter/
query.rs1use streaming_iterator::StreamingIterator;
2
3use crate::analyzer::Severity;
4use crate::language::Language;
5use std::cell::RefCell;
6use std::collections::HashMap;
7
8use super::engine::ParsedFile;
9
/// A declarative lint rule backed by a tree-sitter query.
///
/// `run_query_rule` executes `pattern` against a parsed file and turns each
/// match into zero or more [`IssueCandidate`]s, either through `handler` or
/// through a built-in default.
pub struct QueryRule {
    /// Rule identifier. Used in the warning logged when the query fails and in
    /// the default issue message (`"<name> detected"`).
    pub name: &'static str,

    /// Languages associated with this rule.
    ///
    /// NOTE(review): not consulted anywhere in this file; presumably callers
    /// use it to decide which files the rule runs on — confirm.
    pub languages: &'static [Language],

    /// Tree-sitter query source passed to `collect_captures`.
    pub pattern: &'static str,

    /// Severity attached to issues produced by the default (handler-less) path.
    pub severity: Severity,

    /// Optional custom handler. When present it is called once per match and
    /// its output replaces the default one-issue-per-match behavior.
    pub handler: Option<QueryHandler>,

    /// NOTE(review): not read in this file; presumably tells the caller to
    /// skip this rule for test files — confirm against the rule runner.
    pub skips_test_files: bool,
}
37
/// Callback that converts one query match into issues.
///
/// `run_query_rule` calls it with the parsed file, the captures belonging to a
/// single match, and that match's zero-based position in the list of matches.
/// Every returned [`IssueCandidate`] is appended to the rule's results.
pub type QueryHandler =
    fn(file: &ParsedFile, captures: &[QueryCapture], match_index: usize) -> Vec<IssueCandidate>;
41
/// A single capture from a query match, borrowing from the parsed file.
#[derive(Debug, Clone)]
pub struct QueryCapture<'a> {
    /// Capture name as reported by the query, or `"unknown"` if the capture
    /// index could not be resolved to a name.
    pub name: String,

    /// The syntax node that was captured.
    pub node: tree_sitter::Node<'a>,

    /// Source text covered by `node` (the file content between the node's
    /// start and end byte offsets).
    pub text: &'a str,
}
54
/// An issue produced by a query rule.
#[derive(Debug, Clone)]
pub struct IssueCandidate {
    /// Line of the issue. The default path of `run_query_rule` stores the
    /// node's row plus one, i.e. a 1-based line.
    pub line: usize,
    /// Column of the issue. The default path of `run_query_rule` stores the
    /// node's column plus one, i.e. a 1-based column.
    pub column: usize,
    /// Human-readable description of the issue.
    pub message: String,
    /// Severity of the issue.
    pub severity: Severity,
}
63
64thread_local! {
65 static QUERY_CACHE: RefCell<HashMap<(Language, String), tree_sitter::Query>> =
66 RefCell::new(HashMap::new());
67}
68
69pub fn collect_captures<'a>(
75 file: &'a ParsedFile,
76 pattern: &str,
77) -> Result<Vec<Vec<QueryCapture<'a>>>, String> {
78 QUERY_CACHE.with(|cache| {
79 let mut cache = cache.borrow_mut();
80 let key = (file.language, pattern.to_string());
81 if !cache.contains_key(&key) {
82 let grammar = super::parsers::get_grammar(file.language).ok_or_else(|| {
83 format!(
84 "No tree-sitter grammar available for {}",
85 file.language.display_name()
86 )
87 })?;
88 let query = tree_sitter::Query::new(&grammar, pattern)
89 .map_err(|e| format!("Failed to create query: {}", e))?;
90 cache.insert(key.clone(), query);
91 }
92 let query = cache
93 .get(&key)
94 .ok_or_else(|| "Query cache miss for pattern".to_string())?;
95
96 let mut cursor = tree_sitter::QueryCursor::new();
97 let root = file.root_node();
98 let mut matches = cursor.matches(query, root, file.content.as_bytes());
99
100 let capture_names: Vec<String> = query
101 .capture_names()
102 .iter()
103 .map(|s| s.to_string())
104 .collect();
105 let mut result = Vec::new();
106
107 while let Some(match_) = matches.next() {
108 let captures: Vec<QueryCapture> = match_
109 .captures
110 .iter()
111 .map(|capture| {
112 let name_idx = capture.index as usize;
113 let name = capture_names.get(name_idx).cloned().unwrap_or_else(|| {
114 tracing::warn!(
115 "capture index {} out of bounds (max {}); using 'unknown'",
116 name_idx,
117 capture_names.len()
118 );
119 "unknown".to_string()
120 });
121 let node = capture.node;
122 let start = node.start_byte();
123 let end = node.end_byte();
124 QueryCapture {
125 name,
126 node,
127 text: &file.content[start..end],
128 }
129 })
130 .collect();
131 result.push(captures);
132 }
133
134 Ok(result)
135 })
136}
137
138pub fn collect_captures_multi<'a>(
143 file: &'a ParsedFile,
144 patterns: &[&str],
145) -> Result<Vec<Vec<QueryCapture<'a>>>, String> {
146 let merged = patterns.join("\n");
147 collect_captures(file, &merged)
148}
149
150pub fn run_query_rule(file: &ParsedFile, rule: &QueryRule) -> Vec<IssueCandidate> {
152 let captures_group = match collect_captures(file, rule.pattern) {
153 Ok(groups) => groups,
154 Err(e) => {
155 tracing::warn!("Query rule '{}' error: {}", rule.name, e);
156 return vec![];
157 }
158 };
159
160 let mut results = Vec::new();
161
162 for (match_index, captures) in captures_group.iter().enumerate() {
163 if let Some(handler) = rule.handler {
164 results.extend(handler(file, captures, match_index));
165 } else {
166 if let Some(first) = captures.first() {
168 results.push(IssueCandidate {
169 line: first.node.start_position().row + 1,
170 column: first.node.start_position().column + 1,
171 message: format!("{} detected", rule.name),
172 severity: rule.severity.clone(),
173 });
174 }
175 }
176 }
177
178 results
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use crate::treesitter::TreeSitterEngine;

    /// A bare `(identifier)` query captures every identifier in the snippet.
    #[test]
    fn test_collect_captures_basic() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = 42; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let captures = collect_captures(&file, "(identifier) @id").expect("Query should succeed");
        assert!(!captures.is_empty(), "Should find at least one identifier");
        assert_eq!(captures.len(), 2, "Should find 2 identifiers: main, x");
    }

    /// The `#match?` predicate restricts matches to single-letter bindings.
    #[test]
    fn test_single_letter_variable_query() {
        let engine = TreeSitterEngine::new();

        let code = "fn compute() { let a = 1; let bb = 2; let ccc = 3; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let pattern = r#"
            (let_declaration
                pattern: (identifier) @var
                (#match? @var "^[a-z]$")
            )
        "#;
        let captures = collect_captures(&file, pattern).expect("Query should succeed");

        assert_eq!(
            captures.len(),
            1,
            "Only 'a' should match single-letter pattern"
        );
        // Fail loudly if the match carries no capture instead of silently
        // skipping the text assertion.
        let first = captures
            .first()
            .and_then(|c| c.first())
            .expect("The match should contain the @var capture");
        assert_eq!(first.text, "a", "Should capture 'a'");
    }

    /// A query naming a node type the grammar does not define is rejected.
    #[test]
    fn test_invalid_query_returns_error() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() {}";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let result = collect_captures(&file, "(nonexistent_node) @x");
        assert!(result.is_err(), "Query with unknown node type should error");
    }

    /// Without a handler, every match yields one "<name> detected" issue.
    #[test]
    fn test_query_rule_default_handler() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = 1; let y = 2; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let rule = QueryRule {
            name: "single-letter-var",
            languages: &[Language::Rust],
            pattern: r#"
                (let_declaration
                    pattern: (identifier) @var
                    (#match? @var "^[a-z]$")
                )
            "#,
            severity: Severity::Spicy,
            handler: None,
            skips_test_files: false,
        };

        let issues = run_query_rule(&file, &rule);
        assert_eq!(issues.len(), 2, "Should find 2 single-letter variables");
        assert_eq!(issues[0].message, "single-letter-var detected");
        assert!(
            issues
                .iter()
                .all(|issue| issue.message == "single-letter-var detected"),
            "Every default issue should carry the rule name"
        );
    }
}