Skip to main content

garbage_code_hunter/treesitter/
query.rs

1use streaming_iterator::StreamingIterator;
2
3use crate::analyzer::Severity;
4use crate::language::Language;
5
6use super::engine::ParsedFile;
7
/// A tree-sitter query based rule definition.
///
/// This is the primary building block for tree-sitter rules.
/// Each rule bundles a query pattern (in tree-sitter query syntax) with the
/// metadata needed to report on its matches. `run_query_rule` compiles
/// `pattern`, runs it against a parsed file, and turns every match into
/// `IssueCandidate`s: through `handler` when one is set, otherwise through a
/// built-in fallback.
pub struct QueryRule {
    /// Unique rule identifier (e.g. "single-letter-variable").
    ///
    /// Also appears in the fallback issue message (`"<name> detected"`) and in
    /// the warning logged when the query cannot be run.
    pub name: &'static str,

    /// Languages this rule applies to.
    ///
    /// NOTE(review): `run_query_rule` does not read this field; presumably the
    /// caller filters rules by language before running them. Confirm.
    pub languages: &'static [Language],

    /// The tree-sitter query pattern string.
    /// Uses standard tree-sitter query syntax with named captures.
    pub pattern: &'static str,

    /// Severity attached to issues created by the fallback handler.
    ///
    /// A custom `handler` builds its own `IssueCandidate`s and therefore picks
    /// the severity itself.
    pub severity: Severity,

    /// Custom handler to produce issues from a match.
    ///
    /// If `None`, `run_query_rule` emits one issue per match, located at the
    /// start of that match's first capture.
    pub handler: Option<QueryHandler>,

    /// Whether to skip test files.
    ///
    /// NOTE(review): nothing in this module reads this flag; presumably it is
    /// enforced by whoever selects the rules to run. Confirm.
    pub skips_test_files: bool,
}
35
/// Function signature for custom query match handlers.
///
/// `run_query_rule` invokes the handler once per query match with:
/// - `file`: the parsed file the query ran against,
/// - `captures`: every capture that belongs to that match,
/// - `match_index`: zero-based position of the match among all matches of the rule.
///
/// The returned candidates are appended to the rule's results; returning an
/// empty vector reports nothing for that match.
pub type QueryHandler =
    fn(file: &ParsedFile, captures: &[QueryCapture], match_index: usize) -> Vec<IssueCandidate>;
39
40/// A single named capture from a tree-sitter query match.
41#[derive(Debug, Clone)]
42pub struct QueryCapture<'a> {
43    /// The capture name from the query pattern (e.g. "ident" from `(identifier) @ident`).
44    pub name: String,
45
46    /// The matched syntax node.
47    pub node: tree_sitter::Node<'a>,
48
49    /// Source text of the matched node.
50    pub text: &'a str,
51}
52
53/// An issue candidate produced by a query rule handler.
54#[derive(Debug, Clone)]
55pub struct IssueCandidate {
56    pub line: usize,
57    pub column: usize,
58    pub message: String,
59    pub severity: Severity,
60}
61
62/// Execute a tree-sitter query against a parsed file and collect captures.
63///
64/// Returns a list of capture groups, one per query match.
65/// Each group contains all named captures for that match.
66pub fn collect_captures<'a>(
67    file: &'a ParsedFile,
68    pattern: &str,
69) -> Result<Vec<Vec<QueryCapture<'a>>>, String> {
70    let lang = file.language;
71    let grammar = super::parsers::get_grammar(lang).ok_or_else(|| {
72        format!(
73            "No tree-sitter grammar available for {}",
74            lang.display_name()
75        )
76    })?;
77
78    let query = tree_sitter::Query::new(&grammar, pattern)
79        .map_err(|e| format!("Failed to create query: {}", e))?;
80
81    let mut cursor = tree_sitter::QueryCursor::new();
82    let root = file.root_node();
83    let mut matches = cursor.matches(&query, root, file.content.as_bytes());
84
85    let capture_names: Vec<String> = query
86        .capture_names()
87        .iter()
88        .map(|s| s.to_string())
89        .collect();
90    let mut result = Vec::new();
91
92    while let Some(match_) = matches.next() {
93        let captures: Vec<QueryCapture> = match_
94            .captures
95            .iter()
96            .map(|capture| {
97                let name_idx = capture.index as usize;
98                let name = capture_names
99                    .get(name_idx)
100                    .cloned()
101                    .unwrap_or_else(|| "unknown".to_string());
102                let node = capture.node;
103                let start = node.start_byte();
104                let end = node.end_byte();
105                QueryCapture {
106                    name,
107                    node,
108                    text: &file.content[start..end],
109                }
110            })
111            .collect();
112        result.push(captures);
113    }
114
115    Ok(result)
116}
117
118/// Run a `QueryRule` against a parsed file and produce issues.
119pub fn run_query_rule(file: &ParsedFile, rule: &QueryRule) -> Vec<IssueCandidate> {
120    let captures_group = match collect_captures(file, rule.pattern) {
121        Ok(groups) => groups,
122        Err(e) => {
123            tracing::warn!("Query rule '{}' error: {}", rule.name, e);
124            return vec![];
125        }
126    };
127
128    let mut results = Vec::new();
129
130    for (match_index, captures) in captures_group.iter().enumerate() {
131        if let Some(handler) = rule.handler {
132            results.extend(handler(file, captures, match_index));
133        } else {
134            // Default handler: use the first capture's location
135            if let Some(first) = captures.first() {
136                results.push(IssueCandidate {
137                    line: first.node.start_position().row + 1,
138                    column: first.node.start_position().column + 1,
139                    message: format!("{} detected", rule.name),
140                    severity: rule.severity.clone(),
141                });
142            }
143        }
144    }
145
146    results
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use crate::treesitter::TreeSitterEngine;

    /// Objective: Verify basic query matching works for Rust source
    /// Invariants: Query for identifiers should find all identifier nodes
    #[test]
    fn test_collect_captures_basic() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = 42; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let captures = collect_captures(&file, "(identifier) @id").expect("Query should succeed");
        assert!(!captures.is_empty(), "Should find at least one identifier");
        // Should find: main, x
        assert_eq!(captures.len(), 2, "Should find 2 identifiers: main, x");
    }

    /// Objective: Verify single-letter variable detection via query
    /// Invariants: Pattern matching single-character identifiers should catch them
    #[test]
    fn test_single_letter_variable_query() {
        let engine = TreeSitterEngine::new();

        let code = "fn compute() { let a = 1; let bb = 2; let ccc = 3; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        // Match `let` bindings with single-letter patterns
        let pattern = "
            (let_declaration
                pattern: (identifier) @var
                (#match? @var \"^[a-z]$\")
            )
        ";
        let captures = collect_captures(&file, pattern).expect("Query should succeed");

        // The `let` pattern: `let a = ...; let bb = ...; let ccc = ...;`
        // Only `a` should match (single letter)
        assert_eq!(
            captures.len(),
            1,
            "Only 'a' should match single-letter pattern"
        );

        // Assert unconditionally: a missing capture must fail the test instead
        // of silently skipping the text check.
        let first = captures[0]
            .first()
            .expect("Match should contain the @var capture");
        assert_eq!(first.text, "a", "Should capture 'a'");
        assert_eq!(first.name, "var", "Capture should carry its query name");
    }

    /// Objective: Verify invalid query returns an error
    /// Invariants: Malformed query pattern should not panic
    #[test]
    fn test_invalid_query_returns_error() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() {}";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let result = collect_captures(&file, "(nonexistent_node) @x");
        // Unknown node type in query should be an error
        assert!(result.is_err(), "Query with unknown node type should error");
    }

    /// Objective: Verify QueryRule default handler produces issues
    /// Invariants: A QueryRule with no custom handler should still produce issues,
    /// positioned (1-based) at the first capture of each match
    #[test]
    fn test_query_rule_default_handler() {
        let engine = TreeSitterEngine::new();
        let code = "fn main() { let x = 1; let y = 2; }";
        let file = engine
            .parse_file(std::path::Path::new("test.rs"), code)
            .expect("Should parse");

        let rule = QueryRule {
            name: "single-letter-var",
            languages: &[Language::Rust],
            pattern: "
                (let_declaration
                    pattern: (identifier) @var
                    (#match? @var \"^[a-z]$\")
                )
            ",
            severity: Severity::Spicy,
            handler: None,
            skips_test_files: false,
        };

        let issues = run_query_rule(&file, &rule);
        assert_eq!(issues.len(), 2, "Should find 2 single-letter variables");
        assert_eq!(issues[0].message, "single-letter-var detected");
        assert_eq!(issues[1].message, "single-letter-var detected");

        // `x` starts at byte column 16 and `y` at 27 on the first row; the
        // default handler reports both line and column 1-based.
        assert_eq!(issues[0].line, 1, "Line should be 1-based");
        assert_eq!(issues[0].column, 17, "Column should be 1-based");
        assert_eq!(issues[1].line, 1, "Line should be 1-based");
        assert_eq!(issues[1].column, 28, "Column should be 1-based");
    }
}