Skip to main content

normalize_syntax_rules/
query.rs

1//! Generic tree-sitter and ast-grep query execution.
2//!
3//! Provides low-level query runners that operate on a single file's content.
4//! Higher-level file discovery and dispatch lives in the CLI tier.
5
6use crate::evaluate_predicates;
7use normalize_languages::ast_grep::DynLang;
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use streaming_iterator::StreamingIterator;
11
12/// Match result from either pattern type.
13#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
14pub struct MatchResult {
15    pub file: PathBuf,
16    pub grammar: String,
17    pub kind: String,
18    pub text: String,
19    pub start_row: usize,
20    pub start_col: usize,
21    pub end_row: usize,
22    pub end_col: usize,
23    pub captures: HashMap<String, String>,
24}
25
/// Classify a pattern string by its first non-whitespace character.
///
/// Returns `true` when that character is `(`, which this module treats as a
/// tree-sitter S-expression query. Returns `false` for everything else
/// (including empty or all-whitespace input), which is treated as an
/// ast-grep pattern.
pub fn is_sexp_pattern(pattern: &str) -> bool {
    let first_visible = pattern.chars().find(|ch| !ch.is_whitespace());
    first_visible == Some('(')
}
31
32/// Run a tree-sitter S-expression query against a single file's content.
33///
34/// Returns one `MatchResult` per capture per match.
35pub fn run_sexp_query(
36    file: &Path,
37    content: &str,
38    query_str: &str,
39    grammar: &tree_sitter::Language,
40    grammar_name: &str,
41) -> Result<Vec<MatchResult>, String> {
42    let mut parser = tree_sitter::Parser::new();
43    parser
44        .set_language(grammar)
45        .map_err(|e| format!("Failed to set language: {}", e))?;
46
47    let tree = parser
48        .parse(content, None)
49        .ok_or_else(|| "Failed to parse file".to_string())?;
50
51    let query =
52        tree_sitter::Query::new(grammar, query_str).map_err(|e| format!("Invalid query: {}", e))?;
53
54    let mut cursor = tree_sitter::QueryCursor::new();
55    let mut matches_iter = cursor.matches(&query, tree.root_node(), content.as_bytes());
56
57    let mut results = Vec::new();
58    while let Some(m) = matches_iter.next() {
59        if !evaluate_predicates(&query, m, content.as_bytes()) {
60            continue;
61        }
62
63        for cap in m.captures {
64            let node = cap.node;
65            let capture_name = query.capture_names()[cap.index as usize].to_string();
66            let text = node.utf8_text(content.as_bytes()).unwrap_or("").to_string();
67
68            let mut captures = HashMap::new();
69            captures.insert(capture_name.clone(), text.clone());
70
71            results.push(MatchResult {
72                file: file.to_path_buf(),
73                grammar: grammar_name.to_string(),
74                kind: node.kind().to_string(),
75                text,
76                start_row: node.start_position().row + 1,
77                start_col: node.start_position().column + 1,
78                end_row: node.end_position().row + 1,
79                end_col: node.end_position().column + 1,
80                captures,
81            });
82        }
83    }
84
85    Ok(results)
86}
87
88/// Run an ast-grep pattern query against a single file's content.
89pub fn run_astgrep_query(
90    file: &Path,
91    content: &str,
92    pattern_str: &str,
93    grammar: &tree_sitter::Language,
94    grammar_name: &str,
95) -> Result<Vec<MatchResult>, String> {
96    use ast_grep_core::tree_sitter::LanguageExt;
97
98    let lang = DynLang::new(grammar.clone());
99    let grep = lang.ast_grep(content);
100    let pattern = lang
101        .pattern(pattern_str)
102        .map_err(|e| format!("Pattern error: {:?}", e))?;
103
104    let mut results = Vec::new();
105    let root = grep.root();
106    for node_match in root.find_all(&pattern) {
107        let text = node_match.text().to_string();
108        let start_pos = node_match.start_pos();
109        let end_pos = node_match.end_pos();
110
111        // For ast-grep, captures are in the MetaVarEnv, but extracting them
112        // is complex. For now, just report the matched text.
113        let captures = HashMap::new();
114
115        results.push(MatchResult {
116            file: file.to_path_buf(),
117            grammar: grammar_name.to_string(),
118            kind: node_match.kind().to_string(),
119            text,
120            start_row: start_pos.line() + 1,
121            start_col: start_pos.column(&node_match) + 1,
122            end_row: end_pos.line() + 1,
123            end_col: end_pos.column(&node_match) + 1,
124            captures,
125        });
126    }
127
128    Ok(results)
129}