use crate::sources::{SourceContext, SourceRegistry, builtin_registry};
use crate::{Rule, Severity};
use normalize_languages::{GrammarLoader, support_for_path};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use streaming_iterator::StreamingIterator;
/// A single rule match reported by `run_rules`.
///
/// Line and column fields are 1-based (tree-sitter's 0-based positions plus one);
/// byte offsets are the matched node's offsets into the file contents that were parsed.
#[derive(Debug)]
pub struct Finding {
/// Id of the rule that produced this finding.
pub rule_id: String,
/// Path of the file the match was found in.
pub file: PathBuf,
/// 1-based line of the matched node's start position.
pub start_line: usize,
/// 1-based column of the matched node's start position.
pub start_col: usize,
/// 1-based line of the matched node's end position.
pub end_line: usize,
/// 1-based column of the matched node's end position.
pub end_col: usize,
/// Byte offset where the matched node starts.
pub start_byte: usize,
/// Byte offset where the matched node ends.
pub end_byte: usize,
/// The rule's message, copied verbatim.
pub message: String,
/// The rule's severity.
pub severity: Severity,
/// First line of the matched node's text (empty if the text is not valid UTF-8).
pub matched_text: String,
/// The rule's fix template, if it has one; expanded by `expand_fix_template`.
pub fix: Option<String>,
/// Text of every capture in the match, keyed by capture name.
pub captures: HashMap<String, String>,
}
/// Switches that enable optional diagnostic output.
#[derive(Default)]
pub struct DebugFlags {
    /// When set, `run_rules` prints per-phase timings to stderr.
    pub timing: bool,
}

impl DebugFlags {
    /// Builds the flags from a list of flag names.
    ///
    /// `timing` is enabled when any entry is exactly `"timing"` or `"all"`;
    /// every other entry is ignored.
    pub fn from_args(args: &[String]) -> Self {
        let timing = args
            .iter()
            .any(|flag| matches!(flag.as_str(), "all" | "timing"));
        Self { timing }
    }
}
/// Returns `true` when `line` contains a `moss-allow:` marker for `rule_id`.
///
/// After the marker (and any whitespace following it) the rule id must appear
/// and be followed by the end of the line, whitespace, `-`, or `*/`, so that
/// `moss-allow: foo` does not suppress a rule named `foobar`.
///
/// Every marker on the line is examined, so a line carrying several
/// `moss-allow:` markers can suppress several rules.
fn line_has_allow_comment(line: &str, rule_id: &str) -> bool {
    const MARKER: &str = "moss-allow:";

    line.match_indices(MARKER).any(|(pos, _)| {
        let after_marker = line[pos + MARKER.len()..].trim_start();
        after_marker.strip_prefix(rule_id).map_or(false, |rest| {
            rest.is_empty()
                || rest.starts_with(char::is_whitespace)
                || rest.starts_with('-')
                || rest.starts_with("*/")
        })
    })
}
fn is_allowed_by_comment(content: &str, start_line: usize, rule_id: &str) -> bool {
let lines: Vec<&str> = content.lines().collect();
let line_idx = start_line.saturating_sub(1);
if let Some(line) = lines.get(line_idx) {
if line_has_allow_comment(line, rule_id) {
return true;
}
}
if line_idx > 0 {
if let Some(line) = lines.get(line_idx - 1) {
if line_has_allow_comment(line, rule_id) {
return true;
}
}
}
false
}
fn check_requires(rule: &Rule, registry: &SourceRegistry, ctx: &SourceContext) -> bool {
if rule.requires.is_empty() {
return true;
}
for (key, expected) in &rule.requires {
let actual = match registry.get(ctx, key) {
Some(v) => v,
None => return false, };
let matches = if let Some(rest) = expected.strip_prefix(">=") {
actual >= rest.to_string()
} else if let Some(rest) = expected.strip_prefix("<=") {
actual <= rest.to_string()
} else if let Some(rest) = expected.strip_prefix('!') {
actual != rest
} else {
actual == *expected
};
if !matches {
return false;
}
}
true
}
/// One tree-sitter query compiled from the concatenated query strings of
/// several rules, together with the bookkeeping needed to map a match back
/// to the rule that owns it.
struct CombinedQuery<'a> {
// The compiled combined query.
query: tree_sitter::Query,
// Indexed by the combined query's pattern index: the rule that pattern came
// from and the capture index used to locate the reported node.
pattern_to_rule: Vec<(&'a Rule, usize)>,
}
/// Runs `rules` against every supported source file under `root` and returns
/// all findings.
///
/// Files are grouped by grammar name. For each grammar, the query strings of
/// the applicable rules (rules listing that grammar in `languages`, plus rules
/// with no `languages` at all) are joined into one combined query, so each
/// file is parsed once and matched with a single query cursor.
///
/// * `loader` supplies the grammars; grammars it cannot provide are skipped.
/// * `filter_rule`, when `Some`, restricts the run to the rule with that id.
/// * `debug.timing` prints per-phase timings to stderr.
///
/// A match is dropped when the file's relative path matches one of the rule's
/// `allow` patterns, when `check_requires` or `evaluate_predicates` rejects
/// it, or when a `moss-allow:` comment covers the matched line. Files that
/// cannot be read or parsed are skipped silently.
pub fn run_rules(
rules: &[Rule],
root: &Path,
loader: &GrammarLoader,
filter_rule: Option<&str>,
debug: &DebugFlags,
) -> Vec<Finding> {
let start = std::time::Instant::now();
let mut findings = Vec::new();
let source_registry = builtin_registry();
// Apply the optional rule-id filter; with no filter every rule is active.
let active_rules: Vec<&Rule> = rules
.iter()
.filter(|r| filter_rule.map_or(true, |f| r.id == f))
.collect();
if active_rules.is_empty() {
return findings;
}
// Phase 1: collect files and bucket them by grammar name.
let files = collect_source_files(root);
let mut files_by_grammar: HashMap<String, Vec<PathBuf>> = HashMap::new();
for file in files {
if let Some(lang) = support_for_path(&file) {
let grammar_name = lang.grammar_name().to_string();
files_by_grammar.entry(grammar_name).or_default().push(file);
}
}
if debug.timing {
eprintln!("[timing] file collection: {:?}", start.elapsed());
}
// Phase 2: build one combined query per grammar.
let compile_start = std::time::Instant::now();
// Rules that name specific languages vs. rules that apply to every grammar.
let (specific_rules, global_rules): (Vec<&&Rule>, Vec<&&Rule>) =
active_rules.iter().partition(|r| !r.languages.is_empty());
let mut combined_by_grammar: HashMap<String, CombinedQuery> = HashMap::new();
for grammar_name in files_by_grammar.keys() {
let Some(grammar) = loader.get(grammar_name) else {
continue;
};
// Only rules whose query compiles on its own for this grammar take part in
// the combined query; the individual query is kept for its pattern count.
let mut compiled_rules: Vec<(&Rule, tree_sitter::Query)> = Vec::new();
for rule in &specific_rules {
if rule.languages.iter().any(|l| l == grammar_name) {
if let Ok(q) = tree_sitter::Query::new(&grammar, &rule.query_str) {
compiled_rules.push((rule, q));
}
}
}
for rule in &global_rules {
if let Ok(q) = tree_sitter::Query::new(&grammar, &rule.query_str) {
compiled_rules.push((rule, q));
}
}
if compiled_rules.is_empty() {
continue;
}
let combined_str = compiled_rules
.iter()
.map(|(r, _)| r.query_str.as_str())
.collect::<Vec<_>>()
.join("\n\n");
let query = match tree_sitter::Query::new(&grammar, &combined_str) {
Ok(q) => q,
Err(e) => {
eprintln!("Warning: combined query failed for {}: {}", grammar_name, e);
continue;
}
};
let mut pattern_to_rule: Vec<(&Rule, usize)> = Vec::new();
// Index of the capture named `match` in the combined query; falls back to
// capture 0 when no capture has that name.
let combined_match_idx = query
.capture_names()
.iter()
.position(|n| *n == "match")
.unwrap_or(0);
// Each rule contributes `pattern_count()` consecutive entries, in
// concatenation order, so a combined pattern index maps back to its rule.
for (rule, individual_query) in &compiled_rules {
for _ in 0..individual_query.pattern_count() {
pattern_to_rule.push((*rule, combined_match_idx));
}
}
combined_by_grammar.insert(
grammar_name.clone(),
CombinedQuery {
query,
pattern_to_rule,
},
);
}
if debug.timing {
eprintln!(
"[timing] query compilation: {:?} ({} grammars)",
compile_start.elapsed(),
combined_by_grammar.len()
);
}
// Phase 3: parse every file and run its grammar's combined query.
let process_start = std::time::Instant::now();
for (grammar_name, files) in &files_by_grammar {
let Some(combined) = combined_by_grammar.get(grammar_name) else {
continue;
};
let Some(grammar) = loader.get(grammar_name) else {
continue;
};
// One parser per grammar, reused for all of that grammar's files.
let mut parser = tree_sitter::Parser::new();
if parser.set_language(&grammar).is_err() {
continue;
}
for file in files {
// Path relative to `root` (or the full path if it is not under `root`);
// used for `allow` patterns and the source context.
let rel_path = file.strip_prefix(root).unwrap_or(file);
let rel_path_str = rel_path.to_string_lossy();
let source_ctx = SourceContext {
file_path: file,
rel_path: &rel_path_str,
project_root: root,
};
let content = match std::fs::read_to_string(file) {
Ok(c) => c,
Err(_) => continue,
};
let tree = match parser.parse(&content, None) {
Some(t) => t,
None => continue,
};
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&combined.query, tree.root_node(), content.as_bytes());
while let Some(m) = matches.next() {
// Map the combined pattern index back to the owning rule.
let Some((rule, match_idx)) = combined.pattern_to_rule.get(m.pattern_index) else {
continue;
};
if rule.allow.iter().any(|p| p.matches(&rel_path_str)) {
continue;
}
if !check_requires(rule, &source_registry, &source_ctx) {
continue;
}
if !evaluate_predicates(&combined.query, m, content.as_bytes()) {
continue;
}
// The reported node is the one bound to the `match` capture; matches
// without that capture produce no finding.
let capture = m.captures.iter().find(|c| c.index as usize == *match_idx);
if let Some(cap) = capture {
let node = cap.node;
// tree-sitter rows are 0-based; findings use 1-based lines.
let start_line = node.start_position().row + 1;
if is_allowed_by_comment(&content, start_line, &rule.id) {
continue;
}
let text = node.utf8_text(content.as_bytes()).unwrap_or("");
// Record the text of every capture so fix templates can refer to it.
let mut captures_map: HashMap<String, String> = HashMap::new();
for cap in m.captures {
let name = combined.query.capture_names()[cap.index as usize].to_string();
if let Ok(cap_text) = cap.node.utf8_text(content.as_bytes()) {
captures_map.insert(name, cap_text.to_string());
}
}
findings.push(Finding {
rule_id: rule.id.clone(),
file: file.clone(),
start_line,
start_col: node.start_position().column + 1,
end_line: node.end_position().row + 1,
end_col: node.end_position().column + 1,
start_byte: node.start_byte(),
end_byte: node.end_byte(),
message: rule.message.clone(),
severity: rule.severity,
// Only the first line of the matched text is kept.
matched_text: text.lines().next().unwrap_or("").to_string(),
fix: rule.fix.clone(),
captures: captures_map,
});
}
}
}
}
if debug.timing {
eprintln!(
"[timing] file processing: {:?} ({} findings)",
process_start.elapsed(),
findings.len()
);
eprintln!("[timing] total: {:?}", start.elapsed());
}
findings
}
pub fn evaluate_predicates(
query: &tree_sitter::Query,
match_: &tree_sitter::QueryMatch,
source: &[u8],
) -> bool {
let predicates = query.general_predicates(match_.pattern_index);
for predicate in predicates {
let name = &predicate.operator;
let args = &predicate.args;
match name.as_ref() {
"eq?" | "not-eq?" => {
if args.len() < 2 {
continue;
}
let first_text = match &args[0] {
tree_sitter::QueryPredicateArg::Capture(idx) => match_
.captures
.iter()
.find(|c| c.index == *idx)
.and_then(|c| c.node.utf8_text(source).ok())
.unwrap_or(""),
tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
};
let second_text = match &args[1] {
tree_sitter::QueryPredicateArg::Capture(idx) => match_
.captures
.iter()
.find(|c| c.index == *idx)
.and_then(|c| c.node.utf8_text(source).ok())
.unwrap_or(""),
tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
};
let equal = first_text == second_text;
if name.as_ref() == "eq?" && !equal {
return false;
}
if name.as_ref() == "not-eq?" && equal {
return false;
}
}
"match?" | "not-match?" => {
if args.len() < 2 {
continue;
}
let capture_text = match &args[0] {
tree_sitter::QueryPredicateArg::Capture(idx) => match_
.captures
.iter()
.find(|c| c.index == *idx)
.and_then(|c| c.node.utf8_text(source).ok())
.unwrap_or(""),
_ => continue,
};
let pattern = match &args[1] {
tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
_ => continue,
};
let regex = match regex::Regex::new(pattern) {
Ok(r) => r,
Err(_) => continue,
};
let matches = regex.is_match(capture_text);
if name.as_ref() == "match?" && !matches {
return false;
}
if name.as_ref() == "not-match?" && matches {
return false;
}
}
"any-of?" => {
if args.len() < 2 {
continue;
}
let capture_text = match &args[0] {
tree_sitter::QueryPredicateArg::Capture(idx) => match_
.captures
.iter()
.find(|c| c.index == *idx)
.and_then(|c| c.node.utf8_text(source).ok())
.unwrap_or(""),
_ => continue,
};
let any_match = args[1..].iter().any(|arg| match arg {
tree_sitter::QueryPredicateArg::String(s) => s.as_ref() == capture_text,
_ => false,
});
if !any_match {
return false;
}
}
_ => {
}
}
}
true
}
/// Expands `$name` placeholders in `template` with the matching entries of
/// `captures`.
///
/// The template is scanned once from left to right. At each `$`, the longest
/// capture name that follows it is substituted, so `$name_full` is never
/// mangled by a shorter capture called `name`. Substituted text is not
/// scanned again, so a `$` sequence inside captured source code is left
/// untouched. A `$` that is not followed by a known capture name is copied
/// through unchanged.
pub fn expand_fix_template(template: &str, captures: &HashMap<String, String>) -> String {
    let mut result = String::with_capacity(template.len());
    let mut rest = template;

    while let Some(pos) = rest.find('$') {
        result.push_str(&rest[..pos]);
        let after_dollar = &rest[pos + 1..];

        // Longest-name-wins keeps the result independent of map iteration order.
        let best = captures
            .iter()
            .filter(|(name, _)| !name.is_empty() && after_dollar.starts_with(name.as_str()))
            .max_by_key(|(name, _)| name.len());

        match best {
            Some((name, value)) => {
                result.push_str(value);
                rest = &after_dollar[name.len()..];
            }
            None => {
                result.push('$');
                rest = after_dollar;
            }
        }
    }

    result.push_str(rest);
    result
}
/// Applies the fix of every finding that has one and returns the number of
/// files that were rewritten.
///
/// Fixes are grouped per file and applied from the end of the file towards
/// the beginning, so the byte offsets of fixes that are still pending stay
/// valid. A fix is skipped when:
///
/// * its range overlaps a fix that was already applied (for example a nested
///   or duplicate match) — its offsets would be stale, or
/// * its range no longer fits the file on disk (out of bounds, reversed, or
///   not on a UTF-8 character boundary), which happens when the file changed
///   after it was analysed.
///
/// A file is written back, and counted, only when at least one fix was applied.
///
/// # Errors
///
/// Returns the first I/O error encountered while reading or writing a file;
/// files processed before the error keep their modifications.
pub fn apply_fixes(findings: &[Finding]) -> std::io::Result<usize> {
    let mut by_file: HashMap<&PathBuf, Vec<(&Finding, &str)>> = HashMap::new();
    for finding in findings {
        if let Some(template) = finding.fix.as_deref() {
            by_file
                .entry(&finding.file)
                .or_default()
                .push((finding, template));
        }
    }

    let mut files_modified = 0;
    for (file, mut fixes) in by_file {
        // Highest start offset first: editing the tail never shifts the head.
        fixes.sort_by(|a, b| b.0.start_byte.cmp(&a.0.start_byte));

        let mut content = std::fs::read_to_string(file)?;
        // Start of the lowest region rewritten so far; everything before it
        // still has its original offsets.
        let mut untouched_end = content.len();
        let mut changed = false;

        for (finding, template) in fixes {
            let (start, end) = (finding.start_byte, finding.end_byte);
            let applicable = start <= end
                && end <= untouched_end
                && content.is_char_boundary(start)
                && content.is_char_boundary(end);
            if !applicable {
                continue;
            }

            let replacement = expand_fix_template(template, &finding.captures);
            content.replace_range(start..end, &replacement);
            untouched_end = start;
            changed = true;
        }

        if changed {
            std::fs::write(file, &content)?;
            files_modified += 1;
        }
    }
    Ok(files_modified)
}
/// Walks `root` and returns every regular file for which `support_for_path`
/// reports language support.
///
/// The walk honours `.gitignore` files and does not skip hidden entries;
/// directory entries that fail to be read are ignored.
fn collect_source_files(root: &Path) -> Vec<PathBuf> {
    ignore::WalkBuilder::new(root)
        .hidden(false)
        .git_ignore(true)
        .build()
        .flatten()
        .filter(|entry| {
            let candidate = entry.path();
            candidate.is_file() && support_for_path(candidate).is_some()
        })
        .map(|entry| entry.path().to_path_buf())
        .collect()
}
// Tests for the combined-query approach: several rule queries are joined into
// one tree-sitter query and each match must still be attributed to, and
// filtered by, the pattern it came from.
#[cfg(test)]
mod tests {
use super::*;
use normalize_languages::GrammarLoader;
use streaming_iterator::StreamingIterator;
// Fresh grammar loader for each test.
fn loader() -> GrammarLoader {
GrammarLoader::new()
}
// Two patterns that differ only in their `#eq?` argument must each match
// only their own method name when compiled into a single query.
#[test]
fn test_combined_query_predicate_scoping() {
let loader = loader();
let grammar = loader.get("rust").expect("rust grammar");
let combined_query = r#"
; Pattern 0: matches unwrap
((call_expression
function: (field_expression field: (field_identifier) @_method)
(#eq? @_method "unwrap")) @match)
; Pattern 1: matches expect
((call_expression
function: (field_expression field: (field_identifier) @_method)
(#eq? @_method "expect")) @match)
"#;
let query = tree_sitter::Query::new(&grammar, combined_query)
.expect("combined query should compile");
assert_eq!(query.pattern_count(), 2, "should have 2 patterns");
let test_code = r#"
fn main() {
let x = Some(5);
x.unwrap(); // line 4 - should match pattern 0
x.expect("msg"); // line 5 - should match pattern 1
x.map(|v| v); // line 6 - should NOT match
}
"#;
let mut parser = tree_sitter::Parser::new();
parser.set_language(&grammar).unwrap();
let tree = parser.parse(test_code, None).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());
// Collect (pattern index, matched text) for every match that passes its predicates.
let mut results: Vec<(usize, String)> = Vec::new();
while let Some(m) = matches.next() {
if !evaluate_predicates(&query, m, test_code.as_bytes()) {
continue;
}
let match_capture = m
.captures
.iter()
.find(|c| query.capture_names()[c.index as usize] == "match");
if let Some(cap) = match_capture {
let text = cap.node.utf8_text(test_code.as_bytes()).unwrap();
results.push((m.pattern_index, text.to_string()));
}
}
assert_eq!(results.len(), 2, "should have 2 matches, got {:?}", results);
assert!(
results
.iter()
.any(|(idx, text)| *idx == 0 && text.contains("unwrap")),
"pattern 0 should match unwrap, got {:?}",
results
);
assert!(
results
.iter()
.any(|(idx, text)| *idx == 1 && text.contains("expect")),
"pattern 1 should match expect, got {:?}",
results
);
}
// Queries from two unrelated rules, joined the same way `run_rules` joins
// them, must both produce matches with their own pattern index.
#[test]
fn test_combined_rules_single_traversal() {
let loader = loader();
let grammar = loader.get("rust").expect("rust grammar");
let rules_queries = vec![
(
"unwrap-rule",
r#"((call_expression function: (field_expression field: (field_identifier) @_m) (#eq? @_m "unwrap")) @match)"#,
),
(
"dbg-rule",
r#"((macro_invocation macro: (identifier) @_name (#eq? @_name "dbg")) @match)"#,
),
];
let combined = rules_queries
.iter()
.map(|(_, q)| *q)
.collect::<Vec<_>>()
.join("\n\n");
let query =
tree_sitter::Query::new(&grammar, &combined).expect("combined query should compile");
let test_code = r#"
fn main() {
let x = Some(5);
dbg!(x); // should match pattern 1 (dbg-rule)
x.unwrap(); // should match pattern 0 (unwrap-rule)
}
"#;
let mut parser = tree_sitter::Parser::new();
parser.set_language(&grammar).unwrap();
let tree = parser.parse(test_code, None).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());
// Pattern indices of all matches that pass their predicates.
let mut pattern_indices: Vec<usize> = Vec::new();
while let Some(m) = matches.next() {
if evaluate_predicates(&query, m, test_code.as_bytes()) {
pattern_indices.push(m.pattern_index);
}
}
assert!(
pattern_indices.contains(&0),
"should match pattern 0 (unwrap)"
);
assert!(pattern_indices.contains(&1), "should match pattern 1 (dbg)");
}
}