use crate::sources::{SourceContext, SourceRegistry, builtin_registry};
use crate::{Rule, Severity};
use normalize_languages::{GrammarLoader, support_for_path};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use streaming_iterator::StreamingIterator;
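/// Serializable mirror of [`Finding`], stored as JSON in the findings cache.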
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CachedFinding {
pub rule_id: String,
pub file: std::path::PathBuf,
pub start_line: usize,
pub start_col: usize,
pub end_line: usize,
pub end_col: usize,
pub start_byte: usize,
pub end_byte: usize,
pub message: String,
pub severity: Severity,
pub matched_text: String,
pub fix: Option<String>,
pub captures: HashMap<String, String>,
}
impl From<Finding> for CachedFinding {
fn from(f: Finding) -> Self {
Self {
rule_id: f.rule_id,
file: f.file,
start_line: f.start_line,
start_col: f.start_col,
end_line: f.end_line,
end_col: f.end_col,
start_byte: f.start_byte,
end_byte: f.end_byte,
message: f.message,
severity: f.severity,
matched_text: f.matched_text,
fix: f.fix,
captures: f.captures,
}
}
}
impl From<CachedFinding> for Finding {
fn from(c: CachedFinding) -> Self {
Self {
rule_id: c.rule_id,
file: c.file,
start_line: c.start_line,
start_col: c.start_col,
end_line: c.end_line,
end_col: c.end_col,
start_byte: c.start_byte,
end_byte: c.end_byte,
message: c.message,
severity: c.severity,
matched_text: c.matched_text,
fix: c.fix,
captures: c.captures,
}
}
}
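/// Findings cache backed by a local libsql (SQLite) database at
/// `.normalize/findings-cache.sqlite`, keyed by `(path, engine)` and
/// validated against the file's mtime and the active rules hash.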
struct FindingsCache {
conn: libsql::Connection,
#[allow(dead_code)]
db: libsql::Database,
runtime: Option<tokio::runtime::Runtime>,
}
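/// Drive a future to completion from synchronous code. Inside a multi-thread
/// tokio runtime this uses `block_in_place`; inside any other runtime flavor
/// (where blocking in place would panic) the future runs on a scoped worker
/// thread; with no ambient runtime it uses the caller-provided runtime, if any.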
fn findings_cache_block_on<F: std::future::Future + Send>(
runtime: &Option<tokio::runtime::Runtime>,
fut: F,
) -> F::Output
where
F::Output: Send,
{
if let Ok(handle) = tokio::runtime::Handle::try_current() {
return match handle.runtime_flavor() {
tokio::runtime::RuntimeFlavor::MultiThread => {
tokio::task::block_in_place(|| handle.block_on(fut))
}
_ => spawn_scoped_findings_runtime(fut),
};
}
if let Some(rt) = runtime {
return rt.block_on(fut);
}
spawn_scoped_findings_runtime(fut)
}
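/// Run a future on a fresh current-thread runtime inside a scoped thread,
/// re-panicking if the worker thread panics.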
fn spawn_scoped_findings_runtime<F: std::future::Future + Send>(fut: F) -> F::Output
where
F::Output: Send,
{
std::thread::scope(|s| {
s.spawn(|| {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("failed to build tokio runtime worker thread");
rt.block_on(fut)
})
.join()
.expect("libsql worker thread panicked")
})
}
impl FindingsCache {
fn block_on<F: std::future::Future + Send>(&self, fut: F) -> F::Output
where
F::Output: Send,
{
findings_cache_block_on(&self.runtime, fut)
}
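/// Open (or create) the cache database under `<project_root>/.normalize/`,
/// falling back to an in-memory database if the on-disk one cannot be opened.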
fn open(project_root: &Path) -> Self {
let dir = project_root.join(".normalize");
let _ = std::fs::create_dir_all(&dir);
let db_path = dir.join("findings-cache.sqlite");
let runtime: Option<tokio::runtime::Runtime> =
if tokio::runtime::Handle::try_current().is_ok() {
None
} else {
Some(
tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("failed to build tokio runtime for syntax findings cache"),
)
};
let init = async {
let db = match libsql::Builder::new_local(&db_path).build().await {
Ok(db) => db,
Err(_) => libsql::Builder::new_local(":memory:")
.build()
.await
.expect("failed to open in-memory libsql database"),
};
let conn = db.connect().expect("failed to connect to libsql database");
let _ = conn
.execute_batch(
"PRAGMA journal_mode=WAL;
PRAGMA synchronous=NORMAL;
CREATE TABLE IF NOT EXISTS findings_cache (
path TEXT NOT NULL,
engine TEXT NOT NULL,
mtime_nanos INTEGER NOT NULL,
config_hash TEXT NOT NULL,
findings_json TEXT NOT NULL,
PRIMARY KEY (path, engine)
);",
)
.await;
(db, conn)
};
let (db, conn) = findings_cache_block_on(&runtime, init);
Self { conn, db, runtime }
}
fn begin(&self) {
let conn = &self.conn;
let _ = self.block_on(async { conn.execute_batch("BEGIN;").await });
}
fn commit(&self) {
let conn = &self.conn;
let _ = self.block_on(async { conn.execute_batch("COMMIT;").await });
}
fn get(&self, path: &str, mtime_nanos: u64, config_hash: &str, engine: &str) -> Option<String> {
let conn = &self.conn;
self.block_on(async {
let mut rows = conn
.query(
"SELECT findings_json FROM findings_cache
WHERE path = ?1 AND engine = ?2 AND mtime_nanos = ?3 AND config_hash = ?4",
libsql::params![path, engine, mtime_nanos as i64, config_hash],
)
.await
.ok()?;
let row = rows.next().await.ok()??;
row.get::<String>(0).ok()
})
}
fn put(
&self,
path: &str,
mtime_nanos: u64,
config_hash: &str,
engine: &str,
findings_json: &str,
) {
let conn = &self.conn;
let _ = self.block_on(async {
conn.execute(
"INSERT OR REPLACE INTO findings_cache (path, engine, mtime_nanos, config_hash, findings_json)
VALUES (?1, ?2, ?3, ?4, ?5)",
libsql::params![path, engine, mtime_nanos as i64, config_hash, findings_json],
)
.await
});
}
}
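/// Hash the active rules' ids and query strings; cached findings are
/// invalidated whenever this hash changes.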
fn compute_rules_hash(rules: &[&Rule]) -> String {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
for rule in rules {
rule.id.hash(&mut hasher);
rule.query_str.hash(&mut hasher);
}
format!("{:x}", hasher.finish())
}
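/// Modification time of `path` in nanoseconds since the Unix epoch, or 0 if
/// unavailable (a 0 mtime disables caching for that file).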
fn file_mtime_nanos(path: &Path) -> u64 {
path.metadata()
.and_then(|m| m.modified())
.map(|t| {
t.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0)
})
.unwrap_or(0)
}
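/// A single rule violation in a source file, with 1-based line/column
/// positions and byte offsets into the file contents.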
#[derive(Debug, Clone)]
pub struct Finding {
pub rule_id: String,
pub file: PathBuf,
pub start_line: usize,
pub start_col: usize,
pub end_line: usize,
pub end_col: usize,
pub start_byte: usize,
pub end_byte: usize,
pub message: String,
pub severity: Severity,
pub matched_text: String,
pub fix: Option<String>,
pub captures: HashMap<String, String>,
}
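/// Debug output toggles. `from_args` enables a flag when its name (or `all`)
/// appears in the argument list.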
#[derive(Default)]
pub struct DebugFlags {
pub timing: bool,
}
impl DebugFlags {
pub fn from_args(args: &[String]) -> Self {
let all = args.iter().any(|s| s == "all");
Self {
timing: all || args.iter().any(|s| s == "timing"),
}
}
}
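/// True if `line` carries a `normalize-syntax-allow:` marker naming `rule_id`
/// (followed by nothing, whitespace, `-`, or a closing `*/`).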
fn line_has_allow_comment(line: &str, rule_id: &str) -> bool {
const MARKER: &str = "normalize-syntax-allow:";
if let Some(pos) = line.find(MARKER) {
let after = line[pos + MARKER.len()..].trim_start();
if let Some(rest) = after.strip_prefix(rule_id) {
return rest.is_empty()
|| rest.starts_with(char::is_whitespace)
|| rest.starts_with('-')
|| rest.starts_with("*/");
}
}
false
}
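/// Byte ranges of test regions in `tree` (e.g. `#[cfg(test)]` modules for
/// Rust), found by running the grammar's `test_regions` query and collecting
/// every `@test_region` capture.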
fn test_region_ranges(
grammar_name: &str,
tree: &tree_sitter::Tree,
source: &[u8],
loader: &GrammarLoader,
) -> Vec<(usize, usize)> {
let Some(query_str) = loader.get_test_regions(grammar_name) else {
return Vec::new();
};
let Some(query) = loader.get_compiled_query(grammar_name, "test_regions", &query_str) else {
return Vec::new();
};
let Some(capture_idx) = query
.capture_names()
.iter()
.position(|n| *n == "test_region")
else {
return Vec::new();
};
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&query, tree.root_node(), source);
let mut ranges = Vec::new();
while let Some(m) = matches.next() {
for cap in m.captures {
if cap.index as usize == capture_idx {
ranges.push((cap.node.start_byte(), cap.node.end_byte()));
}
}
}
ranges
}
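/// True if the byte span `start..end` lies entirely within any of `ranges`.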
fn in_any_range(start: usize, end: usize, ranges: &[(usize, usize)]) -> bool {
ranges.iter().any(|&(s, e)| start >= s && end <= e)
}
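/// True if an allow comment for `rule_id` appears on the finding's line or on
/// either of the two lines above it.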
fn is_allowed_by_comment(content: &str, start_line: usize, rule_id: &str) -> bool {
let lines: Vec<&str> = content.lines().collect();
let line_idx = start_line.saturating_sub(1);
for offset in 0..=2usize {
let Some(idx) = line_idx.checked_sub(offset) else {
break;
};
if let Some(line) = lines.get(idx)
&& line_has_allow_comment(line, rule_id)
{
return true;
}
}
false
}
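/// Evaluate a rule's `requires` constraints against the source registry. A
/// value prefixed `>=` or `<=` is an ordered (lexicographic) comparison and
/// `!` negates equality; anything else must match exactly. A missing source
/// value fails the rule.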
fn check_requires(rule: &Rule, registry: &SourceRegistry, ctx: &SourceContext) -> bool {
if rule.requires.is_empty() {
return true;
}
for (key, expected) in &rule.requires {
let actual = match registry.get(ctx, key) {
Some(v) => v,
None => return false,
};
let matches = if let Some(rest) = expected.strip_prefix(">=") {
*actual >= *rest
} else if let Some(rest) = expected.strip_prefix("<=") {
*actual <= *rest
} else if let Some(rest) = expected.strip_prefix('!') {
actual != rest
} else {
actual == *expected
};
if !matches {
return false;
}
}
true
}
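/// Every rule for one grammar compiled into a single tree-sitter query. Each
/// pattern index maps back to its originating rule together with the combined
/// query's `@match` capture index.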
struct CombinedQuery<'a> {
query: tree_sitter::Query,
pattern_to_rule: Vec<(&'a Rule, usize)>,
}
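/// Compile a language-agnostic rule for a specific grammar. If the full query
/// does not compile, fall back to whichever of its top-level patterns do:
/// grammars name equivalent nodes differently (e.g. `comment` vs
/// `line_comment`), so cross-language rules carry one pattern per variant.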
fn compile_cross_language_rule(
rule: &Rule,
grammar: &tree_sitter::Language,
) -> Option<(tree_sitter::Query, String)> {
if let Ok(q) = tree_sitter::Query::new(grammar, &rule.query_str) {
return Some((q, rule.query_str.clone()));
}
let patterns: Vec<&str> = split_query_patterns(&rule.query_str);
if patterns.len() <= 1 {
return None;
}
let valid: Vec<&str> = patterns
.into_iter()
.filter(|p| tree_sitter::Query::new(grammar, p).is_ok())
.collect();
if valid.is_empty() {
return None;
}
let combined = valid.join("\n");
tree_sitter::Query::new(grammar, &combined)
.ok()
.map(|q| (q, combined))
}
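/// Compile all applicable rules into one combined query for `grammar_name`,
/// so every file is traversed once. Pattern indices in the combined query are
/// mapped back to rules via `pattern_to_rule`, in compilation order.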
fn build_combined_query<'a>(
grammar_name: &str,
grammar: &tree_sitter::Language,
specific_rules: &[&&'a Rule],
global_rules: &[&&'a Rule],
) -> Option<CombinedQuery<'a>> {
let mut compiled_rules: Vec<(&Rule, tree_sitter::Query, String)> = Vec::new();
for rule in specific_rules {
if rule.languages.iter().any(|l| l == grammar_name)
&& let Ok(q) = tree_sitter::Query::new(grammar, &rule.query_str)
{
compiled_rules.push((rule, q, rule.query_str.clone()));
}
}
for rule in global_rules {
if let Some((q, qs)) = compile_cross_language_rule(rule, grammar) {
compiled_rules.push((rule, q, qs));
}
}
if compiled_rules.is_empty() {
return None;
}
let combined_str = compiled_rules
.iter()
.map(|(_, _, qs)| qs.as_str())
.collect::<Vec<_>>()
.join("\n\n");
let query = match tree_sitter::Query::new(grammar, &combined_str) {
Ok(q) => q,
Err(e) => {
eprintln!("Warning: combined query failed for {}: {}", grammar_name, e);
return None;
}
};
let combined_match_idx = query
.capture_names()
.iter()
.position(|n| *n == "match")
.unwrap_or(0);
let mut pattern_to_rule: Vec<(&Rule, usize)> = Vec::new();
for (rule, individual_query, _) in &compiled_rules {
for _ in 0..individual_query.pattern_count() {
pattern_to_rule.push((*rule, combined_match_idx));
}
}
Some(CombinedQuery {
query,
pattern_to_rule,
})
}
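/// Build a [`Finding`] from a matched node, recording 1-based positions, the
/// first line of the matched text, and the text of every capture.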
fn build_finding(
rule: &Rule,
node: tree_sitter::Node,
content: &str,
query: &tree_sitter::Query,
m: &tree_sitter::QueryMatch,
file: &Path,
) -> Finding {
let text = node.utf8_text(content.as_bytes()).unwrap_or("");
let mut captures_map: HashMap<String, String> = HashMap::new();
for cap in m.captures {
let name = query.capture_names()[cap.index as usize].to_string();
if let Ok(cap_text) = cap.node.utf8_text(content.as_bytes()) {
captures_map.insert(name, cap_text.to_string());
}
}
Finding {
rule_id: rule.id.clone(),
file: file.to_path_buf(),
start_line: node.start_position().row + 1,
start_col: node.start_position().column + 1,
end_line: node.end_position().row + 1,
end_col: node.end_position().column + 1,
start_byte: node.start_byte(),
end_byte: node.end_byte(),
message: rule.message.clone(),
severity: rule.severity,
matched_text: text.lines().next().unwrap_or("").to_string(),
fix: rule.fix.clone(),
captures: captures_map,
}
}
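/// Path string used when matching `allow`/`files` globs: project-rooted when
/// the scan root sits inside the project, otherwise relative to the scan root.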
struct AllowPath<'a> {
_full: Option<PathBuf>,
display: std::borrow::Cow<'a, str>,
}
fn allow_path_for_file<'a>(
rel_path: &Path,
rel_path_str: &'a str,
root_in_project: &Option<PathBuf>,
) -> AllowPath<'a> {
if let Some(prefix) = root_in_project {
let buf = prefix.join(rel_path);
let s = buf.to_string_lossy().into_owned();
AllowPath {
_full: Some(buf),
display: std::borrow::Cow::Owned(s),
}
} else {
AllowPath {
_full: None,
display: std::borrow::Cow::Borrowed(rel_path_str),
}
}
}
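/// Per-file inputs shared by every match while processing one file.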
struct FileContext<'a> {
file: &'a Path,
content: &'a str,
source_registry: &'a SourceRegistry,
source_ctx: SourceContext<'a>,
allow_path_str: &'a str,
skip_ranges: &'a [(usize, usize)],
}
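/// Run the combined query over one parsed file and append surviving matches
/// to `findings`, filtering by allow globs, `files` patterns, `requires`
/// constraints, query predicates, test regions, and allow comments.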
fn process_file_matches(
ctx: &FileContext,
tree: &tree_sitter::Tree,
combined: &CombinedQuery,
findings: &mut Vec<Finding>,
) {
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&combined.query, tree.root_node(), ctx.content.as_bytes());
while let Some(m) = matches.next() {
let Some((rule, match_idx)) = combined.pattern_to_rule.get(m.pattern_index) else {
continue;
};
if rule.allow.iter().any(|p| p.matches(ctx.allow_path_str)) {
continue;
}
if !rule.files.is_empty() {
let filename = ctx
.file
.file_name()
.map(|n| n.to_string_lossy())
.unwrap_or_default();
let matches_path = rule.files.iter().any(|p| p.matches(ctx.allow_path_str));
let matches_name = rule.files.iter().any(|p| p.matches(filename.as_ref()));
if !matches_path && !matches_name {
continue;
}
}
if !check_requires(rule, ctx.source_registry, &ctx.source_ctx) {
continue;
}
if !evaluate_predicates(&combined.query, m, ctx.content.as_bytes()) {
continue;
}
let Some(cap) = m.captures.iter().find(|c| c.index as usize == *match_idx) else {
continue;
};
if !rule.applies_in_tests
&& in_any_range(cap.node.start_byte(), cap.node.end_byte(), ctx.skip_ranges)
{
continue;
}
let start_line = cap.node.start_position().row + 1;
if is_allowed_by_comment(ctx.content, start_line, &rule.id) {
continue;
}
findings.push(build_finding(
rule,
cap.node,
ctx.content,
&combined.query,
m,
ctx.file,
));
}
}
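/// Run all active syntax rules under `root` and return the findings. Files
/// are grouped by grammar, each grammar's rules are compiled into one
/// combined query, and per-file results are cached by mtime and rules hash.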
#[allow(clippy::too_many_arguments)]
pub fn run_rules(
rules: &[Rule],
root: &Path,
project_root: &Path,
loader: &GrammarLoader,
filter_rule: Option<&str>,
filter_tag: Option<&str>,
filter_ids: Option<&std::collections::HashSet<String>>,
debug: &DebugFlags,
files: Option<&[PathBuf]>,
path_filter: &normalize_rules_config::PathFilter,
walk_config: &normalize_rules_config::WalkConfig,
) -> Vec<Finding> {
let start = std::time::Instant::now();
let raw_abs_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
let abs_root = if raw_abs_root.is_file() {
raw_abs_root
.parent()
.map(|p| p.to_path_buf())
.unwrap_or(raw_abs_root)
} else {
raw_abs_root
};
let abs_project_root = project_root
.canonicalize()
.unwrap_or_else(|_| project_root.to_path_buf());
let root_in_project = abs_root
.strip_prefix(&abs_project_root)
.ok()
.map(|p| p.to_path_buf());
let mut findings = Vec::new();
let source_registry = builtin_registry();
let explicitly_requested = |r: &&Rule| {
filter_rule.is_some_and(|f| r.id == f) || filter_ids.is_some_and(|ids| ids.contains(&r.id))
};
let active_rules: Vec<&Rule> = rules
.iter()
.filter(|r| r.enabled || explicitly_requested(r))
.filter(|r| filter_rule.is_none_or(|f| r.id == f))
.filter(|r| filter_tag.is_none_or(|t| r.tags.iter().any(|tag| tag == t)))
.filter(|r| filter_ids.is_none_or(|ids| ids.contains(&r.id)))
.collect();
if active_rules.is_empty() {
return findings;
}
let cache = FindingsCache::open(&abs_project_root);
let rules_hash = compute_rules_hash(&active_rules);
const ENGINE: &str = "syntax";
let files = if let Some(explicit) = files {
explicit
.iter()
.filter(|f| support_for_path(f).is_some())
.cloned()
.collect()
} else {
collect_source_files(root, path_filter, walk_config)
};
let mut files_by_grammar: HashMap<String, Vec<PathBuf>> = HashMap::new();
for file in files {
if let Some(lang) = support_for_path(&file) {
let grammar_name = lang.grammar_name().to_string();
files_by_grammar.entry(grammar_name).or_default().push(file);
}
}
if debug.timing {
eprintln!("[timing] file collection: {:?}", start.elapsed());
}
let compile_start = std::time::Instant::now();
let (specific_rules, global_rules): (Vec<&&Rule>, Vec<&&Rule>) =
active_rules.iter().partition(|r| !r.languages.is_empty());
let mut combined_by_grammar: HashMap<String, CombinedQuery> = HashMap::new();
for grammar_name in files_by_grammar.keys() {
let grammar = match loader.get(grammar_name) {
Ok(g) => g,
Err(e) => {
let n = files_by_grammar[grammar_name].len();
eprintln!(
"warning: no grammar for {grammar_name} — {n} file(s) skipped by syntax rules ({e}). Run `normalize grammars install` to fix."
);
continue;
}
};
if let Some(cq) =
build_combined_query(grammar_name, &grammar, &specific_rules, &global_rules)
{
combined_by_grammar.insert(grammar_name.clone(), cq);
}
}
if debug.timing {
eprintln!(
"[timing] query compilation: {:?} ({} grammars)",
compile_start.elapsed(),
combined_by_grammar.len()
);
}
let process_start = std::time::Instant::now();
cache.begin();
for (grammar_name, files) in &files_by_grammar {
let Some(combined) = combined_by_grammar.get(grammar_name) else {
continue;
};
let Some(grammar) = loader.get(grammar_name).ok() else {
continue;
};
let mut parser = tree_sitter::Parser::new();
if parser.set_language(&grammar).is_err() {
continue;
}
for file in files {
let file_key = file.to_string_lossy().into_owned();
let mtime_nanos = file_mtime_nanos(file);
if mtime_nanos > 0
&& let Some(json) = cache.get(&file_key, mtime_nanos, &rules_hash, ENGINE)
{
let cached: Vec<CachedFinding> = serde_json::from_str(&json).unwrap_or_default();
findings.extend(cached.into_iter().map(Finding::from));
continue;
}
let rel_path = file.strip_prefix(root).unwrap_or(file);
let rel_path_str = rel_path.to_string_lossy();
let allow_path = allow_path_for_file(rel_path, &rel_path_str, &root_in_project);
let Ok(content) = std::fs::read_to_string(file) else {
continue;
};
let Some(tree) = parser.parse(&content, None) else {
continue;
};
let skip_ranges = test_region_ranges(grammar_name, &tree, content.as_bytes(), loader);
let file_ctx = FileContext {
file,
content: &content,
source_registry: &source_registry,
source_ctx: SourceContext {
file_path: file,
rel_path: &rel_path_str,
project_root: &abs_project_root,
},
allow_path_str: &allow_path.display,
skip_ranges: &skip_ranges,
};
let mut file_findings: Vec<Finding> = Vec::new();
process_file_matches(&file_ctx, &tree, combined, &mut file_findings);
if mtime_nanos > 0 {
let cached: Vec<CachedFinding> = file_findings
.iter()
.cloned()
.map(CachedFinding::from)
.collect();
if let Ok(json) = serde_json::to_string(&cached) {
cache.put(&file_key, mtime_nanos, &rules_hash, ENGINE, &json);
}
}
findings.extend(file_findings);
}
}
cache.commit();
if debug.timing {
eprintln!(
"[timing] file processing: {:?} ({} findings)",
process_start.elapsed(),
findings.len()
);
eprintln!("[timing] total: {:?}", start.elapsed());
}
findings
}
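/// Text of a predicate argument: the matched node's text for a capture (empty
/// if the capture is absent) or the literal value for a string.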
fn resolve_arg_text<'a>(
arg: &'a tree_sitter::QueryPredicateArg,
match_: &tree_sitter::QueryMatch,
source: &'a [u8],
) -> Option<&'a str> {
match arg {
tree_sitter::QueryPredicateArg::Capture(idx) => Some(
match_
.captures
.iter()
.find(|c| c.index == *idx)
.and_then(|c| c.node.utf8_text(source).ok())
.unwrap_or(""),
),
tree_sitter::QueryPredicateArg::String(s) => Some(s.as_ref()),
}
}
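/// Like [`resolve_arg_text`], but returns `None` for string-literal
/// arguments.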
fn resolve_capture_text<'a>(
arg: &'a tree_sitter::QueryPredicateArg,
match_: &tree_sitter::QueryMatch,
source: &'a [u8],
) -> Option<&'a str> {
match arg {
tree_sitter::QueryPredicateArg::Capture(idx) => Some(
match_
.captures
.iter()
.find(|c| c.index == *idx)
.and_then(|c| c.node.utf8_text(source).ok())
.unwrap_or(""),
),
_ => None,
}
}
fn eval_eq(
args: &[tree_sitter::QueryPredicateArg],
match_: &tree_sitter::QueryMatch,
source: &[u8],
negated: bool,
) -> Option<bool> {
if args.len() < 2 {
return None;
}
let first = resolve_arg_text(&args[0], match_, source)?;
let second = resolve_arg_text(&args[1], match_, source)?;
let equal = first == second;
Some(if negated { !equal } else { equal })
}
fn eval_match(
args: &[tree_sitter::QueryPredicateArg],
match_: &tree_sitter::QueryMatch,
source: &[u8],
negated: bool,
) -> Option<bool> {
if args.len() < 2 {
return None;
}
let capture_text = resolve_capture_text(&args[0], match_, source)?;
let pattern = match &args[1] {
tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
_ => return None,
};
let regex = regex::Regex::new(pattern).ok()?;
let matched = regex.is_match(capture_text);
Some(if negated { !matched } else { matched })
}
fn eval_any_of(
args: &[tree_sitter::QueryPredicateArg],
match_: &tree_sitter::QueryMatch,
source: &[u8],
) -> Option<bool> {
if args.len() < 2 {
return None;
}
let capture_text = resolve_capture_text(&args[0], match_, source)?;
let any_match = args[1..].iter().any(|arg| match arg {
tree_sitter::QueryPredicateArg::String(s) => s.as_ref() == capture_text,
_ => false,
});
Some(any_match)
}
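/// Evaluate the general predicates (`#eq?`, `#not-eq?`, `#match?`,
/// `#not-match?`, `#any-of?`) attached to a match's pattern. Unknown or
/// malformed predicates are ignored rather than failing the match.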
pub fn evaluate_predicates(
query: &tree_sitter::Query,
match_: &tree_sitter::QueryMatch,
source: &[u8],
) -> bool {
let predicates = query.general_predicates(match_.pattern_index);
for predicate in predicates {
let name = predicate.operator.as_ref();
let args = &predicate.args;
let result = match name {
"eq?" => eval_eq(args, match_, source, false),
"not-eq?" => eval_eq(args, match_, source, true),
"match?" => eval_match(args, match_, source, false),
"not-match?" => eval_match(args, match_, source, true),
"any-of?" => eval_any_of(args, match_, source),
_ => None,
};
if result == Some(false) {
return false;
}
}
true
}
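/// Expand `$name` placeholders in a fix template with the corresponding
/// capture text.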
#[cfg(feature = "fix")]
pub fn expand_fix_template(template: &str, captures: &HashMap<String, String>) -> String {
let mut result = template.to_string();
for (name, value) in captures {
let placeholder = format!("${}", name);
result = result.replace(&placeholder, value);
}
result
}
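/// Apply fix templates to their source files, rewriting from the end of each
/// file backwards so earlier byte offsets stay valid and skipping fixes that
/// overlap one already applied. Returns the number of files modified.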
#[cfg(feature = "fix")]
pub fn apply_fixes(findings: &[Finding]) -> std::io::Result<usize> {
let mut by_file: HashMap<&PathBuf, Vec<&Finding>> = HashMap::new();
for finding in findings {
if finding.fix.is_some() {
by_file.entry(&finding.file).or_default().push(finding);
}
}
let mut files_modified = 0;
for (file, mut file_findings) in by_file {
file_findings.sort_by(|a, b| b.start_byte.cmp(&a.start_byte));
let mut content = std::fs::read_to_string(file)?;
let mut applied: Vec<(usize, usize)> = Vec::new();
let mut file_changed = false;
for finding in file_findings {
let overlaps = applied
.iter()
.any(|&(s, e)| finding.start_byte < e && finding.end_byte > s);
if overlaps {
continue;
}
let Some(fix_template) = finding.fix.as_ref() else {
continue;
};
let replacement = expand_fix_template(fix_template, &finding.captures);
let before = &content[..finding.start_byte];
let after = &content[finding.end_byte..];
content = format!("{}{}{}", before, replacement, after);
applied.push((finding.start_byte, finding.end_byte));
file_changed = true;
}
if file_changed {
std::fs::write(file, &content)?;
files_modified += 1;
}
}
Ok(files_modified)
}
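/// Walk `root`, honoring the configured ignore files and exclude globs, and
/// collect every file with a supported grammar that passes `filter`.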
fn collect_source_files(
root: &Path,
filter: &normalize_rules_config::PathFilter,
walk_config: &normalize_rules_config::WalkConfig,
) -> Vec<PathBuf> {
let mut files = Vec::new();
let ignore_files = walk_config.ignore_files();
let has_gitignore = ignore_files.contains(&".gitignore");
let mut builder = ignore::WalkBuilder::new(root);
builder
.hidden(false)
.git_ignore(has_gitignore)
.git_global(has_gitignore)
.git_exclude(has_gitignore);
for file in &ignore_files {
if *file != ".gitignore" {
let ignore_path = root.join(file);
if ignore_path.exists() {
builder.add_ignore(ignore_path);
}
}
}
let excludes = walk_config.compiled_excludes(root);
let root_owned = root.to_path_buf();
builder.filter_entry(move |e| {
let path = e.path();
let rel = path.strip_prefix(&root_owned).unwrap_or(path);
if rel.as_os_str().is_empty() {
return true;
}
let is_dir = e.file_type().is_some_and(|ft| ft.is_dir());
!excludes
.matched_path_or_any_parents(rel, is_dir)
.is_ignore()
});
let walker = builder.build();
for entry in walker.flatten() {
let path = entry.path();
if path.is_file() && support_for_path(path).is_some() {
if !filter.is_empty() {
let rel = path.strip_prefix(root).unwrap_or(path);
if !filter.matches_path(rel) {
continue;
}
}
files.push(path.to_path_buf());
}
}
files
}
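/// Split a query source into its top-level S-expression patterns, tracking
/// paren depth while skipping `;` line comments and double-quoted strings
/// (including escaped quotes).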
fn split_query_patterns(query_str: &str) -> Vec<&str> {
let mut patterns = Vec::new();
let mut depth = 0i32;
let mut pattern_start: Option<usize> = None;
let bytes = query_str.as_bytes();
let mut i = 0;
while i < bytes.len() {
let b = bytes[i];
match b {
b';' => {
while i < bytes.len() && bytes[i] != b'\n' {
i += 1;
}
}
b'(' => {
if pattern_start.is_none() {
pattern_start = Some(i);
}
depth += 1;
i += 1;
}
b')' => {
depth -= 1;
i += 1;
if depth == 0
&& let Some(start) = pattern_start
{
patterns.push(&query_str[start..i]);
pattern_start = None;
}
}
b'"' => {
i += 1;
while i < bytes.len() && bytes[i] != b'"' {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
i += 1;
}
_ => {
i += 1;
}
}
}
patterns
}
#[cfg(test)]
mod tests {
use super::*;
use normalize_languages::GrammarLoader;
use normalize_languages::parsers::grammar_loader;
use std::sync::Arc;
use streaming_iterator::StreamingIterator;
fn loader() -> Arc<GrammarLoader> {
grammar_loader()
}
#[test]
fn test_combined_query_predicate_scoping() {
let loader = loader();
let grammar = loader.get("rust").expect("rust grammar");
let combined_query = r#"
; Pattern 0: matches unwrap
((call_expression
function: (field_expression field: (field_identifier) @_method)
(#eq? @_method "unwrap")) @match)
; Pattern 1: matches expect
((call_expression
function: (field_expression field: (field_identifier) @_method)
(#eq? @_method "expect")) @match)
"#;
let query = tree_sitter::Query::new(&grammar, combined_query)
.expect("combined query should compile");
assert_eq!(query.pattern_count(), 2, "should have 2 patterns");
let test_code = r#"
fn main() {
let x = Some(5);
x.unwrap(); // line 4 - should match pattern 0
x.expect("msg"); // line 5 - should match pattern 1
x.map(|v| v); // line 6 - should NOT match
}
"#;
let mut parser = tree_sitter::Parser::new();
parser.set_language(&grammar).unwrap();
let tree = parser.parse(test_code, None).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());
let mut results: Vec<(usize, String)> = Vec::new();
while let Some(m) = matches.next() {
if !evaluate_predicates(&query, m, test_code.as_bytes()) {
continue;
}
let match_capture = m
.captures
.iter()
.find(|c| query.capture_names()[c.index as usize] == "match");
if let Some(cap) = match_capture {
let text = cap.node.utf8_text(test_code.as_bytes()).unwrap();
results.push((m.pattern_index, text.to_string()));
}
}
assert_eq!(results.len(), 2, "should have 2 matches, got {:?}", results);
assert!(
results
.iter()
.any(|(idx, text)| *idx == 0 && text.contains("unwrap")),
"pattern 0 should match unwrap, got {:?}",
results
);
assert!(
results
.iter()
.any(|(idx, text)| *idx == 1 && text.contains("expect")),
"pattern 1 should match expect, got {:?}",
results
);
}
#[test]
fn test_combined_rules_single_traversal() {
let loader = loader();
let grammar = loader.get("rust").expect("rust grammar");
let rules_queries = [
(
"unwrap-rule",
r#"((call_expression function: (field_expression field: (field_identifier) @_m) (#eq? @_m "unwrap")) @match)"#,
),
(
"dbg-rule",
r#"((macro_invocation macro: (identifier) @_name (#eq? @_name "dbg")) @match)"#,
),
];
let combined = rules_queries
.iter()
.map(|(_, q)| *q)
.collect::<Vec<_>>()
.join("\n\n");
let query =
tree_sitter::Query::new(&grammar, &combined).expect("combined query should compile");
let test_code = r#"
fn main() {
let x = Some(5);
dbg!(x); // should match pattern 1 (dbg-rule)
x.unwrap(); // should match pattern 0 (unwrap-rule)
}
"#;
let mut parser = tree_sitter::Parser::new();
parser.set_language(&grammar).unwrap();
let tree = parser.parse(test_code, None).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());
let mut pattern_indices: Vec<usize> = Vec::new();
while let Some(m) = matches.next() {
if evaluate_predicates(&query, m, test_code.as_bytes()) {
pattern_indices.push(m.pattern_index);
}
}
assert!(
pattern_indices.contains(&0),
"should match pattern 0 (unwrap)"
);
assert!(pattern_indices.contains(&1), "should match pattern 1 (dbg)");
}
#[test]
fn test_test_region_ranges_skips_inline_cfg_test_module() {
let loader = loader();
let grammar = loader.get("rust").expect("rust grammar");
let source = r#"
fn outer() {
let x: Option<i32> = None;
x.unwrap();
}
#[cfg(test)]
mod tests {
fn inner() {
let y: Option<i32> = None;
y.unwrap();
}
}
#[cfg(test)]
#[allow(dead_code)]
mod more_tests {
fn inner2() {
None::<i32>.unwrap();
}
}
mod regular_mod {
fn other() {
None::<i32>.unwrap();
}
}
"#;
let mut parser = tree_sitter::Parser::new();
assert!(parser.set_language(&grammar).is_ok());
let tree = parser.parse(source, None).expect("parse");
let ranges = test_region_ranges("rust", &tree, source.as_bytes(), &loader);
assert_eq!(
ranges.len(),
2,
"expected two cfg(test) modules, got {ranges:?}"
);
let unwrap_query = tree_sitter::Query::new(
&grammar,
r#"((call_expression function: (field_expression field: (field_identifier) @m)) @call (#eq? @m "unwrap"))"#,
)
.expect("compile");
let call_idx = unwrap_query
.capture_names()
.iter()
.position(|n| *n == "call")
.unwrap_or(0);
let mut cursor = tree_sitter::QueryCursor::new();
let mut matches = cursor.matches(&unwrap_query, tree.root_node(), source.as_bytes());
let mut classifications: Vec<(usize, bool)> = Vec::new();
while let Some(m) = matches.next() {
for cap in m.captures {
if cap.index as usize == call_idx {
let line = cap.node.start_position().row + 1;
let in_test = in_any_range(cap.node.start_byte(), cap.node.end_byte(), &ranges);
classifications.push((line, in_test));
}
}
}
let outside: Vec<usize> = classifications
.iter()
.filter_map(|(l, t)| if !*t { Some(*l) } else { None })
.collect();
let inside: Vec<usize> = classifications
.iter()
.filter_map(|(l, t)| if *t { Some(*l) } else { None })
.collect();
assert_eq!(
outside.len(),
2,
"expected 2 unwraps outside cfg(test), got {classifications:?}"
);
assert_eq!(
inside.len(),
2,
"expected 2 unwraps inside cfg(test), got {classifications:?}"
);
}
#[test]
fn test_applies_in_tests_per_rule_opt_in() {
let loader = loader();
let tmp = tempfile::tempdir().expect("tempdir");
let file_path = tmp.path().join("lib.rs");
std::fs::write(
&file_path,
r#"fn outer() {
let x: Option<i32> = None;
x.unwrap();
}
#[cfg(test)]
mod tests {
fn inner() {
let y: Option<i32> = None;
y.unwrap();
}
}
"#,
)
.expect("write fixture");
let query_str = r#"((call_expression
function: (field_expression field: (field_identifier) @_m)
(#eq? @_m "unwrap")) @match)"#;
let make_rule = |id: &str, applies_in_tests: bool| {
let frontmatter = format!(
"# ---\n# id = \"{id}\"\n# severity = \"warning\"\n# message = \"unwrap\"\n# languages = [\"rust\"]\n# applies_in_tests = {applies_in_tests}\n# ---\n\n{query_str}\n"
);
crate::parse_rule_content(&frontmatter, id, false).expect("parse rule")
};
let path_filter = normalize_rules_config::PathFilter::default();
let walk_config = normalize_rules_config::WalkConfig::default();
let debug = DebugFlags::default();
let rules_default = vec![make_rule("test/unwrap-default", false)];
let findings = run_rules(
&rules_default,
tmp.path(),
tmp.path(),
&loader,
None,
None,
None,
&debug,
None,
&path_filter,
&walk_config,
);
assert_eq!(
findings.len(),
1,
"applies_in_tests=false should drop the cfg(test) finding, got {findings:?}"
);
assert_eq!(findings[0].start_line, 3, "outer unwrap is on line 3");
let rules_optin = vec![make_rule("test/unwrap-in-tests", true)];
let findings = run_rules(
&rules_optin,
tmp.path(),
tmp.path(),
&loader,
None,
None,
None,
&debug,
None,
&path_filter,
&walk_config,
);
assert_eq!(
findings.len(),
2,
"applies_in_tests=true should keep both unwraps, got {findings:?}"
);
}
#[test]
fn test_split_query_patterns() {
let query = r#"
; Pattern 1: comment
((comment) @match (#match? @match "TODO"))
; Pattern 2: line_comment
((line_comment) @match (#match? @match "TODO"))
"#;
let patterns = split_query_patterns(query);
assert_eq!(patterns.len(), 2);
assert!(patterns[0].contains("comment"));
assert!(patterns[1].contains("line_comment"));
}
#[test]
fn test_cross_grammar_pattern_fallback() {
let loader = loader();
let grammar = loader.get("rust").expect("rust grammar");
let query_str = r#"((comment) @match (#match? @match "TODO"))
((line_comment) @match (#match? @match "TODO"))"#;
assert!(tree_sitter::Query::new(&grammar, query_str).is_err());
let patterns = split_query_patterns(query_str);
let valid: Vec<&str> = patterns
.into_iter()
.filter(|p| tree_sitter::Query::new(&grammar, p).is_ok())
.collect();
assert_eq!(valid.len(), 1, "only line_comment should compile for Rust");
assert!(valid[0].contains("line_comment"));
}
}
#[cfg(test)]
mod glob_tests {
use glob::Pattern;
#[test]
fn test_glob_allow_patterns() {
let cases = [
(
"crates/normalize/src/rg/**",
"crates/normalize/src/rg/flags/defs.rs",
true,
),
(
"crates/normalize/src/rg/**",
"crates/normalize/src/rg/mod.rs",
true,
),
("**/tests/**", "crates/normalize/tests/foo.rs", true),
(
"**/tests/fixtures/**",
"crates/normalize-syntax-rules/tests/fixtures/rust/foo.rs",
true,
),
(
"crates/normalize-facts-rules-interpret/src/tests.rs",
"crates/normalize-facts-rules-interpret/src/tests.rs",
true,
),
(
"crates/normalize-manifest/src/*.rs",
"crates/normalize-manifest/src/nuget.rs",
true,
),
];
for (p, path, expected) in cases {
let pat = Pattern::new(p).unwrap();
assert_eq!(pat.matches(path), expected, "Pattern: {p}, Path: {path}");
}
}
}