1use crate::sources::{SourceContext, SourceRegistry, builtin_registry};
4use crate::{Rule, Severity};
5use normalize_languages::{GrammarLoader, support_for_path};
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use streaming_iterator::StreamingIterator;
9
/// Serializable snapshot of a [`Finding`] as stored in the on-disk findings
/// cache. Mirrors `Finding` field-for-field so the two convert losslessly.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CachedFinding {
    pub rule_id: String,
    pub file: std::path::PathBuf,
    // 1-based line/column span of the matched node.
    pub start_line: usize,
    pub start_col: usize,
    pub end_line: usize,
    pub end_col: usize,
    // Byte offsets into the source file (used when applying fixes).
    pub start_byte: usize,
    pub end_byte: usize,
    pub message: String,
    pub severity: Severity,
    /// First line of the matched source text.
    pub matched_text: String,
    /// Optional fix template with `$capture` placeholders.
    pub fix: Option<String>,
    /// Capture name -> captured source text for this match.
    pub captures: HashMap<String, String>,
}
30
31impl From<Finding> for CachedFinding {
32 fn from(f: Finding) -> Self {
33 Self {
34 rule_id: f.rule_id,
35 file: f.file,
36 start_line: f.start_line,
37 start_col: f.start_col,
38 end_line: f.end_line,
39 end_col: f.end_col,
40 start_byte: f.start_byte,
41 end_byte: f.end_byte,
42 message: f.message,
43 severity: f.severity,
44 matched_text: f.matched_text,
45 fix: f.fix,
46 captures: f.captures,
47 }
48 }
49}
50
51impl From<CachedFinding> for Finding {
52 fn from(c: CachedFinding) -> Self {
53 Self {
54 rule_id: c.rule_id,
55 file: c.file,
56 start_line: c.start_line,
57 start_col: c.start_col,
58 end_line: c.end_line,
59 end_col: c.end_col,
60 start_byte: c.start_byte,
61 end_byte: c.end_byte,
62 message: c.message,
63 severity: c.severity,
64 matched_text: c.matched_text,
65 fix: c.fix,
66 captures: c.captures,
67 }
68 }
69}
70
/// Per-project findings cache backed by a libsql/SQLite database at
/// `<project_root>/.normalize/findings-cache.sqlite` (falls back to an
/// in-memory database when the file cannot be opened; see `open`).
struct FindingsCache {
    conn: libsql::Connection,
    // Kept alive so the connection stays valid for the cache's lifetime.
    #[allow(dead_code)]
    db: libsql::Database,
    // Runtime used to drive libsql's async API when the caller is not
    // already inside a tokio runtime; None when an ambient runtime exists.
    runtime: Option<tokio::runtime::Runtime>,
}
86
/// Drives `fut` to completion from synchronous code, picking whichever
/// blocking strategy is safe for the current tokio context:
/// - inside a multi-thread runtime: `block_in_place` + `block_on`, so the
///   worker thread the caller occupies is released for other tasks;
/// - inside a current-thread runtime: blocking in place would deadlock, so
///   the future runs on a scratch runtime in a scoped thread;
/// - no ambient runtime: use the cache's own `runtime` if present, else a
///   scoped scratch runtime.
fn findings_cache_block_on<F: std::future::Future + Send>(
    runtime: &Option<tokio::runtime::Runtime>,
    fut: F,
) -> F::Output
where
    F::Output: Send,
{
    if let Ok(handle) = tokio::runtime::Handle::try_current() {
        return match handle.runtime_flavor() {
            tokio::runtime::RuntimeFlavor::MultiThread => {
                tokio::task::block_in_place(|| handle.block_on(fut))
            }
            _ => spawn_scoped_findings_runtime(fut),
        };
    }
    if let Some(rt) = runtime {
        return rt.block_on(fut);
    }
    spawn_scoped_findings_runtime(fut)
}
112
/// Runs `fut` on a fresh current-thread runtime inside a scoped thread and
/// blocks until it finishes. `std::thread::scope` lets the future borrow from
/// the caller's stack without requiring `'static`; a panic on the worker
/// thread is propagated by the `expect` on `join`.
fn spawn_scoped_findings_runtime<F: std::future::Future + Send>(fut: F) -> F::Output
where
    F::Output: Send,
{
    std::thread::scope(|s| {
        s.spawn(|| {
            let rt = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .expect("failed to build tokio runtime worker thread");
            rt.block_on(fut)
        })
        .join()
        .expect("libsql worker thread panicked")
    })
}
129
impl FindingsCache {
    /// Blocks on `fut` using the cache's owned runtime when one exists; see
    /// `findings_cache_block_on` for the full selection strategy.
    fn block_on<F: std::future::Future + Send>(&self, fut: F) -> F::Output
    where
        F::Output: Send,
    {
        findings_cache_block_on(&self.runtime, fut)
    }

    /// Opens (or creates) the cache database under `<project_root>/.normalize`.
    ///
    /// Failures are deliberately soft: directory creation and schema setup
    /// errors are ignored, and an unopenable database file falls back to an
    /// in-memory database — caching degrades to a no-op rather than failing
    /// the whole run.
    fn open(project_root: &Path) -> Self {
        let dir = project_root.join(".normalize");
        let _ = std::fs::create_dir_all(&dir);
        let db_path = dir.join("findings-cache.sqlite");
        // Only build an owned runtime when there is no ambient one; nesting
        // runtimes is not allowed by tokio.
        let runtime: Option<tokio::runtime::Runtime> =
            if tokio::runtime::Handle::try_current().is_ok() {
                None
            } else {
                Some(
                    tokio::runtime::Builder::new_current_thread()
                        .enable_all()
                        .build()
                        .expect("failed to build tokio runtime for syntax findings cache"),
                )
            };
        let init = async {
            let db = match libsql::Builder::new_local(&db_path).build().await {
                Ok(db) => db,
                Err(_) => libsql::Builder::new_local(":memory:")
                    .build()
                    .await
                    .expect("failed to open in-memory libsql database"),
            };
            let conn = db.connect().expect("failed to connect to libsql database");
            // Schema: one row per (path, engine); mtime + config hash decide
            // whether a cached row is still valid.
            let _ = conn
                .execute_batch(
                    "PRAGMA journal_mode=WAL;
                    PRAGMA synchronous=NORMAL;
                    CREATE TABLE IF NOT EXISTS findings_cache (
                        path TEXT NOT NULL,
                        engine TEXT NOT NULL,
                        mtime_nanos INTEGER NOT NULL,
                        config_hash TEXT NOT NULL,
                        findings_json TEXT NOT NULL,
                        PRIMARY KEY (path, engine)
                    );",
                )
                .await;
            (db, conn)
        };
        let (db, conn) = findings_cache_block_on(&runtime, init);
        Self { conn, db, runtime }
    }

    /// Starts a transaction; errors are ignored (best-effort caching).
    fn begin(&self) {
        let conn = &self.conn;
        let _ = self.block_on(async { conn.execute_batch("BEGIN;").await });
    }

    /// Commits the current transaction; errors are ignored.
    fn commit(&self) {
        let conn = &self.conn;
        let _ = self.block_on(async { conn.execute_batch("COMMIT;").await });
    }

    /// Returns the cached findings JSON for `path` when the stored mtime and
    /// config hash both match; `None` on miss or any query error.
    fn get(&self, path: &str, mtime_nanos: u64, config_hash: &str, engine: &str) -> Option<String> {
        let conn = &self.conn;
        self.block_on(async {
            let mut rows = conn
                .query(
                    "SELECT findings_json FROM findings_cache
                     WHERE path = ?1 AND engine = ?2 AND mtime_nanos = ?3 AND config_hash = ?4",
                    libsql::params![path, engine, mtime_nanos as i64, config_hash],
                )
                .await
                .ok()?;
            let row = rows.next().await.ok()??;
            row.get::<String>(0).ok()
        })
    }

    /// Upserts the findings JSON for `path`; errors are ignored.
    fn put(
        &self,
        path: &str,
        mtime_nanos: u64,
        config_hash: &str,
        engine: &str,
        findings_json: &str,
    ) {
        let conn = &self.conn;
        let _ = self.block_on(async {
            conn.execute(
                "INSERT OR REPLACE INTO findings_cache (path, engine, mtime_nanos, config_hash, findings_json)
                 VALUES (?1, ?2, ?3, ?4, ?5)",
                libsql::params![path, engine, mtime_nanos as i64, config_hash, findings_json],
            )
            .await
        });
    }
}
227
228fn compute_rules_hash(rules: &[&Rule]) -> String {
230 use std::collections::hash_map::DefaultHasher;
231 use std::hash::{Hash, Hasher};
232 let mut hasher = DefaultHasher::new();
233 for rule in rules {
234 rule.id.hash(&mut hasher);
235 rule.query_str.hash(&mut hasher);
236 }
237 format!("{:x}", hasher.finish())
238}
239
/// Returns the file's modification time as nanoseconds since the Unix epoch,
/// or 0 when the file cannot be stat'ed (callers treat 0 as "uncacheable").
fn file_mtime_nanos(path: &Path) -> u64 {
    let Ok(meta) = path.metadata() else { return 0 };
    let Ok(modified) = meta.modified() else { return 0 };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(d) => d.as_nanos() as u64,
        Err(_) => 0,
    }
}
255
/// A single rule violation located in a source file.
#[derive(Debug, Clone)]
pub struct Finding {
    pub rule_id: String,
    pub file: PathBuf,
    // 1-based line/column span of the matched node.
    pub start_line: usize,
    pub start_col: usize,
    pub end_line: usize,
    pub end_col: usize,
    // Byte offsets into the source file (used when applying fixes).
    pub start_byte: usize,
    pub end_byte: usize,
    pub message: String,
    pub severity: Severity,
    /// First line of the matched source text.
    pub matched_text: String,
    /// Optional fix template with `$capture` placeholders.
    pub fix: Option<String>,
    /// Capture name -> captured source text for this match.
    pub captures: HashMap<String, String>,
}
288
/// Debug switches parsed from CLI-style debug arguments.
#[derive(Default)]
pub struct DebugFlags {
    /// Print per-phase timing information to stderr.
    pub timing: bool,
}
295
296impl DebugFlags {
297 pub fn from_args(args: &[String]) -> Self {
298 let all = args.iter().any(|s| s == "all");
299 Self {
300 timing: all || args.iter().any(|s| s == "timing"),
301 }
302 }
303}
304
/// Returns true if `line` contains a `normalize-syntax-allow:` comment naming
/// `rule_id`.
///
/// The id must be followed by end-of-line, whitespace, `-`, or a closing
/// block-comment `*/` so that e.g. `my-rule` does not also allow `my-rule2`.
fn line_has_allow_comment(line: &str, rule_id: &str) -> bool {
    // Derive the skip offset from the marker itself instead of the previous
    // hard-coded `23`, which silently broke if the marker text changed.
    const MARKER: &str = "normalize-syntax-allow:";
    if let Some(pos) = line.find(MARKER) {
        let after = line[pos + MARKER.len()..].trim_start();
        if let Some(rest) = after.strip_prefix(rule_id) {
            return rest.is_empty()
                || rest.starts_with(char::is_whitespace)
                || rest.starts_with('-')
                || rest.starts_with("*/");
        }
    }
    false
}
324
325fn test_region_ranges(
335 grammar_name: &str,
336 tree: &tree_sitter::Tree,
337 source: &[u8],
338 loader: &GrammarLoader,
339) -> Vec<(usize, usize)> {
340 let Some(query_str) = loader.get_test_regions(grammar_name) else {
341 return Vec::new();
342 };
343 let Some(query) = loader.get_compiled_query(grammar_name, "test_regions", &query_str) else {
344 return Vec::new();
345 };
346 let Some(capture_idx) = query
347 .capture_names()
348 .iter()
349 .position(|n| *n == "test_region")
350 else {
351 return Vec::new();
352 };
353 let mut cursor = tree_sitter::QueryCursor::new();
354 let mut matches = cursor.matches(&query, tree.root_node(), source);
355 let mut ranges = Vec::new();
356 while let Some(m) = matches.next() {
357 for cap in m.captures {
358 if cap.index as usize == capture_idx {
359 ranges.push((cap.node.start_byte(), cap.node.end_byte()));
360 }
361 }
362 }
363 ranges
364}
365
/// Returns true when `[start, end]` is fully contained in at least one of
/// the given byte ranges.
fn in_any_range(start: usize, end: usize, ranges: &[(usize, usize)]) -> bool {
    for &(lo, hi) in ranges {
        if lo <= start && end <= hi {
            return true;
        }
    }
    false
}
370
371fn is_allowed_by_comment(content: &str, start_line: usize, rule_id: &str) -> bool {
375 let lines: Vec<&str> = content.lines().collect();
376 let line_idx = start_line.saturating_sub(1); for offset in 0..=2usize {
379 let Some(idx) = line_idx.checked_sub(offset) else {
380 break;
381 };
382 if let Some(line) = lines.get(idx)
383 && line_has_allow_comment(line, rule_id)
384 {
385 return true;
386 }
387 }
388
389 false
390}
391
/// Returns true when every `requires` entry of `rule` is satisfied by the
/// source registry for this file context.
///
/// Supported expected-value forms: `>=X` / `<=X` (string comparison —
/// NOTE(review): this is lexicographic, not numeric/semver ordering; confirm
/// callers only rely on it for values where that is acceptable), `!X`
/// (not-equal), and plain `X` (equal).
fn check_requires(rule: &Rule, registry: &SourceRegistry, ctx: &SourceContext) -> bool {
    // No requirements means the rule applies unconditionally.
    if rule.requires.is_empty() {
        return true;
    }

    for (key, expected) in &rule.requires {
        // A key the registry cannot resolve fails the requirement outright.
        let actual = match registry.get(ctx, key) {
            Some(v) => v,
            None => return false, };

        let matches = if let Some(rest) = expected.strip_prefix(">=") {
            *actual >= *rest
        } else if let Some(rest) = expected.strip_prefix("<=") {
            *actual <= *rest
        } else if let Some(rest) = expected.strip_prefix('!') {
            actual != rest
        } else {
            actual == *expected
        };

        if !matches {
            return false;
        }
    }

    true
}
428
/// All rules applicable to one grammar compiled into a single tree-sitter
/// query, so each file's tree is traversed only once.
struct CombinedQuery<'a> {
    query: tree_sitter::Query,
    /// One entry per pattern in `query`, in pattern order: the rule the
    /// pattern came from and the capture index of `@match` in the combined
    /// query.
    pattern_to_rule: Vec<(&'a Rule, usize)>,
}
435
436fn compile_cross_language_rule(
439 rule: &Rule,
440 grammar: &tree_sitter::Language,
441) -> Option<(tree_sitter::Query, String)> {
442 if let Ok(q) = tree_sitter::Query::new(grammar, &rule.query_str) {
443 return Some((q, rule.query_str.clone()));
444 }
445 let patterns: Vec<&str> = split_query_patterns(&rule.query_str);
447 if patterns.len() <= 1 {
448 return None;
449 }
450 let valid: Vec<&str> = patterns
451 .into_iter()
452 .filter(|p| tree_sitter::Query::new(grammar, p).is_ok())
453 .collect();
454 if valid.is_empty() {
455 return None;
456 }
457 let combined = valid.join("\n");
458 tree_sitter::Query::new(grammar, &combined)
459 .ok()
460 .map(|q| (q, combined))
461}
462
/// Compiles every applicable rule into one [`CombinedQuery`] for
/// `grammar_name`, or `None` when no rule compiles for this grammar.
///
/// Language-specific rules must compile verbatim; global rules may be reduced
/// to the subset of their patterns this grammar accepts (see
/// `compile_cross_language_rule`). Pattern indices in the combined query
/// follow concatenation order, which is what lets `pattern_to_rule` map a
/// match's pattern index back to its originating rule.
fn build_combined_query<'a>(
    grammar_name: &str,
    grammar: &tree_sitter::Language,
    specific_rules: &[&&'a Rule],
    global_rules: &[&&'a Rule],
) -> Option<CombinedQuery<'a>> {
    let mut compiled_rules: Vec<(&Rule, tree_sitter::Query, String)> = Vec::new();

    // Language-specific rules: only for their declared grammars, compiled
    // individually first so failures drop just that rule.
    for rule in specific_rules {
        if rule.languages.iter().any(|l| l == grammar_name)
            && let Ok(q) = tree_sitter::Query::new(grammar, &rule.query_str)
        {
            compiled_rules.push((rule, q, rule.query_str.clone()));
        }
    }

    for rule in global_rules {
        if let Some((q, qs)) = compile_cross_language_rule(rule, grammar) {
            compiled_rules.push((rule, q, qs));
        }
    }

    if compiled_rules.is_empty() {
        return None;
    }

    // Concatenate the per-rule query texts and compile once.
    let combined_str = compiled_rules
        .iter()
        .map(|(_, _, qs)| qs.as_str())
        .collect::<Vec<_>>()
        .join("\n\n");

    let query = match tree_sitter::Query::new(grammar, &combined_str) {
        Ok(q) => q,
        Err(e) => {
            eprintln!("Warning: combined query failed for {}: {}", grammar_name, e);
            return None;
        }
    };

    // Index of the `@match` capture in the combined query; falls back to
    // capture 0 when no pattern names a `@match` capture.
    let combined_match_idx = query
        .capture_names()
        .iter()
        .position(|n| *n == "match")
        .unwrap_or(0);

    // Each rule contributes as many patterns as its individual query had,
    // in the same order they were concatenated above.
    let mut pattern_to_rule: Vec<(&Rule, usize)> = Vec::new();
    for (rule, individual_query, _) in &compiled_rules {
        for _ in 0..individual_query.pattern_count() {
            pattern_to_rule.push((*rule, combined_match_idx));
        }
    }

    Some(CombinedQuery {
        query,
        pattern_to_rule,
    })
}
526
527fn build_finding(
529 rule: &Rule,
530 node: tree_sitter::Node,
531 content: &str,
532 query: &tree_sitter::Query,
533 m: &tree_sitter::QueryMatch,
534 file: &Path,
535) -> Finding {
536 let text = node.utf8_text(content.as_bytes()).unwrap_or("");
537
538 let mut captures_map: HashMap<String, String> = HashMap::new();
539 for cap in m.captures {
540 let name = query.capture_names()[cap.index as usize].to_string();
541 if let Ok(cap_text) = cap.node.utf8_text(content.as_bytes()) {
542 captures_map.insert(name, cap_text.to_string());
543 }
544 }
545
546 Finding {
547 rule_id: rule.id.clone(),
548 file: file.to_path_buf(),
549 start_line: node.start_position().row + 1,
550 start_col: node.start_position().column + 1,
551 end_line: node.end_position().row + 1,
552 end_col: node.end_position().column + 1,
553 start_byte: node.start_byte(),
554 end_byte: node.end_byte(),
555 message: rule.message.clone(),
556 severity: rule.severity,
557 matched_text: text.lines().next().unwrap_or("").to_string(),
558 fix: rule.fix.clone(),
559 captures: captures_map,
560 }
561}
562
/// Path used when testing a rule's allow/files globs: project-relative when
/// the scan root lies inside the project, otherwise the root-relative string.
struct AllowPath<'a> {
    // Owned project-relative path, kept so `display` can borrow from it.
    _full: Option<PathBuf>,
    /// The string actually matched against rule globs.
    display: std::borrow::Cow<'a, str>,
}
570
571fn allow_path_for_file<'a>(
573 rel_path: &Path,
574 rel_path_str: &'a str,
575 root_in_project: &Option<PathBuf>,
576) -> AllowPath<'a> {
577 if let Some(prefix) = root_in_project {
578 let buf = prefix.join(rel_path);
579 let s = buf.to_string_lossy().into_owned();
580 AllowPath {
581 _full: Some(buf),
582 display: std::borrow::Cow::Owned(s),
583 }
584 } else {
585 AllowPath {
586 _full: None,
587 display: std::borrow::Cow::Borrowed(rel_path_str),
588 }
589 }
590}
591
/// Per-file state threaded through match processing.
struct FileContext<'a> {
    file: &'a Path,
    content: &'a str,
    source_registry: &'a SourceRegistry,
    source_ctx: SourceContext<'a>,
    /// Path string used for allow/`files` glob matching.
    allow_path_str: &'a str,
    /// Byte ranges of test regions; findings inside are dropped unless the
    /// rule sets `applies_in_tests`.
    skip_ranges: &'a [(usize, usize)],
}
605
/// Runs the grammar's combined query over one parsed file and appends every
/// surviving match to `findings`.
///
/// A match survives only if: its rule's allow globs don't match the file, its
/// `files` globs (when present) match the path or bare filename, its
/// `requires` entries are satisfied, its query predicates evaluate true, it
/// isn't inside a test region (unless `applies_in_tests`), and no inline
/// allow comment covers it.
fn process_file_matches(
    ctx: &FileContext,
    tree: &tree_sitter::Tree,
    combined: &CombinedQuery,
    findings: &mut Vec<Finding>,
) {
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&combined.query, tree.root_node(), ctx.content.as_bytes());

    while let Some(m) = matches.next() {
        // Map the pattern index back to the rule that contributed it.
        let Some((rule, match_idx)) = combined.pattern_to_rule.get(m.pattern_index) else {
            continue;
        };

        // Rule-level allow globs suppress the rule for this whole file.
        if rule.allow.iter().any(|p| p.matches(ctx.allow_path_str)) {
            continue;
        }

        // `files` restricts the rule to matching paths or bare filenames.
        if !rule.files.is_empty() {
            let filename = ctx
                .file
                .file_name()
                .map(|n| n.to_string_lossy())
                .unwrap_or_default();
            let matches_path = rule.files.iter().any(|p| p.matches(ctx.allow_path_str));
            let matches_name = rule.files.iter().any(|p| p.matches(filename.as_ref()));
            if !matches_path && !matches_name {
                continue;
            }
        }

        if !check_requires(rule, ctx.source_registry, &ctx.source_ctx) {
            continue;
        }

        // #eq?/#match?/#any-of? predicates are enforced here, not by
        // tree-sitter's cursor itself.
        if !evaluate_predicates(&combined.query, m, ctx.content.as_bytes()) {
            continue;
        }

        // The `@match` capture anchors the reported span.
        let Some(cap) = m.captures.iter().find(|c| c.index as usize == *match_idx) else {
            continue;
        };

        // Skip findings inside test regions unless the rule opts in.
        if !rule.applies_in_tests
            && in_any_range(cap.node.start_byte(), cap.node.end_byte(), ctx.skip_ranges)
        {
            continue;
        }

        // An allow comment on the finding's line or up to two lines above.
        let start_line = cap.node.start_position().row + 1;
        if is_allowed_by_comment(ctx.content, start_line, &rule.id) {
            continue;
        }

        findings.push(build_finding(
            rule,
            cap.node,
            ctx.content,
            &combined.query,
            m,
            ctx.file,
        ));
    }
}
671
/// Runs all active syntax rules over the source files under `root` and
/// returns the accumulated findings.
///
/// High-level flow:
/// 1. canonicalize roots (a file `root` is replaced by its parent directory);
/// 2. select active rules — enabled ones plus any explicitly requested via
///    `filter_rule`/`filter_ids` even when disabled — narrowed by tag;
/// 3. collect candidate files (explicit list or a filtered walk) and bucket
///    them by grammar;
/// 4. compile one combined query per grammar;
/// 5. per file: reuse cached findings when (path, engine, mtime, rules hash)
///    match, otherwise parse, run the combined query, and write the result
///    back to the cache.
#[allow(clippy::too_many_arguments)]
pub fn run_rules(
    rules: &[Rule],
    root: &Path,
    project_root: &Path,
    loader: &GrammarLoader,
    filter_rule: Option<&str>,
    filter_tag: Option<&str>,
    filter_ids: Option<&std::collections::HashSet<String>>,
    debug: &DebugFlags,
    files: Option<&[PathBuf]>,
    path_filter: &normalize_rules_config::PathFilter,
    walk_config: &normalize_rules_config::WalkConfig,
) -> Vec<Finding> {
    let start = std::time::Instant::now();
    let raw_abs_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    // Scanning a single file: treat its directory as the effective root.
    let abs_root = if raw_abs_root.is_file() {
        raw_abs_root
            .parent()
            .map(|p| p.to_path_buf())
            .unwrap_or(raw_abs_root)
    } else {
        raw_abs_root
    };
    let abs_project_root = project_root
        .canonicalize()
        .unwrap_or_else(|_| project_root.to_path_buf());
    // Some(prefix) when the scan root sits inside the project; used to build
    // project-relative paths for glob matching.
    let root_in_project = abs_root
        .strip_prefix(&abs_project_root)
        .ok()
        .map(|p| p.to_path_buf());

    let mut findings = Vec::new();
    let source_registry = builtin_registry();

    // Explicitly requested rules run even when disabled in config.
    let explicitly_requested = |r: &&Rule| {
        filter_rule.is_some_and(|f| r.id == f) || filter_ids.is_some_and(|ids| ids.contains(&r.id))
    };
    let active_rules: Vec<&Rule> = rules
        .iter()
        .filter(|r| r.enabled || explicitly_requested(r))
        .filter(|r| filter_rule.is_none_or(|f| r.id == f))
        .filter(|r| filter_tag.is_none_or(|t| r.tags.iter().any(|tag| tag == t)))
        .filter(|r| filter_ids.is_none_or(|ids| ids.contains(&r.id)))
        .collect();

    if active_rules.is_empty() {
        return findings;
    }

    let cache = FindingsCache::open(&abs_project_root);
    // Any change to the active rule set invalidates cached findings.
    let rules_hash = compute_rules_hash(&active_rules);
    const ENGINE: &str = "syntax";

    let files = if let Some(explicit) = files {
        explicit
            .iter()
            .filter(|f| support_for_path(f).is_some())
            .cloned()
            .collect()
    } else {
        collect_source_files(root, path_filter, walk_config)
    };
    // One parser and one combined query per grammar, so group files up front.
    let mut files_by_grammar: HashMap<String, Vec<PathBuf>> = HashMap::new();
    for file in files {
        if let Some(lang) = support_for_path(&file) {
            let grammar_name = lang.grammar_name().to_string();
            files_by_grammar.entry(grammar_name).or_default().push(file);
        }
    }

    if debug.timing {
        eprintln!("[timing] file collection: {:?}", start.elapsed());
    }
    let compile_start = std::time::Instant::now();

    // Rules with a language list compile only for those grammars; global
    // rules are tried against every grammar.
    let (specific_rules, global_rules): (Vec<&&Rule>, Vec<&&Rule>) =
        active_rules.iter().partition(|r| !r.languages.is_empty());

    let mut combined_by_grammar: HashMap<String, CombinedQuery> = HashMap::new();
    for grammar_name in files_by_grammar.keys() {
        let grammar = match loader.get(grammar_name) {
            Ok(g) => g,
            Err(e) => {
                let n = files_by_grammar[grammar_name].len();
                eprintln!(
                    "warning: no grammar for {grammar_name} — {n} file(s) skipped by syntax rules ({e}). Run `normalize grammars install` to fix."
                );
                continue;
            }
        };
        if let Some(cq) =
            build_combined_query(grammar_name, &grammar, &specific_rules, &global_rules)
        {
            combined_by_grammar.insert(grammar_name.clone(), cq);
        }
    }

    if debug.timing {
        eprintln!(
            "[timing] query compilation: {:?} ({} grammars)",
            compile_start.elapsed(),
            combined_by_grammar.len()
        );
    }
    let process_start = std::time::Instant::now();

    // All cache writes for this run happen in a single transaction.
    cache.begin();
    for (grammar_name, files) in &files_by_grammar {
        let Some(combined) = combined_by_grammar.get(grammar_name) else {
            continue;
        };
        let Some(grammar) = loader.get(grammar_name).ok() else {
            continue;
        };
        let mut parser = tree_sitter::Parser::new();
        if parser.set_language(&grammar).is_err() {
            continue;
        }

        for file in files {
            let file_key = file.to_string_lossy().into_owned();
            let mtime_nanos = file_mtime_nanos(file);

            // mtime 0 means "could not stat" — never trust the cache then.
            if mtime_nanos > 0
                && let Some(json) = cache.get(&file_key, mtime_nanos, &rules_hash, ENGINE)
            {
                let cached: Vec<CachedFinding> = serde_json::from_str(&json).unwrap_or_default();
                findings.extend(cached.into_iter().map(Finding::from));
                continue;
            }

            let rel_path = file.strip_prefix(root).unwrap_or(file);
            let rel_path_str = rel_path.to_string_lossy();

            let allow_path = allow_path_for_file(rel_path, &rel_path_str, &root_in_project);

            // Unreadable or unparseable files are silently skipped.
            let Ok(content) = std::fs::read_to_string(file) else {
                continue;
            };
            let Some(tree) = parser.parse(&content, None) else {
                continue;
            };

            let skip_ranges = test_region_ranges(grammar_name, &tree, content.as_bytes(), loader);

            let file_ctx = FileContext {
                file,
                content: &content,
                source_registry: &source_registry,
                source_ctx: SourceContext {
                    file_path: file,
                    rel_path: &rel_path_str,
                    project_root: &abs_project_root,
                },
                allow_path_str: &allow_path.display,
                skip_ranges: &skip_ranges,
            };

            let mut file_findings: Vec<Finding> = Vec::new();
            process_file_matches(&file_ctx, &tree, combined, &mut file_findings);

            // Only cache results for files we could stat.
            if mtime_nanos > 0 {
                let cached: Vec<CachedFinding> = file_findings
                    .iter()
                    .cloned()
                    .map(CachedFinding::from)
                    .collect();
                if let Ok(json) = serde_json::to_string(&cached) {
                    cache.put(&file_key, mtime_nanos, &rules_hash, ENGINE, &json);
                }
            }

            findings.extend(file_findings);
        }
    }

    cache.commit();

    if debug.timing {
        eprintln!(
            "[timing] file processing: {:?} ({} findings)",
            process_start.elapsed(),
            findings.len()
        );
        eprintln!("[timing] total: {:?}", start.elapsed());
    }

    findings
}
868
869fn resolve_arg_text<'a>(
871 arg: &'a tree_sitter::QueryPredicateArg,
872 match_: &tree_sitter::QueryMatch,
873 source: &'a [u8],
874) -> Option<&'a str> {
875 match arg {
876 tree_sitter::QueryPredicateArg::Capture(idx) => Some(
877 match_
878 .captures
879 .iter()
880 .find(|c| c.index == *idx)
881 .and_then(|c| c.node.utf8_text(source).ok())
882 .unwrap_or(""),
883 ),
884 tree_sitter::QueryPredicateArg::String(s) => Some(s.as_ref()),
885 }
886}
887
888fn resolve_capture_text<'a>(
890 arg: &'a tree_sitter::QueryPredicateArg,
891 match_: &tree_sitter::QueryMatch,
892 source: &'a [u8],
893) -> Option<&'a str> {
894 match arg {
895 tree_sitter::QueryPredicateArg::Capture(idx) => Some(
896 match_
897 .captures
898 .iter()
899 .find(|c| c.index == *idx)
900 .and_then(|c| c.node.utf8_text(source).ok())
901 .unwrap_or(""),
902 ),
903 _ => None,
904 }
905}
906
907fn eval_eq(
909 args: &[tree_sitter::QueryPredicateArg],
910 match_: &tree_sitter::QueryMatch,
911 source: &[u8],
912 negated: bool,
913) -> Option<bool> {
914 if args.len() < 2 {
915 return None;
916 }
917 let first = resolve_arg_text(&args[0], match_, source)?;
918 let second = resolve_arg_text(&args[1], match_, source)?;
919 let equal = first == second;
920 Some(if negated { !equal } else { equal })
921}
922
923fn eval_match(
925 args: &[tree_sitter::QueryPredicateArg],
926 match_: &tree_sitter::QueryMatch,
927 source: &[u8],
928 negated: bool,
929) -> Option<bool> {
930 if args.len() < 2 {
931 return None;
932 }
933 let capture_text = resolve_capture_text(&args[0], match_, source)?;
934 let pattern = match &args[1] {
935 tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
936 _ => return None,
937 };
938 let regex = regex::Regex::new(pattern).ok()?;
939 let matched = regex.is_match(capture_text);
940 Some(if negated { !matched } else { matched })
941}
942
943fn eval_any_of(
945 args: &[tree_sitter::QueryPredicateArg],
946 match_: &tree_sitter::QueryMatch,
947 source: &[u8],
948) -> Option<bool> {
949 if args.len() < 2 {
950 return None;
951 }
952 let capture_text = resolve_capture_text(&args[0], match_, source)?;
953 let any_match = args[1..].iter().any(|arg| match arg {
954 tree_sitter::QueryPredicateArg::String(s) => s.as_ref() == capture_text,
955 _ => false,
956 });
957 Some(any_match)
958}
959
960pub fn evaluate_predicates(
962 query: &tree_sitter::Query,
963 match_: &tree_sitter::QueryMatch,
964 source: &[u8],
965) -> bool {
966 let predicates = query.general_predicates(match_.pattern_index);
967 for predicate in predicates {
968 let name = predicate.operator.as_ref();
969 let args = &predicate.args;
970
971 let result = match name {
972 "eq?" => eval_eq(args, match_, source, false),
973 "not-eq?" => eval_eq(args, match_, source, true),
974 "match?" => eval_match(args, match_, source, false),
975 "not-match?" => eval_match(args, match_, source, true),
976 "any-of?" => eval_any_of(args, match_, source),
977 _ => None,
978 };
979
980 if result == Some(false) {
982 return false;
983 }
984 }
985 true
986}
987
/// Expands `$name` placeholders in a fix template using the finding's
/// captured texts.
#[cfg(feature = "fix")]
pub fn expand_fix_template(template: &str, captures: &HashMap<String, String>) -> String {
    // Replace longer capture names first so `$name` cannot clobber the
    // prefix of `$name2`. The previous HashMap-order iteration made the
    // outcome nondeterministic whenever one capture name was a prefix of
    // another.
    let mut names: Vec<&String> = captures.keys().collect();
    names.sort_by_key(|n| std::cmp::Reverse(n.len()));
    let mut result = template.to_string();
    for name in names {
        let placeholder = format!("${}", name);
        result = result.replace(&placeholder, &captures[name]);
    }
    result
}
999
/// Applies the fix templates of `findings` to their files on disk, returning
/// the number of files modified.
///
/// Per file, fixes are applied in descending `start_byte` order so earlier
/// byte offsets remain valid while the tail of the buffer is rewritten; a
/// finding overlapping an already-applied fix is skipped.
#[cfg(feature = "fix")]
pub fn apply_fixes(findings: &[Finding]) -> std::io::Result<usize> {
    // Group only the findings that actually carry a fix, by file.
    let mut by_file: HashMap<&PathBuf, Vec<&Finding>> = HashMap::new();
    for finding in findings {
        if finding.fix.is_some() {
            by_file.entry(&finding.file).or_default().push(finding);
        }
    }

    let mut files_modified = 0;

    for (file, mut file_findings) in by_file {
        // Back-to-front so each splice leaves earlier offsets untouched.
        file_findings.sort_by(|a, b| b.start_byte.cmp(&a.start_byte));

        let mut content = std::fs::read_to_string(file)?;
        // Byte ranges already rewritten; overlapping fixes are dropped.
        let mut applied: Vec<(usize, usize)> = Vec::new();
        let mut file_changed = false;

        for finding in file_findings {
            let overlaps = applied
                .iter()
                .any(|&(s, e)| finding.start_byte < e && finding.end_byte > s);
            if overlaps {
                continue;
            }

            let Some(fix_template) = finding.fix.as_ref() else {
                continue;
            };
            let replacement = expand_fix_template(fix_template, &finding.captures);

            // Splice the replacement over the finding's byte span.
            let before = &content[..finding.start_byte];
            let after = &content[finding.end_byte..];
            content = format!("{}{}{}", before, replacement, after);

            applied.push((finding.start_byte, finding.end_byte));
            file_changed = true;
        }

        if file_changed {
            std::fs::write(file, &content)?;
            files_modified += 1;
        }
    }

    Ok(files_modified)
}
1067
1068fn collect_source_files(
1070 root: &Path,
1071 filter: &normalize_rules_config::PathFilter,
1072 walk_config: &normalize_rules_config::WalkConfig,
1073) -> Vec<PathBuf> {
1074 let mut files = Vec::new();
1075
1076 let ignore_files = walk_config.ignore_files();
1077 let has_gitignore = ignore_files.contains(&".gitignore");
1078 let mut builder = ignore::WalkBuilder::new(root);
1079 builder
1080 .hidden(false)
1081 .git_ignore(has_gitignore)
1082 .git_global(has_gitignore)
1083 .git_exclude(has_gitignore);
1084 for file in &ignore_files {
1086 if *file != ".gitignore" {
1087 let ignore_path = root.join(file);
1088 if ignore_path.exists() {
1089 builder.add_ignore(ignore_path);
1090 }
1091 }
1092 }
1093 let excludes = walk_config.compiled_excludes(root);
1097 let root_owned = root.to_path_buf();
1098 builder.filter_entry(move |e| {
1099 let path = e.path();
1100 let rel = path.strip_prefix(&root_owned).unwrap_or(path);
1101 if rel.as_os_str().is_empty() {
1102 return true;
1103 }
1104 let is_dir = e.file_type().is_some_and(|ft| ft.is_dir());
1105 !excludes
1106 .matched_path_or_any_parents(rel, is_dir)
1107 .is_ignore()
1108 });
1109 let walker = builder.build();
1110
1111 for entry in walker.flatten() {
1112 let path = entry.path();
1113 if path.is_file() && support_for_path(path).is_some() {
1114 if !filter.is_empty() {
1115 let rel = path.strip_prefix(root).unwrap_or(path);
1116 if !filter.matches_path(rel) {
1117 continue;
1118 }
1119 }
1120 files.push(path.to_path_buf());
1121 }
1122 }
1123
1124 files
1125}
1126
/// Splits a tree-sitter query source into its top-level parenthesized
/// patterns, skipping `;` line comments and double-quoted strings (with
/// backslash escapes) so delimiters inside them are not counted.
fn split_query_patterns(query_str: &str) -> Vec<&str> {
    let bytes = query_str.as_bytes();
    let mut out = Vec::new();
    let mut nesting = 0i32;
    let mut open_at: Option<usize> = None;
    let mut pos = 0;

    while pos < bytes.len() {
        match bytes[pos] {
            // Line comment: consume up to (not including) the newline.
            b';' => {
                while pos < bytes.len() && bytes[pos] != b'\n' {
                    pos += 1;
                }
            }
            b'(' => {
                open_at = open_at.or(Some(pos));
                nesting += 1;
                pos += 1;
            }
            b')' => {
                nesting -= 1;
                pos += 1;
                // Back at depth zero: the slice from the first '(' up to and
                // including this ')' is one complete top-level pattern.
                if nesting == 0 {
                    if let Some(begin) = open_at.take() {
                        out.push(&query_str[begin..pos]);
                    }
                }
            }
            // String literal: honor backslash escapes, consume closing quote.
            b'"' => {
                pos += 1;
                while pos < bytes.len() && bytes[pos] != b'"' {
                    if bytes[pos] == b'\\' {
                        pos += 1;
                    }
                    pos += 1;
                }
                pos += 1;
            }
            _ => {
                pos += 1;
            }
        }
    }

    out
}
1181
1182#[cfg(test)]
1183mod tests {
1184 use super::*;
1185 use normalize_languages::GrammarLoader;
1186 use normalize_languages::parsers::grammar_loader;
1187 use std::sync::Arc;
1188 use streaming_iterator::StreamingIterator;
1189
1190 fn loader() -> Arc<GrammarLoader> {
1191 grammar_loader()
1192 }
1193
1194 #[test]
1196 fn test_combined_query_predicate_scoping() {
1197 let loader = loader();
1198 let grammar = loader.get("rust").expect("rust grammar");
1199
1200 let combined_query = r#"
1202; Pattern 0: matches unwrap
1203((call_expression
1204 function: (field_expression field: (field_identifier) @_method)
1205 (#eq? @_method "unwrap")) @match)
1206
1207; Pattern 1: matches expect
1208((call_expression
1209 function: (field_expression field: (field_identifier) @_method)
1210 (#eq? @_method "expect")) @match)
1211"#;
1212
1213 let query = tree_sitter::Query::new(&grammar, combined_query)
1214 .expect("combined query should compile");
1215
1216 assert_eq!(query.pattern_count(), 2, "should have 2 patterns");
1217
1218 let test_code = r#"
1219fn main() {
1220 let x = Some(5);
1221 x.unwrap(); // line 4 - should match pattern 0
1222 x.expect("msg"); // line 5 - should match pattern 1
1223 x.map(|v| v); // line 6 - should NOT match
1224}
1225"#;
1226
1227 let mut parser = tree_sitter::Parser::new();
1228 parser.set_language(&grammar).unwrap();
1230 let tree = parser.parse(test_code, None).unwrap();
1232
1233 let mut cursor = tree_sitter::QueryCursor::new();
1234 let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());
1235
1236 let mut results: Vec<(usize, String)> = Vec::new();
1237 while let Some(m) = matches.next() {
1238 if !evaluate_predicates(&query, m, test_code.as_bytes()) {
1240 continue;
1241 }
1242
1243 let match_capture = m
1244 .captures
1245 .iter()
1246 .find(|c| query.capture_names()[c.index as usize] == "match");
1247
1248 if let Some(cap) = match_capture {
1249 let text = cap.node.utf8_text(test_code.as_bytes()).unwrap();
1251 results.push((m.pattern_index, text.to_string()));
1252 }
1253 }
1254
1255 assert_eq!(results.len(), 2, "should have 2 matches, got {:?}", results);
1257
1258 assert!(
1260 results
1261 .iter()
1262 .any(|(idx, text)| *idx == 0 && text.contains("unwrap")),
1263 "pattern 0 should match unwrap, got {:?}",
1264 results
1265 );
1266
1267 assert!(
1269 results
1270 .iter()
1271 .any(|(idx, text)| *idx == 1 && text.contains("expect")),
1272 "pattern 1 should match expect, got {:?}",
1273 results
1274 );
1275 }
1276
1277 #[test]
1279 fn test_combined_rules_single_traversal() {
1280 let loader = loader();
1281 let grammar = loader.get("rust").expect("rust grammar");
1282
1283 let rules_queries = [
1285 (
1286 "unwrap-rule",
1287 r#"((call_expression function: (field_expression field: (field_identifier) @_m) (#eq? @_m "unwrap")) @match)"#,
1288 ),
1289 (
1290 "dbg-rule",
1291 r#"((macro_invocation macro: (identifier) @_name (#eq? @_name "dbg")) @match)"#,
1292 ),
1293 ];
1294
1295 let combined = rules_queries
1297 .iter()
1298 .map(|(_, q)| *q)
1299 .collect::<Vec<_>>()
1300 .join("\n\n");
1301
1302 let query =
1303 tree_sitter::Query::new(&grammar, &combined).expect("combined query should compile");
1304
1305 let test_code = r#"
1306fn main() {
1307 let x = Some(5);
1308 dbg!(x); // should match pattern 1 (dbg-rule)
1309 x.unwrap(); // should match pattern 0 (unwrap-rule)
1310}
1311"#;
1312
1313 let mut parser = tree_sitter::Parser::new();
1314 parser.set_language(&grammar).unwrap();
1316 let tree = parser.parse(test_code, None).unwrap();
1318
1319 let mut cursor = tree_sitter::QueryCursor::new();
1320 let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());
1321
1322 let mut pattern_indices: Vec<usize> = Vec::new();
1323 while let Some(m) = matches.next() {
1324 if evaluate_predicates(&query, m, test_code.as_bytes()) {
1325 pattern_indices.push(m.pattern_index);
1326 }
1327 }
1328
1329 assert!(
1331 pattern_indices.contains(&0),
1332 "should match pattern 0 (unwrap)"
1333 );
1334 assert!(pattern_indices.contains(&1), "should match pattern 1 (dbg)");
1335 }
1336
    /// `test_region_ranges` should return one byte range per `#[cfg(test)]`
    /// module, and `in_any_range` should classify nodes by those ranges:
    /// unwraps inside the two cfg(test) modules count as test code, while the
    /// top-level unwrap and the one in a plain `mod` do not.
    #[test]
    fn test_test_region_ranges_skips_inline_cfg_test_module() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");
        // Fixture: four unwraps — line 4 (top level), line 11 (`mod tests`,
        // cfg(test)), line 19 (`mod more_tests`, cfg(test) plus a second
        // attribute), line 25 (regular module, NOT a test region).
        let source = r#"
fn outer() {
    let x: Option<i32> = None;
    x.unwrap();
}

#[cfg(test)]
mod tests {
    fn inner() {
        let y: Option<i32> = None;
        y.unwrap();
    }
}

#[cfg(test)]
#[allow(dead_code)]
mod more_tests {
    fn inner2() {
        None::<i32>.unwrap();
    }
}

mod regular_mod {
    fn other() {
        None::<i32>.unwrap();
    }
}
"#;
        let mut parser = tree_sitter::Parser::new();
        assert!(parser.set_language(&grammar).is_ok());
        let tree = parser.parse(source, None).expect("parse");
        let ranges = test_region_ranges("rust", &tree, source.as_bytes(), &loader);
        // Both cfg(test) modules must be detected — including the one with an
        // extra attribute — and the plain module must not be.
        assert_eq!(
            ranges.len(),
            2,
            "expected two cfg(test) modules, got {ranges:?}"
        );

        // Find every `.unwrap()` call so each one can be classified.
        let unwrap_query = tree_sitter::Query::new(
            &grammar,
            r#"((call_expression function: (field_expression field: (field_identifier) @m)) @call (#eq? @m "unwrap"))"#,
        )
        .expect("compile");
        // Capture index of `@call`; defensively falls back to 0 if missing.
        let call_idx = unwrap_query
            .capture_names()
            .iter()
            .position(|n| *n == "call")
            .unwrap_or(0);
        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&unwrap_query, tree.root_node(), source.as_bytes());
        // (1-based source line, is-inside-a-test-region) per unwrap call.
        let mut classifications: Vec<(usize, bool)> = Vec::new();
        while let Some(m) = matches.next() {
            for cap in m.captures {
                if cap.index as usize == call_idx {
                    let line = cap.node.start_position().row + 1;
                    let in_test = in_any_range(cap.node.start_byte(), cap.node.end_byte(), &ranges);
                    classifications.push((line, in_test));
                }
            }
        }
        let outside: Vec<usize> = classifications
            .iter()
            .filter_map(|(l, t)| if !*t { Some(*l) } else { None })
            .collect();
        let inside: Vec<usize> = classifications
            .iter()
            .filter_map(|(l, t)| if *t { Some(*l) } else { None })
            .collect();
        // Top-level unwrap + the regular_mod unwrap are outside test regions.
        assert_eq!(
            outside.len(),
            2,
            "expected 2 unwraps outside cfg(test), got {classifications:?}"
        );
        // The two cfg(test)-module unwraps are inside.
        assert_eq!(
            inside.len(),
            2,
            "expected 2 unwraps inside cfg(test), got {classifications:?}"
        );
    }
1422
    /// Per-rule `applies_in_tests` opt-in: with `false` (conservative
    /// default used here) findings located inside `#[cfg(test)]` regions are
    /// dropped; with `true` they are kept.
    #[test]
    fn test_applies_in_tests_per_rule_opt_in() {
        let loader = loader();
        let tmp = tempfile::tempdir().expect("tempdir");
        let file_path = tmp.path().join("lib.rs");
        // Fixture: one unwrap at top level (line 3) and one inside a
        // cfg(test) module (line 10).
        std::fs::write(
            &file_path,
            r#"fn outer() {
    let x: Option<i32> = None;
    x.unwrap();
}

#[cfg(test)]
mod tests {
    fn inner() {
        let y: Option<i32> = None;
        y.unwrap();
    }
}
"#,
        )
        .expect("write fixture");

        // Matches any `.unwrap()` method call.
        let query_str = r#"((call_expression
    function: (field_expression field: (field_identifier) @_m)
    (#eq? @_m "unwrap")) @match)"#;

        // Builds a rule from `#`-commented frontmatter plus the query above,
        // varying only the id and the applies_in_tests flag.
        let make_rule = |id: &str, applies_in_tests: bool| {
            let frontmatter = format!(
                "# ---\n# id = \"{id}\"\n# severity = \"warning\"\n# message = \"unwrap\"\n# languages = [\"rust\"]\n# applies_in_tests = {applies_in_tests}\n# ---\n\n{query_str}\n"
            );
            crate::parse_rule_content(&frontmatter, id, false).expect("parse rule")
        };

        let path_filter = normalize_rules_config::PathFilter::default();
        let walk_config = normalize_rules_config::WalkConfig::default();
        let debug = DebugFlags::default();

        // applies_in_tests = false: only the non-test finding survives.
        let rules_default = vec![make_rule("test/unwrap-default", false)];
        let findings = run_rules(
            &rules_default,
            tmp.path(),
            tmp.path(),
            &loader,
            None,
            None,
            None,
            &debug,
            None,
            &path_filter,
            &walk_config,
        );
        assert_eq!(
            findings.len(),
            1,
            "applies_in_tests=false should drop the cfg(test) finding, got {findings:?}"
        );
        assert_eq!(findings[0].start_line, 3, "outer unwrap is on line 3");

        // applies_in_tests = true: both unwraps must be reported.
        let rules_optin = vec![make_rule("test/unwrap-in-tests", true)];
        let findings = run_rules(
            &rules_optin,
            tmp.path(),
            tmp.path(),
            &loader,
            None,
            None,
            None,
            &debug,
            None,
            &path_filter,
            &walk_config,
        );
        assert_eq!(
            findings.len(),
            2,
            "applies_in_tests=true should keep both unwraps, got {findings:?}"
        );
    }
1507
1508 #[test]
1509 fn test_split_query_patterns() {
1510 let query = r#"
1511; Pattern 1: comment
1512((comment) @match (#match? @match "TODO"))
1513; Pattern 2: line_comment
1514((line_comment) @match (#match? @match "TODO"))
1515"#;
1516 let patterns = split_query_patterns(query);
1517 assert_eq!(patterns.len(), 2);
1518 assert!(patterns[0].contains("comment"));
1519 assert!(patterns[1].contains("line_comment"));
1520 }
1521
1522 #[test]
1523 fn test_cross_grammar_pattern_fallback() {
1524 let loader = loader();
1527 let grammar = loader.get("rust").expect("rust grammar");
1528
1529 let query_str = r#"((comment) @match (#match? @match "TODO"))
1530((line_comment) @match (#match? @match "TODO"))"#;
1531
1532 assert!(tree_sitter::Query::new(&grammar, query_str).is_err());
1534
1535 let patterns = split_query_patterns(query_str);
1537 let valid: Vec<&str> = patterns
1538 .into_iter()
1539 .filter(|p| tree_sitter::Query::new(&grammar, p).is_ok())
1540 .collect();
1541 assert_eq!(valid.len(), 1, "only line_comment should compile for Rust");
1542 assert!(valid[0].contains("line_comment"));
1543 }
1544}
1545
#[cfg(test)]
mod glob_tests {
    use glob::Pattern;

    /// Spot-checks the glob syntax used by allow-list patterns: `**` spans
    /// directory components, `*` matches within a single component, and a
    /// literal path matches itself.
    #[test]
    fn test_glob_allow_patterns() {
        // (pattern, candidate path, expected match result)
        let cases = [
            (
                "crates/normalize/src/rg/**",
                "crates/normalize/src/rg/flags/defs.rs",
                true,
            ),
            (
                "crates/normalize/src/rg/**",
                "crates/normalize/src/rg/mod.rs",
                true,
            ),
            ("**/tests/**", "crates/normalize/tests/foo.rs", true),
            (
                "**/tests/fixtures/**",
                "crates/normalize-syntax-rules/tests/fixtures/rust/foo.rs",
                true,
            ),
            (
                "crates/normalize-facts-rules-interpret/src/tests.rs",
                "crates/normalize-facts-rules-interpret/src/tests.rs",
                true,
            ),
            (
                "crates/normalize-manifest/src/*.rs",
                "crates/normalize-manifest/src/nuget.rs",
                true,
            ),
        ];
        for &(p, path, expected) in &cases {
            let compiled = Pattern::new(p).unwrap();
            assert_eq!(compiled.matches(path), expected, "Pattern: {p}, Path: {path}");
        }
    }
}