1use crate::sources::{SourceContext, SourceRegistry, builtin_registry};
4use crate::{Rule, Severity};
5use normalize_languages::{GrammarLoader, support_for_path};
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use streaming_iterator::StreamingIterator;
9
/// A single rule violation discovered in a source file.
#[derive(Debug)]
pub struct Finding {
    /// Id of the rule that produced this finding.
    pub rule_id: String,
    /// Path of the offending file, as yielded by the file walk.
    pub file: PathBuf,
    /// 1-based line where the matched node starts.
    pub start_line: usize,
    /// 1-based column where the matched node starts.
    pub start_col: usize,
    /// 1-based line where the matched node ends.
    pub end_line: usize,
    /// 1-based column where the matched node ends.
    pub end_col: usize,
    /// Byte offset of the start of the matched node (used when applying fixes).
    pub start_byte: usize,
    /// Byte offset just past the end of the matched node (used when applying fixes).
    pub end_byte: usize,
    /// Human-readable message copied from the rule.
    pub message: String,
    /// Severity copied from the rule.
    pub severity: Severity,
    /// First line of the matched node's source text.
    pub matched_text: String,
    /// Optional replacement template containing `$capture` placeholders.
    pub fix: Option<String>,
    /// Capture name -> matched node text, used for fix-template expansion.
    pub captures: HashMap<String, String>,
}
29
/// Debug instrumentation toggles parsed from debug argument strings.
#[derive(Default)]
pub struct DebugFlags {
    pub timing: bool,
}

impl DebugFlags {
    /// Builds flags from raw argument strings; the literal `"all"` turns on
    /// every flag, otherwise each flag is enabled by its own name.
    pub fn from_args(args: &[String]) -> Self {
        let mut enable_all = false;
        let mut timing = false;
        for arg in args {
            match arg.as_str() {
                "all" => enable_all = true,
                "timing" => timing = true,
                _ => {}
            }
        }
        Self {
            timing: enable_all || timing,
        }
    }
}
44
/// Returns true when `line` carries a `moss-allow:` comment naming `rule_id`.
///
/// The rule id must be followed by end-of-line, whitespace, a `-`, or a
/// closing `*/`, so an id does not match arbitrary longer text it happens
/// to prefix.
fn line_has_allow_comment(line: &str, rule_id: &str) -> bool {
    // Named constant replaces the previous hard-coded `pos + 11` offset.
    const MARKER: &str = "moss-allow:";

    let Some(pos) = line.find(MARKER) else {
        return false;
    };
    let after = line[pos + MARKER.len()..].trim_start();
    let Some(rest) = after.strip_prefix(rule_id) else {
        return false;
    };
    rest.is_empty()
        || rest.starts_with(char::is_whitespace)
        || rest.starts_with('-')
        || rest.starts_with("*/")
}
65
66fn is_allowed_by_comment(content: &str, start_line: usize, rule_id: &str) -> bool {
69 let lines: Vec<&str> = content.lines().collect();
70 let line_idx = start_line.saturating_sub(1); if let Some(line) = lines.get(line_idx) {
74 if line_has_allow_comment(line, rule_id) {
75 return true;
76 }
77 }
78
79 if line_idx > 0 {
81 if let Some(line) = lines.get(line_idx - 1) {
82 if line_has_allow_comment(line, rule_id) {
83 return true;
84 }
85 }
86 }
87
88 false
89}
90
91fn check_requires(rule: &Rule, registry: &SourceRegistry, ctx: &SourceContext) -> bool {
99 if rule.requires.is_empty() {
100 return true;
101 }
102
103 for (key, expected) in &rule.requires {
104 let actual = match registry.get(ctx, key) {
105 Some(v) => v,
106 None => return false, };
108
109 let matches = if let Some(rest) = expected.strip_prefix(">=") {
111 actual >= rest.to_string()
112 } else if let Some(rest) = expected.strip_prefix("<=") {
113 actual <= rest.to_string()
114 } else if let Some(rest) = expected.strip_prefix('!') {
115 actual != rest
116 } else {
117 actual == *expected
118 };
119
120 if !matches {
121 return false;
122 }
123 }
124
125 true
126}
127
/// All compiled rule queries for one grammar merged into a single
/// tree-sitter query, so each file is parsed and traversed only once.
struct CombinedQuery<'a> {
    /// The concatenated query; its pattern order follows `pattern_to_rule`.
    query: tree_sitter::Query,
    /// Indexed by combined pattern index: the rule that owns that pattern,
    /// plus the index of the `@match` capture name in `query`'s capture
    /// table (looked up once and stored for every entry).
    pattern_to_rule: Vec<(&'a Rule, usize)>,
}
134
/// Runs every active rule over all supported source files under `root` and
/// returns the findings.
///
/// Rules are grouped per grammar and their query sources are concatenated
/// into one combined query (see [`CombinedQuery`]) so each file is parsed
/// and traversed once regardless of how many rules apply.
///
/// * `filter_rule` — when `Some`, only the rule with that id runs.
/// * `debug` — `timing` prints per-phase durations to stderr.
pub fn run_rules(
    rules: &[Rule],
    root: &Path,
    loader: &GrammarLoader,
    filter_rule: Option<&str>,
    debug: &DebugFlags,
) -> Vec<Finding> {
    let start = std::time::Instant::now();

    let mut findings = Vec::new();
    let source_registry = builtin_registry();

    // Restrict to the requested rule when a filter is given.
    let active_rules: Vec<&Rule> = rules
        .iter()
        .filter(|r| filter_rule.map_or(true, |f| r.id == f))
        .collect();

    if active_rules.is_empty() {
        return findings;
    }

    // Phase 1: discover files and bucket them by grammar name.
    let files = collect_source_files(root);
    let mut files_by_grammar: HashMap<String, Vec<PathBuf>> = HashMap::new();
    for file in files {
        if let Some(lang) = support_for_path(&file) {
            let grammar_name = lang.grammar_name().to_string();
            files_by_grammar.entry(grammar_name).or_default().push(file);
        }
    }

    if debug.timing {
        eprintln!("[timing] file collection: {:?}", start.elapsed());
    }
    let compile_start = std::time::Instant::now();

    // Rules with an explicit language list run only on those grammars;
    // rules without one are tried against every grammar.
    let (specific_rules, global_rules): (Vec<&&Rule>, Vec<&&Rule>) =
        active_rules.iter().partition(|r| !r.languages.is_empty());

    // Phase 2: per grammar, compile each rule's query individually (to learn
    // its pattern count and drop rules that do not compile for this
    // grammar), then compile one combined query from the concatenated text.
    let mut combined_by_grammar: HashMap<String, CombinedQuery> = HashMap::new();

    for grammar_name in files_by_grammar.keys() {
        let Some(grammar) = loader.get(grammar_name) else {
            continue;
        };

        let mut compiled_rules: Vec<(&Rule, tree_sitter::Query)> = Vec::new();

        for rule in &specific_rules {
            if rule.languages.iter().any(|l| l == grammar_name) {
                if let Ok(q) = tree_sitter::Query::new(&grammar, &rule.query_str) {
                    compiled_rules.push((rule, q));
                }
            }
        }

        // Global rules participate only when they compile for this grammar.
        for rule in &global_rules {
            if let Ok(q) = tree_sitter::Query::new(&grammar, &rule.query_str) {
                compiled_rules.push((rule, q));
            }
        }

        if compiled_rules.is_empty() {
            continue;
        }

        let combined_str = compiled_rules
            .iter()
            .map(|(r, _)| r.query_str.as_str())
            .collect::<Vec<_>>()
            .join("\n\n");

        let query = match tree_sitter::Query::new(&grammar, &combined_str) {
            Ok(q) => q,
            Err(e) => {
                eprintln!("Warning: combined query failed for {}: {}", grammar_name, e);
                continue;
            }
        };

        // The `@match` capture name is looked up once in the combined
        // query's capture table (0 when absent) and shared by every entry.
        let mut pattern_to_rule: Vec<(&Rule, usize)> = Vec::new();
        let combined_match_idx = query
            .capture_names()
            .iter()
            .position(|n| *n == "match")
            .unwrap_or(0);

        // Pattern indices in the combined query follow concatenation order,
        // so each rule owns as many consecutive pattern slots as its
        // individually-compiled query contained.
        for (rule, individual_query) in &compiled_rules {
            for _ in 0..individual_query.pattern_count() {
                pattern_to_rule.push((*rule, combined_match_idx));
            }
        }

        combined_by_grammar.insert(
            grammar_name.clone(),
            CombinedQuery {
                query,
                pattern_to_rule,
            },
        );
    }

    if debug.timing {
        eprintln!(
            "[timing] query compilation: {:?} ({} grammars)",
            compile_start.elapsed(),
            combined_by_grammar.len()
        );
    }
    let process_start = std::time::Instant::now();

    // Phase 3: parse each file once and run the combined query over it.
    for (grammar_name, files) in &files_by_grammar {
        let Some(combined) = combined_by_grammar.get(grammar_name) else {
            continue;
        };

        let Some(grammar) = loader.get(grammar_name) else {
            continue;
        };

        let mut parser = tree_sitter::Parser::new();
        if parser.set_language(&grammar).is_err() {
            continue;
        }

        for file in files {
            let rel_path = file.strip_prefix(root).unwrap_or(file);
            let rel_path_str = rel_path.to_string_lossy();

            let source_ctx = SourceContext {
                file_path: file,
                rel_path: &rel_path_str,
                project_root: root,
            };

            // Unreadable or unparseable files are silently skipped.
            let content = match std::fs::read_to_string(file) {
                Ok(c) => c,
                Err(_) => continue,
            };

            let tree = match parser.parse(&content, None) {
                Some(t) => t,
                None => continue,
            };

            let mut cursor = tree_sitter::QueryCursor::new();
            let mut matches = cursor.matches(&combined.query, tree.root_node(), content.as_bytes());

            while let Some(m) = matches.next() {
                // Map the combined pattern index back to its owning rule.
                let Some((rule, match_idx)) = combined.pattern_to_rule.get(m.pattern_index) else {
                    continue;
                };

                // Per-rule path allowlist.
                if rule.allow.iter().any(|p| p.matches(&rel_path_str)) {
                    continue;
                }

                // Rule-level `requires` conditions against source values.
                if !check_requires(rule, &source_registry, &source_ctx) {
                    continue;
                }

                // Predicates are evaluated here (see `evaluate_predicates`).
                if !evaluate_predicates(&combined.query, m, content.as_bytes()) {
                    continue;
                }

                // Only the `@match` capture produces a finding.
                let capture = m.captures.iter().find(|c| c.index as usize == *match_idx);

                if let Some(cap) = capture {
                    let node = cap.node;
                    let start_line = node.start_position().row + 1;

                    // Honor inline `moss-allow:` suppression comments.
                    if is_allowed_by_comment(&content, start_line, &rule.id) {
                        continue;
                    }

                    let text = node.utf8_text(content.as_bytes()).unwrap_or("");

                    // Record every capture's text for fix-template expansion.
                    let mut captures_map: HashMap<String, String> = HashMap::new();
                    for cap in m.captures {
                        let name = combined.query.capture_names()[cap.index as usize].to_string();
                        if let Ok(cap_text) = cap.node.utf8_text(content.as_bytes()) {
                            captures_map.insert(name, cap_text.to_string());
                        }
                    }

                    findings.push(Finding {
                        rule_id: rule.id.clone(),
                        file: file.clone(),
                        start_line,
                        start_col: node.start_position().column + 1,
                        end_line: node.end_position().row + 1,
                        end_col: node.end_position().column + 1,
                        start_byte: node.start_byte(),
                        end_byte: node.end_byte(),
                        message: rule.message.clone(),
                        severity: rule.severity,
                        // Only the first line of a multi-line match is kept.
                        matched_text: text.lines().next().unwrap_or("").to_string(),
                        fix: rule.fix.clone(),
                        captures: captures_map,
                    });
                }
            }
        }
    }

    if debug.timing {
        eprintln!(
            "[timing] file processing: {:?} ({} findings)",
            process_start.elapsed(),
            findings.len()
        );
        eprintln!("[timing] total: {:?}", start.elapsed());
    }

    findings
}
367
368pub fn evaluate_predicates(
370 query: &tree_sitter::Query,
371 match_: &tree_sitter::QueryMatch,
372 source: &[u8],
373) -> bool {
374 let predicates = query.general_predicates(match_.pattern_index);
375 for predicate in predicates {
376 let name = &predicate.operator;
377 let args = &predicate.args;
378
379 match name.as_ref() {
380 "eq?" | "not-eq?" => {
381 if args.len() < 2 {
382 continue;
383 }
384
385 let first_text = match &args[0] {
387 tree_sitter::QueryPredicateArg::Capture(idx) => match_
388 .captures
389 .iter()
390 .find(|c| c.index == *idx)
391 .and_then(|c| c.node.utf8_text(source).ok())
392 .unwrap_or(""),
393 tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
394 };
395
396 let second_text = match &args[1] {
398 tree_sitter::QueryPredicateArg::Capture(idx) => match_
399 .captures
400 .iter()
401 .find(|c| c.index == *idx)
402 .and_then(|c| c.node.utf8_text(source).ok())
403 .unwrap_or(""),
404 tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
405 };
406
407 let equal = first_text == second_text;
408 if name.as_ref() == "eq?" && !equal {
409 return false;
410 }
411 if name.as_ref() == "not-eq?" && equal {
412 return false;
413 }
414 }
415 "match?" | "not-match?" => {
416 if args.len() < 2 {
417 continue;
418 }
419
420 let capture_text = match &args[0] {
422 tree_sitter::QueryPredicateArg::Capture(idx) => match_
423 .captures
424 .iter()
425 .find(|c| c.index == *idx)
426 .and_then(|c| c.node.utf8_text(source).ok())
427 .unwrap_or(""),
428 _ => continue,
429 };
430
431 let pattern = match &args[1] {
433 tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
434 _ => continue,
435 };
436
437 let regex = match regex::Regex::new(pattern) {
439 Ok(r) => r,
440 Err(_) => continue,
441 };
442
443 let matches = regex.is_match(capture_text);
444 if name.as_ref() == "match?" && !matches {
445 return false;
446 }
447 if name.as_ref() == "not-match?" && matches {
448 return false;
449 }
450 }
451 "any-of?" => {
452 if args.len() < 2 {
453 continue;
454 }
455
456 let capture_text = match &args[0] {
458 tree_sitter::QueryPredicateArg::Capture(idx) => match_
459 .captures
460 .iter()
461 .find(|c| c.index == *idx)
462 .and_then(|c| c.node.utf8_text(source).ok())
463 .unwrap_or(""),
464 _ => continue,
465 };
466
467 let any_match = args[1..].iter().any(|arg| match arg {
469 tree_sitter::QueryPredicateArg::String(s) => s.as_ref() == capture_text,
470 _ => false,
471 });
472
473 if !any_match {
474 return false;
475 }
476 }
477 _ => {
478 }
480 }
481 }
482 true
483}
484
/// Expands `$name` placeholders in a fix template with captured node text.
///
/// Names are substituted longest-first so a capture whose name is a prefix
/// of another (e.g. `$m` vs `$match`) cannot clobber the longer
/// placeholder. The previous implementation replaced in `HashMap`
/// iteration order, which made such cases nondeterministic.
pub fn expand_fix_template(template: &str, captures: &HashMap<String, String>) -> String {
    let mut names: Vec<&String> = captures.keys().collect();
    // Longest first; ties broken alphabetically for determinism.
    names.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));

    let mut result = template.to_string();
    for name in names {
        let placeholder = format!("${}", name);
        result = result.replace(&placeholder, &captures[name]);
    }
    result
}
495
496pub fn apply_fixes(findings: &[Finding]) -> std::io::Result<usize> {
499 let mut by_file: HashMap<&PathBuf, Vec<&Finding>> = HashMap::new();
501 for finding in findings {
502 if finding.fix.is_some() {
503 by_file.entry(&finding.file).or_default().push(finding);
504 }
505 }
506
507 let mut files_modified = 0;
508
509 for (file, mut file_findings) in by_file {
510 file_findings.sort_by(|a, b| b.start_byte.cmp(&a.start_byte));
512
513 let mut content = std::fs::read_to_string(file)?;
514
515 for finding in file_findings {
516 let fix_template = finding.fix.as_ref().unwrap();
517 let replacement = expand_fix_template(fix_template, &finding.captures);
518
519 let before = &content[..finding.start_byte];
521 let after = &content[finding.end_byte..];
522 content = format!("{}{}{}", before, replacement, after);
523 }
524
525 std::fs::write(file, &content)?;
526 files_modified += 1;
527 }
528
529 Ok(files_modified)
530}
531
532fn collect_source_files(root: &Path) -> Vec<PathBuf> {
534 let mut files = Vec::new();
535
536 let walker = ignore::WalkBuilder::new(root)
537 .hidden(false)
538 .git_ignore(true)
539 .build();
540
541 for entry in walker.flatten() {
542 let path = entry.path();
543 if path.is_file() && support_for_path(path).is_some() {
544 files.push(path.to_path_buf());
545 }
546 }
547
548 files
549}
550
551#[cfg(test)]
552mod tests {
553 use super::*;
554 use normalize_languages::GrammarLoader;
555 use streaming_iterator::StreamingIterator;
556
    // Constructs a fresh grammar loader for each test.
    fn loader() -> GrammarLoader {
        GrammarLoader::new()
    }
560
    /// When two rule queries are concatenated into one combined query, each
    /// `#eq?` predicate must stay scoped to its own pattern: the `unwrap`
    /// predicate must not filter the `expect` pattern or vice versa.
    #[test]
    fn test_combined_query_predicate_scoping() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");

        let combined_query = r#"
; Pattern 0: matches unwrap
((call_expression
 function: (field_expression field: (field_identifier) @_method)
 (#eq? @_method "unwrap")) @match)

; Pattern 1: matches expect
((call_expression
 function: (field_expression field: (field_identifier) @_method)
 (#eq? @_method "expect")) @match)
"#;

        let query = tree_sitter::Query::new(&grammar, combined_query)
            .expect("combined query should compile");

        assert_eq!(query.pattern_count(), 2, "should have 2 patterns");

        let test_code = r#"
fn main() {
    let x = Some(5);
    x.unwrap(); // line 4 - should match pattern 0
    x.expect("msg"); // line 5 - should match pattern 1
    x.map(|v| v); // line 6 - should NOT match
}
"#;

        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(test_code, None).unwrap();

        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());

        // Collect (pattern index, matched text) for every match that passes
        // its own pattern's predicates.
        let mut results: Vec<(usize, String)> = Vec::new();
        while let Some(m) = matches.next() {
            if !evaluate_predicates(&query, m, test_code.as_bytes()) {
                continue;
            }

            let match_capture = m
                .captures
                .iter()
                .find(|c| query.capture_names()[c.index as usize] == "match");

            if let Some(cap) = match_capture {
                let text = cap.node.utf8_text(test_code.as_bytes()).unwrap();
                results.push((m.pattern_index, text.to_string()));
            }
        }

        assert_eq!(results.len(), 2, "should have 2 matches, got {:?}", results);

        assert!(
            results
                .iter()
                .any(|(idx, text)| *idx == 0 && text.contains("unwrap")),
            "pattern 0 should match unwrap, got {:?}",
            results
        );

        assert!(
            results
                .iter()
                .any(|(idx, text)| *idx == 1 && text.contains("expect")),
            "pattern 1 should match expect, got {:?}",
            results
        );
    }
640
    /// Two unrelated rules concatenated into one combined query should both
    /// produce matches from a single traversal, with pattern indices
    /// following concatenation order.
    #[test]
    fn test_combined_rules_single_traversal() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");

        let rules_queries = vec![
            (
                "unwrap-rule",
                r#"((call_expression function: (field_expression field: (field_identifier) @_m) (#eq? @_m "unwrap")) @match)"#,
            ),
            (
                "dbg-rule",
                r#"((macro_invocation macro: (identifier) @_name (#eq? @_name "dbg")) @match)"#,
            ),
        ];

        // Same concatenation scheme run_rules uses for CombinedQuery.
        let combined = rules_queries
            .iter()
            .map(|(_, q)| *q)
            .collect::<Vec<_>>()
            .join("\n\n");

        let query =
            tree_sitter::Query::new(&grammar, &combined).expect("combined query should compile");

        let test_code = r#"
fn main() {
    let x = Some(5);
    dbg!(x); // should match pattern 1 (dbg-rule)
    x.unwrap(); // should match pattern 0 (unwrap-rule)
}
"#;

        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(test_code, None).unwrap();

        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());

        // Record which combined pattern produced each surviving match.
        let mut pattern_indices: Vec<usize> = Vec::new();
        while let Some(m) = matches.next() {
            if evaluate_predicates(&query, m, test_code.as_bytes()) {
                pattern_indices.push(m.pattern_index);
            }
        }

        assert!(
            pattern_indices.contains(&0),
            "should match pattern 0 (unwrap)"
        );
        assert!(pattern_indices.contains(&1), "should match pattern 1 (dbg)");
}
698}