// normalize_syntax_rules/runner.rs
1//! Rule execution with combined query optimization.
2
3use crate::sources::{SourceContext, SourceRegistry, builtin_registry};
4use crate::{Rule, Severity};
5use normalize_languages::{GrammarLoader, support_for_path};
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use streaming_iterator::StreamingIterator;
9
/// A finding from running a rule: a single rule violation located in a file.
#[derive(Debug)]
pub struct Finding {
    /// Identifier of the rule that produced this finding.
    pub rule_id: String,
    /// Path of the matched file, as collected from the scanned root.
    pub file: PathBuf,
    /// Start line of the match (1-based).
    pub start_line: usize,
    /// Start column of the match (1-based).
    pub start_col: usize,
    /// End line of the match (1-based).
    pub end_line: usize,
    /// End column of the match (1-based).
    pub end_col: usize,
    /// Byte offset of the match start within the file's contents.
    pub start_byte: usize,
    /// Byte offset of the match end within the file's contents.
    pub end_byte: usize,
    /// Human-readable message copied from the rule.
    pub message: String,
    /// Severity copied from the rule.
    pub severity: Severity,
    /// First line of the matched node's source text.
    pub matched_text: String,
    /// Auto-fix template (None if no fix available).
    pub fix: Option<String>,
    /// Capture values from the query match (for fix substitution).
    pub captures: HashMap<String, String>,
}
29
/// Debug output categories.
#[derive(Default)]
pub struct DebugFlags {
    /// Emit `[timing]` diagnostics to stderr.
    pub timing: bool,
}

impl DebugFlags {
    /// Parse debug categories from raw argument strings.
    ///
    /// `"all"` turns on every category; `"timing"` turns on timing alone.
    /// Unrecognized values are ignored.
    pub fn from_args(args: &[String]) -> Self {
        let wants = |flag: &str| args.iter().any(|s| s == flag);
        Self {
            timing: wants("all") || wants("timing"),
        }
    }
}
44
/// Check if a line contains a moss-allow comment for the given rule.
/// Supports: `// moss-allow: rule-id` or `/* moss-allow: rule-id */`,
/// optionally followed by ` - reason`.
fn line_has_allow_comment(line: &str, rule_id: &str) -> bool {
    const MARKER: &str = "moss-allow:";
    // Scan every marker occurrence so a line carrying several allow
    // comments (e.g. one per rule) is honored; the previous version only
    // looked at the first `moss-allow:` on the line.
    for (pos, _) in line.match_indices(MARKER) {
        let after = line[pos + MARKER.len()..].trim_start();
        if let Some(rest) = after.strip_prefix(rule_id) {
            // The rule id must not merely be a prefix of a longer id: it is
            // valid only when followed by nothing, whitespace, a dash
            // (start of a free-form reason), or the end of a block comment.
            if rest.is_empty()
                || rest.starts_with(char::is_whitespace)
                || rest.starts_with('-')
                || rest.starts_with("*/")
            {
                return true;
            }
        }
    }
    false
}

/// Check if a finding should be allowed based on inline comments.
/// Checks the line of the finding and the line before it.
///
/// `start_line` is 1-based, matching `Finding::start_line`.
fn is_allowed_by_comment(content: &str, start_line: usize, rule_id: &str) -> bool {
    let line_idx = start_line.saturating_sub(1); // 0-indexed target line
    let first = line_idx.saturating_sub(1); // line before (or target itself at 0)
    // Walk only the (at most two) relevant lines instead of collecting the
    // whole file into a Vec on every finding.
    content
        .lines()
        .enumerate()
        .skip(first)
        .take_while(|(i, _)| *i <= line_idx)
        .any(|(_, line)| line_has_allow_comment(line, rule_id))
}
90
91/// Check if a rule's requires conditions are met for a given file context.
92///
93/// Supports operators:
94/// - `value` - exact match
95/// - `>=value` - greater or equal (for versions/editions)
96/// - `<=value` - less or equal
97/// - `!value` - not equal
98fn check_requires(rule: &Rule, registry: &SourceRegistry, ctx: &SourceContext) -> bool {
99    if rule.requires.is_empty() {
100        return true;
101    }
102
103    for (key, expected) in &rule.requires {
104        let actual = match registry.get(ctx, key) {
105            Some(v) => v,
106            None => return false, // Required source not available
107        };
108
109        // Parse operator prefix
110        let matches = if let Some(rest) = expected.strip_prefix(">=") {
111            actual >= rest.to_string()
112        } else if let Some(rest) = expected.strip_prefix("<=") {
113            actual <= rest.to_string()
114        } else if let Some(rest) = expected.strip_prefix('!') {
115            actual != rest
116        } else {
117            actual == *expected
118        };
119
120        if !matches {
121            return false;
122        }
123    }
124
125    true
126}
127
/// Combined query for a grammar with pattern-to-rule mapping.
///
/// All compiled rule queries for one grammar are concatenated into a single
/// tree-sitter query so each file needs only one traversal.
struct CombinedQuery<'a> {
    /// The concatenated query for this grammar.
    query: tree_sitter::Query,
    /// Maps pattern_index to (rule, match_capture_index_in_combined_query).
    /// Indexed by `QueryMatch::pattern_index`; holds one entry per pattern of
    /// each source rule, in the order the rule queries were joined.
    pattern_to_rule: Vec<(&'a Rule, usize)>,
}
134
/// Run rules against files in a directory.
/// Optimized: combines all rules into single query per grammar for single-traversal matching.
///
/// Pipeline: collect files under `root` and bucket them by grammar name;
/// compile one combined tree-sitter query per grammar from every applicable
/// rule; then parse each file once and run the combined query over it,
/// filtering matches through allow-globs, `requires` conditions, query
/// predicates, and inline `moss-allow:` comments.
///
/// * `rules` - the full rule set; narrowed by `filter_rule` when given.
/// * `root` - directory to scan (see `collect_source_files`).
/// * `loader` - resolves grammars by name; unknown grammars are skipped.
/// * `filter_rule` - when `Some(id)`, only the rule with that exact id runs.
/// * `debug` - enables `[timing]` diagnostics on stderr.
///
/// Returns all findings with 1-based line/column positions.
pub fn run_rules(
    rules: &[Rule],
    root: &Path,
    loader: &GrammarLoader,
    filter_rule: Option<&str>,
    debug: &DebugFlags,
) -> Vec<Finding> {
    let start = std::time::Instant::now();

    let mut findings = Vec::new();
    let source_registry = builtin_registry();

    // Filter rules first: either everything, or exactly the requested id.
    let active_rules: Vec<&Rule> = rules
        .iter()
        .filter(|r| filter_rule.map_or(true, |f| r.id == f))
        .collect();

    if active_rules.is_empty() {
        return findings;
    }

    // Collect all source files and group by grammar
    let files = collect_source_files(root);
    let mut files_by_grammar: HashMap<String, Vec<PathBuf>> = HashMap::new();
    for file in files {
        if let Some(lang) = support_for_path(&file) {
            let grammar_name = lang.grammar_name().to_string();
            files_by_grammar.entry(grammar_name).or_default().push(file);
        }
    }

    if debug.timing {
        eprintln!("[timing] file collection: {:?}", start.elapsed());
    }
    let compile_start = std::time::Instant::now();

    // Separate rules: language-specific vs cross-language (need per-grammar validation)
    let (specific_rules, global_rules): (Vec<&&Rule>, Vec<&&Rule>) =
        active_rules.iter().partition(|r| !r.languages.is_empty());

    // Build combined queries: one per grammar
    let mut combined_by_grammar: HashMap<String, CombinedQuery> = HashMap::new();

    for grammar_name in files_by_grammar.keys() {
        let Some(grammar) = loader.get(grammar_name) else {
            continue;
        };

        let mut compiled_rules: Vec<(&Rule, tree_sitter::Query)> = Vec::new();

        // Pass 1: Language-specific rules - compile directly (trust the author).
        // A rule that fails to compile for this grammar is silently dropped.
        for rule in &specific_rules {
            if rule.languages.iter().any(|l| l == grammar_name) {
                if let Ok(q) = tree_sitter::Query::new(&grammar, &rule.query_str) {
                    compiled_rules.push((rule, q));
                }
            }
        }

        // Pass 2: Cross-language rules - validate each one; a rule whose query
        // doesn't compile against this grammar simply doesn't apply to it.
        for rule in &global_rules {
            if let Ok(q) = tree_sitter::Query::new(&grammar, &rule.query_str) {
                compiled_rules.push((rule, q));
            }
        }

        if compiled_rules.is_empty() {
            continue;
        }

        // Combine all into one query by joining the individual query texts.
        let combined_str = compiled_rules
            .iter()
            .map(|(r, _)| r.query_str.as_str())
            .collect::<Vec<_>>()
            .join("\n\n");

        let query = match tree_sitter::Query::new(&grammar, &combined_str) {
            Ok(q) => q,
            Err(e) => {
                eprintln!("Warning: combined query failed for {}: {}", grammar_name, e);
                continue;
            }
        };

        // Map pattern indices to rules. Capture names are shared across the
        // combined query, so the "@match" capture has a single index; fall
        // back to capture 0 when no rule uses "@match".
        let mut pattern_to_rule: Vec<(&Rule, usize)> = Vec::new();
        let combined_match_idx = query
            .capture_names()
            .iter()
            .position(|n| *n == "match")
            .unwrap_or(0);

        // Each rule contributes pattern_count() patterns; since the combined
        // text joins the queries in `compiled_rules` order, pattern indices
        // are assumed to line up with this flattened list.
        for (rule, individual_query) in &compiled_rules {
            for _ in 0..individual_query.pattern_count() {
                pattern_to_rule.push((*rule, combined_match_idx));
            }
        }

        combined_by_grammar.insert(
            grammar_name.clone(),
            CombinedQuery {
                query,
                pattern_to_rule,
            },
        );
    }

    if debug.timing {
        eprintln!(
            "[timing] query compilation: {:?} ({} grammars)",
            compile_start.elapsed(),
            combined_by_grammar.len()
        );
    }
    let process_start = std::time::Instant::now();

    // Process files: single query execution per file
    for (grammar_name, files) in &files_by_grammar {
        let Some(combined) = combined_by_grammar.get(grammar_name) else {
            continue;
        };

        let Some(grammar) = loader.get(grammar_name) else {
            continue;
        };

        let mut parser = tree_sitter::Parser::new();
        if parser.set_language(&grammar).is_err() {
            continue;
        }

        for file in files {
            let rel_path = file.strip_prefix(root).unwrap_or(file);
            let rel_path_str = rel_path.to_string_lossy();

            // Build source context for this file (used for requires evaluation)
            let source_ctx = SourceContext {
                file_path: file,
                rel_path: &rel_path_str,
                project_root: root,
            };

            // Unreadable (e.g. non-UTF-8) files are skipped.
            let content = match std::fs::read_to_string(file) {
                Ok(c) => c,
                Err(_) => continue,
            };

            let tree = match parser.parse(&content, None) {
                Some(t) => t,
                None => continue,
            };

            // Single query execution - one traversal for all rules
            let mut cursor = tree_sitter::QueryCursor::new();
            let mut matches = cursor.matches(&combined.query, tree.root_node(), content.as_bytes());

            while let Some(m) = matches.next() {
                // Look up which rule this pattern belongs to
                let Some((rule, match_idx)) = combined.pattern_to_rule.get(m.pattern_index) else {
                    continue;
                };

                // Check allow patterns for this specific rule
                if rule.allow.iter().any(|p| p.matches(&rel_path_str)) {
                    continue;
                }

                // Check requires conditions
                if !check_requires(rule, &source_registry, &source_ctx) {
                    continue;
                }

                // Query predicates (#eq?, #match?, ...) for this pattern.
                if !evaluate_predicates(&combined.query, m, content.as_bytes()) {
                    continue;
                }

                // Only matches that bound the "@match" capture become findings.
                let capture = m.captures.iter().find(|c| c.index as usize == *match_idx);

                if let Some(cap) = capture {
                    let node = cap.node;
                    let start_line = node.start_position().row + 1;

                    // Inline moss-allow comments suppress the finding.
                    if is_allowed_by_comment(&content, start_line, &rule.id) {
                        continue;
                    }

                    let text = node.utf8_text(content.as_bytes()).unwrap_or("");

                    // Collect all captures for fix substitution
                    let mut captures_map: HashMap<String, String> = HashMap::new();
                    for cap in m.captures {
                        let name = combined.query.capture_names()[cap.index as usize].to_string();
                        if let Ok(cap_text) = cap.node.utf8_text(content.as_bytes()) {
                            captures_map.insert(name, cap_text.to_string());
                        }
                    }

                    findings.push(Finding {
                        rule_id: rule.id.clone(),
                        file: file.clone(),
                        start_line,
                        start_col: node.start_position().column + 1,
                        end_line: node.end_position().row + 1,
                        end_col: node.end_position().column + 1,
                        start_byte: node.start_byte(),
                        end_byte: node.end_byte(),
                        message: rule.message.clone(),
                        severity: rule.severity,
                        matched_text: text.lines().next().unwrap_or("").to_string(),
                        fix: rule.fix.clone(),
                        captures: captures_map,
                    });
                }
            }
        }
    }

    if debug.timing {
        eprintln!(
            "[timing] file processing: {:?} ({} findings)",
            process_start.elapsed(),
            findings.len()
        );
        eprintln!("[timing] total: {:?}", start.elapsed());
    }

    findings
}
367
368/// Evaluate predicates for a match.
369pub fn evaluate_predicates(
370    query: &tree_sitter::Query,
371    match_: &tree_sitter::QueryMatch,
372    source: &[u8],
373) -> bool {
374    let predicates = query.general_predicates(match_.pattern_index);
375    for predicate in predicates {
376        let name = &predicate.operator;
377        let args = &predicate.args;
378
379        match name.as_ref() {
380            "eq?" | "not-eq?" => {
381                if args.len() < 2 {
382                    continue;
383                }
384
385                // Get first capture's text
386                let first_text = match &args[0] {
387                    tree_sitter::QueryPredicateArg::Capture(idx) => match_
388                        .captures
389                        .iter()
390                        .find(|c| c.index == *idx)
391                        .and_then(|c| c.node.utf8_text(source).ok())
392                        .unwrap_or(""),
393                    tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
394                };
395
396                // Get second value (capture or string)
397                let second_text = match &args[1] {
398                    tree_sitter::QueryPredicateArg::Capture(idx) => match_
399                        .captures
400                        .iter()
401                        .find(|c| c.index == *idx)
402                        .and_then(|c| c.node.utf8_text(source).ok())
403                        .unwrap_or(""),
404                    tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
405                };
406
407                let equal = first_text == second_text;
408                if name.as_ref() == "eq?" && !equal {
409                    return false;
410                }
411                if name.as_ref() == "not-eq?" && equal {
412                    return false;
413                }
414            }
415            "match?" | "not-match?" => {
416                if args.len() < 2 {
417                    continue;
418                }
419
420                // Get capture's text
421                let capture_text = match &args[0] {
422                    tree_sitter::QueryPredicateArg::Capture(idx) => match_
423                        .captures
424                        .iter()
425                        .find(|c| c.index == *idx)
426                        .and_then(|c| c.node.utf8_text(source).ok())
427                        .unwrap_or(""),
428                    _ => continue,
429                };
430
431                // Get regex pattern
432                let pattern = match &args[1] {
433                    tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
434                    _ => continue,
435                };
436
437                // Compile and match regex
438                let regex = match regex::Regex::new(pattern) {
439                    Ok(r) => r,
440                    Err(_) => continue,
441                };
442
443                let matches = regex.is_match(capture_text);
444                if name.as_ref() == "match?" && !matches {
445                    return false;
446                }
447                if name.as_ref() == "not-match?" && matches {
448                    return false;
449                }
450            }
451            "any-of?" => {
452                if args.len() < 2 {
453                    continue;
454                }
455
456                // Get capture's text
457                let capture_text = match &args[0] {
458                    tree_sitter::QueryPredicateArg::Capture(idx) => match_
459                        .captures
460                        .iter()
461                        .find(|c| c.index == *idx)
462                        .and_then(|c| c.node.utf8_text(source).ok())
463                        .unwrap_or(""),
464                    _ => continue,
465                };
466
467                // Check if any of the remaining args match
468                let any_match = args[1..].iter().any(|arg| match arg {
469                    tree_sitter::QueryPredicateArg::String(s) => s.as_ref() == capture_text,
470                    _ => false,
471                });
472
473                if !any_match {
474                    return false;
475                }
476            }
477            _ => {
478                // Unknown predicate - ignore
479            }
480        }
481    }
482    true
483}
484
/// Expand a fix template by substituting capture names with their values.
/// Uses `$capture_name` syntax. `$match` is the full matched text.
///
/// Substitutes longer names first so that a capture whose name is a prefix
/// of another (e.g. `$match` vs `$matched`) cannot clobber the longer
/// placeholder; the previous HashMap-order iteration made the result
/// nondeterministic in that case. Ties break alphabetically so the output
/// is fully deterministic.
pub fn expand_fix_template(template: &str, captures: &HashMap<String, String>) -> String {
    let mut names: Vec<&String> = captures.keys().collect();
    names.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));

    let mut result = template.to_string();
    for name in names {
        let placeholder = format!("${}", name);
        result = result.replace(&placeholder, &captures[name]);
    }
    result
}
495
496/// Apply fixes to findings, returning the number of files modified.
497/// Fixes are applied in reverse order within each file to preserve byte offsets.
498pub fn apply_fixes(findings: &[Finding]) -> std::io::Result<usize> {
499    // Group findings by file
500    let mut by_file: HashMap<&PathBuf, Vec<&Finding>> = HashMap::new();
501    for finding in findings {
502        if finding.fix.is_some() {
503            by_file.entry(&finding.file).or_default().push(finding);
504        }
505    }
506
507    let mut files_modified = 0;
508
509    for (file, mut file_findings) in by_file {
510        // Sort by start_byte descending so we can apply fixes without shifting offsets
511        file_findings.sort_by(|a, b| b.start_byte.cmp(&a.start_byte));
512
513        let mut content = std::fs::read_to_string(file)?;
514
515        for finding in file_findings {
516            let fix_template = finding.fix.as_ref().unwrap();
517            let replacement = expand_fix_template(fix_template, &finding.captures);
518
519            // Replace the matched region with the fix
520            let before = &content[..finding.start_byte];
521            let after = &content[finding.end_byte..];
522            content = format!("{}{}{}", before, replacement, after);
523        }
524
525        std::fs::write(file, &content)?;
526        files_modified += 1;
527    }
528
529    Ok(files_modified)
530}
531
532/// Collect source files from a directory.
533fn collect_source_files(root: &Path) -> Vec<PathBuf> {
534    let mut files = Vec::new();
535
536    let walker = ignore::WalkBuilder::new(root)
537        .hidden(false)
538        .git_ignore(true)
539        .build();
540
541    for entry in walker.flatten() {
542        let path = entry.path();
543        if path.is_file() && support_for_path(path).is_some() {
544            files.push(path.to_path_buf());
545        }
546    }
547
548    files
549}
550
#[cfg(test)]
mod tests {
    use super::*;
    use normalize_languages::GrammarLoader;
    use streaming_iterator::StreamingIterator;

    // Fresh loader per test; grammars are fetched from it by name.
    fn loader() -> GrammarLoader {
        GrammarLoader::new()
    }

    /// Test that combined queries correctly scope predicates per-pattern.
    ///
    /// Both patterns reuse the `@_method` and `@match` capture names, so
    /// this verifies that each `#eq?` stays bound to its own pattern after
    /// the queries are joined into one string.
    #[test]
    fn test_combined_query_predicate_scoping() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");

        // Two patterns with same capture name but different predicate values
        let combined_query = r#"
; Pattern 0: matches unwrap
((call_expression
  function: (field_expression field: (field_identifier) @_method)
  (#eq? @_method "unwrap")) @match)

; Pattern 1: matches expect
((call_expression
  function: (field_expression field: (field_identifier) @_method)
  (#eq? @_method "expect")) @match)
"#;

        let query = tree_sitter::Query::new(&grammar, combined_query)
            .expect("combined query should compile");

        assert_eq!(query.pattern_count(), 2, "should have 2 patterns");

        let test_code = r#"
fn main() {
    let x = Some(5);
    x.unwrap();      // line 4 - should match pattern 0
    x.expect("msg"); // line 5 - should match pattern 1
    x.map(|v| v);    // line 6 - should NOT match
}
"#;

        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(test_code, None).unwrap();

        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());

        // Record (pattern_index, matched text) for every surviving match.
        let mut results: Vec<(usize, String)> = Vec::new();
        while let Some(m) = matches.next() {
            // Check predicates - this is what we're testing
            if !evaluate_predicates(&query, m, test_code.as_bytes()) {
                continue;
            }

            let match_capture = m
                .captures
                .iter()
                .find(|c| query.capture_names()[c.index as usize] == "match");

            if let Some(cap) = match_capture {
                let text = cap.node.utf8_text(test_code.as_bytes()).unwrap();
                results.push((m.pattern_index, text.to_string()));
            }
        }

        // Should have exactly 2 matches
        assert_eq!(results.len(), 2, "should have 2 matches, got {:?}", results);

        // Pattern 0 should match unwrap
        assert!(
            results
                .iter()
                .any(|(idx, text)| *idx == 0 && text.contains("unwrap")),
            "pattern 0 should match unwrap, got {:?}",
            results
        );

        // Pattern 1 should match expect
        assert!(
            results
                .iter()
                .any(|(idx, text)| *idx == 1 && text.contains("expect")),
            "pattern 1 should match expect, got {:?}",
            results
        );
    }

    /// Test that multiple rules can be combined into single query.
    ///
    /// Mirrors what `run_rules` does: join independent rule queries with
    /// blank lines and check that each pattern index still fires on its
    /// own construct after a single traversal.
    #[test]
    fn test_combined_rules_single_traversal() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");

        // Simulate combining multiple rule queries
        let rules_queries = vec![
            (
                "unwrap-rule",
                r#"((call_expression function: (field_expression field: (field_identifier) @_m) (#eq? @_m "unwrap")) @match)"#,
            ),
            (
                "dbg-rule",
                r#"((macro_invocation macro: (identifier) @_name (#eq? @_name "dbg")) @match)"#,
            ),
        ];

        // Combine into single query
        let combined = rules_queries
            .iter()
            .map(|(_, q)| *q)
            .collect::<Vec<_>>()
            .join("\n\n");

        let query =
            tree_sitter::Query::new(&grammar, &combined).expect("combined query should compile");

        let test_code = r#"
fn main() {
    let x = Some(5);
    dbg!(x);        // should match pattern 1 (dbg-rule)
    x.unwrap();     // should match pattern 0 (unwrap-rule)
}
"#;

        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(test_code, None).unwrap();

        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());

        let mut pattern_indices: Vec<usize> = Vec::new();
        while let Some(m) = matches.next() {
            if evaluate_predicates(&query, m, test_code.as_bytes()) {
                pattern_indices.push(m.pattern_index);
            }
        }

        // Should match both patterns
        assert!(
            pattern_indices.contains(&0),
            "should match pattern 0 (unwrap)"
        );
        assert!(pattern_indices.contains(&1), "should match pattern 1 (dbg)");
    }
}