// normalize_syntax_rules/runner.rs

1//! Rule execution with combined query optimization.
2
3use crate::sources::{SourceContext, SourceRegistry, builtin_registry};
4use crate::{Rule, Severity};
5use normalize_languages::{GrammarLoader, support_for_path};
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use streaming_iterator::StreamingIterator;
9
/// Serializable representation of a `Finding` for caching.
///
/// Same fields as `Finding` but derives `Serialize`/`Deserialize` so it can be
/// persisted as JSON inside the SQLite findings cache
/// (`.normalize/findings-cache.sqlite`); the `From` impls below convert
/// losslessly in both directions.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CachedFinding {
    /// ID of the rule that produced the finding.
    pub rule_id: String,
    /// Path to the file where the finding was detected.
    pub file: std::path::PathBuf,
    /// 1-based start line.
    pub start_line: usize,
    /// Start column (see `Finding::start_col`).
    pub start_col: usize,
    /// 1-based end line.
    pub end_line: usize,
    /// End column (see `Finding::end_col`).
    pub end_col: usize,
    /// Byte offset of the start of the match.
    pub start_byte: usize,
    /// Byte offset of the end of the match.
    pub end_byte: usize,
    /// Human-readable description of the finding.
    pub message: String,
    /// Severity level of the finding.
    pub severity: Severity,
    /// Matched text as stored on `Finding::matched_text`.
    pub matched_text: String,
    /// Auto-fix template (None if no fix available).
    pub fix: Option<String>,
    /// Capture values keyed by capture name.
    pub captures: HashMap<String, String>,
}
30
31impl From<Finding> for CachedFinding {
32    fn from(f: Finding) -> Self {
33        Self {
34            rule_id: f.rule_id,
35            file: f.file,
36            start_line: f.start_line,
37            start_col: f.start_col,
38            end_line: f.end_line,
39            end_col: f.end_col,
40            start_byte: f.start_byte,
41            end_byte: f.end_byte,
42            message: f.message,
43            severity: f.severity,
44            matched_text: f.matched_text,
45            fix: f.fix,
46            captures: f.captures,
47        }
48    }
49}
50
51impl From<CachedFinding> for Finding {
52    fn from(c: CachedFinding) -> Self {
53        Self {
54            rule_id: c.rule_id,
55            file: c.file,
56            start_line: c.start_line,
57            start_col: c.start_col,
58            end_line: c.end_line,
59            end_col: c.end_col,
60            start_byte: c.start_byte,
61            end_byte: c.end_byte,
62            message: c.message,
63            severity: c.severity,
64            matched_text: c.matched_text,
65            fix: c.fix,
66            captures: c.captures,
67        }
68    }
69}
70
/// SQLite-backed per-file findings cache for the syntax engine.
///
/// Stored at `<project_root>/.normalize/findings-cache.sqlite`.
/// This is a private duplicate of the `FindingsCache` in `normalize-native-rules`
/// to avoid a heavy cross-crate dependency for a small utility.
///
/// Backed by libsql. To keep the surrounding `run_rules()` API synchronous
/// (the syntax engine is pure tree-sitter + rayon), we own a dedicated
/// current-thread tokio runtime and drive libsql through `runtime.block_on(...)`.
struct FindingsCache {
    /// Live connection used by every query/statement.
    conn: libsql::Connection,
    /// Held only to keep the database (and thus `conn`) alive.
    #[allow(dead_code)]
    db: libsql::Database,
    /// Fallback current-thread runtime, populated only when the cache was
    /// opened outside any tokio context (see `findings_cache_block_on`).
    runtime: Option<tokio::runtime::Runtime>,
}
86
87/// Drive `fut` to completion, choosing a strategy based on the *current* thread's
88/// tokio context — not the context at cache-construction time. The cached `runtime`
89/// (set when the cache was opened from a sync context) is only used as a fallback
90/// when we are not currently inside any runtime; calling `cached_rt.block_on()` from
91/// inside another runtime panics with "Cannot start a runtime from within a runtime".
92fn findings_cache_block_on<F: std::future::Future + Send>(
93    runtime: &Option<tokio::runtime::Runtime>,
94    fut: F,
95) -> F::Output
96where
97    F::Output: Send,
98{
99    if let Ok(handle) = tokio::runtime::Handle::try_current() {
100        return match handle.runtime_flavor() {
101            tokio::runtime::RuntimeFlavor::MultiThread => {
102                tokio::task::block_in_place(|| handle.block_on(fut))
103            }
104            _ => spawn_scoped_findings_runtime(fut),
105        };
106    }
107    if let Some(rt) = runtime {
108        return rt.block_on(fut);
109    }
110    spawn_scoped_findings_runtime(fut)
111}
112
113fn spawn_scoped_findings_runtime<F: std::future::Future + Send>(fut: F) -> F::Output
114where
115    F::Output: Send,
116{
117    std::thread::scope(|s| {
118        s.spawn(|| {
119            let rt = tokio::runtime::Builder::new_current_thread()
120                .enable_all()
121                .build()
122                .expect("failed to build tokio runtime worker thread");
123            rt.block_on(fut)
124        })
125        .join()
126        .expect("libsql worker thread panicked")
127    })
128}
129
impl FindingsCache {
    /// Drive `fut` to completion using the strategy in
    /// `findings_cache_block_on`, with this cache's fallback runtime.
    fn block_on<F: std::future::Future + Send>(&self, fut: F) -> F::Output
    where
        F::Output: Send,
    {
        findings_cache_block_on(&self.runtime, fut)
    }

    /// Open (or create) the cache at `<project_root>/.normalize/findings-cache.sqlite`.
    ///
    /// Failures are deliberately soft: if the on-disk database cannot be
    /// opened we fall back to an in-memory one, and schema-setup errors are
    /// ignored — a broken cache degrades to "no caching", never to a failed run.
    fn open(project_root: &Path) -> Self {
        let dir = project_root.join(".normalize");
        // Best-effort: a failure here surfaces as the in-memory fallback below.
        let _ = std::fs::create_dir_all(&dir);
        let db_path = dir.join("findings-cache.sqlite");
        // Build a private runtime only when we are NOT already inside a tokio
        // context; `findings_cache_block_on` picks the right strategy per call.
        let runtime: Option<tokio::runtime::Runtime> =
            if tokio::runtime::Handle::try_current().is_ok() {
                None
            } else {
                Some(
                    tokio::runtime::Builder::new_current_thread()
                        .enable_all()
                        .build()
                        .expect("failed to build tokio runtime for syntax findings cache"),
                )
            };
        let init = async {
            let db = match libsql::Builder::new_local(&db_path).build().await {
                Ok(db) => db,
                Err(_) => libsql::Builder::new_local(":memory:")
                    .build()
                    .await
                    .expect("failed to open in-memory libsql database"),
            };
            let conn = db.connect().expect("failed to connect to libsql database");
            // WAL + synchronous=NORMAL trade durability for speed — acceptable
            // for a cache that can always be rebuilt.
            let _ = conn
                .execute_batch(
                    "PRAGMA journal_mode=WAL;
                     PRAGMA synchronous=NORMAL;
                     CREATE TABLE IF NOT EXISTS findings_cache (
                        path TEXT NOT NULL,
                        engine TEXT NOT NULL,
                        mtime_nanos INTEGER NOT NULL,
                        config_hash TEXT NOT NULL,
                        findings_json TEXT NOT NULL,
                        PRIMARY KEY (path, engine)
                    );",
                )
                .await;
            (db, conn)
        };
        let (db, conn) = findings_cache_block_on(&runtime, init);
        Self { conn, db, runtime }
    }

    /// Begin a transaction spanning many `put()` calls; errors are ignored.
    fn begin(&self) {
        let conn = &self.conn;
        let _ = self.block_on(async { conn.execute_batch("BEGIN;").await });
    }

    /// Commit the transaction opened by `begin()`; errors are ignored.
    fn commit(&self) {
        let conn = &self.conn;
        let _ = self.block_on(async { conn.execute_batch("COMMIT;").await });
    }

    /// Look up the cached findings JSON for `path`.
    ///
    /// Returns `None` on a miss or on any query error. A hit requires the
    /// file's mtime and the active-rules hash to match exactly.
    fn get(&self, path: &str, mtime_nanos: u64, config_hash: &str, engine: &str) -> Option<String> {
        let conn = &self.conn;
        self.block_on(async {
            let mut rows = conn
                .query(
                    "SELECT findings_json FROM findings_cache
                     WHERE path = ?1 AND engine = ?2 AND mtime_nanos = ?3 AND config_hash = ?4",
                    libsql::params![path, engine, mtime_nanos as i64, config_hash],
                )
                .await
                .ok()?;
            let row = rows.next().await.ok()??;
            row.get::<String>(0).ok()
        })
    }

    /// Insert or overwrite the cache entry keyed by `(path, engine)`;
    /// errors are ignored (cache writes are best-effort).
    fn put(
        &self,
        path: &str,
        mtime_nanos: u64,
        config_hash: &str,
        engine: &str,
        findings_json: &str,
    ) {
        let conn = &self.conn;
        let _ = self.block_on(async {
            conn.execute(
                "INSERT OR REPLACE INTO findings_cache (path, engine, mtime_nanos, config_hash, findings_json)
                 VALUES (?1, ?2, ?3, ?4, ?5)",
                libsql::params![path, engine, mtime_nanos as i64, config_hash, findings_json],
            )
            .await
        });
    }
}
227
228/// Compute a hash of the active rule set for cache invalidation.
229fn compute_rules_hash(rules: &[&Rule]) -> String {
230    use std::collections::hash_map::DefaultHasher;
231    use std::hash::{Hash, Hasher};
232    let mut hasher = DefaultHasher::new();
233    for rule in rules {
234        rule.id.hash(&mut hasher);
235        rule.query_str.hash(&mut hasher);
236    }
237    format!("{:x}", hasher.finish())
238}
239
/// Get the mtime of a file in nanoseconds since UNIX epoch cast to `u64`,
/// or 0 on any failure (missing file, unreadable metadata, pre-epoch time).
///
/// Nanosecond precision avoids false cache hits when a file is modified
/// twice within the same second (e.g. during multi-pass fix application in
/// tests and CI). The value fits SQLite's 64-bit signed INTEGER column.
fn file_mtime_nanos(path: &Path) -> u64 {
    let Ok(meta) = path.metadata() else {
        return 0;
    };
    let Ok(modified) = meta.modified() else {
        return 0;
    };
    modified
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |d| d.as_nanos() as u64)
}
255
/// A finding from running a rule.
#[derive(Debug, Clone)]
pub struct Finding {
    /// ID of the rule that produced this finding.
    pub rule_id: String,
    /// Absolute path to the file where the finding was detected.
    pub file: PathBuf,
    /// 1-based line number of the start of the match.
    pub start_line: usize,
    /// 1-based column of the start of the match (`build_finding` adds 1 to
    /// tree-sitter's 0-based column).
    pub start_col: usize,
    /// 1-based line number of the end of the match.
    pub end_line: usize,
    /// 1-based column of the end of the match (`build_finding` adds 1 to
    /// tree-sitter's 0-based column).
    pub end_col: usize,
    /// Byte offset of the start of the match in the source file.
    pub start_byte: usize,
    /// Byte offset of the end of the match in the source file.
    pub end_byte: usize,
    /// Human-readable description of the finding.
    pub message: String,
    /// Severity level of the finding.
    pub severity: Severity,
    /// First line of the matched node's source text (`build_finding`
    /// truncates multi-line matches to their first line).
    pub matched_text: String,
    /// Auto-fix template (None if no fix available).
    pub fix: Option<String>,
    /// Capture values from the query match keyed by capture name (without `@`).
    /// NOTE(review): `build_finding` copies *every* capture into this map,
    /// which includes `@match` under the key "match" — the previous claim
    /// that `@match` was excluded does not match the visible producer; confirm.
    pub captures: HashMap<String, String>,
}
288
/// Debug output categories.
#[derive(Default)]
pub struct DebugFlags {
    /// Whether to emit per-rule timing information to stderr.
    pub timing: bool,
}

impl DebugFlags {
    /// Build the flag set from raw debug argument values.
    ///
    /// The literal `"all"` switches every category on; otherwise each
    /// category is enabled by its own name (e.g. `"timing"`).
    pub fn from_args(args: &[String]) -> Self {
        let requested = |flag: &str| args.iter().any(|s| s == flag);
        Self {
            timing: requested("all") || requested("timing"),
        }
    }
}
304
/// Check if a line contains a `normalize-syntax-allow:` comment for the given rule.
/// Supports: `// normalize-syntax-allow: rule-id` or `/* normalize-syntax-allow: rule-id */`,
/// optionally followed by `- reason`.
fn line_has_allow_comment(line: &str, rule_id: &str) -> bool {
    // Named constant replaces the previous hard-coded offset `23`, which had
    // to be kept in sync with the marker string by hand.
    const MARKER: &str = "normalize-syntax-allow:";
    let Some(pos) = line.find(MARKER) else {
        return false;
    };
    let after = line[pos + MARKER.len()..].trim_start();
    let Some(rest) = after.strip_prefix(rule_id) else {
        return false;
    };
    // Valid if the rule id is followed by nothing, whitespace, a dash
    // (introducing a reason), or the end of a block comment.
    rest.is_empty()
        || rest.starts_with(char::is_whitespace)
        || rest.starts_with('-')
        || rest.starts_with("*/")
}
324
/// Compute the byte ranges of test-only regions in a source file, using the
/// per-language `*.test_regions.scm` query loaded via `GrammarLoader`.
///
/// Returns an empty vector for grammars without a test-regions query (which
/// is correct: path-based excludes like `**/tests/**` or `*_test.go` remain
/// the only test-detection mechanism for those languages).
///
/// The runner has no language-specific knowledge — what counts as a "test
/// region" is entirely defined by the language's `.scm` query file.
fn test_region_ranges(
    grammar_name: &str,
    tree: &tree_sitter::Tree,
    source: &[u8],
    loader: &GrammarLoader,
) -> Vec<(usize, usize)> {
    // A missing or uncompilable query simply yields "no test regions".
    let Some(query_str) = loader.get_test_regions(grammar_name) else {
        return Vec::new();
    };
    let Some(query) = loader.get_compiled_query(grammar_name, "test_regions", &query_str) else {
        return Vec::new();
    };
    // Only the `@test_region` capture is meaningful; any other captures in
    // the query are ignored.
    let Some(capture_idx) = query
        .capture_names()
        .iter()
        .position(|n| *n == "test_region")
    else {
        return Vec::new();
    };
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&query, tree.root_node(), source);
    let mut ranges = Vec::new();
    while let Some(m) = matches.next() {
        for cap in m.captures {
            if cap.index as usize == capture_idx {
                ranges.push((cap.node.start_byte(), cap.node.end_byte()));
            }
        }
    }
    ranges
}
365
/// Check if the byte span `[start, end]` is fully contained in any of the
/// given `(start, end)` ranges.
fn in_any_range(start: usize, end: usize, ranges: &[(usize, usize)]) -> bool {
    for &(lo, hi) in ranges {
        if lo <= start && end <= hi {
            return true;
        }
    }
    false
}
370
371/// Check if a finding should be allowed based on inline comments.
372/// Checks the line of the finding and up to 2 lines before (to handle
373/// multi-line expressions like `let x =\n    expr.unwrap()`).
374fn is_allowed_by_comment(content: &str, start_line: usize, rule_id: &str) -> bool {
375    let lines: Vec<&str> = content.lines().collect();
376    let line_idx = start_line.saturating_sub(1); // 0-indexed
377
378    for offset in 0..=2usize {
379        let Some(idx) = line_idx.checked_sub(offset) else {
380            break;
381        };
382        if let Some(line) = lines.get(idx)
383            && line_has_allow_comment(line, rule_id)
384        {
385            return true;
386        }
387    }
388
389    false
390}
391
/// Check if a rule's requires conditions are met for a given file context.
///
/// Supports operators:
/// - `value` - exact match
/// - `>=value` - greater or equal (for versions/editions)
/// - `<=value` - less or equal
/// - `!value` - not equal
///
/// NOTE(review): `>=` / `<=` compare strings lexicographically, so
/// multi-component version values (e.g. "10" vs "9") will not order
/// numerically — confirm this is acceptable for the values used here.
fn check_requires(rule: &Rule, registry: &SourceRegistry, ctx: &SourceContext) -> bool {
    // No requirements means the rule always applies.
    if rule.requires.is_empty() {
        return true;
    }

    // Every requirement must hold; the first failure rejects the rule.
    for (key, expected) in &rule.requires {
        let actual = match registry.get(ctx, key) {
            Some(v) => v,
            None => return false, // Required source not available
        };

        // Parse operator prefix
        let matches = if let Some(rest) = expected.strip_prefix(">=") {
            *actual >= *rest
        } else if let Some(rest) = expected.strip_prefix("<=") {
            *actual <= *rest
        } else if let Some(rest) = expected.strip_prefix('!') {
            actual != rest
        } else {
            actual == *expected
        };

        if !matches {
            return false;
        }
    }

    true
}
428
/// Combined query for a grammar with pattern-to-rule mapping.
///
/// All active rules' query strings are concatenated into one tree-sitter
/// query so each file is traversed once; `pattern_to_rule` maps a match's
/// `pattern_index` back to the rule that contributed that pattern.
struct CombinedQuery<'a> {
    /// The single concatenated tree-sitter query for this grammar.
    query: tree_sitter::Query,
    /// Maps pattern_index to (rule, match_capture_index_in_combined_query)
    pattern_to_rule: Vec<(&'a Rule, usize)>,
}
435
436/// Try to compile a cross-language rule for a grammar, falling back to
437/// per-pattern compilation when the full query fails.
438fn compile_cross_language_rule(
439    rule: &Rule,
440    grammar: &tree_sitter::Language,
441) -> Option<(tree_sitter::Query, String)> {
442    if let Ok(q) = tree_sitter::Query::new(grammar, &rule.query_str) {
443        return Some((q, rule.query_str.clone()));
444    }
445    // Full query failed — try each pattern separately
446    let patterns: Vec<&str> = split_query_patterns(&rule.query_str);
447    if patterns.len() <= 1 {
448        return None;
449    }
450    let valid: Vec<&str> = patterns
451        .into_iter()
452        .filter(|p| tree_sitter::Query::new(grammar, p).is_ok())
453        .collect();
454    if valid.is_empty() {
455        return None;
456    }
457    let combined = valid.join("\n");
458    tree_sitter::Query::new(grammar, &combined)
459        .ok()
460        .map(|q| (q, combined))
461}
462
/// Compile per-grammar rules and build a combined query.
///
/// Each rule is first compiled on its own (so an invalid query drops only
/// that rule instead of poisoning the whole batch), then the surviving
/// query strings are concatenated into one query for single-traversal
/// matching. Returns `None` when no rule compiles for this grammar.
fn build_combined_query<'a>(
    grammar_name: &str,
    grammar: &tree_sitter::Language,
    specific_rules: &[&&'a Rule],
    global_rules: &[&&'a Rule],
) -> Option<CombinedQuery<'a>> {
    let mut compiled_rules: Vec<(&Rule, tree_sitter::Query, String)> = Vec::new();

    // Pass 1: Language-specific rules - compile directly (trust the author)
    for rule in specific_rules {
        if rule.languages.iter().any(|l| l == grammar_name)
            && let Ok(q) = tree_sitter::Query::new(grammar, &rule.query_str)
        {
            compiled_rules.push((rule, q, rule.query_str.clone()));
        }
    }

    // Pass 2: Cross-language rules - validate each one with pattern fallback
    for rule in global_rules {
        if let Some((q, qs)) = compile_cross_language_rule(rule, grammar) {
            compiled_rules.push((rule, q, qs));
        }
    }

    if compiled_rules.is_empty() {
        return None;
    }

    // Combine all into one query
    let combined_str = compiled_rules
        .iter()
        .map(|(_, _, qs)| qs.as_str())
        .collect::<Vec<_>>()
        .join("\n\n");

    // Should not fail since every part compiled individually, but degrade
    // gracefully (skip this grammar) if it does.
    let query = match tree_sitter::Query::new(grammar, &combined_str) {
        Ok(q) => q,
        Err(e) => {
            eprintln!("Warning: combined query failed for {}: {}", grammar_name, e);
            return None;
        }
    };

    // Map pattern indices to rules
    // NOTE(review): falls back to capture index 0 when the combined query has
    // no `@match` capture — confirm rules are guaranteed to define `@match`.
    let combined_match_idx = query
        .capture_names()
        .iter()
        .position(|n| *n == "match")
        .unwrap_or(0);

    // Each rule occupies `pattern_count()` consecutive pattern slots in the
    // combined query, in the order the query strings were concatenated.
    let mut pattern_to_rule: Vec<(&Rule, usize)> = Vec::new();
    for (rule, individual_query, _) in &compiled_rules {
        for _ in 0..individual_query.pattern_count() {
            pattern_to_rule.push((*rule, combined_match_idx));
        }
    }

    Some(CombinedQuery {
        query,
        pattern_to_rule,
    })
}
526
527/// Build a Finding from a matched capture node.
528fn build_finding(
529    rule: &Rule,
530    node: tree_sitter::Node,
531    content: &str,
532    query: &tree_sitter::Query,
533    m: &tree_sitter::QueryMatch,
534    file: &Path,
535) -> Finding {
536    let text = node.utf8_text(content.as_bytes()).unwrap_or("");
537
538    let mut captures_map: HashMap<String, String> = HashMap::new();
539    for cap in m.captures {
540        let name = query.capture_names()[cap.index as usize].to_string();
541        if let Ok(cap_text) = cap.node.utf8_text(content.as_bytes()) {
542            captures_map.insert(name, cap_text.to_string());
543        }
544    }
545
546    Finding {
547        rule_id: rule.id.clone(),
548        file: file.to_path_buf(),
549        start_line: node.start_position().row + 1,
550        start_col: node.start_position().column + 1,
551        end_line: node.end_position().row + 1,
552        end_col: node.end_position().column + 1,
553        start_byte: node.start_byte(),
554        end_byte: node.end_byte(),
555        message: rule.message.clone(),
556        severity: rule.severity,
557        matched_text: text.lines().next().unwrap_or("").to_string(),
558        fix: rule.fix.clone(),
559        captures: captures_map,
560    }
561}
562
/// Resolved allow-list path for a file.
struct AllowPath<'a> {
    /// Full path (only populated when `root_in_project` was set).
    _full: Option<PathBuf>,
    /// String representation used for allow-list matching; borrows the
    /// caller's string when no prefix join was needed.
    display: std::borrow::Cow<'a, str>,
}

/// Compute the allow-list path for a file (project-root-relative).
///
/// When `root_in_project` is set, the relative path is re-anchored under
/// that prefix; otherwise the caller's string is reused as-is.
fn allow_path_for_file<'a>(
    rel_path: &Path,
    rel_path_str: &'a str,
    root_in_project: &Option<PathBuf>,
) -> AllowPath<'a> {
    match root_in_project {
        Some(prefix) => {
            let full = prefix.join(rel_path);
            let display = std::borrow::Cow::Owned(full.to_string_lossy().into_owned());
            AllowPath {
                _full: Some(full),
                display,
            }
        }
        None => AllowPath {
            _full: None,
            display: std::borrow::Cow::Borrowed(rel_path_str),
        },
    }
}
591
/// Context needed to process matches for a single file.
struct FileContext<'a> {
    /// File being scanned (as handed to `run_rules`).
    file: &'a Path,
    /// Full UTF-8 contents of the file.
    content: &'a str,
    /// Registry answering rules' `requires` lookups.
    source_registry: &'a SourceRegistry,
    /// Per-file context passed to the source registry.
    source_ctx: SourceContext<'a>,
    /// Project-root-relative path used for allow-list and `files` glob matching.
    allow_path_str: &'a str,
    /// Byte ranges of test-only regions in this file (e.g. Rust
    /// `#[cfg(test)] mod ...` blocks), as captured by the language's
    /// `*.test_regions.scm` query. Findings inside these ranges are dropped
    /// for rules where `applies_in_tests = false` (the default).
    skip_ranges: &'a [(usize, usize)],
}
605
/// Process all query matches for a single file and append findings.
///
/// Per-match filter order: rule lookup → allow-list globs → `files` globs →
/// `requires` sources → query predicates → `@match` capture presence →
/// test-region skip → inline allow comments. Matches surviving every filter
/// become findings.
fn process_file_matches(
    ctx: &FileContext,
    tree: &tree_sitter::Tree,
    combined: &CombinedQuery,
    findings: &mut Vec<Finding>,
) {
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&combined.query, tree.root_node(), ctx.content.as_bytes());

    while let Some(m) = matches.next() {
        // Map the combined-query pattern index back to its source rule.
        let Some((rule, match_idx)) = combined.pattern_to_rule.get(m.pattern_index) else {
            continue;
        };

        // Per-rule allow-list globs (matched against the allow path).
        if rule.allow.iter().any(|p| p.matches(ctx.allow_path_str)) {
            continue;
        }

        // Optional `files` restriction: accepted when either the path or
        // the bare file name matches any pattern.
        if !rule.files.is_empty() {
            let filename = ctx
                .file
                .file_name()
                .map(|n| n.to_string_lossy())
                .unwrap_or_default();
            let matches_path = rule.files.iter().any(|p| p.matches(ctx.allow_path_str));
            let matches_name = rule.files.iter().any(|p| p.matches(filename.as_ref()));
            if !matches_path && !matches_name {
                continue;
            }
        }

        if !check_requires(rule, ctx.source_registry, &ctx.source_ctx) {
            continue;
        }

        // Query predicates (#eq?/#match?-style) are evaluated here, not by
        // tree-sitter itself.
        if !evaluate_predicates(&combined.query, m, ctx.content.as_bytes()) {
            continue;
        }

        // The finding is anchored on the `@match` capture; skip if absent.
        let Some(cap) = m.captures.iter().find(|c| c.index as usize == *match_idx) else {
            continue;
        };

        // Drop findings inside test-only regions unless the rule opts in.
        if !rule.applies_in_tests
            && in_any_range(cap.node.start_byte(), cap.node.end_byte(), ctx.skip_ranges)
        {
            continue;
        }

        // Honor inline `normalize-syntax-allow:` comments near the finding.
        let start_line = cap.node.start_position().row + 1;
        if is_allowed_by_comment(ctx.content, start_line, &rule.id) {
            continue;
        }

        findings.push(build_finding(
            rule,
            cap.node,
            ctx.content,
            &combined.query,
            m,
            ctx.file,
        ));
    }
}
671
/// Run rules against files in a directory.
/// Optimized: combines all rules into single query per grammar for single-traversal matching.
///
/// `root` is the directory (or single file) being linted; `project_root`
/// anchors the `.normalize/` cache directory and allow-list paths.
/// `filter_rule` / `filter_tag` / `filter_ids` narrow the active rule set,
/// and explicitly requested rules run even when disabled. When `files` is
/// `Some`, only those files are scanned (filtered to supported languages);
/// otherwise the tree under `root` is walked with `path_filter`/`walk_config`.
/// Returns all findings, including ones replayed from the per-file cache.
#[allow(clippy::too_many_arguments)]
pub fn run_rules(
    rules: &[Rule],
    root: &Path,
    project_root: &Path,
    loader: &GrammarLoader,
    filter_rule: Option<&str>,
    filter_tag: Option<&str>,
    filter_ids: Option<&std::collections::HashSet<String>>,
    debug: &DebugFlags,
    files: Option<&[PathBuf]>,
    path_filter: &normalize_rules_config::PathFilter,
    walk_config: &normalize_rules_config::WalkConfig,
) -> Vec<Finding> {
    let start = std::time::Instant::now();
    // Canonicalize so strip_prefix below compares consistent absolute paths;
    // a single-file `root` is normalized to its parent directory.
    let raw_abs_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    let abs_root = if raw_abs_root.is_file() {
        raw_abs_root
            .parent()
            .map(|p| p.to_path_buf())
            .unwrap_or(raw_abs_root)
    } else {
        raw_abs_root
    };
    let abs_project_root = project_root
        .canonicalize()
        .unwrap_or_else(|_| project_root.to_path_buf());
    // `root` relative to the project root, when it lies inside the project;
    // used by `allow_path_for_file` to build project-root-relative paths.
    let root_in_project = abs_root
        .strip_prefix(&abs_project_root)
        .ok()
        .map(|p| p.to_path_buf());

    let mut findings = Vec::new();
    let source_registry = builtin_registry();

    // A rule named explicitly (by id filter) runs even when disabled.
    let explicitly_requested = |r: &&Rule| {
        filter_rule.is_some_and(|f| r.id == f) || filter_ids.is_some_and(|ids| ids.contains(&r.id))
    };
    let active_rules: Vec<&Rule> = rules
        .iter()
        .filter(|r| r.enabled || explicitly_requested(r))
        .filter(|r| filter_rule.is_none_or(|f| r.id == f))
        .filter(|r| filter_tag.is_none_or(|t| r.tags.iter().any(|tag| tag == t)))
        .filter(|r| filter_ids.is_none_or(|ids| ids.contains(&r.id)))
        .collect();

    if active_rules.is_empty() {
        return findings;
    }

    // Open the SQLite findings cache; entries are keyed by file path, mtime
    // and the hash of the active rule set.
    let cache = FindingsCache::open(&abs_project_root);
    let rules_hash = compute_rules_hash(&active_rules);
    const ENGINE: &str = "syntax";

    let files = if let Some(explicit) = files {
        // Use the provided file list, filtering to supported languages.
        explicit
            .iter()
            .filter(|f| support_for_path(f).is_some())
            .cloned()
            .collect()
    } else {
        collect_source_files(root, path_filter, walk_config)
    };
    // Group files by grammar so each grammar's combined query is built once.
    let mut files_by_grammar: HashMap<String, Vec<PathBuf>> = HashMap::new();
    for file in files {
        if let Some(lang) = support_for_path(&file) {
            let grammar_name = lang.grammar_name().to_string();
            files_by_grammar.entry(grammar_name).or_default().push(file);
        }
    }

    if debug.timing {
        eprintln!("[timing] file collection: {:?}", start.elapsed());
    }
    let compile_start = std::time::Instant::now();

    // Language-specific rules compile as-is; cross-language rules get
    // per-pattern fallback inside `build_combined_query`.
    let (specific_rules, global_rules): (Vec<&&Rule>, Vec<&&Rule>) =
        active_rules.iter().partition(|r| !r.languages.is_empty());

    let mut combined_by_grammar: HashMap<String, CombinedQuery> = HashMap::new();
    for grammar_name in files_by_grammar.keys() {
        let grammar = match loader.get(grammar_name) {
            Ok(g) => g,
            Err(e) => {
                let n = files_by_grammar[grammar_name].len();
                eprintln!(
                    "warning: no grammar for {grammar_name} — {n} file(s) skipped by syntax rules ({e}). Run `normalize grammars install` to fix."
                );
                continue;
            }
        };
        if let Some(cq) =
            build_combined_query(grammar_name, &grammar, &specific_rules, &global_rules)
        {
            combined_by_grammar.insert(grammar_name.clone(), cq);
        }
    }

    if debug.timing {
        eprintln!(
            "[timing] query compilation: {:?} ({} grammars)",
            compile_start.elapsed(),
            combined_by_grammar.len()
        );
    }
    let process_start = std::time::Instant::now();

    // One transaction around the whole scan keeps cache writes cheap.
    cache.begin();
    for (grammar_name, files) in &files_by_grammar {
        let Some(combined) = combined_by_grammar.get(grammar_name) else {
            continue;
        };
        let Some(grammar) = loader.get(grammar_name).ok() else {
            continue;
        };
        let mut parser = tree_sitter::Parser::new();
        if parser.set_language(&grammar).is_err() {
            continue;
        }

        for file in files {
            let file_key = file.to_string_lossy().into_owned();
            let mtime_nanos = file_mtime_nanos(file);

            // Cache hit: file is unchanged since the last run.
            if mtime_nanos > 0
                && let Some(json) = cache.get(&file_key, mtime_nanos, &rules_hash, ENGINE)
            {
                let cached: Vec<CachedFinding> = serde_json::from_str(&json).unwrap_or_default();
                findings.extend(cached.into_iter().map(Finding::from));
                continue;
            }

            let rel_path = file.strip_prefix(root).unwrap_or(file);
            let rel_path_str = rel_path.to_string_lossy();

            let allow_path = allow_path_for_file(rel_path, &rel_path_str, &root_in_project);

            // Unreadable or non-UTF-8 files are silently skipped.
            let Ok(content) = std::fs::read_to_string(file) else {
                continue;
            };
            let Some(tree) = parser.parse(&content, None) else {
                continue;
            };

            // Byte ranges of test-only regions; findings inside them are
            // dropped for rules with `applies_in_tests = false`.
            let skip_ranges = test_region_ranges(grammar_name, &tree, content.as_bytes(), loader);

            let file_ctx = FileContext {
                file,
                content: &content,
                source_registry: &source_registry,
                source_ctx: SourceContext {
                    file_path: file,
                    rel_path: &rel_path_str,
                    project_root: &abs_project_root,
                },
                allow_path_str: &allow_path.display,
                skip_ranges: &skip_ranges,
            };

            let mut file_findings: Vec<Finding> = Vec::new();
            process_file_matches(&file_ctx, &tree, combined, &mut file_findings);

            // Update cache entry for this file.
            if mtime_nanos > 0 {
                let cached: Vec<CachedFinding> = file_findings
                    .iter()
                    .cloned()
                    .map(CachedFinding::from)
                    .collect();
                if let Ok(json) = serde_json::to_string(&cached) {
                    cache.put(&file_key, mtime_nanos, &rules_hash, ENGINE, &json);
                }
            }

            findings.extend(file_findings);
        }
    }

    cache.commit();

    if debug.timing {
        eprintln!(
            "[timing] file processing: {:?} ({} findings)",
            process_start.elapsed(),
            findings.len()
        );
        eprintln!("[timing] total: {:?}", start.elapsed());
    }

    findings
}
868
869/// Resolve a predicate argument to its text value.
870fn resolve_arg_text<'a>(
871    arg: &'a tree_sitter::QueryPredicateArg,
872    match_: &tree_sitter::QueryMatch,
873    source: &'a [u8],
874) -> Option<&'a str> {
875    match arg {
876        tree_sitter::QueryPredicateArg::Capture(idx) => Some(
877            match_
878                .captures
879                .iter()
880                .find(|c| c.index == *idx)
881                .and_then(|c| c.node.utf8_text(source).ok())
882                .unwrap_or(""),
883        ),
884        tree_sitter::QueryPredicateArg::String(s) => Some(s.as_ref()),
885    }
886}
887
888/// Resolve the first argument as a capture's text (not a string literal).
889fn resolve_capture_text<'a>(
890    arg: &'a tree_sitter::QueryPredicateArg,
891    match_: &tree_sitter::QueryMatch,
892    source: &'a [u8],
893) -> Option<&'a str> {
894    match arg {
895        tree_sitter::QueryPredicateArg::Capture(idx) => Some(
896            match_
897                .captures
898                .iter()
899                .find(|c| c.index == *idx)
900                .and_then(|c| c.node.utf8_text(source).ok())
901                .unwrap_or(""),
902        ),
903        _ => None,
904    }
905}
906
907/// Evaluate an eq?/not-eq? predicate. Returns None to skip, Some(false) to reject.
908fn eval_eq(
909    args: &[tree_sitter::QueryPredicateArg],
910    match_: &tree_sitter::QueryMatch,
911    source: &[u8],
912    negated: bool,
913) -> Option<bool> {
914    if args.len() < 2 {
915        return None;
916    }
917    let first = resolve_arg_text(&args[0], match_, source)?;
918    let second = resolve_arg_text(&args[1], match_, source)?;
919    let equal = first == second;
920    Some(if negated { !equal } else { equal })
921}
922
923/// Evaluate a match?/not-match? predicate. Returns None to skip, Some(false) to reject.
924fn eval_match(
925    args: &[tree_sitter::QueryPredicateArg],
926    match_: &tree_sitter::QueryMatch,
927    source: &[u8],
928    negated: bool,
929) -> Option<bool> {
930    if args.len() < 2 {
931        return None;
932    }
933    let capture_text = resolve_capture_text(&args[0], match_, source)?;
934    let pattern = match &args[1] {
935        tree_sitter::QueryPredicateArg::String(s) => s.as_ref(),
936        _ => return None,
937    };
938    let regex = regex::Regex::new(pattern).ok()?;
939    let matched = regex.is_match(capture_text);
940    Some(if negated { !matched } else { matched })
941}
942
943/// Evaluate an any-of? predicate. Returns None to skip, Some(false) to reject.
944fn eval_any_of(
945    args: &[tree_sitter::QueryPredicateArg],
946    match_: &tree_sitter::QueryMatch,
947    source: &[u8],
948) -> Option<bool> {
949    if args.len() < 2 {
950        return None;
951    }
952    let capture_text = resolve_capture_text(&args[0], match_, source)?;
953    let any_match = args[1..].iter().any(|arg| match arg {
954        tree_sitter::QueryPredicateArg::String(s) => s.as_ref() == capture_text,
955        _ => false,
956    });
957    Some(any_match)
958}
959
960/// Evaluate predicates for a match.
961pub fn evaluate_predicates(
962    query: &tree_sitter::Query,
963    match_: &tree_sitter::QueryMatch,
964    source: &[u8],
965) -> bool {
966    let predicates = query.general_predicates(match_.pattern_index);
967    for predicate in predicates {
968        let name = predicate.operator.as_ref();
969        let args = &predicate.args;
970
971        let result = match name {
972            "eq?" => eval_eq(args, match_, source, false),
973            "not-eq?" => eval_eq(args, match_, source, true),
974            "match?" => eval_match(args, match_, source, false),
975            "not-match?" => eval_match(args, match_, source, true),
976            "any-of?" => eval_any_of(args, match_, source),
977            _ => None,
978        };
979
980        // None means skip (bad args), Some(false) means predicate failed
981        if result == Some(false) {
982            return false;
983        }
984    }
985    true
986}
987
988#[cfg(feature = "fix")]
/// Expand a fix template by substituting capture names with their values.
/// Uses `$capture_name` syntax. `$match` is the full matched text.
///
/// Substitution runs longest-name-first (ties broken alphabetically) so a
/// capture name that is a prefix of another (e.g. `_m` vs `_method`) cannot
/// corrupt the longer placeholder. The previous implementation iterated the
/// `HashMap` directly, whose unspecified order made such templates expand
/// nondeterministically.
pub fn expand_fix_template(template: &str, captures: &HashMap<String, String>) -> String {
    let mut entries: Vec<(&String, &String)> = captures.iter().collect();
    // Longest names first; alphabetical tie-break keeps output deterministic.
    entries.sort_by(|(a, _), (b, _)| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));

    let mut result = template.to_string();
    for (name, value) in entries {
        let placeholder = format!("${name}");
        result = result.replace(&placeholder, value);
    }
    result
}
999
1000#[cfg(feature = "fix")]
1001/// Apply one pass of fixes to findings, returning the number of files modified.
1002///
1003/// Fixes are applied in descending byte-offset order within each file so that
1004/// earlier offsets remain valid as later regions are replaced.
1005///
1006/// When findings overlap (e.g. a nested triple `if let` produces both an inner
1007/// and an outer violation), the innermost finding (highest `start_byte`) is
1008/// applied first and the outer one is skipped for this pass.  The caller
1009/// should re-run the rules and call `apply_fixes` again until no files are
1010/// modified; each pass peels one layer of nesting.
1011pub fn apply_fixes(findings: &[Finding]) -> std::io::Result<usize> {
1012    // Group findings by file
1013    let mut by_file: HashMap<&PathBuf, Vec<&Finding>> = HashMap::new();
1014    for finding in findings {
1015        if finding.fix.is_some() {
1016            by_file.entry(&finding.file).or_default().push(finding);
1017        }
1018    }
1019
1020    let mut files_modified = 0;
1021
1022    for (file, mut file_findings) in by_file {
1023        // Descending start_byte: innermost (highest offset) findings are
1024        // processed first, so their replacements don't shift the offsets of
1025        // earlier findings in the same file.
1026        file_findings.sort_by(|a, b| b.start_byte.cmp(&a.start_byte));
1027
1028        let mut content = std::fs::read_to_string(file)?;
1029        // Track byte ranges that have already been replaced in this pass.
1030        // Any finding whose range overlaps an applied range is skipped — it
1031        // is an outer wrapper of an already-fixed inner finding, and its
1032        // captures are stale.  The next pass will pick it up with fresh
1033        // byte offsets.
1034        let mut applied: Vec<(usize, usize)> = Vec::new();
1035        let mut file_changed = false;
1036
1037        for finding in file_findings {
1038            let overlaps = applied
1039                .iter()
1040                .any(|&(s, e)| finding.start_byte < e && finding.end_byte > s);
1041            if overlaps {
1042                continue;
1043            }
1044
1045            // fix.is_some() is guaranteed: by_file only includes findings where fix.is_some()
1046            let Some(fix_template) = finding.fix.as_ref() else {
1047                continue;
1048            };
1049            let replacement = expand_fix_template(fix_template, &finding.captures);
1050
1051            let before = &content[..finding.start_byte];
1052            let after = &content[finding.end_byte..];
1053            content = format!("{}{}{}", before, replacement, after);
1054
1055            applied.push((finding.start_byte, finding.end_byte));
1056            file_changed = true;
1057        }
1058
1059        if file_changed {
1060            std::fs::write(file, &content)?;
1061            files_modified += 1;
1062        }
1063    }
1064
1065    Ok(files_modified)
1066}
1067
1068/// Collect source files from a directory, optionally filtered by [`PathFilter`].
1069fn collect_source_files(
1070    root: &Path,
1071    filter: &normalize_rules_config::PathFilter,
1072    walk_config: &normalize_rules_config::WalkConfig,
1073) -> Vec<PathBuf> {
1074    let mut files = Vec::new();
1075
1076    let ignore_files = walk_config.ignore_files();
1077    let has_gitignore = ignore_files.contains(&".gitignore");
1078    let mut builder = ignore::WalkBuilder::new(root);
1079    builder
1080        .hidden(false)
1081        .git_ignore(has_gitignore)
1082        .git_global(has_gitignore)
1083        .git_exclude(has_gitignore);
1084    // Add any non-.gitignore ignore files
1085    for file in &ignore_files {
1086        if *file != ".gitignore" {
1087            let ignore_path = root.join(file);
1088            if ignore_path.exists() {
1089                builder.add_ignore(ignore_path);
1090            }
1091        }
1092    }
1093    // Compile gitignore-style exclude patterns once, anchored at root. Mirrors
1094    // normalize-native-rules::walk::gitignore_walk so `[walk] exclude` patterns
1095    // with slashes (e.g. `.claude/worktrees/`) match correctly.
1096    let excludes = walk_config.compiled_excludes(root);
1097    let root_owned = root.to_path_buf();
1098    builder.filter_entry(move |e| {
1099        let path = e.path();
1100        let rel = path.strip_prefix(&root_owned).unwrap_or(path);
1101        if rel.as_os_str().is_empty() {
1102            return true;
1103        }
1104        let is_dir = e.file_type().is_some_and(|ft| ft.is_dir());
1105        !excludes
1106            .matched_path_or_any_parents(rel, is_dir)
1107            .is_ignore()
1108    });
1109    let walker = builder.build();
1110
1111    for entry in walker.flatten() {
1112        let path = entry.path();
1113        if path.is_file() && support_for_path(path).is_some() {
1114            if !filter.is_empty() {
1115                let rel = path.strip_prefix(root).unwrap_or(path);
1116                if !filter.matches_path(rel) {
1117                    continue;
1118                }
1119            }
1120            files.push(path.to_path_buf());
1121        }
1122    }
1123
1124    files
1125}
1126
/// Split a tree-sitter query string into individual top-level patterns.
///
/// A pattern is a balanced, top-level `(...)` group. `;` line comments and
/// `"..."` string literals (with backslash escapes) are skipped so that
/// parentheses inside them do not affect nesting depth.
fn split_query_patterns(query_str: &str) -> Vec<&str> {
    let src = query_str.as_bytes();
    let mut out = Vec::new();
    let mut nesting = 0i32;
    let mut open_at: Option<usize> = None;
    let mut pos = 0;

    while pos < src.len() {
        match src[pos] {
            b';' => {
                // Line comment: consume through end of line.
                while pos < src.len() && src[pos] != b'\n' {
                    pos += 1;
                }
            }
            b'"' => {
                // String literal: consume through the closing quote,
                // honoring backslash escapes.
                pos += 1;
                while pos < src.len() && src[pos] != b'"' {
                    if src[pos] == b'\\' {
                        pos += 1; // skip escaped char
                    }
                    pos += 1;
                }
                pos += 1; // past the closing quote
            }
            b'(' => {
                // Remember where the outermost group of this pattern began.
                open_at.get_or_insert(pos);
                nesting += 1;
                pos += 1;
            }
            b')' => {
                nesting -= 1;
                pos += 1;
                if nesting == 0 {
                    if let Some(begin) = open_at.take() {
                        out.push(&query_str[begin..pos]);
                    }
                }
            }
            _ => {
                pos += 1;
            }
        }
    }
    out
}
1181
/// Unit tests covering combined-query predicate scoping, single-traversal rule
/// combination, `cfg(test)` region detection, the `applies_in_tests` opt-in,
/// and query-pattern splitting/fallback.
#[cfg(test)]
mod tests {
    use super::*;
    use normalize_languages::GrammarLoader;
    use normalize_languages::parsers::grammar_loader;
    use std::sync::Arc;
    use streaming_iterator::StreamingIterator;

    /// Shared grammar loader used by every test in this module.
    fn loader() -> Arc<GrammarLoader> {
        grammar_loader()
    }

    /// Test that combined queries correctly scope predicates per-pattern.
    #[test]
    fn test_combined_query_predicate_scoping() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");

        // Two patterns with same capture name but different predicate values
        let combined_query = r#"
; Pattern 0: matches unwrap
((call_expression
  function: (field_expression field: (field_identifier) @_method)
  (#eq? @_method "unwrap")) @match)

; Pattern 1: matches expect
((call_expression
  function: (field_expression field: (field_identifier) @_method)
  (#eq? @_method "expect")) @match)
"#;

        let query = tree_sitter::Query::new(&grammar, combined_query)
            .expect("combined query should compile");

        assert_eq!(query.pattern_count(), 2, "should have 2 patterns");

        let test_code = r#"
fn main() {
    let x = Some(5);
    x.unwrap();      // line 4 - should match pattern 0
    x.expect("msg"); // line 5 - should match pattern 1
    x.map(|v| v);    // line 6 - should NOT match
}
"#;

        let mut parser = tree_sitter::Parser::new();
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        parser.set_language(&grammar).unwrap();
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let tree = parser.parse(test_code, None).unwrap();

        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());

        // Collect (pattern_index, matched_text) pairs for matches that pass
        // their own pattern's predicates.
        let mut results: Vec<(usize, String)> = Vec::new();
        while let Some(m) = matches.next() {
            // Check predicates - this is what we're testing
            if !evaluate_predicates(&query, m, test_code.as_bytes()) {
                continue;
            }

            let match_capture = m
                .captures
                .iter()
                .find(|c| query.capture_names()[c.index as usize] == "match");

            if let Some(cap) = match_capture {
                // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
                let text = cap.node.utf8_text(test_code.as_bytes()).unwrap();
                results.push((m.pattern_index, text.to_string()));
            }
        }

        // Should have exactly 2 matches
        assert_eq!(results.len(), 2, "should have 2 matches, got {:?}", results);

        // Pattern 0 should match unwrap
        assert!(
            results
                .iter()
                .any(|(idx, text)| *idx == 0 && text.contains("unwrap")),
            "pattern 0 should match unwrap, got {:?}",
            results
        );

        // Pattern 1 should match expect
        assert!(
            results
                .iter()
                .any(|(idx, text)| *idx == 1 && text.contains("expect")),
            "pattern 1 should match expect, got {:?}",
            results
        );
    }

    /// Test that multiple rules can be combined into single query.
    #[test]
    fn test_combined_rules_single_traversal() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");

        // Simulate combining multiple rule queries
        let rules_queries = [
            (
                "unwrap-rule",
                r#"((call_expression function: (field_expression field: (field_identifier) @_m) (#eq? @_m "unwrap")) @match)"#,
            ),
            (
                "dbg-rule",
                r#"((macro_invocation macro: (identifier) @_name (#eq? @_name "dbg")) @match)"#,
            ),
        ];

        // Combine into single query
        let combined = rules_queries
            .iter()
            .map(|(_, q)| *q)
            .collect::<Vec<_>>()
            .join("\n\n");

        let query =
            tree_sitter::Query::new(&grammar, &combined).expect("combined query should compile");

        let test_code = r#"
fn main() {
    let x = Some(5);
    dbg!(x);        // should match pattern 1 (dbg-rule)
    x.unwrap();     // should match pattern 0 (unwrap-rule)
}
"#;

        let mut parser = tree_sitter::Parser::new();
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        parser.set_language(&grammar).unwrap();
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let tree = parser.parse(test_code, None).unwrap();

        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), test_code.as_bytes());

        // One traversal; record which pattern index each surviving match hit.
        let mut pattern_indices: Vec<usize> = Vec::new();
        while let Some(m) = matches.next() {
            if evaluate_predicates(&query, m, test_code.as_bytes()) {
                pattern_indices.push(m.pattern_index);
            }
        }

        // Should match both patterns
        assert!(
            pattern_indices.contains(&0),
            "should match pattern 0 (unwrap)"
        );
        assert!(pattern_indices.contains(&1), "should match pattern 1 (dbg)");
    }

    /// `test_region_ranges` must flag `#[cfg(test)]` modules (including ones
    /// with additional attributes) but not ordinary modules or top-level code.
    #[test]
    fn test_test_region_ranges_skips_inline_cfg_test_module() {
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");
        let source = r#"
fn outer() {
    let x: Option<i32> = None;
    x.unwrap();
}

#[cfg(test)]
mod tests {
    fn inner() {
        let y: Option<i32> = None;
        y.unwrap();
    }
}

#[cfg(test)]
#[allow(dead_code)]
mod more_tests {
    fn inner2() {
        None::<i32>.unwrap();
    }
}

mod regular_mod {
    fn other() {
        None::<i32>.unwrap();
    }
}
"#;
        let mut parser = tree_sitter::Parser::new();
        assert!(parser.set_language(&grammar).is_ok());
        let tree = parser.parse(source, None).expect("parse");
        let ranges = test_region_ranges("rust", &tree, source.as_bytes(), &loader);
        assert_eq!(
            ranges.len(),
            2,
            "expected two cfg(test) modules, got {ranges:?}"
        );

        // Find each `unwrap()` call and check classification.
        let unwrap_query = tree_sitter::Query::new(
            &grammar,
            r#"((call_expression function: (field_expression field: (field_identifier) @m)) @call (#eq? @m "unwrap"))"#,
        )
        .expect("compile");
        let call_idx = unwrap_query
            .capture_names()
            .iter()
            .position(|n| *n == "call")
            .unwrap_or(0);
        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&unwrap_query, tree.root_node(), source.as_bytes());
        // (line, is_inside_test_region) for every unwrap call site.
        let mut classifications: Vec<(usize, bool)> = Vec::new();
        while let Some(m) = matches.next() {
            for cap in m.captures {
                if cap.index as usize == call_idx {
                    let line = cap.node.start_position().row + 1;
                    let in_test = in_any_range(cap.node.start_byte(), cap.node.end_byte(), &ranges);
                    classifications.push((line, in_test));
                }
            }
        }
        // Lines: 4 (outer, NOT in test), 11 (in test), 19 (in test), 26 (NOT in test)
        let outside: Vec<usize> = classifications
            .iter()
            .filter_map(|(l, t)| if !*t { Some(*l) } else { None })
            .collect();
        let inside: Vec<usize> = classifications
            .iter()
            .filter_map(|(l, t)| if *t { Some(*l) } else { None })
            .collect();
        assert_eq!(
            outside.len(),
            2,
            "expected 2 unwraps outside cfg(test), got {classifications:?}"
        );
        assert_eq!(
            inside.len(),
            2,
            "expected 2 unwraps inside cfg(test), got {classifications:?}"
        );
    }

    /// `applies_in_tests = false` (default) drops findings inside test regions;
    /// `applies_in_tests = true` keeps them. Same file content, two rules with
    /// the same query but different opt-in.
    #[test]
    fn test_applies_in_tests_per_rule_opt_in() {
        let loader = loader();
        let tmp = tempfile::tempdir().expect("tempdir");
        let file_path = tmp.path().join("lib.rs");
        std::fs::write(
            &file_path,
            r#"fn outer() {
    let x: Option<i32> = None;
    x.unwrap();
}

#[cfg(test)]
mod tests {
    fn inner() {
        let y: Option<i32> = None;
        y.unwrap();
    }
}
"#,
        )
        .expect("write fixture");

        let query_str = r#"((call_expression
  function: (field_expression field: (field_identifier) @_m)
  (#eq? @_m "unwrap")) @match)"#;

        // Builds a rule file (frontmatter + query) and parses it; the only
        // varying field is the `applies_in_tests` opt-in.
        let make_rule = |id: &str, applies_in_tests: bool| {
            let frontmatter = format!(
                "# ---\n# id = \"{id}\"\n# severity = \"warning\"\n# message = \"unwrap\"\n# languages = [\"rust\"]\n# applies_in_tests = {applies_in_tests}\n# ---\n\n{query_str}\n"
            );
            crate::parse_rule_content(&frontmatter, id, false).expect("parse rule")
        };

        let path_filter = normalize_rules_config::PathFilter::default();
        let walk_config = normalize_rules_config::WalkConfig::default();
        let debug = DebugFlags::default();

        // Rule with applies_in_tests = false (default): only the outer unwrap.
        let rules_default = vec![make_rule("test/unwrap-default", false)];
        let findings = run_rules(
            &rules_default,
            tmp.path(),
            tmp.path(),
            &loader,
            None,
            None,
            None,
            &debug,
            None,
            &path_filter,
            &walk_config,
        );
        assert_eq!(
            findings.len(),
            1,
            "applies_in_tests=false should drop the cfg(test) finding, got {findings:?}"
        );
        assert_eq!(findings[0].start_line, 3, "outer unwrap is on line 3");

        // Rule with applies_in_tests = true: both unwraps.
        let rules_optin = vec![make_rule("test/unwrap-in-tests", true)];
        let findings = run_rules(
            &rules_optin,
            tmp.path(),
            tmp.path(),
            &loader,
            None,
            None,
            None,
            &debug,
            None,
            &path_filter,
            &walk_config,
        );
        assert_eq!(
            findings.len(),
            2,
            "applies_in_tests=true should keep both unwraps, got {findings:?}"
        );
    }

    /// Comments between patterns must not confuse the splitter.
    #[test]
    fn test_split_query_patterns() {
        let query = r#"
; Pattern 1: comment
((comment) @match (#match? @match "TODO"))
; Pattern 2: line_comment
((line_comment) @match (#match? @match "TODO"))
"#;
        let patterns = split_query_patterns(query);
        assert_eq!(patterns.len(), 2);
        assert!(patterns[0].contains("comment"));
        assert!(patterns[1].contains("line_comment"));
    }

    #[test]
    fn test_cross_grammar_pattern_fallback() {
        // Rust grammar doesn't have `comment` node type but has `line_comment`.
        // A multi-pattern query with both should compile with only valid patterns.
        let loader = loader();
        let grammar = loader.get("rust").expect("rust grammar");

        let query_str = r#"((comment) @match (#match? @match "TODO"))
((line_comment) @match (#match? @match "TODO"))"#;

        // Full query should fail (Rust has no `comment` node type)
        assert!(tree_sitter::Query::new(&grammar, query_str).is_err());

        // But splitting and filtering should succeed
        let patterns = split_query_patterns(query_str);
        let valid: Vec<&str> = patterns
            .into_iter()
            .filter(|p| tree_sitter::Query::new(&grammar, p).is_ok())
            .collect();
        assert_eq!(valid.len(), 1, "only line_comment should compile for Rust");
        assert!(valid[0].contains("line_comment"));
    }
}
1545
/// Sanity checks for the glob semantics that allow-list path patterns rely on.
#[cfg(test)]
mod glob_tests {
    use glob::Pattern;
    // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
    #[test]
    fn test_glob_allow_patterns() {
        // (pattern, candidate path, expected match result)
        let cases = [
            (
                "crates/normalize/src/rg/**",
                "crates/normalize/src/rg/flags/defs.rs",
                true,
            ),
            (
                "crates/normalize/src/rg/**",
                "crates/normalize/src/rg/mod.rs",
                true,
            ),
            ("**/tests/**", "crates/normalize/tests/foo.rs", true),
            (
                "**/tests/fixtures/**",
                "crates/normalize-syntax-rules/tests/fixtures/rust/foo.rs",
                true,
            ),
            (
                "crates/normalize-facts-rules-interpret/src/tests.rs",
                "crates/normalize-facts-rules-interpret/src/tests.rs",
                true,
            ),
            (
                "crates/normalize-manifest/src/*.rs",
                "crates/normalize-manifest/src/nuget.rs",
                true,
            ),
        ];
        for (pattern_str, candidate, want) in cases {
            // normalize-syntax-allow: rust/unwrap-in-impl - test code, literal constant patterns
            let compiled = Pattern::new(pattern_str).unwrap();
            assert_eq!(
                compiled.matches(candidate),
                want,
                "Pattern: {p}, Path: {path}",
                p = pattern_str,
                path = candidate
            );
        }
    }
}