Skip to main content

code_baseline/
scan.rs

1use crate::cli::toml_config::{TomlConfig, TomlRule};
2use crate::git_diff;
3use crate::presets::{self, PresetError};
4use crate::rules::factory::{self, FactoryError};
5use crate::rules::file_presence::FilePresenceRule;
6use crate::rules::{Rule, ScanContext, Violation};
7use globset::{Glob, GlobSet, GlobSetBuilder};
8use ignore::WalkBuilder;
9use rayon::prelude::*;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::fmt;
13use std::fs;
14use std::path::{Path, PathBuf};
15use std::sync::atomic::{AtomicUsize, Ordering};
16use std::sync::Mutex;
17
18/// A plugin config file containing additional rules.
19#[derive(Debug, serde::Deserialize)]
20struct PluginConfig {
21    #[serde(default)]
22    rule: Vec<crate::cli::toml_config::TomlRule>,
23}
24
25#[derive(Debug)]
26pub enum ScanError {
27    ConfigRead(std::io::Error),
28    ConfigParse(toml::de::Error),
29    GlobParse(globset::Error),
30    RuleFactory(FactoryError),
31    Preset(PresetError),
32    GitDiff(String),
33}
34
35impl fmt::Display for ScanError {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            ScanError::ConfigRead(e) => write!(f, "failed to read config: {}", e),
39            ScanError::ConfigParse(e) => write!(f, "failed to parse config: {}", e),
40            ScanError::GlobParse(e) => write!(f, "invalid glob pattern: {}", e),
41            ScanError::RuleFactory(e) => write!(f, "failed to build rule: {}", e),
42            ScanError::Preset(e) => write!(f, "preset error: {}", e),
43            ScanError::GitDiff(e) => write!(f, "git diff failed: {}", e),
44        }
45    }
46}
47
48impl std::error::Error for ScanError {}
49
50pub struct ScanResult {
51    pub violations: Vec<Violation>,
52    pub files_scanned: usize,
53    pub rules_loaded: usize,
54    /// For each ratchet rule: (found_count, max_count).
55    pub ratchet_counts: HashMap<String, (usize, usize)>,
56    /// Number of changed files when using --changed-only.
57    pub changed_files_count: Option<usize>,
58    /// Base ref used for diff when using --changed-only.
59    pub base_ref: Option<String>,
60}
61
62#[derive(Debug, Serialize, serde::Deserialize)]
63pub struct BaselineEntry {
64    pub rule_id: String,
65    pub pattern: String,
66    pub count: usize,
67}
68
69#[derive(Debug, Serialize, serde::Deserialize)]
70pub struct BaselineResult {
71    pub entries: Vec<BaselineEntry>,
72    pub files_scanned: usize,
73}
74
75/// A group of rules that share the same glob patterns.
76/// Glob matching is done once per group, amortizing the cost when
77/// multiple rules use the same inclusion/exclusion patterns.
78struct RuleGroup {
79    inclusion_glob: Option<GlobSet>,
80    exclusion_glob: Option<GlobSet>,
81    rules: Vec<RuleWithConditioning>,
82}
83
84/// A single rule with its conditioning data and pre-computed suppression strings.
85struct RuleWithConditioning {
86    rule: Box<dyn Rule>,
87    file_contains: Option<String>,
88    file_not_contains: Option<String>,
89    /// Pre-computed `"baseline:allow-{rule_id}"` string.
90    allow_marker: String,
91    /// Pre-computed `"baseline:allow-next-line {rule_id}"` string.
92    allow_next_line: String,
93}
94
95/// Result of building rules from config.
96struct BuiltRules {
97    rule_groups: Vec<RuleGroup>,
98    ratchet_thresholds: HashMap<String, usize>,
99    file_presence_rules: Vec<FilePresenceRule>,
100}
101
102/// Build rules from resolved TOML rules. Shared by run_scan and run_scan_stdin.
103fn build_rules(resolved_rules: &[TomlRule]) -> Result<BuiltRules, ScanError> {
104    let mut ratchet_thresholds: HashMap<String, usize> = HashMap::new();
105    let mut file_presence_rules: Vec<FilePresenceRule> = Vec::new();
106
107    // Intermediate representation before grouping
108    struct IntermediateRule {
109        rule: Box<dyn Rule>,
110        inclusion_pattern: Option<String>,
111        exclusion_patterns: Vec<String>,
112        file_contains: Option<String>,
113        file_not_contains: Option<String>,
114    }
115
116    let mut intermediates: Vec<IntermediateRule> = Vec::new();
117
118    for toml_rule in resolved_rules {
119        let rule_config = toml_rule.to_rule_config();
120
121        // File-presence rules are handled separately (they check existence, not content)
122        if toml_rule.rule_type == "file-presence" {
123            if let Ok(fp_rule) = FilePresenceRule::new(&rule_config) {
124                file_presence_rules.push(fp_rule);
125            }
126            continue;
127        }
128
129        let rule = factory::build_rule(&toml_rule.rule_type, &rule_config)
130            .map_err(ScanError::RuleFactory)?;
131
132        if toml_rule.rule_type == "ratchet" {
133            if let Some(max) = toml_rule.max_count {
134                ratchet_thresholds.insert(rule.id().to_string(), max);
135            }
136        }
137
138        let inclusion_pattern = rule.file_glob().map(|s| s.to_string());
139        let exclusion_patterns = toml_rule.exclude_glob.clone();
140
141        intermediates.push(IntermediateRule {
142            rule,
143            inclusion_pattern,
144            exclusion_patterns,
145            file_contains: toml_rule.file_contains.clone(),
146            file_not_contains: toml_rule.file_not_contains.clone(),
147        });
148    }
149
150    // Group rules by (inclusion_pattern, exclusion_patterns) to avoid redundant glob matching.
151    let mut groups: Vec<((Option<String>, Vec<String>), Vec<IntermediateRule>)> = Vec::new();
152
153    for ir in intermediates {
154        let key = (ir.inclusion_pattern.clone(), ir.exclusion_patterns.clone());
155        if let Some(group) = groups.iter_mut().find(|(k, _)| *k == key) {
156            group.1.push(ir);
157        } else {
158            groups.push((key, vec![ir]));
159        }
160    }
161
162    // Build RuleGroups with compiled GlobSets (once per unique pattern)
163    let mut rule_groups: Vec<RuleGroup> = Vec::new();
164    for ((inc_pattern, exc_patterns), intermediates) in groups {
165        let inclusion_glob = if let Some(ref pattern) = inc_pattern {
166            Some(build_glob_set_from_pattern(pattern)?)
167        } else {
168            None
169        };
170
171        let exclusion_glob = if !exc_patterns.is_empty() {
172            Some(build_glob_set(&exc_patterns)?)
173        } else {
174            None
175        };
176
177        let rules = intermediates
178            .into_iter()
179            .map(|ir| {
180                let id = ir.rule.id().to_string();
181                RuleWithConditioning {
182                    rule: ir.rule,
183                    file_contains: ir.file_contains,
184                    file_not_contains: ir.file_not_contains,
185                    allow_marker: format!("baseline:allow-{}", id),
186                    allow_next_line: format!("baseline:allow-next-line {}", id),
187                }
188            })
189            .collect();
190
191        rule_groups.push(RuleGroup {
192            inclusion_glob,
193            exclusion_glob,
194            rules,
195        });
196    }
197
198    Ok(BuiltRules {
199        rule_groups,
200        ratchet_thresholds,
201        file_presence_rules,
202    })
203}
204
205/// Check if a rule group matches a file path (inclusion + exclusion globs).
206fn group_matches_file(group: &RuleGroup, file_str: &str, file_name: &str) -> bool {
207    let included = match &group.inclusion_glob {
208        Some(gs) => gs.is_match(file_str) || gs.is_match(file_name),
209        None => true,
210    };
211    if !included {
212        return false;
213    }
214    if let Some(ref exc) = group.exclusion_glob {
215        if exc.is_match(file_str) || exc.is_match(file_name) {
216            return false;
217        }
218    }
219    true
220}
221
222/// Check file-context conditioning (file_contains / file_not_contains) with caching.
223fn passes_file_conditioning_cached<'a>(
224    rule: &'a RuleWithConditioning,
225    content: &str,
226    cache: &mut HashMap<&'a str, bool>,
227) -> bool {
228    if let Some(ref needle) = rule.file_contains {
229        let &mut result = cache
230            .entry(needle.as_str())
231            .or_insert_with(|| content.contains(needle.as_str()));
232        if !result {
233            return false;
234        }
235    }
236    if let Some(ref needle) = rule.file_not_contains {
237        let &mut result = cache
238            .entry(needle.as_str())
239            .or_insert_with(|| content.contains(needle.as_str()));
240        if result {
241            return false;
242        }
243    }
244    true
245}
246
247/// Run rules against content and collect violations, filtering escape-hatch comments.
248fn run_rules_on_content(
249    rule_groups: &[RuleGroup],
250    file_path: &Path,
251    content: &str,
252    file_str: &str,
253    file_name: &str,
254) -> Vec<Violation> {
255    let mut violations = Vec::new();
256    let content_lines: Vec<&str> = content.lines().collect();
257    let ctx = ScanContext {
258        file_path,
259        content,
260    };
261    let mut conditioning_cache: HashMap<&str, bool> = HashMap::new();
262
263    for group in rule_groups {
264        if !group_matches_file(group, file_str, file_name) {
265            continue;
266        }
267
268        for rule_cond in &group.rules {
269            if !passes_file_conditioning_cached(rule_cond, content, &mut conditioning_cache) {
270                continue;
271            }
272
273            let file_violations = rule_cond.rule.check_file(&ctx);
274            for v in file_violations {
275                if let Some(line_num) = v.line {
276                    if is_suppressed(
277                        &content_lines,
278                        line_num,
279                        &rule_cond.allow_marker,
280                        &rule_cond.allow_next_line,
281                    ) {
282                        continue;
283                    }
284                }
285                violations.push(v);
286            }
287        }
288    }
289
290    violations
291}
292
293/// Run a full scan: parse config, build rules, walk files, collect violations.
294pub fn run_scan(config_path: &Path, target_paths: &[PathBuf]) -> Result<ScanResult, ScanError> {
295    // 1. Read and parse TOML config
296    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
297    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
298
299    // 2. Load plugin rules from external TOML files
300    let mut plugin_rules: Vec<crate::cli::toml_config::TomlRule> = Vec::new();
301    for plugin_path in &toml_config.baseline.plugins {
302        let plugin_text = fs::read_to_string(plugin_path).map_err(ScanError::ConfigRead)?;
303        let plugin_config: PluginConfig =
304            toml::from_str(&plugin_text).map_err(ScanError::ConfigParse)?;
305        plugin_rules.extend(plugin_config.rule);
306    }
307
308    // 3. Resolve presets and merge with user-defined rules + plugin rules
309    let mut all_user_rules = toml_config.rule.clone();
310    all_user_rules.extend(plugin_rules);
311
312    let resolved_rules = presets::resolve_rules(
313        &toml_config.baseline.extends,
314        &all_user_rules,
315    )
316    .map_err(ScanError::Preset)?;
317
318    // 4. Build exclude glob set
319    let exclude_set = build_glob_set(&toml_config.baseline.exclude)?;
320
321    // 5. Build rules via factory
322    let built = build_rules(&resolved_rules)?;
323    let rules_loaded: usize = built.rule_groups.iter().map(|g| g.rules.len()).sum();
324
325    // 6. Walk target paths and collect files
326    let files = collect_files(target_paths, &exclude_set);
327
328    // 7. Run rules on each file (parallel)
329    let files_scanned = AtomicUsize::new(0);
330
331    let mut violations: Vec<Violation> = files
332        .par_iter()
333        .filter_map(|file_path| {
334            let file_str = file_path.to_string_lossy();
335            let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
336
337            // Pre-check: does ANY rule group match this file? If not, skip the read entirely.
338            let any_match = built
339                .rule_groups
340                .iter()
341                .any(|g| group_matches_file(g, &file_str, &file_name));
342            if !any_match {
343                return None;
344            }
345
346            let content = fs::read_to_string(file_path).ok()?;
347
348            files_scanned.fetch_add(1, Ordering::Relaxed);
349            let file_violations = run_rules_on_content(
350                &built.rule_groups,
351                file_path,
352                &content,
353                &file_str,
354                &file_name,
355            );
356            if file_violations.is_empty() {
357                None
358            } else {
359                Some(file_violations)
360            }
361        })
362        .flatten()
363        .collect();
364
365    // 8. Run file-presence checks
366    for fp_rule in &built.file_presence_rules {
367        let mut fp_violations = fp_rule.check_paths(target_paths);
368        violations.append(&mut fp_violations);
369    }
370
371    // 9. Apply ratchet thresholds
372    let ratchet_counts = apply_ratchet_thresholds(&mut violations, &built.ratchet_thresholds);
373
374    Ok(ScanResult {
375        violations,
376        files_scanned: files_scanned.load(Ordering::Relaxed),
377        rules_loaded,
378        ratchet_counts,
379        changed_files_count: None,
380        base_ref: None,
381    })
382}
383
384/// Suppress ratchet violations that are within budget. Returns counts for display.
385fn apply_ratchet_thresholds(
386    violations: &mut Vec<Violation>,
387    thresholds: &HashMap<String, usize>,
388) -> HashMap<String, (usize, usize)> {
389    if thresholds.is_empty() {
390        return HashMap::new();
391    }
392
393    // Count violations per ratchet rule
394    let mut counts: HashMap<String, usize> = HashMap::new();
395    for v in violations.iter() {
396        if thresholds.contains_key(&v.rule_id) {
397            *counts.entry(v.rule_id.clone()).or_insert(0) += 1;
398        }
399    }
400
401    // Build result map and determine which rules to suppress
402    let mut result: HashMap<String, (usize, usize)> = HashMap::new();
403    let mut suppress: std::collections::HashSet<String> = std::collections::HashSet::new();
404
405    for (rule_id, &max) in thresholds {
406        let found = counts.get(rule_id).copied().unwrap_or(0);
407        result.insert(rule_id.clone(), (found, max));
408        if found <= max {
409            suppress.insert(rule_id.clone());
410        }
411    }
412
413    // Remove suppressed violations
414    if !suppress.is_empty() {
415        violations.retain(|v| !suppress.contains(&v.rule_id));
416    }
417
418    result
419}
420
421/// Run a scan on stdin content with a virtual filename.
422pub fn run_scan_stdin(
423    config_path: &Path,
424    content: &str,
425    filename: &str,
426) -> Result<ScanResult, ScanError> {
427    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
428    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
429
430    let resolved_rules = presets::resolve_rules(
431        &toml_config.baseline.extends,
432        &toml_config.rule,
433    )
434    .map_err(ScanError::Preset)?;
435
436    let built = build_rules(&resolved_rules)?;
437    let rules_loaded: usize = built.rule_groups.iter().map(|g| g.rules.len()).sum();
438
439    let file_path = PathBuf::from(filename);
440    let file_str = file_path.to_string_lossy();
441    let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
442
443    let violations =
444        run_rules_on_content(&built.rule_groups, &file_path, content, &file_str, &file_name);
445
446    let mut violations = violations;
447    let ratchet_counts = apply_ratchet_thresholds(&mut violations, &built.ratchet_thresholds);
448
449    Ok(ScanResult {
450        violations,
451        files_scanned: 1,
452        rules_loaded,
453        ratchet_counts,
454        changed_files_count: None,
455        base_ref: None,
456    })
457}
458
459/// Run a scan filtered to only files/lines changed relative to a base branch.
460pub fn run_scan_changed(
461    config_path: &Path,
462    target_paths: &[PathBuf],
463    base_ref: &str,
464) -> Result<ScanResult, ScanError> {
465    // Get diff info from git
466    let diff = git_diff::diff_info(base_ref).map_err(|e| ScanError::GitDiff(e.to_string()))?;
467    let repo_root = git_diff::repo_root().map_err(|e| ScanError::GitDiff(e.to_string()))?;
468
469    let changed_files_count = diff.changed_lines.len();
470
471    // Run normal scan
472    let mut result = run_scan(config_path, target_paths)?;
473
474    // Post-filter violations to only those in changed files/lines
475    result.violations.retain(|v| {
476        // Compute relative path from repo root for matching against diff
477        let rel_path = if v.file.is_absolute() {
478            v.file.strip_prefix(&repo_root).unwrap_or(&v.file).to_path_buf()
479        } else {
480            v.file.clone()
481        };
482
483        if !diff.has_file(&rel_path) {
484            return false;
485        }
486
487        // File-level violations (no line number) pass if file is changed
488        match v.line {
489            Some(line) => diff.has_line(&rel_path, line),
490            None => true,
491        }
492    });
493
494    result.changed_files_count = Some(changed_files_count);
495    result.base_ref = Some(base_ref.to_string());
496
497    Ok(result)
498}
499
500/// Run baseline counting: parse config, build only ratchet rules, count matches.
501pub fn run_baseline(
502    config_path: &Path,
503    target_paths: &[PathBuf],
504) -> Result<BaselineResult, ScanError> {
505    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
506    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
507
508    // Resolve presets and merge with user-defined rules
509    let resolved_rules = presets::resolve_rules(
510        &toml_config.baseline.extends,
511        &toml_config.rule,
512    )
513    .map_err(ScanError::Preset)?;
514
515    let exclude_set = build_glob_set(&toml_config.baseline.exclude)?;
516
517    // Build only ratchet rules
518    let mut rules: Vec<(Box<dyn Rule>, Option<GlobSet>, String)> = Vec::new();
519    for toml_rule in &resolved_rules {
520        if toml_rule.rule_type != "ratchet" {
521            continue;
522        }
523        let rule_config = toml_rule.to_rule_config();
524        let rule = factory::build_rule(&toml_rule.rule_type, &rule_config)
525            .map_err(ScanError::RuleFactory)?;
526
527        let pattern = toml_rule.pattern.clone().unwrap_or_default();
528
529        let rule_glob = if let Some(ref pat) = rule.file_glob() {
530            Some(build_glob_set_from_pattern(pat)?)
531        } else {
532            None
533        };
534
535        rules.push((rule, rule_glob, pattern));
536    }
537
538    let files = collect_files(target_paths, &exclude_set);
539
540    let files_scanned = AtomicUsize::new(0);
541
542    let counts: HashMap<String, usize> = files
543        .par_iter()
544        .filter_map(|file_path| {
545            let content = fs::read_to_string(file_path).ok()?;
546
547            files_scanned.fetch_add(1, Ordering::Relaxed);
548            let ctx = ScanContext {
549                file_path,
550                content: &content,
551            };
552
553            let mut local_counts: HashMap<String, usize> = HashMap::new();
554            for (rule, rule_glob, _) in &rules {
555                if let Some(ref gs) = rule_glob {
556                    let file_str = file_path.to_string_lossy();
557                    let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
558                    if !gs.is_match(&*file_str) && !gs.is_match(&*file_name) {
559                        continue;
560                    }
561                }
562
563                let violations = rule.check_file(&ctx);
564                if !violations.is_empty() {
565                    *local_counts.entry(rule.id().to_string()).or_insert(0) += violations.len();
566                }
567            }
568
569            if local_counts.is_empty() {
570                None
571            } else {
572                Some(local_counts)
573            }
574        })
575        .reduce(
576            || HashMap::new(),
577            |mut acc, local| {
578                for (k, v) in local {
579                    *acc.entry(k).or_insert(0) += v;
580                }
581                acc
582            },
583        );
584
585    let entries: Vec<BaselineEntry> = rules
586        .iter()
587        .map(|(rule, _, pattern)| BaselineEntry {
588            rule_id: rule.id().to_string(),
589            pattern: pattern.clone(),
590            count: counts.get(rule.id()).copied().unwrap_or(0),
591        })
592        .collect();
593
594    Ok(BaselineResult {
595        entries,
596        files_scanned: files_scanned.load(Ordering::Relaxed),
597    })
598}
599
/// Tell whether an escape-hatch comment suppresses a violation on `line_num`.
///
/// `line_num` is 1-indexed. A violation is suppressed when
/// - its own line contains `allow_marker` or `baseline:allow-all`, or
/// - the line directly above contains `allow_next_line` or
///   `baseline:allow-next-line all`.
///
/// Line numbers outside the file (0 or past the last line) are never
/// suppressed. The marker strings are passed in pre-built so no allocation
/// happens per call.
fn is_suppressed(lines: &[&str], line_num: usize, allow_marker: &str, allow_next_line: &str) -> bool {
    if line_num == 0 || line_num > lines.len() {
        return false;
    }
    let idx = line_num - 1;

    // Same-line markers.
    let current = lines[idx];
    if current.contains(allow_marker) || current.contains("baseline:allow-all") {
        return true;
    }

    // Next-line markers live on the preceding line, if there is one.
    match idx.checked_sub(1) {
        Some(prev_idx) => {
            let prev = lines[prev_idx];
            prev.contains(allow_next_line) || prev.contains("baseline:allow-next-line all")
        }
        None => false,
    }
}
625
626pub(crate) fn collect_files(target_paths: &[PathBuf], exclude_set: &GlobSet) -> Vec<PathBuf> {
627    let mut files: Vec<PathBuf> = Vec::new();
628    for target in target_paths {
629        if target.is_file() {
630            files.push(target.clone());
631        } else {
632            // Use the `ignore` crate's parallel walker for multi-threaded directory traversal.
633            let walker = WalkBuilder::new(target)
634                .hidden(true) // skip hidden files/dirs like .git
635                .git_ignore(true) // respect .gitignore
636                .git_global(true) // respect global gitignore
637                .git_exclude(true) // respect .git/info/exclude
638                .build_parallel();
639
640            let collected: Mutex<Vec<PathBuf>> = Mutex::new(Vec::new());
641
642            walker.run(|| {
643                Box::new(|entry| {
644                    if let Ok(entry) = entry {
645                        if entry.file_type().map_or(false, |ft| ft.is_file()) {
646                            let path = entry.into_path();
647                            let rel = path.strip_prefix(target).unwrap_or(&path);
648                            if !exclude_set.is_match(rel.to_string_lossy().as_ref()) {
649                                collected.lock().unwrap().push(path);
650                            }
651                        }
652                    }
653                    ignore::WalkState::Continue
654                })
655            });
656
657            files.extend(collected.into_inner().unwrap());
658        }
659    }
660    files
661}
662
/// Normalize a glob pattern into one or more plain patterns:
///
/// 1. Brace alternation (`{a,b}`) is expanded up front into one pattern per
///    alternative. Nested groups (`{a,{b,c}}`) and several groups in one
///    pattern (`{a,b}.{c,d}`) are expanded fully. A brace group without a
///    top-level comma, or an unbalanced `{`, is left in the pattern untouched.
/// 2. Path-based globs (containing `/`) that do not already start with `**/`
///    or `/` are prefixed with `**/` so they match against absolute paths,
///    e.g. `apps/web/src/**/*.tsx` → `**/apps/web/src/**/*.tsx`.
///
/// Alternatives are returned in the order they appear in the pattern.
fn expand_glob(pattern: &str) -> Vec<String> {
    let mut search_from = 0;
    while let Some(offset) = pattern[search_from..].find('{') {
        let open = search_from + offset;

        if let Some((close, commas)) = find_brace_group(pattern, open) {
            if !commas.is_empty() {
                let prefix = &pattern[..open];
                let suffix = &pattern[close + 1..];

                // Alternatives are the slices between consecutive boundaries:
                // the opening brace, each top-level comma, the closing brace.
                let mut bounds = Vec::with_capacity(commas.len() + 2);
                bounds.push(open);
                bounds.extend(commas);
                bounds.push(close);

                return bounds
                    .windows(2)
                    .flat_map(|pair| {
                        let alt = &pattern[pair[0] + 1..pair[1]];
                        // Recurse: the alternative or the suffix may hold further groups.
                        expand_glob(&format!("{prefix}{alt}{suffix}"))
                    })
                    .collect();
            }
        }

        // Not an alternation (no top-level comma, or unbalanced): keep this
        // brace literally and look for a group further along or inside it.
        search_from = open + 1;
    }

    // Auto-prefix path-based globs that don't already start with ** or /
    // so they match against absolute file paths.
    let normalized = if pattern.contains('/')
        && !pattern.starts_with("**/")
        && !pattern.starts_with('/')
    {
        format!("**/{pattern}")
    } else {
        pattern.to_string()
    };

    vec![normalized]
}

/// Locate the `}` matching the `{` at byte index `open`, honouring nesting.
///
/// Returns the byte index of the closing brace together with the byte indices
/// of the commas that sit directly inside this group (not inside a nested
/// one), or `None` when the brace is never closed.
fn find_brace_group(pattern: &str, open: usize) -> Option<(usize, Vec<usize>)> {
    let mut depth = 0usize;
    let mut commas = Vec::new();
    // `{`, `}` and `,` are ASCII, so scanning bytes yields valid slice indices.
    for (idx, byte) in pattern.bytes().enumerate().skip(open) {
        match byte {
            b'{' => depth += 1,
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return Some((idx, commas));
                }
            }
            b',' if depth == 1 => commas.push(idx),
            _ => {}
        }
    }
    None
}
703
704/// Build a GlobSet from a single pattern string, expanding brace syntax.
705pub(crate) fn build_glob_set_from_pattern(pattern: &str) -> Result<GlobSet, ScanError> {
706    let expanded = expand_glob(pattern);
707    let mut builder = GlobSetBuilder::new();
708    for pat in &expanded {
709        builder.add(Glob::new(pat).map_err(ScanError::GlobParse)?);
710    }
711    builder.build().map_err(ScanError::GlobParse)
712}
713
714pub(crate) fn build_glob_set(patterns: &[String]) -> Result<GlobSet, ScanError> {
715    let mut builder = GlobSetBuilder::new();
716    for pattern in patterns {
717        for pat in &expand_glob(pattern) {
718            builder.add(Glob::new(pat).map_err(ScanError::GlobParse)?);
719        }
720    }
721    builder.build().map_err(ScanError::GlobParse)
722}
723
724#[cfg(test)]
725mod tests {
726    use super::*;
727    use crate::config::Severity;
728
729    fn make_violation(rule_id: &str) -> Violation {
730        Violation {
731            rule_id: rule_id.to_string(),
732            severity: Severity::Error,
733            file: PathBuf::from("test.ts"),
734            line: Some(1),
735            column: Some(1),
736            message: "test".to_string(),
737            suggest: None,
738            source_line: None,
739            fix: None,
740        }
741    }
742
743    /// Count total rules across all groups.
744    fn total_rules(groups: &[RuleGroup]) -> usize {
745        groups.iter().map(|g| g.rules.len()).sum()
746    }
747
748    #[test]
749    fn ratchet_under_budget_suppresses() {
750        let mut violations = vec![
751            make_violation("ratchet-legacy"),
752            make_violation("ratchet-legacy"),
753            make_violation("other-rule"),
754        ];
755        let mut thresholds = HashMap::new();
756        thresholds.insert("ratchet-legacy".to_string(), 5);
757
758        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
759
760        assert_eq!(violations.len(), 1); // only "other-rule" remains
761        assert_eq!(violations[0].rule_id, "other-rule");
762        assert_eq!(counts["ratchet-legacy"], (2, 5));
763    }
764
765    #[test]
766    fn ratchet_over_budget_keeps_all() {
767        let mut violations = vec![
768            make_violation("ratchet-legacy"),
769            make_violation("ratchet-legacy"),
770            make_violation("ratchet-legacy"),
771            make_violation("other-rule"),
772        ];
773        let mut thresholds = HashMap::new();
774        thresholds.insert("ratchet-legacy".to_string(), 2);
775
776        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
777
778        assert_eq!(violations.len(), 4); // all kept
779        assert_eq!(counts["ratchet-legacy"], (3, 2));
780    }
781
782    #[test]
783    fn ratchet_exactly_at_budget_suppresses() {
784        let mut violations = vec![
785            make_violation("ratchet-legacy"),
786            make_violation("ratchet-legacy"),
787        ];
788        let mut thresholds = HashMap::new();
789        thresholds.insert("ratchet-legacy".to_string(), 2);
790
791        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
792
793        assert_eq!(violations.len(), 0); // suppressed (at budget)
794        assert_eq!(counts["ratchet-legacy"], (2, 2));
795    }
796
797    #[test]
798    fn no_ratchet_rules_is_noop() {
799        let mut violations = vec![make_violation("other-rule")];
800        let thresholds = HashMap::new();
801
802        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
803
804        assert_eq!(violations.len(), 1);
805        assert!(counts.is_empty());
806    }
807
808    #[test]
809    fn ratchet_zero_with_matches_keeps_all() {
810        let mut violations = vec![make_violation("ratchet-zero")];
811        let mut thresholds = HashMap::new();
812        thresholds.insert("ratchet-zero".to_string(), 0);
813
814        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
815
816        assert_eq!(violations.len(), 1);
817        assert_eq!(counts["ratchet-zero"], (1, 0));
818    }
819
820    #[test]
821    fn ratchet_zero_no_matches_suppresses() {
822        let mut violations: Vec<Violation> = vec![];
823        let mut thresholds = HashMap::new();
824        thresholds.insert("ratchet-zero".to_string(), 0);
825
826        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
827
828        assert!(violations.is_empty());
829        assert_eq!(counts["ratchet-zero"], (0, 0));
830    }
831
832    // ── is_suppressed tests ──
833
834    #[test]
835    fn suppressed_by_same_line_allow() {
836        let lines = vec![
837            "let x = style={{ color: 'red' }}; // baseline:allow-no-inline-styles",
838        ];
839        assert!(is_suppressed(
840            &lines,
841            1,
842            "baseline:allow-no-inline-styles",
843            "baseline:allow-next-line no-inline-styles",
844        ));
845    }
846
847    #[test]
848    fn suppressed_by_allow_all() {
849        let lines = vec![
850            "let x = style={{ color: 'red' }}; // baseline:allow-all",
851        ];
852        assert!(is_suppressed(
853            &lines,
854            1,
855            "baseline:allow-no-inline-styles",
856            "baseline:allow-next-line no-inline-styles",
857        ));
858        assert!(is_suppressed(
859            &lines,
860            1,
861            "baseline:allow-any-other-rule",
862            "baseline:allow-next-line any-other-rule",
863        ));
864    }
865
866    #[test]
867    fn suppressed_by_allow_next_line() {
868        let lines = vec![
869            "// baseline:allow-next-line no-inline-styles",
870            "let x = style={{ color: 'red' }};",
871        ];
872        assert!(is_suppressed(
873            &lines,
874            2,
875            "baseline:allow-no-inline-styles",
876            "baseline:allow-next-line no-inline-styles",
877        ));
878    }
879
880    #[test]
881    fn suppressed_by_allow_next_line_all() {
882        let lines = vec![
883            "// baseline:allow-next-line all",
884            "let x = style={{ color: 'red' }};",
885        ];
886        assert!(is_suppressed(
887            &lines,
888            2,
889            "baseline:allow-no-inline-styles",
890            "baseline:allow-next-line no-inline-styles",
891        ));
892    }
893
894    #[test]
895    fn not_suppressed_wrong_rule_id() {
896        let lines = vec![
897            "let x = style={{ color: 'red' }}; // baseline:allow-other-rule",
898        ];
899        assert!(!is_suppressed(
900            &lines,
901            1,
902            "baseline:allow-no-inline-styles",
903            "baseline:allow-next-line no-inline-styles",
904        ));
905    }
906
907    #[test]
908    fn not_suppressed_no_comment() {
909        let lines = vec![
910            "let x = style={{ color: 'red' }};",
911        ];
912        assert!(!is_suppressed(
913            &lines,
914            1,
915            "baseline:allow-no-inline-styles",
916            "baseline:allow-next-line no-inline-styles",
917        ));
918    }
919
920    #[test]
921    fn not_suppressed_next_line_wrong_rule() {
922        let lines = vec![
923            "// baseline:allow-next-line other-rule",
924            "let x = style={{ color: 'red' }};",
925        ];
926        assert!(!is_suppressed(
927            &lines,
928            2,
929            "baseline:allow-no-inline-styles",
930            "baseline:allow-next-line no-inline-styles",
931        ));
932    }
933
934    #[test]
935    fn suppressed_line_zero_is_safe() {
936        let lines = vec!["some content"];
937        // line_num 0 should not panic
938        assert!(!is_suppressed(
939            &lines,
940            0,
941            "baseline:allow-any-rule",
942            "baseline:allow-next-line any-rule",
943        ));
944    }
945
946    #[test]
947    fn suppressed_past_end_is_safe() {
948        let lines = vec!["some content"];
949        // line_num past end should not panic
950        assert!(!is_suppressed(
951            &lines,
952            5,
953            "baseline:allow-any-rule",
954            "baseline:allow-next-line any-rule",
955        ));
956    }
957
958    // ── ScanError Display tests ──
959
960    #[test]
961    fn scan_error_display_config_read() {
962        let err = ScanError::ConfigRead(std::io::Error::new(
963            std::io::ErrorKind::NotFound,
964            "not found",
965        ));
966        assert!(err.to_string().contains("failed to read config"));
967    }
968
969    #[test]
970    fn scan_error_display_config_parse() {
971        let toml_err = toml::from_str::<TomlConfig>("not valid toml [[[").unwrap_err();
972        let err = ScanError::ConfigParse(toml_err);
973        assert!(err.to_string().contains("failed to parse config"));
974    }
975
976    #[test]
977    fn scan_error_display_glob_parse() {
978        let glob_err = Glob::new("[invalid").unwrap_err();
979        let err = ScanError::GlobParse(glob_err);
980        assert!(err.to_string().contains("invalid glob pattern"));
981    }
982
983    #[test]
984    fn scan_error_display_rule_factory() {
985        let err = ScanError::RuleFactory(FactoryError::UnknownRuleType("nope".into()));
986        assert!(err.to_string().contains("failed to build rule"));
987    }
988
989    #[test]
990    fn scan_error_display_preset() {
991        let err = ScanError::Preset(PresetError::UnknownPreset {
992            name: "bad".into(),
993            available: vec!["shadcn-strict"],
994        });
995        assert!(err.to_string().contains("preset error"));
996    }
997
998    #[test]
999    fn scan_error_display_git_diff() {
1000        let err = ScanError::GitDiff("diff broke".into());
1001        assert_eq!(err.to_string(), "git diff failed: diff broke");
1002    }
1003
1004    // ── build_rules tests ──
1005
1006    #[test]
1007    fn build_rules_banned_pattern_rule() {
1008        let rules = vec![TomlRule {
1009            id: "no-console".into(),
1010            rule_type: "banned-pattern".into(),
1011            pattern: Some("console\\.log".into()),
1012            message: "no console.log".into(),
1013            glob: Some("**/*.ts".into()),
1014            ..Default::default()
1015        }];
1016
1017        let built = build_rules(&rules).unwrap();
1018        assert_eq!(total_rules(&built.rule_groups), 1);
1019        assert!(built.ratchet_thresholds.is_empty());
1020        assert!(built.file_presence_rules.is_empty());
1021    }
1022
1023    #[test]
1024    fn build_rules_ratchet_records_threshold() {
1025        let rules = vec![TomlRule {
1026            id: "legacy-api".into(),
1027            rule_type: "ratchet".into(),
1028            pattern: Some("legacyCall".into()),
1029            max_count: Some(10),
1030            glob: Some("**/*.ts".into()),
1031            message: "legacy".into(),
1032            ..Default::default()
1033        }];
1034
1035        let built = build_rules(&rules).unwrap();
1036        assert_eq!(total_rules(&built.rule_groups), 1);
1037        assert_eq!(built.ratchet_thresholds["legacy-api"], 10);
1038    }
1039
1040    #[test]
1041    fn build_rules_file_presence_separated() {
1042        let rules = vec![
1043            TomlRule {
1044                id: "has-readme".into(),
1045                rule_type: "file-presence".into(),
1046                required_files: vec!["README.md".into()],
1047                message: "need readme".into(),
1048                ..Default::default()
1049            },
1050            TomlRule {
1051                id: "no-console".into(),
1052                rule_type: "banned-pattern".into(),
1053                pattern: Some("console\\.log".into()),
1054                message: "no console".into(),
1055                ..Default::default()
1056            },
1057        ];
1058
1059        let built = build_rules(&rules).unwrap();
1060        assert_eq!(total_rules(&built.rule_groups), 1); // only banned-pattern
1061        assert_eq!(built.file_presence_rules.len(), 1);
1062    }
1063
1064    #[test]
1065    fn build_rules_unknown_type_errors() {
1066        let rules = vec![TomlRule {
1067            id: "bad".into(),
1068            rule_type: "nonexistent-rule-type".into(),
1069            message: "x".into(),
1070            ..Default::default()
1071        }];
1072
1073        let result = build_rules(&rules);
1074        assert!(result.is_err());
1075        let err = result.err().unwrap();
1076        assert!(matches!(err, ScanError::RuleFactory(_)));
1077    }
1078
1079    #[test]
1080    fn build_rules_with_exclude_glob() {
1081        let rules = vec![TomlRule {
1082            id: "no-console".into(),
1083            rule_type: "banned-pattern".into(),
1084            pattern: Some("console\\.log".into()),
1085            message: "no console".into(),
1086            exclude_glob: vec!["**/test/**".into()],
1087            ..Default::default()
1088        }];
1089
1090        let built = build_rules(&rules).unwrap();
1091        assert_eq!(built.rule_groups.len(), 1);
1092        assert!(built.rule_groups[0].exclusion_glob.is_some());
1093    }
1094
1095    #[test]
1096    fn build_rules_with_file_conditioning() {
1097        let rules = vec![TomlRule {
1098            id: "no-console".into(),
1099            rule_type: "banned-pattern".into(),
1100            pattern: Some("console\\.log".into()),
1101            message: "no console".into(),
1102            file_contains: Some("import React".into()),
1103            file_not_contains: Some("// @generated".into()),
1104            ..Default::default()
1105        }];
1106
1107        let built = build_rules(&rules).unwrap();
1108        assert_eq!(built.rule_groups.len(), 1);
1109        assert!(built.rule_groups[0].rules[0].file_contains.is_some());
1110        assert!(built.rule_groups[0].rules[0].file_not_contains.is_some());
1111    }
1112
1113    // ── group_matches_file tests ──
1114
1115    #[test]
1116    fn group_matches_file_no_glob_matches_all() {
1117        let rules = vec![TomlRule {
1118            id: "r".into(),
1119            rule_type: "banned-pattern".into(),
1120            pattern: Some("x".into()),
1121            message: "m".into(),
1122            ..Default::default()
1123        }];
1124        let built = build_rules(&rules).unwrap();
1125        assert!(group_matches_file(&built.rule_groups[0], "anything.rs", "anything.rs"));
1126    }
1127
1128    #[test]
1129    fn group_matches_file_inclusion_glob_filters() {
1130        let rules = vec![TomlRule {
1131            id: "r".into(),
1132            rule_type: "banned-pattern".into(),
1133            pattern: Some("x".into()),
1134            message: "m".into(),
1135            glob: Some("**/*.tsx".into()),
1136            ..Default::default()
1137        }];
1138        let built = build_rules(&rules).unwrap();
1139        assert!(group_matches_file(&built.rule_groups[0], "src/Foo.tsx", "Foo.tsx"));
1140        assert!(!group_matches_file(&built.rule_groups[0], "src/Foo.rs", "Foo.rs"));
1141    }
1142
1143    #[test]
1144    fn group_matches_file_exclusion_glob_rejects() {
1145        let rules = vec![TomlRule {
1146            id: "r".into(),
1147            rule_type: "banned-pattern".into(),
1148            pattern: Some("x".into()),
1149            message: "m".into(),
1150            exclude_glob: vec!["**/test/**".into()],
1151            ..Default::default()
1152        }];
1153        let built = build_rules(&rules).unwrap();
1154        assert!(group_matches_file(&built.rule_groups[0], "src/app.ts", "app.ts"));
1155        assert!(!group_matches_file(&built.rule_groups[0], "src/test/app.ts", "app.ts"));
1156    }
1157
1158    // ── passes_file_conditioning tests ──
1159
1160    #[test]
1161    fn passes_conditioning_no_conditions() {
1162        let rules = vec![TomlRule {
1163            id: "r".into(),
1164            rule_type: "banned-pattern".into(),
1165            pattern: Some("x".into()),
1166            message: "m".into(),
1167            ..Default::default()
1168        }];
1169        let built = build_rules(&rules).unwrap();
1170        let mut cache = HashMap::new();
1171        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "anything", &mut cache));
1172    }
1173
1174    #[test]
1175    fn passes_conditioning_file_contains_present() {
1176        let rules = vec![TomlRule {
1177            id: "r".into(),
1178            rule_type: "banned-pattern".into(),
1179            pattern: Some("x".into()),
1180            message: "m".into(),
1181            file_contains: Some("import React".into()),
1182            ..Default::default()
1183        }];
1184        let built = build_rules(&rules).unwrap();
1185        let mut cache = HashMap::new();
1186        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React from 'react';", &mut cache));
1187        let mut cache = HashMap::new();
1188        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import Vue from 'vue';", &mut cache));
1189    }
1190
1191    #[test]
1192    fn passes_conditioning_file_not_contains() {
1193        let rules = vec![TomlRule {
1194            id: "r".into(),
1195            rule_type: "banned-pattern".into(),
1196            pattern: Some("x".into()),
1197            message: "m".into(),
1198            file_not_contains: Some("// @generated".into()),
1199            ..Default::default()
1200        }];
1201        let built = build_rules(&rules).unwrap();
1202        let mut cache = HashMap::new();
1203        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "normal code", &mut cache));
1204        let mut cache = HashMap::new();
1205        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "// @generated\nnormal code", &mut cache));
1206    }
1207
1208    #[test]
1209    fn passes_conditioning_both_conditions() {
1210        let rules = vec![TomlRule {
1211            id: "r".into(),
1212            rule_type: "banned-pattern".into(),
1213            pattern: Some("x".into()),
1214            message: "m".into(),
1215            file_contains: Some("import React".into()),
1216            file_not_contains: Some("// @generated".into()),
1217            ..Default::default()
1218        }];
1219        let built = build_rules(&rules).unwrap();
1220        // Has required, missing excluded -> pass
1221        let mut cache = HashMap::new();
1222        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React", &mut cache));
1223        // Missing required -> fail
1224        let mut cache = HashMap::new();
1225        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import Vue", &mut cache));
1226        // Has both -> fail (file_not_contains blocks it)
1227        let mut cache = HashMap::new();
1228        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React // @generated", &mut cache));
1229    }
1230
1231    // ── run_rules_on_content tests ──
1232
1233    #[test]
1234    fn run_rules_on_content_finds_violations() {
1235        let rules = vec![TomlRule {
1236            id: "no-console".into(),
1237            rule_type: "banned-pattern".into(),
1238            pattern: Some("console\\.log".into()),
1239            message: "no console.log".into(),
1240            regex: true,
1241            ..Default::default()
1242        }];
1243        let built = build_rules(&rules).unwrap();
1244        let path = PathBuf::from("test.ts");
1245        let content = "console.log('hello');\nfoo();\n";
1246
1247        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1248        assert_eq!(violations.len(), 1);
1249        assert_eq!(violations[0].rule_id, "no-console");
1250    }
1251
1252    #[test]
1253    fn run_rules_on_content_respects_suppression() {
1254        let rules = vec![TomlRule {
1255            id: "no-console".into(),
1256            rule_type: "banned-pattern".into(),
1257            pattern: Some("console\\.log".into()),
1258            message: "no console.log".into(),
1259            regex: true,
1260            ..Default::default()
1261        }];
1262        let built = build_rules(&rules).unwrap();
1263        let path = PathBuf::from("test.ts");
1264        let content = "console.log('hello'); // baseline:allow-no-console\n";
1265
1266        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1267        assert_eq!(violations.len(), 0);
1268    }
1269
1270    #[test]
1271    fn run_rules_on_content_skips_non_matching_glob() {
1272        let rules = vec![TomlRule {
1273            id: "no-console".into(),
1274            rule_type: "banned-pattern".into(),
1275            pattern: Some("console\\.log".into()),
1276            message: "no console.log".into(),
1277            regex: true,
1278            glob: Some("**/*.tsx".into()),
1279            ..Default::default()
1280        }];
1281        let built = build_rules(&rules).unwrap();
1282        let path = PathBuf::from("test.rs");
1283        let content = "console.log('hello');\n";
1284
1285        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.rs", "test.rs");
1286        assert_eq!(violations.len(), 0);
1287    }
1288
1289    #[test]
1290    fn run_rules_on_content_skips_file_conditioning() {
1291        let rules = vec![TomlRule {
1292            id: "no-console".into(),
1293            rule_type: "banned-pattern".into(),
1294            pattern: Some("console\\.log".into()),
1295            message: "no console.log".into(),
1296            regex: true,
1297            file_contains: Some("import React".into()),
1298            ..Default::default()
1299        }];
1300        let built = build_rules(&rules).unwrap();
1301        let path = PathBuf::from("test.ts");
1302        let content = "console.log('hello');\n"; // no "import React"
1303
1304        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1305        assert_eq!(violations.len(), 0);
1306    }
1307
1308    // ── build_glob_set tests ──
1309
1310    #[test]
1311    fn build_glob_set_empty() {
1312        let gs = build_glob_set(&[]).unwrap();
1313        assert!(!gs.is_match("anything"));
1314    }
1315
1316    #[test]
1317    fn build_glob_set_matches() {
1318        let gs = build_glob_set(&["**/*.ts".into(), "**/*.tsx".into()]).unwrap();
1319        assert!(gs.is_match("src/foo.ts"));
1320        assert!(gs.is_match("src/foo.tsx"));
1321        assert!(!gs.is_match("src/foo.rs"));
1322    }
1323
1324    #[test]
1325    fn build_glob_set_invalid_pattern() {
1326        let err = build_glob_set(&["[invalid".into()]).unwrap_err();
1327        assert!(matches!(err, ScanError::GlobParse(_)));
1328    }
1329
1330    // ── expand_glob / brace expansion tests ──
1331
1332    #[test]
1333    fn expand_glob_no_braces() {
1334        assert_eq!(expand_glob("**/*.ts"), vec!["**/*.ts"]);
1335    }
1336
1337    #[test]
1338    fn expand_glob_single_brace() {
1339        let mut result = expand_glob("**/*.{ts,tsx}");
1340        result.sort();
1341        assert_eq!(result, vec!["**/*.ts", "**/*.tsx"]);
1342    }
1343
1344    #[test]
1345    fn expand_glob_three_alternatives() {
1346        let mut result = expand_glob("src/**/*.{ts,tsx,js}");
1347        result.sort();
1348        // Path-based globs get **/ prefix
1349        assert_eq!(
1350            result,
1351            vec!["**/src/**/*.js", "**/src/**/*.ts", "**/src/**/*.tsx"]
1352        );
1353    }
1354
1355    #[test]
1356    fn expand_glob_no_comma_passthrough() {
1357        // Braces without commas should pass through (e.g. character classes)
1358        assert_eq!(expand_glob("**/*.[tj]s"), vec!["**/*.[tj]s"]);
1359    }
1360
1361    #[test]
1362    fn expand_glob_auto_prefix_path_glob() {
1363        // Path-based globs without **/ prefix get auto-prefixed
1364        assert_eq!(
1365            expand_glob("apps/web/src/**/*.tsx"),
1366            vec!["**/apps/web/src/**/*.tsx"]
1367        );
1368    }
1369
1370    #[test]
1371    fn expand_glob_no_double_prefix() {
1372        // Already prefixed with **/ should not get double-prefixed
1373        assert_eq!(
1374            expand_glob("**/apps/web/src/**/*.tsx"),
1375            vec!["**/apps/web/src/**/*.tsx"]
1376        );
1377    }
1378
1379    #[test]
1380    fn expand_glob_simple_extension_no_prefix() {
1381        // Simple extension globs (no /) should not get prefixed
1382        assert_eq!(expand_glob("*.ts"), vec!["*.ts"]);
1383    }
1384
1385    #[test]
1386    fn build_glob_set_brace_expansion() {
1387        let gs = build_glob_set(&["**/*.{ts,tsx}".into()]).unwrap();
1388        assert!(gs.is_match("src/foo.ts"));
1389        assert!(gs.is_match("src/foo.tsx"));
1390        assert!(!gs.is_match("src/foo.js"));
1391    }
1392
1393    #[test]
1394    fn build_glob_set_from_pattern_brace_expansion() {
1395        let gs = build_glob_set_from_pattern("**/*.{ts,tsx,js,jsx}").unwrap();
1396        assert!(gs.is_match("src/components/Button.tsx"));
1397        assert!(gs.is_match("lib/utils.js"));
1398        assert!(!gs.is_match("src/main.rs"));
1399    }
1400
1401    #[test]
1402    fn build_glob_set_from_pattern_path_glob() {
1403        let gs = build_glob_set_from_pattern("src/components/**/*.{ts,tsx}").unwrap();
1404        assert!(gs.is_match("src/components/Button.tsx"));
1405        assert!(gs.is_match("src/components/deep/nested/Card.ts"));
1406        assert!(!gs.is_match("lib/utils.tsx"));
1407    }
1408
1409    #[test]
1410    fn build_glob_set_path_glob_matches_absolute() {
1411        // The real-world case: "apps/web/src/**/*.{ts,tsx}" must match absolute paths
1412        let gs = build_glob_set_from_pattern("apps/web/src/**/*.{ts,tsx}").unwrap();
1413        assert!(gs.is_match("/Users/dev/project/apps/web/src/components/Foo.tsx"));
1414        assert!(gs.is_match("apps/web/src/index.ts"));
1415        assert!(!gs.is_match("/Users/dev/project/apps/api/src/index.ts"));
1416    }
1417
1418    // ── rule grouping tests ──
1419
1420    #[test]
1421    fn build_rules_groups_same_glob() {
1422        let rules = vec![
1423            TomlRule {
1424                id: "no-console".into(),
1425                rule_type: "banned-pattern".into(),
1426                pattern: Some("console\\.log".into()),
1427                message: "no console".into(),
1428                glob: Some("**/*.ts".into()),
1429                regex: true,
1430                ..Default::default()
1431            },
1432            TomlRule {
1433                id: "no-debugger".into(),
1434                rule_type: "banned-pattern".into(),
1435                pattern: Some("debugger".into()),
1436                message: "no debugger".into(),
1437                glob: Some("**/*.ts".into()),
1438                ..Default::default()
1439            },
1440        ];
1441
1442        let built = build_rules(&rules).unwrap();
1443        // Both rules share the same glob, so they should be in one group
1444        assert_eq!(built.rule_groups.len(), 1);
1445        assert_eq!(built.rule_groups[0].rules.len(), 2);
1446    }
1447
1448    #[test]
1449    fn build_rules_separates_different_globs() {
1450        let rules = vec![
1451            TomlRule {
1452                id: "no-console".into(),
1453                rule_type: "banned-pattern".into(),
1454                pattern: Some("console\\.log".into()),
1455                message: "no console".into(),
1456                glob: Some("**/*.ts".into()),
1457                regex: true,
1458                ..Default::default()
1459            },
1460            TomlRule {
1461                id: "no-debugger".into(),
1462                rule_type: "banned-pattern".into(),
1463                pattern: Some("debugger".into()),
1464                message: "no debugger".into(),
1465                glob: Some("**/*.tsx".into()),
1466                ..Default::default()
1467            },
1468        ];
1469
1470        let built = build_rules(&rules).unwrap();
1471        // Different globs -> separate groups
1472        assert_eq!(built.rule_groups.len(), 2);
1473        assert_eq!(built.rule_groups[0].rules.len(), 1);
1474        assert_eq!(built.rule_groups[1].rules.len(), 1);
1475    }
1476
1477    // ── run_scan integration tests ──
1478
1479    #[test]
1480    fn run_scan_with_banned_pattern() {
1481        let dir = tempfile::tempdir().unwrap();
1482
1483        // Write config
1484        let config = dir.path().join("baseline.toml");
1485        fs::write(
1486            &config,
1487            r#"
1488[baseline]
1489
1490[[rule]]
1491id = "no-console"
1492type = "banned-pattern"
1493severity = "error"
1494pattern = "console\\.log"
1495regex = true
1496message = "Do not use console.log"
1497"#,
1498        )
1499        .unwrap();
1500
1501        // Write a source file
1502        let src_dir = dir.path().join("src");
1503        fs::create_dir(&src_dir).unwrap();
1504        fs::write(src_dir.join("app.ts"), "console.log('hi');\nfoo();\n").unwrap();
1505
1506        let result = run_scan(&config, &[src_dir]).unwrap();
1507        assert_eq!(result.violations.len(), 1);
1508        assert_eq!(result.violations[0].rule_id, "no-console");
1509        assert_eq!(result.files_scanned, 1);
1510        assert_eq!(result.rules_loaded, 1);
1511    }
1512
1513    #[test]
1514    fn run_scan_no_violations() {
1515        let dir = tempfile::tempdir().unwrap();
1516
1517        let config = dir.path().join("baseline.toml");
1518        fs::write(
1519            &config,
1520            r#"
1521[baseline]
1522
1523[[rule]]
1524id = "no-console"
1525type = "banned-pattern"
1526severity = "error"
1527pattern = "console\\.log"
1528regex = true
1529message = "Do not use console.log"
1530glob = "**/*.ts"
1531"#,
1532        )
1533        .unwrap();
1534
1535        let src_dir = dir.path().join("src");
1536        fs::create_dir(&src_dir).unwrap();
1537        fs::write(src_dir.join("app.ts"), "doStuff();\n").unwrap();
1538
1539        let result = run_scan(&config, &[src_dir]).unwrap();
1540        assert!(result.violations.is_empty());
1541        assert_eq!(result.files_scanned, 1);
1542    }
1543
1544    #[test]
1545    fn run_scan_excludes_files() {
1546        let dir = tempfile::tempdir().unwrap();
1547
1548        let config = dir.path().join("baseline.toml");
1549        fs::write(
1550            &config,
1551            r#"
1552[baseline]
1553exclude = ["**/dist/**"]
1554
1555[[rule]]
1556id = "no-console"
1557type = "banned-pattern"
1558severity = "error"
1559pattern = "console\\.log"
1560regex = true
1561message = "no console"
1562"#,
1563        )
1564        .unwrap();
1565
1566        // File in dist should be excluded
1567        let dist_dir = dir.path().join("dist");
1568        fs::create_dir(&dist_dir).unwrap();
1569        fs::write(dist_dir.join("app.ts"), "console.log('hi');\n").unwrap();
1570
1571        let result = run_scan(&config, &[dir.path().to_path_buf()]).unwrap();
1572        // The dist file should be excluded
1573        for v in &result.violations {
1574            assert!(!v.file.to_string_lossy().contains("dist"));
1575        }
1576    }
1577
1578    #[test]
1579    fn run_scan_file_presence_rule() {
1580        let dir = tempfile::tempdir().unwrap();
1581
1582        let config = dir.path().join("baseline.toml");
1583        fs::write(
1584            &config,
1585            r#"
1586[baseline]
1587
1588[[rule]]
1589id = "has-readme"
1590type = "file-presence"
1591severity = "error"
1592required_files = ["README.md"]
1593message = "README.md is required"
1594"#,
1595        )
1596        .unwrap();
1597
1598        // No README.md in dir
1599        let result = run_scan(&config, &[dir.path().to_path_buf()]).unwrap();
1600        assert!(result.violations.iter().any(|v| v.rule_id == "has-readme"));
1601    }
1602
1603    #[test]
1604    fn run_scan_missing_config_errors() {
1605        let result = run_scan(
1606            Path::new("/nonexistent/baseline.toml"),
1607            &[PathBuf::from(".")],
1608        );
1609        assert!(result.is_err());
1610        assert!(matches!(result.err().unwrap(), ScanError::ConfigRead(_)));
1611    }
1612
1613    #[test]
1614    fn run_scan_invalid_config_errors() {
1615        let dir = tempfile::tempdir().unwrap();
1616        let config = dir.path().join("baseline.toml");
1617        fs::write(&config, "this is not valid toml [[[").unwrap();
1618
1619        let result = run_scan(&config, &[dir.path().to_path_buf()]);
1620        assert!(result.is_err());
1621        assert!(matches!(result.err().unwrap(), ScanError::ConfigParse(_)));
1622    }
1623
1624    #[test]
1625    fn run_scan_with_ratchet_rule() {
1626        let dir = tempfile::tempdir().unwrap();
1627
1628        let config = dir.path().join("baseline.toml");
1629        fs::write(
1630            &config,
1631            r#"
1632[baseline]
1633
1634[[rule]]
1635id = "legacy-api"
1636type = "ratchet"
1637severity = "warning"
1638pattern = "legacyCall"
1639max_count = 5
1640message = "legacy api usage"
1641"#,
1642        )
1643        .unwrap();
1644
1645        let src_dir = dir.path().join("src");
1646        fs::create_dir(&src_dir).unwrap();
1647        fs::write(src_dir.join("app.ts"), "legacyCall();\nlegacyCall();\n").unwrap();
1648
1649        let result = run_scan(&config, &[src_dir]).unwrap();
1650        // 2 matches, max 5 -> suppressed
1651        assert!(result.violations.is_empty());
1652        assert_eq!(result.ratchet_counts["legacy-api"], (2, 5));
1653    }
1654
1655    // ── run_scan_stdin tests ──
1656
1657    #[test]
1658    fn run_scan_stdin_finds_violations() {
1659        let dir = tempfile::tempdir().unwrap();
1660
1661        let config = dir.path().join("baseline.toml");
1662        fs::write(
1663            &config,
1664            r#"
1665[baseline]
1666
1667[[rule]]
1668id = "no-console"
1669type = "banned-pattern"
1670severity = "error"
1671pattern = "console\\.log"
1672regex = true
1673message = "no console.log"
1674"#,
1675        )
1676        .unwrap();
1677
1678        let result =
1679            run_scan_stdin(&config, "console.log('hello');\nfoo();\n", "test.ts").unwrap();
1680        assert_eq!(result.violations.len(), 1);
1681        assert_eq!(result.files_scanned, 1);
1682    }
1683
1684    #[test]
1685    fn run_scan_stdin_no_violations() {
1686        let dir = tempfile::tempdir().unwrap();
1687
1688        let config = dir.path().join("baseline.toml");
1689        fs::write(
1690            &config,
1691            r#"
1692[baseline]
1693
1694[[rule]]
1695id = "no-console"
1696type = "banned-pattern"
1697severity = "error"
1698pattern = "console\\.log"
1699regex = true
1700message = "no console.log"
1701glob = "**/*.ts"
1702"#,
1703        )
1704        .unwrap();
1705
1706        let result = run_scan_stdin(&config, "doStuff();\n", "app.ts").unwrap();
1707        assert!(result.violations.is_empty());
1708    }
1709
1710    #[test]
1711    fn run_scan_stdin_glob_filters_filename() {
1712        let dir = tempfile::tempdir().unwrap();
1713
1714        let config = dir.path().join("baseline.toml");
1715        fs::write(
1716            &config,
1717            r#"
1718[baseline]
1719
1720[[rule]]
1721id = "no-console"
1722type = "banned-pattern"
1723severity = "error"
1724pattern = "console\\.log"
1725regex = true
1726message = "no console.log"
1727glob = "**/*.tsx"
1728"#,
1729        )
1730        .unwrap();
1731
1732        // File doesn't match glob
1733        let result =
1734            run_scan_stdin(&config, "console.log('hello');\n", "app.rs").unwrap();
1735        assert!(result.violations.is_empty());
1736    }
1737
1738    // ── run_baseline tests ──
1739
1740    #[test]
1741    fn run_baseline_counts_ratchet_matches() {
1742        let dir = tempfile::tempdir().unwrap();
1743
1744        let config = dir.path().join("baseline.toml");
1745        fs::write(
1746            &config,
1747            r#"
1748[baseline]
1749
1750[[rule]]
1751id = "legacy-api"
1752type = "ratchet"
1753severity = "warning"
1754pattern = "legacyCall"
1755max_count = 100
1756message = "legacy usage"
1757"#,
1758        )
1759        .unwrap();
1760
1761        let src_dir = dir.path().join("src");
1762        fs::create_dir(&src_dir).unwrap();
1763        fs::write(
1764            src_dir.join("app.ts"),
1765            "legacyCall();\nlegacyCall();\nlegacyCall();\n",
1766        )
1767        .unwrap();
1768
1769        let result = run_baseline(&config, &[src_dir]).unwrap();
1770        assert_eq!(result.entries.len(), 1);
1771        assert_eq!(result.entries[0].rule_id, "legacy-api");
1772        assert_eq!(result.entries[0].count, 3);
1773        assert_eq!(result.files_scanned, 1);
1774    }
1775
1776    #[test]
1777    fn run_baseline_skips_non_ratchet_rules() {
1778        let dir = tempfile::tempdir().unwrap();
1779
1780        let config = dir.path().join("baseline.toml");
1781        fs::write(
1782            &config,
1783            r#"
1784[baseline]
1785
1786[[rule]]
1787id = "no-console"
1788type = "banned-pattern"
1789severity = "error"
1790pattern = "console\\.log"
1791regex = true
1792message = "no console"
1793
1794[[rule]]
1795id = "legacy-api"
1796type = "ratchet"
1797severity = "warning"
1798pattern = "legacyCall"
1799max_count = 100
1800message = "legacy usage"
1801"#,
1802        )
1803        .unwrap();
1804
1805        let src_dir = dir.path().join("src");
1806        fs::create_dir(&src_dir).unwrap();
1807        fs::write(src_dir.join("app.ts"), "console.log('hi');\nlegacyCall();\n").unwrap();
1808
1809        let result = run_baseline(&config, &[src_dir]).unwrap();
1810        // Only ratchet rules appear in baseline
1811        assert_eq!(result.entries.len(), 1);
1812        assert_eq!(result.entries[0].rule_id, "legacy-api");
1813    }
1814
1815    // ── collect_files tests ──
1816
1817    #[test]
1818    fn collect_files_single_file() {
1819        let dir = tempfile::tempdir().unwrap();
1820        let file = dir.path().join("test.ts");
1821        fs::write(&file, "content").unwrap();
1822
1823        let empty_glob = build_glob_set(&[]).unwrap();
1824        let files = collect_files(&[file.clone()], &empty_glob);
1825        assert_eq!(files.len(), 1);
1826        assert_eq!(files[0], file);
1827    }
1828
1829    #[test]
1830    fn collect_files_directory_walk() {
1831        let dir = tempfile::tempdir().unwrap();
1832        let sub = dir.path().join("sub");
1833        fs::create_dir(&sub).unwrap();
1834        fs::write(sub.join("a.ts"), "a").unwrap();
1835        fs::write(sub.join("b.ts"), "b").unwrap();
1836
1837        let empty_glob = build_glob_set(&[]).unwrap();
1838        let files = collect_files(&[dir.path().to_path_buf()], &empty_glob);
1839        assert_eq!(files.len(), 2);
1840    }
1841
1842    #[test]
1843    fn collect_files_excludes_patterns() {
1844        let dir = tempfile::tempdir().unwrap();
1845        fs::write(dir.path().join("keep.ts"), "keep").unwrap();
1846        fs::write(dir.path().join("skip.log"), "skip").unwrap();
1847
1848        let exclude = build_glob_set(&["*.log".into()]).unwrap();
1849        let files = collect_files(&[dir.path().to_path_buf()], &exclude);
1850        assert!(files.iter().all(|f| !f.to_string_lossy().ends_with(".log")));
1851        assert!(files.iter().any(|f| f.to_string_lossy().ends_with(".ts")));
1852    }
1853
1854    // ── run_scan with presets ──
1855
1856    #[test]
1857    fn run_scan_with_preset() {
1858        let dir = tempfile::tempdir().unwrap();
1859
1860        let config = dir.path().join("baseline.toml");
1861        fs::write(
1862            &config,
1863            r#"
1864[baseline]
1865extends = ["shadcn-strict"]
1866"#,
1867        )
1868        .unwrap();
1869
1870        let src_dir = dir.path().join("src");
1871        fs::create_dir(&src_dir).unwrap();
1872        fs::write(src_dir.join("app.tsx"), "export default function App() { return <div>hi</div>; }\n").unwrap();
1873
1874        let result = run_scan(&config, &[src_dir]).unwrap();
1875        // Just verify it doesn't error
1876        assert!(result.rules_loaded > 0);
1877    }
1878
1879    // ── run_scan with plugins ──
1880
1881    #[test]
1882    fn run_scan_with_plugin() {
1883        let dir = tempfile::tempdir().unwrap();
1884
1885        let plugin_path = dir.path().join("custom-rules.toml");
1886        fs::write(
1887            &plugin_path,
1888            r#"
1889[[rule]]
1890id = "no-todo"
1891type = "banned-pattern"
1892severity = "warning"
1893pattern = "TODO"
1894message = "No TODOs allowed"
1895"#,
1896        )
1897        .unwrap();
1898
1899        let config = dir.path().join("baseline.toml");
1900        fs::write(
1901            &config,
1902            format!(
1903                r#"
1904[baseline]
1905plugins = ["{}"]
1906"#,
1907                plugin_path.display()
1908            ),
1909        )
1910        .unwrap();
1911
1912        let src_dir = dir.path().join("src");
1913        fs::create_dir(&src_dir).unwrap();
1914        fs::write(src_dir.join("app.ts"), "// TODO: fix this\n").unwrap();
1915
1916        let result = run_scan(&config, &[src_dir]).unwrap();
1917        assert!(result.violations.iter().any(|v| v.rule_id == "no-todo"));
1918    }
1919
1920    #[test]
1921    fn run_scan_skip_no_matching_files() {
1922        let dir = tempfile::tempdir().unwrap();
1923
1924        let config = dir.path().join("baseline.toml");
1925        fs::write(
1926            &config,
1927            r#"
1928[baseline]
1929
1930[[rule]]
1931id = "no-console"
1932type = "banned-pattern"
1933severity = "error"
1934pattern = "console\\.log"
1935regex = true
1936message = "no console"
1937glob = "**/*.tsx"
1938"#,
1939        )
1940        .unwrap();
1941
1942        let src_dir = dir.path().join("src");
1943        fs::create_dir(&src_dir).unwrap();
1944        // Write a .rs file that won't match the *.tsx glob
1945        fs::write(src_dir.join("app.rs"), "console.log('hello');\n").unwrap();
1946
1947        let result = run_scan(&config, &[src_dir]).unwrap();
1948        assert!(result.violations.is_empty());
1949        // The file shouldn't even be read since no rule matches
1950        assert_eq!(result.files_scanned, 0);
1951    }
1952}