Skip to main content

code_baseline/
scan.rs

1use crate::cli::toml_config::{TomlConfig, TomlRule};
2use crate::git_diff;
3use crate::presets::{self, PresetError};
4use crate::rules::factory::{self, FactoryError};
5use crate::rules::file_presence::FilePresenceRule;
6use crate::rules::{Rule, ScanContext, Violation};
7use globset::{Glob, GlobSet, GlobSetBuilder};
8use ignore::WalkBuilder;
9use rayon::prelude::*;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::fmt;
13use std::fs;
14use std::path::{Path, PathBuf};
15use std::sync::atomic::{AtomicUsize, Ordering};
16use std::sync::Mutex;
17
18/// A plugin config file containing additional rules.
19#[derive(Debug, serde::Deserialize)]
20struct PluginConfig {
21    #[serde(default)]
22    rule: Vec<crate::cli::toml_config::TomlRule>,
23}
24
25#[derive(Debug)]
26pub enum ScanError {
27    ConfigRead(std::io::Error),
28    ConfigParse(toml::de::Error),
29    GlobParse(globset::Error),
30    RuleFactory(FactoryError),
31    Preset(PresetError),
32    GitDiff(String),
33}
34
35impl fmt::Display for ScanError {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            ScanError::ConfigRead(e) => write!(f, "failed to read config: {}", e),
39            ScanError::ConfigParse(e) => write!(f, "failed to parse config: {}", e),
40            ScanError::GlobParse(e) => write!(f, "invalid glob pattern: {}", e),
41            ScanError::RuleFactory(e) => write!(f, "failed to build rule: {}", e),
42            ScanError::Preset(e) => write!(f, "preset error: {}", e),
43            ScanError::GitDiff(e) => write!(f, "git diff failed: {}", e),
44        }
45    }
46}
47
48impl std::error::Error for ScanError {}
49
50pub struct ScanResult {
51    pub violations: Vec<Violation>,
52    pub files_scanned: usize,
53    pub rules_loaded: usize,
54    /// For each ratchet rule: (found_count, max_count).
55    pub ratchet_counts: HashMap<String, (usize, usize)>,
56    /// Number of changed files when using --changed-only.
57    pub changed_files_count: Option<usize>,
58    /// Base ref used for diff when using --changed-only.
59    pub base_ref: Option<String>,
60}
61
62#[derive(Debug, Serialize, serde::Deserialize)]
63pub struct BaselineEntry {
64    pub rule_id: String,
65    pub pattern: String,
66    pub count: usize,
67}
68
69#[derive(Debug, Serialize, serde::Deserialize)]
70pub struct BaselineResult {
71    pub entries: Vec<BaselineEntry>,
72    pub files_scanned: usize,
73}
74
75/// A group of rules that share the same glob patterns.
76/// Glob matching is done once per group, amortizing the cost when
77/// multiple rules use the same inclusion/exclusion patterns.
78struct RuleGroup {
79    inclusion_glob: Option<GlobSet>,
80    exclusion_glob: Option<GlobSet>,
81    rules: Vec<RuleWithConditioning>,
82}
83
84/// A single rule with its conditioning data and pre-computed suppression strings.
85struct RuleWithConditioning {
86    rule: Box<dyn Rule>,
87    file_contains: Option<String>,
88    file_not_contains: Option<String>,
89    /// Pre-computed `"baseline:allow-{rule_id}"` string.
90    allow_marker: String,
91    /// Pre-computed `"baseline:allow-next-line {rule_id}"` string.
92    allow_next_line: String,
93}
94
95/// Result of building rules from config.
96struct BuiltRules {
97    rule_groups: Vec<RuleGroup>,
98    ratchet_thresholds: HashMap<String, usize>,
99    file_presence_rules: Vec<FilePresenceRule>,
100}
101
102/// Build rules from resolved TOML rules. Shared by run_scan and run_scan_stdin.
103fn build_rules(resolved_rules: &[TomlRule]) -> Result<BuiltRules, ScanError> {
104    let mut ratchet_thresholds: HashMap<String, usize> = HashMap::new();
105    let mut file_presence_rules: Vec<FilePresenceRule> = Vec::new();
106
107    // Intermediate representation before grouping
108    struct IntermediateRule {
109        rule: Box<dyn Rule>,
110        inclusion_pattern: Option<String>,
111        exclusion_patterns: Vec<String>,
112        file_contains: Option<String>,
113        file_not_contains: Option<String>,
114    }
115
116    let mut intermediates: Vec<IntermediateRule> = Vec::new();
117
118    for toml_rule in resolved_rules {
119        let rule_config = toml_rule.to_rule_config();
120
121        // File-presence rules are handled separately (they check existence, not content)
122        if toml_rule.rule_type == "file-presence" {
123            if let Ok(fp_rule) = FilePresenceRule::new(&rule_config) {
124                file_presence_rules.push(fp_rule);
125            }
126            continue;
127        }
128
129        let rule = factory::build_rule(&toml_rule.rule_type, &rule_config)
130            .map_err(ScanError::RuleFactory)?;
131
132        if toml_rule.rule_type == "ratchet" {
133            if let Some(max) = toml_rule.max_count {
134                ratchet_thresholds.insert(rule.id().to_string(), max);
135            }
136        }
137
138        let inclusion_pattern = rule.file_glob().map(|s| s.to_string());
139        let exclusion_patterns = toml_rule.exclude_glob.clone();
140
141        intermediates.push(IntermediateRule {
142            rule,
143            inclusion_pattern,
144            exclusion_patterns,
145            file_contains: toml_rule.file_contains.clone(),
146            file_not_contains: toml_rule.file_not_contains.clone(),
147        });
148    }
149
150    // Group rules by (inclusion_pattern, exclusion_patterns) to avoid redundant glob matching.
151    let mut groups: Vec<((Option<String>, Vec<String>), Vec<IntermediateRule>)> = Vec::new();
152
153    for ir in intermediates {
154        let key = (ir.inclusion_pattern.clone(), ir.exclusion_patterns.clone());
155        if let Some(group) = groups.iter_mut().find(|(k, _)| *k == key) {
156            group.1.push(ir);
157        } else {
158            groups.push((key, vec![ir]));
159        }
160    }
161
162    // Build RuleGroups with compiled GlobSets (once per unique pattern)
163    let mut rule_groups: Vec<RuleGroup> = Vec::new();
164    for ((inc_pattern, exc_patterns), intermediates) in groups {
165        let inclusion_glob = if let Some(ref pattern) = inc_pattern {
166            Some(build_glob_set_from_pattern(pattern)?)
167        } else {
168            None
169        };
170
171        let exclusion_glob = if !exc_patterns.is_empty() {
172            Some(build_glob_set(&exc_patterns)?)
173        } else {
174            None
175        };
176
177        let rules = intermediates
178            .into_iter()
179            .map(|ir| {
180                let id = ir.rule.id().to_string();
181                RuleWithConditioning {
182                    rule: ir.rule,
183                    file_contains: ir.file_contains,
184                    file_not_contains: ir.file_not_contains,
185                    allow_marker: format!("baseline:allow-{}", id),
186                    allow_next_line: format!("baseline:allow-next-line {}", id),
187                }
188            })
189            .collect();
190
191        rule_groups.push(RuleGroup {
192            inclusion_glob,
193            exclusion_glob,
194            rules,
195        });
196    }
197
198    Ok(BuiltRules {
199        rule_groups,
200        ratchet_thresholds,
201        file_presence_rules,
202    })
203}
204
205/// Check if a rule group matches a file path (inclusion + exclusion globs).
206fn group_matches_file(group: &RuleGroup, file_str: &str, file_name: &str) -> bool {
207    let included = match &group.inclusion_glob {
208        Some(gs) => gs.is_match(file_str) || gs.is_match(file_name),
209        None => true,
210    };
211    if !included {
212        return false;
213    }
214    if let Some(ref exc) = group.exclusion_glob {
215        if exc.is_match(file_str) || exc.is_match(file_name) {
216            return false;
217        }
218    }
219    true
220}
221
222/// Check file-context conditioning (file_contains / file_not_contains) with caching.
223fn passes_file_conditioning_cached<'a>(
224    rule: &'a RuleWithConditioning,
225    content: &str,
226    cache: &mut HashMap<&'a str, bool>,
227) -> bool {
228    if let Some(ref needle) = rule.file_contains {
229        let &mut result = cache
230            .entry(needle.as_str())
231            .or_insert_with(|| content.contains(needle.as_str()));
232        if !result {
233            return false;
234        }
235    }
236    if let Some(ref needle) = rule.file_not_contains {
237        let &mut result = cache
238            .entry(needle.as_str())
239            .or_insert_with(|| content.contains(needle.as_str()));
240        if result {
241            return false;
242        }
243    }
244    true
245}
246
247/// Run rules against content and collect violations, filtering escape-hatch comments.
248fn run_rules_on_content(
249    rule_groups: &[RuleGroup],
250    file_path: &Path,
251    content: &str,
252    file_str: &str,
253    file_name: &str,
254) -> Vec<Violation> {
255    let mut violations = Vec::new();
256    let content_lines: Vec<&str> = content.lines().collect();
257    let ctx = ScanContext {
258        file_path,
259        content,
260    };
261    let mut conditioning_cache: HashMap<&str, bool> = HashMap::new();
262
263    for group in rule_groups {
264        if !group_matches_file(group, file_str, file_name) {
265            continue;
266        }
267
268        for rule_cond in &group.rules {
269            if !passes_file_conditioning_cached(rule_cond, content, &mut conditioning_cache) {
270                continue;
271            }
272
273            let file_violations = rule_cond.rule.check_file(&ctx);
274            for v in file_violations {
275                if let Some(line_num) = v.line {
276                    if is_suppressed(
277                        &content_lines,
278                        line_num,
279                        &rule_cond.allow_marker,
280                        &rule_cond.allow_next_line,
281                    ) {
282                        continue;
283                    }
284                }
285                violations.push(v);
286            }
287        }
288    }
289
290    violations
291}
292
293/// Run a full scan: parse config, build rules, walk files, collect violations.
294pub fn run_scan(config_path: &Path, target_paths: &[PathBuf]) -> Result<ScanResult, ScanError> {
295    // 1. Read and parse TOML config
296    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
297    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
298
299    // 2. Load plugin rules from external TOML files
300    let mut plugin_rules: Vec<crate::cli::toml_config::TomlRule> = Vec::new();
301    for plugin_path in &toml_config.baseline.plugins {
302        let plugin_text = fs::read_to_string(plugin_path).map_err(ScanError::ConfigRead)?;
303        let plugin_config: PluginConfig =
304            toml::from_str(&plugin_text).map_err(ScanError::ConfigParse)?;
305        plugin_rules.extend(plugin_config.rule);
306    }
307
308    // 3. Resolve presets and merge with user-defined rules + plugin rules
309    let mut all_user_rules = toml_config.rule.clone();
310    all_user_rules.extend(plugin_rules);
311
312    let mut resolved_rules = presets::resolve_rules(
313        &toml_config.baseline.extends,
314        &all_user_rules,
315    )
316    .map_err(ScanError::Preset)?;
317
318    // 3b. Resolve scoped presets and append
319    let scoped_rules = presets::resolve_scoped_rules(
320        &toml_config.baseline.scoped,
321        &all_user_rules,
322    )
323    .map_err(ScanError::Preset)?;
324    resolved_rules.extend(scoped_rules);
325
326    // 4. Build exclude glob set
327    let exclude_set = build_glob_set(&toml_config.baseline.exclude)?;
328
329    // 5. Build rules via factory
330    let built = build_rules(&resolved_rules)?;
331    let rules_loaded: usize = built.rule_groups.iter().map(|g| g.rules.len()).sum();
332
333    // 6. Walk target paths and collect files
334    let files = collect_files(target_paths, &exclude_set);
335
336    // 7. Run rules on each file (parallel)
337    let files_scanned = AtomicUsize::new(0);
338
339    let mut violations: Vec<Violation> = files
340        .par_iter()
341        .filter_map(|file_path| {
342            let file_str = file_path.to_string_lossy();
343            let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
344
345            // Pre-check: does ANY rule group match this file? If not, skip the read entirely.
346            let any_match = built
347                .rule_groups
348                .iter()
349                .any(|g| group_matches_file(g, &file_str, &file_name));
350            if !any_match {
351                return None;
352            }
353
354            let content = fs::read_to_string(file_path).ok()?;
355
356            files_scanned.fetch_add(1, Ordering::Relaxed);
357            let file_violations = run_rules_on_content(
358                &built.rule_groups,
359                file_path,
360                &content,
361                &file_str,
362                &file_name,
363            );
364            if file_violations.is_empty() {
365                None
366            } else {
367                Some(file_violations)
368            }
369        })
370        .flatten()
371        .collect();
372
373    // 8. Run file-presence checks
374    for fp_rule in &built.file_presence_rules {
375        let mut fp_violations = fp_rule.check_paths(target_paths);
376        violations.append(&mut fp_violations);
377    }
378
379    // 9. Apply ratchet thresholds
380    let ratchet_counts = apply_ratchet_thresholds(&mut violations, &built.ratchet_thresholds);
381
382    Ok(ScanResult {
383        violations,
384        files_scanned: files_scanned.load(Ordering::Relaxed),
385        rules_loaded,
386        ratchet_counts,
387        changed_files_count: None,
388        base_ref: None,
389    })
390}
391
392/// Suppress ratchet violations that are within budget. Returns counts for display.
393fn apply_ratchet_thresholds(
394    violations: &mut Vec<Violation>,
395    thresholds: &HashMap<String, usize>,
396) -> HashMap<String, (usize, usize)> {
397    if thresholds.is_empty() {
398        return HashMap::new();
399    }
400
401    // Count violations per ratchet rule
402    let mut counts: HashMap<String, usize> = HashMap::new();
403    for v in violations.iter() {
404        if thresholds.contains_key(&v.rule_id) {
405            *counts.entry(v.rule_id.clone()).or_insert(0) += 1;
406        }
407    }
408
409    // Build result map and determine which rules to suppress
410    let mut result: HashMap<String, (usize, usize)> = HashMap::new();
411    let mut suppress: std::collections::HashSet<String> = std::collections::HashSet::new();
412
413    for (rule_id, &max) in thresholds {
414        let found = counts.get(rule_id).copied().unwrap_or(0);
415        result.insert(rule_id.clone(), (found, max));
416        if found <= max {
417            suppress.insert(rule_id.clone());
418        }
419    }
420
421    // Remove suppressed violations
422    if !suppress.is_empty() {
423        violations.retain(|v| !suppress.contains(&v.rule_id));
424    }
425
426    result
427}
428
429/// Run a scan on stdin content with a virtual filename.
430pub fn run_scan_stdin(
431    config_path: &Path,
432    content: &str,
433    filename: &str,
434) -> Result<ScanResult, ScanError> {
435    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
436    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
437
438    let mut resolved_rules = presets::resolve_rules(
439        &toml_config.baseline.extends,
440        &toml_config.rule,
441    )
442    .map_err(ScanError::Preset)?;
443
444    let scoped_rules = presets::resolve_scoped_rules(
445        &toml_config.baseline.scoped,
446        &toml_config.rule,
447    )
448    .map_err(ScanError::Preset)?;
449    resolved_rules.extend(scoped_rules);
450
451    let built = build_rules(&resolved_rules)?;
452    let rules_loaded: usize = built.rule_groups.iter().map(|g| g.rules.len()).sum();
453
454    let file_path = PathBuf::from(filename);
455    let file_str = file_path.to_string_lossy();
456    let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
457
458    let violations =
459        run_rules_on_content(&built.rule_groups, &file_path, content, &file_str, &file_name);
460
461    let mut violations = violations;
462    let ratchet_counts = apply_ratchet_thresholds(&mut violations, &built.ratchet_thresholds);
463
464    Ok(ScanResult {
465        violations,
466        files_scanned: 1,
467        rules_loaded,
468        ratchet_counts,
469        changed_files_count: None,
470        base_ref: None,
471    })
472}
473
474/// Run a scan filtered to only files/lines changed relative to a base branch.
475pub fn run_scan_changed(
476    config_path: &Path,
477    target_paths: &[PathBuf],
478    base_ref: &str,
479) -> Result<ScanResult, ScanError> {
480    // Get diff info from git
481    let diff = git_diff::diff_info(base_ref).map_err(|e| ScanError::GitDiff(e.to_string()))?;
482    let repo_root = git_diff::repo_root().map_err(|e| ScanError::GitDiff(e.to_string()))?;
483
484    let changed_files_count = diff.changed_lines.len();
485
486    // Run normal scan
487    let mut result = run_scan(config_path, target_paths)?;
488
489    // Post-filter violations to only those in changed files/lines
490    result.violations.retain(|v| {
491        // Compute relative path from repo root for matching against diff
492        let rel_path = if v.file.is_absolute() {
493            v.file.strip_prefix(&repo_root).unwrap_or(&v.file).to_path_buf()
494        } else {
495            v.file.clone()
496        };
497
498        if !diff.has_file(&rel_path) {
499            return false;
500        }
501
502        // File-level violations (no line number) pass if file is changed
503        match v.line {
504            Some(line) => diff.has_line(&rel_path, line),
505            None => true,
506        }
507    });
508
509    result.changed_files_count = Some(changed_files_count);
510    result.base_ref = Some(base_ref.to_string());
511
512    Ok(result)
513}
514
515/// Run baseline counting: parse config, build only ratchet rules, count matches.
516pub fn run_baseline(
517    config_path: &Path,
518    target_paths: &[PathBuf],
519) -> Result<BaselineResult, ScanError> {
520    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
521    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
522
523    // Resolve presets and merge with user-defined rules
524    let mut resolved_rules = presets::resolve_rules(
525        &toml_config.baseline.extends,
526        &toml_config.rule,
527    )
528    .map_err(ScanError::Preset)?;
529
530    let scoped_rules = presets::resolve_scoped_rules(
531        &toml_config.baseline.scoped,
532        &toml_config.rule,
533    )
534    .map_err(ScanError::Preset)?;
535    resolved_rules.extend(scoped_rules);
536
537    let exclude_set = build_glob_set(&toml_config.baseline.exclude)?;
538
539    // Build only ratchet rules
540    let mut rules: Vec<(Box<dyn Rule>, Option<GlobSet>, String)> = Vec::new();
541    for toml_rule in &resolved_rules {
542        if toml_rule.rule_type != "ratchet" {
543            continue;
544        }
545        let rule_config = toml_rule.to_rule_config();
546        let rule = factory::build_rule(&toml_rule.rule_type, &rule_config)
547            .map_err(ScanError::RuleFactory)?;
548
549        let pattern = toml_rule.pattern.clone().unwrap_or_default();
550
551        let rule_glob = if let Some(ref pat) = rule.file_glob() {
552            Some(build_glob_set_from_pattern(pat)?)
553        } else {
554            None
555        };
556
557        rules.push((rule, rule_glob, pattern));
558    }
559
560    let files = collect_files(target_paths, &exclude_set);
561
562    let files_scanned = AtomicUsize::new(0);
563
564    let counts: HashMap<String, usize> = files
565        .par_iter()
566        .filter_map(|file_path| {
567            let content = fs::read_to_string(file_path).ok()?;
568
569            files_scanned.fetch_add(1, Ordering::Relaxed);
570            let ctx = ScanContext {
571                file_path,
572                content: &content,
573            };
574
575            let mut local_counts: HashMap<String, usize> = HashMap::new();
576            for (rule, rule_glob, _) in &rules {
577                if let Some(ref gs) = rule_glob {
578                    let file_str = file_path.to_string_lossy();
579                    let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
580                    if !gs.is_match(&*file_str) && !gs.is_match(&*file_name) {
581                        continue;
582                    }
583                }
584
585                let violations = rule.check_file(&ctx);
586                if !violations.is_empty() {
587                    *local_counts.entry(rule.id().to_string()).or_insert(0) += violations.len();
588                }
589            }
590
591            if local_counts.is_empty() {
592                None
593            } else {
594                Some(local_counts)
595            }
596        })
597        .reduce(
598            || HashMap::new(),
599            |mut acc, local| {
600                for (k, v) in local {
601                    *acc.entry(k).or_insert(0) += v;
602                }
603                acc
604            },
605        );
606
607    let entries: Vec<BaselineEntry> = rules
608        .iter()
609        .map(|(rule, _, pattern)| BaselineEntry {
610            rule_id: rule.id().to_string(),
611            pattern: pattern.clone(),
612            count: counts.get(rule.id()).copied().unwrap_or(0),
613        })
614        .collect();
615
616    Ok(BaselineResult {
617        entries,
618        files_scanned: files_scanned.load(Ordering::Relaxed),
619    })
620}
621
/// Check if a violation is suppressed by an escape-hatch comment.
///
/// `line_num` is 1-indexed. A violation is suppressed when
/// * its own line contains `allow_marker` or `baseline:allow-all`, or
/// * the line directly above contains `allow_next_line` or
///   `baseline:allow-next-line all`.
///
/// Markers must end at an identifier boundary. A marker directly followed by
/// an alphanumeric character, or by `-`/`_` plus an alphanumeric character,
/// is part of a longer rule id and does not count: the marker for rule `foo`
/// does not match a comment written for `foo-bar`. A line number of 0 or one
/// past the end of `lines` is never suppressed.
///
/// Takes pre-computed marker strings to avoid per-call allocations.
fn is_suppressed(lines: &[&str], line_num: usize, allow_marker: &str, allow_next_line: &str) -> bool {
    /// True when `line` contains `marker` as a complete token (see above).
    fn has_marker(line: &str, marker: &str) -> bool {
        line.match_indices(marker).any(|(start, _)| {
            let mut rest = line[start + marker.len()..].chars();
            match rest.next() {
                None => true,
                Some(c) if c.is_alphanumeric() => false,
                // `foo-bar` continues the id; `foo-->` or a trailing `-` does not.
                Some('-') | Some('_') => !rest.next().map_or(false, |c| c.is_alphanumeric()),
                Some(_) => true,
            }
        })
    }

    if line_num == 0 || line_num > lines.len() {
        return false;
    }

    // Same-line marker.
    let current = lines[line_num - 1];
    if has_marker(current, allow_marker) || has_marker(current, "baseline:allow-all") {
        return true;
    }

    // Next-line style marker on the line directly above.
    if line_num >= 2 {
        let previous = lines[line_num - 2];
        return has_marker(previous, allow_next_line)
            || has_marker(previous, "baseline:allow-next-line all");
    }

    false
}
647
648pub(crate) fn collect_files(target_paths: &[PathBuf], exclude_set: &GlobSet) -> Vec<PathBuf> {
649    let mut files: Vec<PathBuf> = Vec::new();
650    for target in target_paths {
651        if target.is_file() {
652            files.push(target.clone());
653        } else {
654            // Use the `ignore` crate's parallel walker for multi-threaded directory traversal.
655            let walker = WalkBuilder::new(target)
656                .hidden(true) // skip hidden files/dirs like .git
657                .git_ignore(true) // respect .gitignore
658                .git_global(true) // respect global gitignore
659                .git_exclude(true) // respect .git/info/exclude
660                .build_parallel();
661
662            let collected: Mutex<Vec<PathBuf>> = Mutex::new(Vec::new());
663
664            walker.run(|| {
665                Box::new(|entry| {
666                    if let Ok(entry) = entry {
667                        if entry.file_type().map_or(false, |ft| ft.is_file()) {
668                            let path = entry.into_path();
669                            let rel = path.strip_prefix(target).unwrap_or(&path);
670                            if !exclude_set.is_match(rel.to_string_lossy().as_ref()) {
671                                collected.lock().unwrap().push(path);
672                            }
673                        }
674                    }
675                    ignore::WalkState::Continue
676                })
677            });
678
679            files.extend(collected.into_inner().unwrap());
680        }
681    }
682    files
683}
684
/// Normalize a glob pattern into one or more plain patterns:
///
/// 1. Expand brace alternation (`{a,b}`) into one pattern per alternative,
///    so the glob engine never sees it. Braces are matched by nesting depth
///    and only top-level commas split alternatives, so nested groups such as
///    `{a,{b,c}}` and several groups in a row such as `{a,b}/{c,d}` both
///    expand fully.
/// 2. Auto-prefix path-based globs with `**/` so they match against absolute
///    paths, e.g. `apps/web/src/**/*.tsx` → `**/apps/web/src/**/*.tsx`.
///
/// If the first brace group has no top-level comma or is never closed, the
/// pattern is left unexpanded and only step 2 is applied.
fn expand_glob(pattern: &str) -> Vec<String> {
    if let Some(open) = pattern.find('{') {
        // Find the brace that closes `open` and the commas directly inside it.
        // `{`, `}` and `,` are ASCII, so byte offsets are valid str boundaries.
        let mut depth = 0usize;
        let mut close = None;
        let mut commas: Vec<usize> = Vec::new();
        for (idx, byte) in pattern.bytes().enumerate().skip(open) {
            match byte {
                b'{' => depth += 1,
                b'}' => {
                    // Cannot underflow: scanning starts on `{`, and stops at depth 0.
                    depth -= 1;
                    if depth == 0 {
                        close = Some(idx);
                        break;
                    }
                }
                b',' if depth == 1 => commas.push(idx),
                _ => {}
            }
        }

        if let Some(close) = close {
            if !commas.is_empty() {
                let prefix = &pattern[..open];
                let suffix = &pattern[close + 1..];

                let mut result = Vec::new();
                let mut start = open + 1;
                for end in commas.into_iter().chain(std::iter::once(close)) {
                    let alt = &pattern[start..end];
                    // Recurse: the alternative or the suffix may hold more groups.
                    result.extend(expand_glob(&format!("{prefix}{alt}{suffix}")));
                    start = end + 1;
                }
                return result;
            }
        }
    }

    // Auto-prefix path-based globs that don't already start with ** or /
    // so they match against absolute file paths.
    let normalized = if pattern.contains('/')
        && !pattern.starts_with("**/")
        && !pattern.starts_with('/')
    {
        format!("**/{pattern}")
    } else {
        pattern.to_string()
    };

    vec![normalized]
}
725
726/// Build a GlobSet from a single pattern string, expanding brace syntax.
727pub(crate) fn build_glob_set_from_pattern(pattern: &str) -> Result<GlobSet, ScanError> {
728    let expanded = expand_glob(pattern);
729    let mut builder = GlobSetBuilder::new();
730    for pat in &expanded {
731        builder.add(Glob::new(pat).map_err(ScanError::GlobParse)?);
732    }
733    builder.build().map_err(ScanError::GlobParse)
734}
735
736pub(crate) fn build_glob_set(patterns: &[String]) -> Result<GlobSet, ScanError> {
737    let mut builder = GlobSetBuilder::new();
738    for pattern in patterns {
739        for pat in &expand_glob(pattern) {
740            builder.add(Glob::new(pat).map_err(ScanError::GlobParse)?);
741        }
742    }
743    builder.build().map_err(ScanError::GlobParse)
744}
745
746#[cfg(test)]
747mod tests {
748    use super::*;
749    use crate::config::Severity;
750
751    fn make_violation(rule_id: &str) -> Violation {
752        Violation {
753            rule_id: rule_id.to_string(),
754            severity: Severity::Error,
755            file: PathBuf::from("test.ts"),
756            line: Some(1),
757            column: Some(1),
758            message: "test".to_string(),
759            suggest: None,
760            source_line: None,
761            fix: None,
762        }
763    }
764
765    /// Count total rules across all groups.
766    fn total_rules(groups: &[RuleGroup]) -> usize {
767        groups.iter().map(|g| g.rules.len()).sum()
768    }
769
770    #[test]
771    fn ratchet_under_budget_suppresses() {
772        let mut violations = vec![
773            make_violation("ratchet-legacy"),
774            make_violation("ratchet-legacy"),
775            make_violation("other-rule"),
776        ];
777        let mut thresholds = HashMap::new();
778        thresholds.insert("ratchet-legacy".to_string(), 5);
779
780        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
781
782        assert_eq!(violations.len(), 1); // only "other-rule" remains
783        assert_eq!(violations[0].rule_id, "other-rule");
784        assert_eq!(counts["ratchet-legacy"], (2, 5));
785    }
786
787    #[test]
788    fn ratchet_over_budget_keeps_all() {
789        let mut violations = vec![
790            make_violation("ratchet-legacy"),
791            make_violation("ratchet-legacy"),
792            make_violation("ratchet-legacy"),
793            make_violation("other-rule"),
794        ];
795        let mut thresholds = HashMap::new();
796        thresholds.insert("ratchet-legacy".to_string(), 2);
797
798        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
799
800        assert_eq!(violations.len(), 4); // all kept
801        assert_eq!(counts["ratchet-legacy"], (3, 2));
802    }
803
804    #[test]
805    fn ratchet_exactly_at_budget_suppresses() {
806        let mut violations = vec![
807            make_violation("ratchet-legacy"),
808            make_violation("ratchet-legacy"),
809        ];
810        let mut thresholds = HashMap::new();
811        thresholds.insert("ratchet-legacy".to_string(), 2);
812
813        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
814
815        assert_eq!(violations.len(), 0); // suppressed (at budget)
816        assert_eq!(counts["ratchet-legacy"], (2, 2));
817    }
818
819    #[test]
820    fn no_ratchet_rules_is_noop() {
821        let mut violations = vec![make_violation("other-rule")];
822        let thresholds = HashMap::new();
823
824        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
825
826        assert_eq!(violations.len(), 1);
827        assert!(counts.is_empty());
828    }
829
830    #[test]
831    fn ratchet_zero_with_matches_keeps_all() {
832        let mut violations = vec![make_violation("ratchet-zero")];
833        let mut thresholds = HashMap::new();
834        thresholds.insert("ratchet-zero".to_string(), 0);
835
836        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
837
838        assert_eq!(violations.len(), 1);
839        assert_eq!(counts["ratchet-zero"], (1, 0));
840    }
841
842    #[test]
843    fn ratchet_zero_no_matches_suppresses() {
844        let mut violations: Vec<Violation> = vec![];
845        let mut thresholds = HashMap::new();
846        thresholds.insert("ratchet-zero".to_string(), 0);
847
848        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
849
850        assert!(violations.is_empty());
851        assert_eq!(counts["ratchet-zero"], (0, 0));
852    }
853
854    // ── is_suppressed tests ──
855
856    #[test]
857    fn suppressed_by_same_line_allow() {
858        let lines = vec![
859            "let x = style={{ color: 'red' }}; // baseline:allow-no-inline-styles",
860        ];
861        assert!(is_suppressed(
862            &lines,
863            1,
864            "baseline:allow-no-inline-styles",
865            "baseline:allow-next-line no-inline-styles",
866        ));
867    }
868
869    #[test]
870    fn suppressed_by_allow_all() {
871        let lines = vec![
872            "let x = style={{ color: 'red' }}; // baseline:allow-all",
873        ];
874        assert!(is_suppressed(
875            &lines,
876            1,
877            "baseline:allow-no-inline-styles",
878            "baseline:allow-next-line no-inline-styles",
879        ));
880        assert!(is_suppressed(
881            &lines,
882            1,
883            "baseline:allow-any-other-rule",
884            "baseline:allow-next-line any-other-rule",
885        ));
886    }
887
888    #[test]
889    fn suppressed_by_allow_next_line() {
890        let lines = vec![
891            "// baseline:allow-next-line no-inline-styles",
892            "let x = style={{ color: 'red' }};",
893        ];
894        assert!(is_suppressed(
895            &lines,
896            2,
897            "baseline:allow-no-inline-styles",
898            "baseline:allow-next-line no-inline-styles",
899        ));
900    }
901
902    #[test]
903    fn suppressed_by_allow_next_line_all() {
904        let lines = vec![
905            "// baseline:allow-next-line all",
906            "let x = style={{ color: 'red' }};",
907        ];
908        assert!(is_suppressed(
909            &lines,
910            2,
911            "baseline:allow-no-inline-styles",
912            "baseline:allow-next-line no-inline-styles",
913        ));
914    }
915
916    #[test]
917    fn not_suppressed_wrong_rule_id() {
918        let lines = vec![
919            "let x = style={{ color: 'red' }}; // baseline:allow-other-rule",
920        ];
921        assert!(!is_suppressed(
922            &lines,
923            1,
924            "baseline:allow-no-inline-styles",
925            "baseline:allow-next-line no-inline-styles",
926        ));
927    }
928
929    #[test]
930    fn not_suppressed_no_comment() {
931        let lines = vec![
932            "let x = style={{ color: 'red' }};",
933        ];
934        assert!(!is_suppressed(
935            &lines,
936            1,
937            "baseline:allow-no-inline-styles",
938            "baseline:allow-next-line no-inline-styles",
939        ));
940    }
941
942    #[test]
943    fn not_suppressed_next_line_wrong_rule() {
944        let lines = vec![
945            "// baseline:allow-next-line other-rule",
946            "let x = style={{ color: 'red' }};",
947        ];
948        assert!(!is_suppressed(
949            &lines,
950            2,
951            "baseline:allow-no-inline-styles",
952            "baseline:allow-next-line no-inline-styles",
953        ));
954    }
955
956    #[test]
957    fn suppressed_line_zero_is_safe() {
958        let lines = vec!["some content"];
959        // line_num 0 should not panic
960        assert!(!is_suppressed(
961            &lines,
962            0,
963            "baseline:allow-any-rule",
964            "baseline:allow-next-line any-rule",
965        ));
966    }
967
968    #[test]
969    fn suppressed_past_end_is_safe() {
970        let lines = vec!["some content"];
971        // line_num past end should not panic
972        assert!(!is_suppressed(
973            &lines,
974            5,
975            "baseline:allow-any-rule",
976            "baseline:allow-next-line any-rule",
977        ));
978    }
979
980    // ── ScanError Display tests ──
981
982    #[test]
983    fn scan_error_display_config_read() {
984        let err = ScanError::ConfigRead(std::io::Error::new(
985            std::io::ErrorKind::NotFound,
986            "not found",
987        ));
988        assert!(err.to_string().contains("failed to read config"));
989    }
990
991    #[test]
992    fn scan_error_display_config_parse() {
993        let toml_err = toml::from_str::<TomlConfig>("not valid toml [[[").unwrap_err();
994        let err = ScanError::ConfigParse(toml_err);
995        assert!(err.to_string().contains("failed to parse config"));
996    }
997
998    #[test]
999    fn scan_error_display_glob_parse() {
1000        let glob_err = Glob::new("[invalid").unwrap_err();
1001        let err = ScanError::GlobParse(glob_err);
1002        assert!(err.to_string().contains("invalid glob pattern"));
1003    }
1004
1005    #[test]
1006    fn scan_error_display_rule_factory() {
1007        let err = ScanError::RuleFactory(FactoryError::UnknownRuleType("nope".into()));
1008        assert!(err.to_string().contains("failed to build rule"));
1009    }
1010
1011    #[test]
1012    fn scan_error_display_preset() {
1013        let err = ScanError::Preset(PresetError::UnknownPreset {
1014            name: "bad".into(),
1015            available: vec!["shadcn-strict"],
1016        });
1017        assert!(err.to_string().contains("preset error"));
1018    }
1019
1020    #[test]
1021    fn scan_error_display_git_diff() {
1022        let err = ScanError::GitDiff("diff broke".into());
1023        assert_eq!(err.to_string(), "git diff failed: diff broke");
1024    }
1025
1026    // ── build_rules tests ──
1027
1028    #[test]
1029    fn build_rules_banned_pattern_rule() {
1030        let rules = vec![TomlRule {
1031            id: "no-console".into(),
1032            rule_type: "banned-pattern".into(),
1033            pattern: Some("console\\.log".into()),
1034            message: "no console.log".into(),
1035            glob: Some("**/*.ts".into()),
1036            ..Default::default()
1037        }];
1038
1039        let built = build_rules(&rules).unwrap();
1040        assert_eq!(total_rules(&built.rule_groups), 1);
1041        assert!(built.ratchet_thresholds.is_empty());
1042        assert!(built.file_presence_rules.is_empty());
1043    }
1044
1045    #[test]
1046    fn build_rules_ratchet_records_threshold() {
1047        let rules = vec![TomlRule {
1048            id: "legacy-api".into(),
1049            rule_type: "ratchet".into(),
1050            pattern: Some("legacyCall".into()),
1051            max_count: Some(10),
1052            glob: Some("**/*.ts".into()),
1053            message: "legacy".into(),
1054            ..Default::default()
1055        }];
1056
1057        let built = build_rules(&rules).unwrap();
1058        assert_eq!(total_rules(&built.rule_groups), 1);
1059        assert_eq!(built.ratchet_thresholds["legacy-api"], 10);
1060    }
1061
1062    #[test]
1063    fn build_rules_file_presence_separated() {
1064        let rules = vec![
1065            TomlRule {
1066                id: "has-readme".into(),
1067                rule_type: "file-presence".into(),
1068                required_files: vec!["README.md".into()],
1069                message: "need readme".into(),
1070                ..Default::default()
1071            },
1072            TomlRule {
1073                id: "no-console".into(),
1074                rule_type: "banned-pattern".into(),
1075                pattern: Some("console\\.log".into()),
1076                message: "no console".into(),
1077                ..Default::default()
1078            },
1079        ];
1080
1081        let built = build_rules(&rules).unwrap();
1082        assert_eq!(total_rules(&built.rule_groups), 1); // only banned-pattern
1083        assert_eq!(built.file_presence_rules.len(), 1);
1084    }
1085
1086    #[test]
1087    fn build_rules_unknown_type_errors() {
1088        let rules = vec![TomlRule {
1089            id: "bad".into(),
1090            rule_type: "nonexistent-rule-type".into(),
1091            message: "x".into(),
1092            ..Default::default()
1093        }];
1094
1095        let result = build_rules(&rules);
1096        assert!(result.is_err());
1097        let err = result.err().unwrap();
1098        assert!(matches!(err, ScanError::RuleFactory(_)));
1099    }
1100
1101    #[test]
1102    fn build_rules_with_exclude_glob() {
1103        let rules = vec![TomlRule {
1104            id: "no-console".into(),
1105            rule_type: "banned-pattern".into(),
1106            pattern: Some("console\\.log".into()),
1107            message: "no console".into(),
1108            exclude_glob: vec!["**/test/**".into()],
1109            ..Default::default()
1110        }];
1111
1112        let built = build_rules(&rules).unwrap();
1113        assert_eq!(built.rule_groups.len(), 1);
1114        assert!(built.rule_groups[0].exclusion_glob.is_some());
1115    }
1116
1117    #[test]
1118    fn build_rules_with_file_conditioning() {
1119        let rules = vec![TomlRule {
1120            id: "no-console".into(),
1121            rule_type: "banned-pattern".into(),
1122            pattern: Some("console\\.log".into()),
1123            message: "no console".into(),
1124            file_contains: Some("import React".into()),
1125            file_not_contains: Some("// @generated".into()),
1126            ..Default::default()
1127        }];
1128
1129        let built = build_rules(&rules).unwrap();
1130        assert_eq!(built.rule_groups.len(), 1);
1131        assert!(built.rule_groups[0].rules[0].file_contains.is_some());
1132        assert!(built.rule_groups[0].rules[0].file_not_contains.is_some());
1133    }
1134
1135    // ── group_matches_file tests ──
1136
1137    #[test]
1138    fn group_matches_file_no_glob_matches_all() {
1139        let rules = vec![TomlRule {
1140            id: "r".into(),
1141            rule_type: "banned-pattern".into(),
1142            pattern: Some("x".into()),
1143            message: "m".into(),
1144            ..Default::default()
1145        }];
1146        let built = build_rules(&rules).unwrap();
1147        assert!(group_matches_file(&built.rule_groups[0], "anything.rs", "anything.rs"));
1148    }
1149
1150    #[test]
1151    fn group_matches_file_inclusion_glob_filters() {
1152        let rules = vec![TomlRule {
1153            id: "r".into(),
1154            rule_type: "banned-pattern".into(),
1155            pattern: Some("x".into()),
1156            message: "m".into(),
1157            glob: Some("**/*.tsx".into()),
1158            ..Default::default()
1159        }];
1160        let built = build_rules(&rules).unwrap();
1161        assert!(group_matches_file(&built.rule_groups[0], "src/Foo.tsx", "Foo.tsx"));
1162        assert!(!group_matches_file(&built.rule_groups[0], "src/Foo.rs", "Foo.rs"));
1163    }
1164
1165    #[test]
1166    fn group_matches_file_exclusion_glob_rejects() {
1167        let rules = vec![TomlRule {
1168            id: "r".into(),
1169            rule_type: "banned-pattern".into(),
1170            pattern: Some("x".into()),
1171            message: "m".into(),
1172            exclude_glob: vec!["**/test/**".into()],
1173            ..Default::default()
1174        }];
1175        let built = build_rules(&rules).unwrap();
1176        assert!(group_matches_file(&built.rule_groups[0], "src/app.ts", "app.ts"));
1177        assert!(!group_matches_file(&built.rule_groups[0], "src/test/app.ts", "app.ts"));
1178    }
1179
1180    // ── passes_file_conditioning tests ──
1181
1182    #[test]
1183    fn passes_conditioning_no_conditions() {
1184        let rules = vec![TomlRule {
1185            id: "r".into(),
1186            rule_type: "banned-pattern".into(),
1187            pattern: Some("x".into()),
1188            message: "m".into(),
1189            ..Default::default()
1190        }];
1191        let built = build_rules(&rules).unwrap();
1192        let mut cache = HashMap::new();
1193        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "anything", &mut cache));
1194    }
1195
1196    #[test]
1197    fn passes_conditioning_file_contains_present() {
1198        let rules = vec![TomlRule {
1199            id: "r".into(),
1200            rule_type: "banned-pattern".into(),
1201            pattern: Some("x".into()),
1202            message: "m".into(),
1203            file_contains: Some("import React".into()),
1204            ..Default::default()
1205        }];
1206        let built = build_rules(&rules).unwrap();
1207        let mut cache = HashMap::new();
1208        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React from 'react';", &mut cache));
1209        let mut cache = HashMap::new();
1210        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import Vue from 'vue';", &mut cache));
1211    }
1212
1213    #[test]
1214    fn passes_conditioning_file_not_contains() {
1215        let rules = vec![TomlRule {
1216            id: "r".into(),
1217            rule_type: "banned-pattern".into(),
1218            pattern: Some("x".into()),
1219            message: "m".into(),
1220            file_not_contains: Some("// @generated".into()),
1221            ..Default::default()
1222        }];
1223        let built = build_rules(&rules).unwrap();
1224        let mut cache = HashMap::new();
1225        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "normal code", &mut cache));
1226        let mut cache = HashMap::new();
1227        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "// @generated\nnormal code", &mut cache));
1228    }
1229
1230    #[test]
1231    fn passes_conditioning_both_conditions() {
1232        let rules = vec![TomlRule {
1233            id: "r".into(),
1234            rule_type: "banned-pattern".into(),
1235            pattern: Some("x".into()),
1236            message: "m".into(),
1237            file_contains: Some("import React".into()),
1238            file_not_contains: Some("// @generated".into()),
1239            ..Default::default()
1240        }];
1241        let built = build_rules(&rules).unwrap();
1242        // Has required, missing excluded -> pass
1243        let mut cache = HashMap::new();
1244        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React", &mut cache));
1245        // Missing required -> fail
1246        let mut cache = HashMap::new();
1247        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import Vue", &mut cache));
1248        // Has both -> fail (file_not_contains blocks it)
1249        let mut cache = HashMap::new();
1250        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React // @generated", &mut cache));
1251    }
1252
1253    // ── run_rules_on_content tests ──
1254
1255    #[test]
1256    fn run_rules_on_content_finds_violations() {
1257        let rules = vec![TomlRule {
1258            id: "no-console".into(),
1259            rule_type: "banned-pattern".into(),
1260            pattern: Some("console\\.log".into()),
1261            message: "no console.log".into(),
1262            regex: true,
1263            ..Default::default()
1264        }];
1265        let built = build_rules(&rules).unwrap();
1266        let path = PathBuf::from("test.ts");
1267        let content = "console.log('hello');\nfoo();\n";
1268
1269        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1270        assert_eq!(violations.len(), 1);
1271        assert_eq!(violations[0].rule_id, "no-console");
1272    }
1273
1274    #[test]
1275    fn run_rules_on_content_respects_suppression() {
1276        let rules = vec![TomlRule {
1277            id: "no-console".into(),
1278            rule_type: "banned-pattern".into(),
1279            pattern: Some("console\\.log".into()),
1280            message: "no console.log".into(),
1281            regex: true,
1282            ..Default::default()
1283        }];
1284        let built = build_rules(&rules).unwrap();
1285        let path = PathBuf::from("test.ts");
1286        let content = "console.log('hello'); // baseline:allow-no-console\n";
1287
1288        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1289        assert_eq!(violations.len(), 0);
1290    }
1291
1292    #[test]
1293    fn run_rules_on_content_skips_non_matching_glob() {
1294        let rules = vec![TomlRule {
1295            id: "no-console".into(),
1296            rule_type: "banned-pattern".into(),
1297            pattern: Some("console\\.log".into()),
1298            message: "no console.log".into(),
1299            regex: true,
1300            glob: Some("**/*.tsx".into()),
1301            ..Default::default()
1302        }];
1303        let built = build_rules(&rules).unwrap();
1304        let path = PathBuf::from("test.rs");
1305        let content = "console.log('hello');\n";
1306
1307        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.rs", "test.rs");
1308        assert_eq!(violations.len(), 0);
1309    }
1310
1311    #[test]
1312    fn run_rules_on_content_skips_file_conditioning() {
1313        let rules = vec![TomlRule {
1314            id: "no-console".into(),
1315            rule_type: "banned-pattern".into(),
1316            pattern: Some("console\\.log".into()),
1317            message: "no console.log".into(),
1318            regex: true,
1319            file_contains: Some("import React".into()),
1320            ..Default::default()
1321        }];
1322        let built = build_rules(&rules).unwrap();
1323        let path = PathBuf::from("test.ts");
1324        let content = "console.log('hello');\n"; // no "import React"
1325
1326        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1327        assert_eq!(violations.len(), 0);
1328    }
1329
1330    // ── build_glob_set tests ──
1331
1332    #[test]
1333    fn build_glob_set_empty() {
1334        let gs = build_glob_set(&[]).unwrap();
1335        assert!(!gs.is_match("anything"));
1336    }
1337
1338    #[test]
1339    fn build_glob_set_matches() {
1340        let gs = build_glob_set(&["**/*.ts".into(), "**/*.tsx".into()]).unwrap();
1341        assert!(gs.is_match("src/foo.ts"));
1342        assert!(gs.is_match("src/foo.tsx"));
1343        assert!(!gs.is_match("src/foo.rs"));
1344    }
1345
1346    #[test]
1347    fn build_glob_set_invalid_pattern() {
1348        let err = build_glob_set(&["[invalid".into()]).unwrap_err();
1349        assert!(matches!(err, ScanError::GlobParse(_)));
1350    }
1351
1352    // ── expand_glob / brace expansion tests ──
1353
1354    #[test]
1355    fn expand_glob_no_braces() {
1356        assert_eq!(expand_glob("**/*.ts"), vec!["**/*.ts"]);
1357    }
1358
1359    #[test]
1360    fn expand_glob_single_brace() {
1361        let mut result = expand_glob("**/*.{ts,tsx}");
1362        result.sort();
1363        assert_eq!(result, vec!["**/*.ts", "**/*.tsx"]);
1364    }
1365
1366    #[test]
1367    fn expand_glob_three_alternatives() {
1368        let mut result = expand_glob("src/**/*.{ts,tsx,js}");
1369        result.sort();
1370        // Path-based globs get **/ prefix
1371        assert_eq!(
1372            result,
1373            vec!["**/src/**/*.js", "**/src/**/*.ts", "**/src/**/*.tsx"]
1374        );
1375    }
1376
1377    #[test]
1378    fn expand_glob_no_comma_passthrough() {
1379        // Braces without commas should pass through (e.g. character classes)
1380        assert_eq!(expand_glob("**/*.[tj]s"), vec!["**/*.[tj]s"]);
1381    }
1382
1383    #[test]
1384    fn expand_glob_auto_prefix_path_glob() {
1385        // Path-based globs without **/ prefix get auto-prefixed
1386        assert_eq!(
1387            expand_glob("apps/web/src/**/*.tsx"),
1388            vec!["**/apps/web/src/**/*.tsx"]
1389        );
1390    }
1391
1392    #[test]
1393    fn expand_glob_no_double_prefix() {
1394        // Already prefixed with **/ should not get double-prefixed
1395        assert_eq!(
1396            expand_glob("**/apps/web/src/**/*.tsx"),
1397            vec!["**/apps/web/src/**/*.tsx"]
1398        );
1399    }
1400
1401    #[test]
1402    fn expand_glob_simple_extension_no_prefix() {
1403        // Simple extension globs (no /) should not get prefixed
1404        assert_eq!(expand_glob("*.ts"), vec!["*.ts"]);
1405    }
1406
1407    #[test]
1408    fn build_glob_set_brace_expansion() {
1409        let gs = build_glob_set(&["**/*.{ts,tsx}".into()]).unwrap();
1410        assert!(gs.is_match("src/foo.ts"));
1411        assert!(gs.is_match("src/foo.tsx"));
1412        assert!(!gs.is_match("src/foo.js"));
1413    }
1414
1415    #[test]
1416    fn build_glob_set_from_pattern_brace_expansion() {
1417        let gs = build_glob_set_from_pattern("**/*.{ts,tsx,js,jsx}").unwrap();
1418        assert!(gs.is_match("src/components/Button.tsx"));
1419        assert!(gs.is_match("lib/utils.js"));
1420        assert!(!gs.is_match("src/main.rs"));
1421    }
1422
1423    #[test]
1424    fn build_glob_set_from_pattern_path_glob() {
1425        let gs = build_glob_set_from_pattern("src/components/**/*.{ts,tsx}").unwrap();
1426        assert!(gs.is_match("src/components/Button.tsx"));
1427        assert!(gs.is_match("src/components/deep/nested/Card.ts"));
1428        assert!(!gs.is_match("lib/utils.tsx"));
1429    }
1430
1431    #[test]
1432    fn build_glob_set_path_glob_matches_absolute() {
1433        // The real-world case: "apps/web/src/**/*.{ts,tsx}" must match absolute paths
1434        let gs = build_glob_set_from_pattern("apps/web/src/**/*.{ts,tsx}").unwrap();
1435        assert!(gs.is_match("/Users/dev/project/apps/web/src/components/Foo.tsx"));
1436        assert!(gs.is_match("apps/web/src/index.ts"));
1437        assert!(!gs.is_match("/Users/dev/project/apps/api/src/index.ts"));
1438    }
1439
1440    // ── rule grouping tests ──
1441
1442    #[test]
1443    fn build_rules_groups_same_glob() {
1444        let rules = vec![
1445            TomlRule {
1446                id: "no-console".into(),
1447                rule_type: "banned-pattern".into(),
1448                pattern: Some("console\\.log".into()),
1449                message: "no console".into(),
1450                glob: Some("**/*.ts".into()),
1451                regex: true,
1452                ..Default::default()
1453            },
1454            TomlRule {
1455                id: "no-debugger".into(),
1456                rule_type: "banned-pattern".into(),
1457                pattern: Some("debugger".into()),
1458                message: "no debugger".into(),
1459                glob: Some("**/*.ts".into()),
1460                ..Default::default()
1461            },
1462        ];
1463
1464        let built = build_rules(&rules).unwrap();
1465        // Both rules share the same glob, so they should be in one group
1466        assert_eq!(built.rule_groups.len(), 1);
1467        assert_eq!(built.rule_groups[0].rules.len(), 2);
1468    }
1469
1470    #[test]
1471    fn build_rules_separates_different_globs() {
1472        let rules = vec![
1473            TomlRule {
1474                id: "no-console".into(),
1475                rule_type: "banned-pattern".into(),
1476                pattern: Some("console\\.log".into()),
1477                message: "no console".into(),
1478                glob: Some("**/*.ts".into()),
1479                regex: true,
1480                ..Default::default()
1481            },
1482            TomlRule {
1483                id: "no-debugger".into(),
1484                rule_type: "banned-pattern".into(),
1485                pattern: Some("debugger".into()),
1486                message: "no debugger".into(),
1487                glob: Some("**/*.tsx".into()),
1488                ..Default::default()
1489            },
1490        ];
1491
1492        let built = build_rules(&rules).unwrap();
1493        // Different globs -> separate groups
1494        assert_eq!(built.rule_groups.len(), 2);
1495        assert_eq!(built.rule_groups[0].rules.len(), 1);
1496        assert_eq!(built.rule_groups[1].rules.len(), 1);
1497    }
1498
1499    // ── run_scan integration tests ──
1500
1501    #[test]
1502    fn run_scan_with_banned_pattern() {
1503        let dir = tempfile::tempdir().unwrap();
1504
1505        // Write config
1506        let config = dir.path().join("baseline.toml");
1507        fs::write(
1508            &config,
1509            r#"
1510[baseline]
1511
1512[[rule]]
1513id = "no-console"
1514type = "banned-pattern"
1515severity = "error"
1516pattern = "console\\.log"
1517regex = true
1518message = "Do not use console.log"
1519"#,
1520        )
1521        .unwrap();
1522
1523        // Write a source file
1524        let src_dir = dir.path().join("src");
1525        fs::create_dir(&src_dir).unwrap();
1526        fs::write(src_dir.join("app.ts"), "console.log('hi');\nfoo();\n").unwrap();
1527
1528        let result = run_scan(&config, &[src_dir]).unwrap();
1529        assert_eq!(result.violations.len(), 1);
1530        assert_eq!(result.violations[0].rule_id, "no-console");
1531        assert_eq!(result.files_scanned, 1);
1532        assert_eq!(result.rules_loaded, 1);
1533    }
1534
1535    #[test]
1536    fn run_scan_no_violations() {
1537        let dir = tempfile::tempdir().unwrap();
1538
1539        let config = dir.path().join("baseline.toml");
1540        fs::write(
1541            &config,
1542            r#"
1543[baseline]
1544
1545[[rule]]
1546id = "no-console"
1547type = "banned-pattern"
1548severity = "error"
1549pattern = "console\\.log"
1550regex = true
1551message = "Do not use console.log"
1552glob = "**/*.ts"
1553"#,
1554        )
1555        .unwrap();
1556
1557        let src_dir = dir.path().join("src");
1558        fs::create_dir(&src_dir).unwrap();
1559        fs::write(src_dir.join("app.ts"), "doStuff();\n").unwrap();
1560
1561        let result = run_scan(&config, &[src_dir]).unwrap();
1562        assert!(result.violations.is_empty());
1563        assert_eq!(result.files_scanned, 1);
1564    }
1565
1566    #[test]
1567    fn run_scan_excludes_files() {
1568        let dir = tempfile::tempdir().unwrap();
1569
1570        let config = dir.path().join("baseline.toml");
1571        fs::write(
1572            &config,
1573            r#"
1574[baseline]
1575exclude = ["**/dist/**"]
1576
1577[[rule]]
1578id = "no-console"
1579type = "banned-pattern"
1580severity = "error"
1581pattern = "console\\.log"
1582regex = true
1583message = "no console"
1584"#,
1585        )
1586        .unwrap();
1587
1588        // File in dist should be excluded
1589        let dist_dir = dir.path().join("dist");
1590        fs::create_dir(&dist_dir).unwrap();
1591        fs::write(dist_dir.join("app.ts"), "console.log('hi');\n").unwrap();
1592
1593        let result = run_scan(&config, &[dir.path().to_path_buf()]).unwrap();
1594        // The dist file should be excluded
1595        for v in &result.violations {
1596            assert!(!v.file.to_string_lossy().contains("dist"));
1597        }
1598    }
1599
1600    #[test]
1601    fn run_scan_file_presence_rule() {
1602        let dir = tempfile::tempdir().unwrap();
1603
1604        let config = dir.path().join("baseline.toml");
1605        fs::write(
1606            &config,
1607            r#"
1608[baseline]
1609
1610[[rule]]
1611id = "has-readme"
1612type = "file-presence"
1613severity = "error"
1614required_files = ["README.md"]
1615message = "README.md is required"
1616"#,
1617        )
1618        .unwrap();
1619
1620        // No README.md in dir
1621        let result = run_scan(&config, &[dir.path().to_path_buf()]).unwrap();
1622        assert!(result.violations.iter().any(|v| v.rule_id == "has-readme"));
1623    }
1624
1625    #[test]
1626    fn run_scan_missing_config_errors() {
1627        let result = run_scan(
1628            Path::new("/nonexistent/baseline.toml"),
1629            &[PathBuf::from(".")],
1630        );
1631        assert!(result.is_err());
1632        assert!(matches!(result.err().unwrap(), ScanError::ConfigRead(_)));
1633    }
1634
1635    #[test]
1636    fn run_scan_invalid_config_errors() {
1637        let dir = tempfile::tempdir().unwrap();
1638        let config = dir.path().join("baseline.toml");
1639        fs::write(&config, "this is not valid toml [[[").unwrap();
1640
1641        let result = run_scan(&config, &[dir.path().to_path_buf()]);
1642        assert!(result.is_err());
1643        assert!(matches!(result.err().unwrap(), ScanError::ConfigParse(_)));
1644    }
1645
1646    #[test]
1647    fn run_scan_with_ratchet_rule() {
1648        let dir = tempfile::tempdir().unwrap();
1649
1650        let config = dir.path().join("baseline.toml");
1651        fs::write(
1652            &config,
1653            r#"
1654[baseline]
1655
1656[[rule]]
1657id = "legacy-api"
1658type = "ratchet"
1659severity = "warning"
1660pattern = "legacyCall"
1661max_count = 5
1662message = "legacy api usage"
1663"#,
1664        )
1665        .unwrap();
1666
1667        let src_dir = dir.path().join("src");
1668        fs::create_dir(&src_dir).unwrap();
1669        fs::write(src_dir.join("app.ts"), "legacyCall();\nlegacyCall();\n").unwrap();
1670
1671        let result = run_scan(&config, &[src_dir]).unwrap();
1672        // 2 matches, max 5 -> suppressed
1673        assert!(result.violations.is_empty());
1674        assert_eq!(result.ratchet_counts["legacy-api"], (2, 5));
1675    }
1676
1677    // ── run_scan_stdin tests ──
1678
1679    #[test]
1680    fn run_scan_stdin_finds_violations() {
1681        let dir = tempfile::tempdir().unwrap();
1682
1683        let config = dir.path().join("baseline.toml");
1684        fs::write(
1685            &config,
1686            r#"
1687[baseline]
1688
1689[[rule]]
1690id = "no-console"
1691type = "banned-pattern"
1692severity = "error"
1693pattern = "console\\.log"
1694regex = true
1695message = "no console.log"
1696"#,
1697        )
1698        .unwrap();
1699
1700        let result =
1701            run_scan_stdin(&config, "console.log('hello');\nfoo();\n", "test.ts").unwrap();
1702        assert_eq!(result.violations.len(), 1);
1703        assert_eq!(result.files_scanned, 1);
1704    }
1705
1706    #[test]
1707    fn run_scan_stdin_no_violations() {
1708        let dir = tempfile::tempdir().unwrap();
1709
1710        let config = dir.path().join("baseline.toml");
1711        fs::write(
1712            &config,
1713            r#"
1714[baseline]
1715
1716[[rule]]
1717id = "no-console"
1718type = "banned-pattern"
1719severity = "error"
1720pattern = "console\\.log"
1721regex = true
1722message = "no console.log"
1723glob = "**/*.ts"
1724"#,
1725        )
1726        .unwrap();
1727
1728        let result = run_scan_stdin(&config, "doStuff();\n", "app.ts").unwrap();
1729        assert!(result.violations.is_empty());
1730    }
1731
1732    #[test]
1733    fn run_scan_stdin_glob_filters_filename() {
1734        let dir = tempfile::tempdir().unwrap();
1735
1736        let config = dir.path().join("baseline.toml");
1737        fs::write(
1738            &config,
1739            r#"
1740[baseline]
1741
1742[[rule]]
1743id = "no-console"
1744type = "banned-pattern"
1745severity = "error"
1746pattern = "console\\.log"
1747regex = true
1748message = "no console.log"
1749glob = "**/*.tsx"
1750"#,
1751        )
1752        .unwrap();
1753
1754        // File doesn't match glob
1755        let result =
1756            run_scan_stdin(&config, "console.log('hello');\n", "app.rs").unwrap();
1757        assert!(result.violations.is_empty());
1758    }
1759
1760    // ── run_baseline tests ──
1761
1762    #[test]
1763    fn run_baseline_counts_ratchet_matches() {
1764        let dir = tempfile::tempdir().unwrap();
1765
1766        let config = dir.path().join("baseline.toml");
1767        fs::write(
1768            &config,
1769            r#"
1770[baseline]
1771
1772[[rule]]
1773id = "legacy-api"
1774type = "ratchet"
1775severity = "warning"
1776pattern = "legacyCall"
1777max_count = 100
1778message = "legacy usage"
1779"#,
1780        )
1781        .unwrap();
1782
1783        let src_dir = dir.path().join("src");
1784        fs::create_dir(&src_dir).unwrap();
1785        fs::write(
1786            src_dir.join("app.ts"),
1787            "legacyCall();\nlegacyCall();\nlegacyCall();\n",
1788        )
1789        .unwrap();
1790
1791        let result = run_baseline(&config, &[src_dir]).unwrap();
1792        assert_eq!(result.entries.len(), 1);
1793        assert_eq!(result.entries[0].rule_id, "legacy-api");
1794        assert_eq!(result.entries[0].count, 3);
1795        assert_eq!(result.files_scanned, 1);
1796    }
1797
1798    #[test]
1799    fn run_baseline_skips_non_ratchet_rules() {
1800        let dir = tempfile::tempdir().unwrap();
1801
1802        let config = dir.path().join("baseline.toml");
1803        fs::write(
1804            &config,
1805            r#"
1806[baseline]
1807
1808[[rule]]
1809id = "no-console"
1810type = "banned-pattern"
1811severity = "error"
1812pattern = "console\\.log"
1813regex = true
1814message = "no console"
1815
1816[[rule]]
1817id = "legacy-api"
1818type = "ratchet"
1819severity = "warning"
1820pattern = "legacyCall"
1821max_count = 100
1822message = "legacy usage"
1823"#,
1824        )
1825        .unwrap();
1826
1827        let src_dir = dir.path().join("src");
1828        fs::create_dir(&src_dir).unwrap();
1829        fs::write(src_dir.join("app.ts"), "console.log('hi');\nlegacyCall();\n").unwrap();
1830
1831        let result = run_baseline(&config, &[src_dir]).unwrap();
1832        // Only ratchet rules appear in baseline
1833        assert_eq!(result.entries.len(), 1);
1834        assert_eq!(result.entries[0].rule_id, "legacy-api");
1835    }
1836
1837    // ── collect_files tests ──
1838
1839    #[test]
1840    fn collect_files_single_file() {
1841        let dir = tempfile::tempdir().unwrap();
1842        let file = dir.path().join("test.ts");
1843        fs::write(&file, "content").unwrap();
1844
1845        let empty_glob = build_glob_set(&[]).unwrap();
1846        let files = collect_files(&[file.clone()], &empty_glob);
1847        assert_eq!(files.len(), 1);
1848        assert_eq!(files[0], file);
1849    }
1850
1851    #[test]
1852    fn collect_files_directory_walk() {
1853        let dir = tempfile::tempdir().unwrap();
1854        let sub = dir.path().join("sub");
1855        fs::create_dir(&sub).unwrap();
1856        fs::write(sub.join("a.ts"), "a").unwrap();
1857        fs::write(sub.join("b.ts"), "b").unwrap();
1858
1859        let empty_glob = build_glob_set(&[]).unwrap();
1860        let files = collect_files(&[dir.path().to_path_buf()], &empty_glob);
1861        assert_eq!(files.len(), 2);
1862    }
1863
1864    #[test]
1865    fn collect_files_excludes_patterns() {
1866        let dir = tempfile::tempdir().unwrap();
1867        fs::write(dir.path().join("keep.ts"), "keep").unwrap();
1868        fs::write(dir.path().join("skip.log"), "skip").unwrap();
1869
1870        let exclude = build_glob_set(&["*.log".into()]).unwrap();
1871        let files = collect_files(&[dir.path().to_path_buf()], &exclude);
1872        assert!(files.iter().all(|f| !f.to_string_lossy().ends_with(".log")));
1873        assert!(files.iter().any(|f| f.to_string_lossy().ends_with(".ts")));
1874    }
1875
1876    // ── run_scan with presets ──
1877
1878    #[test]
1879    fn run_scan_with_preset() {
1880        let dir = tempfile::tempdir().unwrap();
1881
1882        let config = dir.path().join("baseline.toml");
1883        fs::write(
1884            &config,
1885            r#"
1886[baseline]
1887extends = ["shadcn-strict"]
1888"#,
1889        )
1890        .unwrap();
1891
1892        let src_dir = dir.path().join("src");
1893        fs::create_dir(&src_dir).unwrap();
1894        fs::write(src_dir.join("app.tsx"), "export default function App() { return <div>hi</div>; }\n").unwrap();
1895
1896        let result = run_scan(&config, &[src_dir]).unwrap();
1897        // Just verify it doesn't error
1898        assert!(result.rules_loaded > 0);
1899    }
1900
1901    // ── run_scan with plugins ──
1902
1903    #[test]
1904    fn run_scan_with_plugin() {
1905        let dir = tempfile::tempdir().unwrap();
1906
1907        let plugin_path = dir.path().join("custom-rules.toml");
1908        fs::write(
1909            &plugin_path,
1910            r#"
1911[[rule]]
1912id = "no-todo"
1913type = "banned-pattern"
1914severity = "warning"
1915pattern = "TODO"
1916message = "No TODOs allowed"
1917"#,
1918        )
1919        .unwrap();
1920
1921        let config = dir.path().join("baseline.toml");
1922        fs::write(
1923            &config,
1924            format!(
1925                r#"
1926[baseline]
1927plugins = ["{}"]
1928"#,
1929                plugin_path.display()
1930            ),
1931        )
1932        .unwrap();
1933
1934        let src_dir = dir.path().join("src");
1935        fs::create_dir(&src_dir).unwrap();
1936        fs::write(src_dir.join("app.ts"), "// TODO: fix this\n").unwrap();
1937
1938        let result = run_scan(&config, &[src_dir]).unwrap();
1939        assert!(result.violations.iter().any(|v| v.rule_id == "no-todo"));
1940    }
1941
1942    #[test]
1943    fn run_scan_skip_no_matching_files() {
1944        let dir = tempfile::tempdir().unwrap();
1945
1946        let config = dir.path().join("baseline.toml");
1947        fs::write(
1948            &config,
1949            r#"
1950[baseline]
1951
1952[[rule]]
1953id = "no-console"
1954type = "banned-pattern"
1955severity = "error"
1956pattern = "console\\.log"
1957regex = true
1958message = "no console"
1959glob = "**/*.tsx"
1960"#,
1961        )
1962        .unwrap();
1963
1964        let src_dir = dir.path().join("src");
1965        fs::create_dir(&src_dir).unwrap();
1966        // Write a .rs file that won't match the *.tsx glob
1967        fs::write(src_dir.join("app.rs"), "console.log('hello');\n").unwrap();
1968
1969        let result = run_scan(&config, &[src_dir]).unwrap();
1970        assert!(result.violations.is_empty());
1971        // The file shouldn't even be read since no rule matches
1972        assert_eq!(result.files_scanned, 0);
1973    }
1974}