Skip to main content

code_baseline/
scan.rs

1use crate::cli::toml_config::{TomlConfig, TomlRule};
2use crate::git_diff;
3use crate::presets::{self, PresetError};
4use crate::rules::factory::{self, FactoryError};
5use crate::rules::file_presence::FilePresenceRule;
6use crate::rules::{Rule, ScanContext, Violation};
7use globset::{Glob, GlobSet, GlobSetBuilder};
8use ignore::WalkBuilder;
9use rayon::prelude::*;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::fmt;
13use std::fs;
14use std::path::{Path, PathBuf};
15use std::sync::atomic::{AtomicUsize, Ordering};
16use std::sync::Mutex;
17
/// Maximum number of characters a single line may contain before the file it
/// belongs to is treated as minified/bundled. Normal source code rarely
/// exceeds 500 chars per line; minified code routinely has lines of 10K+ chars.
const MINIFIED_LINE_LENGTH_THRESHOLD: usize = 500;

/// Detect minified/bundled files by checking for extremely long lines.
///
/// Returns `true` when at least one line of `content` is longer than
/// [`MINIFIED_LINE_LENGTH_THRESHOLD`] characters. Length is measured in
/// characters rather than UTF-8 bytes, so ordinary lines made of multi-byte
/// text are not misclassified as minified.
fn is_likely_minified(content: &str) -> bool {
    content.lines().any(|line| {
        // The byte length is an upper bound on the character count, so most
        // lines are rejected without walking their characters at all.
        line.len() > MINIFIED_LINE_LENGTH_THRESHOLD
            && line.chars().nth(MINIFIED_LINE_LENGTH_THRESHOLD).is_some()
    })
}
26
27/// A plugin config file containing additional rules.
28#[derive(Debug, serde::Deserialize)]
29struct PluginConfig {
30    #[serde(default)]
31    rule: Vec<crate::cli::toml_config::TomlRule>,
32}
33
34#[derive(Debug)]
35pub enum ScanError {
36    ConfigRead(std::io::Error),
37    ConfigParse(toml::de::Error),
38    GlobParse(globset::Error),
39    RuleFactory(FactoryError),
40    Preset(PresetError),
41    GitDiff(String),
42}
43
44impl fmt::Display for ScanError {
45    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46        match self {
47            ScanError::ConfigRead(e) => write!(f, "failed to read config: {}", e),
48            ScanError::ConfigParse(e) => write!(f, "failed to parse config: {}", e),
49            ScanError::GlobParse(e) => write!(f, "invalid glob pattern: {}", e),
50            ScanError::RuleFactory(e) => write!(f, "failed to build rule: {}", e),
51            ScanError::Preset(e) => write!(f, "preset error: {}", e),
52            ScanError::GitDiff(e) => write!(f, "git diff failed: {}", e),
53        }
54    }
55}
56
57impl std::error::Error for ScanError {}
58
59pub struct ScanResult {
60    pub violations: Vec<Violation>,
61    pub files_scanned: usize,
62    pub rules_loaded: usize,
63    /// For each ratchet rule: (found_count, max_count).
64    pub ratchet_counts: HashMap<String, (usize, usize)>,
65    /// Number of changed files when using --changed-only.
66    pub changed_files_count: Option<usize>,
67    /// Base ref used for diff when using --changed-only.
68    pub base_ref: Option<String>,
69}
70
71#[derive(Debug, Serialize, serde::Deserialize)]
72pub struct BaselineEntry {
73    pub rule_id: String,
74    pub pattern: String,
75    pub count: usize,
76}
77
78#[derive(Debug, Serialize, serde::Deserialize)]
79pub struct BaselineResult {
80    pub entries: Vec<BaselineEntry>,
81    pub files_scanned: usize,
82}
83
84/// A group of rules that share the same glob patterns.
85/// Glob matching is done once per group, amortizing the cost when
86/// multiple rules use the same inclusion/exclusion patterns.
87struct RuleGroup {
88    inclusion_glob: Option<GlobSet>,
89    exclusion_glob: Option<GlobSet>,
90    rules: Vec<RuleWithConditioning>,
91}
92
93/// A single rule with its conditioning data and pre-computed suppression strings.
94struct RuleWithConditioning {
95    rule: Box<dyn Rule>,
96    file_contains: Option<String>,
97    file_not_contains: Option<String>,
98    /// Pre-computed `"baseline:allow-{rule_id}"` string.
99    allow_marker: String,
100    /// Pre-computed `"baseline:allow-next-line {rule_id}"` string.
101    allow_next_line: String,
102}
103
104/// Result of building rules from config.
105struct BuiltRules {
106    rule_groups: Vec<RuleGroup>,
107    ratchet_thresholds: HashMap<String, usize>,
108    file_presence_rules: Vec<FilePresenceRule>,
109}
110
111/// Build rules from resolved TOML rules. Shared by run_scan and run_scan_stdin.
112fn build_rules(resolved_rules: &[TomlRule]) -> Result<BuiltRules, ScanError> {
113    let mut ratchet_thresholds: HashMap<String, usize> = HashMap::new();
114    let mut file_presence_rules: Vec<FilePresenceRule> = Vec::new();
115
116    // Intermediate representation before grouping
117    struct IntermediateRule {
118        rule: Box<dyn Rule>,
119        inclusion_pattern: Option<String>,
120        exclusion_patterns: Vec<String>,
121        file_contains: Option<String>,
122        file_not_contains: Option<String>,
123    }
124
125    let mut intermediates: Vec<IntermediateRule> = Vec::new();
126
127    for toml_rule in resolved_rules {
128        let rule_config = toml_rule.to_rule_config();
129
130        // File-presence rules are handled separately (they check existence, not content)
131        if toml_rule.rule_type == "file-presence" {
132            if let Ok(fp_rule) = FilePresenceRule::new(&rule_config) {
133                file_presence_rules.push(fp_rule);
134            }
135            continue;
136        }
137
138        let rule = factory::build_rule(&toml_rule.rule_type, &rule_config)
139            .map_err(ScanError::RuleFactory)?;
140
141        if toml_rule.rule_type == "ratchet" {
142            if let Some(max) = toml_rule.max_count {
143                ratchet_thresholds.insert(rule.id().to_string(), max);
144            }
145        }
146
147        let inclusion_pattern = rule.file_glob().map(|s| s.to_string());
148        let exclusion_patterns = toml_rule.exclude_glob.clone();
149
150        intermediates.push(IntermediateRule {
151            rule,
152            inclusion_pattern,
153            exclusion_patterns,
154            file_contains: toml_rule.file_contains.clone(),
155            file_not_contains: toml_rule.file_not_contains.clone(),
156        });
157    }
158
159    // Group rules by (inclusion_pattern, exclusion_patterns) to avoid redundant glob matching.
160    let mut groups: Vec<((Option<String>, Vec<String>), Vec<IntermediateRule>)> = Vec::new();
161
162    for ir in intermediates {
163        let key = (ir.inclusion_pattern.clone(), ir.exclusion_patterns.clone());
164        if let Some(group) = groups.iter_mut().find(|(k, _)| *k == key) {
165            group.1.push(ir);
166        } else {
167            groups.push((key, vec![ir]));
168        }
169    }
170
171    // Build RuleGroups with compiled GlobSets (once per unique pattern)
172    let mut rule_groups: Vec<RuleGroup> = Vec::new();
173    for ((inc_pattern, exc_patterns), intermediates) in groups {
174        let inclusion_glob = if let Some(ref pattern) = inc_pattern {
175            Some(build_glob_set_from_pattern(pattern)?)
176        } else {
177            None
178        };
179
180        let exclusion_glob = if !exc_patterns.is_empty() {
181            Some(build_glob_set(&exc_patterns)?)
182        } else {
183            None
184        };
185
186        let rules = intermediates
187            .into_iter()
188            .map(|ir| {
189                let id = ir.rule.id().to_string();
190                RuleWithConditioning {
191                    rule: ir.rule,
192                    file_contains: ir.file_contains,
193                    file_not_contains: ir.file_not_contains,
194                    allow_marker: format!("baseline:allow-{}", id),
195                    allow_next_line: format!("baseline:allow-next-line {}", id),
196                }
197            })
198            .collect();
199
200        rule_groups.push(RuleGroup {
201            inclusion_glob,
202            exclusion_glob,
203            rules,
204        });
205    }
206
207    Ok(BuiltRules {
208        rule_groups,
209        ratchet_thresholds,
210        file_presence_rules,
211    })
212}
213
214/// Check if a rule group matches a file path (inclusion + exclusion globs).
215fn group_matches_file(group: &RuleGroup, file_str: &str, file_name: &str) -> bool {
216    let included = match &group.inclusion_glob {
217        Some(gs) => gs.is_match(file_str) || gs.is_match(file_name),
218        None => true,
219    };
220    if !included {
221        return false;
222    }
223    if let Some(ref exc) = group.exclusion_glob {
224        if exc.is_match(file_str) || exc.is_match(file_name) {
225            return false;
226        }
227    }
228    true
229}
230
231/// Check file-context conditioning (file_contains / file_not_contains) with caching.
232fn passes_file_conditioning_cached<'a>(
233    rule: &'a RuleWithConditioning,
234    content: &str,
235    cache: &mut HashMap<&'a str, bool>,
236) -> bool {
237    if let Some(ref needle) = rule.file_contains {
238        let &mut result = cache
239            .entry(needle.as_str())
240            .or_insert_with(|| content.contains(needle.as_str()));
241        if !result {
242            return false;
243        }
244    }
245    if let Some(ref needle) = rule.file_not_contains {
246        let &mut result = cache
247            .entry(needle.as_str())
248            .or_insert_with(|| content.contains(needle.as_str()));
249        if result {
250            return false;
251        }
252    }
253    true
254}
255
256/// Run rules against content and collect violations, filtering escape-hatch comments.
257fn run_rules_on_content(
258    rule_groups: &[RuleGroup],
259    file_path: &Path,
260    content: &str,
261    file_str: &str,
262    file_name: &str,
263) -> Vec<Violation> {
264    let mut violations = Vec::new();
265    let content_lines: Vec<&str> = content.lines().collect();
266    let ctx = ScanContext {
267        file_path,
268        content,
269    };
270    let mut conditioning_cache: HashMap<&str, bool> = HashMap::new();
271
272    for group in rule_groups {
273        if !group_matches_file(group, file_str, file_name) {
274            continue;
275        }
276
277        for rule_cond in &group.rules {
278            if !passes_file_conditioning_cached(rule_cond, content, &mut conditioning_cache) {
279                continue;
280            }
281
282            let file_violations = rule_cond.rule.check_file(&ctx);
283            for v in file_violations {
284                if let Some(line_num) = v.line {
285                    if is_suppressed(
286                        &content_lines,
287                        line_num,
288                        &rule_cond.allow_marker,
289                        &rule_cond.allow_next_line,
290                    ) {
291                        continue;
292                    }
293                }
294                violations.push(v);
295            }
296        }
297    }
298
299    violations
300}
301
302/// Run a full scan: parse config, build rules, walk files, collect violations.
303pub fn run_scan(config_path: &Path, target_paths: &[PathBuf]) -> Result<ScanResult, ScanError> {
304    // 1. Read and parse TOML config
305    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
306    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
307
308    // 2. Load plugin rules from external TOML files
309    let mut plugin_rules: Vec<crate::cli::toml_config::TomlRule> = Vec::new();
310    for plugin_path in &toml_config.baseline.plugins {
311        let plugin_text = fs::read_to_string(plugin_path).map_err(ScanError::ConfigRead)?;
312        let plugin_config: PluginConfig =
313            toml::from_str(&plugin_text).map_err(ScanError::ConfigParse)?;
314        plugin_rules.extend(plugin_config.rule);
315    }
316
317    // 3. Resolve presets and merge with user-defined rules + plugin rules
318    let mut all_user_rules = toml_config.rule.clone();
319    all_user_rules.extend(plugin_rules);
320
321    let mut resolved_rules = presets::resolve_rules(
322        &toml_config.baseline.extends,
323        &all_user_rules,
324    )
325    .map_err(ScanError::Preset)?;
326
327    // 3b. Resolve scoped presets and append
328    let scoped_rules = presets::resolve_scoped_rules(
329        &toml_config.baseline.scoped,
330        &all_user_rules,
331    )
332    .map_err(ScanError::Preset)?;
333    resolved_rules.extend(scoped_rules);
334
335    // 4. Build exclude glob set
336    let exclude_set = build_glob_set(&toml_config.baseline.exclude)?;
337
338    // 5. Build rules via factory
339    let built = build_rules(&resolved_rules)?;
340    let rules_loaded: usize = built.rule_groups.iter().map(|g| g.rules.len()).sum();
341
342    // 6. Walk target paths and collect files
343    let files = collect_files(target_paths, &exclude_set);
344
345    // 7. Run rules on each file (parallel)
346    let files_scanned = AtomicUsize::new(0);
347
348    let mut violations: Vec<Violation> = files
349        .par_iter()
350        .filter_map(|file_path| {
351            let file_str = file_path.to_string_lossy();
352            let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
353
354            // Pre-check: does ANY rule group match this file? If not, skip the read entirely.
355            let any_match = built
356                .rule_groups
357                .iter()
358                .any(|g| group_matches_file(g, &file_str, &file_name));
359            if !any_match {
360                return None;
361            }
362
363            let content = fs::read_to_string(file_path).ok()?;
364            if is_likely_minified(&content) {
365                return None;
366            }
367
368            files_scanned.fetch_add(1, Ordering::Relaxed);
369            let file_violations = run_rules_on_content(
370                &built.rule_groups,
371                file_path,
372                &content,
373                &file_str,
374                &file_name,
375            );
376            if file_violations.is_empty() {
377                None
378            } else {
379                Some(file_violations)
380            }
381        })
382        .flatten()
383        .collect();
384
385    // 8. Run file-presence checks
386    for fp_rule in &built.file_presence_rules {
387        let mut fp_violations = fp_rule.check_paths(target_paths);
388        violations.append(&mut fp_violations);
389    }
390
391    // 9. Apply ratchet thresholds
392    let ratchet_counts = apply_ratchet_thresholds(&mut violations, &built.ratchet_thresholds);
393
394    Ok(ScanResult {
395        violations,
396        files_scanned: files_scanned.load(Ordering::Relaxed),
397        rules_loaded,
398        ratchet_counts,
399        changed_files_count: None,
400        base_ref: None,
401    })
402}
403
404/// Suppress ratchet violations that are within budget. Returns counts for display.
405fn apply_ratchet_thresholds(
406    violations: &mut Vec<Violation>,
407    thresholds: &HashMap<String, usize>,
408) -> HashMap<String, (usize, usize)> {
409    if thresholds.is_empty() {
410        return HashMap::new();
411    }
412
413    // Count violations per ratchet rule
414    let mut counts: HashMap<String, usize> = HashMap::new();
415    for v in violations.iter() {
416        if thresholds.contains_key(&v.rule_id) {
417            *counts.entry(v.rule_id.clone()).or_insert(0) += 1;
418        }
419    }
420
421    // Build result map and determine which rules to suppress
422    let mut result: HashMap<String, (usize, usize)> = HashMap::new();
423    let mut suppress: std::collections::HashSet<String> = std::collections::HashSet::new();
424
425    for (rule_id, &max) in thresholds {
426        let found = counts.get(rule_id).copied().unwrap_or(0);
427        result.insert(rule_id.clone(), (found, max));
428        if found <= max {
429            suppress.insert(rule_id.clone());
430        }
431    }
432
433    // Remove suppressed violations
434    if !suppress.is_empty() {
435        violations.retain(|v| !suppress.contains(&v.rule_id));
436    }
437
438    result
439}
440
441/// Run a scan on stdin content with a virtual filename.
442pub fn run_scan_stdin(
443    config_path: &Path,
444    content: &str,
445    filename: &str,
446) -> Result<ScanResult, ScanError> {
447    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
448    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
449
450    let mut resolved_rules = presets::resolve_rules(
451        &toml_config.baseline.extends,
452        &toml_config.rule,
453    )
454    .map_err(ScanError::Preset)?;
455
456    let scoped_rules = presets::resolve_scoped_rules(
457        &toml_config.baseline.scoped,
458        &toml_config.rule,
459    )
460    .map_err(ScanError::Preset)?;
461    resolved_rules.extend(scoped_rules);
462
463    let built = build_rules(&resolved_rules)?;
464    let rules_loaded: usize = built.rule_groups.iter().map(|g| g.rules.len()).sum();
465
466    if is_likely_minified(content) {
467        return Ok(ScanResult {
468            violations: vec![],
469            files_scanned: 0,
470            rules_loaded,
471            ratchet_counts: HashMap::new(),
472            changed_files_count: None,
473            base_ref: None,
474        });
475    }
476
477    let file_path = PathBuf::from(filename);
478    let file_str = file_path.to_string_lossy();
479    let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
480
481    let violations =
482        run_rules_on_content(&built.rule_groups, &file_path, content, &file_str, &file_name);
483
484    let mut violations = violations;
485    let ratchet_counts = apply_ratchet_thresholds(&mut violations, &built.ratchet_thresholds);
486
487    Ok(ScanResult {
488        violations,
489        files_scanned: 1,
490        rules_loaded,
491        ratchet_counts,
492        changed_files_count: None,
493        base_ref: None,
494    })
495}
496
497/// Run a scan filtered to only files/lines changed relative to a base branch.
498pub fn run_scan_changed(
499    config_path: &Path,
500    target_paths: &[PathBuf],
501    base_ref: &str,
502) -> Result<ScanResult, ScanError> {
503    // Get diff info from git
504    let diff = git_diff::diff_info(base_ref).map_err(|e| ScanError::GitDiff(e.to_string()))?;
505    let repo_root = git_diff::repo_root().map_err(|e| ScanError::GitDiff(e.to_string()))?;
506
507    let changed_files_count = diff.changed_lines.len();
508
509    // Run normal scan
510    let mut result = run_scan(config_path, target_paths)?;
511
512    // Post-filter violations to only those in changed files/lines
513    result.violations.retain(|v| {
514        // Compute relative path from repo root for matching against diff
515        let rel_path = if v.file.is_absolute() {
516            v.file.strip_prefix(&repo_root).unwrap_or(&v.file).to_path_buf()
517        } else {
518            v.file.clone()
519        };
520
521        if !diff.has_file(&rel_path) {
522            return false;
523        }
524
525        // File-level violations (no line number) pass if file is changed
526        match v.line {
527            Some(line) => diff.has_line(&rel_path, line),
528            None => true,
529        }
530    });
531
532    result.changed_files_count = Some(changed_files_count);
533    result.base_ref = Some(base_ref.to_string());
534
535    Ok(result)
536}
537
538/// Run baseline counting: parse config, build only ratchet rules, count matches.
539pub fn run_baseline(
540    config_path: &Path,
541    target_paths: &[PathBuf],
542) -> Result<BaselineResult, ScanError> {
543    let config_text = fs::read_to_string(config_path).map_err(ScanError::ConfigRead)?;
544    let toml_config: TomlConfig = toml::from_str(&config_text).map_err(ScanError::ConfigParse)?;
545
546    // Resolve presets and merge with user-defined rules
547    let mut resolved_rules = presets::resolve_rules(
548        &toml_config.baseline.extends,
549        &toml_config.rule,
550    )
551    .map_err(ScanError::Preset)?;
552
553    let scoped_rules = presets::resolve_scoped_rules(
554        &toml_config.baseline.scoped,
555        &toml_config.rule,
556    )
557    .map_err(ScanError::Preset)?;
558    resolved_rules.extend(scoped_rules);
559
560    let exclude_set = build_glob_set(&toml_config.baseline.exclude)?;
561
562    // Build only ratchet rules
563    let mut rules: Vec<(Box<dyn Rule>, Option<GlobSet>, String)> = Vec::new();
564    for toml_rule in &resolved_rules {
565        if toml_rule.rule_type != "ratchet" {
566            continue;
567        }
568        let rule_config = toml_rule.to_rule_config();
569        let rule = factory::build_rule(&toml_rule.rule_type, &rule_config)
570            .map_err(ScanError::RuleFactory)?;
571
572        let pattern = toml_rule.pattern.clone().unwrap_or_default();
573
574        let rule_glob = if let Some(ref pat) = rule.file_glob() {
575            Some(build_glob_set_from_pattern(pat)?)
576        } else {
577            None
578        };
579
580        rules.push((rule, rule_glob, pattern));
581    }
582
583    let files = collect_files(target_paths, &exclude_set);
584
585    let files_scanned = AtomicUsize::new(0);
586
587    let counts: HashMap<String, usize> = files
588        .par_iter()
589        .filter_map(|file_path| {
590            let content = fs::read_to_string(file_path).ok()?;
591            if is_likely_minified(&content) {
592                return None;
593            }
594
595            files_scanned.fetch_add(1, Ordering::Relaxed);
596            let ctx = ScanContext {
597                file_path,
598                content: &content,
599            };
600
601            let mut local_counts: HashMap<String, usize> = HashMap::new();
602            for (rule, rule_glob, _) in &rules {
603                if let Some(ref gs) = rule_glob {
604                    let file_str = file_path.to_string_lossy();
605                    let file_name = file_path.file_name().unwrap_or_default().to_string_lossy();
606                    if !gs.is_match(&*file_str) && !gs.is_match(&*file_name) {
607                        continue;
608                    }
609                }
610
611                let violations = rule.check_file(&ctx);
612                if !violations.is_empty() {
613                    *local_counts.entry(rule.id().to_string()).or_insert(0) += violations.len();
614                }
615            }
616
617            if local_counts.is_empty() {
618                None
619            } else {
620                Some(local_counts)
621            }
622        })
623        .reduce(
624            || HashMap::new(),
625            |mut acc, local| {
626                for (k, v) in local {
627                    *acc.entry(k).or_insert(0) += v;
628                }
629                acc
630            },
631        );
632
633    let entries: Vec<BaselineEntry> = rules
634        .iter()
635        .map(|(rule, _, pattern)| BaselineEntry {
636            rule_id: rule.id().to_string(),
637            pattern: pattern.clone(),
638            count: counts.get(rule.id()).copied().unwrap_or(0),
639        })
640        .collect();
641
642    Ok(BaselineResult {
643        entries,
644        files_scanned: files_scanned.load(Ordering::Relaxed),
645    })
646}
647
/// Check if a violation is suppressed by an escape-hatch comment.
/// Uses pre-computed marker strings to avoid per-call allocations.
///
/// `line_num` is 1-indexed. A violation is suppressed when
/// - its own line contains `allow_marker` or `baseline:allow-all`, or
/// - the line directly above contains `allow_next_line` or
///   `baseline:allow-next-line all`.
///
/// A marker only counts when it is not the prefix of a longer identifier, so a
/// comment written for rule `no-foo-bar` does not silence rule `no-foo`.
/// Line numbers of `0` or past the end of `lines` are never suppressed.
fn is_suppressed(lines: &[&str], line_num: usize, allow_marker: &str, allow_next_line: &str) -> bool {
    /// True when `c` can be part of a rule id word.
    fn is_id_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// True when `line` contains `marker` as a complete token: the text after
    /// it must not continue the identifier (`-x`, `_x`, `x`). A dash that is
    /// not followed by an identifier character (as in `-->`) ends the token.
    fn has_marker(line: &str, marker: &str) -> bool {
        line.match_indices(marker).any(|(start, _)| {
            let mut rest = line[start + marker.len()..].chars();
            match rest.next() {
                None => true,
                Some('-') => !rest.next().map_or(false, is_id_char),
                Some(c) => !is_id_char(c),
            }
        })
    }

    if line_num == 0 || line_num > lines.len() {
        return false;
    }

    // Same-line style: `code(); // baseline:allow-<rule>`
    let current = lines[line_num - 1];
    if has_marker(current, allow_marker) || has_marker(current, "baseline:allow-all") {
        return true;
    }

    // Next-line style: `// baseline:allow-next-line <rule>` on the line above.
    if line_num >= 2 {
        let prev = lines[line_num - 2];
        if has_marker(prev, allow_next_line) || has_marker(prev, "baseline:allow-next-line all") {
            return true;
        }
    }

    false
}
673
674pub(crate) fn collect_files(target_paths: &[PathBuf], exclude_set: &GlobSet) -> Vec<PathBuf> {
675    let mut files: Vec<PathBuf> = Vec::new();
676    for target in target_paths {
677        if target.is_file() {
678            files.push(target.clone());
679        } else {
680            // Use the `ignore` crate's parallel walker for multi-threaded directory traversal.
681            let walker = WalkBuilder::new(target)
682                .hidden(true) // skip hidden files/dirs like .git
683                .git_ignore(true) // respect .gitignore
684                .git_global(true) // respect global gitignore
685                .git_exclude(true) // respect .git/info/exclude
686                .build_parallel();
687
688            let collected: Mutex<Vec<PathBuf>> = Mutex::new(Vec::new());
689
690            walker.run(|| {
691                Box::new(|entry| {
692                    if let Ok(entry) = entry {
693                        if entry.file_type().map_or(false, |ft| ft.is_file()) {
694                            let path = entry.into_path();
695                            let rel = path.strip_prefix(target).unwrap_or(&path);
696                            if !exclude_set.is_match(rel.to_string_lossy().as_ref()) {
697                                collected.lock().unwrap().push(path);
698                            }
699                        }
700                    }
701                    ignore::WalkState::Continue
702                })
703            });
704
705            files.extend(collected.into_inner().unwrap());
706        }
707    }
708    files
709}
710
/// Normalize a glob pattern:
/// 1. Expand brace alternation (`{a,b}`) into one pattern per alternative.
///    Groups may be nested (`{a,{b,c}}`) and several groups may appear in one
///    pattern; a group without a top-level comma is left untouched.
/// 2. Auto-prefix path-based globs with `**/` so they match against absolute paths.
///    e.g. `apps/web/src/**/*.tsx` → `**/apps/web/src/**/*.tsx`
///
/// Patterns with an unbalanced `{` are not expanded.
fn expand_glob(pattern: &str) -> Vec<String> {
    /// For the `{` at byte offset `open`, find its matching `}` and the
    /// offsets of the commas that belong directly to this group (not to a
    /// nested one). Returns `None` when the brace is never closed.
    ///
    /// Scanning bytes is safe: `{`, `}` and `,` are ASCII and never occur
    /// inside a multi-byte UTF-8 sequence.
    fn scan_brace_group(bytes: &[u8], open: usize) -> Option<(usize, Vec<usize>)> {
        let mut depth = 0usize;
        let mut commas = Vec::new();
        for (idx, &byte) in bytes.iter().enumerate().skip(open) {
            match byte {
                b'{' => depth += 1,
                b'}' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some((idx, commas));
                    }
                }
                b',' if depth == 1 => commas.push(idx),
                _ => {}
            }
        }
        None
    }

    // Expand the first brace group that actually lists alternatives; the
    // recursive calls take care of any remaining or nested groups.
    let mut search_from = 0;
    while let Some(offset) = pattern[search_from..].find('{') {
        let open = search_from + offset;
        if let Some((close, commas)) = scan_brace_group(pattern.as_bytes(), open) {
            if !commas.is_empty() {
                let prefix = &pattern[..open];
                let suffix = &pattern[close + 1..];

                let mut result = Vec::new();
                let mut alt_start = open + 1;
                for alt_end in commas.into_iter().chain(std::iter::once(close)) {
                    let alt = &pattern[alt_start..alt_end];
                    result.extend(expand_glob(&format!("{prefix}{alt}{suffix}")));
                    alt_start = alt_end + 1;
                }
                return result;
            }
        }
        search_from = open + 1;
    }

    // Auto-prefix path-based globs that don't already start with ** or /
    // e.g. "apps/web/src/**/*.tsx" → "**/apps/web/src/**/*.tsx"
    // This ensures they match against absolute file paths.
    let normalized = if pattern.contains('/')
        && !pattern.starts_with("**/")
        && !pattern.starts_with('/')
    {
        format!("**/{pattern}")
    } else {
        pattern.to_string()
    };

    vec![normalized]
}
751
752/// Build a GlobSet from a single pattern string, expanding brace syntax.
753pub(crate) fn build_glob_set_from_pattern(pattern: &str) -> Result<GlobSet, ScanError> {
754    let expanded = expand_glob(pattern);
755    let mut builder = GlobSetBuilder::new();
756    for pat in &expanded {
757        builder.add(Glob::new(pat).map_err(ScanError::GlobParse)?);
758    }
759    builder.build().map_err(ScanError::GlobParse)
760}
761
762pub(crate) fn build_glob_set(patterns: &[String]) -> Result<GlobSet, ScanError> {
763    let mut builder = GlobSetBuilder::new();
764    for pattern in patterns {
765        for pat in &expand_glob(pattern) {
766            builder.add(Glob::new(pat).map_err(ScanError::GlobParse)?);
767        }
768    }
769    builder.build().map_err(ScanError::GlobParse)
770}
771
772#[cfg(test)]
773mod tests {
774    use super::*;
775    use crate::config::Severity;
776
777    fn make_violation(rule_id: &str) -> Violation {
778        Violation {
779            rule_id: rule_id.to_string(),
780            severity: Severity::Error,
781            file: PathBuf::from("test.ts"),
782            line: Some(1),
783            column: Some(1),
784            message: "test".to_string(),
785            suggest: None,
786            source_line: None,
787            fix: None,
788        }
789    }
790
791    /// Count total rules across all groups.
792    fn total_rules(groups: &[RuleGroup]) -> usize {
793        groups.iter().map(|g| g.rules.len()).sum()
794    }
795
796    #[test]
797    fn ratchet_under_budget_suppresses() {
798        let mut violations = vec![
799            make_violation("ratchet-legacy"),
800            make_violation("ratchet-legacy"),
801            make_violation("other-rule"),
802        ];
803        let mut thresholds = HashMap::new();
804        thresholds.insert("ratchet-legacy".to_string(), 5);
805
806        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
807
808        assert_eq!(violations.len(), 1); // only "other-rule" remains
809        assert_eq!(violations[0].rule_id, "other-rule");
810        assert_eq!(counts["ratchet-legacy"], (2, 5));
811    }
812
813    #[test]
814    fn ratchet_over_budget_keeps_all() {
815        let mut violations = vec![
816            make_violation("ratchet-legacy"),
817            make_violation("ratchet-legacy"),
818            make_violation("ratchet-legacy"),
819            make_violation("other-rule"),
820        ];
821        let mut thresholds = HashMap::new();
822        thresholds.insert("ratchet-legacy".to_string(), 2);
823
824        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
825
826        assert_eq!(violations.len(), 4); // all kept
827        assert_eq!(counts["ratchet-legacy"], (3, 2));
828    }
829
830    #[test]
831    fn ratchet_exactly_at_budget_suppresses() {
832        let mut violations = vec![
833            make_violation("ratchet-legacy"),
834            make_violation("ratchet-legacy"),
835        ];
836        let mut thresholds = HashMap::new();
837        thresholds.insert("ratchet-legacy".to_string(), 2);
838
839        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
840
841        assert_eq!(violations.len(), 0); // suppressed (at budget)
842        assert_eq!(counts["ratchet-legacy"], (2, 2));
843    }
844
845    #[test]
846    fn no_ratchet_rules_is_noop() {
847        let mut violations = vec![make_violation("other-rule")];
848        let thresholds = HashMap::new();
849
850        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
851
852        assert_eq!(violations.len(), 1);
853        assert!(counts.is_empty());
854    }
855
856    #[test]
857    fn ratchet_zero_with_matches_keeps_all() {
858        let mut violations = vec![make_violation("ratchet-zero")];
859        let mut thresholds = HashMap::new();
860        thresholds.insert("ratchet-zero".to_string(), 0);
861
862        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
863
864        assert_eq!(violations.len(), 1);
865        assert_eq!(counts["ratchet-zero"], (1, 0));
866    }
867
868    #[test]
869    fn ratchet_zero_no_matches_suppresses() {
870        let mut violations: Vec<Violation> = vec![];
871        let mut thresholds = HashMap::new();
872        thresholds.insert("ratchet-zero".to_string(), 0);
873
874        let counts = apply_ratchet_thresholds(&mut violations, &thresholds);
875
876        assert!(violations.is_empty());
877        assert_eq!(counts["ratchet-zero"], (0, 0));
878    }
879
880    // ── is_suppressed tests ──
881
882    #[test]
883    fn suppressed_by_same_line_allow() {
884        let lines = vec![
885            "let x = style={{ color: 'red' }}; // baseline:allow-no-inline-styles",
886        ];
887        assert!(is_suppressed(
888            &lines,
889            1,
890            "baseline:allow-no-inline-styles",
891            "baseline:allow-next-line no-inline-styles",
892        ));
893    }
894
895    #[test]
896    fn suppressed_by_allow_all() {
897        let lines = vec![
898            "let x = style={{ color: 'red' }}; // baseline:allow-all",
899        ];
900        assert!(is_suppressed(
901            &lines,
902            1,
903            "baseline:allow-no-inline-styles",
904            "baseline:allow-next-line no-inline-styles",
905        ));
906        assert!(is_suppressed(
907            &lines,
908            1,
909            "baseline:allow-any-other-rule",
910            "baseline:allow-next-line any-other-rule",
911        ));
912    }
913
914    #[test]
915    fn suppressed_by_allow_next_line() {
916        let lines = vec![
917            "// baseline:allow-next-line no-inline-styles",
918            "let x = style={{ color: 'red' }};",
919        ];
920        assert!(is_suppressed(
921            &lines,
922            2,
923            "baseline:allow-no-inline-styles",
924            "baseline:allow-next-line no-inline-styles",
925        ));
926    }
927
928    #[test]
929    fn suppressed_by_allow_next_line_all() {
930        let lines = vec![
931            "// baseline:allow-next-line all",
932            "let x = style={{ color: 'red' }};",
933        ];
934        assert!(is_suppressed(
935            &lines,
936            2,
937            "baseline:allow-no-inline-styles",
938            "baseline:allow-next-line no-inline-styles",
939        ));
940    }
941
942    #[test]
943    fn not_suppressed_wrong_rule_id() {
944        let lines = vec![
945            "let x = style={{ color: 'red' }}; // baseline:allow-other-rule",
946        ];
947        assert!(!is_suppressed(
948            &lines,
949            1,
950            "baseline:allow-no-inline-styles",
951            "baseline:allow-next-line no-inline-styles",
952        ));
953    }
954
955    #[test]
956    fn not_suppressed_no_comment() {
957        let lines = vec![
958            "let x = style={{ color: 'red' }};",
959        ];
960        assert!(!is_suppressed(
961            &lines,
962            1,
963            "baseline:allow-no-inline-styles",
964            "baseline:allow-next-line no-inline-styles",
965        ));
966    }
967
968    #[test]
969    fn not_suppressed_next_line_wrong_rule() {
970        let lines = vec![
971            "// baseline:allow-next-line other-rule",
972            "let x = style={{ color: 'red' }};",
973        ];
974        assert!(!is_suppressed(
975            &lines,
976            2,
977            "baseline:allow-no-inline-styles",
978            "baseline:allow-next-line no-inline-styles",
979        ));
980    }
981
982    #[test]
983    fn suppressed_line_zero_is_safe() {
984        let lines = vec!["some content"];
985        // line_num 0 should not panic
986        assert!(!is_suppressed(
987            &lines,
988            0,
989            "baseline:allow-any-rule",
990            "baseline:allow-next-line any-rule",
991        ));
992    }
993
994    #[test]
995    fn suppressed_past_end_is_safe() {
996        let lines = vec!["some content"];
997        // line_num past end should not panic
998        assert!(!is_suppressed(
999            &lines,
1000            5,
1001            "baseline:allow-any-rule",
1002            "baseline:allow-next-line any-rule",
1003        ));
1004    }
1005
1006    // ── ScanError Display tests ──
1007
1008    #[test]
1009    fn scan_error_display_config_read() {
1010        let err = ScanError::ConfigRead(std::io::Error::new(
1011            std::io::ErrorKind::NotFound,
1012            "not found",
1013        ));
1014        assert!(err.to_string().contains("failed to read config"));
1015    }
1016
1017    #[test]
1018    fn scan_error_display_config_parse() {
1019        let toml_err = toml::from_str::<TomlConfig>("not valid toml [[[").unwrap_err();
1020        let err = ScanError::ConfigParse(toml_err);
1021        assert!(err.to_string().contains("failed to parse config"));
1022    }
1023
1024    #[test]
1025    fn scan_error_display_glob_parse() {
1026        let glob_err = Glob::new("[invalid").unwrap_err();
1027        let err = ScanError::GlobParse(glob_err);
1028        assert!(err.to_string().contains("invalid glob pattern"));
1029    }
1030
1031    #[test]
1032    fn scan_error_display_rule_factory() {
1033        let err = ScanError::RuleFactory(FactoryError::UnknownRuleType("nope".into()));
1034        assert!(err.to_string().contains("failed to build rule"));
1035    }
1036
1037    #[test]
1038    fn scan_error_display_preset() {
1039        let err = ScanError::Preset(PresetError::UnknownPreset {
1040            name: "bad".into(),
1041            available: vec!["shadcn-strict"],
1042        });
1043        assert!(err.to_string().contains("preset error"));
1044    }
1045
1046    #[test]
1047    fn scan_error_display_git_diff() {
1048        let err = ScanError::GitDiff("diff broke".into());
1049        assert_eq!(err.to_string(), "git diff failed: diff broke");
1050    }
1051
1052    // ── build_rules tests ──
1053
1054    #[test]
1055    fn build_rules_banned_pattern_rule() {
1056        let rules = vec![TomlRule {
1057            id: "no-console".into(),
1058            rule_type: "banned-pattern".into(),
1059            pattern: Some("console\\.log".into()),
1060            message: "no console.log".into(),
1061            glob: Some("**/*.ts".into()),
1062            ..Default::default()
1063        }];
1064
1065        let built = build_rules(&rules).unwrap();
1066        assert_eq!(total_rules(&built.rule_groups), 1);
1067        assert!(built.ratchet_thresholds.is_empty());
1068        assert!(built.file_presence_rules.is_empty());
1069    }
1070
1071    #[test]
1072    fn build_rules_ratchet_records_threshold() {
1073        let rules = vec![TomlRule {
1074            id: "legacy-api".into(),
1075            rule_type: "ratchet".into(),
1076            pattern: Some("legacyCall".into()),
1077            max_count: Some(10),
1078            glob: Some("**/*.ts".into()),
1079            message: "legacy".into(),
1080            ..Default::default()
1081        }];
1082
1083        let built = build_rules(&rules).unwrap();
1084        assert_eq!(total_rules(&built.rule_groups), 1);
1085        assert_eq!(built.ratchet_thresholds["legacy-api"], 10);
1086    }
1087
1088    #[test]
1089    fn build_rules_file_presence_separated() {
1090        let rules = vec![
1091            TomlRule {
1092                id: "has-readme".into(),
1093                rule_type: "file-presence".into(),
1094                required_files: vec!["README.md".into()],
1095                message: "need readme".into(),
1096                ..Default::default()
1097            },
1098            TomlRule {
1099                id: "no-console".into(),
1100                rule_type: "banned-pattern".into(),
1101                pattern: Some("console\\.log".into()),
1102                message: "no console".into(),
1103                ..Default::default()
1104            },
1105        ];
1106
1107        let built = build_rules(&rules).unwrap();
1108        assert_eq!(total_rules(&built.rule_groups), 1); // only banned-pattern
1109        assert_eq!(built.file_presence_rules.len(), 1);
1110    }
1111
1112    #[test]
1113    fn build_rules_unknown_type_errors() {
1114        let rules = vec![TomlRule {
1115            id: "bad".into(),
1116            rule_type: "nonexistent-rule-type".into(),
1117            message: "x".into(),
1118            ..Default::default()
1119        }];
1120
1121        let result = build_rules(&rules);
1122        assert!(result.is_err());
1123        let err = result.err().unwrap();
1124        assert!(matches!(err, ScanError::RuleFactory(_)));
1125    }
1126
1127    #[test]
1128    fn build_rules_with_exclude_glob() {
1129        let rules = vec![TomlRule {
1130            id: "no-console".into(),
1131            rule_type: "banned-pattern".into(),
1132            pattern: Some("console\\.log".into()),
1133            message: "no console".into(),
1134            exclude_glob: vec!["**/test/**".into()],
1135            ..Default::default()
1136        }];
1137
1138        let built = build_rules(&rules).unwrap();
1139        assert_eq!(built.rule_groups.len(), 1);
1140        assert!(built.rule_groups[0].exclusion_glob.is_some());
1141    }
1142
1143    #[test]
1144    fn build_rules_with_file_conditioning() {
1145        let rules = vec![TomlRule {
1146            id: "no-console".into(),
1147            rule_type: "banned-pattern".into(),
1148            pattern: Some("console\\.log".into()),
1149            message: "no console".into(),
1150            file_contains: Some("import React".into()),
1151            file_not_contains: Some("// @generated".into()),
1152            ..Default::default()
1153        }];
1154
1155        let built = build_rules(&rules).unwrap();
1156        assert_eq!(built.rule_groups.len(), 1);
1157        assert!(built.rule_groups[0].rules[0].file_contains.is_some());
1158        assert!(built.rule_groups[0].rules[0].file_not_contains.is_some());
1159    }
1160
1161    // ── group_matches_file tests ──
1162
1163    #[test]
1164    fn group_matches_file_no_glob_matches_all() {
1165        let rules = vec![TomlRule {
1166            id: "r".into(),
1167            rule_type: "banned-pattern".into(),
1168            pattern: Some("x".into()),
1169            message: "m".into(),
1170            ..Default::default()
1171        }];
1172        let built = build_rules(&rules).unwrap();
1173        assert!(group_matches_file(&built.rule_groups[0], "anything.rs", "anything.rs"));
1174    }
1175
1176    #[test]
1177    fn group_matches_file_inclusion_glob_filters() {
1178        let rules = vec![TomlRule {
1179            id: "r".into(),
1180            rule_type: "banned-pattern".into(),
1181            pattern: Some("x".into()),
1182            message: "m".into(),
1183            glob: Some("**/*.tsx".into()),
1184            ..Default::default()
1185        }];
1186        let built = build_rules(&rules).unwrap();
1187        assert!(group_matches_file(&built.rule_groups[0], "src/Foo.tsx", "Foo.tsx"));
1188        assert!(!group_matches_file(&built.rule_groups[0], "src/Foo.rs", "Foo.rs"));
1189    }
1190
1191    #[test]
1192    fn group_matches_file_exclusion_glob_rejects() {
1193        let rules = vec![TomlRule {
1194            id: "r".into(),
1195            rule_type: "banned-pattern".into(),
1196            pattern: Some("x".into()),
1197            message: "m".into(),
1198            exclude_glob: vec!["**/test/**".into()],
1199            ..Default::default()
1200        }];
1201        let built = build_rules(&rules).unwrap();
1202        assert!(group_matches_file(&built.rule_groups[0], "src/app.ts", "app.ts"));
1203        assert!(!group_matches_file(&built.rule_groups[0], "src/test/app.ts", "app.ts"));
1204    }
1205
1206    // ── passes_file_conditioning tests ──
1207
1208    #[test]
1209    fn passes_conditioning_no_conditions() {
1210        let rules = vec![TomlRule {
1211            id: "r".into(),
1212            rule_type: "banned-pattern".into(),
1213            pattern: Some("x".into()),
1214            message: "m".into(),
1215            ..Default::default()
1216        }];
1217        let built = build_rules(&rules).unwrap();
1218        let mut cache = HashMap::new();
1219        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "anything", &mut cache));
1220    }
1221
1222    #[test]
1223    fn passes_conditioning_file_contains_present() {
1224        let rules = vec![TomlRule {
1225            id: "r".into(),
1226            rule_type: "banned-pattern".into(),
1227            pattern: Some("x".into()),
1228            message: "m".into(),
1229            file_contains: Some("import React".into()),
1230            ..Default::default()
1231        }];
1232        let built = build_rules(&rules).unwrap();
1233        let mut cache = HashMap::new();
1234        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React from 'react';", &mut cache));
1235        let mut cache = HashMap::new();
1236        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import Vue from 'vue';", &mut cache));
1237    }
1238
1239    #[test]
1240    fn passes_conditioning_file_not_contains() {
1241        let rules = vec![TomlRule {
1242            id: "r".into(),
1243            rule_type: "banned-pattern".into(),
1244            pattern: Some("x".into()),
1245            message: "m".into(),
1246            file_not_contains: Some("// @generated".into()),
1247            ..Default::default()
1248        }];
1249        let built = build_rules(&rules).unwrap();
1250        let mut cache = HashMap::new();
1251        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "normal code", &mut cache));
1252        let mut cache = HashMap::new();
1253        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "// @generated\nnormal code", &mut cache));
1254    }
1255
1256    #[test]
1257    fn passes_conditioning_both_conditions() {
1258        let rules = vec![TomlRule {
1259            id: "r".into(),
1260            rule_type: "banned-pattern".into(),
1261            pattern: Some("x".into()),
1262            message: "m".into(),
1263            file_contains: Some("import React".into()),
1264            file_not_contains: Some("// @generated".into()),
1265            ..Default::default()
1266        }];
1267        let built = build_rules(&rules).unwrap();
1268        // Has required, missing excluded -> pass
1269        let mut cache = HashMap::new();
1270        assert!(passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React", &mut cache));
1271        // Missing required -> fail
1272        let mut cache = HashMap::new();
1273        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import Vue", &mut cache));
1274        // Has both -> fail (file_not_contains blocks it)
1275        let mut cache = HashMap::new();
1276        assert!(!passes_file_conditioning_cached(&built.rule_groups[0].rules[0], "import React // @generated", &mut cache));
1277    }
1278
1279    // ── run_rules_on_content tests ──
1280
1281    #[test]
1282    fn run_rules_on_content_finds_violations() {
1283        let rules = vec![TomlRule {
1284            id: "no-console".into(),
1285            rule_type: "banned-pattern".into(),
1286            pattern: Some("console\\.log".into()),
1287            message: "no console.log".into(),
1288            regex: true,
1289            ..Default::default()
1290        }];
1291        let built = build_rules(&rules).unwrap();
1292        let path = PathBuf::from("test.ts");
1293        let content = "console.log('hello');\nfoo();\n";
1294
1295        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1296        assert_eq!(violations.len(), 1);
1297        assert_eq!(violations[0].rule_id, "no-console");
1298    }
1299
1300    #[test]
1301    fn run_rules_on_content_respects_suppression() {
1302        let rules = vec![TomlRule {
1303            id: "no-console".into(),
1304            rule_type: "banned-pattern".into(),
1305            pattern: Some("console\\.log".into()),
1306            message: "no console.log".into(),
1307            regex: true,
1308            ..Default::default()
1309        }];
1310        let built = build_rules(&rules).unwrap();
1311        let path = PathBuf::from("test.ts");
1312        let content = "console.log('hello'); // baseline:allow-no-console\n";
1313
1314        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1315        assert_eq!(violations.len(), 0);
1316    }
1317
1318    #[test]
1319    fn run_rules_on_content_skips_non_matching_glob() {
1320        let rules = vec![TomlRule {
1321            id: "no-console".into(),
1322            rule_type: "banned-pattern".into(),
1323            pattern: Some("console\\.log".into()),
1324            message: "no console.log".into(),
1325            regex: true,
1326            glob: Some("**/*.tsx".into()),
1327            ..Default::default()
1328        }];
1329        let built = build_rules(&rules).unwrap();
1330        let path = PathBuf::from("test.rs");
1331        let content = "console.log('hello');\n";
1332
1333        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.rs", "test.rs");
1334        assert_eq!(violations.len(), 0);
1335    }
1336
1337    #[test]
1338    fn run_rules_on_content_skips_file_conditioning() {
1339        let rules = vec![TomlRule {
1340            id: "no-console".into(),
1341            rule_type: "banned-pattern".into(),
1342            pattern: Some("console\\.log".into()),
1343            message: "no console.log".into(),
1344            regex: true,
1345            file_contains: Some("import React".into()),
1346            ..Default::default()
1347        }];
1348        let built = build_rules(&rules).unwrap();
1349        let path = PathBuf::from("test.ts");
1350        let content = "console.log('hello');\n"; // no "import React"
1351
1352        let violations = run_rules_on_content(&built.rule_groups, &path, content, "test.ts", "test.ts");
1353        assert_eq!(violations.len(), 0);
1354    }
1355
1356    // ── build_glob_set tests ──
1357
1358    #[test]
1359    fn build_glob_set_empty() {
1360        let gs = build_glob_set(&[]).unwrap();
1361        assert!(!gs.is_match("anything"));
1362    }
1363
1364    #[test]
1365    fn build_glob_set_matches() {
1366        let gs = build_glob_set(&["**/*.ts".into(), "**/*.tsx".into()]).unwrap();
1367        assert!(gs.is_match("src/foo.ts"));
1368        assert!(gs.is_match("src/foo.tsx"));
1369        assert!(!gs.is_match("src/foo.rs"));
1370    }
1371
1372    #[test]
1373    fn build_glob_set_invalid_pattern() {
1374        let err = build_glob_set(&["[invalid".into()]).unwrap_err();
1375        assert!(matches!(err, ScanError::GlobParse(_)));
1376    }
1377
1378    // ── expand_glob / brace expansion tests ──
1379
1380    #[test]
1381    fn expand_glob_no_braces() {
1382        assert_eq!(expand_glob("**/*.ts"), vec!["**/*.ts"]);
1383    }
1384
1385    #[test]
1386    fn expand_glob_single_brace() {
1387        let mut result = expand_glob("**/*.{ts,tsx}");
1388        result.sort();
1389        assert_eq!(result, vec!["**/*.ts", "**/*.tsx"]);
1390    }
1391
1392    #[test]
1393    fn expand_glob_three_alternatives() {
1394        let mut result = expand_glob("src/**/*.{ts,tsx,js}");
1395        result.sort();
1396        // Path-based globs get **/ prefix
1397        assert_eq!(
1398            result,
1399            vec!["**/src/**/*.js", "**/src/**/*.ts", "**/src/**/*.tsx"]
1400        );
1401    }
1402
1403    #[test]
1404    fn expand_glob_no_comma_passthrough() {
1405        // Braces without commas should pass through (e.g. character classes)
1406        assert_eq!(expand_glob("**/*.[tj]s"), vec!["**/*.[tj]s"]);
1407    }
1408
1409    #[test]
1410    fn expand_glob_auto_prefix_path_glob() {
1411        // Path-based globs without **/ prefix get auto-prefixed
1412        assert_eq!(
1413            expand_glob("apps/web/src/**/*.tsx"),
1414            vec!["**/apps/web/src/**/*.tsx"]
1415        );
1416    }
1417
1418    #[test]
1419    fn expand_glob_no_double_prefix() {
1420        // Already prefixed with **/ should not get double-prefixed
1421        assert_eq!(
1422            expand_glob("**/apps/web/src/**/*.tsx"),
1423            vec!["**/apps/web/src/**/*.tsx"]
1424        );
1425    }
1426
1427    #[test]
1428    fn expand_glob_simple_extension_no_prefix() {
1429        // Simple extension globs (no /) should not get prefixed
1430        assert_eq!(expand_glob("*.ts"), vec!["*.ts"]);
1431    }
1432
1433    #[test]
1434    fn build_glob_set_brace_expansion() {
1435        let gs = build_glob_set(&["**/*.{ts,tsx}".into()]).unwrap();
1436        assert!(gs.is_match("src/foo.ts"));
1437        assert!(gs.is_match("src/foo.tsx"));
1438        assert!(!gs.is_match("src/foo.js"));
1439    }
1440
1441    #[test]
1442    fn build_glob_set_from_pattern_brace_expansion() {
1443        let gs = build_glob_set_from_pattern("**/*.{ts,tsx,js,jsx}").unwrap();
1444        assert!(gs.is_match("src/components/Button.tsx"));
1445        assert!(gs.is_match("lib/utils.js"));
1446        assert!(!gs.is_match("src/main.rs"));
1447    }
1448
1449    #[test]
1450    fn build_glob_set_from_pattern_path_glob() {
1451        let gs = build_glob_set_from_pattern("src/components/**/*.{ts,tsx}").unwrap();
1452        assert!(gs.is_match("src/components/Button.tsx"));
1453        assert!(gs.is_match("src/components/deep/nested/Card.ts"));
1454        assert!(!gs.is_match("lib/utils.tsx"));
1455    }
1456
1457    #[test]
1458    fn build_glob_set_path_glob_matches_absolute() {
1459        // The real-world case: "apps/web/src/**/*.{ts,tsx}" must match absolute paths
1460        let gs = build_glob_set_from_pattern("apps/web/src/**/*.{ts,tsx}").unwrap();
1461        assert!(gs.is_match("/Users/dev/project/apps/web/src/components/Foo.tsx"));
1462        assert!(gs.is_match("apps/web/src/index.ts"));
1463        assert!(!gs.is_match("/Users/dev/project/apps/api/src/index.ts"));
1464    }
1465
1466    // ── rule grouping tests ──
1467
1468    #[test]
1469    fn build_rules_groups_same_glob() {
1470        let rules = vec![
1471            TomlRule {
1472                id: "no-console".into(),
1473                rule_type: "banned-pattern".into(),
1474                pattern: Some("console\\.log".into()),
1475                message: "no console".into(),
1476                glob: Some("**/*.ts".into()),
1477                regex: true,
1478                ..Default::default()
1479            },
1480            TomlRule {
1481                id: "no-debugger".into(),
1482                rule_type: "banned-pattern".into(),
1483                pattern: Some("debugger".into()),
1484                message: "no debugger".into(),
1485                glob: Some("**/*.ts".into()),
1486                ..Default::default()
1487            },
1488        ];
1489
1490        let built = build_rules(&rules).unwrap();
1491        // Both rules share the same glob, so they should be in one group
1492        assert_eq!(built.rule_groups.len(), 1);
1493        assert_eq!(built.rule_groups[0].rules.len(), 2);
1494    }
1495
1496    #[test]
1497    fn build_rules_separates_different_globs() {
1498        let rules = vec![
1499            TomlRule {
1500                id: "no-console".into(),
1501                rule_type: "banned-pattern".into(),
1502                pattern: Some("console\\.log".into()),
1503                message: "no console".into(),
1504                glob: Some("**/*.ts".into()),
1505                regex: true,
1506                ..Default::default()
1507            },
1508            TomlRule {
1509                id: "no-debugger".into(),
1510                rule_type: "banned-pattern".into(),
1511                pattern: Some("debugger".into()),
1512                message: "no debugger".into(),
1513                glob: Some("**/*.tsx".into()),
1514                ..Default::default()
1515            },
1516        ];
1517
1518        let built = build_rules(&rules).unwrap();
1519        // Different globs -> separate groups
1520        assert_eq!(built.rule_groups.len(), 2);
1521        assert_eq!(built.rule_groups[0].rules.len(), 1);
1522        assert_eq!(built.rule_groups[1].rules.len(), 1);
1523    }
1524
1525    // ── run_scan integration tests ──
1526
1527    #[test]
1528    fn run_scan_with_banned_pattern() {
1529        let dir = tempfile::tempdir().unwrap();
1530
1531        // Write config
1532        let config = dir.path().join("baseline.toml");
1533        fs::write(
1534            &config,
1535            r#"
1536[baseline]
1537
1538[[rule]]
1539id = "no-console"
1540type = "banned-pattern"
1541severity = "error"
1542pattern = "console\\.log"
1543regex = true
1544message = "Do not use console.log"
1545"#,
1546        )
1547        .unwrap();
1548
1549        // Write a source file
1550        let src_dir = dir.path().join("src");
1551        fs::create_dir(&src_dir).unwrap();
1552        fs::write(src_dir.join("app.ts"), "console.log('hi');\nfoo();\n").unwrap();
1553
1554        let result = run_scan(&config, &[src_dir]).unwrap();
1555        assert_eq!(result.violations.len(), 1);
1556        assert_eq!(result.violations[0].rule_id, "no-console");
1557        assert_eq!(result.files_scanned, 1);
1558        assert_eq!(result.rules_loaded, 1);
1559    }
1560
1561    #[test]
1562    fn run_scan_no_violations() {
1563        let dir = tempfile::tempdir().unwrap();
1564
1565        let config = dir.path().join("baseline.toml");
1566        fs::write(
1567            &config,
1568            r#"
1569[baseline]
1570
1571[[rule]]
1572id = "no-console"
1573type = "banned-pattern"
1574severity = "error"
1575pattern = "console\\.log"
1576regex = true
1577message = "Do not use console.log"
1578glob = "**/*.ts"
1579"#,
1580        )
1581        .unwrap();
1582
1583        let src_dir = dir.path().join("src");
1584        fs::create_dir(&src_dir).unwrap();
1585        fs::write(src_dir.join("app.ts"), "doStuff();\n").unwrap();
1586
1587        let result = run_scan(&config, &[src_dir]).unwrap();
1588        assert!(result.violations.is_empty());
1589        assert_eq!(result.files_scanned, 1);
1590    }
1591
1592    #[test]
1593    fn run_scan_excludes_files() {
1594        let dir = tempfile::tempdir().unwrap();
1595
1596        let config = dir.path().join("baseline.toml");
1597        fs::write(
1598            &config,
1599            r#"
1600[baseline]
1601exclude = ["**/dist/**"]
1602
1603[[rule]]
1604id = "no-console"
1605type = "banned-pattern"
1606severity = "error"
1607pattern = "console\\.log"
1608regex = true
1609message = "no console"
1610"#,
1611        )
1612        .unwrap();
1613
1614        // File in dist should be excluded
1615        let dist_dir = dir.path().join("dist");
1616        fs::create_dir(&dist_dir).unwrap();
1617        fs::write(dist_dir.join("app.ts"), "console.log('hi');\n").unwrap();
1618
1619        let result = run_scan(&config, &[dir.path().to_path_buf()]).unwrap();
1620        // The dist file should be excluded
1621        for v in &result.violations {
1622            assert!(!v.file.to_string_lossy().contains("dist"));
1623        }
1624    }
1625
1626    #[test]
1627    fn run_scan_file_presence_rule() {
1628        let dir = tempfile::tempdir().unwrap();
1629
1630        let config = dir.path().join("baseline.toml");
1631        fs::write(
1632            &config,
1633            r#"
1634[baseline]
1635
1636[[rule]]
1637id = "has-readme"
1638type = "file-presence"
1639severity = "error"
1640required_files = ["README.md"]
1641message = "README.md is required"
1642"#,
1643        )
1644        .unwrap();
1645
1646        // No README.md in dir
1647        let result = run_scan(&config, &[dir.path().to_path_buf()]).unwrap();
1648        assert!(result.violations.iter().any(|v| v.rule_id == "has-readme"));
1649    }
1650
1651    #[test]
1652    fn run_scan_missing_config_errors() {
1653        let result = run_scan(
1654            Path::new("/nonexistent/baseline.toml"),
1655            &[PathBuf::from(".")],
1656        );
1657        assert!(result.is_err());
1658        assert!(matches!(result.err().unwrap(), ScanError::ConfigRead(_)));
1659    }
1660
1661    #[test]
1662    fn run_scan_invalid_config_errors() {
1663        let dir = tempfile::tempdir().unwrap();
1664        let config = dir.path().join("baseline.toml");
1665        fs::write(&config, "this is not valid toml [[[").unwrap();
1666
1667        let result = run_scan(&config, &[dir.path().to_path_buf()]);
1668        assert!(result.is_err());
1669        assert!(matches!(result.err().unwrap(), ScanError::ConfigParse(_)));
1670    }
1671
1672    #[test]
1673    fn run_scan_with_ratchet_rule() {
1674        let dir = tempfile::tempdir().unwrap();
1675
1676        let config = dir.path().join("baseline.toml");
1677        fs::write(
1678            &config,
1679            r#"
1680[baseline]
1681
1682[[rule]]
1683id = "legacy-api"
1684type = "ratchet"
1685severity = "warning"
1686pattern = "legacyCall"
1687max_count = 5
1688message = "legacy api usage"
1689"#,
1690        )
1691        .unwrap();
1692
1693        let src_dir = dir.path().join("src");
1694        fs::create_dir(&src_dir).unwrap();
1695        fs::write(src_dir.join("app.ts"), "legacyCall();\nlegacyCall();\n").unwrap();
1696
1697        let result = run_scan(&config, &[src_dir]).unwrap();
1698        // 2 matches, max 5 -> suppressed
1699        assert!(result.violations.is_empty());
1700        assert_eq!(result.ratchet_counts["legacy-api"], (2, 5));
1701    }
1702
1703    // ── run_scan_stdin tests ──
1704
1705    #[test]
1706    fn run_scan_stdin_finds_violations() {
1707        let dir = tempfile::tempdir().unwrap();
1708
1709        let config = dir.path().join("baseline.toml");
1710        fs::write(
1711            &config,
1712            r#"
1713[baseline]
1714
1715[[rule]]
1716id = "no-console"
1717type = "banned-pattern"
1718severity = "error"
1719pattern = "console\\.log"
1720regex = true
1721message = "no console.log"
1722"#,
1723        )
1724        .unwrap();
1725
1726        let result =
1727            run_scan_stdin(&config, "console.log('hello');\nfoo();\n", "test.ts").unwrap();
1728        assert_eq!(result.violations.len(), 1);
1729        assert_eq!(result.files_scanned, 1);
1730    }
1731
1732    #[test]
1733    fn run_scan_stdin_no_violations() {
1734        let dir = tempfile::tempdir().unwrap();
1735
1736        let config = dir.path().join("baseline.toml");
1737        fs::write(
1738            &config,
1739            r#"
1740[baseline]
1741
1742[[rule]]
1743id = "no-console"
1744type = "banned-pattern"
1745severity = "error"
1746pattern = "console\\.log"
1747regex = true
1748message = "no console.log"
1749glob = "**/*.ts"
1750"#,
1751        )
1752        .unwrap();
1753
1754        let result = run_scan_stdin(&config, "doStuff();\n", "app.ts").unwrap();
1755        assert!(result.violations.is_empty());
1756    }
1757
1758    #[test]
1759    fn run_scan_stdin_glob_filters_filename() {
1760        let dir = tempfile::tempdir().unwrap();
1761
1762        let config = dir.path().join("baseline.toml");
1763        fs::write(
1764            &config,
1765            r#"
1766[baseline]
1767
1768[[rule]]
1769id = "no-console"
1770type = "banned-pattern"
1771severity = "error"
1772pattern = "console\\.log"
1773regex = true
1774message = "no console.log"
1775glob = "**/*.tsx"
1776"#,
1777        )
1778        .unwrap();
1779
1780        // File doesn't match glob
1781        let result =
1782            run_scan_stdin(&config, "console.log('hello');\n", "app.rs").unwrap();
1783        assert!(result.violations.is_empty());
1784    }
1785
1786    // ── run_baseline tests ──
1787
1788    #[test]
1789    fn run_baseline_counts_ratchet_matches() {
1790        let dir = tempfile::tempdir().unwrap();
1791
1792        let config = dir.path().join("baseline.toml");
1793        fs::write(
1794            &config,
1795            r#"
1796[baseline]
1797
1798[[rule]]
1799id = "legacy-api"
1800type = "ratchet"
1801severity = "warning"
1802pattern = "legacyCall"
1803max_count = 100
1804message = "legacy usage"
1805"#,
1806        )
1807        .unwrap();
1808
1809        let src_dir = dir.path().join("src");
1810        fs::create_dir(&src_dir).unwrap();
1811        fs::write(
1812            src_dir.join("app.ts"),
1813            "legacyCall();\nlegacyCall();\nlegacyCall();\n",
1814        )
1815        .unwrap();
1816
1817        let result = run_baseline(&config, &[src_dir]).unwrap();
1818        assert_eq!(result.entries.len(), 1);
1819        assert_eq!(result.entries[0].rule_id, "legacy-api");
1820        assert_eq!(result.entries[0].count, 3);
1821        assert_eq!(result.files_scanned, 1);
1822    }
1823
1824    #[test]
1825    fn run_baseline_skips_non_ratchet_rules() {
1826        let dir = tempfile::tempdir().unwrap();
1827
1828        let config = dir.path().join("baseline.toml");
1829        fs::write(
1830            &config,
1831            r#"
1832[baseline]
1833
1834[[rule]]
1835id = "no-console"
1836type = "banned-pattern"
1837severity = "error"
1838pattern = "console\\.log"
1839regex = true
1840message = "no console"
1841
1842[[rule]]
1843id = "legacy-api"
1844type = "ratchet"
1845severity = "warning"
1846pattern = "legacyCall"
1847max_count = 100
1848message = "legacy usage"
1849"#,
1850        )
1851        .unwrap();
1852
1853        let src_dir = dir.path().join("src");
1854        fs::create_dir(&src_dir).unwrap();
1855        fs::write(src_dir.join("app.ts"), "console.log('hi');\nlegacyCall();\n").unwrap();
1856
1857        let result = run_baseline(&config, &[src_dir]).unwrap();
1858        // Only ratchet rules appear in baseline
1859        assert_eq!(result.entries.len(), 1);
1860        assert_eq!(result.entries[0].rule_id, "legacy-api");
1861    }
1862
1863    // ── collect_files tests ──
1864
1865    #[test]
1866    fn collect_files_single_file() {
1867        let dir = tempfile::tempdir().unwrap();
1868        let file = dir.path().join("test.ts");
1869        fs::write(&file, "content").unwrap();
1870
1871        let empty_glob = build_glob_set(&[]).unwrap();
1872        let files = collect_files(&[file.clone()], &empty_glob);
1873        assert_eq!(files.len(), 1);
1874        assert_eq!(files[0], file);
1875    }
1876
1877    #[test]
1878    fn collect_files_directory_walk() {
1879        let dir = tempfile::tempdir().unwrap();
1880        let sub = dir.path().join("sub");
1881        fs::create_dir(&sub).unwrap();
1882        fs::write(sub.join("a.ts"), "a").unwrap();
1883        fs::write(sub.join("b.ts"), "b").unwrap();
1884
1885        let empty_glob = build_glob_set(&[]).unwrap();
1886        let files = collect_files(&[dir.path().to_path_buf()], &empty_glob);
1887        assert_eq!(files.len(), 2);
1888    }
1889
1890    #[test]
1891    fn collect_files_excludes_patterns() {
1892        let dir = tempfile::tempdir().unwrap();
1893        fs::write(dir.path().join("keep.ts"), "keep").unwrap();
1894        fs::write(dir.path().join("skip.log"), "skip").unwrap();
1895
1896        let exclude = build_glob_set(&["*.log".into()]).unwrap();
1897        let files = collect_files(&[dir.path().to_path_buf()], &exclude);
1898        assert!(files.iter().all(|f| !f.to_string_lossy().ends_with(".log")));
1899        assert!(files.iter().any(|f| f.to_string_lossy().ends_with(".ts")));
1900    }
1901
1902    // ── run_scan with presets ──
1903
1904    #[test]
1905    fn run_scan_with_preset() {
1906        let dir = tempfile::tempdir().unwrap();
1907
1908        let config = dir.path().join("baseline.toml");
1909        fs::write(
1910            &config,
1911            r#"
1912[baseline]
1913extends = ["shadcn-strict"]
1914"#,
1915        )
1916        .unwrap();
1917
1918        let src_dir = dir.path().join("src");
1919        fs::create_dir(&src_dir).unwrap();
1920        fs::write(src_dir.join("app.tsx"), "export default function App() { return <div>hi</div>; }\n").unwrap();
1921
1922        let result = run_scan(&config, &[src_dir]).unwrap();
1923        // Just verify it doesn't error
1924        assert!(result.rules_loaded > 0);
1925    }
1926
1927    // ── run_scan with plugins ──
1928
1929    #[test]
1930    fn run_scan_with_plugin() {
1931        let dir = tempfile::tempdir().unwrap();
1932
1933        let plugin_path = dir.path().join("custom-rules.toml");
1934        fs::write(
1935            &plugin_path,
1936            r#"
1937[[rule]]
1938id = "no-todo"
1939type = "banned-pattern"
1940severity = "warning"
1941pattern = "TODO"
1942message = "No TODOs allowed"
1943"#,
1944        )
1945        .unwrap();
1946
1947        let config = dir.path().join("baseline.toml");
1948        fs::write(
1949            &config,
1950            format!(
1951                r#"
1952[baseline]
1953plugins = ["{}"]
1954"#,
1955                plugin_path.display()
1956            ),
1957        )
1958        .unwrap();
1959
1960        let src_dir = dir.path().join("src");
1961        fs::create_dir(&src_dir).unwrap();
1962        fs::write(src_dir.join("app.ts"), "// TODO: fix this\n").unwrap();
1963
1964        let result = run_scan(&config, &[src_dir]).unwrap();
1965        assert!(result.violations.iter().any(|v| v.rule_id == "no-todo"));
1966    }
1967
1968    // ── is_likely_minified tests ──
1969
1970    #[test]
1971    fn minified_empty_content() {
1972        assert!(!is_likely_minified(""));
1973    }
1974
1975    #[test]
1976    fn minified_normal_source() {
1977        let content = "const x = 1;\nconst y = 2;\nfunction foo() { return x + y; }\n";
1978        assert!(!is_likely_minified(content));
1979    }
1980
1981    #[test]
1982    fn minified_single_long_line() {
1983        let long_line = "a".repeat(MINIFIED_LINE_LENGTH_THRESHOLD + 1);
1984        assert!(is_likely_minified(&long_line));
1985    }
1986
1987    #[test]
1988    fn minified_mixed_with_one_long_line() {
1989        let mut content = "const x = 1;\n".to_string();
1990        content.push_str(&"a".repeat(MINIFIED_LINE_LENGTH_THRESHOLD + 1));
1991        content.push_str("\nconst y = 2;\n");
1992        assert!(is_likely_minified(&content));
1993    }
1994
1995    #[test]
1996    fn minified_exactly_at_threshold() {
1997        let line = "a".repeat(MINIFIED_LINE_LENGTH_THRESHOLD);
1998        assert!(!is_likely_minified(&line));
1999    }
2000
2001    // ── run_scan skips minified files ──
2002
2003    #[test]
2004    fn run_scan_skips_minified_file() {
2005        let dir = tempfile::tempdir().unwrap();
2006
2007        let config = dir.path().join("baseline.toml");
2008        fs::write(
2009            &config,
2010            r#"
2011[baseline]
2012
2013[[rule]]
2014id = "no-console"
2015type = "banned-pattern"
2016severity = "error"
2017pattern = "console\\.log"
2018regex = true
2019message = "no console.log"
2020"#,
2021        )
2022        .unwrap();
2023
2024        let src_dir = dir.path().join("src");
2025        fs::create_dir(&src_dir).unwrap();
2026        // Write a minified file containing the banned pattern
2027        let mut minified = "console.log('hi');".to_string();
2028        minified.push_str(&"x".repeat(MINIFIED_LINE_LENGTH_THRESHOLD + 1));
2029        minified.push('\n');
2030        fs::write(src_dir.join("bundle.js"), &minified).unwrap();
2031
2032        let result = run_scan(&config, &[src_dir]).unwrap();
2033        assert!(result.violations.is_empty());
2034        assert_eq!(result.files_scanned, 0);
2035    }
2036
2037    #[test]
2038    fn run_scan_stdin_skips_minified() {
2039        let dir = tempfile::tempdir().unwrap();
2040
2041        let config = dir.path().join("baseline.toml");
2042        fs::write(
2043            &config,
2044            r#"
2045[baseline]
2046
2047[[rule]]
2048id = "no-console"
2049type = "banned-pattern"
2050severity = "error"
2051pattern = "console\\.log"
2052regex = true
2053message = "no console.log"
2054"#,
2055        )
2056        .unwrap();
2057
2058        let mut minified = "console.log('hi');".to_string();
2059        minified.push_str(&"x".repeat(MINIFIED_LINE_LENGTH_THRESHOLD + 1));
2060
2061        let result = run_scan_stdin(&config, &minified, "bundle.js").unwrap();
2062        assert!(result.violations.is_empty());
2063        assert_eq!(result.files_scanned, 0);
2064    }
2065
2066    #[test]
2067    fn run_scan_skip_no_matching_files() {
2068        let dir = tempfile::tempdir().unwrap();
2069
2070        let config = dir.path().join("baseline.toml");
2071        fs::write(
2072            &config,
2073            r#"
2074[baseline]
2075
2076[[rule]]
2077id = "no-console"
2078type = "banned-pattern"
2079severity = "error"
2080pattern = "console\\.log"
2081regex = true
2082message = "no console"
2083glob = "**/*.tsx"
2084"#,
2085        )
2086        .unwrap();
2087
2088        let src_dir = dir.path().join("src");
2089        fs::create_dir(&src_dir).unwrap();
2090        // Write a .rs file that won't match the *.tsx glob
2091        fs::write(src_dir.join("app.rs"), "console.log('hello');\n").unwrap();
2092
2093        let result = run_scan(&config, &[src_dir]).unwrap();
2094        assert!(result.violations.is_empty());
2095        // The file shouldn't even be read since no rule matches
2096        assert_eq!(result.files_scanned, 0);
2097    }
2098}