Skip to main content

diffguard_domain/
rules.rs

1use std::collections::BTreeSet;
2use std::path::Path;
3
4use globset::{Glob, GlobSet, GlobSetBuilder};
5use regex::Regex;
6
7use diffguard_types::{MatchMode, RuleConfig, Severity};
8
/// Errors reported while turning rule configurations into compiled rules.
///
/// Every variant carries the `rule_id` of the offending rule so the message
/// can point at the configuration entry that needs fixing.
#[derive(Debug, thiserror::Error)]
pub enum RuleCompileError {
    /// The rule's `patterns` list is empty.
    #[error("rule '{rule_id}' has no patterns")]
    MissingPatterns { rule_id: String },

    /// A regex of the rule failed to compile. `pattern` is the offending
    /// expression as written in the configuration.
    #[error("rule '{rule_id}' has invalid regex '{pattern}': {source}")]
    InvalidRegex {
        rule_id: String,
        pattern: String,
        source: regex::Error,
    },

    /// An include or exclude glob of the rule was rejected by `globset`.
    #[error("rule '{rule_id}' has invalid glob '{glob}': {source}")]
    InvalidGlob {
        rule_id: String,
        glob: String,
        source: globset::Error,
    },

    /// The rule enables multiline matching with a window smaller than two.
    #[error("rule '{rule_id}' has invalid multiline_window '{value}' (must be >= 2)")]
    InvalidMultilineWindow { rule_id: String, value: u32 },

    /// The rule names a dependency whose id is not among the rules being
    /// compiled together with it.
    #[error("rule '{rule_id}' depends on unknown rule '{dependency}'")]
    UnknownDependency { rule_id: String, dependency: String },
}
34
/// A rule configuration with its regexes and globs compiled, as produced by
/// `compile_rules`.
#[derive(Debug, Clone)]
pub struct CompiledRule {
    /// Rule identifier, copied from the configuration.
    pub id: String,
    /// Severity copied from the configuration.
    pub severity: Severity,
    /// Message copied from the configuration.
    pub message: String,
    /// Language filter; `compile_rules` stores the names ASCII-lowercased.
    /// An empty set means the rule is not restricted by language.
    pub languages: BTreeSet<String>,
    /// Compiled form of the rule's `patterns`, in configuration order.
    pub patterns: Vec<Regex>,
    /// Include globs; `None` when the configuration lists no `paths`, in
    /// which case `applies_to` does not filter on inclusion.
    pub include: Option<GlobSet>,
    /// Exclude globs; `None` when the configuration lists no `exclude_paths`.
    pub exclude: Option<GlobSet>,
    /// Copied from the configuration.
    // NOTE(review): presumably tells the matcher to skip comment text; the
    // consumer is outside this section — confirm there.
    pub ignore_comments: bool,
    /// Copied from the configuration.
    // NOTE(review): presumably tells the matcher to skip string literals; the
    // consumer is outside this section — confirm there.
    pub ignore_strings: bool,
    /// Match mode copied from the configuration.
    pub match_mode: MatchMode,
    /// Whether multiline matching is enabled, copied from the configuration.
    pub multiline: bool,
    /// Multiline window; `compile_rules` defaults it to 2 and never stores a
    /// value below 2.
    pub multiline_window: usize,
    /// Compiled form of the rule's `context_patterns`.
    pub context_patterns: Vec<Regex>,
    /// Context window; `compile_rules` defaults it to 3.
    pub context_window: usize,
    /// Compiled form of the rule's `escalate_patterns`.
    pub escalate_patterns: Vec<Regex>,
    /// Escalation window; `compile_rules` defaults it to 0.
    pub escalate_window: usize,
    /// Escalation target severity copied from the configuration, if any.
    pub escalate_to: Option<Severity>,
    /// Ids of the rules this rule depends on; `compile_rules` checks that
    /// each one is present in the same batch of configurations.
    pub depends_on: BTreeSet<String>,
}
56
57impl CompiledRule {
58    pub fn applies_to(&self, path: &Path, language: Option<&str>) -> bool {
59        if self
60            .include
61            .as_ref()
62            .is_some_and(|include| !include.is_match(path))
63        {
64            return false;
65        }
66
67        if self
68            .exclude
69            .as_ref()
70            .is_some_and(|exclude| exclude.is_match(path))
71        {
72            return false;
73        }
74
75        if !self.languages.is_empty() {
76            let Some(lang) = language else {
77                return false;
78            };
79            if !self.languages.contains(&lang.to_ascii_lowercase()) {
80                return false;
81            }
82        }
83
84        true
85    }
86}
87
88pub fn compile_rules(configs: &[RuleConfig]) -> Result<Vec<CompiledRule>, RuleCompileError> {
89    let mut out = Vec::with_capacity(configs.len());
90    let known_rule_ids = configs
91        .iter()
92        .map(|cfg| cfg.id.clone())
93        .collect::<BTreeSet<_>>();
94
95    for cfg in configs {
96        if cfg.patterns.is_empty() {
97            return Err(RuleCompileError::MissingPatterns {
98                rule_id: cfg.id.clone(),
99            });
100        }
101
102        let patterns = compile_pattern_group(&cfg.id, &cfg.patterns)?;
103        let context_patterns = compile_pattern_group(&cfg.id, &cfg.context_patterns)?;
104        let escalate_patterns = compile_pattern_group(&cfg.id, &cfg.escalate_patterns)?;
105
106        let include = compile_globs(&cfg.paths, &cfg.id)?;
107        let exclude = compile_globs(&cfg.exclude_paths, &cfg.id)?;
108
109        let languages = cfg
110            .languages
111            .iter()
112            .map(|s| s.to_ascii_lowercase())
113            .collect::<BTreeSet<_>>();
114
115        if cfg.multiline
116            && let Some(window) = cfg.multiline_window
117            && window < 2
118        {
119            return Err(RuleCompileError::InvalidMultilineWindow {
120                rule_id: cfg.id.clone(),
121                value: window,
122            });
123        }
124
125        for dependency in &cfg.depends_on {
126            if !known_rule_ids.contains(dependency) {
127                return Err(RuleCompileError::UnknownDependency {
128                    rule_id: cfg.id.clone(),
129                    dependency: dependency.clone(),
130                });
131            }
132        }
133
134        out.push(CompiledRule {
135            id: cfg.id.clone(),
136            severity: cfg.severity,
137            message: cfg.message.clone(),
138            languages,
139            patterns,
140            include,
141            exclude,
142            ignore_comments: cfg.ignore_comments,
143            ignore_strings: cfg.ignore_strings,
144            match_mode: cfg.match_mode,
145            multiline: cfg.multiline,
146            multiline_window: cfg.multiline_window.unwrap_or(2).max(2) as usize,
147            context_patterns,
148            context_window: cfg.context_window.unwrap_or(3) as usize,
149            escalate_patterns,
150            escalate_window: cfg.escalate_window.unwrap_or(0) as usize,
151            escalate_to: cfg.escalate_to,
152            depends_on: cfg.depends_on.iter().cloned().collect(),
153        });
154    }
155
156    Ok(out)
157}
158
159fn compile_pattern_group(
160    rule_id: &str,
161    patterns: &[String],
162) -> Result<Vec<Regex>, RuleCompileError> {
163    let mut out = Vec::with_capacity(patterns.len());
164    for pattern in patterns {
165        let compiled = Regex::new(pattern).map_err(|source| RuleCompileError::InvalidRegex {
166            rule_id: rule_id.to_string(),
167            pattern: pattern.clone(),
168            source,
169        })?;
170        out.push(compiled);
171    }
172    Ok(out)
173}
174
175fn compile_globs(globs: &[String], rule_id: &str) -> Result<Option<GlobSet>, RuleCompileError> {
176    if globs.is_empty() {
177        return Ok(None);
178    }
179
180    let mut builder = GlobSetBuilder::new();
181    for g in globs {
182        let glob = Glob::new(g).map_err(|e| RuleCompileError::InvalidGlob {
183            rule_id: rule_id.to_string(),
184            glob: g.clone(),
185            source: e,
186        })?;
187        builder.add(glob);
188    }
189
190    Ok(Some(builder.build().expect("globset build should succeed")))
191}
192
/// Maps a file path to a language identifier using only its extension.
///
/// The extension is compared ASCII case-insensitively. Returns `None` when the
/// path has no extension, the extension is not valid UTF-8, or the extension
/// is not in the table below.
pub fn detect_language(path: &Path) -> Option<&'static str> {
    // (extensions, language identifier); all extensions are lowercase ASCII.
    const EXTENSION_TABLE: &[(&[&str], &str)] = &[
        (&["rs"], "rust"),
        (&["py", "pyw"], "python"),
        (&["js", "mjs", "cjs", "jsx"], "javascript"),
        (&["ts", "mts", "cts", "tsx"], "typescript"),
        (&["go"], "go"),
        (&["java"], "java"),
        (&["kt", "kts"], "kotlin"),
        (&["rb", "rake"], "ruby"),
        (&["c", "h"], "c"),
        (&["cpp", "cc", "cxx", "hpp", "hxx", "hh"], "cpp"),
        (&["cs"], "csharp"),
        (&["sh", "bash", "zsh", "ksh", "fish"], "shell"),
        (&["swift"], "swift"),
        (&["scala", "sc"], "scala"),
        (&["sql"], "sql"),
        (&["xml", "xsl", "xslt", "xsd", "svg", "xhtml"], "xml"),
        // HTML files are reported under the "xml" identifier as well.
        (&["html", "htm"], "xml"),
        (
            &["php", "phtml", "php3", "php4", "php5", "php7", "phps"],
            "php",
        ),
        (&["yaml", "yml"], "yaml"),
        (&["toml"], "toml"),
        (&["json", "jsonc", "json5"], "json"),
    ];

    let extension = path.extension().and_then(|ext| ext.to_str())?;

    EXTENSION_TABLE
        .iter()
        .find(|(extensions, _)| {
            extensions
                .iter()
                .any(|known| known.eq_ignore_ascii_case(extension))
        })
        .map(|&(_, language)| language)
}
222
223#[cfg(test)]
224mod tests {
225    use super::*;
226
227    /// Helper to create a RuleConfig for testing with default help/url
228    #[allow(clippy::too_many_arguments)]
229    fn test_rule(
230        id: &str,
231        severity: Severity,
232        message: &str,
233        languages: Vec<&str>,
234        patterns: Vec<&str>,
235        paths: Vec<&str>,
236        exclude_paths: Vec<&str>,
237        ignore_comments: bool,
238        ignore_strings: bool,
239    ) -> RuleConfig {
240        RuleConfig {
241            id: id.to_string(),
242            severity,
243            message: message.to_string(),
244            languages: languages.into_iter().map(|s| s.to_string()).collect(),
245            patterns: patterns.into_iter().map(|s| s.to_string()).collect(),
246            paths: paths.into_iter().map(|s| s.to_string()).collect(),
247            exclude_paths: exclude_paths.into_iter().map(|s| s.to_string()).collect(),
248            ignore_comments,
249            ignore_strings,
250            match_mode: Default::default(),
251            multiline: false,
252            multiline_window: None,
253            context_patterns: vec![],
254            context_window: None,
255            escalate_patterns: vec![],
256            escalate_window: None,
257            escalate_to: None,
258            depends_on: vec![],
259            help: None,
260            url: None,
261            tags: vec![],
262            test_cases: vec![],
263        }
264    }
265
266    #[test]
267    fn compile_and_match_basic_rule() {
268        let cfg = test_rule(
269            "x",
270            Severity::Warn,
271            "m",
272            vec!["rust"],
273            vec!["unwrap"],
274            vec!["**/*.rs"],
275            vec!["**/tests/**"],
276            true,
277            true,
278        );
279
280        let rules = compile_rules(&[cfg]).unwrap();
281        let r = &rules[0];
282
283        assert!(r.applies_to(Path::new("src/lib.rs"), Some("rust")));
284        assert!(!r.applies_to(Path::new("src/lib.rs"), Some("python")));
285        assert!(!r.applies_to(Path::new("tests/test.rs"), Some("rust")));
286    }
287
288    #[test]
289    fn detect_language_rust() {
290        assert_eq!(detect_language(Path::new("src/lib.rs")), Some("rust"));
291    }
292
293    #[test]
294    fn detect_language_python() {
295        assert_eq!(detect_language(Path::new("script.py")), Some("python"));
296        assert_eq!(detect_language(Path::new("script.pyw")), Some("python"));
297    }
298
299    #[test]
300    fn detect_language_javascript() {
301        assert_eq!(detect_language(Path::new("app.js")), Some("javascript"));
302        assert_eq!(detect_language(Path::new("module.mjs")), Some("javascript"));
303        assert_eq!(detect_language(Path::new("module.cjs")), Some("javascript"));
304        assert_eq!(
305            detect_language(Path::new("component.jsx")),
306            Some("javascript")
307        );
308    }
309
310    #[test]
311    fn detect_language_typescript() {
312        assert_eq!(detect_language(Path::new("app.ts")), Some("typescript"));
313        assert_eq!(detect_language(Path::new("module.mts")), Some("typescript"));
314        assert_eq!(detect_language(Path::new("module.cts")), Some("typescript"));
315        assert_eq!(
316            detect_language(Path::new("component.tsx")),
317            Some("typescript")
318        );
319    }
320
321    #[test]
322    fn detect_language_go() {
323        assert_eq!(detect_language(Path::new("main.go")), Some("go"));
324    }
325
326    #[test]
327    fn detect_language_java() {
328        assert_eq!(detect_language(Path::new("Main.java")), Some("java"));
329    }
330
331    #[test]
332    fn detect_language_kotlin() {
333        assert_eq!(detect_language(Path::new("Main.kt")), Some("kotlin"));
334        assert_eq!(detect_language(Path::new("build.kts")), Some("kotlin"));
335    }
336
337    #[test]
338    fn detect_language_ruby() {
339        assert_eq!(detect_language(Path::new("script.rb")), Some("ruby"));
340        assert_eq!(detect_language(Path::new("Rakefile.rake")), Some("ruby"));
341    }
342
343    #[test]
344    fn detect_language_c() {
345        assert_eq!(detect_language(Path::new("main.c")), Some("c"));
346        assert_eq!(detect_language(Path::new("header.h")), Some("c"));
347    }
348
349    #[test]
350    fn detect_language_cpp() {
351        assert_eq!(detect_language(Path::new("main.cpp")), Some("cpp"));
352        assert_eq!(detect_language(Path::new("main.cc")), Some("cpp"));
353        assert_eq!(detect_language(Path::new("main.cxx")), Some("cpp"));
354        assert_eq!(detect_language(Path::new("header.hpp")), Some("cpp"));
355        assert_eq!(detect_language(Path::new("header.hxx")), Some("cpp"));
356        assert_eq!(detect_language(Path::new("header.hh")), Some("cpp"));
357    }
358
359    #[test]
360    fn detect_language_csharp() {
361        assert_eq!(detect_language(Path::new("Program.cs")), Some("csharp"));
362    }
363
364    #[test]
365    fn detect_language_shell() {
366        assert_eq!(detect_language(Path::new("script.sh")), Some("shell"));
367        assert_eq!(detect_language(Path::new("script.bash")), Some("shell"));
368        assert_eq!(detect_language(Path::new("script.zsh")), Some("shell"));
369        assert_eq!(detect_language(Path::new("script.ksh")), Some("shell"));
370        assert_eq!(detect_language(Path::new("script.fish")), Some("shell"));
371    }
372
373    #[test]
374    fn detect_language_unknown() {
375        assert_eq!(detect_language(Path::new("file.txt")), None);
376        assert_eq!(detect_language(Path::new("file.md")), None);
377        assert_eq!(detect_language(Path::new("file")), None);
378    }
379
380    #[test]
381    fn detect_language_swift() {
382        assert_eq!(detect_language(Path::new("app.swift")), Some("swift"));
383        assert_eq!(detect_language(Path::new("App.SWIFT")), Some("swift"));
384    }
385
386    #[test]
387    fn detect_language_scala() {
388        assert_eq!(detect_language(Path::new("app.scala")), Some("scala"));
389        assert_eq!(detect_language(Path::new("app.sc")), Some("scala"));
390        assert_eq!(detect_language(Path::new("App.SCALA")), Some("scala"));
391    }
392
393    #[test]
394    fn detect_language_sql() {
395        assert_eq!(detect_language(Path::new("query.sql")), Some("sql"));
396        assert_eq!(detect_language(Path::new("Query.SQL")), Some("sql"));
397    }
398
399    #[test]
400    fn detect_language_xml() {
401        assert_eq!(detect_language(Path::new("config.xml")), Some("xml"));
402        assert_eq!(detect_language(Path::new("style.xsl")), Some("xml"));
403        assert_eq!(detect_language(Path::new("transform.xslt")), Some("xml"));
404        assert_eq!(detect_language(Path::new("schema.xsd")), Some("xml"));
405        assert_eq!(detect_language(Path::new("icon.svg")), Some("xml"));
406        assert_eq!(detect_language(Path::new("page.xhtml")), Some("xml"));
407        assert_eq!(detect_language(Path::new("page.html")), Some("xml"));
408        assert_eq!(detect_language(Path::new("page.htm")), Some("xml"));
409    }
410
411    #[test]
412    fn detect_language_php() {
413        assert_eq!(detect_language(Path::new("index.php")), Some("php"));
414        assert_eq!(detect_language(Path::new("template.phtml")), Some("php"));
415        assert_eq!(detect_language(Path::new("legacy.php3")), Some("php"));
416        assert_eq!(detect_language(Path::new("legacy.php4")), Some("php"));
417        assert_eq!(detect_language(Path::new("legacy.php5")), Some("php"));
418        assert_eq!(detect_language(Path::new("modern.php7")), Some("php"));
419        assert_eq!(detect_language(Path::new("highlight.phps")), Some("php"));
420    }
421
422    #[test]
423    fn detect_language_case_insensitive() {
424        // Test that extension matching is case-insensitive
425        assert_eq!(detect_language(Path::new("file.RS")), Some("rust"));
426        assert_eq!(detect_language(Path::new("file.PY")), Some("python"));
427        assert_eq!(detect_language(Path::new("file.JS")), Some("javascript"));
428        assert_eq!(detect_language(Path::new("file.TS")), Some("typescript"));
429        assert_eq!(detect_language(Path::new("file.CPP")), Some("cpp"));
430        assert_eq!(detect_language(Path::new("file.JSON")), Some("json"));
431        assert_eq!(detect_language(Path::new("file.YAML")), Some("yaml"));
432        assert_eq!(detect_language(Path::new("file.TOML")), Some("toml"));
433    }
434
435    #[test]
436    fn detect_language_yaml_toml_json() {
437        assert_eq!(detect_language(Path::new("config.yaml")), Some("yaml"));
438        assert_eq!(detect_language(Path::new("config.yml")), Some("yaml"));
439        assert_eq!(detect_language(Path::new("config.toml")), Some("toml"));
440        assert_eq!(detect_language(Path::new("config.json")), Some("json"));
441        assert_eq!(detect_language(Path::new("config.jsonc")), Some("json"));
442        assert_eq!(detect_language(Path::new("config.json5")), Some("json"));
443    }
444
445    // =========================================================================
446    // Rule Matching Tests - Task 12.3
447    // Requirements: 9.4, 9.5
448    // =========================================================================
449
450    // --- Overlapping Patterns Tests (first match wins) ---
451
452    #[test]
453    fn overlapping_patterns_first_pattern_wins() {
454        // When multiple patterns could match, the first pattern in the list wins
455        let cfg = RuleConfig {
456            id: "test.overlapping".to_string(),
457            severity: Severity::Warn,
458            message: "found match".to_string(),
459            languages: vec![],
460            patterns: vec![
461                "foo".to_string(),    // First pattern
462                "foobar".to_string(), // Second pattern (more specific)
463            ],
464            paths: vec![],
465            exclude_paths: vec![],
466            ignore_comments: false,
467            ignore_strings: false,
468            match_mode: Default::default(),
469            multiline: false,
470            multiline_window: None,
471            context_patterns: vec![],
472            context_window: None,
473            escalate_patterns: vec![],
474            escalate_window: None,
475            escalate_to: None,
476            depends_on: vec![],
477            help: None,
478            url: None,
479            tags: vec![],
480            test_cases: vec![],
481        };
482
483        let rules = compile_rules(&[cfg]).unwrap();
484        let r = &rules[0];
485
486        // Both patterns could match "foobar", but first_match should return "foo"
487        let content = "foobar";
488        let m = r
489            .patterns
490            .iter()
491            .find_map(|p| p.find(content))
492            .expect("Expected a pattern to match");
493        // First pattern "foo" should match at position 0-3
494        assert_eq!(m.start(), 0);
495        assert_eq!(m.end(), 3);
496        assert_eq!(&content[m.start()..m.end()], "foo");
497    }
498
499    #[test]
500    fn overlapping_rules_first_rule_wins() {
501        // When multiple rules could match the same content, both produce findings
502        // but the order of findings follows the rule order
503        let configs = vec![
504            RuleConfig {
505                id: "rule.first".to_string(),
506                severity: Severity::Warn,
507                message: "first rule".to_string(),
508                languages: vec![],
509                patterns: vec!["error".to_string()],
510                paths: vec![],
511                exclude_paths: vec![],
512                ignore_comments: false,
513                ignore_strings: false,
514                match_mode: Default::default(),
515                multiline: false,
516                multiline_window: None,
517                context_patterns: vec![],
518                context_window: None,
519                escalate_patterns: vec![],
520                escalate_window: None,
521                escalate_to: None,
522                depends_on: vec![],
523                help: None,
524                url: None,
525                tags: vec![],
526                test_cases: vec![],
527            },
528            RuleConfig {
529                id: "rule.second".to_string(),
530                severity: Severity::Error,
531                message: "second rule".to_string(),
532                languages: vec![],
533                patterns: vec!["error".to_string()],
534                paths: vec![],
535                exclude_paths: vec![],
536                ignore_comments: false,
537                ignore_strings: false,
538                match_mode: Default::default(),
539                multiline: false,
540                multiline_window: None,
541                context_patterns: vec![],
542                context_window: None,
543                escalate_patterns: vec![],
544                escalate_window: None,
545                escalate_to: None,
546                depends_on: vec![],
547                help: None,
548                url: None,
549                tags: vec![],
550                test_cases: vec![],
551            },
552        ];
553
554        let rules = compile_rules(&configs).unwrap();
555        assert_eq!(rules.len(), 2);
556        assert_eq!(rules[0].id, "rule.first");
557        assert_eq!(rules[1].id, "rule.second");
558    }
559
560    #[test]
561    fn overlapping_patterns_specific_vs_general() {
562        // Test that pattern order matters: general pattern first catches everything
563        let cfg = RuleConfig {
564            id: "test.general_first".to_string(),
565            severity: Severity::Warn,
566            message: "found".to_string(),
567            languages: vec![],
568            patterns: vec![
569                r"\w+".to_string(),      // General: matches any word
570                r"specific".to_string(), // Specific: matches only "specific"
571            ],
572            paths: vec![],
573            exclude_paths: vec![],
574            ignore_comments: false,
575            ignore_strings: false,
576            match_mode: Default::default(),
577            multiline: false,
578            multiline_window: None,
579            context_patterns: vec![],
580            context_window: None,
581            escalate_patterns: vec![],
582            escalate_window: None,
583            escalate_to: None,
584            depends_on: vec![],
585            help: None,
586            url: None,
587            tags: vec![],
588            test_cases: vec![],
589        };
590
591        let rules = compile_rules(&[cfg]).unwrap();
592        let r = &rules[0];
593
594        // The general pattern should match first
595        let content = "specific";
596        let m = r.patterns[0]
597            .find(content)
598            .expect("Expected specific pattern to match");
599        assert_eq!(&content[m.start()..m.end()], "specific");
600    }
601
602    // --- Complex Glob Pattern Tests ---
603
604    #[test]
605    fn glob_pattern_recursive_wildcard() {
606        // Test **/*.rs matches files in any subdirectory
607        let cfg = RuleConfig {
608            id: "test.glob".to_string(),
609            severity: Severity::Warn,
610            message: "m".to_string(),
611            languages: vec![],
612            patterns: vec!["x".to_string()],
613            paths: vec!["**/*.rs".to_string()],
614            exclude_paths: vec![],
615            ignore_comments: false,
616            ignore_strings: false,
617            match_mode: Default::default(),
618            multiline: false,
619            multiline_window: None,
620            context_patterns: vec![],
621            context_window: None,
622            escalate_patterns: vec![],
623            escalate_window: None,
624            escalate_to: None,
625            depends_on: vec![],
626            help: None,
627            url: None,
628            tags: vec![],
629            test_cases: vec![],
630        };
631
632        let rules = compile_rules(&[cfg]).unwrap();
633        let r = &rules[0];
634
635        // Should match files at any depth
636        assert!(r.applies_to(Path::new("lib.rs"), None));
637        assert!(r.applies_to(Path::new("src/lib.rs"), None));
638        assert!(r.applies_to(Path::new("src/foo/bar/lib.rs"), None));
639        assert!(r.applies_to(Path::new("deeply/nested/path/to/file.rs"), None));
640
641        // Should not match non-.rs files
642        assert!(!r.applies_to(Path::new("src/lib.py"), None));
643        assert!(!r.applies_to(Path::new("src/lib.rs.bak"), None));
644    }
645
646    #[test]
647    fn glob_pattern_specific_directory() {
648        // Test src/**/*.ts matches only files under src/
649        let cfg = RuleConfig {
650            id: "test.glob".to_string(),
651            severity: Severity::Warn,
652            message: "m".to_string(),
653            languages: vec![],
654            patterns: vec!["x".to_string()],
655            paths: vec!["src/**/*.ts".to_string()],
656            exclude_paths: vec![],
657            ignore_comments: false,
658            ignore_strings: false,
659            match_mode: Default::default(),
660            multiline: false,
661            multiline_window: None,
662            context_patterns: vec![],
663            context_window: None,
664            escalate_patterns: vec![],
665            escalate_window: None,
666            escalate_to: None,
667            depends_on: vec![],
668            help: None,
669            url: None,
670            tags: vec![],
671            test_cases: vec![],
672        };
673
674        let rules = compile_rules(&[cfg]).unwrap();
675        let r = &rules[0];
676
677        // Should match files under src/
678        assert!(r.applies_to(Path::new("src/app.ts"), None));
679        assert!(r.applies_to(Path::new("src/components/Button.ts"), None));
680        assert!(r.applies_to(Path::new("src/a/b/c/d.ts"), None));
681
682        // Should not match files outside src/
683        assert!(!r.applies_to(Path::new("app.ts"), None));
684        assert!(!r.applies_to(Path::new("lib/app.ts"), None));
685        assert!(!r.applies_to(Path::new("tests/app.ts"), None));
686    }
687
688    #[test]
689    fn glob_pattern_exclude_test_directories() {
690        // Test excluding **/test/** and **/tests/**
691        let cfg = RuleConfig {
692            id: "test.glob".to_string(),
693            severity: Severity::Warn,
694            message: "m".to_string(),
695            languages: vec![],
696            patterns: vec!["x".to_string()],
697            paths: vec!["**/*.rs".to_string()],
698            exclude_paths: vec!["**/test/**".to_string(), "**/tests/**".to_string()],
699            ignore_comments: false,
700            ignore_strings: false,
701            match_mode: Default::default(),
702            multiline: false,
703            multiline_window: None,
704            context_patterns: vec![],
705            context_window: None,
706            escalate_patterns: vec![],
707            escalate_window: None,
708            escalate_to: None,
709            depends_on: vec![],
710            help: None,
711            url: None,
712            tags: vec![],
713            test_cases: vec![],
714        };
715
716        let rules = compile_rules(&[cfg]).unwrap();
717        let r = &rules[0];
718
719        // Should match regular source files
720        assert!(r.applies_to(Path::new("src/lib.rs"), None));
721        assert!(r.applies_to(Path::new("src/foo/bar.rs"), None));
722
723        // Should exclude test directories
724        assert!(!r.applies_to(Path::new("test/lib.rs"), None));
725        assert!(!r.applies_to(Path::new("tests/lib.rs"), None));
726        assert!(!r.applies_to(Path::new("src/test/lib.rs"), None));
727        assert!(!r.applies_to(Path::new("src/tests/lib.rs"), None));
728        assert!(!r.applies_to(Path::new("foo/test/bar.rs"), None));
729        assert!(!r.applies_to(Path::new("foo/tests/bar.rs"), None));
730    }
731
732    #[test]
733    fn glob_pattern_multiple_extensions() {
734        // Test matching multiple file extensions
735        let cfg = RuleConfig {
736            id: "test.glob".to_string(),
737            severity: Severity::Warn,
738            message: "m".to_string(),
739            languages: vec![],
740            patterns: vec!["x".to_string()],
741            paths: vec![
742                "**/*.js".to_string(),
743                "**/*.ts".to_string(),
744                "**/*.jsx".to_string(),
745                "**/*.tsx".to_string(),
746            ],
747            exclude_paths: vec![],
748            ignore_comments: false,
749            ignore_strings: false,
750            match_mode: Default::default(),
751            multiline: false,
752            multiline_window: None,
753            context_patterns: vec![],
754            context_window: None,
755            escalate_patterns: vec![],
756            escalate_window: None,
757            escalate_to: None,
758            depends_on: vec![],
759            help: None,
760            url: None,
761            tags: vec![],
762            test_cases: vec![],
763        };
764
765        let rules = compile_rules(&[cfg]).unwrap();
766        let r = &rules[0];
767
768        // Should match all specified extensions
769        assert!(r.applies_to(Path::new("src/app.js"), None));
770        assert!(r.applies_to(Path::new("src/app.ts"), None));
771        assert!(r.applies_to(Path::new("src/App.jsx"), None));
772        assert!(r.applies_to(Path::new("src/App.tsx"), None));
773
774        // Should not match other extensions
775        assert!(!r.applies_to(Path::new("src/app.py"), None));
776        assert!(!r.applies_to(Path::new("src/app.rs"), None));
777    }
778
779    #[test]
780    fn glob_pattern_exclude_specific_files() {
781        // Test excluding specific file patterns like *.test.* and *.spec.*
782        let cfg = RuleConfig {
783            id: "test.glob".to_string(),
784            severity: Severity::Warn,
785            message: "m".to_string(),
786            languages: vec![],
787            patterns: vec!["x".to_string()],
788            paths: vec!["**/*.ts".to_string()],
789            exclude_paths: vec!["**/*.test.ts".to_string(), "**/*.spec.ts".to_string()],
790            ignore_comments: false,
791            ignore_strings: false,
792            match_mode: Default::default(),
793            multiline: false,
794            multiline_window: None,
795            context_patterns: vec![],
796            context_window: None,
797            escalate_patterns: vec![],
798            escalate_window: None,
799            escalate_to: None,
800            depends_on: vec![],
801            help: None,
802            url: None,
803            tags: vec![],
804            test_cases: vec![],
805        };
806
807        let rules = compile_rules(&[cfg]).unwrap();
808        let r = &rules[0];
809
810        // Should match regular TypeScript files
811        assert!(r.applies_to(Path::new("src/app.ts"), None));
812        assert!(r.applies_to(Path::new("src/utils/helper.ts"), None));
813
814        // Should exclude test and spec files
815        assert!(!r.applies_to(Path::new("src/app.test.ts"), None));
816        assert!(!r.applies_to(Path::new("src/app.spec.ts"), None));
817        assert!(!r.applies_to(Path::new("src/utils/helper.test.ts"), None));
818        assert!(!r.applies_to(Path::new("src/utils/helper.spec.ts"), None));
819    }
820
821    #[test]
822    fn glob_pattern_no_include_matches_all() {
823        // When no include paths are specified, rule applies to all files
824        let cfg = RuleConfig {
825            id: "test.glob".to_string(),
826            severity: Severity::Warn,
827            message: "m".to_string(),
828            languages: vec![],
829            patterns: vec!["x".to_string()],
830            paths: vec![], // Empty - matches all
831            exclude_paths: vec![],
832            ignore_comments: false,
833            ignore_strings: false,
834            match_mode: Default::default(),
835            multiline: false,
836            multiline_window: None,
837            context_patterns: vec![],
838            context_window: None,
839            escalate_patterns: vec![],
840            escalate_window: None,
841            escalate_to: None,
842            depends_on: vec![],
843            help: None,
844            url: None,
845            tags: vec![],
846            test_cases: vec![],
847        };
848
849        let rules = compile_rules(&[cfg]).unwrap();
850        let r = &rules[0];
851
852        // Should match any file
853        assert!(r.applies_to(Path::new("anything.txt"), None));
854        assert!(r.applies_to(Path::new("src/lib.rs"), None));
855        assert!(r.applies_to(Path::new("deeply/nested/file.py"), None));
856    }
857
858    // --- Language Filtering Edge Cases ---
859
860    #[test]
861    fn language_filter_empty_matches_all() {
862        // When no languages are specified, rule applies to all languages
863        let cfg = RuleConfig {
864            id: "test.lang".to_string(),
865            severity: Severity::Warn,
866            message: "m".to_string(),
867            languages: vec![], // Empty - matches all
868            patterns: vec!["x".to_string()],
869            paths: vec![],
870            exclude_paths: vec![],
871            ignore_comments: false,
872            ignore_strings: false,
873            match_mode: Default::default(),
874            multiline: false,
875            multiline_window: None,
876            context_patterns: vec![],
877            context_window: None,
878            escalate_patterns: vec![],
879            escalate_window: None,
880            escalate_to: None,
881            depends_on: vec![],
882            help: None,
883            url: None,
884            tags: vec![],
885            test_cases: vec![],
886        };
887
888        let rules = compile_rules(&[cfg]).unwrap();
889        let r = &rules[0];
890
891        // Should match any language
892        assert!(r.applies_to(Path::new("file.rs"), Some("rust")));
893        assert!(r.applies_to(Path::new("file.py"), Some("python")));
894        assert!(r.applies_to(Path::new("file.js"), Some("javascript")));
895        // Should also match when language is None (unknown extension)
896        assert!(r.applies_to(Path::new("file.txt"), None));
897    }
898
899    #[test]
900    fn language_filter_single_language() {
901        // Rule with single language filter
902        let cfg = RuleConfig {
903            id: "test.lang".to_string(),
904            severity: Severity::Warn,
905            message: "m".to_string(),
906            languages: vec!["rust".to_string()],
907            patterns: vec!["x".to_string()],
908            paths: vec![],
909            exclude_paths: vec![],
910            ignore_comments: false,
911            ignore_strings: false,
912            match_mode: Default::default(),
913            multiline: false,
914            multiline_window: None,
915            context_patterns: vec![],
916            context_window: None,
917            escalate_patterns: vec![],
918            escalate_window: None,
919            escalate_to: None,
920            depends_on: vec![],
921            help: None,
922            url: None,
923            tags: vec![],
924            test_cases: vec![],
925        };
926
927        let rules = compile_rules(&[cfg]).unwrap();
928        let r = &rules[0];
929
930        // Should only match Rust
931        assert!(r.applies_to(Path::new("file.rs"), Some("rust")));
932        assert!(!r.applies_to(Path::new("file.py"), Some("python")));
933        assert!(!r.applies_to(Path::new("file.js"), Some("javascript")));
934        // Should not match when language is None
935        assert!(!r.applies_to(Path::new("file.txt"), None));
936    }
937
938    #[test]
939    fn language_filter_multiple_languages() {
940        // Rule with multiple language filters
941        let cfg = RuleConfig {
942            id: "test.lang".to_string(),
943            severity: Severity::Warn,
944            message: "m".to_string(),
945            languages: vec!["javascript".to_string(), "typescript".to_string()],
946            patterns: vec!["x".to_string()],
947            paths: vec![],
948            exclude_paths: vec![],
949            ignore_comments: false,
950            ignore_strings: false,
951            match_mode: Default::default(),
952            multiline: false,
953            multiline_window: None,
954            context_patterns: vec![],
955            context_window: None,
956            escalate_patterns: vec![],
957            escalate_window: None,
958            escalate_to: None,
959            depends_on: vec![],
960            help: None,
961            url: None,
962            tags: vec![],
963            test_cases: vec![],
964        };
965
966        let rules = compile_rules(&[cfg]).unwrap();
967        let r = &rules[0];
968
969        // Should match JavaScript and TypeScript
970        assert!(r.applies_to(Path::new("file.js"), Some("javascript")));
971        assert!(r.applies_to(Path::new("file.ts"), Some("typescript")));
972        // Should not match other languages
973        assert!(!r.applies_to(Path::new("file.rs"), Some("rust")));
974        assert!(!r.applies_to(Path::new("file.py"), Some("python")));
975    }
976
977    #[test]
978    fn language_filter_case_insensitive() {
979        // Language matching should be case-insensitive
980        let cfg = RuleConfig {
981            id: "test.lang".to_string(),
982            severity: Severity::Warn,
983            message: "m".to_string(),
984            languages: vec!["RUST".to_string()], // Uppercase in config
985            patterns: vec!["x".to_string()],
986            paths: vec![],
987            exclude_paths: vec![],
988            ignore_comments: false,
989            ignore_strings: false,
990            match_mode: Default::default(),
991            multiline: false,
992            multiline_window: None,
993            context_patterns: vec![],
994            context_window: None,
995            escalate_patterns: vec![],
996            escalate_window: None,
997            escalate_to: None,
998            depends_on: vec![],
999            help: None,
1000            url: None,
1001            tags: vec![],
1002            test_cases: vec![],
1003        };
1004
1005        let rules = compile_rules(&[cfg]).unwrap();
1006        let r = &rules[0];
1007
1008        // Should match regardless of case
1009        assert!(r.applies_to(Path::new("file.rs"), Some("rust")));
1010        assert!(r.applies_to(Path::new("file.rs"), Some("Rust")));
1011        assert!(r.applies_to(Path::new("file.rs"), Some("RUST")));
1012    }
1013
1014    #[test]
1015    fn language_filter_with_path_filter_combined() {
1016        // Both language and path filters must match
1017        let cfg = RuleConfig {
1018            id: "test.combined".to_string(),
1019            severity: Severity::Warn,
1020            message: "m".to_string(),
1021            languages: vec!["rust".to_string()],
1022            patterns: vec!["x".to_string()],
1023            paths: vec!["src/**/*.rs".to_string()],
1024            exclude_paths: vec!["**/tests/**".to_string()],
1025            ignore_comments: false,
1026            ignore_strings: false,
1027            match_mode: Default::default(),
1028            multiline: false,
1029            multiline_window: None,
1030            context_patterns: vec![],
1031            context_window: None,
1032            escalate_patterns: vec![],
1033            escalate_window: None,
1034            escalate_to: None,
1035            depends_on: vec![],
1036            help: None,
1037            url: None,
1038            tags: vec![],
1039            test_cases: vec![],
1040        };
1041
1042        let rules = compile_rules(&[cfg]).unwrap();
1043        let r = &rules[0];
1044
1045        // Must match both language AND path
1046        assert!(r.applies_to(Path::new("src/lib.rs"), Some("rust")));
1047        assert!(r.applies_to(Path::new("src/foo/bar.rs"), Some("rust")));
1048
1049        // Wrong language - should not match
1050        assert!(!r.applies_to(Path::new("src/lib.rs"), Some("python")));
1051
1052        // Wrong path - should not match
1053        assert!(!r.applies_to(Path::new("lib/lib.rs"), Some("rust")));
1054
1055        // Excluded path - should not match
1056        assert!(!r.applies_to(Path::new("src/tests/lib.rs"), Some("rust")));
1057
1058        // No language detected - should not match when language filter is set
1059        assert!(!r.applies_to(Path::new("src/lib.rs"), None));
1060    }
1061
1062    #[test]
1063    fn language_filter_unknown_language_in_config() {
1064        // Rule with an unknown/custom language identifier
1065        let cfg = RuleConfig {
1066            id: "test.lang".to_string(),
1067            severity: Severity::Warn,
1068            message: "m".to_string(),
1069            languages: vec!["customlang".to_string()],
1070            patterns: vec!["x".to_string()],
1071            paths: vec![],
1072            exclude_paths: vec![],
1073            ignore_comments: false,
1074            ignore_strings: false,
1075            match_mode: Default::default(),
1076            multiline: false,
1077            multiline_window: None,
1078            context_patterns: vec![],
1079            context_window: None,
1080            escalate_patterns: vec![],
1081            escalate_window: None,
1082            escalate_to: None,
1083            depends_on: vec![],
1084            help: None,
1085            url: None,
1086            tags: vec![],
1087            test_cases: vec![],
1088        };
1089
1090        let rules = compile_rules(&[cfg]).unwrap();
1091        let r = &rules[0];
1092
1093        // Should match when the custom language is provided
1094        assert!(r.applies_to(Path::new("file.custom"), Some("customlang")));
1095        // Should not match other languages
1096        assert!(!r.applies_to(Path::new("file.rs"), Some("rust")));
1097    }
1098
1099    #[test]
1100    fn language_filter_none_language_with_filter() {
1101        // When language filter is set but file has no detected language
1102        let cfg = RuleConfig {
1103            id: "test.lang".to_string(),
1104            severity: Severity::Warn,
1105            message: "m".to_string(),
1106            languages: vec!["rust".to_string()],
1107            patterns: vec!["x".to_string()],
1108            paths: vec![],
1109            exclude_paths: vec![],
1110            ignore_comments: false,
1111            ignore_strings: false,
1112            match_mode: Default::default(),
1113            multiline: false,
1114            multiline_window: None,
1115            context_patterns: vec![],
1116            context_window: None,
1117            escalate_patterns: vec![],
1118            escalate_window: None,
1119            escalate_to: None,
1120            depends_on: vec![],
1121            help: None,
1122            url: None,
1123            tags: vec![],
1124            test_cases: vec![],
1125        };
1126
1127        let rules = compile_rules(&[cfg]).unwrap();
1128        let r = &rules[0];
1129
1130        // Should not match when language is None and filter is set
1131        assert!(!r.applies_to(Path::new("file.txt"), None));
1132        assert!(!r.applies_to(Path::new("Makefile"), None));
1133        assert!(!r.applies_to(Path::new("README.md"), None));
1134    }
1135
1136    #[test]
1137    fn compile_rejects_invalid_multiline_window() {
1138        let cfg = RuleConfig {
1139            id: "test.multiline".to_string(),
1140            severity: Severity::Warn,
1141            message: "m".to_string(),
1142            languages: vec![],
1143            patterns: vec!["a".to_string()],
1144            paths: vec![],
1145            exclude_paths: vec![],
1146            ignore_comments: false,
1147            ignore_strings: false,
1148            match_mode: Default::default(),
1149            multiline: true,
1150            multiline_window: Some(1),
1151            context_patterns: vec![],
1152            context_window: None,
1153            escalate_patterns: vec![],
1154            escalate_window: None,
1155            escalate_to: None,
1156            depends_on: vec![],
1157            help: None,
1158            url: None,
1159            tags: vec![],
1160            test_cases: vec![],
1161        };
1162
1163        let err = compile_rules(&[cfg]).expect_err("window < 2 should fail");
1164        match err {
1165            RuleCompileError::InvalidMultilineWindow { rule_id, value } => {
1166                assert_eq!(rule_id, "test.multiline");
1167                assert_eq!(value, 1);
1168            }
1169            other => panic!("unexpected error: {other:?}"),
1170        }
1171    }
1172
1173    #[test]
1174    fn compile_rejects_unknown_dependency() {
1175        let cfg = RuleConfig {
1176            id: "test.dependent".to_string(),
1177            severity: Severity::Warn,
1178            message: "m".to_string(),
1179            languages: vec![],
1180            patterns: vec!["a".to_string()],
1181            paths: vec![],
1182            exclude_paths: vec![],
1183            ignore_comments: false,
1184            ignore_strings: false,
1185            match_mode: Default::default(),
1186            multiline: false,
1187            multiline_window: None,
1188            context_patterns: vec![],
1189            context_window: None,
1190            escalate_patterns: vec![],
1191            escalate_window: None,
1192            escalate_to: None,
1193            depends_on: vec!["missing.rule".to_string()],
1194            help: None,
1195            url: None,
1196            tags: vec![],
1197            test_cases: vec![],
1198        };
1199
1200        let err = compile_rules(&[cfg]).expect_err("unknown dependency should fail");
1201        match err {
1202            RuleCompileError::UnknownDependency {
1203                rule_id,
1204                dependency,
1205            } => {
1206                assert_eq!(rule_id, "test.dependent");
1207                assert_eq!(dependency, "missing.rule");
1208            }
1209            other => panic!("unexpected error: {other:?}"),
1210        }
1211    }
1212}