Skip to main content

thread_rule_engine/
rule_collection.rs

1// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6
7use crate::{RuleConfig, Severity};
8use globset::{Glob, GlobSet, GlobSetBuilder};
9use std::path::Path;
10use thread_ast_engine::language::Language;
11
12/// RuleBucket stores rules of the same language id.
13/// Rules for different language will stay in separate buckets.
14pub struct RuleBucket<L: Language> {
15    rules: Vec<RuleConfig<L>>,
16    lang: L,
17}
18
19impl<L: Language> RuleBucket<L> {
20    fn new(lang: L) -> Self {
21        Self {
22            rules: vec![],
23            lang,
24        }
25    }
26    pub fn add(&mut self, rule: RuleConfig<L>) {
27        self.rules.push(rule);
28    }
29}
30
31struct ContingentRule<L: Language> {
32    rule: RuleConfig<L>,
33    files_globs: Option<GlobSet>,
34    ignore_globs: Option<GlobSet>,
35}
36
37fn build_glob_set(paths: &Vec<String>) -> Result<GlobSet, globset::Error> {
38    let mut builder = GlobSetBuilder::new();
39    for path in paths {
40        builder.add(Glob::new(path)?);
41    }
42    builder.build()
43}
44
45impl<L> TryFrom<RuleConfig<L>> for ContingentRule<L>
46where
47    L: Language,
48{
49    type Error = globset::Error;
50    fn try_from(rule: RuleConfig<L>) -> Result<Self, Self::Error> {
51        let files_globs = rule.files.as_ref().map(build_glob_set).transpose()?;
52        let ignore_globs = rule.ignores.as_ref().map(build_glob_set).transpose()?;
53        Ok(Self {
54            rule,
55            files_globs,
56            ignore_globs,
57        })
58    }
59}
60
61impl<L: Language> ContingentRule<L> {
62    pub fn matches_path<P: AsRef<Path>>(&self, path: P) -> bool {
63        if let Some(ignore_globs) = &self.ignore_globs
64            && ignore_globs.is_match(&path)
65        {
66            return false;
67        }
68        if let Some(files_globs) = &self.files_globs {
69            return files_globs.is_match(path);
70        }
71        true
72    }
73}
74
75/// A collection of rules to run one round of scanning.
76/// Rules will be grouped together based on their language, path globbing and pattern rule.
77pub struct RuleCollection<L: Language + Eq> {
78    // use vec since we don't have many languages
79    /// a list of rule buckets grouped by languages.
80    /// Tenured rules will always run against a file of that language type.
81    tenured: Vec<RuleBucket<L>>,
82    /// contingent rules will run against a file if it matches file/ignore glob.
83    contingent: Vec<ContingentRule<L>>,
84}
85
86impl<L: Language + Eq> RuleCollection<L> {
87    pub fn try_new(configs: Vec<RuleConfig<L>>) -> Result<Self, globset::Error> {
88        let mut tenured = vec![];
89        let mut contingent = vec![];
90        for config in configs {
91            if matches!(config.severity, Severity::Off) {
92                continue;
93            } else if config.files.is_none() && config.ignores.is_none() {
94                Self::add_tenured_rule(&mut tenured, config);
95            } else {
96                contingent.push(ContingentRule::try_from(config)?);
97            }
98        }
99        Ok(Self {
100            tenured,
101            contingent,
102        })
103    }
104
105    pub fn get_rule_from_lang(&self, path: &Path, lang: L) -> Vec<&RuleConfig<L>> {
106        let mut all_rules = vec![];
107        for rule in &self.tenured {
108            if rule.lang == lang {
109                all_rules = rule.rules.iter().collect();
110                break;
111            }
112        }
113        all_rules.extend(self.contingent.iter().filter_map(|cont| {
114            if cont.rule.language == lang && cont.matches_path(path) {
115                Some(&cont.rule)
116            } else {
117                None
118            }
119        }));
120        all_rules
121    }
122
123    pub fn for_path<P: AsRef<Path>>(&self, path: P) -> Vec<&RuleConfig<L>> {
124        let path = path.as_ref();
125        let Some(lang) = L::from_path(path) else {
126            return vec![];
127        };
128        let mut ret = self.get_rule_from_lang(path, lang);
129        ret.sort_unstable_by_key(|r| &r.id);
130        ret
131    }
132
133    pub fn get_rule(&self, id: &str) -> Option<&RuleConfig<L>> {
134        for rule in &self.tenured {
135            for r in &rule.rules {
136                if r.id == id {
137                    return Some(r);
138                }
139            }
140        }
141        for rule in &self.contingent {
142            if rule.rule.id == id {
143                return Some(&rule.rule);
144            }
145        }
146        None
147    }
148
149    pub fn total_rule_count(&self) -> usize {
150        let mut ret = self.tenured.iter().map(|bucket| bucket.rules.len()).sum();
151        ret += self.contingent.len();
152        ret
153    }
154
155    pub fn for_each_rule(&self, mut f: impl FnMut(&RuleConfig<L>)) {
156        for bucket in &self.tenured {
157            for rule in &bucket.rules {
158                f(rule);
159            }
160        }
161        for rule in &self.contingent {
162            f(&rule.rule);
163        }
164    }
165
166    fn add_tenured_rule(tenured: &mut Vec<RuleBucket<L>>, rule: RuleConfig<L>) {
167        let lang = rule.language.clone();
168        for bucket in tenured.iter_mut() {
169            if bucket.lang == lang {
170                bucket.add(rule);
171                return;
172            }
173        }
174        let mut bucket = RuleBucket::new(lang);
175        bucket.add(rule);
176        tenured.push(bucket);
177    }
178}
179
180impl<L: Language + Eq> Default for RuleCollection<L> {
181    fn default() -> Self {
182        Self {
183            tenured: vec![],
184            contingent: vec![],
185        }
186    }
187}
188
#[cfg(test)]
mod test {
    use super::*;
    use crate::GlobalRules;
    use crate::from_yaml_string;
    use crate::test::TypeScript;

    /// Builds a one-rule collection from a fixed YAML rule with id `test`,
    /// appending `files` (extra YAML such as `files:`/`ignores:` sections).
    fn make_rule(files: &str) -> RuleCollection<TypeScript> {
        let globals = GlobalRules::default();
        let yaml = format!(
            r"
id: test
message: test rule
severity: info
language: Tsx
rule:
  all: [kind: number]
{files}"
        );
        let mut parsed = from_yaml_string(&yaml, &globals).unwrap();
        let rule_config = parsed.pop().unwrap();
        RuleCollection::try_new(vec![rule_config]).expect("should parse")
    }

    /// Asserts that exactly the `test` rule applies to `path`.
    fn assert_match_path(collection: &RuleCollection<TypeScript>, path: &str) {
        let matched = collection.for_path(path);
        assert_eq!(matched.len(), 1);
        assert_eq!(matched[0].id, "test");
    }

    /// Asserts that no rule applies to `path`.
    fn assert_ignore_path(collection: &RuleCollection<TypeScript>, path: &str) {
        let matched = collection.for_path(path);
        assert!(matched.is_empty());
    }

    #[test]
    fn test_ignore_rule() {
        let collection = make_rule(
            r#"
ignores:
  - ./manage.py
  - "**/test*"
"#,
        );
        for ignored in ["./manage.py", "./src/test.py"] {
            assert_ignore_path(&collection, ignored);
        }
        assert_match_path(&collection, "./src/app.py");
    }

    #[test]
    fn test_files_rule() {
        let collection = make_rule(
            r#"
files:
  - ./manage.py
  - "**/test*"
"#,
        );
        for included in ["./manage.py", "./src/test.py"] {
            assert_match_path(&collection, included);
        }
        assert_ignore_path(&collection, "./src/app.py");
    }

    #[test]
    fn test_files_with_ignores_rule() {
        let collection = make_rule(
            r#"
files:
  - ./src/**/*.py
ignores:
  - ./src/excluded/*.py
"#,
        );
        for included in ["./src/test.py", "./src/some_folder/test.py"] {
            assert_match_path(&collection, included);
        }
        assert_ignore_path(&collection, "./src/excluded/app.py");
    }

    #[test]
    fn test_rule_collection_get_contingent_rule() {
        // A rule with `files` globs is stored as a contingent rule.
        let collection = make_rule(
            r#"
files:
  - ./manage.py
  - "**/test*"
"#,
        );
        assert!(collection.get_rule("test").is_some());
    }

    #[test]
    fn test_rule_collection_get_tenured_rule() {
        // Without `files`/`ignores` the rule is stored as a tenured rule.
        let collection = make_rule("");
        assert!(collection.get_rule("test").is_some());
    }

    #[test]
    #[ignore]
    fn test_rules_for_path() {
        todo!()
    }
}