// thread_rule_engine/combined.rs

1// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6
7use crate::{RuleConfig, SerializableRule, SerializableRuleConfig, SerializableRuleCore, Severity};
8
9use thread_ast_engine::language::Language;
10use thread_ast_engine::matcher::{Matcher, MatcherExt};
11use thread_ast_engine::{AstGrep, Doc, Node, NodeMatch};
12
13use thread_utilities::{RapidMap, RapidSet, map_with_capacity};
14
/// The outcome of scanning one file with a [`CombinedScan`].
pub struct ScanResult<'t, 'r, D: Doc, L: Language> {
    /// matches routed here when the rule has a fix and separate-fix mode is on
    pub diffs: Vec<(&'r RuleConfig<L>, NodeMatch<'t, D>)>,
    /// all remaining matches, grouped per rule
    pub matches: Vec<(&'r RuleConfig<L>, Vec<NodeMatch<'t, D>>)>,
}
19
/// store the index to the rule and the matched node
/// it will be converted to ScanResult by resolving the rule
struct ScanResultInner<'t, D: Doc> {
    /// (rule index, match) pairs destined for the diff output
    diffs: Vec<(usize, NodeMatch<'t, D>)>,
    /// rule index -> every match of that rule
    matches: RapidMap<usize, Vec<NodeMatch<'t, D>>>,
    /// suppression comment nodes that did not suppress any match
    unused_suppressions: Vec<NodeMatch<'t, D>>,
}
27
28impl<'t, D: Doc> ScanResultInner<'t, D> {
29    pub fn into_result<'r, L: Language>(
30        self,
31        combined: &CombinedScan<'r, L>,
32        separate_fix: bool,
33    ) -> ScanResult<'t, 'r, D, L> {
34        let mut diffs: Vec<_> = self
35            .diffs
36            .into_iter()
37            .map(|(idx, nm)| (combined.get_rule(idx), nm))
38            .collect();
39        let mut matches: Vec<_> = self
40            .matches
41            .into_iter()
42            .map(|(idx, nms)| (combined.get_rule(idx), nms))
43            .collect();
44        if let Some(rule) = combined.unused_suppression_rule {
45            if separate_fix {
46                diffs.extend(self.unused_suppressions.into_iter().map(|nm| (rule, nm)));
47                diffs.sort_unstable_by_key(|(_, nm)| nm.range().start);
48            } else if !self.unused_suppressions.is_empty() {
49                // do not push empty suppression to matches
50                let mut supprs = self.unused_suppressions;
51                supprs.sort_unstable_by_key(|nm| nm.range().start);
52                matches.push((rule, supprs));
53            }
54        }
55        ScanResult { diffs, matches }
56    }
57}
58
/// The reach of a single suppression comment.
enum SuppressKind {
    /// suppress the whole file
    File,
    /// suppress specific line
    Line(usize),
}
65
66fn get_suppression_kind(node: &Node<'_, impl Doc>) -> Option<SuppressKind> {
67    if !node.kind().contains("comment") || !node.text().contains(IGNORE_TEXT) {
68        return None;
69    }
70    let line = node.start_pos().line();
71    let suppress_next_line = if let Some(prev) = node.prev() {
72        prev.start_pos().line() != line
73    } else {
74        true
75    };
76    // if the first line is suppressed and the next line is empty,
77    // we suppress the whole file see gh #1541
78    if line == 0
79        && suppress_next_line
80        && node
81            .next()
82            .map(|next| next.start_pos().line() >= 2)
83            .unwrap_or(true)
84    {
85        return Some(SuppressKind::File);
86    }
87    let key = if suppress_next_line { line + 1 } else { line };
88    Some(SuppressKind::Line(key))
89}
90
/// Every suppression directive collected from one file.
struct Suppressions {
    /// file-wide suppression, if the file starts with one
    file: Option<Suppression>,
    /// line number which may be suppressed
    lines: RapidMap<usize, Suppression>,
}
96
97impl Suppressions {
98    fn collect_all<D: Doc>(root: &AstGrep<D>) -> (Self, RapidMap<usize, Node<'_, D>>) {
99        let mut suppressions = Self {
100            file: None,
101            lines: RapidMap::default(),
102        };
103        let mut suppression_nodes = RapidMap::default();
104        for node in root.root().dfs() {
105            let is_all_suppressed = suppressions.collect(&node, &mut suppression_nodes);
106            if is_all_suppressed {
107                break;
108            }
109        }
110        (suppressions, suppression_nodes)
111    }
112    /// collect all suppression nodes from the root node
113    /// returns if the whole file need to be suppressed, including unused sup
114    /// see #1541
115    fn collect<'r, D: Doc>(
116        &mut self,
117        node: &Node<'r, D>,
118        suppression_nodes: &mut RapidMap<usize, Node<'r, D>>,
119    ) -> bool {
120        let Some(sup) = get_suppression_kind(node) else {
121            return false;
122        };
123        let suppressed = Suppression {
124            suppressed: parse_suppression_set(&node.text()),
125            node_id: node.node_id(),
126        };
127        suppression_nodes.insert(node.node_id(), node.clone());
128        match sup {
129            SuppressKind::File => {
130                let is_all_suppressed = suppressed.suppressed.is_none();
131                self.file = Some(suppressed);
132                is_all_suppressed
133            }
134            SuppressKind::Line(key) => {
135                self.lines.insert(
136                    key,
137                    Suppression {
138                        suppressed: parse_suppression_set(&node.text()),
139                        node_id: node.node_id(),
140                    },
141                );
142                false
143            }
144        }
145    }
146
147    fn file_suppression(&self) -> MaySuppressed<'_> {
148        if let Some(sup) = &self.file {
149            MaySuppressed::Yes(sup)
150        } else {
151            MaySuppressed::No
152        }
153    }
154
155    fn line_suppression<D: Doc>(&self, node: &Node<'_, D>) -> MaySuppressed<'_> {
156        let line = node.start_pos().line();
157        if let Some(sup) = self.lines.get(&line) {
158            MaySuppressed::Yes(sup)
159        } else {
160            MaySuppressed::No
161        }
162    }
163}
164
/// One parsed `ast-grep-ignore` directive.
struct Suppression {
    /// None = suppress all
    suppressed: Option<RapidSet<String>>,
    /// id of the comment node, used to detect unused suppressions
    node_id: usize,
}
170
/// Whether a suppression exists at a given scope (file or line).
enum MaySuppressed<'a> {
    Yes(&'a Suppression),
    No,
}
175
176impl MaySuppressed<'_> {
177    fn suppressed_id(&self, rule_id: &str) -> Option<usize> {
178        let suppression = match self {
179            MaySuppressed::No => return None,
180            MaySuppressed::Yes(s) => s,
181        };
182        if let Some(set) = &suppression.suppressed {
183            if set.contains(rule_id) {
184                Some(suppression.node_id)
185            } else {
186                None
187            }
188        } else {
189            Some(suppression.node_id)
190        }
191    }
192}
193
/// Marker text that turns a comment into a suppression directive.
const IGNORE_TEXT: &str = "ast-grep-ignore";
195
/// A struct to group all rules according to their potential kinds.
/// This can greatly reduce traversal times and skip unmatchable rules.
/// Rules are referenced by their index in the rules vector.
pub struct CombinedScan<'r, L: Language> {
    /// all rules, sorted by (has-fix, id) in `new`
    rules: Vec<&'r RuleConfig<L>>,
    /// a vec of vec, mapping from kind to a list of rule index
    kind_rule_mapping: Vec<Vec<usize>>,
    /// a rule for unused_suppressions
    unused_suppression_rule: Option<&'r RuleConfig<L>>,
}
206
impl<'r, L: Language> CombinedScan<'r, L> {
    /// Build a `CombinedScan`, bucketing rule indices by the AST node kinds
    /// each rule can possibly match.
    ///
    /// Rules whose matcher reports no potential kinds are skipped with a
    /// warning printed to stderr.
    pub fn new(mut rules: Vec<&'r RuleConfig<L>>) -> Self {
        // process fixable rule first, the order by id
        // note, mapping.push will invert order so we sort fixable order in reverse
        rules.sort_unstable_by_key(|r| (r.fix.is_some(), &r.id));
        let mut mapping = Vec::new();

        // Pre-calculate the maximum kind to avoid repeated resizing
        let max_kind = rules
            .iter()
            .filter_map(|rule| rule.matcher.potential_kinds())
            .map(|bitset| bitset.iter().max().unwrap_or(0))
            .max()
            .unwrap_or(0);

        // Pre-allocate with known capacity to avoid allocations during insertion
        mapping.resize(max_kind + 1, Vec::new());

        for (idx, rule) in rules.iter().enumerate() {
            let Some(kinds) = rule.matcher.potential_kinds() else {
                eprintln!("rule `{}` must have kind", &rule.id);
                continue;
            };
            for kind in &kinds {
                // Now we can safely index without bounds checking
                mapping[kind].push(idx);
            }
        }

        // Shrink the mapping to remove empty vectors at the end
        while let Some(last) = mapping.last() {
            if last.is_empty() {
                mapping.pop();
            } else {
                break;
            }
        }

        Self {
            rules,
            kind_rule_mapping: mapping,
            unused_suppression_rule: None,
        }
    }

    /// Register the rule used to report unused `ast-grep-ignore` comments.
    /// A rule with `Severity::Off` is silently ignored.
    pub fn set_unused_suppression_rule(&mut self, rule: &'r RuleConfig<L>) {
        if matches!(rule.severity, Severity::Off) {
            return;
        }
        self.unused_suppression_rule = Some(rule);
    }

    /// Scan `root` with every registered rule.
    ///
    /// Honors file- and line-level `ast-grep-ignore` suppressions, tracks
    /// suppression comments that never fired, and — when `separate_fix` is
    /// true — routes matches of fixable rules into `diffs` instead of
    /// `matches`.
    pub fn scan<'a, D>(&self, root: &'a AstGrep<D>, separate_fix: bool) -> ScanResult<'a, '_, D, L>
    where
        D: Doc<Lang = L>,
    {
        let mut result = ScanResultInner {
            diffs: Vec::with_capacity(32), // Pre-allocate for common case
            matches: map_with_capacity(self.rules.len()),
            unused_suppressions: Vec::with_capacity(8),
        };
        let (suppressions, mut suppression_nodes) = Suppressions::collect_all(root);
        let file_sup = suppressions.file_suppression();
        // a bare file-level directive (no rule list) suppresses everything:
        // return early without traversing
        if let MaySuppressed::Yes(s) = file_sup
            && s.suppressed.is_none()
        {
            return result.into_result(self, separate_fix);
        }
        for node in root.root().dfs() {
            let kind = node.kind_id() as usize;
            // only rules that can possibly match this node kind are tried
            let Some(rule_idx) = self.kind_rule_mapping.get(kind) else {
                continue;
            };
            let line_sup = suppressions.line_suppression(&node);
            for &idx in rule_idx {
                let rule = &self.rules[idx];
                let Some(ret) = rule.matcher.match_node(node.clone()) else {
                    continue;
                };
                // a suppression that fires is no longer "unused"
                if let Some(id) = file_sup.suppressed_id(&rule.id) {
                    suppression_nodes.remove(&id);
                    continue;
                }
                if let Some(id) = line_sup.suppressed_id(&rule.id) {
                    suppression_nodes.remove(&id);
                    continue;
                }
                if rule.fix.is_none() || !separate_fix {
                    let matches = result.matches.entry(idx).or_default();
                    matches.push(ret);
                } else {
                    result.diffs.push((idx, ret));
                }
            }
        }
        // whatever suppression nodes remain never suppressed anything
        result.unused_suppressions = suppression_nodes
            .into_values()
            .map(NodeMatch::from)
            .collect();
        result.into_result(self, separate_fix)
    }

    /// Resolve a rule index back to its config. Panics if `idx` is out of range.
    pub fn get_rule(&self, idx: usize) -> &'r RuleConfig<L> {
        self.rules[idx]
    }

    /// Build the built-in `unused-suppression` rule config with the given
    /// severity for language `lang`.
    pub fn unused_config(severity: Severity, lang: L) -> RuleConfig<L> {
        let rule: SerializableRule = crate::from_str(r#"{"any": []}"#).unwrap();
        let core = SerializableRuleCore {
            rule,
            constraints: None,
            // the fix is an empty string: applying it deletes the directive
            fix: crate::from_str(r#"''"#).unwrap(),
            transform: None,
            utils: None,
        };
        let config = SerializableRuleConfig {
            core,
            id: "unused-suppression".to_string(),
            severity,
            files: None,
            ignores: None,
            language: lang,
            message: "Unused 'ast-grep-ignore' directive.".into(),
            metadata: None,
            note: None,
            rewriters: None,
            url: None,
            labels: None,
        };
        RuleConfig::try_from(config, &Default::default()).unwrap()
    }
}
339
340fn parse_suppression_set(text: &str) -> Option<RapidSet<String>> {
341    let (_, after) = text.trim().split_once(IGNORE_TEXT)?;
342    let after = after.trim();
343    if after.is_empty() {
344        return None;
345    }
346    let (_, rules) = after.split_once(':')?;
347    let set = rules.split(',').map(|r| r.trim().to_string()).collect();
348    Some(set)
349}
350
#[cfg(test)]
mod test {
    use super::*;
    use crate::SerializableRuleConfig;
    use crate::from_str;
    use crate::test::TypeScript;
    use thread_ast_engine::tree_sitter::{LanguageExt, StrDoc};

    /// A rule with id `test` that matches `console.log($A)` in Tsx.
    fn create_rule() -> RuleConfig<TypeScript> {
        let rule: SerializableRuleConfig<TypeScript> = from_str(
            r"
id: test
rule: {pattern: 'console.log($A)'}
language: Tsx",
        )
        .expect("parse");
        RuleConfig::try_from(rule, &Default::default()).expect("work")
    }

    /// Scan `source` with the test rule and hand the match groups to `test_fn`.
    fn test_scan<F>(source: &str, test_fn: F)
    where
        F: Fn(
            Vec<(
                &'_ RuleConfig<TypeScript>,
                Vec<NodeMatch<'_, StrDoc<TypeScript>>>,
            )>,
        ),
    {
        let root = TypeScript::Tsx.ast_grep(source);
        let rule = create_rule();
        let rules = vec![&rule];
        let scan = CombinedScan::new(rules);
        let scanned = scan.scan(&root, false);
        test_fn(scanned.matches);
    }

    #[test]
    fn test_ignore_node() {
        let source = r#"
    // ast-grep-ignore
    console.log('ignored all')
    console.log('no ignore')
    // ast-grep-ignore: test
    console.log('ignore one')
    // ast-grep-ignore: not-test
    console.log('ignore another')
    // ast-grep-ignore: not-test, test
    console.log('multiple ignore')
    "#;
        test_scan(source, |scanned| {
            let matches = &scanned[0];
            // only the unsuppressed line and the wrong-rule suppression remain
            assert_eq!(matches.1.len(), 2);
            assert_eq!(matches.1[0].text(), "console.log('no ignore')");
            assert_eq!(matches.1[1].text(), "console.log('ignore another')");
        });
    }

    #[test]
    fn test_ignore_node_same_line() {
        let source = r#"
    console.log('ignored all') // ast-grep-ignore
    console.log('no ignore')
    console.log('ignore one') // ast-grep-ignore: test
    console.log('ignore another') // ast-grep-ignore: not-test
    console.log('multiple ignore') // ast-grep-ignore: not-test, test
    "#;
        test_scan(source, |scanned| {
            let matches = &scanned[0];
            assert_eq!(matches.1.len(), 2);
            assert_eq!(matches.1[0].text(), "console.log('no ignore')");
            assert_eq!(matches.1[1].text(), "console.log('ignore another')");
        });
    }

    /// Like `test_scan` but with the unused-suppression rule registered.
    fn test_scan_unused<F>(source: &str, test_fn: F)
    where
        F: Fn(
            Vec<(
                &'_ RuleConfig<TypeScript>,
                Vec<NodeMatch<'_, StrDoc<TypeScript>>>,
            )>,
        ),
    {
        let root = TypeScript::Tsx.ast_grep(source);
        let rule = create_rule();
        let rules = vec![&rule];
        let mut scan = CombinedScan::new(rules);
        let mut unused = create_rule();
        unused.id = "unused-suppression".to_string();
        scan.set_unused_suppression_rule(&unused);
        let scanned = scan.scan(&root, false);
        test_fn(scanned.matches);
    }

    #[test]
    fn test_non_used_suppression() {
        let source = r#"
    console.log('no ignore')
    console.debug('not used') // ast-grep-ignore: test
    console.log('multiple ignore') // ast-grep-ignore: test
    "#;
        test_scan_unused(source, |scanned| {
            assert_eq!(scanned.len(), 2);
            // the directive on the console.debug line suppressed nothing
            let unused = &scanned[1];
            assert_eq!(unused.1.len(), 1);
            assert_eq!(unused.1[0].text(), "// ast-grep-ignore: test");
        });
    }

    #[test]
    fn test_file_suppression() {
        let source = r#"// ast-grep-ignore: test

    console.log('ignored')
    console.debug('report') // ast-grep-ignore: test
    console.log('report') // ast-grep-ignore: test
    "#;
        test_scan_unused(source, |scanned| {
            assert_eq!(scanned.len(), 1);
            let unused = &scanned[0];
            assert_eq!(unused.1.len(), 2);
        });
        // no blank line after the directive: NOT a file-level suppression
        let source = r#"// ast-grep-ignore: test
    console.debug('above is not file sup')
    console.log('not ignored')
    "#;
        test_scan_unused(source, |scanned| {
            assert_eq!(scanned.len(), 2);
            assert_eq!(scanned[0].0.id, "test");
            assert_eq!(scanned[1].0.id, "unused-suppression");
        });
    }

    #[test]
    fn test_file_suppression_all() {
        // a bare file-level directive suppresses everything, including
        // reporting of unused suppressions
        let source = r#"// ast-grep-ignore

    console.log('ignored')
    console.debug('report') // ast-grep-ignore: test
    console.log('report') // ast-grep-ignore
    "#;
        test_scan_unused(source, |scanned| {
            assert_eq!(scanned.len(), 0);
        });
        let source = r#"// ast-grep-ignore

    console.debug('no hit')
    "#;
        test_scan_unused(source, |scanned| {
            assert_eq!(scanned.len(), 0);
        });
    }
}
503}