sdivi_patterns/
catalog.rs1use std::collections::BTreeMap;
4use std::path::PathBuf;
5
6use globset::{Glob, GlobSet, GlobSetBuilder};
7use sdivi_config::PatternsConfig;
8use serde::{Deserialize, Serialize};
9
10use crate::fingerprint::{fingerprint_node_kind, PatternFingerprint};
11use crate::queries;
12
13#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
25pub struct PatternLocation {
26 pub file: PathBuf,
28 pub start_row: usize,
30 pub start_col: usize,
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
45pub struct PatternStats {
46 pub count: u32,
48 pub locations: Vec<PatternLocation>,
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
66pub struct PatternCatalog {
67 pub entries: BTreeMap<String, BTreeMap<PatternFingerprint, PatternStats>>,
69}
70
/// Builds a [`PatternCatalog`] from parsed feature records.
///
/// Processing steps:
///
/// 1. Records whose path matches any glob in `config.scope_exclude` are
///    skipped entirely.
/// 2. For every pattern hint of the remaining records, the hint's node kind
///    is mapped to a category for the record's language; hints without a
///    category are ignored.
/// 3. Each categorised hint is fingerprinted by node kind and tallied: the
///    fingerprint's count is incremented and its location is appended.
/// 4. Fingerprints whose count is below `config.min_pattern_nodes` are
///    dropped, and categories left without any fingerprint are removed.
#[cfg(feature = "pipeline-records")]
pub fn build_catalog(
    records: &[sdivi_parsing::feature_record::FeatureRecord],
    config: &PatternsConfig,
) -> PatternCatalog {
    let excluded = build_globset(&config.scope_exclude);
    let mut by_category: BTreeMap<String, BTreeMap<PatternFingerprint, PatternStats>> =
        BTreeMap::new();

    let in_scope = records
        .iter()
        .filter(|record| !is_excluded(&record.path, &excluded));

    for record in in_scope {
        for hint in &record.pattern_hints {
            // Hints whose node kind has no category for this language are not catalogued.
            let Some(category) = queries::category_for_node_kind(&hint.node_kind, &record.language)
            else {
                continue;
            };
            let fingerprint = fingerprint_node_kind(&hint.node_kind);

            let stats = by_category
                .entry(category.to_string())
                .or_default()
                .entry(fingerprint)
                .or_insert(PatternStats {
                    count: 0,
                    locations: Vec::new(),
                });
            stats.count += 1;
            stats.locations.push(PatternLocation {
                file: record.path.clone(),
                start_row: hint.start_row,
                start_col: hint.start_col,
            });
        }
    }

    // Prune rare fingerprints, then drop any category that ends up empty.
    let threshold = config.min_pattern_nodes;
    by_category.retain(|_, by_fingerprint| {
        by_fingerprint.retain(|_, stats| stats.count >= threshold);
        !by_fingerprint.is_empty()
    });

    PatternCatalog {
        entries: by_category,
    }
}
130
/// Compiles the given glob patterns into a single [`GlobSet`].
///
/// Returns `None` when `patterns` is empty or when the set itself fails to
/// build. Patterns that do not parse as a glob are skipped without an error,
/// so the resulting set contains only the valid ones.
#[cfg(feature = "pipeline-records")]
fn build_globset(patterns: &[String]) -> Option<GlobSet> {
    if patterns.is_empty() {
        return None;
    }

    let mut builder = GlobSetBuilder::new();
    patterns
        .iter()
        // Best effort: a malformed pattern is dropped rather than failing the whole set.
        .filter_map(|pattern| Glob::new(pattern).ok())
        .for_each(|glob| {
            builder.add(glob);
        });

    builder.build().ok()
}
144
/// Reports whether `path` matches the exclusion set.
///
/// With no exclusion set (`None`) nothing is excluded and the result is
/// always `false`.
#[cfg(feature = "pipeline-records")]
fn is_excluded(path: &std::path::Path, exclude_set: &Option<GlobSet>) -> bool {
    matches!(exclude_set, Some(globs) if globs.is_match(path))
}