use std::collections::BTreeMap;
use std::path::PathBuf;
use globset::{Glob, GlobSet, GlobSetBuilder};
use sdivi_config::PatternsConfig;
use serde::{Deserialize, Serialize};
use crate::fingerprint::{fingerprint_node_kind, PatternFingerprint};
use crate::queries;
/// Source position at which a single pattern occurrence was recorded.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PatternLocation {
/// Path of the file the occurrence was found in.
pub file: PathBuf,
/// Row at which the occurrence starts (indexing base not visible here — confirm against the producer).
pub start_row: usize,
/// Column at which the occurrence starts (indexing base not visible here — confirm against the producer).
pub start_col: usize,
}
/// Aggregated occurrences of one pattern fingerprint.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PatternStats {
/// Number of occurrences recorded for the fingerprint.
pub count: u32,
/// Location of each recorded occurrence, in the order they were recorded.
pub locations: Vec<PatternLocation>,
}
/// Catalog of pattern statistics grouped by category and then by fingerprint.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct PatternCatalog {
/// Outer key: category name. Inner key: pattern fingerprint. Value: the
/// statistics gathered for that fingerprint within the category.
pub entries: BTreeMap<String, BTreeMap<PatternFingerprint, PatternStats>>,
}
/// Builds a [`PatternCatalog`] from parsed feature records.
///
/// Processing steps:
///
/// 1. Records whose path matches any glob in `config.scope_exclude` are
///    skipped entirely.
/// 2. For every pattern hint of the remaining records, the hint's node kind
///    is mapped to a category for the record's language; hints without a
///    category are ignored.
/// 3. Each categorised hint bumps the count of its fingerprint and appends
///    its location.
/// 4. Fingerprints with fewer than `config.min_pattern_nodes` occurrences are
///    dropped, and categories left without any fingerprint are removed.
#[cfg(feature = "pipeline-records")]
pub fn build_catalog(
    records: &[sdivi_parsing::feature_record::FeatureRecord],
    config: &PatternsConfig,
) -> PatternCatalog {
    let excluded_paths = build_globset(&config.scope_exclude);
    let mut catalog = PatternCatalog::default();

    let in_scope = records
        .iter()
        .filter(|record| !is_excluded(&record.path, &excluded_paths));

    for record in in_scope {
        for hint in &record.pattern_hints {
            let category = match queries::category_for_node_kind(&hint.node_kind, &record.language)
            {
                Some(category) => category,
                // Node kinds without a category do not contribute to the catalog.
                None => continue,
            };
            let fingerprint = fingerprint_node_kind(&hint.node_kind);

            let stats = catalog
                .entries
                .entry(category.to_string())
                .or_default()
                .entry(fingerprint)
                .or_insert(PatternStats {
                    count: 0,
                    locations: Vec::new(),
                });
            stats.count += 1;
            stats.locations.push(PatternLocation {
                file: record.path.clone(),
                start_row: hint.start_row,
                start_col: hint.start_col,
            });
        }
    }

    // Prune in a single pass: drop rare fingerprints first, then the category
    // itself if nothing survived inside it.
    let threshold = config.min_pattern_nodes;
    catalog.entries.retain(|_, by_fingerprint| {
        by_fingerprint.retain(|_, stats| stats.count >= threshold);
        !by_fingerprint.is_empty()
    });

    catalog
}
/// Compiles `patterns` into a single [`GlobSet`].
///
/// Returns `None` when `patterns` is empty or when the final set fails to
/// build. Individual patterns that do not parse as a glob are skipped without
/// reporting an error, so one malformed entry does not disable the others.
#[cfg(feature = "pipeline-records")]
fn build_globset(patterns: &[String]) -> Option<GlobSet> {
    if patterns.is_empty() {
        return None;
    }

    let mut set_builder = GlobSetBuilder::new();
    let parsed_globs = patterns
        .iter()
        .filter_map(|pattern| Glob::new(pattern).ok());
    for glob in parsed_globs {
        set_builder.add(glob);
    }

    set_builder.build().ok()
}
/// Reports whether `path` matches the exclusion set.
///
/// An absent set (`None`) excludes nothing.
#[cfg(feature = "pipeline-records")]
fn is_excluded(path: &std::path::Path, exclude_set: &Option<GlobSet>) -> bool {
    matches!(exclude_set, Some(globs) if globs.is_match(path))
}