pub mod api_conventions;
pub mod async_patterns;
pub mod constraints;
pub mod detector;
pub mod error_handling;
pub mod format;
pub mod import_patterns;
pub mod language_profile;
pub mod languages;
pub mod naming;
pub mod resource_mgmt;
pub mod signals;
pub mod soft_delete;
pub mod test_idioms;
pub mod type_coverage;
pub mod validation;
use std::collections::HashMap;
use std::path::Path;
use std::time::Instant;
use crate::ast::parser::ParserPool;
use crate::error::TldrError;
use crate::fs::tree::{collect_files, get_file_tree};
use crate::types::{
ApiConventionPattern, AsyncPattern, ErrorHandlingPattern, ImportPattern, Language,
LanguageDistribution, NamingPattern, PatternCategory, PatternMetadata, PatternReport,
ResourceManagementPattern, SoftDeletePattern, TestIdiomPattern, TypeCoveragePattern,
ValidationPattern,
};
use crate::TldrResult;
pub use constraints::{generate_constraints, DetectedPatterns};
pub use detector::PatternDetector;
pub use signals::PatternSignals;
/// Settings that control a [`PatternMiner`] run.
#[derive(Debug, Clone)]
pub struct PatternConfig {
    /// Threshold a pattern's confidence must reach (`>=`) to be kept in the report.
    pub min_confidence: f64,
    /// Maximum number of collected files that are read and analyzed.
    pub max_files: usize,
    /// Limit forwarded to the per-category `signals_to_pattern` converters.
    /// NOTE(review): presumably caps the evidence entries kept per pattern — confirm.
    pub evidence_limit: usize,
    /// Pattern categories of interest.
    /// NOTE(review): not read anywhere in this file — confirm where (or whether) it is applied.
    pub categories: Vec<PatternCategory>,
    /// When `true`, constraints are generated from the patterns that pass filtering;
    /// when `false`, the report carries an empty constraint list.
    pub generate_constraints: bool,
}
impl Default for PatternConfig {
fn default() -> Self {
Self {
min_confidence: 0.5,
max_files: 1000,
evidence_limit: 3,
categories: Vec::new(), generate_constraints: true,
}
}
}
/// Drives pattern detection over a file or directory and assembles the final report.
pub struct PatternMiner {
    /// Settings applied to every mining run performed by this miner.
    config: PatternConfig,
    /// Parser pool used to parse file contents before detection.
    parser_pool: ParserPool,
}
impl PatternMiner {
/// Creates a miner that uses `config` and a freshly constructed [`ParserPool`].
pub fn new(config: PatternConfig) -> Self {
    let parser_pool = ParserPool::new();
    Self {
        config,
        parser_pool,
    }
}
/// Mines patterns from `path` and assembles a [`PatternReport`].
///
/// Processing steps:
/// 1. Collect candidate files (optionally restricted by `lang`) and look at no
///    more than `config.max_files` of them.
/// 2. For each file, extract signals and merge them into one aggregate. Files
///    that cannot be read are counted as skipped; files that fail with
///    `TldrError::ParseError` are retried through the fallback extractor and
///    counted as partial when that succeeds.
/// 3. Convert the aggregate into one optional pattern per category, then drop
///    patterns whose confidence is below `config.min_confidence`.
/// 4. Derive constraints (when enabled) and conflicts from the surviving patterns.
///
/// # Errors
///
/// Returns the error produced while collecting files. Per-file failures are
/// never propagated; they only affect the skipped/partial counters.
pub fn mine_patterns(&self, path: &Path, lang: Option<Language>) -> TldrResult<PatternReport> {
    let start = Instant::now();
    let files = self.collect_files(path, lang)?;

    let mut files_analyzed = 0;
    let mut files_skipped = 0;
    let mut files_partial = 0;
    let mut files_by_language: HashMap<String, usize> = HashMap::new();
    let mut aggregated_signals = PatternSignals::default();

    for (file_path, file_lang) in files.iter().take(self.config.max_files) {
        let language = *file_lang;
        let content = match std::fs::read_to_string(file_path) {
            Ok(text) => text,
            Err(_) => {
                files_skipped += 1;
                continue;
            }
        };

        // Classify the file first: `Some((signals, is_partial))` when anything
        // usable was extracted, `None` when the file has to be skipped.
        let extracted = match self.extract_file_signals(&content, language, file_path) {
            Ok(signals) => Some((signals, false)),
            Err(TldrError::ParseError { .. }) => self
                .extract_partial_signals(&content, language, file_path)
                .ok()
                .map(|signals| (signals, true)),
            Err(_) => None,
        };

        match extracted {
            Some((signals, is_partial)) => {
                aggregated_signals.merge(&signals);
                if is_partial {
                    files_partial += 1;
                } else {
                    files_analyzed += 1;
                }
                *files_by_language.entry(language.to_string()).or_default() += 1;
            }
            None => files_skipped += 1,
        }
    }

    // The reported duration covers file collection and scanning only; it is
    // captured before the aggregate is converted into patterns.
    let duration_ms = start.elapsed().as_millis() as u64;

    let soft_delete = self.signals_to_soft_delete(&aggregated_signals);
    let error_handling = self.signals_to_error_handling(&aggregated_signals);
    let naming = self.signals_to_naming(&aggregated_signals);
    let resource_management = self.signals_to_resource_mgmt(&aggregated_signals);
    let validation = self.signals_to_validation(&aggregated_signals);
    let test_idioms = self.signals_to_test_idioms(&aggregated_signals);
    let import_patterns = self.signals_to_import_patterns(&aggregated_signals);
    let type_coverage = self.signals_to_type_coverage(&aggregated_signals);
    let api_conventions = self.signals_to_api_conventions(&aggregated_signals);
    let async_patterns = self.signals_to_async_patterns(&aggregated_signals);

    let patterns_before = self.count_patterns_before_filter(&DetectedPatterns {
        soft_delete: &soft_delete,
        error_handling: &error_handling,
        naming: &naming,
        resource_management: &resource_management,
        validation: &validation,
        test_idioms: &test_idioms,
        import_patterns: &import_patterns,
        type_coverage: &type_coverage,
        api_conventions: &api_conventions,
        async_patterns: &async_patterns,
    });

    let soft_delete = self.filter_by_confidence(soft_delete);
    let error_handling = self.filter_by_confidence(error_handling);
    let naming = self.filter_by_confidence(naming);
    let resource_management = self.filter_by_confidence(resource_management);
    let validation = self.filter_by_confidence(validation);
    let test_idioms = self.filter_by_confidence(test_idioms);
    let import_patterns = self.filter_by_confidence(import_patterns);
    let type_coverage = self.filter_by_confidence(type_coverage);
    let api_conventions = self.filter_by_confidence(api_conventions);
    let async_patterns = self.filter_by_confidence(async_patterns);

    // One borrowed view over the surviving patterns serves the post-filter
    // count, constraint generation and conflict detection. It is scoped so the
    // borrows end before the patterns are moved into the report.
    let (patterns_after, constraints, conflicts) = {
        let surviving = DetectedPatterns {
            soft_delete: &soft_delete,
            error_handling: &error_handling,
            naming: &naming,
            resource_management: &resource_management,
            validation: &validation,
            test_idioms: &test_idioms,
            import_patterns: &import_patterns,
            type_coverage: &type_coverage,
            api_conventions: &api_conventions,
            async_patterns: &async_patterns,
        };
        // The counting helper only counts `Some` entries, so it works for the
        // filtered set as well despite its name.
        let remaining = self.count_patterns_before_filter(&surviving);
        let constraints = if self.config.generate_constraints {
            generate_constraints(&surviving)
        } else {
            Vec::new()
        };
        let conflicts = self.detect_conflicts(&surviving);
        (remaining, constraints, conflicts)
    };

    // Every seen language that is on the supported list is credited with the
    // global post-filter count; all other languages get zero.
    const SUPPORTED_PATTERN_LANGUAGES: [&str; 6] =
        ["python", "typescript", "javascript", "go", "rust", "java"];
    let patterns_by_language: HashMap<String, usize> = files_by_language
        .keys()
        .map(|lang| {
            let count = if SUPPORTED_PATTERN_LANGUAGES.contains(&lang.as_str()) {
                patterns_after
            } else {
                0
            };
            (lang.clone(), count)
        })
        .collect();

    let metadata = PatternMetadata {
        files_analyzed,
        files_skipped,
        files_partial,
        duration_ms,
        language_distribution: LanguageDistribution {
            files_by_language,
            patterns_by_language,
        },
        patterns_before_filter: patterns_before,
        patterns_after_filter: patterns_after,
        confidence_threshold: self.config.min_confidence,
    };

    Ok(PatternReport {
        metadata,
        soft_delete,
        error_handling,
        naming,
        resource_management,
        validation,
        test_idioms,
        import_patterns,
        type_coverage,
        api_conventions,
        async_patterns,
        constraints,
        conflicts,
    })
}
/// Resolves `path` into the list of `(file, language)` pairs to analyze.
///
/// * When `path` is a single file, `lang` (if given) is used as that file's
///   language; otherwise the language is detected from the path.
/// * When `path` is not a single file, the file tree beneath it is walked and
///   each file's language is detected from its own path. Files whose language
///   cannot be detected are dropped. If `lang` is given, only files detected
///   as that language are kept.
///
/// # Errors
///
/// * `TldrError::UnsupportedLanguage` when `path` is a single file, no `lang`
///   was supplied and no language can be detected from the path. The payload
///   is the file extension, or `"unknown"` when there is none.
/// * Any error returned while building the file tree.
fn collect_files(
    &self,
    path: &Path,
    lang: Option<Language>,
) -> TldrResult<Vec<(std::path::PathBuf, Language)>> {
    if path.is_file() {
        let file_lang = lang.or_else(|| Language::from_path(path)).ok_or_else(|| {
            TldrError::UnsupportedLanguage(
                path.extension()
                    .map(|e| e.to_string_lossy().to_string())
                    .unwrap_or_else(|| "unknown".to_string()),
            )
        })?;
        return Ok(vec![(path.to_path_buf(), file_lang)]);
    }

    let ignore_spec = crate::IgnoreSpec::default();
    let tree = get_file_tree(path, None, true, Some(&ignore_spec))?;

    let mut files = Vec::new();
    for file_path in collect_files(&tree, path) {
        // Always detect from the path. Previously a requested `lang` was
        // assigned to every file unconditionally, which made the language
        // filter a no-op and caused unrelated files to be analyzed as `lang`.
        let detected = match Language::from_path(&file_path) {
            Some(found) => found,
            None => continue,
        };
        if let Some(wanted) = lang {
            if detected != wanted {
                continue;
            }
        }
        files.push((file_path, detected));
    }
    Ok(files)
}
/// Parses `content` as `lang` and runs full detection on the resulting tree.
///
/// # Errors
///
/// Returns the parser pool's error when parsing fails; the detector is not
/// constructed in that case.
fn extract_file_signals(
    &self,
    content: &str,
    lang: Language,
    file_path: &Path,
) -> TldrResult<PatternSignals> {
    let syntax_tree = self.parser_pool.parse(content, lang)?;
    let signals =
        PatternDetector::new(lang, file_path.to_path_buf()).detect_all(&syntax_tree, content);
    Ok(signals)
}
/// Runs the detector's fallback path directly on the raw `content`, without
/// parsing it first. In its current form this always returns `Ok`.
fn extract_partial_signals(
    &self,
    content: &str,
    lang: Language,
    file_path: &Path,
) -> TldrResult<PatternSignals> {
    let signals = PatternDetector::new(lang, file_path.to_path_buf()).detect_fallback(content);
    Ok(signals)
}
/// Delegates to `soft_delete::signals_to_pattern`, forwarding the configured
/// evidence limit.
fn signals_to_soft_delete(&self, signals: &PatternSignals) -> Option<SoftDeletePattern> {
    let evidence_limit = self.config.evidence_limit;
    soft_delete::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `error_handling::signals_to_pattern`, forwarding the
/// configured evidence limit.
fn signals_to_error_handling(&self, signals: &PatternSignals) -> Option<ErrorHandlingPattern> {
    let evidence_limit = self.config.evidence_limit;
    error_handling::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `naming::signals_to_pattern`. Unlike the other converters,
/// this one takes no evidence limit.
fn signals_to_naming(&self, signals: &PatternSignals) -> Option<NamingPattern> {
    let naming_pattern = naming::signals_to_pattern(signals);
    naming_pattern
}
/// Delegates to `resource_mgmt::signals_to_pattern`, forwarding the
/// configured evidence limit.
fn signals_to_resource_mgmt(
    &self,
    signals: &PatternSignals,
) -> Option<ResourceManagementPattern> {
    let evidence_limit = self.config.evidence_limit;
    resource_mgmt::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `validation::signals_to_pattern`, forwarding the configured
/// evidence limit.
fn signals_to_validation(&self, signals: &PatternSignals) -> Option<ValidationPattern> {
    let evidence_limit = self.config.evidence_limit;
    validation::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `test_idioms::signals_to_pattern`, forwarding the configured
/// evidence limit.
fn signals_to_test_idioms(&self, signals: &PatternSignals) -> Option<TestIdiomPattern> {
    let evidence_limit = self.config.evidence_limit;
    test_idioms::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `import_patterns::signals_to_pattern`, forwarding the
/// configured evidence limit.
fn signals_to_import_patterns(&self, signals: &PatternSignals) -> Option<ImportPattern> {
    let evidence_limit = self.config.evidence_limit;
    import_patterns::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `type_coverage::signals_to_pattern`, forwarding the
/// configured evidence limit.
fn signals_to_type_coverage(&self, signals: &PatternSignals) -> Option<TypeCoveragePattern> {
    let evidence_limit = self.config.evidence_limit;
    type_coverage::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `api_conventions::signals_to_pattern`, forwarding the
/// configured evidence limit.
fn signals_to_api_conventions(&self, signals: &PatternSignals) -> Option<ApiConventionPattern> {
    let evidence_limit = self.config.evidence_limit;
    api_conventions::signals_to_pattern(signals, evidence_limit)
}
/// Delegates to `async_patterns::signals_to_pattern`, forwarding the
/// configured evidence limit.
fn signals_to_async_patterns(&self, signals: &PatternSignals) -> Option<AsyncPattern> {
    let evidence_limit = self.config.evidence_limit;
    async_patterns::signals_to_pattern(signals, evidence_limit)
}
/// Keeps `pattern` only when its confidence is at least `config.min_confidence`.
///
/// `None` stays `None`. Because the comparison is `>=`, a confidence exactly
/// equal to the threshold is kept.
fn filter_by_confidence<T: HasConfidence>(&self, pattern: Option<T>) -> Option<T> {
    let threshold = self.config.min_confidence;
    match pattern {
        Some(candidate) if candidate.confidence() >= threshold => Some(candidate),
        _ => None,
    }
}
/// Counts how many of the ten pattern categories in `patterns` are present
/// (`Some`). The count depends only on what is passed in, so it is also used
/// for the post-filter set.
fn count_patterns_before_filter(&self, patterns: &DetectedPatterns<'_>) -> usize {
    let present = [
        patterns.soft_delete.is_some(),
        patterns.error_handling.is_some(),
        patterns.naming.is_some(),
        patterns.resource_management.is_some(),
        patterns.validation.is_some(),
        patterns.test_idioms.is_some(),
        patterns.import_patterns.is_some(),
        patterns.type_coverage.is_some(),
        patterns.api_conventions.is_some(),
        patterns.async_patterns.is_some(),
    ];
    present.iter().filter(|&&found| found).count()
}
/// Produces human-readable conflict messages for the detected patterns.
///
/// Only the import pattern is inspected: an `Ungrouped` grouping style and a
/// `Mixed` absolute/relative style each add one message. With no import
/// pattern present the result is empty.
fn detect_conflicts(&self, patterns: &DetectedPatterns<'_>) -> Vec<String> {
    let imports = match patterns.import_patterns {
        Some(found) => found,
        None => return Vec::new(),
    };

    let mut conflicts = Vec::new();
    if imports.grouping_style == crate::types::ImportGrouping::Ungrouped {
        conflicts.push(String::from(
            "Inconsistent import grouping: no clear ordering pattern detected",
        ));
    }
    if imports.absolute_vs_relative == crate::types::ImportStyle::Mixed {
        conflicts.push(String::from(
            "Mixed import styles: some files use absolute imports, others use relative",
        ));
    }
    conflicts
}
}
/// Implemented by pattern types that can report a single confidence score,
/// which the miner compares against its configured minimum confidence.
pub trait HasConfidence {
    /// Returns the score used when filtering this pattern by confidence.
    fn confidence(&self) -> f64;
}
impl HasConfidence for SoftDeletePattern {
    /// Returns the pattern's own `confidence` field.
    fn confidence(&self) -> f64 {
        self.confidence
    }
}
impl HasConfidence for ErrorHandlingPattern {
    /// Returns the pattern's own `confidence` field.
    fn confidence(&self) -> f64 {
        self.confidence
    }
}
impl HasConfidence for NamingPattern {
    /// Naming patterns use their `consistency_score` as the confidence value.
    fn confidence(&self) -> f64 {
        self.consistency_score
    }
}
impl HasConfidence for ResourceManagementPattern {
    /// Returns the pattern's own `confidence` field.
    fn confidence(&self) -> f64 {
        self.confidence
    }
}
impl HasConfidence for ValidationPattern {
    /// Returns the pattern's own `confidence` field.
    fn confidence(&self) -> f64 {
        self.confidence
    }
}
impl HasConfidence for TestIdiomPattern {
    /// Returns the pattern's own `confidence` field.
    fn confidence(&self) -> f64 {
        self.confidence
    }
}
impl HasConfidence for ImportPattern {
    /// Always reports `1.0`: no field of the pattern is consulted, so an import
    /// pattern passes any confidence threshold that does not exceed `1.0`.
    fn confidence(&self) -> f64 {
        1.0
    }
}
impl HasConfidence for TypeCoveragePattern {
    /// Type-coverage patterns use `coverage_overall` as the confidence value.
    fn confidence(&self) -> f64 {
        self.coverage_overall
    }
}
impl HasConfidence for ApiConventionPattern {
    /// Returns the pattern's own `confidence` field.
    fn confidence(&self) -> f64 {
        self.confidence
    }
}
impl HasConfidence for AsyncPattern {
    /// Async patterns use `concurrency_confidence` as the confidence value.
    fn confidence(&self) -> f64 {
        self.concurrency_confidence
    }
}
/// Mines patterns from `path` using [`PatternConfig::default`].
///
/// `lang` optionally selects the language to analyze.
///
/// # Errors
///
/// Returns whatever [`PatternMiner::mine_patterns`] returns on failure.
pub fn detect_patterns(path: &Path, lang: Option<Language>) -> TldrResult<PatternReport> {
    PatternMiner::new(PatternConfig::default()).mine_patterns(path, lang)
}
/// Mines patterns from `path` using the caller-supplied `config`.
///
/// `lang` optionally selects the language to analyze.
///
/// # Errors
///
/// Returns whatever [`PatternMiner::mine_patterns`] returns on failure.
pub fn detect_patterns_with_config(
    path: &Path,
    lang: Option<Language>,
    config: PatternConfig,
) -> TldrResult<PatternReport> {
    PatternMiner::new(config).mine_patterns(path, lang)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{
ImportGrouping, ImportPattern, ImportStyle, NamingConvention, NamingPattern,
StarImportUsage, TypeCoveragePattern,
};
/// Builds a miner whose only non-default setting is the confidence threshold.
fn miner_with_threshold(threshold: f64) -> PatternMiner {
    let config = PatternConfig {
        min_confidence: threshold,
        ..Default::default()
    };
    PatternMiner::new(config)
}
/// A naming pattern scoring below the threshold must be removed by the filter.
#[test]
fn test_all_pattern_types_filtered_by_confidence_naming() {
    let miner = miner_with_threshold(0.7);
    let weak_naming = NamingPattern {
        functions: NamingConvention::SnakeCase,
        classes: NamingConvention::PascalCase,
        constants: NamingConvention::UpperSnakeCase,
        private_prefix: None,
        // Below the 0.7 threshold on purpose.
        consistency_score: 0.3,
        violations: Vec::new(),
    };

    let outcome = miner.filter_by_confidence(Some(weak_naming));

    assert!(
        outcome.is_none(),
        "NamingPattern with consistency_score 0.3 should be filtered out at threshold 0.7, \
         but it survived the filter. This indicates naming patterns skip confidence filtering."
    );
}
/// Import patterns report a fixed confidence of 1.0 and must pass a 0.7 threshold.
#[test]
fn test_all_pattern_types_filtered_by_confidence_imports() {
    let miner = miner_with_threshold(0.7);
    let imports = ImportPattern {
        grouping_style: ImportGrouping::StdlibFirst,
        absolute_vs_relative: ImportStyle::Absolute,
        star_imports: StarImportUsage::None,
        alias_conventions: Vec::new(),
        evidence: Vec::new(),
    };

    let outcome = miner.filter_by_confidence(Some(imports));

    assert!(
        outcome.is_some(),
        "ImportPattern with confidence 1.0 should survive threshold 0.7"
    );
}
/// A type-coverage pattern whose overall coverage is below the threshold must
/// be removed by the filter.
#[test]
fn test_all_pattern_types_filtered_by_confidence_type_coverage() {
    let miner = miner_with_threshold(0.7);
    let sparse_coverage = TypeCoveragePattern {
        // Below the 0.7 threshold on purpose.
        coverage_overall: 0.2,
        coverage_functions: 0.1,
        coverage_variables: 0.3,
        typevar_usage: false,
        generic_patterns: Vec::new(),
        evidence: Vec::new(),
    };

    let outcome = miner.filter_by_confidence(Some(sparse_coverage));

    assert!(
        outcome.is_none(),
        "TypeCoveragePattern with coverage_overall 0.2 should be filtered out at threshold 0.7, \
         but it survived the filter. This indicates type_coverage patterns skip confidence filtering."
    );
}
/// Replays the per-language attribution rule on hand-built data: supported
/// languages receive the global post-filter count, unsupported ones receive 0.
#[test]
fn test_patterns_by_language_independent() {
    use std::collections::HashMap;

    const SUPPORTED: [&str; 6] = ["python", "typescript", "javascript", "go", "rust", "java"];
    let patterns_after: usize = 4;

    let files_by_language: HashMap<String, usize> = [("python", 10), ("lua", 5)]
        .iter()
        .map(|&(name, file_count)| (name.to_string(), file_count))
        .collect();

    let patterns_by_language: HashMap<String, usize> = files_by_language
        .keys()
        .map(|lang| {
            let count = if SUPPORTED.contains(&lang.as_str()) {
                patterns_after
            } else {
                0
            };
            (lang.clone(), count)
        })
        .collect();

    let python_count = patterns_by_language["python"];
    let lua_count = patterns_by_language["lua"];

    assert_eq!(
        python_count, patterns_after,
        "Supported language (python) should get patterns_after count ({}), got {}",
        patterns_after, python_count
    );
    assert_eq!(
        lua_count, 0,
        "Unsupported language (lua) should get 0 patterns, got {}",
        lua_count
    );
    assert_ne!(
        python_count, lua_count,
        "patterns_by_language should have per-language counts: supported languages get \
         the global count, unsupported languages get 0. Both got {}.",
        python_count
    );
}
/// Patterns scoring above the threshold must be kept by the filter.
#[test]
fn test_patterns_survive_filter_when_high_confidence() {
    let miner = miner_with_threshold(0.5);

    let strong_naming = NamingPattern {
        functions: NamingConvention::SnakeCase,
        classes: NamingConvention::PascalCase,
        constants: NamingConvention::UpperSnakeCase,
        private_prefix: Some("_".to_string()),
        consistency_score: 0.95,
        violations: Vec::new(),
    };
    let kept_naming = miner.filter_by_confidence(Some(strong_naming));
    assert!(
        kept_naming.is_some(),
        "NamingPattern with consistency_score 0.95 should survive threshold 0.5"
    );

    let strong_coverage = TypeCoveragePattern {
        coverage_overall: 0.85,
        coverage_functions: 0.9,
        coverage_variables: 0.8,
        typevar_usage: true,
        generic_patterns: vec!["Optional".to_string()],
        evidence: Vec::new(),
    };
    let kept_coverage = miner.filter_by_confidence(Some(strong_coverage));
    assert!(
        kept_coverage.is_some(),
        "TypeCoveragePattern with coverage_overall 0.85 should survive threshold 0.5"
    );
}
}