1pub mod api_conventions;
24pub mod async_patterns;
25pub mod constraints;
26pub mod detector;
27pub mod error_handling;
28pub mod format;
29pub mod import_patterns;
30pub mod language_profile;
31pub mod languages;
32pub mod naming;
33pub mod resource_mgmt;
34pub mod signals;
35pub mod soft_delete;
36pub mod test_idioms;
37pub mod type_coverage;
38pub mod validation;
39
40use std::collections::HashMap;
41use std::path::Path;
42use std::time::Instant;
43
44use crate::ast::parser::ParserPool;
45use crate::error::TldrError;
46use crate::fs::tree::{collect_files, get_file_tree};
47use crate::types::{
48 ApiConventionPattern, AsyncPattern, ErrorHandlingPattern, ImportPattern, Language,
49 LanguageDistribution, NamingPattern, PatternCategory, PatternMetadata, PatternReport,
50 ResourceManagementPattern, SoftDeletePattern, TestIdiomPattern, TypeCoveragePattern,
51 ValidationPattern,
52};
53use crate::TldrResult;
54
55pub use constraints::{generate_constraints, DetectedPatterns};
56pub use detector::PatternDetector;
57pub use signals::PatternSignals;
58
59#[derive(Debug, Clone)]
61pub struct PatternConfig {
62 pub min_confidence: f64,
64 pub max_files: usize,
66 pub evidence_limit: usize,
68 pub categories: Vec<PatternCategory>,
70 pub generate_constraints: bool,
72}
73
74impl Default for PatternConfig {
75 fn default() -> Self {
76 Self {
77 min_confidence: 0.5,
78 max_files: 1000,
79 evidence_limit: 3,
80 categories: Vec::new(), generate_constraints: true,
82 }
83 }
84}
85
86pub struct PatternMiner {
88 config: PatternConfig,
89 parser_pool: ParserPool,
90}
91
92impl PatternMiner {
93 pub fn new(config: PatternConfig) -> Self {
95 Self {
96 config,
97 parser_pool: ParserPool::new(),
98 }
99 }
100
101 pub fn mine_patterns(&self, path: &Path, lang: Option<Language>) -> TldrResult<PatternReport> {
111 let start = Instant::now();
112
113 let files = self.collect_files(path, lang)?;
115
116 let mut files_analyzed = 0;
117 let mut files_skipped = 0;
118 let mut files_partial = 0;
119 let mut files_by_language: HashMap<String, usize> = HashMap::new();
120 let mut patterns_by_language: HashMap<String, usize> = HashMap::new();
121
122 let mut aggregated_signals = PatternSignals::default();
124
125 for (file_path, file_lang) in files.iter().take(self.config.max_files) {
126 let content = match std::fs::read_to_string(file_path) {
128 Ok(c) => c,
129 Err(_) => {
130 files_skipped += 1;
131 continue;
132 }
133 };
134
135 match self.extract_file_signals(&content, *file_lang, file_path) {
137 Ok(signals) => {
138 aggregated_signals.merge(&signals);
139 files_analyzed += 1;
140 *files_by_language.entry(file_lang.to_string()).or_insert(0) += 1;
141 }
142 Err(TldrError::ParseError { .. }) => {
143 if let Ok(partial) =
145 self.extract_partial_signals(&content, *file_lang, file_path)
146 {
147 aggregated_signals.merge(&partial);
148 files_partial += 1;
149 *files_by_language.entry(file_lang.to_string()).or_insert(0) += 1;
150 } else {
151 files_skipped += 1;
152 }
153 }
154 Err(_) => {
155 files_skipped += 1;
156 }
157 }
158 }
159
160 let duration_ms = start.elapsed().as_millis() as u64;
161
162 let soft_delete = self.signals_to_soft_delete(&aggregated_signals);
164 let error_handling = self.signals_to_error_handling(&aggregated_signals);
165 let naming = self.signals_to_naming(&aggregated_signals);
166 let resource_management = self.signals_to_resource_mgmt(&aggregated_signals);
167 let validation = self.signals_to_validation(&aggregated_signals);
168 let test_idioms = self.signals_to_test_idioms(&aggregated_signals);
169 let import_patterns = self.signals_to_import_patterns(&aggregated_signals);
170 let type_coverage = self.signals_to_type_coverage(&aggregated_signals);
171 let api_conventions = self.signals_to_api_conventions(&aggregated_signals);
172 let async_patterns = self.signals_to_async_patterns(&aggregated_signals);
173
174 let patterns_before = self.count_patterns_before_filter(&DetectedPatterns {
176 soft_delete: &soft_delete,
177 error_handling: &error_handling,
178 naming: &naming,
179 resource_management: &resource_management,
180 validation: &validation,
181 test_idioms: &test_idioms,
182 import_patterns: &import_patterns,
183 type_coverage: &type_coverage,
184 api_conventions: &api_conventions,
185 async_patterns: &async_patterns,
186 });
187
188 let soft_delete = self.filter_by_confidence(soft_delete);
198 let error_handling = self.filter_by_confidence(error_handling);
199 let naming = self.filter_by_confidence(naming);
200 let resource_management = self.filter_by_confidence(resource_management);
201 let validation = self.filter_by_confidence(validation);
202 let test_idioms = self.filter_by_confidence(test_idioms);
203 let import_patterns = self.filter_by_confidence(import_patterns);
204 let type_coverage = self.filter_by_confidence(type_coverage);
205 let api_conventions = self.filter_by_confidence(api_conventions);
206 let async_patterns = self.filter_by_confidence(async_patterns);
207
208 let patterns_after = self.count_patterns_before_filter(&DetectedPatterns {
209 soft_delete: &soft_delete,
210 error_handling: &error_handling,
211 naming: &naming,
212 resource_management: &resource_management,
213 validation: &validation,
214 test_idioms: &test_idioms,
215 import_patterns: &import_patterns,
216 type_coverage: &type_coverage,
217 api_conventions: &api_conventions,
218 async_patterns: &async_patterns,
219 });
220
221 let supported_pattern_languages: &[&str] =
227 &["python", "typescript", "javascript", "go", "rust", "java"];
228 for lang in files_by_language.keys() {
229 let count = if supported_pattern_languages.contains(&lang.as_str()) {
230 patterns_after
231 } else {
232 0
233 };
234 patterns_by_language.insert(lang.clone(), count);
235 }
236
237 let metadata = PatternMetadata {
239 files_analyzed,
240 files_skipped,
241 files_partial,
242 duration_ms,
243 language_distribution: LanguageDistribution {
244 files_by_language,
245 patterns_by_language,
246 },
247 patterns_before_filter: patterns_before,
248 patterns_after_filter: patterns_after,
249 confidence_threshold: self.config.min_confidence,
250 };
251
252 let constraints = if self.config.generate_constraints {
254 generate_constraints(&DetectedPatterns {
255 soft_delete: &soft_delete,
256 error_handling: &error_handling,
257 naming: &naming,
258 resource_management: &resource_management,
259 validation: &validation,
260 test_idioms: &test_idioms,
261 import_patterns: &import_patterns,
262 type_coverage: &type_coverage,
263 api_conventions: &api_conventions,
264 async_patterns: &async_patterns,
265 })
266 } else {
267 Vec::new()
268 };
269
270 let conflicts = self.detect_conflicts(&DetectedPatterns {
272 soft_delete: &soft_delete,
273 error_handling: &error_handling,
274 naming: &naming,
275 resource_management: &resource_management,
276 validation: &validation,
277 test_idioms: &test_idioms,
278 import_patterns: &import_patterns,
279 type_coverage: &type_coverage,
280 api_conventions: &api_conventions,
281 async_patterns: &async_patterns,
282 });
283
284 Ok(PatternReport {
285 metadata,
286 soft_delete,
287 error_handling,
288 naming,
289 resource_management,
290 validation,
291 test_idioms,
292 import_patterns,
293 type_coverage,
294 api_conventions,
295 async_patterns,
296 constraints,
297 conflicts,
298 })
299 }
300
301 fn collect_files(
303 &self,
304 path: &Path,
305 lang: Option<Language>,
306 ) -> TldrResult<Vec<(std::path::PathBuf, Language)>> {
307 if path.is_file() {
308 let file_lang = lang.or_else(|| Language::from_path(path)).ok_or_else(|| {
309 TldrError::UnsupportedLanguage(
310 path.extension()
311 .map(|e| e.to_string_lossy().to_string())
312 .unwrap_or_else(|| "unknown".to_string()),
313 )
314 })?;
315 return Ok(vec![(path.to_path_buf(), file_lang)]);
316 }
317
318 let mut files = Vec::new();
319 let ignore_spec = crate::IgnoreSpec::default();
320
321 let tree = get_file_tree(path, None, true, Some(&ignore_spec))?;
323 let source_files = collect_files(&tree, path);
324
325 for file_path in source_files {
326 let file_lang = match lang {
327 Some(l) => l,
328 None => match Language::from_path(&file_path) {
329 Some(l) => l,
330 None => continue,
331 },
332 };
333
334 if let Some(filter_lang) = lang {
336 if file_lang != filter_lang {
337 continue;
338 }
339 }
340
341 files.push((file_path, file_lang));
342 }
343
344 Ok(files)
345 }
346
347 fn extract_file_signals(
349 &self,
350 content: &str,
351 lang: Language,
352 file_path: &Path,
353 ) -> TldrResult<PatternSignals> {
354 let tree = self.parser_pool.parse(content, lang)?;
355 let detector = PatternDetector::new(lang, file_path.to_path_buf());
356 Ok(detector.detect_all(&tree, content))
357 }
358
359 fn extract_partial_signals(
361 &self,
362 content: &str,
363 lang: Language,
364 file_path: &Path,
365 ) -> TldrResult<PatternSignals> {
366 let detector = PatternDetector::new(lang, file_path.to_path_buf());
368 Ok(detector.detect_fallback(content))
369 }
370
371 fn signals_to_soft_delete(&self, signals: &PatternSignals) -> Option<SoftDeletePattern> {
373 soft_delete::signals_to_pattern(signals, self.config.evidence_limit)
374 }
375
376 fn signals_to_error_handling(&self, signals: &PatternSignals) -> Option<ErrorHandlingPattern> {
377 error_handling::signals_to_pattern(signals, self.config.evidence_limit)
378 }
379
380 fn signals_to_naming(&self, signals: &PatternSignals) -> Option<NamingPattern> {
381 naming::signals_to_pattern(signals)
382 }
383
384 fn signals_to_resource_mgmt(
385 &self,
386 signals: &PatternSignals,
387 ) -> Option<ResourceManagementPattern> {
388 resource_mgmt::signals_to_pattern(signals, self.config.evidence_limit)
389 }
390
391 fn signals_to_validation(&self, signals: &PatternSignals) -> Option<ValidationPattern> {
392 validation::signals_to_pattern(signals, self.config.evidence_limit)
393 }
394
395 fn signals_to_test_idioms(&self, signals: &PatternSignals) -> Option<TestIdiomPattern> {
396 test_idioms::signals_to_pattern(signals, self.config.evidence_limit)
397 }
398
399 fn signals_to_import_patterns(&self, signals: &PatternSignals) -> Option<ImportPattern> {
400 import_patterns::signals_to_pattern(signals, self.config.evidence_limit)
401 }
402
403 fn signals_to_type_coverage(&self, signals: &PatternSignals) -> Option<TypeCoveragePattern> {
404 type_coverage::signals_to_pattern(signals, self.config.evidence_limit)
405 }
406
407 fn signals_to_api_conventions(&self, signals: &PatternSignals) -> Option<ApiConventionPattern> {
408 api_conventions::signals_to_pattern(signals, self.config.evidence_limit)
409 }
410
411 fn signals_to_async_patterns(&self, signals: &PatternSignals) -> Option<AsyncPattern> {
412 async_patterns::signals_to_pattern(signals, self.config.evidence_limit)
413 }
414
415 fn filter_by_confidence<T: HasConfidence>(&self, pattern: Option<T>) -> Option<T> {
417 pattern.filter(|p| p.confidence() >= self.config.min_confidence)
418 }
419
420 fn count_patterns_before_filter(&self, patterns: &DetectedPatterns<'_>) -> usize {
422 let mut count = 0;
423 if patterns.soft_delete.is_some() {
424 count += 1;
425 }
426 if patterns.error_handling.is_some() {
427 count += 1;
428 }
429 if patterns.naming.is_some() {
430 count += 1;
431 }
432 if patterns.resource_management.is_some() {
433 count += 1;
434 }
435 if patterns.validation.is_some() {
436 count += 1;
437 }
438 if patterns.test_idioms.is_some() {
439 count += 1;
440 }
441 if patterns.import_patterns.is_some() {
442 count += 1;
443 }
444 if patterns.type_coverage.is_some() {
445 count += 1;
446 }
447 if patterns.api_conventions.is_some() {
448 count += 1;
449 }
450 if patterns.async_patterns.is_some() {
451 count += 1;
452 }
453 count
454 }
455
456 fn detect_conflicts(&self, patterns: &DetectedPatterns<'_>) -> Vec<String> {
458 let mut conflicts = Vec::new();
459
460 if let Some(imports) = patterns.import_patterns {
462 if imports.grouping_style == crate::types::ImportGrouping::Ungrouped {
463 conflicts.push(
464 "Inconsistent import grouping: no clear ordering pattern detected".to_string(),
465 );
466 }
467 if imports.absolute_vs_relative == crate::types::ImportStyle::Mixed {
468 conflicts.push(
469 "Mixed import styles: some files use absolute imports, others use relative"
470 .to_string(),
471 );
472 }
473 }
474
475 conflicts
476 }
477}
478
/// Implemented by pattern types that can report a score for the miner to
/// compare against its `min_confidence` threshold.
pub trait HasConfidence {
    /// Score used by the miner's confidence filter.
    fn confidence(&self) -> f64;
}
484
485impl HasConfidence for SoftDeletePattern {
486 fn confidence(&self) -> f64 {
487 self.confidence
488 }
489}
490
491impl HasConfidence for ErrorHandlingPattern {
492 fn confidence(&self) -> f64 {
493 self.confidence
494 }
495}
496
497impl HasConfidence for NamingPattern {
498 fn confidence(&self) -> f64 {
499 self.consistency_score
500 }
501}
502
503impl HasConfidence for ResourceManagementPattern {
504 fn confidence(&self) -> f64 {
505 self.confidence
506 }
507}
508
509impl HasConfidence for ValidationPattern {
510 fn confidence(&self) -> f64 {
511 self.confidence
512 }
513}
514
515impl HasConfidence for TestIdiomPattern {
516 fn confidence(&self) -> f64 {
517 self.confidence
518 }
519}
520
521impl HasConfidence for ImportPattern {
522 fn confidence(&self) -> f64 {
523 1.0 }
525}
526
527impl HasConfidence for TypeCoveragePattern {
528 fn confidence(&self) -> f64 {
529 self.coverage_overall
530 }
531}
532
533impl HasConfidence for ApiConventionPattern {
534 fn confidence(&self) -> f64 {
535 self.confidence
536 }
537}
538
539impl HasConfidence for AsyncPattern {
540 fn confidence(&self) -> f64 {
541 self.concurrency_confidence
542 }
543}
544
545pub fn detect_patterns(path: &Path, lang: Option<Language>) -> TldrResult<PatternReport> {
547 let miner = PatternMiner::new(PatternConfig::default());
548 miner.mine_patterns(path, lang)
549}
550
551pub fn detect_patterns_with_config(
553 path: &Path,
554 lang: Option<Language>,
555 config: PatternConfig,
556) -> TldrResult<PatternReport> {
557 let miner = PatternMiner::new(config);
558 miner.mine_patterns(path, lang)
559}
560
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{
        ImportGrouping, ImportPattern, ImportStyle, NamingConvention, NamingPattern,
        StarImportUsage, TypeCoveragePattern,
    };

    /// Builds a miner whose only non-default setting is `min_confidence`.
    fn miner_with_threshold(threshold: f64) -> PatternMiner {
        let config = PatternConfig {
            min_confidence: threshold,
            ..PatternConfig::default()
        };
        PatternMiner::new(config)
    }

    /// A naming pattern scoring below the threshold must be dropped.
    #[test]
    fn test_all_pattern_types_filtered_by_confidence_naming() {
        let weak_naming = NamingPattern {
            functions: NamingConvention::SnakeCase,
            classes: NamingConvention::PascalCase,
            constants: NamingConvention::UpperSnakeCase,
            private_prefix: None,
            consistency_score: 0.3,
            violations: Vec::new(),
        };

        let survivor = miner_with_threshold(0.7).filter_by_confidence(Some(weak_naming));

        assert!(
            survivor.is_none(),
            "NamingPattern with consistency_score 0.3 should be filtered out at threshold 0.7, \
             but it survived the filter. This indicates naming patterns skip confidence filtering."
        );
    }

    /// Import patterns report a fixed score of 1.0 and so must be kept.
    #[test]
    fn test_all_pattern_types_filtered_by_confidence_imports() {
        let imports = ImportPattern {
            grouping_style: ImportGrouping::StdlibFirst,
            absolute_vs_relative: ImportStyle::Absolute,
            star_imports: StarImportUsage::None,
            alias_conventions: Vec::new(),
            evidence: Vec::new(),
        };

        let survivor = miner_with_threshold(0.7).filter_by_confidence(Some(imports));

        assert!(
            survivor.is_some(),
            "ImportPattern with confidence 1.0 should survive threshold 0.7"
        );
    }

    /// A type-coverage pattern scoring below the threshold must be dropped.
    #[test]
    fn test_all_pattern_types_filtered_by_confidence_type_coverage() {
        let sparse_coverage = TypeCoveragePattern {
            coverage_overall: 0.2,
            coverage_functions: 0.1,
            coverage_variables: 0.3,
            typevar_usage: false,
            generic_patterns: Vec::new(),
            evidence: Vec::new(),
        };

        let survivor = miner_with_threshold(0.7).filter_by_confidence(Some(sparse_coverage));

        assert!(
            survivor.is_none(),
            "TypeCoveragePattern with coverage_overall 0.2 should be filtered out at threshold 0.7, \
             but it survived the filter. This indicates type_coverage patterns skip confidence filtering."
        );
    }

    /// Exercises a local copy of the per-language attribution rule:
    /// supported languages receive the overall count, others receive zero.
    #[test]
    fn test_patterns_by_language_independent() {
        use std::collections::HashMap;

        let files_by_language: HashMap<String, usize> = [("python", 10_usize), ("lua", 5_usize)]
            .iter()
            .map(|(language, file_count)| (language.to_string(), *file_count))
            .collect();

        let patterns_after = 4_usize;

        let supported_pattern_languages: &[&str] =
            &["python", "typescript", "javascript", "go", "rust", "java"];
        let patterns_by_language: HashMap<String, usize> = files_by_language
            .keys()
            .map(|lang| {
                let is_supported = supported_pattern_languages.contains(&lang.as_str());
                let count = if is_supported { patterns_after } else { 0 };
                (lang.clone(), count)
            })
            .collect();

        let python_count = patterns_by_language["python"];
        let lua_count = patterns_by_language["lua"];

        assert_eq!(
            python_count, patterns_after,
            "Supported language (python) should get patterns_after count ({}), got {}",
            patterns_after, python_count
        );

        assert_eq!(
            lua_count, 0,
            "Unsupported language (lua) should get 0 patterns, got {}",
            lua_count
        );

        assert_ne!(
            python_count, lua_count,
            "patterns_by_language should have per-language counts: supported languages get \
             the global count, unsupported languages get 0. Both got {}.",
            python_count
        );
    }

    /// Patterns scoring above the threshold must pass through untouched.
    #[test]
    fn test_patterns_survive_filter_when_high_confidence() {
        let miner = miner_with_threshold(0.5);

        let strong_naming = NamingPattern {
            functions: NamingConvention::SnakeCase,
            classes: NamingConvention::PascalCase,
            constants: NamingConvention::UpperSnakeCase,
            private_prefix: Some("_".to_string()),
            consistency_score: 0.95,
            violations: Vec::new(),
        };
        assert!(
            miner.filter_by_confidence(Some(strong_naming)).is_some(),
            "NamingPattern with consistency_score 0.95 should survive threshold 0.5"
        );

        let strong_coverage = TypeCoveragePattern {
            coverage_overall: 0.85,
            coverage_functions: 0.9,
            coverage_variables: 0.8,
            typevar_usage: true,
            generic_patterns: vec!["Optional".to_string()],
            evidence: Vec::new(),
        };
        assert!(
            miner.filter_by_confidence(Some(strong_coverage)).is_some(),
            "TypeCoveragePattern with coverage_overall 0.85 should survive threshold 0.5"
        );
    }
}