1use crate::ComprehensiveEvaluation;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
12pub enum TuningCategory {
13 Statistical,
15 Coherence,
17 Quality,
19 MLReadiness,
21 Performance,
23 Anomaly,
25}
26
27#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
29pub enum TuningPriority {
30 Critical,
32 High,
34 Medium,
36 Low,
38 Info,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct TuningOpportunity {
45 pub category: TuningCategory,
47 pub priority: TuningPriority,
49 pub title: String,
51 pub description: String,
53 pub current_value: Option<String>,
55 pub target_value: Option<String>,
57 pub expected_improvement: String,
59 pub config_paths: Vec<String>,
61}
62
63impl TuningOpportunity {
64 pub fn new(
66 category: TuningCategory,
67 priority: TuningPriority,
68 title: impl Into<String>,
69 description: impl Into<String>,
70 ) -> Self {
71 Self {
72 category,
73 priority,
74 title: title.into(),
75 description: description.into(),
76 current_value: None,
77 target_value: None,
78 expected_improvement: String::new(),
79 config_paths: Vec::new(),
80 }
81 }
82
83 pub fn with_current_value(mut self, value: impl Into<String>) -> Self {
85 self.current_value = Some(value.into());
86 self
87 }
88
89 pub fn with_target_value(mut self, value: impl Into<String>) -> Self {
91 self.target_value = Some(value.into());
92 self
93 }
94
95 pub fn with_expected_improvement(mut self, improvement: impl Into<String>) -> Self {
97 self.expected_improvement = improvement.into();
98 self
99 }
100
101 pub fn with_config_path(mut self, path: impl Into<String>) -> Self {
103 self.config_paths.push(path.into());
104 self
105 }
106}
107
108#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct ConfigSuggestion {
111 pub path: String,
113 pub current_value: String,
115 pub suggested_value: String,
117 pub reason: String,
119 pub confidence: f64,
121 pub auto_fixable: bool,
123}
124
125impl ConfigSuggestion {
126 pub fn new(
128 path: impl Into<String>,
129 current_value: impl Into<String>,
130 suggested_value: impl Into<String>,
131 reason: impl Into<String>,
132 ) -> Self {
133 Self {
134 path: path.into(),
135 current_value: current_value.into(),
136 suggested_value: suggested_value.into(),
137 reason: reason.into(),
138 confidence: 0.5,
139 auto_fixable: false,
140 }
141 }
142
143 pub fn with_confidence(mut self, confidence: f64) -> Self {
145 self.confidence = confidence.clamp(0.0, 1.0);
146 self
147 }
148
149 pub fn auto_fixable(mut self) -> Self {
151 self.auto_fixable = true;
152 self
153 }
154}
155
/// Turns an evaluation into a prioritized list of `TuningOpportunity` values.
///
/// Derives `Debug` and `Clone` so the analyzer can be logged and duplicated
/// like the other public types in this module.
#[derive(Debug, Clone)]
pub struct TuningAnalyzer {
    /// Minimum gap fraction configured through `with_min_gap`.
    ///
    /// NOTE(review): no analysis routine visible in this file reads this
    /// value; confirm the intended use.
    min_gap_fraction: f64,
    /// When `false`, `analyze` drops `Low` and `Info` findings from its result.
    include_low_priority: bool,
}
163
164impl TuningAnalyzer {
165 pub fn new() -> Self {
167 Self {
168 min_gap_fraction: 0.05,
169 include_low_priority: true,
170 }
171 }
172
173 pub fn with_min_gap(mut self, gap: f64) -> Self {
175 self.min_gap_fraction = gap;
176 self
177 }
178
179 pub fn with_low_priority(mut self, include: bool) -> Self {
181 self.include_low_priority = include;
182 self
183 }
184
185 pub fn analyze(&self, evaluation: &ComprehensiveEvaluation) -> Vec<TuningOpportunity> {
187 let mut opportunities = Vec::new();
188
189 self.analyze_statistical(&evaluation.statistical, &mut opportunities);
191
192 self.analyze_coherence(&evaluation.coherence, &mut opportunities);
194
195 self.analyze_quality(&evaluation.quality, &mut opportunities);
197
198 self.analyze_ml_readiness(&evaluation.ml_readiness, &mut opportunities);
200
201 if !self.include_low_priority {
203 opportunities.retain(|o| {
204 o.priority != TuningPriority::Low && o.priority != TuningPriority::Info
205 });
206 }
207
208 opportunities.sort_by(|a, b| a.priority.cmp(&b.priority));
210
211 opportunities
212 }
213
214 fn analyze_statistical(
215 &self,
216 stat: &crate::statistical::StatisticalEvaluation,
217 opportunities: &mut Vec<TuningOpportunity>,
218 ) {
219 if let Some(ref benford) = stat.benford {
221 if benford.p_value < 0.05 {
222 let priority = if benford.p_value < 0.01 {
223 TuningPriority::High
224 } else {
225 TuningPriority::Medium
226 };
227
228 opportunities.push(
229 TuningOpportunity::new(
230 TuningCategory::Statistical,
231 priority,
232 "Benford's Law Non-Conformance",
233 "Generated amounts do not follow Benford's Law distribution",
234 )
235 .with_current_value(format!("p-value: {:.4}", benford.p_value))
236 .with_target_value("p-value > 0.05")
237 .with_expected_improvement("Better statistical realism")
238 .with_config_path("transactions.amount.benford_compliance"),
239 );
240 }
241 }
242
243 if let Some(ref amount) = stat.amount_distribution {
245 if let Some(p_value) = amount.lognormal_ks_pvalue {
246 if p_value < 0.05 {
247 opportunities.push(
248 TuningOpportunity::new(
249 TuningCategory::Statistical,
250 TuningPriority::Medium,
251 "Amount Distribution Mismatch",
252 "Amount distribution does not match expected log-normal pattern",
253 )
254 .with_current_value(format!("KS p-value: {:.4}", p_value))
255 .with_target_value("KS p-value > 0.05")
256 .with_expected_improvement("More realistic amount patterns")
257 .with_config_path("transactions.amount.distribution"),
258 );
259 }
260 }
261
262 if amount.round_number_ratio < 0.05 {
264 opportunities.push(
265 TuningOpportunity::new(
266 TuningCategory::Statistical,
267 TuningPriority::Low,
268 "Low Round Number Bias",
269 "Round number occurrence is lower than typically seen in real data",
270 )
271 .with_current_value(format!("{:.1}%", amount.round_number_ratio * 100.0))
272 .with_target_value("5-15%")
273 .with_expected_improvement("More natural-looking amounts")
274 .with_config_path("transactions.amount.round_number_bias"),
275 );
276 }
277 }
278
279 if let Some(ref temporal) = stat.temporal {
281 if temporal.pattern_correlation < 0.6 {
282 opportunities.push(
283 TuningOpportunity::new(
284 TuningCategory::Statistical,
285 TuningPriority::Medium,
286 "Weak Temporal Patterns",
287 "Generated data lacks strong temporal patterns",
288 )
289 .with_current_value(format!("correlation: {:.3}", temporal.pattern_correlation))
290 .with_target_value("correlation > 0.8")
291 .with_expected_improvement("Better temporal realism")
292 .with_config_path("transactions.temporal"),
293 );
294 }
295 }
296 }
297
298 fn analyze_coherence(
299 &self,
300 coherence: &crate::coherence::CoherenceEvaluation,
301 opportunities: &mut Vec<TuningOpportunity>,
302 ) {
303 if let Some(ref balance) = coherence.balance {
305 if !balance.equation_balanced {
306 opportunities.push(
307 TuningOpportunity::new(
308 TuningCategory::Coherence,
309 TuningPriority::Critical,
310 "Balance Sheet Imbalance",
311 "Assets do not equal Liabilities + Equity",
312 )
313 .with_current_value(format!("max imbalance: {}", balance.max_imbalance))
314 .with_target_value("imbalance = 0")
315 .with_expected_improvement("Valid trial balance")
316 .with_config_path("balance.coherence_enabled"),
317 );
318 }
319 }
320
321 if let Some(ref subledger) = coherence.subledger {
323 if subledger.completeness_score < 0.99 {
324 opportunities.push(
325 TuningOpportunity::new(
326 TuningCategory::Coherence,
327 TuningPriority::High,
328 "Subledger Reconciliation Issues",
329 "Subledger balances do not fully reconcile to GL control accounts",
330 )
331 .with_current_value(format!("{:.1}%", subledger.completeness_score * 100.0))
332 .with_target_value("> 99%")
333 .with_expected_improvement("Full GL-subledger reconciliation")
334 .with_config_path("subledger"),
335 );
336 }
337 }
338
339 if let Some(ref doc_chain) = coherence.document_chain {
341 let avg_completion =
342 (doc_chain.p2p_completion_rate + doc_chain.o2c_completion_rate) / 2.0;
343 if avg_completion < 0.90 {
344 opportunities.push(
345 TuningOpportunity::new(
346 TuningCategory::Coherence,
347 TuningPriority::Medium,
348 "Incomplete Document Chains",
349 "Many document flows do not complete to payment/receipt",
350 )
351 .with_current_value(format!(
352 "P2P: {:.1}%, O2C: {:.1}%",
353 doc_chain.p2p_completion_rate * 100.0,
354 doc_chain.o2c_completion_rate * 100.0
355 ))
356 .with_target_value("> 90%")
357 .with_expected_improvement("More complete P2P/O2C flows")
358 .with_config_path("document_flows"),
359 );
360 }
361 }
362
363 if let Some(ref ic) = coherence.intercompany {
365 if ic.match_rate < 0.95 {
366 opportunities.push(
367 TuningOpportunity::new(
368 TuningCategory::Coherence,
369 TuningPriority::High,
370 "Intercompany Matching Issues",
371 "Intercompany transactions are not fully matched",
372 )
373 .with_current_value(format!("{:.1}%", ic.match_rate * 100.0))
374 .with_target_value("> 95%")
375 .with_expected_improvement("Clean IC reconciliation")
376 .with_config_path("intercompany"),
377 );
378 }
379 }
380 }
381
382 fn analyze_quality(
383 &self,
384 quality: &crate::quality::QualityEvaluation,
385 opportunities: &mut Vec<TuningOpportunity>,
386 ) {
387 if let Some(ref uniqueness) = quality.uniqueness {
389 if uniqueness.duplicate_rate > 0.01 {
390 opportunities.push(
391 TuningOpportunity::new(
392 TuningCategory::Quality,
393 TuningPriority::High,
394 "High Duplicate Rate",
395 "Excessive duplicate records detected",
396 )
397 .with_current_value(format!("{:.2}%", uniqueness.duplicate_rate * 100.0))
398 .with_target_value("< 1%")
399 .with_expected_improvement("Cleaner unique data")
400 .with_config_path("data_quality.duplicate_rate"),
401 );
402 }
403 }
404
405 if let Some(ref completeness) = quality.completeness {
407 if completeness.overall_completeness < 0.95 {
408 opportunities.push(
409 TuningOpportunity::new(
410 TuningCategory::Quality,
411 TuningPriority::Medium,
412 "Low Data Completeness",
413 "Many fields have missing values",
414 )
415 .with_current_value(format!(
416 "{:.1}%",
417 completeness.overall_completeness * 100.0
418 ))
419 .with_target_value("> 95%")
420 .with_expected_improvement("More complete records")
421 .with_config_path("data_quality.missing_rate"),
422 );
423 }
424 }
425
426 if let Some(ref format) = quality.format {
428 if format.consistency_score < 0.99 {
429 opportunities.push(
430 TuningOpportunity::new(
431 TuningCategory::Quality,
432 TuningPriority::Low,
433 "Format Inconsistencies",
434 "Some fields have inconsistent formats",
435 )
436 .with_current_value(format!("{:.1}%", format.consistency_score * 100.0))
437 .with_target_value("> 99%")
438 .with_expected_improvement("Consistent field formats")
439 .with_config_path("data_quality.format_variations"),
440 );
441 }
442 }
443 }
444
445 fn analyze_ml_readiness(
446 &self,
447 ml: &crate::ml::MLReadinessEvaluation,
448 opportunities: &mut Vec<TuningOpportunity>,
449 ) {
450 if let Some(ref labels) = ml.labels {
452 if labels.anomaly_rate < 0.01 {
454 opportunities.push(
455 TuningOpportunity::new(
456 TuningCategory::MLReadiness,
457 TuningPriority::High,
458 "Low Anomaly Rate",
459 "Too few anomalies for effective ML training",
460 )
461 .with_current_value(format!("{:.2}%", labels.anomaly_rate * 100.0))
462 .with_target_value("1-20%")
463 .with_expected_improvement("Better ML model training")
464 .with_config_path("anomaly_injection.base_rate"),
465 );
466 } else if labels.anomaly_rate > 0.20 {
467 opportunities.push(
468 TuningOpportunity::new(
469 TuningCategory::MLReadiness,
470 TuningPriority::Medium,
471 "High Anomaly Rate",
472 "Too many anomalies may reduce model effectiveness",
473 )
474 .with_current_value(format!("{:.1}%", labels.anomaly_rate * 100.0))
475 .with_target_value("1-20%")
476 .with_expected_improvement("Realistic anomaly distribution")
477 .with_config_path("anomaly_injection.base_rate"),
478 );
479 }
480
481 if labels.label_coverage < 0.99 {
483 opportunities.push(
484 TuningOpportunity::new(
485 TuningCategory::MLReadiness,
486 TuningPriority::High,
487 "Low Label Coverage",
488 "Not all records have proper labels",
489 )
490 .with_current_value(format!("{:.1}%", labels.label_coverage * 100.0))
491 .with_target_value("> 99%")
492 .with_expected_improvement("Complete supervised labels")
493 .with_config_path("anomaly_injection"),
494 );
495 }
496 }
497
498 if let Some(ref splits) = ml.splits {
500 if !splits.is_valid {
501 opportunities.push(
502 TuningOpportunity::new(
503 TuningCategory::MLReadiness,
504 TuningPriority::High,
505 "Invalid Train/Test Splits",
506 "Train/validation/test splits have issues",
507 )
508 .with_expected_improvement("Valid ML evaluation setup")
509 .with_config_path("graph_export.train_ratio")
510 .with_config_path("graph_export.validation_ratio"),
511 );
512 }
513 }
514
515 if let Some(ref graph) = ml.graph {
517 if graph.connectivity_score < 0.95 {
518 opportunities.push(
519 TuningOpportunity::new(
520 TuningCategory::MLReadiness,
521 TuningPriority::Medium,
522 "Low Graph Connectivity",
523 "Transaction graph has isolated components",
524 )
525 .with_current_value(format!("{:.1}%", graph.connectivity_score * 100.0))
526 .with_target_value("> 95%")
527 .with_expected_improvement("Better GNN training")
528 .with_config_path("graph_export"),
529 );
530 }
531 }
532 }
533}
534
535impl Default for TuningAnalyzer {
536 fn default() -> Self {
537 Self::new()
538 }
539}
540
/// Converts tuning opportunities into configuration suggestions using
/// per-path templates.
///
/// Derives `Debug` and `Clone` so the generator can be logged and duplicated
/// like the other public types in this module.
#[derive(Debug, Clone)]
pub struct ConfigSuggestionGenerator {
    /// Templates keyed by configuration path. Only paths with an entry here
    /// produce suggestions.
    templates: HashMap<String, SuggestionTemplate>,
}

/// Per-path information used when building a suggestion.
#[derive(Debug, Clone)]
struct SuggestionTemplate {
    /// Suggested value used when the opportunity carries no target value.
    default_value: String,
    /// Text copied into the suggestion's `reason`.
    description: String,
    /// Whether suggestions for this path are flagged auto-fixable.
    auto_fixable: bool,
}
553
554impl ConfigSuggestionGenerator {
555 pub fn new() -> Self {
557 let mut templates = HashMap::new();
558
559 templates.insert(
561 "transactions.amount.benford_compliance".to_string(),
562 SuggestionTemplate {
563 default_value: "true".to_string(),
564 description: "Enable Benford's Law compliance for amount generation".to_string(),
565 auto_fixable: true,
566 },
567 );
568
569 templates.insert(
570 "transactions.amount.round_number_bias".to_string(),
571 SuggestionTemplate {
572 default_value: "0.10".to_string(),
573 description: "Increase round number occurrence rate".to_string(),
574 auto_fixable: true,
575 },
576 );
577
578 templates.insert(
579 "anomaly_injection.base_rate".to_string(),
580 SuggestionTemplate {
581 default_value: "0.05".to_string(),
582 description: "Adjust anomaly injection rate".to_string(),
583 auto_fixable: true,
584 },
585 );
586
587 Self { templates }
588 }
589
590 pub fn generate(&self, opportunities: &[TuningOpportunity]) -> Vec<ConfigSuggestion> {
592 let mut suggestions = Vec::new();
593
594 for opportunity in opportunities {
595 for path in &opportunity.config_paths {
596 if let Some(template) = self.templates.get(path) {
597 let current = opportunity.current_value.clone().unwrap_or_default();
598 let suggested = opportunity
599 .target_value
600 .clone()
601 .unwrap_or_else(|| template.default_value.clone());
602
603 let mut suggestion = ConfigSuggestion::new(
604 path.clone(),
605 current,
606 suggested,
607 template.description.clone(),
608 );
609
610 let confidence = match opportunity.priority {
612 TuningPriority::Critical => 0.95,
613 TuningPriority::High => 0.85,
614 TuningPriority::Medium => 0.70,
615 TuningPriority::Low => 0.50,
616 TuningPriority::Info => 0.30,
617 };
618
619 suggestion = suggestion.with_confidence(confidence);
620
621 if template.auto_fixable {
622 suggestion = suggestion.auto_fixable();
623 }
624
625 suggestions.push(suggestion);
626 }
627 }
628 }
629
630 suggestions
631 }
632
633 pub fn add_template(
635 &mut self,
636 path: impl Into<String>,
637 default_value: impl Into<String>,
638 description: impl Into<String>,
639 auto_fixable: bool,
640 ) {
641 self.templates.insert(
642 path.into(),
643 SuggestionTemplate {
644 default_value: default_value.into(),
645 description: description.into(),
646 auto_fixable,
647 },
648 );
649 }
650}
651
652impl Default for ConfigSuggestionGenerator {
653 fn default() -> Self {
654 Self::new()
655 }
656}
657
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder methods populate the fields they are named after.
    #[test]
    fn test_tuning_opportunity_creation() {
        let built = TuningOpportunity::new(
            TuningCategory::Statistical,
            TuningPriority::High,
            "Test Opportunity",
            "Test description",
        )
        .with_current_value("0.01")
        .with_target_value("0.05")
        .with_expected_improvement("Better results")
        .with_config_path("test.path");

        assert_eq!(TuningCategory::Statistical, built.category);
        assert_eq!(TuningPriority::High, built.priority);
        assert_eq!(built.current_value.as_deref(), Some("0.01"));
        assert_eq!(built.config_paths.len(), 1);
    }

    /// An in-range confidence is stored unchanged and the auto-fix flag is set.
    #[test]
    fn test_config_suggestion_creation() {
        let base = ConfigSuggestion::new("test.path", "old_value", "new_value", "Test reason");
        let suggestion = base.with_confidence(0.8).auto_fixable();

        assert_eq!(suggestion.path.as_str(), "test.path");
        assert_eq!(suggestion.confidence, 0.8);
        assert!(suggestion.auto_fixable);
    }

    /// The default analyzer keeps low-priority findings.
    #[test]
    fn test_tuning_analyzer_default() {
        let TuningAnalyzer {
            include_low_priority,
            ..
        } = TuningAnalyzer::default();
        assert!(include_low_priority);
    }

    /// A new generator ships with the anomaly base-rate template.
    #[test]
    fn test_suggestion_generator() {
        let generator = ConfigSuggestionGenerator::new();
        let has_base_rate = generator
            .templates
            .contains_key("anomaly_injection.base_rate");
        assert!(has_base_rate);
    }
}