datasynth_eval/tuning/
mod.rs

1//! Configuration tuning and optimization suggestions.
2//!
3//! Analyzes evaluation results to identify tuning opportunities
4//! and generate actionable configuration suggestions.
5
6use crate::ComprehensiveEvaluation;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// Category of tuning opportunity.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
12pub enum TuningCategory {
13    /// Statistical distribution tuning (Benford's, amount distributions).
14    Statistical,
15    /// Balance and coherence tuning.
16    Coherence,
17    /// Data quality tuning (completeness, uniqueness).
18    Quality,
19    /// ML-readiness tuning (labels, splits, features).
20    MLReadiness,
21    /// Performance optimization.
22    Performance,
23    /// Anomaly injection tuning.
24    Anomaly,
25}
26
27/// Priority level for tuning recommendations.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
29pub enum TuningPriority {
30    /// Critical issue that needs immediate attention.
31    Critical,
32    /// High priority improvement.
33    High,
34    /// Medium priority enhancement.
35    Medium,
36    /// Low priority fine-tuning.
37    Low,
38    /// Informational suggestion.
39    Info,
40}
41
42/// A tuning opportunity identified from evaluation results.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct TuningOpportunity {
45    /// Category of the tuning opportunity.
46    pub category: TuningCategory,
47    /// Priority level.
48    pub priority: TuningPriority,
49    /// Short title.
50    pub title: String,
51    /// Detailed description.
52    pub description: String,
53    /// Current value or state.
54    pub current_value: Option<String>,
55    /// Recommended target value or state.
56    pub target_value: Option<String>,
57    /// Expected improvement description.
58    pub expected_improvement: String,
59    /// Related configuration path(s).
60    pub config_paths: Vec<String>,
61}
62
63impl TuningOpportunity {
64    /// Create a new tuning opportunity.
65    pub fn new(
66        category: TuningCategory,
67        priority: TuningPriority,
68        title: impl Into<String>,
69        description: impl Into<String>,
70    ) -> Self {
71        Self {
72            category,
73            priority,
74            title: title.into(),
75            description: description.into(),
76            current_value: None,
77            target_value: None,
78            expected_improvement: String::new(),
79            config_paths: Vec::new(),
80        }
81    }
82
83    /// Set current value.
84    pub fn with_current_value(mut self, value: impl Into<String>) -> Self {
85        self.current_value = Some(value.into());
86        self
87    }
88
89    /// Set target value.
90    pub fn with_target_value(mut self, value: impl Into<String>) -> Self {
91        self.target_value = Some(value.into());
92        self
93    }
94
95    /// Set expected improvement.
96    pub fn with_expected_improvement(mut self, improvement: impl Into<String>) -> Self {
97        self.expected_improvement = improvement.into();
98        self
99    }
100
101    /// Add related config path.
102    pub fn with_config_path(mut self, path: impl Into<String>) -> Self {
103        self.config_paths.push(path.into());
104        self
105    }
106}
107
108/// A specific configuration change suggestion.
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct ConfigSuggestion {
111    /// Configuration path (e.g., "transactions.amount.round_number_bias").
112    pub path: String,
113    /// Current value (as string representation).
114    pub current_value: String,
115    /// Suggested new value.
116    pub suggested_value: String,
117    /// Reason for the suggestion.
118    pub reason: String,
119    /// Confidence level (0.0-1.0).
120    pub confidence: f64,
121    /// Whether this is an automatic fix.
122    pub auto_fixable: bool,
123}
124
125impl ConfigSuggestion {
126    /// Create a new config suggestion.
127    pub fn new(
128        path: impl Into<String>,
129        current_value: impl Into<String>,
130        suggested_value: impl Into<String>,
131        reason: impl Into<String>,
132    ) -> Self {
133        Self {
134            path: path.into(),
135            current_value: current_value.into(),
136            suggested_value: suggested_value.into(),
137            reason: reason.into(),
138            confidence: 0.5,
139            auto_fixable: false,
140        }
141    }
142
143    /// Set confidence level.
144    pub fn with_confidence(mut self, confidence: f64) -> Self {
145        self.confidence = confidence.clamp(0.0, 1.0);
146        self
147    }
148
149    /// Mark as auto-fixable.
150    pub fn auto_fixable(mut self) -> Self {
151        self.auto_fixable = true;
152        self
153    }
154}
155
/// Analyzes evaluation results to identify tuning opportunities.
///
/// Derives `Debug` and `Clone` so an analyzer can be logged and duplicated
/// like the other public types in this module.
#[derive(Debug, Clone)]
pub struct TuningAnalyzer {
    /// Minimum threshold gap to trigger a suggestion (as fraction).
    ///
    /// NOTE(review): the `analyze_*` checks in this file compare against fixed
    /// thresholds and do not appear to read this field — confirm whether it is
    /// meant to gate them.
    min_gap_fraction: f64,
    /// Whether to include low-priority suggestions.
    include_low_priority: bool,
}
163
164impl TuningAnalyzer {
165    /// Create a new tuning analyzer.
166    pub fn new() -> Self {
167        Self {
168            min_gap_fraction: 0.05,
169            include_low_priority: true,
170        }
171    }
172
173    /// Set minimum gap fraction to trigger suggestions.
174    pub fn with_min_gap(mut self, gap: f64) -> Self {
175        self.min_gap_fraction = gap;
176        self
177    }
178
179    /// Set whether to include low-priority suggestions.
180    pub fn with_low_priority(mut self, include: bool) -> Self {
181        self.include_low_priority = include;
182        self
183    }
184
185    /// Analyze evaluation results and return tuning opportunities.
186    pub fn analyze(&self, evaluation: &ComprehensiveEvaluation) -> Vec<TuningOpportunity> {
187        let mut opportunities = Vec::new();
188
189        // Analyze statistical issues
190        self.analyze_statistical(&evaluation.statistical, &mut opportunities);
191
192        // Analyze coherence issues
193        self.analyze_coherence(&evaluation.coherence, &mut opportunities);
194
195        // Analyze quality issues
196        self.analyze_quality(&evaluation.quality, &mut opportunities);
197
198        // Analyze ML-readiness issues
199        self.analyze_ml_readiness(&evaluation.ml_readiness, &mut opportunities);
200
201        // Filter by priority if needed
202        if !self.include_low_priority {
203            opportunities.retain(|o| {
204                o.priority != TuningPriority::Low && o.priority != TuningPriority::Info
205            });
206        }
207
208        // Sort by priority
209        opportunities.sort_by(|a, b| a.priority.cmp(&b.priority));
210
211        opportunities
212    }
213
214    fn analyze_statistical(
215        &self,
216        stat: &crate::statistical::StatisticalEvaluation,
217        opportunities: &mut Vec<TuningOpportunity>,
218    ) {
219        // Check Benford's Law conformity
220        if let Some(ref benford) = stat.benford {
221            if benford.p_value < 0.05 {
222                let priority = if benford.p_value < 0.01 {
223                    TuningPriority::High
224                } else {
225                    TuningPriority::Medium
226                };
227
228                opportunities.push(
229                    TuningOpportunity::new(
230                        TuningCategory::Statistical,
231                        priority,
232                        "Benford's Law Non-Conformance",
233                        "Generated amounts do not follow Benford's Law distribution",
234                    )
235                    .with_current_value(format!("p-value: {:.4}", benford.p_value))
236                    .with_target_value("p-value > 0.05")
237                    .with_expected_improvement("Better statistical realism")
238                    .with_config_path("transactions.amount.benford_compliance"),
239                );
240            }
241        }
242
243        // Check amount distribution
244        if let Some(ref amount) = stat.amount_distribution {
245            if let Some(p_value) = amount.lognormal_ks_pvalue {
246                if p_value < 0.05 {
247                    opportunities.push(
248                        TuningOpportunity::new(
249                            TuningCategory::Statistical,
250                            TuningPriority::Medium,
251                            "Amount Distribution Mismatch",
252                            "Amount distribution does not match expected log-normal pattern",
253                        )
254                        .with_current_value(format!("KS p-value: {:.4}", p_value))
255                        .with_target_value("KS p-value > 0.05")
256                        .with_expected_improvement("More realistic amount patterns")
257                        .with_config_path("transactions.amount.distribution"),
258                    );
259                }
260            }
261
262            // Check round number bias
263            if amount.round_number_ratio < 0.05 {
264                opportunities.push(
265                    TuningOpportunity::new(
266                        TuningCategory::Statistical,
267                        TuningPriority::Low,
268                        "Low Round Number Bias",
269                        "Round number occurrence is lower than typically seen in real data",
270                    )
271                    .with_current_value(format!("{:.1}%", amount.round_number_ratio * 100.0))
272                    .with_target_value("5-15%")
273                    .with_expected_improvement("More natural-looking amounts")
274                    .with_config_path("transactions.amount.round_number_bias"),
275                );
276            }
277        }
278
279        // Check temporal patterns
280        if let Some(ref temporal) = stat.temporal {
281            if temporal.pattern_correlation < 0.6 {
282                opportunities.push(
283                    TuningOpportunity::new(
284                        TuningCategory::Statistical,
285                        TuningPriority::Medium,
286                        "Weak Temporal Patterns",
287                        "Generated data lacks strong temporal patterns",
288                    )
289                    .with_current_value(format!("correlation: {:.3}", temporal.pattern_correlation))
290                    .with_target_value("correlation > 0.8")
291                    .with_expected_improvement("Better temporal realism")
292                    .with_config_path("transactions.temporal"),
293                );
294            }
295        }
296    }
297
298    fn analyze_coherence(
299        &self,
300        coherence: &crate::coherence::CoherenceEvaluation,
301        opportunities: &mut Vec<TuningOpportunity>,
302    ) {
303        // Check balance sheet
304        if let Some(ref balance) = coherence.balance {
305            if !balance.equation_balanced {
306                opportunities.push(
307                    TuningOpportunity::new(
308                        TuningCategory::Coherence,
309                        TuningPriority::Critical,
310                        "Balance Sheet Imbalance",
311                        "Assets do not equal Liabilities + Equity",
312                    )
313                    .with_current_value(format!("max imbalance: {}", balance.max_imbalance))
314                    .with_target_value("imbalance = 0")
315                    .with_expected_improvement("Valid trial balance")
316                    .with_config_path("balance.coherence_enabled"),
317                );
318            }
319        }
320
321        // Check subledger reconciliation
322        if let Some(ref subledger) = coherence.subledger {
323            if subledger.completeness_score < 0.99 {
324                opportunities.push(
325                    TuningOpportunity::new(
326                        TuningCategory::Coherence,
327                        TuningPriority::High,
328                        "Subledger Reconciliation Issues",
329                        "Subledger balances do not fully reconcile to GL control accounts",
330                    )
331                    .with_current_value(format!("{:.1}%", subledger.completeness_score * 100.0))
332                    .with_target_value("> 99%")
333                    .with_expected_improvement("Full GL-subledger reconciliation")
334                    .with_config_path("subledger"),
335                );
336            }
337        }
338
339        // Check document chains
340        if let Some(ref doc_chain) = coherence.document_chain {
341            let avg_completion =
342                (doc_chain.p2p_completion_rate + doc_chain.o2c_completion_rate) / 2.0;
343            if avg_completion < 0.90 {
344                opportunities.push(
345                    TuningOpportunity::new(
346                        TuningCategory::Coherence,
347                        TuningPriority::Medium,
348                        "Incomplete Document Chains",
349                        "Many document flows do not complete to payment/receipt",
350                    )
351                    .with_current_value(format!(
352                        "P2P: {:.1}%, O2C: {:.1}%",
353                        doc_chain.p2p_completion_rate * 100.0,
354                        doc_chain.o2c_completion_rate * 100.0
355                    ))
356                    .with_target_value("> 90%")
357                    .with_expected_improvement("More complete P2P/O2C flows")
358                    .with_config_path("document_flows"),
359                );
360            }
361        }
362
363        // Check IC matching
364        if let Some(ref ic) = coherence.intercompany {
365            if ic.match_rate < 0.95 {
366                opportunities.push(
367                    TuningOpportunity::new(
368                        TuningCategory::Coherence,
369                        TuningPriority::High,
370                        "Intercompany Matching Issues",
371                        "Intercompany transactions are not fully matched",
372                    )
373                    .with_current_value(format!("{:.1}%", ic.match_rate * 100.0))
374                    .with_target_value("> 95%")
375                    .with_expected_improvement("Clean IC reconciliation")
376                    .with_config_path("intercompany"),
377                );
378            }
379        }
380    }
381
382    fn analyze_quality(
383        &self,
384        quality: &crate::quality::QualityEvaluation,
385        opportunities: &mut Vec<TuningOpportunity>,
386    ) {
387        // Check uniqueness
388        if let Some(ref uniqueness) = quality.uniqueness {
389            if uniqueness.duplicate_rate > 0.01 {
390                opportunities.push(
391                    TuningOpportunity::new(
392                        TuningCategory::Quality,
393                        TuningPriority::High,
394                        "High Duplicate Rate",
395                        "Excessive duplicate records detected",
396                    )
397                    .with_current_value(format!("{:.2}%", uniqueness.duplicate_rate * 100.0))
398                    .with_target_value("< 1%")
399                    .with_expected_improvement("Cleaner unique data")
400                    .with_config_path("data_quality.duplicate_rate"),
401                );
402            }
403        }
404
405        // Check completeness
406        if let Some(ref completeness) = quality.completeness {
407            if completeness.overall_completeness < 0.95 {
408                opportunities.push(
409                    TuningOpportunity::new(
410                        TuningCategory::Quality,
411                        TuningPriority::Medium,
412                        "Low Data Completeness",
413                        "Many fields have missing values",
414                    )
415                    .with_current_value(format!(
416                        "{:.1}%",
417                        completeness.overall_completeness * 100.0
418                    ))
419                    .with_target_value("> 95%")
420                    .with_expected_improvement("More complete records")
421                    .with_config_path("data_quality.missing_rate"),
422                );
423            }
424        }
425
426        // Check format consistency
427        if let Some(ref format) = quality.format {
428            if format.consistency_score < 0.99 {
429                opportunities.push(
430                    TuningOpportunity::new(
431                        TuningCategory::Quality,
432                        TuningPriority::Low,
433                        "Format Inconsistencies",
434                        "Some fields have inconsistent formats",
435                    )
436                    .with_current_value(format!("{:.1}%", format.consistency_score * 100.0))
437                    .with_target_value("> 99%")
438                    .with_expected_improvement("Consistent field formats")
439                    .with_config_path("data_quality.format_variations"),
440                );
441            }
442        }
443    }
444
445    fn analyze_ml_readiness(
446        &self,
447        ml: &crate::ml::MLReadinessEvaluation,
448        opportunities: &mut Vec<TuningOpportunity>,
449    ) {
450        // Check labels
451        if let Some(ref labels) = ml.labels {
452            // Check anomaly rate bounds
453            if labels.anomaly_rate < 0.01 {
454                opportunities.push(
455                    TuningOpportunity::new(
456                        TuningCategory::MLReadiness,
457                        TuningPriority::High,
458                        "Low Anomaly Rate",
459                        "Too few anomalies for effective ML training",
460                    )
461                    .with_current_value(format!("{:.2}%", labels.anomaly_rate * 100.0))
462                    .with_target_value("1-20%")
463                    .with_expected_improvement("Better ML model training")
464                    .with_config_path("anomaly_injection.base_rate"),
465                );
466            } else if labels.anomaly_rate > 0.20 {
467                opportunities.push(
468                    TuningOpportunity::new(
469                        TuningCategory::MLReadiness,
470                        TuningPriority::Medium,
471                        "High Anomaly Rate",
472                        "Too many anomalies may reduce model effectiveness",
473                    )
474                    .with_current_value(format!("{:.1}%", labels.anomaly_rate * 100.0))
475                    .with_target_value("1-20%")
476                    .with_expected_improvement("Realistic anomaly distribution")
477                    .with_config_path("anomaly_injection.base_rate"),
478                );
479            }
480
481            // Check label coverage
482            if labels.label_coverage < 0.99 {
483                opportunities.push(
484                    TuningOpportunity::new(
485                        TuningCategory::MLReadiness,
486                        TuningPriority::High,
487                        "Low Label Coverage",
488                        "Not all records have proper labels",
489                    )
490                    .with_current_value(format!("{:.1}%", labels.label_coverage * 100.0))
491                    .with_target_value("> 99%")
492                    .with_expected_improvement("Complete supervised labels")
493                    .with_config_path("anomaly_injection"),
494                );
495            }
496        }
497
498        // Check splits
499        if let Some(ref splits) = ml.splits {
500            if !splits.is_valid {
501                opportunities.push(
502                    TuningOpportunity::new(
503                        TuningCategory::MLReadiness,
504                        TuningPriority::High,
505                        "Invalid Train/Test Splits",
506                        "Train/validation/test splits have issues",
507                    )
508                    .with_expected_improvement("Valid ML evaluation setup")
509                    .with_config_path("graph_export.train_ratio")
510                    .with_config_path("graph_export.validation_ratio"),
511                );
512            }
513        }
514
515        // Check graph structure
516        if let Some(ref graph) = ml.graph {
517            if graph.connectivity_score < 0.95 {
518                opportunities.push(
519                    TuningOpportunity::new(
520                        TuningCategory::MLReadiness,
521                        TuningPriority::Medium,
522                        "Low Graph Connectivity",
523                        "Transaction graph has isolated components",
524                    )
525                    .with_current_value(format!("{:.1}%", graph.connectivity_score * 100.0))
526                    .with_target_value("> 95%")
527                    .with_expected_improvement("Better GNN training")
528                    .with_config_path("graph_export"),
529                );
530            }
531        }
532    }
533}
534
535impl Default for TuningAnalyzer {
536    fn default() -> Self {
537        Self::new()
538    }
539}
540
/// Generates configuration suggestions from tuning opportunities.
///
/// Derives `Debug` and `Clone` so a configured generator can be logged and
/// duplicated like the other public types in this module.
#[derive(Debug, Clone)]
pub struct ConfigSuggestionGenerator {
    /// Template suggestions by config path.
    templates: HashMap<String, SuggestionTemplate>,
}

/// Per-path template used when turning an opportunity into a suggestion.
#[derive(Debug, Clone)]
struct SuggestionTemplate {
    /// Value suggested when the opportunity carries no target value.
    default_value: String,
    /// Text used as the suggestion's reason.
    description: String,
    /// Whether suggestions built from this template are marked auto-fixable.
    auto_fixable: bool,
}
553
554impl ConfigSuggestionGenerator {
555    /// Create a new suggestion generator.
556    pub fn new() -> Self {
557        let mut templates = HashMap::new();
558
559        // Add common templates
560        templates.insert(
561            "transactions.amount.benford_compliance".to_string(),
562            SuggestionTemplate {
563                default_value: "true".to_string(),
564                description: "Enable Benford's Law compliance for amount generation".to_string(),
565                auto_fixable: true,
566            },
567        );
568
569        templates.insert(
570            "transactions.amount.round_number_bias".to_string(),
571            SuggestionTemplate {
572                default_value: "0.10".to_string(),
573                description: "Increase round number occurrence rate".to_string(),
574                auto_fixable: true,
575            },
576        );
577
578        templates.insert(
579            "anomaly_injection.base_rate".to_string(),
580            SuggestionTemplate {
581                default_value: "0.05".to_string(),
582                description: "Adjust anomaly injection rate".to_string(),
583                auto_fixable: true,
584            },
585        );
586
587        Self { templates }
588    }
589
590    /// Generate config suggestions from tuning opportunities.
591    pub fn generate(&self, opportunities: &[TuningOpportunity]) -> Vec<ConfigSuggestion> {
592        let mut suggestions = Vec::new();
593
594        for opportunity in opportunities {
595            for path in &opportunity.config_paths {
596                if let Some(template) = self.templates.get(path) {
597                    let current = opportunity.current_value.clone().unwrap_or_default();
598                    let suggested = opportunity
599                        .target_value
600                        .clone()
601                        .unwrap_or_else(|| template.default_value.clone());
602
603                    let mut suggestion = ConfigSuggestion::new(
604                        path.clone(),
605                        current,
606                        suggested,
607                        template.description.clone(),
608                    );
609
610                    // Set confidence based on priority
611                    let confidence = match opportunity.priority {
612                        TuningPriority::Critical => 0.95,
613                        TuningPriority::High => 0.85,
614                        TuningPriority::Medium => 0.70,
615                        TuningPriority::Low => 0.50,
616                        TuningPriority::Info => 0.30,
617                    };
618
619                    suggestion = suggestion.with_confidence(confidence);
620
621                    if template.auto_fixable {
622                        suggestion = suggestion.auto_fixable();
623                    }
624
625                    suggestions.push(suggestion);
626                }
627            }
628        }
629
630        suggestions
631    }
632
633    /// Add a custom template.
634    pub fn add_template(
635        &mut self,
636        path: impl Into<String>,
637        default_value: impl Into<String>,
638        description: impl Into<String>,
639        auto_fixable: bool,
640    ) {
641        self.templates.insert(
642            path.into(),
643            SuggestionTemplate {
644                default_value: default_value.into(),
645                description: description.into(),
646                auto_fixable,
647            },
648        );
649    }
650}
651
652impl Default for ConfigSuggestionGenerator {
653    fn default() -> Self {
654        Self::new()
655    }
656}
657
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder methods populate the fields they are named after.
    #[test]
    fn test_tuning_opportunity_creation() {
        let built = TuningOpportunity::new(
            TuningCategory::Statistical,
            TuningPriority::High,
            "Test Opportunity",
            "Test description",
        )
        .with_current_value("0.01")
        .with_target_value("0.05")
        .with_expected_improvement("Better results")
        .with_config_path("test.path");

        assert_eq!(built.category, TuningCategory::Statistical);
        assert_eq!(built.priority, TuningPriority::High);
        assert_eq!(built.current_value.as_deref(), Some("0.01"));
        assert_eq!(built.config_paths.len(), 1);
    }

    /// Confidence and the auto-fixable flag are applied by the builder.
    #[test]
    fn test_config_suggestion_creation() {
        let base = ConfigSuggestion::new("test.path", "old_value", "new_value", "Test reason");
        let built = base.with_confidence(0.8).auto_fixable();

        assert_eq!(built.path, "test.path");
        assert_eq!(built.confidence, 0.8);
        assert!(built.auto_fixable);
    }

    /// The default analyzer keeps low-priority suggestions.
    #[test]
    fn test_tuning_analyzer_default() {
        let TuningAnalyzer {
            include_low_priority,
            ..
        } = TuningAnalyzer::default();
        assert!(include_low_priority);
    }

    /// The built-in templates include the anomaly base rate.
    #[test]
    fn test_suggestion_generator() {
        let registered = ConfigSuggestionGenerator::new().templates;
        assert!(registered.contains_key("anomaly_injection.base_rate"));
    }
}
datasynth_eval/tuning/mod.rs

datasynth_eval/tuning/
mod.rs