datasynth_eval/enhancement/
auto_tuner.rs

1//! Auto-tuning engine for deriving optimal configuration from evaluation results.
2//!
3//! The AutoTuner analyzes evaluation results to identify metric gaps and
4//! computes suggested configuration values that should improve those metrics.
5
6use crate::{ComprehensiveEvaluation, EvaluationThresholds};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// A configuration patch representing a change to apply.
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct ConfigPatch {
13    /// Configuration path (dot-separated).
14    pub path: String,
15    /// The current value (if known).
16    pub current_value: Option<String>,
17    /// The suggested new value.
18    pub suggested_value: String,
19    /// Confidence level (0.0-1.0) that this change will help.
20    pub confidence: f64,
21    /// Expected improvement description.
22    pub expected_impact: String,
23}
24
25impl ConfigPatch {
26    /// Create a new config patch.
27    pub fn new(path: impl Into<String>, suggested_value: impl Into<String>) -> Self {
28        Self {
29            path: path.into(),
30            current_value: None,
31            suggested_value: suggested_value.into(),
32            confidence: 0.5,
33            expected_impact: String::new(),
34        }
35    }
36
37    /// Set the current value.
38    pub fn with_current(mut self, value: impl Into<String>) -> Self {
39        self.current_value = Some(value.into());
40        self
41    }
42
43    /// Set the confidence level.
44    pub fn with_confidence(mut self, confidence: f64) -> Self {
45        self.confidence = confidence.clamp(0.0, 1.0);
46        self
47    }
48
49    /// Set the expected impact.
50    pub fn with_impact(mut self, impact: impl Into<String>) -> Self {
51        self.expected_impact = impact.into();
52        self
53    }
54}
55
56/// Result of auto-tuning analysis.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct AutoTuneResult {
59    /// Configuration patches to apply.
60    pub patches: Vec<ConfigPatch>,
61    /// Overall improvement score (0.0-1.0).
62    pub expected_improvement: f64,
63    /// Metrics that will be addressed.
64    pub addressed_metrics: Vec<String>,
65    /// Metrics that cannot be automatically fixed.
66    pub unaddressable_metrics: Vec<String>,
67    /// Summary message.
68    pub summary: String,
69}
70
71impl AutoTuneResult {
72    /// Create a new empty result.
73    pub fn new() -> Self {
74        Self {
75            patches: Vec::new(),
76            expected_improvement: 0.0,
77            addressed_metrics: Vec::new(),
78            unaddressable_metrics: Vec::new(),
79            summary: String::new(),
80        }
81    }
82
83    /// Check if any patches are suggested.
84    pub fn has_patches(&self) -> bool {
85        !self.patches.is_empty()
86    }
87
88    /// Get patches sorted by confidence (highest first).
89    pub fn patches_by_confidence(&self) -> Vec<&ConfigPatch> {
90        let mut sorted: Vec<_> = self.patches.iter().collect();
91        sorted.sort_by(|a, b| {
92            b.confidence
93                .partial_cmp(&a.confidence)
94                .unwrap_or(std::cmp::Ordering::Equal)
95        });
96        sorted
97    }
98}
99
100impl Default for AutoTuneResult {
101    fn default() -> Self {
102        Self::new()
103    }
104}
105
/// Describes how far one metric is from its threshold.
#[derive(Debug, Clone)]
pub struct MetricGap {
    /// Metric identifier.
    pub metric_name: String,
    /// Value observed for the metric.
    pub current_value: f64,
    /// Threshold value the metric is measured against.
    pub target_value: f64,
    /// Distance to the threshold: `target - current` for a minimum threshold,
    /// `current - target` for a maximum threshold.
    pub gap: f64,
    /// `true` when the threshold is a minimum, `false` when it is a maximum.
    pub is_minimum: bool,
    /// Configuration paths related to this metric.
    pub config_paths: Vec<String>,
}
122
123impl MetricGap {
124    /// Calculate the severity of the gap (0.0-1.0).
125    pub fn severity(&self) -> f64 {
126        if self.target_value == 0.0 {
127            if self.gap.abs() > 0.0 {
128                1.0
129            } else {
130                0.0
131            }
132        } else {
133            (self.gap.abs() / self.target_value.abs()).min(1.0)
134        }
135    }
136}
137
138/// Auto-tuner that derives optimal configuration from evaluation results.
139pub struct AutoTuner {
140    /// Thresholds to compare against.
141    thresholds: EvaluationThresholds,
142    /// Known metric-to-config mappings.
143    metric_mappings: HashMap<String, Vec<MetricConfigMapping>>,
144}
145
146/// Mapping from a metric to configuration paths that affect it.
147#[derive(Debug, Clone)]
148struct MetricConfigMapping {
149    /// Configuration path.
150    config_path: String,
151    /// How much influence this config has on the metric (0.0-1.0).
152    influence: f64,
153    /// Function to compute suggested value given the gap.
154    compute_value: ComputeStrategy,
155}
156
157/// Strategy for computing suggested config values.
158#[allow(dead_code)] // Variants reserved for future tuning strategies
159#[derive(Debug, Clone, Copy)]
160enum ComputeStrategy {
161    /// Enable a boolean flag.
162    EnableBoolean,
163    /// Set to a specific value.
164    SetFixed(f64),
165    /// Increase by the gap amount.
166    IncreaseByGap,
167    /// Decrease by the gap amount.
168    DecreaseByGap,
169    /// Set to target value directly.
170    SetToTarget,
171    /// Multiply current by factor based on gap.
172    MultiplyByGapFactor,
173}
174
175impl AutoTuner {
176    /// Create a new auto-tuner with default thresholds.
177    pub fn new() -> Self {
178        Self::with_thresholds(EvaluationThresholds::default())
179    }
180
181    /// Create an auto-tuner with specific thresholds.
182    pub fn with_thresholds(thresholds: EvaluationThresholds) -> Self {
183        let mut tuner = Self {
184            thresholds,
185            metric_mappings: HashMap::new(),
186        };
187        tuner.initialize_mappings();
188        tuner
189    }
190
191    /// Initialize known metric-to-config mappings.
192    fn initialize_mappings(&mut self) {
193        // Benford's Law
194        self.metric_mappings.insert(
195            "benford_p_value".to_string(),
196            vec![MetricConfigMapping {
197                config_path: "transactions.amount.benford_compliance".to_string(),
198                influence: 0.9,
199                compute_value: ComputeStrategy::EnableBoolean,
200            }],
201        );
202
203        // Round number bias
204        self.metric_mappings.insert(
205            "round_number_ratio".to_string(),
206            vec![MetricConfigMapping {
207                config_path: "transactions.amount.round_number_bias".to_string(),
208                influence: 0.95,
209                compute_value: ComputeStrategy::SetToTarget,
210            }],
211        );
212
213        // Temporal correlation
214        self.metric_mappings.insert(
215            "temporal_correlation".to_string(),
216            vec![MetricConfigMapping {
217                config_path: "transactions.temporal.seasonality_strength".to_string(),
218                influence: 0.7,
219                compute_value: ComputeStrategy::IncreaseByGap,
220            }],
221        );
222
223        // Anomaly rate
224        self.metric_mappings.insert(
225            "anomaly_rate".to_string(),
226            vec![MetricConfigMapping {
227                config_path: "anomaly_injection.base_rate".to_string(),
228                influence: 0.95,
229                compute_value: ComputeStrategy::SetToTarget,
230            }],
231        );
232
233        // Label coverage
234        self.metric_mappings.insert(
235            "label_coverage".to_string(),
236            vec![MetricConfigMapping {
237                config_path: "anomaly_injection.label_all".to_string(),
238                influence: 0.9,
239                compute_value: ComputeStrategy::EnableBoolean,
240            }],
241        );
242
243        // Duplicate rate
244        self.metric_mappings.insert(
245            "duplicate_rate".to_string(),
246            vec![MetricConfigMapping {
247                config_path: "data_quality.duplicates.exact_rate".to_string(),
248                influence: 0.8,
249                compute_value: ComputeStrategy::SetToTarget,
250            }],
251        );
252
253        // Completeness
254        self.metric_mappings.insert(
255            "completeness_rate".to_string(),
256            vec![MetricConfigMapping {
257                config_path: "data_quality.missing_values.overall_rate".to_string(),
258                influence: 0.9,
259                compute_value: ComputeStrategy::DecreaseByGap,
260            }],
261        );
262
263        // IC match rate
264        self.metric_mappings.insert(
265            "ic_match_rate".to_string(),
266            vec![MetricConfigMapping {
267                config_path: "intercompany.match_precision".to_string(),
268                influence: 0.85,
269                compute_value: ComputeStrategy::IncreaseByGap,
270            }],
271        );
272
273        // Document chain completion
274        self.metric_mappings.insert(
275            "doc_chain_completion".to_string(),
276            vec![
277                MetricConfigMapping {
278                    config_path: "document_flows.p2p.completion_rate".to_string(),
279                    influence: 0.5,
280                    compute_value: ComputeStrategy::SetToTarget,
281                },
282                MetricConfigMapping {
283                    config_path: "document_flows.o2c.completion_rate".to_string(),
284                    influence: 0.5,
285                    compute_value: ComputeStrategy::SetToTarget,
286                },
287            ],
288        );
289
290        // Graph connectivity
291        self.metric_mappings.insert(
292            "graph_connectivity".to_string(),
293            vec![MetricConfigMapping {
294                config_path: "graph_export.ensure_connected".to_string(),
295                influence: 0.8,
296                compute_value: ComputeStrategy::EnableBoolean,
297            }],
298        );
299    }
300
301    /// Analyze evaluation results and produce auto-tune suggestions.
302    pub fn analyze(&self, evaluation: &ComprehensiveEvaluation) -> AutoTuneResult {
303        let mut result = AutoTuneResult::new();
304
305        // Identify metric gaps
306        let gaps = self.identify_gaps(evaluation);
307
308        // Generate patches for each gap
309        for gap in gaps {
310            if let Some(mappings) = self.metric_mappings.get(&gap.metric_name) {
311                for mapping in mappings {
312                    if let Some(patch) = self.generate_patch(&gap, mapping) {
313                        result.patches.push(patch);
314                        if !result.addressed_metrics.contains(&gap.metric_name) {
315                            result.addressed_metrics.push(gap.metric_name.clone());
316                        }
317                    }
318                }
319            } else {
320                if !result.unaddressable_metrics.contains(&gap.metric_name) {
321                    result.unaddressable_metrics.push(gap.metric_name.clone());
322                }
323            }
324        }
325
326        // Calculate expected improvement
327        if !result.patches.is_empty() {
328            let avg_confidence: f64 = result.patches.iter().map(|p| p.confidence).sum::<f64>()
329                / result.patches.len() as f64;
330            result.expected_improvement = avg_confidence;
331        }
332
333        // Generate summary
334        result.summary = self.generate_summary(&result);
335
336        result
337    }
338
339    /// Identify gaps between current metrics and thresholds.
340    fn identify_gaps(&self, evaluation: &ComprehensiveEvaluation) -> Vec<MetricGap> {
341        let mut gaps = Vec::new();
342
343        // Check statistical metrics
344        if let Some(ref benford) = evaluation.statistical.benford {
345            if benford.p_value < self.thresholds.benford_p_value_min {
346                gaps.push(MetricGap {
347                    metric_name: "benford_p_value".to_string(),
348                    current_value: benford.p_value,
349                    target_value: self.thresholds.benford_p_value_min,
350                    gap: self.thresholds.benford_p_value_min - benford.p_value,
351                    is_minimum: true,
352                    config_paths: vec!["transactions.amount.benford_compliance".to_string()],
353                });
354            }
355        }
356
357        if let Some(ref amount) = evaluation.statistical.amount_distribution {
358            if amount.round_number_ratio < 0.05 {
359                gaps.push(MetricGap {
360                    metric_name: "round_number_ratio".to_string(),
361                    current_value: amount.round_number_ratio,
362                    target_value: 0.10, // Target 10%
363                    gap: 0.10 - amount.round_number_ratio,
364                    is_minimum: true,
365                    config_paths: vec!["transactions.amount.round_number_bias".to_string()],
366                });
367            }
368        }
369
370        if let Some(ref temporal) = evaluation.statistical.temporal {
371            if temporal.pattern_correlation < self.thresholds.temporal_correlation_min {
372                gaps.push(MetricGap {
373                    metric_name: "temporal_correlation".to_string(),
374                    current_value: temporal.pattern_correlation,
375                    target_value: self.thresholds.temporal_correlation_min,
376                    gap: self.thresholds.temporal_correlation_min - temporal.pattern_correlation,
377                    is_minimum: true,
378                    config_paths: vec!["transactions.temporal.seasonality_strength".to_string()],
379                });
380            }
381        }
382
383        // Check coherence metrics
384        if let Some(ref ic) = evaluation.coherence.intercompany {
385            if ic.match_rate < self.thresholds.ic_match_rate_min {
386                gaps.push(MetricGap {
387                    metric_name: "ic_match_rate".to_string(),
388                    current_value: ic.match_rate,
389                    target_value: self.thresholds.ic_match_rate_min,
390                    gap: self.thresholds.ic_match_rate_min - ic.match_rate,
391                    is_minimum: true,
392                    config_paths: vec!["intercompany.match_precision".to_string()],
393                });
394            }
395        }
396
397        if let Some(ref doc_chain) = evaluation.coherence.document_chain {
398            let avg_completion =
399                (doc_chain.p2p_completion_rate + doc_chain.o2c_completion_rate) / 2.0;
400            if avg_completion < self.thresholds.document_chain_completion_min {
401                gaps.push(MetricGap {
402                    metric_name: "doc_chain_completion".to_string(),
403                    current_value: avg_completion,
404                    target_value: self.thresholds.document_chain_completion_min,
405                    gap: self.thresholds.document_chain_completion_min - avg_completion,
406                    is_minimum: true,
407                    config_paths: vec![
408                        "document_flows.p2p.completion_rate".to_string(),
409                        "document_flows.o2c.completion_rate".to_string(),
410                    ],
411                });
412            }
413        }
414
415        // Check quality metrics
416        if let Some(ref uniqueness) = evaluation.quality.uniqueness {
417            if uniqueness.duplicate_rate > self.thresholds.duplicate_rate_max {
418                gaps.push(MetricGap {
419                    metric_name: "duplicate_rate".to_string(),
420                    current_value: uniqueness.duplicate_rate,
421                    target_value: self.thresholds.duplicate_rate_max,
422                    gap: uniqueness.duplicate_rate - self.thresholds.duplicate_rate_max,
423                    is_minimum: false, // This is a maximum threshold
424                    config_paths: vec!["data_quality.duplicates.exact_rate".to_string()],
425                });
426            }
427        }
428
429        if let Some(ref completeness) = evaluation.quality.completeness {
430            if completeness.overall_completeness < self.thresholds.completeness_rate_min {
431                gaps.push(MetricGap {
432                    metric_name: "completeness_rate".to_string(),
433                    current_value: completeness.overall_completeness,
434                    target_value: self.thresholds.completeness_rate_min,
435                    gap: self.thresholds.completeness_rate_min - completeness.overall_completeness,
436                    is_minimum: true,
437                    config_paths: vec!["data_quality.missing_values.overall_rate".to_string()],
438                });
439            }
440        }
441
442        // Check ML metrics
443        if let Some(ref labels) = evaluation.ml_readiness.labels {
444            if labels.anomaly_rate < self.thresholds.anomaly_rate_min {
445                gaps.push(MetricGap {
446                    metric_name: "anomaly_rate".to_string(),
447                    current_value: labels.anomaly_rate,
448                    target_value: self.thresholds.anomaly_rate_min,
449                    gap: self.thresholds.anomaly_rate_min - labels.anomaly_rate,
450                    is_minimum: true,
451                    config_paths: vec!["anomaly_injection.base_rate".to_string()],
452                });
453            } else if labels.anomaly_rate > self.thresholds.anomaly_rate_max {
454                gaps.push(MetricGap {
455                    metric_name: "anomaly_rate".to_string(),
456                    current_value: labels.anomaly_rate,
457                    target_value: self.thresholds.anomaly_rate_max,
458                    gap: labels.anomaly_rate - self.thresholds.anomaly_rate_max,
459                    is_minimum: false,
460                    config_paths: vec!["anomaly_injection.base_rate".to_string()],
461                });
462            }
463
464            if labels.label_coverage < self.thresholds.label_coverage_min {
465                gaps.push(MetricGap {
466                    metric_name: "label_coverage".to_string(),
467                    current_value: labels.label_coverage,
468                    target_value: self.thresholds.label_coverage_min,
469                    gap: self.thresholds.label_coverage_min - labels.label_coverage,
470                    is_minimum: true,
471                    config_paths: vec!["anomaly_injection.label_all".to_string()],
472                });
473            }
474        }
475
476        if let Some(ref graph) = evaluation.ml_readiness.graph {
477            if graph.connectivity_score < self.thresholds.graph_connectivity_min {
478                gaps.push(MetricGap {
479                    metric_name: "graph_connectivity".to_string(),
480                    current_value: graph.connectivity_score,
481                    target_value: self.thresholds.graph_connectivity_min,
482                    gap: self.thresholds.graph_connectivity_min - graph.connectivity_score,
483                    is_minimum: true,
484                    config_paths: vec!["graph_export.ensure_connected".to_string()],
485                });
486            }
487        }
488
489        gaps
490    }
491
492    /// Generate a config patch for a metric gap.
493    fn generate_patch(
494        &self,
495        gap: &MetricGap,
496        mapping: &MetricConfigMapping,
497    ) -> Option<ConfigPatch> {
498        let suggested_value = match mapping.compute_value {
499            ComputeStrategy::EnableBoolean => "true".to_string(),
500            ComputeStrategy::SetFixed(v) => format!("{:.4}", v),
501            ComputeStrategy::IncreaseByGap => format!("{:.4}", gap.current_value + gap.gap * 1.2),
502            ComputeStrategy::DecreaseByGap => {
503                format!("{:.4}", (gap.current_value - gap.gap * 1.2).max(0.0))
504            }
505            ComputeStrategy::SetToTarget => format!("{:.4}", gap.target_value),
506            ComputeStrategy::MultiplyByGapFactor => {
507                let factor = if gap.is_minimum {
508                    1.0 + gap.severity() * 0.5
509                } else {
510                    1.0 / (1.0 + gap.severity() * 0.5)
511                };
512                format!("{:.4}", gap.current_value * factor)
513            }
514        };
515
516        let confidence = mapping.influence * (1.0 - gap.severity() * 0.3);
517        let impact = format!(
518            "Should improve {} from {:.3} toward {:.3}",
519            gap.metric_name, gap.current_value, gap.target_value
520        );
521
522        Some(
523            ConfigPatch::new(&mapping.config_path, suggested_value)
524                .with_current(format!("{:.4}", gap.current_value))
525                .with_confidence(confidence)
526                .with_impact(impact),
527        )
528    }
529
530    /// Generate a summary message for the auto-tune result.
531    fn generate_summary(&self, result: &AutoTuneResult) -> String {
532        if result.patches.is_empty() {
533            "No configuration changes suggested. All metrics meet thresholds.".to_string()
534        } else {
535            let high_confidence: Vec<_> = result
536                .patches
537                .iter()
538                .filter(|p| p.confidence > 0.7)
539                .collect();
540            let addressable = result.addressed_metrics.len();
541            let unaddressable = result.unaddressable_metrics.len();
542
543            format!(
544                "Suggested {} configuration changes ({} high-confidence). \
545                 {} metrics can be improved, {} require manual investigation.",
546                result.patches.len(),
547                high_confidence.len(),
548                addressable,
549                unaddressable
550            )
551        }
552    }
553
554    /// Get the thresholds being used.
555    pub fn thresholds(&self) -> &EvaluationThresholds {
556        &self.thresholds
557    }
558}
559
560impl Default for AutoTuner {
561    fn default() -> Self {
562        Self::new()
563    }
564}
565
#[cfg(test)]
mod tests {
    use super::*;
    use crate::statistical::{BenfordAnalysis, BenfordConformity};

    /// A freshly built tuner knows at least one metric, and every known
    /// metric maps to at least one configuration entry.
    #[test]
    fn test_auto_tuner_creation() {
        let tuner = AutoTuner::new();
        assert!(!tuner.metric_mappings.is_empty());
        assert!(tuner.metric_mappings.values().all(|m| !m.is_empty()));
    }

    /// The builder stores every field and keeps confidence within 0.0-1.0.
    #[test]
    fn test_config_patch_builder() {
        let patch = ConfigPatch::new("test.path", "value")
            .with_current("old")
            .with_confidence(0.8)
            .with_impact("Should help");

        assert_eq!(patch.path, "test.path");
        assert_eq!(patch.suggested_value, "value");
        assert_eq!(patch.current_value, Some("old".to_string()));
        assert_eq!(patch.confidence, 0.8);
        assert_eq!(patch.expected_impact, "Should help");

        // Out-of-range confidences are clamped.
        assert_eq!(ConfigPatch::new("p", "v").with_confidence(1.5).confidence, 1.0);
        assert_eq!(ConfigPatch::new("p", "v").with_confidence(-0.5).confidence, 0.0);
    }

    /// Patches are reported and ordered from highest to lowest confidence.
    #[test]
    fn test_auto_tune_result() {
        let mut result = AutoTuneResult::new();
        assert!(!result.has_patches());

        result
            .patches
            .push(ConfigPatch::new("low", "value").with_confidence(0.3));
        result
            .patches
            .push(ConfigPatch::new("high", "value").with_confidence(0.9));
        assert!(result.has_patches());

        // Two elements inserted in ascending order, so the sort has to act.
        let sorted = result.patches_by_confidence();
        assert_eq!(sorted.len(), 2);
        assert_eq!(sorted[0].path, "high");
        assert_eq!(sorted[1].path, "low");
    }

    /// Severity is the gap relative to the target, capped at 1.0, with a
    /// special case for a zero target.
    #[test]
    fn test_metric_gap_severity() {
        let gap = MetricGap {
            metric_name: "test".to_string(),
            current_value: 0.02,
            target_value: 0.05,
            gap: 0.03,
            is_minimum: true,
            config_paths: vec![],
        };

        // Severity = gap / target = 0.03 / 0.05 = 0.6
        assert!((gap.severity() - 0.6).abs() < 0.001);

        // A gap larger than the target is capped at 1.0.
        let capped = MetricGap {
            gap: 0.5,
            ..gap.clone()
        };
        assert_eq!(capped.severity(), 1.0);

        // Zero target: any non-zero gap is fully severe, a zero gap is not.
        let zero_target = MetricGap {
            target_value: 0.0,
            ..gap.clone()
        };
        assert_eq!(zero_target.severity(), 1.0);
        let no_gap = MetricGap {
            target_value: 0.0,
            gap: 0.0,
            ..gap
        };
        assert_eq!(no_gap.severity(), 0.0);
    }

    /// An evaluation without any data yields an empty result.
    #[test]
    fn test_analyze_empty_evaluation() {
        let tuner = AutoTuner::new();
        let evaluation = ComprehensiveEvaluation::new();

        let result = tuner.analyze(&evaluation);

        // Empty evaluation should produce no patches
        assert!(result.patches.is_empty());
        assert!(result.addressed_metrics.is_empty());
        assert!(result.unaddressable_metrics.is_empty());
        assert_eq!(result.expected_improvement, 0.0);
        assert!(!result.summary.is_empty());
    }

    /// A failing Benford analysis yields a patch enabling Benford compliance.
    #[test]
    fn test_analyze_with_benford_gap() {
        let tuner = AutoTuner::new();
        let mut evaluation = ComprehensiveEvaluation::new();

        // Set a failing Benford analysis
        evaluation.statistical.benford = Some(BenfordAnalysis {
            sample_size: 1000,
            observed_frequencies: [0.1; 9],
            observed_counts: [100; 9],
            expected_frequencies: [
                0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
            ],
            chi_squared: 25.0,
            degrees_of_freedom: 8,
            p_value: 0.01, // Below threshold of 0.05
            mad: 0.02,
            conformity: BenfordConformity::NonConforming,
            max_deviation: (1, 0.2), // Tuple of (digit_index, deviation)
            passes: false,
            anti_benford_score: 0.5,
        });

        let result = tuner.analyze(&evaluation);

        // Should suggest enabling Benford compliance
        assert!(!result.patches.is_empty());
        assert!(result
            .addressed_metrics
            .contains(&"benford_p_value".to_string()));
        assert!(result.patches.iter().any(|p| {
            p.path == "transactions.amount.benford_compliance" && p.suggested_value == "true"
        }));
        assert!(result.expected_improvement > 0.0);
    }
}
datasynth_eval/enhancement/auto_tuner.rs

datasynth_eval/enhancement/
auto_tuner.rs