datasynth_eval/enhancement/
auto_tuner.rs

1//! Auto-tuning engine for deriving optimal configuration from evaluation results.
2//!
3//! The AutoTuner analyzes evaluation results to identify metric gaps and
4//! computes suggested configuration values that should improve those metrics.
5
6use crate::{ComprehensiveEvaluation, EvaluationThresholds};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// A configuration patch representing a change to apply.
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct ConfigPatch {
13    /// Configuration path (dot-separated).
14    pub path: String,
15    /// The current value (if known).
16    pub current_value: Option<String>,
17    /// The suggested new value.
18    pub suggested_value: String,
19    /// Confidence level (0.0-1.0) that this change will help.
20    pub confidence: f64,
21    /// Expected improvement description.
22    pub expected_impact: String,
23}
24
25impl ConfigPatch {
26    /// Create a new config patch.
27    pub fn new(path: impl Into<String>, suggested_value: impl Into<String>) -> Self {
28        Self {
29            path: path.into(),
30            current_value: None,
31            suggested_value: suggested_value.into(),
32            confidence: 0.5,
33            expected_impact: String::new(),
34        }
35    }
36
37    /// Set the current value.
38    pub fn with_current(mut self, value: impl Into<String>) -> Self {
39        self.current_value = Some(value.into());
40        self
41    }
42
43    /// Set the confidence level.
44    pub fn with_confidence(mut self, confidence: f64) -> Self {
45        self.confidence = confidence.clamp(0.0, 1.0);
46        self
47    }
48
49    /// Set the expected impact.
50    pub fn with_impact(mut self, impact: impl Into<String>) -> Self {
51        self.expected_impact = impact.into();
52        self
53    }
54}
55
56/// Result of auto-tuning analysis.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct AutoTuneResult {
59    /// Configuration patches to apply.
60    pub patches: Vec<ConfigPatch>,
61    /// Overall improvement score (0.0-1.0).
62    pub expected_improvement: f64,
63    /// Metrics that will be addressed.
64    pub addressed_metrics: Vec<String>,
65    /// Metrics that cannot be automatically fixed.
66    pub unaddressable_metrics: Vec<String>,
67    /// Summary message.
68    pub summary: String,
69}
70
71impl AutoTuneResult {
72    /// Create a new empty result.
73    pub fn new() -> Self {
74        Self {
75            patches: Vec::new(),
76            expected_improvement: 0.0,
77            addressed_metrics: Vec::new(),
78            unaddressable_metrics: Vec::new(),
79            summary: String::new(),
80        }
81    }
82
83    /// Check if any patches are suggested.
84    pub fn has_patches(&self) -> bool {
85        !self.patches.is_empty()
86    }
87
88    /// Get patches sorted by confidence (highest first).
89    pub fn patches_by_confidence(&self) -> Vec<&ConfigPatch> {
90        let mut sorted: Vec<_> = self.patches.iter().collect();
91        sorted.sort_by(|a, b| {
92            b.confidence
93                .partial_cmp(&a.confidence)
94                .unwrap_or(std::cmp::Ordering::Equal)
95        });
96        sorted
97    }
98}
99
100impl Default for AutoTuneResult {
101    fn default() -> Self {
102        Self::new()
103    }
104}
105
/// Describes how far one metric is from the threshold it should satisfy.
#[derive(Debug, Clone)]
pub struct MetricGap {
    /// Identifier of the metric.
    pub metric_name: String,
    /// Value the metric currently has.
    pub current_value: f64,
    /// Threshold value the metric is compared against.
    pub target_value: f64,
    /// Distance to the threshold: `target - current` for a minimum
    /// threshold, `current - target` for a maximum threshold.
    pub gap: f64,
    /// `true` when the threshold is a minimum, `false` when it is a maximum.
    pub is_minimum: bool,
    /// Configuration paths related to this metric.
    pub config_paths: Vec<String>,
}
122
123impl MetricGap {
124    /// Calculate the severity of the gap (0.0-1.0).
125    pub fn severity(&self) -> f64 {
126        if self.target_value == 0.0 {
127            if self.gap.abs() > 0.0 {
128                1.0
129            } else {
130                0.0
131            }
132        } else {
133            (self.gap.abs() / self.target_value.abs()).min(1.0)
134        }
135    }
136}
137
138/// Auto-tuner that derives optimal configuration from evaluation results.
139pub struct AutoTuner {
140    /// Thresholds to compare against.
141    thresholds: EvaluationThresholds,
142    /// Known metric-to-config mappings.
143    metric_mappings: HashMap<String, Vec<MetricConfigMapping>>,
144}
145
146/// Mapping from a metric to configuration paths that affect it.
147#[derive(Debug, Clone)]
148struct MetricConfigMapping {
149    /// Configuration path.
150    config_path: String,
151    /// How much influence this config has on the metric (0.0-1.0).
152    influence: f64,
153    /// Function to compute suggested value given the gap.
154    compute_value: ComputeStrategy,
155}
156
/// The ways a suggested configuration value can be derived from a metric gap.
#[allow(dead_code)] // Variants reserved for future tuning strategies
#[derive(Debug, Clone, Copy)]
enum ComputeStrategy {
    /// Turn a boolean flag on.
    EnableBoolean,
    /// Use the given constant.
    SetFixed(f64),
    /// Raise the value by the gap amount.
    IncreaseByGap,
    /// Lower the value by the gap amount.
    DecreaseByGap,
    /// Use the target value as-is.
    SetToTarget,
    /// Scale the current value by a factor derived from the gap.
    MultiplyByGapFactor,
}
174
175impl AutoTuner {
176    /// Create a new auto-tuner with default thresholds.
177    pub fn new() -> Self {
178        Self::with_thresholds(EvaluationThresholds::default())
179    }
180
181    /// Create an auto-tuner with specific thresholds.
182    pub fn with_thresholds(thresholds: EvaluationThresholds) -> Self {
183        let mut tuner = Self {
184            thresholds,
185            metric_mappings: HashMap::new(),
186        };
187        tuner.initialize_mappings();
188        tuner
189    }
190
191    /// Initialize known metric-to-config mappings.
192    fn initialize_mappings(&mut self) {
193        // Benford's Law
194        self.metric_mappings.insert(
195            "benford_p_value".to_string(),
196            vec![MetricConfigMapping {
197                config_path: "transactions.amount.benford_compliance".to_string(),
198                influence: 0.9,
199                compute_value: ComputeStrategy::EnableBoolean,
200            }],
201        );
202
203        // Round number bias
204        self.metric_mappings.insert(
205            "round_number_ratio".to_string(),
206            vec![MetricConfigMapping {
207                config_path: "transactions.amount.round_number_bias".to_string(),
208                influence: 0.95,
209                compute_value: ComputeStrategy::SetToTarget,
210            }],
211        );
212
213        // Temporal correlation
214        self.metric_mappings.insert(
215            "temporal_correlation".to_string(),
216            vec![MetricConfigMapping {
217                config_path: "transactions.temporal.seasonality_strength".to_string(),
218                influence: 0.7,
219                compute_value: ComputeStrategy::IncreaseByGap,
220            }],
221        );
222
223        // Anomaly rate
224        self.metric_mappings.insert(
225            "anomaly_rate".to_string(),
226            vec![MetricConfigMapping {
227                config_path: "anomaly_injection.base_rate".to_string(),
228                influence: 0.95,
229                compute_value: ComputeStrategy::SetToTarget,
230            }],
231        );
232
233        // Label coverage
234        self.metric_mappings.insert(
235            "label_coverage".to_string(),
236            vec![MetricConfigMapping {
237                config_path: "anomaly_injection.label_all".to_string(),
238                influence: 0.9,
239                compute_value: ComputeStrategy::EnableBoolean,
240            }],
241        );
242
243        // Duplicate rate
244        self.metric_mappings.insert(
245            "duplicate_rate".to_string(),
246            vec![MetricConfigMapping {
247                config_path: "data_quality.duplicates.exact_rate".to_string(),
248                influence: 0.8,
249                compute_value: ComputeStrategy::SetToTarget,
250            }],
251        );
252
253        // Completeness
254        self.metric_mappings.insert(
255            "completeness_rate".to_string(),
256            vec![MetricConfigMapping {
257                config_path: "data_quality.missing_values.overall_rate".to_string(),
258                influence: 0.9,
259                compute_value: ComputeStrategy::DecreaseByGap,
260            }],
261        );
262
263        // IC match rate
264        self.metric_mappings.insert(
265            "ic_match_rate".to_string(),
266            vec![MetricConfigMapping {
267                config_path: "intercompany.match_precision".to_string(),
268                influence: 0.85,
269                compute_value: ComputeStrategy::IncreaseByGap,
270            }],
271        );
272
273        // Document chain completion
274        self.metric_mappings.insert(
275            "doc_chain_completion".to_string(),
276            vec![
277                MetricConfigMapping {
278                    config_path: "document_flows.p2p.completion_rate".to_string(),
279                    influence: 0.5,
280                    compute_value: ComputeStrategy::SetToTarget,
281                },
282                MetricConfigMapping {
283                    config_path: "document_flows.o2c.completion_rate".to_string(),
284                    influence: 0.5,
285                    compute_value: ComputeStrategy::SetToTarget,
286                },
287            ],
288        );
289
290        // Graph connectivity
291        self.metric_mappings.insert(
292            "graph_connectivity".to_string(),
293            vec![MetricConfigMapping {
294                config_path: "graph_export.ensure_connected".to_string(),
295                influence: 0.8,
296                compute_value: ComputeStrategy::EnableBoolean,
297            }],
298        );
299    }
300
301    /// Analyze evaluation results and produce auto-tune suggestions.
302    pub fn analyze(&self, evaluation: &ComprehensiveEvaluation) -> AutoTuneResult {
303        let mut result = AutoTuneResult::new();
304
305        // Identify metric gaps
306        let gaps = self.identify_gaps(evaluation);
307
308        // Generate patches for each gap
309        for gap in gaps {
310            if let Some(mappings) = self.metric_mappings.get(&gap.metric_name) {
311                for mapping in mappings {
312                    if let Some(patch) = self.generate_patch(&gap, mapping) {
313                        result.patches.push(patch);
314                        if !result.addressed_metrics.contains(&gap.metric_name) {
315                            result.addressed_metrics.push(gap.metric_name.clone());
316                        }
317                    }
318                }
319            } else if !result.unaddressable_metrics.contains(&gap.metric_name) {
320                result.unaddressable_metrics.push(gap.metric_name.clone());
321            }
322        }
323
324        // Calculate expected improvement
325        if !result.patches.is_empty() {
326            let avg_confidence: f64 = result.patches.iter().map(|p| p.confidence).sum::<f64>()
327                / result.patches.len() as f64;
328            result.expected_improvement = avg_confidence;
329        }
330
331        // Generate summary
332        result.summary = self.generate_summary(&result);
333
334        result
335    }
336
337    /// Identify gaps between current metrics and thresholds.
338    fn identify_gaps(&self, evaluation: &ComprehensiveEvaluation) -> Vec<MetricGap> {
339        let mut gaps = Vec::new();
340
341        // Check statistical metrics
342        if let Some(ref benford) = evaluation.statistical.benford {
343            if benford.p_value < self.thresholds.benford_p_value_min {
344                gaps.push(MetricGap {
345                    metric_name: "benford_p_value".to_string(),
346                    current_value: benford.p_value,
347                    target_value: self.thresholds.benford_p_value_min,
348                    gap: self.thresholds.benford_p_value_min - benford.p_value,
349                    is_minimum: true,
350                    config_paths: vec!["transactions.amount.benford_compliance".to_string()],
351                });
352            }
353        }
354
355        if let Some(ref amount) = evaluation.statistical.amount_distribution {
356            if amount.round_number_ratio < 0.05 {
357                gaps.push(MetricGap {
358                    metric_name: "round_number_ratio".to_string(),
359                    current_value: amount.round_number_ratio,
360                    target_value: 0.10, // Target 10%
361                    gap: 0.10 - amount.round_number_ratio,
362                    is_minimum: true,
363                    config_paths: vec!["transactions.amount.round_number_bias".to_string()],
364                });
365            }
366        }
367
368        if let Some(ref temporal) = evaluation.statistical.temporal {
369            if temporal.pattern_correlation < self.thresholds.temporal_correlation_min {
370                gaps.push(MetricGap {
371                    metric_name: "temporal_correlation".to_string(),
372                    current_value: temporal.pattern_correlation,
373                    target_value: self.thresholds.temporal_correlation_min,
374                    gap: self.thresholds.temporal_correlation_min - temporal.pattern_correlation,
375                    is_minimum: true,
376                    config_paths: vec!["transactions.temporal.seasonality_strength".to_string()],
377                });
378            }
379        }
380
381        // Check coherence metrics
382        if let Some(ref ic) = evaluation.coherence.intercompany {
383            if ic.match_rate < self.thresholds.ic_match_rate_min {
384                gaps.push(MetricGap {
385                    metric_name: "ic_match_rate".to_string(),
386                    current_value: ic.match_rate,
387                    target_value: self.thresholds.ic_match_rate_min,
388                    gap: self.thresholds.ic_match_rate_min - ic.match_rate,
389                    is_minimum: true,
390                    config_paths: vec!["intercompany.match_precision".to_string()],
391                });
392            }
393        }
394
395        if let Some(ref doc_chain) = evaluation.coherence.document_chain {
396            let avg_completion =
397                (doc_chain.p2p_completion_rate + doc_chain.o2c_completion_rate) / 2.0;
398            if avg_completion < self.thresholds.document_chain_completion_min {
399                gaps.push(MetricGap {
400                    metric_name: "doc_chain_completion".to_string(),
401                    current_value: avg_completion,
402                    target_value: self.thresholds.document_chain_completion_min,
403                    gap: self.thresholds.document_chain_completion_min - avg_completion,
404                    is_minimum: true,
405                    config_paths: vec![
406                        "document_flows.p2p.completion_rate".to_string(),
407                        "document_flows.o2c.completion_rate".to_string(),
408                    ],
409                });
410            }
411        }
412
413        // Check quality metrics
414        if let Some(ref uniqueness) = evaluation.quality.uniqueness {
415            if uniqueness.duplicate_rate > self.thresholds.duplicate_rate_max {
416                gaps.push(MetricGap {
417                    metric_name: "duplicate_rate".to_string(),
418                    current_value: uniqueness.duplicate_rate,
419                    target_value: self.thresholds.duplicate_rate_max,
420                    gap: uniqueness.duplicate_rate - self.thresholds.duplicate_rate_max,
421                    is_minimum: false, // This is a maximum threshold
422                    config_paths: vec!["data_quality.duplicates.exact_rate".to_string()],
423                });
424            }
425        }
426
427        if let Some(ref completeness) = evaluation.quality.completeness {
428            if completeness.overall_completeness < self.thresholds.completeness_rate_min {
429                gaps.push(MetricGap {
430                    metric_name: "completeness_rate".to_string(),
431                    current_value: completeness.overall_completeness,
432                    target_value: self.thresholds.completeness_rate_min,
433                    gap: self.thresholds.completeness_rate_min - completeness.overall_completeness,
434                    is_minimum: true,
435                    config_paths: vec!["data_quality.missing_values.overall_rate".to_string()],
436                });
437            }
438        }
439
440        // Check ML metrics
441        if let Some(ref labels) = evaluation.ml_readiness.labels {
442            if labels.anomaly_rate < self.thresholds.anomaly_rate_min {
443                gaps.push(MetricGap {
444                    metric_name: "anomaly_rate".to_string(),
445                    current_value: labels.anomaly_rate,
446                    target_value: self.thresholds.anomaly_rate_min,
447                    gap: self.thresholds.anomaly_rate_min - labels.anomaly_rate,
448                    is_minimum: true,
449                    config_paths: vec!["anomaly_injection.base_rate".to_string()],
450                });
451            } else if labels.anomaly_rate > self.thresholds.anomaly_rate_max {
452                gaps.push(MetricGap {
453                    metric_name: "anomaly_rate".to_string(),
454                    current_value: labels.anomaly_rate,
455                    target_value: self.thresholds.anomaly_rate_max,
456                    gap: labels.anomaly_rate - self.thresholds.anomaly_rate_max,
457                    is_minimum: false,
458                    config_paths: vec!["anomaly_injection.base_rate".to_string()],
459                });
460            }
461
462            if labels.label_coverage < self.thresholds.label_coverage_min {
463                gaps.push(MetricGap {
464                    metric_name: "label_coverage".to_string(),
465                    current_value: labels.label_coverage,
466                    target_value: self.thresholds.label_coverage_min,
467                    gap: self.thresholds.label_coverage_min - labels.label_coverage,
468                    is_minimum: true,
469                    config_paths: vec!["anomaly_injection.label_all".to_string()],
470                });
471            }
472        }
473
474        if let Some(ref graph) = evaluation.ml_readiness.graph {
475            if graph.connectivity_score < self.thresholds.graph_connectivity_min {
476                gaps.push(MetricGap {
477                    metric_name: "graph_connectivity".to_string(),
478                    current_value: graph.connectivity_score,
479                    target_value: self.thresholds.graph_connectivity_min,
480                    gap: self.thresholds.graph_connectivity_min - graph.connectivity_score,
481                    is_minimum: true,
482                    config_paths: vec!["graph_export.ensure_connected".to_string()],
483                });
484            }
485        }
486
487        gaps
488    }
489
490    /// Generate a config patch for a metric gap.
491    fn generate_patch(
492        &self,
493        gap: &MetricGap,
494        mapping: &MetricConfigMapping,
495    ) -> Option<ConfigPatch> {
496        let suggested_value = match mapping.compute_value {
497            ComputeStrategy::EnableBoolean => "true".to_string(),
498            ComputeStrategy::SetFixed(v) => format!("{:.4}", v),
499            ComputeStrategy::IncreaseByGap => format!("{:.4}", gap.current_value + gap.gap * 1.2),
500            ComputeStrategy::DecreaseByGap => {
501                format!("{:.4}", (gap.current_value - gap.gap * 1.2).max(0.0))
502            }
503            ComputeStrategy::SetToTarget => format!("{:.4}", gap.target_value),
504            ComputeStrategy::MultiplyByGapFactor => {
505                let factor = if gap.is_minimum {
506                    1.0 + gap.severity() * 0.5
507                } else {
508                    1.0 / (1.0 + gap.severity() * 0.5)
509                };
510                format!("{:.4}", gap.current_value * factor)
511            }
512        };
513
514        let confidence = mapping.influence * (1.0 - gap.severity() * 0.3);
515        let impact = format!(
516            "Should improve {} from {:.3} toward {:.3}",
517            gap.metric_name, gap.current_value, gap.target_value
518        );
519
520        Some(
521            ConfigPatch::new(&mapping.config_path, suggested_value)
522                .with_current(format!("{:.4}", gap.current_value))
523                .with_confidence(confidence)
524                .with_impact(impact),
525        )
526    }
527
528    /// Generate a summary message for the auto-tune result.
529    fn generate_summary(&self, result: &AutoTuneResult) -> String {
530        if result.patches.is_empty() {
531            "No configuration changes suggested. All metrics meet thresholds.".to_string()
532        } else {
533            let high_confidence: Vec<_> = result
534                .patches
535                .iter()
536                .filter(|p| p.confidence > 0.7)
537                .collect();
538            let addressable = result.addressed_metrics.len();
539            let unaddressable = result.unaddressable_metrics.len();
540
541            format!(
542                "Suggested {} configuration changes ({} high-confidence). \
543                 {} metrics can be improved, {} require manual investigation.",
544                result.patches.len(),
545                high_confidence.len(),
546                addressable,
547                unaddressable
548            )
549        }
550    }
551
552    /// Get the thresholds being used.
553    pub fn thresholds(&self) -> &EvaluationThresholds {
554        &self.thresholds
555    }
556}
557
558impl Default for AutoTuner {
559    fn default() -> Self {
560        Self::new()
561    }
562}
563
#[cfg(test)]
mod tests {
    use super::*;
    use crate::statistical::{BenfordAnalysis, BenfordConformity};

    /// A freshly built tuner comes with the built-in mappings registered.
    #[test]
    fn test_auto_tuner_creation() {
        let tuner = AutoTuner::new();
        assert!(!tuner.metric_mappings.is_empty());
        assert!(tuner.metric_mappings.contains_key("benford_p_value"));
    }

    /// The builder methods set every field, and confidence is clamped.
    #[test]
    fn test_config_patch_builder() {
        let patch = ConfigPatch::new("test.path", "value")
            .with_current("old")
            .with_confidence(0.8)
            .with_impact("Should help");

        assert_eq!(patch.path, "test.path");
        assert_eq!(patch.suggested_value, "value");
        assert_eq!(patch.current_value, Some("old".to_string()));
        assert_eq!(patch.confidence, 0.8);
        assert_eq!(patch.expected_impact, "Should help");

        // Out-of-range confidences are pulled back into 0.0-1.0.
        assert_eq!(ConfigPatch::new("p", "v").with_confidence(1.5).confidence, 1.0);
        assert_eq!(ConfigPatch::new("p", "v").with_confidence(-1.0).confidence, 0.0);
    }

    /// Patches are reported and sorted with the highest confidence first.
    #[test]
    fn test_auto_tune_result() {
        let mut result = AutoTuneResult::new();
        assert!(!result.has_patches());

        result
            .patches
            .push(ConfigPatch::new("low", "value").with_confidence(0.3));
        result
            .patches
            .push(ConfigPatch::new("high", "value").with_confidence(0.9));
        assert!(result.has_patches());

        let sorted = result.patches_by_confidence();
        assert_eq!(sorted.len(), 2);
        assert_eq!(sorted[0].path, "high");
        assert_eq!(sorted[1].path, "low");
    }

    /// Severity is the gap relative to the target, capped at 1.0, with an
    /// all-or-nothing result when the target is zero.
    #[test]
    fn test_metric_gap_severity() {
        let gap = MetricGap {
            metric_name: "test".to_string(),
            current_value: 0.02,
            target_value: 0.05,
            gap: 0.03,
            is_minimum: true,
            config_paths: vec![],
        };

        // Severity = gap / target = 0.03 / 0.05 = 0.6
        assert!((gap.severity() - 0.6).abs() < 0.001);

        // A gap larger than the target is capped at 1.0.
        let capped = MetricGap {
            gap: 0.5,
            ..gap.clone()
        };
        assert_eq!(capped.severity(), 1.0);

        // Zero target: any non-zero gap is maximal, a zero gap is none.
        let zero_target = MetricGap {
            target_value: 0.0,
            ..gap.clone()
        };
        assert_eq!(zero_target.severity(), 1.0);
        let zero_both = MetricGap {
            target_value: 0.0,
            gap: 0.0,
            ..gap
        };
        assert_eq!(zero_both.severity(), 0.0);
    }

    /// An evaluation with no data yields no suggestions at all.
    #[test]
    fn test_analyze_empty_evaluation() {
        let tuner = AutoTuner::new();
        let evaluation = ComprehensiveEvaluation::new();

        let result = tuner.analyze(&evaluation);

        // Empty evaluation should produce no patches
        assert!(result.patches.is_empty());
        assert!(!result.has_patches());
        assert!(result.addressed_metrics.is_empty());
        assert_eq!(result.expected_improvement, 0.0);
    }

    /// A failing Benford p-value leads to a patch enabling Benford compliance.
    #[test]
    fn test_analyze_with_benford_gap() {
        let tuner = AutoTuner::new();
        let mut evaluation = ComprehensiveEvaluation::new();

        // Set a failing Benford analysis
        evaluation.statistical.benford = Some(BenfordAnalysis {
            sample_size: 1000,
            observed_frequencies: [0.1; 9],
            observed_counts: [100; 9],
            expected_frequencies: [
                0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
            ],
            chi_squared: 25.0,
            degrees_of_freedom: 8,
            p_value: 0.01, // Below threshold of 0.05
            mad: 0.02,
            conformity: BenfordConformity::NonConforming,
            max_deviation: (1, 0.2), // Tuple of (digit_index, deviation)
            passes: false,
            anti_benford_score: 0.5,
        });

        let result = tuner.analyze(&evaluation);

        // Should suggest enabling Benford compliance
        assert!(!result.patches.is_empty());
        assert!(result
            .addressed_metrics
            .contains(&"benford_p_value".to_string()));
        assert!(result.patches.iter().any(|p| {
            p.path == "transactions.amount.benford_compliance" && p.suggested_value == "true"
        }));
    }
}
datasynth_eval/enhancement/auto_tuner.rs

datasynth_eval/enhancement/
auto_tuner.rs