datasynth_eval/enhancement/
recommendation_engine.rs

//! Recommendation engine for providing prioritized enhancement suggestions.
//!
//! The recommendation engine performs root cause analysis on evaluation
//! failures and produces actionable, prioritized recommendations.
5
6use crate::{ComprehensiveEvaluation, EvaluationThresholds};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// Priority level for recommendations.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
12pub enum RecommendationPriority {
13    /// Critical issues that will cause data to fail validation.
14    Critical = 0,
15    /// High priority issues affecting data quality significantly.
16    High = 1,
17    /// Medium priority improvements.
18    Medium = 2,
19    /// Low priority enhancements.
20    Low = 3,
21    /// Informational only, no action required.
22    Info = 4,
23}
24
25impl RecommendationPriority {
26    /// Get display name.
27    pub fn name(&self) -> &'static str {
28        match self {
29            RecommendationPriority::Critical => "Critical",
30            RecommendationPriority::High => "High",
31            RecommendationPriority::Medium => "Medium",
32            RecommendationPriority::Low => "Low",
33            RecommendationPriority::Info => "Info",
34        }
35    }
36}
37
38/// Category of the recommendation.
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
40pub enum RecommendationCategory {
41    /// Statistical distribution issues.
42    Statistical,
43    /// Data coherence issues (balance, subledger, etc.).
44    Coherence,
45    /// Data quality issues (duplicates, missing, etc.).
46    DataQuality,
47    /// ML readiness issues.
48    MLReadiness,
49    /// Performance issues.
50    Performance,
51    /// Configuration issues.
52    Configuration,
53}
54
55impl RecommendationCategory {
56    /// Get display name.
57    pub fn name(&self) -> &'static str {
58        match self {
59            RecommendationCategory::Statistical => "Statistical Quality",
60            RecommendationCategory::Coherence => "Data Coherence",
61            RecommendationCategory::DataQuality => "Data Quality",
62            RecommendationCategory::MLReadiness => "ML Readiness",
63            RecommendationCategory::Performance => "Performance",
64            RecommendationCategory::Configuration => "Configuration",
65        }
66    }
67}
68
69/// Root cause identified for an issue.
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct RootCause {
72    /// Short description of the root cause.
73    pub description: String,
74    /// Detailed explanation.
75    pub explanation: String,
76    /// Evidence supporting this root cause.
77    pub evidence: Vec<String>,
78    /// Confidence level (0.0-1.0).
79    pub confidence: f64,
80}
81
82impl RootCause {
83    /// Create a new root cause.
84    pub fn new(description: impl Into<String>) -> Self {
85        Self {
86            description: description.into(),
87            explanation: String::new(),
88            evidence: Vec::new(),
89            confidence: 0.5,
90        }
91    }
92
93    /// Add explanation.
94    pub fn with_explanation(mut self, explanation: impl Into<String>) -> Self {
95        self.explanation = explanation.into();
96        self
97    }
98
99    /// Add evidence.
100    pub fn with_evidence(mut self, evidence: impl Into<String>) -> Self {
101        self.evidence.push(evidence.into());
102        self
103    }
104
105    /// Set confidence.
106    pub fn with_confidence(mut self, confidence: f64) -> Self {
107        self.confidence = confidence.clamp(0.0, 1.0);
108        self
109    }
110}
111
112/// A single recommendation.
113#[derive(Debug, Clone, Serialize, Deserialize)]
114pub struct Recommendation {
115    /// Unique identifier.
116    pub id: String,
117    /// Priority level.
118    pub priority: RecommendationPriority,
119    /// Category.
120    pub category: RecommendationCategory,
121    /// Short title.
122    pub title: String,
123    /// Detailed description.
124    pub description: String,
125    /// Root causes identified.
126    pub root_causes: Vec<RootCause>,
127    /// Suggested actions to take.
128    pub actions: Vec<SuggestedAction>,
129    /// Metrics affected.
130    pub affected_metrics: Vec<String>,
131    /// Expected improvement if addressed.
132    pub expected_improvement: String,
133}
134
135impl Recommendation {
136    /// Create a new recommendation.
137    pub fn new(
138        id: impl Into<String>,
139        priority: RecommendationPriority,
140        category: RecommendationCategory,
141        title: impl Into<String>,
142    ) -> Self {
143        Self {
144            id: id.into(),
145            priority,
146            category,
147            title: title.into(),
148            description: String::new(),
149            root_causes: Vec::new(),
150            actions: Vec::new(),
151            affected_metrics: Vec::new(),
152            expected_improvement: String::new(),
153        }
154    }
155
156    /// Add description.
157    pub fn with_description(mut self, description: impl Into<String>) -> Self {
158        self.description = description.into();
159        self
160    }
161
162    /// Add root cause.
163    pub fn with_root_cause(mut self, root_cause: RootCause) -> Self {
164        self.root_causes.push(root_cause);
165        self
166    }
167
168    /// Add action.
169    pub fn with_action(mut self, action: SuggestedAction) -> Self {
170        self.actions.push(action);
171        self
172    }
173
174    /// Add affected metric.
175    pub fn with_affected_metric(mut self, metric: impl Into<String>) -> Self {
176        self.affected_metrics.push(metric.into());
177        self
178    }
179
180    /// Set expected improvement.
181    pub fn with_expected_improvement(mut self, improvement: impl Into<String>) -> Self {
182        self.expected_improvement = improvement.into();
183        self
184    }
185}
186
187/// A suggested action to address an issue.
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct SuggestedAction {
190    /// Description of the action.
191    pub description: String,
192    /// Configuration path if applicable.
193    pub config_path: Option<String>,
194    /// Suggested value if applicable.
195    pub suggested_value: Option<String>,
196    /// Whether this can be automatically applied.
197    pub auto_applicable: bool,
198    /// Estimated effort (Low, Medium, High).
199    pub effort: String,
200}
201
202impl SuggestedAction {
203    /// Create a new action.
204    pub fn new(description: impl Into<String>) -> Self {
205        Self {
206            description: description.into(),
207            config_path: None,
208            suggested_value: None,
209            auto_applicable: false,
210            effort: "Medium".to_string(),
211        }
212    }
213
214    /// Set config change.
215    pub fn with_config_change(mut self, path: impl Into<String>, value: impl Into<String>) -> Self {
216        self.config_path = Some(path.into());
217        self.suggested_value = Some(value.into());
218        self.auto_applicable = true;
219        self
220    }
221
222    /// Set effort level.
223    pub fn with_effort(mut self, effort: impl Into<String>) -> Self {
224        self.effort = effort.into();
225        self
226    }
227
228    /// Mark as not auto-applicable.
229    pub fn manual_only(mut self) -> Self {
230        self.auto_applicable = false;
231        self
232    }
233}
234
235/// Enhancement report containing all recommendations.
236#[derive(Debug, Clone, Serialize, Deserialize)]
237pub struct EnhancementReport {
238    /// All recommendations.
239    pub recommendations: Vec<Recommendation>,
240    /// Summary by category.
241    pub category_summary: HashMap<String, usize>,
242    /// Summary by priority.
243    pub priority_summary: HashMap<String, usize>,
244    /// Overall health score (0.0-1.0).
245    pub health_score: f64,
246    /// Top issues to address.
247    pub top_issues: Vec<String>,
248    /// Quick wins (easy to fix with high impact).
249    pub quick_wins: Vec<String>,
250}
251
252impl EnhancementReport {
253    /// Create a new empty report.
254    pub fn new() -> Self {
255        Self {
256            recommendations: Vec::new(),
257            category_summary: HashMap::new(),
258            priority_summary: HashMap::new(),
259            health_score: 1.0,
260            top_issues: Vec::new(),
261            quick_wins: Vec::new(),
262        }
263    }
264
265    /// Add a recommendation.
266    pub fn add(&mut self, recommendation: Recommendation) {
267        // Update summaries
268        *self
269            .category_summary
270            .entry(recommendation.category.name().to_string())
271            .or_insert(0) += 1;
272        *self
273            .priority_summary
274            .entry(recommendation.priority.name().to_string())
275            .or_insert(0) += 1;
276
277        self.recommendations.push(recommendation);
278    }
279
280    /// Finalize the report (calculate scores, sort, etc.).
281    pub fn finalize(&mut self) {
282        // Sort recommendations by priority
283        self.recommendations
284            .sort_by(|a, b| a.priority.cmp(&b.priority));
285
286        // Calculate health score
287        let critical_count = *self.priority_summary.get("Critical").unwrap_or(&0);
288        let high_count = *self.priority_summary.get("High").unwrap_or(&0);
289        let medium_count = *self.priority_summary.get("Medium").unwrap_or(&0);
290
291        let penalty =
292            critical_count as f64 * 0.3 + high_count as f64 * 0.1 + medium_count as f64 * 0.02;
293        self.health_score = (1.0 - penalty).max(0.0);
294
295        // Identify top issues (critical and high priority)
296        self.top_issues = self
297            .recommendations
298            .iter()
299            .filter(|r| {
300                r.priority == RecommendationPriority::Critical
301                    || r.priority == RecommendationPriority::High
302            })
303            .take(5)
304            .map(|r| r.title.clone())
305            .collect();
306
307        // Identify quick wins (auto-applicable actions)
308        self.quick_wins = self
309            .recommendations
310            .iter()
311            .filter(|r| r.actions.iter().any(|a| a.auto_applicable))
312            .take(5)
313            .map(|r| r.title.clone())
314            .collect();
315    }
316
317    /// Get recommendations by category.
318    pub fn by_category(&self, category: RecommendationCategory) -> Vec<&Recommendation> {
319        self.recommendations
320            .iter()
321            .filter(|r| r.category == category)
322            .collect()
323    }
324
325    /// Get recommendations by priority.
326    pub fn by_priority(&self, priority: RecommendationPriority) -> Vec<&Recommendation> {
327        self.recommendations
328            .iter()
329            .filter(|r| r.priority == priority)
330            .collect()
331    }
332
333    /// Check if there are critical issues.
334    pub fn has_critical_issues(&self) -> bool {
335        *self.priority_summary.get("Critical").unwrap_or(&0) > 0
336    }
337}
338
339impl Default for EnhancementReport {
340    fn default() -> Self {
341        Self::new()
342    }
343}
344
345/// Engine for generating recommendations from evaluation results.
346pub struct RecommendationEngine {
347    /// Thresholds for comparison.
348    thresholds: EvaluationThresholds,
349    /// Counter for generating unique IDs.
350    id_counter: u32,
351}
352
353impl RecommendationEngine {
354    /// Create a new recommendation engine.
355    pub fn new() -> Self {
356        Self::with_thresholds(EvaluationThresholds::default())
357    }
358
359    /// Create with specific thresholds.
360    pub fn with_thresholds(thresholds: EvaluationThresholds) -> Self {
361        Self {
362            thresholds,
363            id_counter: 0,
364        }
365    }
366
367    /// Generate an enhancement report from evaluation results.
368    pub fn generate_report(&mut self, evaluation: &ComprehensiveEvaluation) -> EnhancementReport {
369        let mut report = EnhancementReport::new();
370
371        // Analyze statistical issues
372        self.analyze_statistical(&evaluation.statistical, &mut report);
373
374        // Analyze coherence issues
375        self.analyze_coherence(&evaluation.coherence, &mut report);
376
377        // Analyze quality issues
378        self.analyze_quality(&evaluation.quality, &mut report);
379
380        // Analyze ML readiness issues
381        self.analyze_ml_readiness(&evaluation.ml_readiness, &mut report);
382
383        // Finalize the report
384        report.finalize();
385
386        report
387    }
388
389    /// Generate a unique ID.
390    fn next_id(&mut self) -> String {
391        self.id_counter += 1;
392        format!("REC-{:04}", self.id_counter)
393    }
394
395    /// Analyze statistical evaluation results.
396    fn analyze_statistical(
397        &mut self,
398        stat: &crate::statistical::StatisticalEvaluation,
399        report: &mut EnhancementReport,
400    ) {
401        // Check Benford's Law
402        if let Some(ref benford) = stat.benford {
403            if benford.p_value < self.thresholds.benford_p_value_min {
404                let severity = if benford.p_value < 0.01 {
405                    RecommendationPriority::High
406                } else {
407                    RecommendationPriority::Medium
408                };
409
410                let rec = Recommendation::new(
411                    self.next_id(),
412                    severity,
413                    RecommendationCategory::Statistical,
414                    "Benford's Law Non-Conformance",
415                )
416                .with_description(
417                    "Generated transaction amounts do not follow Benford's Law, \
418                     which may indicate unrealistic data patterns.",
419                )
420                .with_root_cause(
421                    RootCause::new("Amount generation not using Benford-compliant distribution")
422                        .with_explanation(
423                            "Real financial data naturally follows Benford's Law for first digits. \
424                             Random or uniform distributions will fail this test.",
425                        )
426                        .with_evidence(format!("p-value: {:.4} (threshold: {:.4})", benford.p_value, self.thresholds.benford_p_value_min))
427                        .with_confidence(0.9),
428                )
429                .with_action(
430                    SuggestedAction::new("Enable Benford's Law compliance in amount generation")
431                        .with_config_change("transactions.amount.benford_compliance", "true")
432                        .with_effort("Low"),
433                )
434                .with_affected_metric("benford_p_value")
435                .with_expected_improvement("Statistical p-value should increase to > 0.05");
436
437                report.add(rec);
438            }
439        }
440
441        // Check temporal patterns
442        if let Some(ref temporal) = stat.temporal {
443            if temporal.pattern_correlation < self.thresholds.temporal_correlation_min {
444                let rec = Recommendation::new(
445                    self.next_id(),
446                    RecommendationPriority::Medium,
447                    RecommendationCategory::Statistical,
448                    "Weak Temporal Patterns",
449                )
450                .with_description(
451                    "Generated data lacks realistic temporal patterns such as \
452                     seasonality, month-end spikes, and weekday variations.",
453                )
454                .with_root_cause(
455                    RootCause::new("Insufficient temporal variation in generation")
456                        .with_explanation(
457                            "Real financial data shows strong temporal patterns including \
458                             month-end closing activity, seasonal variations, and weekday effects.",
459                        )
460                        .with_evidence(format!(
461                            "Correlation: {:.3} (threshold: {:.3})",
462                            temporal.pattern_correlation, self.thresholds.temporal_correlation_min
463                        ))
464                        .with_confidence(0.75),
465                )
466                .with_action(
467                    SuggestedAction::new("Increase seasonality strength")
468                        .with_config_change("transactions.temporal.seasonality_strength", "0.8")
469                        .with_effort("Low"),
470                )
471                .with_action(
472                    SuggestedAction::new("Enable month-end spike patterns")
473                        .with_config_change("transactions.temporal.month_end_spike", "true")
474                        .with_effort("Low"),
475                )
476                .with_affected_metric("temporal_correlation")
477                .with_expected_improvement("Better temporal pattern correlation (> 0.8)");
478
479                report.add(rec);
480            }
481        }
482    }
483
484    /// Analyze coherence evaluation results.
485    fn analyze_coherence(
486        &mut self,
487        coherence: &crate::coherence::CoherenceEvaluation,
488        report: &mut EnhancementReport,
489    ) {
490        // Check balance sheet
491        if let Some(ref balance) = coherence.balance {
492            if !balance.equation_balanced {
493                let rec = Recommendation::new(
494                    self.next_id(),
495                    RecommendationPriority::Critical,
496                    RecommendationCategory::Coherence,
497                    "Balance Sheet Imbalance",
498                )
499                .with_description(
500                    "The fundamental accounting equation (Assets = Liabilities + Equity) is violated. \
501                     This is a critical data integrity issue.",
502                )
503                .with_root_cause(
504                    RootCause::new("Unbalanced journal entries generated")
505                        .with_explanation(
506                            "Every journal entry must have equal debits and credits. \
507                             An imbalance indicates entries were created incorrectly.",
508                        )
509                        .with_evidence(format!("Max imbalance: {}", balance.max_imbalance))
510                        .with_confidence(0.95),
511                )
512                .with_action(
513                    SuggestedAction::new("Enable balance coherence validation")
514                        .with_config_change("balance.coherence_enabled", "true")
515                        .with_effort("Low"),
516                )
517                .with_action(
518                    SuggestedAction::new("Review JE generation logic for balance enforcement")
519                        .manual_only()
520                        .with_effort("High"),
521                )
522                .with_affected_metric("balance_equation")
523                .with_expected_improvement("Zero imbalance in trial balance");
524
525                report.add(rec);
526            }
527        }
528
529        // Check intercompany matching
530        if let Some(ref ic) = coherence.intercompany {
531            if ic.match_rate < self.thresholds.ic_match_rate_min {
532                let rec = Recommendation::new(
533                    self.next_id(),
534                    RecommendationPriority::High,
535                    RecommendationCategory::Coherence,
536                    "Intercompany Matching Issues",
537                )
538                .with_description(
539                    "Intercompany transactions are not fully matched between entities. \
540                     This will cause issues during consolidation.",
541                )
542                .with_root_cause(
543                    RootCause::new("IC transaction pairs not properly linked")
544                        .with_explanation(
545                            "Intercompany transactions should always have matching entries \
546                             in both the selling and buying entities.",
547                        )
548                        .with_evidence(format!(
549                            "Match rate: {:.1}% (threshold: {:.1}%)",
550                            ic.match_rate * 100.0,
551                            self.thresholds.ic_match_rate_min * 100.0
552                        ))
553                        .with_confidence(0.85),
554                )
555                .with_action(
556                    SuggestedAction::new("Increase IC matching precision")
557                        .with_config_change("intercompany.match_precision", "0.99")
558                        .with_effort("Low"),
559                )
560                .with_affected_metric("ic_match_rate")
561                .with_expected_improvement("IC match rate > 95%");
562
563                report.add(rec);
564            }
565        }
566
567        // Check document chains
568        if let Some(ref doc_chain) = coherence.document_chain {
569            let avg_completion =
570                (doc_chain.p2p_completion_rate + doc_chain.o2c_completion_rate) / 2.0;
571            if avg_completion < self.thresholds.document_chain_completion_min {
572                let rec = Recommendation::new(
573                    self.next_id(),
574                    RecommendationPriority::Medium,
575                    RecommendationCategory::Coherence,
576                    "Incomplete Document Chains",
577                )
578                .with_description(
579                    "Many document flows (P2P, O2C) do not complete to final payment/receipt. \
580                     This reduces realism for AP/AR aging analysis.",
581                )
582                .with_root_cause(
583                    RootCause::new("Document flow completion rates set too low")
584                        .with_explanation(
585                            "Real business processes typically complete most document flows. \
586                             Very low completion rates may not be realistic.",
587                        )
588                        .with_evidence(format!(
589                            "P2P: {:.1}%, O2C: {:.1}% (threshold: {:.1}%)",
590                            doc_chain.p2p_completion_rate * 100.0,
591                            doc_chain.o2c_completion_rate * 100.0,
592                            self.thresholds.document_chain_completion_min * 100.0
593                        ))
594                        .with_confidence(0.7),
595                )
596                .with_action(
597                    SuggestedAction::new("Increase P2P completion rate")
598                        .with_config_change("document_flows.p2p.completion_rate", "0.95")
599                        .with_effort("Low"),
600                )
601                .with_action(
602                    SuggestedAction::new("Increase O2C completion rate")
603                        .with_config_change("document_flows.o2c.completion_rate", "0.95")
604                        .with_effort("Low"),
605                )
606                .with_affected_metric("doc_chain_completion")
607                .with_expected_improvement("Document chain completion > 90%");
608
609                report.add(rec);
610            }
611        }
612    }
613
614    /// Analyze quality evaluation results.
615    fn analyze_quality(
616        &mut self,
617        quality: &crate::quality::QualityEvaluation,
618        report: &mut EnhancementReport,
619    ) {
620        // Check duplicates
621        if let Some(ref uniqueness) = quality.uniqueness {
622            if uniqueness.duplicate_rate > self.thresholds.duplicate_rate_max {
623                let rec = Recommendation::new(
624                    self.next_id(),
625                    RecommendationPriority::High,
626                    RecommendationCategory::DataQuality,
627                    "High Duplicate Rate",
628                )
629                .with_description(
630                    "Excessive duplicate records detected in the generated data. \
631                     This may cause issues in downstream processing.",
632                )
633                .with_root_cause(
634                    RootCause::new("Duplicate injection rate set too high")
635                        .with_explanation(
636                            "Data quality variations can inject duplicates, but \
637                             high rates may be unrealistic for most use cases.",
638                        )
639                        .with_evidence(format!(
640                            "Duplicate rate: {:.2}% (threshold: {:.2}%)",
641                            uniqueness.duplicate_rate * 100.0,
642                            self.thresholds.duplicate_rate_max * 100.0
643                        ))
644                        .with_confidence(0.9),
645                )
646                .with_action(
647                    SuggestedAction::new("Reduce duplicate injection rate")
648                        .with_config_change("data_quality.duplicates.exact_rate", "0.005")
649                        .with_effort("Low"),
650                )
651                .with_affected_metric("duplicate_rate")
652                .with_expected_improvement("Duplicate rate < 1%");
653
654                report.add(rec);
655            }
656        }
657
658        // Check completeness
659        if let Some(ref completeness) = quality.completeness {
660            if completeness.overall_completeness < self.thresholds.completeness_rate_min {
661                let rec = Recommendation::new(
662                    self.next_id(),
663                    RecommendationPriority::Medium,
664                    RecommendationCategory::DataQuality,
665                    "Low Data Completeness",
666                )
667                .with_description(
668                    "Many fields have missing values. While some missing data is realistic, \
669                     excessive missing values may reduce data utility.",
670                )
671                .with_root_cause(
672                    RootCause::new("Missing value injection rate set too high")
673                        .with_explanation(
674                            "Data quality variations inject missing values to simulate \
675                             real-world data quality issues, but rates may be too aggressive.",
676                        )
677                        .with_evidence(format!(
678                            "Completeness: {:.1}% (threshold: {:.1}%)",
679                            completeness.overall_completeness * 100.0,
680                            self.thresholds.completeness_rate_min * 100.0
681                        ))
682                        .with_confidence(0.8),
683                )
684                .with_action(
685                    SuggestedAction::new("Reduce missing value injection rate")
686                        .with_config_change("data_quality.missing_values.overall_rate", "0.02")
687                        .with_effort("Low"),
688                )
689                .with_affected_metric("completeness_rate")
690                .with_expected_improvement("Completeness > 95%");
691
692                report.add(rec);
693            }
694        }
695    }
696
697    /// Analyze ML readiness evaluation results.
698    fn analyze_ml_readiness(
699        &mut self,
700        ml: &crate::ml::MLReadinessEvaluation,
701        report: &mut EnhancementReport,
702    ) {
703        // Check anomaly rate
704        if let Some(ref labels) = ml.labels {
705            if labels.anomaly_rate < self.thresholds.anomaly_rate_min {
706                let rec = Recommendation::new(
707                    self.next_id(),
708                    RecommendationPriority::High,
709                    RecommendationCategory::MLReadiness,
710                    "Insufficient Anomaly Rate",
711                )
712                .with_description(
713                    "Too few anomalies for effective ML training. Anomaly detection \
714                     models need sufficient positive examples.",
715                )
716                .with_root_cause(
717                    RootCause::new("Anomaly injection rate set too low")
718                        .with_explanation(
719                            "ML models for anomaly detection typically need 1-10% anomaly rate \
720                             during training to learn effective patterns.",
721                        )
722                        .with_evidence(format!(
723                            "Anomaly rate: {:.2}% (minimum: {:.2}%)",
724                            labels.anomaly_rate * 100.0,
725                            self.thresholds.anomaly_rate_min * 100.0
726                        ))
727                        .with_confidence(0.9),
728                )
729                .with_action(
730                    SuggestedAction::new("Increase anomaly injection rate")
731                        .with_config_change("anomaly_injection.base_rate", "0.05")
732                        .with_effort("Low"),
733                )
734                .with_affected_metric("anomaly_rate")
735                .with_expected_improvement("Anomaly rate 1-10% for ML training");
736
737                report.add(rec);
738            } else if labels.anomaly_rate > self.thresholds.anomaly_rate_max {
739                let rec = Recommendation::new(
740                    self.next_id(),
741                    RecommendationPriority::Medium,
742                    RecommendationCategory::MLReadiness,
743                    "Excessive Anomaly Rate",
744                )
745                .with_description(
746                    "Too many anomalies may reduce model effectiveness and make \
747                     the data unrealistic for testing.",
748                )
749                .with_root_cause(
750                    RootCause::new("Anomaly injection rate set too high")
751                        .with_explanation(
752                            "While anomalies are needed for ML training, rates above 20% \
753                             are typically unrealistic and may confuse models.",
754                        )
755                        .with_evidence(format!(
756                            "Anomaly rate: {:.1}% (maximum: {:.1}%)",
757                            labels.anomaly_rate * 100.0,
758                            self.thresholds.anomaly_rate_max * 100.0
759                        ))
760                        .with_confidence(0.75),
761                )
762                .with_action(
763                    SuggestedAction::new("Reduce anomaly injection rate")
764                        .with_config_change("anomaly_injection.base_rate", "0.05")
765                        .with_effort("Low"),
766                )
767                .with_affected_metric("anomaly_rate")
768                .with_expected_improvement("Anomaly rate within 1-20% range");
769
770                report.add(rec);
771            }
772
773            // Check label coverage
774            if labels.label_coverage < self.thresholds.label_coverage_min {
775                let rec = Recommendation::new(
776                    self.next_id(),
777                    RecommendationPriority::High,
778                    RecommendationCategory::MLReadiness,
779                    "Incomplete Label Coverage",
780                )
781                .with_description(
782                    "Not all records have proper labels. Supervised ML requires \
783                     complete labels for training.",
784                )
785                .with_root_cause(
786                    RootCause::new("Label generation not capturing all anomalies")
787                        .with_explanation(
788                            "Every injected anomaly should have a corresponding label. \
789                             Missing labels indicate a labeling pipeline issue.",
790                        )
791                        .with_evidence(format!(
792                            "Label coverage: {:.1}% (threshold: {:.1}%)",
793                            labels.label_coverage * 100.0,
794                            self.thresholds.label_coverage_min * 100.0
795                        ))
796                        .with_confidence(0.85),
797                )
798                .with_action(
799                    SuggestedAction::new("Enable complete label generation")
800                        .with_config_change("anomaly_injection.label_all", "true")
801                        .with_effort("Low"),
802                )
803                .with_affected_metric("label_coverage")
804                .with_expected_improvement("Label coverage > 99%");
805
806                report.add(rec);
807            }
808        }
809
810        // Check graph connectivity
811        if let Some(ref graph) = ml.graph {
812            if graph.connectivity_score < self.thresholds.graph_connectivity_min {
813                let rec = Recommendation::new(
814                    self.next_id(),
815                    RecommendationPriority::Medium,
816                    RecommendationCategory::MLReadiness,
817                    "Low Graph Connectivity",
818                )
819                .with_description(
820                    "The transaction graph has isolated components, which may \
821                     reduce GNN model effectiveness.",
822                )
823                .with_root_cause(
824                    RootCause::new("Insufficient entity relationships in generated data")
825                        .with_explanation(
826                            "Graph neural networks benefit from well-connected graphs. \
827                             Isolated components receive no message passing.",
828                        )
829                        .with_evidence(format!(
830                            "Connectivity: {:.1}% (threshold: {:.1}%)",
831                            graph.connectivity_score * 100.0,
832                            self.thresholds.graph_connectivity_min * 100.0
833                        ))
834                        .with_confidence(0.7),
835                )
836                .with_action(
837                    SuggestedAction::new("Enable graph connectivity enforcement")
838                        .with_config_change("graph_export.ensure_connected", "true")
839                        .with_effort("Medium"),
840                )
841                .with_affected_metric("graph_connectivity")
842                .with_expected_improvement("Graph connectivity > 95%");
843
844                report.add(rec);
845            }
846        }
847    }
848}
849
850impl Default for RecommendationEngine {
851    fn default() -> Self {
852        Self::new()
853    }
854}
855
#[cfg(test)]
mod tests {
    use super::*;

    /// A recommendation assembled through the builder keeps the id and
    /// priority passed to `new` and records exactly one root cause and one
    /// action.
    #[test]
    fn test_recommendation_builder() {
        let cause = RootCause::new("Test cause").with_confidence(0.8);
        let action = SuggestedAction::new("Fix it").with_config_change("test.path", "value");

        let built = Recommendation::new(
            "REC-001",
            RecommendationPriority::High,
            RecommendationCategory::Statistical,
            "Test Issue",
        )
        .with_description("Test description")
        .with_root_cause(cause)
        .with_action(action);

        assert_eq!(built.id, "REC-001");
        assert_eq!(built.priority, RecommendationPriority::High);
        assert_eq!(built.root_causes.len(), 1);
        assert_eq!(built.actions.len(), 1);
    }

    /// After one critical and one low-priority recommendation are added and
    /// the report is finalized, the report flags critical issues, holds both
    /// entries, and has a health score below 1.0.
    #[test]
    fn test_enhancement_report() {
        let critical = Recommendation::new(
            "REC-001",
            RecommendationPriority::Critical,
            RecommendationCategory::Coherence,
            "Critical Issue",
        );
        let minor = Recommendation::new(
            "REC-002",
            RecommendationPriority::Low,
            RecommendationCategory::DataQuality,
            "Minor Issue",
        );

        let mut summary = EnhancementReport::new();
        summary.add(critical);
        summary.add(minor);
        summary.finalize();

        assert!(summary.has_critical_issues());
        assert_eq!(summary.recommendations.len(), 2);
        assert!(summary.health_score < 1.0);
    }

    /// Running the engine over a freshly constructed evaluation is expected
    /// to yield no recommendations and a health score of exactly 1.0.
    #[test]
    fn test_recommendation_engine() {
        let empty_evaluation = ComprehensiveEvaluation::new();
        let mut recommender = RecommendationEngine::new();

        let outcome = recommender.generate_report(&empty_evaluation);

        // Empty evaluation should produce no recommendations
        assert!(outcome.recommendations.is_empty());
        assert_eq!(outcome.health_score, 1.0);
    }

    /// Each `with_evidence` call adds one evidence entry, and
    /// `with_confidence` stores the value it was given.
    #[test]
    fn test_root_cause_builder() {
        let described = RootCause::new("Test cause").with_explanation("Detailed explanation");
        let evidenced = described
            .with_evidence("Evidence 1")
            .with_evidence("Evidence 2");
        let root_cause = evidenced.with_confidence(0.9);

        assert_eq!(root_cause.evidence.len(), 2);
        assert_eq!(root_cause.confidence, 0.9);
    }

    /// An action given a config change is expected to be auto-applicable and
    /// to expose the config path it was built with.
    #[test]
    fn test_suggested_action() {
        let configured = SuggestedAction::new("Do something").with_config_change("path", "value");
        let suggestion = configured.with_effort("Low");

        assert!(suggestion.auto_applicable);
        assert_eq!(suggestion.config_path.as_deref(), Some("path"));
    }
}
datasynth_eval/enhancement/recommendation_engine.rs

datasynth_eval/enhancement/
recommendation_engine.rs