datasynth_eval/enhancement/
recommendation_engine.rs

1//! Recommendation engine for providing prioritized enhancement suggestions.
2//!
3//! The recommendation engine performs root cause analysis on evaluation
4//! failures and provides actionable, prioritized recommendations.
5
6use crate::{ComprehensiveEvaluation, EvaluationThresholds};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// Priority level for recommendations.
///
/// Variants are declared from most to least urgent with ascending
/// discriminants, so the derived `Ord` places `Critical` first when
/// recommendations are sorted in ascending order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum RecommendationPriority {
    /// Critical issues that will cause data to fail validation.
    Critical = 0,
    /// High priority issues affecting data quality significantly.
    High = 1,
    /// Medium priority improvements.
    Medium = 2,
    /// Low priority enhancements.
    Low = 3,
    /// Informational only, no action required.
    Info = 4,
}
24
25impl RecommendationPriority {
26    /// Get display name.
27    pub fn name(&self) -> &'static str {
28        match self {
29            RecommendationPriority::Critical => "Critical",
30            RecommendationPriority::High => "High",
31            RecommendationPriority::Medium => "Medium",
32            RecommendationPriority::Low => "Low",
33            RecommendationPriority::Info => "Info",
34        }
35    }
36}
37
/// Category of the recommendation.
///
/// Identifies which evaluation area a recommendation belongs to; the
/// category's display name is used as the key when recommendations are
/// counted per category in an `EnhancementReport`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RecommendationCategory {
    /// Statistical distribution issues.
    Statistical,
    /// Data coherence issues (balance, subledger, etc.).
    Coherence,
    /// Data quality issues (duplicates, missing, etc.).
    DataQuality,
    /// ML readiness issues.
    MLReadiness,
    /// Performance issues.
    Performance,
    /// Configuration issues.
    Configuration,
}
54
55impl RecommendationCategory {
56    /// Get display name.
57    pub fn name(&self) -> &'static str {
58        match self {
59            RecommendationCategory::Statistical => "Statistical Quality",
60            RecommendationCategory::Coherence => "Data Coherence",
61            RecommendationCategory::DataQuality => "Data Quality",
62            RecommendationCategory::MLReadiness => "ML Readiness",
63            RecommendationCategory::Performance => "Performance",
64            RecommendationCategory::Configuration => "Configuration",
65        }
66    }
67}
68
/// Root cause identified for an issue.
///
/// Attached to a `Recommendation` to explain why a check failed and how
/// sure the engine is about that explanation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RootCause {
    /// Short description of the root cause.
    pub description: String,
    /// Detailed explanation.
    pub explanation: String,
    /// Evidence supporting this root cause (one entry per observation).
    pub evidence: Vec<String>,
    /// Confidence level (0.0-1.0).
    pub confidence: f64,
}
81
82impl RootCause {
83    /// Create a new root cause.
84    pub fn new(description: impl Into<String>) -> Self {
85        Self {
86            description: description.into(),
87            explanation: String::new(),
88            evidence: Vec::new(),
89            confidence: 0.5,
90        }
91    }
92
93    /// Add explanation.
94    pub fn with_explanation(mut self, explanation: impl Into<String>) -> Self {
95        self.explanation = explanation.into();
96        self
97    }
98
99    /// Add evidence.
100    pub fn with_evidence(mut self, evidence: impl Into<String>) -> Self {
101        self.evidence.push(evidence.into());
102        self
103    }
104
105    /// Set confidence.
106    pub fn with_confidence(mut self, confidence: f64) -> Self {
107        self.confidence = confidence.clamp(0.0, 1.0);
108        self
109    }
110}
111
/// A single recommendation.
///
/// Bundles the classification (priority, category), the human-readable
/// text, and the supporting root causes and suggested actions for one
/// detected issue.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Recommendation {
    /// Unique identifier.
    pub id: String,
    /// Priority level.
    pub priority: RecommendationPriority,
    /// Category.
    pub category: RecommendationCategory,
    /// Short title.
    pub title: String,
    /// Detailed description.
    pub description: String,
    /// Root causes identified.
    pub root_causes: Vec<RootCause>,
    /// Suggested actions to take.
    pub actions: Vec<SuggestedAction>,
    /// Metrics affected.
    pub affected_metrics: Vec<String>,
    /// Expected improvement if addressed.
    pub expected_improvement: String,
}
134
135impl Recommendation {
136    /// Create a new recommendation.
137    pub fn new(
138        id: impl Into<String>,
139        priority: RecommendationPriority,
140        category: RecommendationCategory,
141        title: impl Into<String>,
142    ) -> Self {
143        Self {
144            id: id.into(),
145            priority,
146            category,
147            title: title.into(),
148            description: String::new(),
149            root_causes: Vec::new(),
150            actions: Vec::new(),
151            affected_metrics: Vec::new(),
152            expected_improvement: String::new(),
153        }
154    }
155
156    /// Add description.
157    pub fn with_description(mut self, description: impl Into<String>) -> Self {
158        self.description = description.into();
159        self
160    }
161
162    /// Add root cause.
163    pub fn with_root_cause(mut self, root_cause: RootCause) -> Self {
164        self.root_causes.push(root_cause);
165        self
166    }
167
168    /// Add action.
169    pub fn with_action(mut self, action: SuggestedAction) -> Self {
170        self.actions.push(action);
171        self
172    }
173
174    /// Add affected metric.
175    pub fn with_affected_metric(mut self, metric: impl Into<String>) -> Self {
176        self.affected_metrics.push(metric.into());
177        self
178    }
179
180    /// Set expected improvement.
181    pub fn with_expected_improvement(mut self, improvement: impl Into<String>) -> Self {
182        self.expected_improvement = improvement.into();
183        self
184    }
185}
186
/// A suggested action to address an issue.
///
/// An action is either a free-form instruction or a concrete configuration
/// change, in which case `config_path` and `suggested_value` are populated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SuggestedAction {
    /// Description of the action.
    pub description: String,
    /// Configuration path if applicable.
    pub config_path: Option<String>,
    /// Suggested value if applicable.
    pub suggested_value: Option<String>,
    /// Whether this can be automatically applied.
    pub auto_applicable: bool,
    /// Estimated effort (Low, Medium, High).
    pub effort: String,
}
201
202impl SuggestedAction {
203    /// Create a new action.
204    pub fn new(description: impl Into<String>) -> Self {
205        Self {
206            description: description.into(),
207            config_path: None,
208            suggested_value: None,
209            auto_applicable: false,
210            effort: "Medium".to_string(),
211        }
212    }
213
214    /// Set config change.
215    pub fn with_config_change(mut self, path: impl Into<String>, value: impl Into<String>) -> Self {
216        self.config_path = Some(path.into());
217        self.suggested_value = Some(value.into());
218        self.auto_applicable = true;
219        self
220    }
221
222    /// Set effort level.
223    pub fn with_effort(mut self, effort: impl Into<String>) -> Self {
224        self.effort = effort.into();
225        self
226    }
227
228    /// Mark as not auto-applicable.
229    pub fn manual_only(mut self) -> Self {
230        self.auto_applicable = false;
231        self
232    }
233}
234
/// Enhancement report containing all recommendations.
///
/// The summary maps are maintained by `add`; the health score, top issues,
/// and quick wins are computed by `finalize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancementReport {
    /// All recommendations.
    pub recommendations: Vec<Recommendation>,
    /// Summary by category (category display name -> count).
    pub category_summary: HashMap<String, usize>,
    /// Summary by priority (priority display name -> count).
    pub priority_summary: HashMap<String, usize>,
    /// Overall health score (0.0-1.0).
    pub health_score: f64,
    /// Top issues to address.
    pub top_issues: Vec<String>,
    /// Quick wins (easy to fix with high impact).
    pub quick_wins: Vec<String>,
}
251
252impl EnhancementReport {
253    /// Create a new empty report.
254    pub fn new() -> Self {
255        Self {
256            recommendations: Vec::new(),
257            category_summary: HashMap::new(),
258            priority_summary: HashMap::new(),
259            health_score: 1.0,
260            top_issues: Vec::new(),
261            quick_wins: Vec::new(),
262        }
263    }
264
265    /// Add a recommendation.
266    pub fn add(&mut self, recommendation: Recommendation) {
267        // Update summaries
268        *self
269            .category_summary
270            .entry(recommendation.category.name().to_string())
271            .or_insert(0) += 1;
272        *self
273            .priority_summary
274            .entry(recommendation.priority.name().to_string())
275            .or_insert(0) += 1;
276
277        self.recommendations.push(recommendation);
278    }
279
280    /// Finalize the report (calculate scores, sort, etc.).
281    pub fn finalize(&mut self) {
282        // Sort recommendations by priority
283        self.recommendations
284            .sort_by(|a, b| a.priority.cmp(&b.priority));
285
286        // Calculate health score
287        let critical_count = *self.priority_summary.get("Critical").unwrap_or(&0);
288        let high_count = *self.priority_summary.get("High").unwrap_or(&0);
289        let medium_count = *self.priority_summary.get("Medium").unwrap_or(&0);
290
291        let penalty =
292            critical_count as f64 * 0.3 + high_count as f64 * 0.1 + medium_count as f64 * 0.02;
293        self.health_score = (1.0 - penalty).max(0.0);
294
295        // Identify top issues (critical and high priority)
296        self.top_issues = self
297            .recommendations
298            .iter()
299            .filter(|r| {
300                r.priority == RecommendationPriority::Critical
301                    || r.priority == RecommendationPriority::High
302            })
303            .take(5)
304            .map(|r| r.title.clone())
305            .collect();
306
307        // Identify quick wins (auto-applicable actions)
308        self.quick_wins = self
309            .recommendations
310            .iter()
311            .filter(|r| r.actions.iter().any(|a| a.auto_applicable))
312            .take(5)
313            .map(|r| r.title.clone())
314            .collect();
315    }
316
317    /// Get recommendations by category.
318    pub fn by_category(&self, category: RecommendationCategory) -> Vec<&Recommendation> {
319        self.recommendations
320            .iter()
321            .filter(|r| r.category == category)
322            .collect()
323    }
324
325    /// Get recommendations by priority.
326    pub fn by_priority(&self, priority: RecommendationPriority) -> Vec<&Recommendation> {
327        self.recommendations
328            .iter()
329            .filter(|r| r.priority == priority)
330            .collect()
331    }
332
333    /// Check if there are critical issues.
334    pub fn has_critical_issues(&self) -> bool {
335        *self.priority_summary.get("Critical").unwrap_or(&0) > 0
336    }
337}
338
339impl Default for EnhancementReport {
340    fn default() -> Self {
341        Self::new()
342    }
343}
344
/// Engine for generating recommendations from evaluation results.
///
/// Holds the thresholds that evaluation metrics are compared against and a
/// running counter used to number the recommendations it emits.
pub struct RecommendationEngine {
    /// Thresholds for comparison.
    thresholds: EvaluationThresholds,
    /// Counter for generating unique IDs.
    id_counter: u32,
}
352
353impl RecommendationEngine {
354    /// Create a new recommendation engine.
355    pub fn new() -> Self {
356        Self::with_thresholds(EvaluationThresholds::default())
357    }
358
359    /// Create with specific thresholds.
360    pub fn with_thresholds(thresholds: EvaluationThresholds) -> Self {
361        Self {
362            thresholds,
363            id_counter: 0,
364        }
365    }
366
367    /// Generate an enhancement report from evaluation results.
368    pub fn generate_report(&mut self, evaluation: &ComprehensiveEvaluation) -> EnhancementReport {
369        let mut report = EnhancementReport::new();
370
371        // Analyze statistical issues
372        self.analyze_statistical(&evaluation.statistical, &mut report);
373
374        // Analyze coherence issues
375        self.analyze_coherence(&evaluation.coherence, &mut report);
376
377        // Analyze quality issues
378        self.analyze_quality(&evaluation.quality, &mut report);
379
380        // Analyze ML readiness issues
381        self.analyze_ml_readiness(&evaluation.ml_readiness, &mut report);
382
383        // Analyze banking issues
384        if let Some(ref banking) = evaluation.banking {
385            self.analyze_banking(banking, &mut report);
386        }
387
388        // Analyze process mining issues
389        if let Some(ref pm) = evaluation.process_mining {
390            self.analyze_process_mining(pm, &mut report);
391        }
392
393        // Finalize the report
394        report.finalize();
395
396        report
397    }
398
399    /// Generate a unique ID.
400    fn next_id(&mut self) -> String {
401        self.id_counter += 1;
402        format!("REC-{:04}", self.id_counter)
403    }
404
405    /// Analyze statistical evaluation results.
406    fn analyze_statistical(
407        &mut self,
408        stat: &crate::statistical::StatisticalEvaluation,
409        report: &mut EnhancementReport,
410    ) {
411        // Check Benford's Law
412        if let Some(ref benford) = stat.benford {
413            if benford.p_value < self.thresholds.benford_p_value_min {
414                let severity = if benford.p_value < 0.01 {
415                    RecommendationPriority::High
416                } else {
417                    RecommendationPriority::Medium
418                };
419
420                let rec = Recommendation::new(
421                    self.next_id(),
422                    severity,
423                    RecommendationCategory::Statistical,
424                    "Benford's Law Non-Conformance",
425                )
426                .with_description(
427                    "Generated transaction amounts do not follow Benford's Law, \
428                     which may indicate unrealistic data patterns.",
429                )
430                .with_root_cause(
431                    RootCause::new("Amount generation not using Benford-compliant distribution")
432                        .with_explanation(
433                            "Real financial data naturally follows Benford's Law for first digits. \
434                             Random or uniform distributions will fail this test.",
435                        )
436                        .with_evidence(format!("p-value: {:.4} (threshold: {:.4})", benford.p_value, self.thresholds.benford_p_value_min))
437                        .with_confidence(0.9),
438                )
439                .with_action(
440                    SuggestedAction::new("Enable Benford's Law compliance in amount generation")
441                        .with_config_change("transactions.amount.benford_compliance", "true")
442                        .with_effort("Low"),
443                )
444                .with_affected_metric("benford_p_value")
445                .with_expected_improvement("Statistical p-value should increase to > 0.05");
446
447                report.add(rec);
448            }
449        }
450
451        // Check temporal patterns
452        if let Some(ref temporal) = stat.temporal {
453            if temporal.pattern_correlation < self.thresholds.temporal_correlation_min {
454                let rec = Recommendation::new(
455                    self.next_id(),
456                    RecommendationPriority::Medium,
457                    RecommendationCategory::Statistical,
458                    "Weak Temporal Patterns",
459                )
460                .with_description(
461                    "Generated data lacks realistic temporal patterns such as \
462                     seasonality, month-end spikes, and weekday variations.",
463                )
464                .with_root_cause(
465                    RootCause::new("Insufficient temporal variation in generation")
466                        .with_explanation(
467                            "Real financial data shows strong temporal patterns including \
468                             month-end closing activity, seasonal variations, and weekday effects.",
469                        )
470                        .with_evidence(format!(
471                            "Correlation: {:.3} (threshold: {:.3})",
472                            temporal.pattern_correlation, self.thresholds.temporal_correlation_min
473                        ))
474                        .with_confidence(0.75),
475                )
476                .with_action(
477                    SuggestedAction::new("Increase seasonality strength")
478                        .with_config_change("transactions.temporal.seasonality_strength", "0.8")
479                        .with_effort("Low"),
480                )
481                .with_action(
482                    SuggestedAction::new("Enable month-end spike patterns")
483                        .with_config_change("transactions.temporal.month_end_spike", "true")
484                        .with_effort("Low"),
485                )
486                .with_affected_metric("temporal_correlation")
487                .with_expected_improvement("Better temporal pattern correlation (> 0.8)");
488
489                report.add(rec);
490            }
491        }
492    }
493
494    /// Analyze coherence evaluation results.
495    fn analyze_coherence(
496        &mut self,
497        coherence: &crate::coherence::CoherenceEvaluation,
498        report: &mut EnhancementReport,
499    ) {
500        // Check balance sheet
501        if let Some(ref balance) = coherence.balance {
502            if !balance.equation_balanced {
503                let rec = Recommendation::new(
504                    self.next_id(),
505                    RecommendationPriority::Critical,
506                    RecommendationCategory::Coherence,
507                    "Balance Sheet Imbalance",
508                )
509                .with_description(
510                    "The fundamental accounting equation (Assets = Liabilities + Equity) is violated. \
511                     This is a critical data integrity issue.",
512                )
513                .with_root_cause(
514                    RootCause::new("Unbalanced journal entries generated")
515                        .with_explanation(
516                            "Every journal entry must have equal debits and credits. \
517                             An imbalance indicates entries were created incorrectly.",
518                        )
519                        .with_evidence(format!("Max imbalance: {}", balance.max_imbalance))
520                        .with_confidence(0.95),
521                )
522                .with_action(
523                    SuggestedAction::new("Enable balance coherence validation")
524                        .with_config_change("balance.coherence_enabled", "true")
525                        .with_effort("Low"),
526                )
527                .with_action(
528                    SuggestedAction::new("Review JE generation logic for balance enforcement")
529                        .manual_only()
530                        .with_effort("High"),
531                )
532                .with_affected_metric("balance_equation")
533                .with_expected_improvement("Zero imbalance in trial balance");
534
535                report.add(rec);
536            }
537        }
538
539        // Check intercompany matching
540        if let Some(ref ic) = coherence.intercompany {
541            if ic.match_rate < self.thresholds.ic_match_rate_min {
542                let rec = Recommendation::new(
543                    self.next_id(),
544                    RecommendationPriority::High,
545                    RecommendationCategory::Coherence,
546                    "Intercompany Matching Issues",
547                )
548                .with_description(
549                    "Intercompany transactions are not fully matched between entities. \
550                     This will cause issues during consolidation.",
551                )
552                .with_root_cause(
553                    RootCause::new("IC transaction pairs not properly linked")
554                        .with_explanation(
555                            "Intercompany transactions should always have matching entries \
556                             in both the selling and buying entities.",
557                        )
558                        .with_evidence(format!(
559                            "Match rate: {:.1}% (threshold: {:.1}%)",
560                            ic.match_rate * 100.0,
561                            self.thresholds.ic_match_rate_min * 100.0
562                        ))
563                        .with_confidence(0.85),
564                )
565                .with_action(
566                    SuggestedAction::new("Increase IC matching precision")
567                        .with_config_change("intercompany.match_precision", "0.99")
568                        .with_effort("Low"),
569                )
570                .with_affected_metric("ic_match_rate")
571                .with_expected_improvement("IC match rate > 95%");
572
573                report.add(rec);
574            }
575        }
576
577        // Check enterprise process chain coherence
578        self.analyze_enterprise_coherence(coherence, report);
579
580        // Check document chains
581        if let Some(ref doc_chain) = coherence.document_chain {
582            let avg_completion =
583                (doc_chain.p2p_completion_rate + doc_chain.o2c_completion_rate) / 2.0;
584            if avg_completion < self.thresholds.document_chain_completion_min {
585                let rec = Recommendation::new(
586                    self.next_id(),
587                    RecommendationPriority::Medium,
588                    RecommendationCategory::Coherence,
589                    "Incomplete Document Chains",
590                )
591                .with_description(
592                    "Many document flows (P2P, O2C) do not complete to final payment/receipt. \
593                     This reduces realism for AP/AR aging analysis.",
594                )
595                .with_root_cause(
596                    RootCause::new("Document flow completion rates set too low")
597                        .with_explanation(
598                            "Real business processes typically complete most document flows. \
599                             Very low completion rates may not be realistic.",
600                        )
601                        .with_evidence(format!(
602                            "P2P: {:.1}%, O2C: {:.1}% (threshold: {:.1}%)",
603                            doc_chain.p2p_completion_rate * 100.0,
604                            doc_chain.o2c_completion_rate * 100.0,
605                            self.thresholds.document_chain_completion_min * 100.0
606                        ))
607                        .with_confidence(0.7),
608                )
609                .with_action(
610                    SuggestedAction::new("Increase P2P completion rate")
611                        .with_config_change("document_flows.p2p.completion_rate", "0.95")
612                        .with_effort("Low"),
613                )
614                .with_action(
615                    SuggestedAction::new("Increase O2C completion rate")
616                        .with_config_change("document_flows.o2c.completion_rate", "0.95")
617                        .with_effort("Low"),
618                )
619                .with_affected_metric("doc_chain_completion")
620                .with_expected_improvement("Document chain completion > 90%");
621
622                report.add(rec);
623            }
624        }
625    }
626
627    /// Analyze quality evaluation results.
628    fn analyze_quality(
629        &mut self,
630        quality: &crate::quality::QualityEvaluation,
631        report: &mut EnhancementReport,
632    ) {
633        // Check duplicates
634        if let Some(ref uniqueness) = quality.uniqueness {
635            if uniqueness.duplicate_rate > self.thresholds.duplicate_rate_max {
636                let rec = Recommendation::new(
637                    self.next_id(),
638                    RecommendationPriority::High,
639                    RecommendationCategory::DataQuality,
640                    "High Duplicate Rate",
641                )
642                .with_description(
643                    "Excessive duplicate records detected in the generated data. \
644                     This may cause issues in downstream processing.",
645                )
646                .with_root_cause(
647                    RootCause::new("Duplicate injection rate set too high")
648                        .with_explanation(
649                            "Data quality variations can inject duplicates, but \
650                             high rates may be unrealistic for most use cases.",
651                        )
652                        .with_evidence(format!(
653                            "Duplicate rate: {:.2}% (threshold: {:.2}%)",
654                            uniqueness.duplicate_rate * 100.0,
655                            self.thresholds.duplicate_rate_max * 100.0
656                        ))
657                        .with_confidence(0.9),
658                )
659                .with_action(
660                    SuggestedAction::new("Reduce duplicate injection rate")
661                        .with_config_change("data_quality.duplicates.exact_rate", "0.005")
662                        .with_effort("Low"),
663                )
664                .with_affected_metric("duplicate_rate")
665                .with_expected_improvement("Duplicate rate < 1%");
666
667                report.add(rec);
668            }
669        }
670
671        // Check completeness
672        if let Some(ref completeness) = quality.completeness {
673            if completeness.overall_completeness < self.thresholds.completeness_rate_min {
674                let rec = Recommendation::new(
675                    self.next_id(),
676                    RecommendationPriority::Medium,
677                    RecommendationCategory::DataQuality,
678                    "Low Data Completeness",
679                )
680                .with_description(
681                    "Many fields have missing values. While some missing data is realistic, \
682                     excessive missing values may reduce data utility.",
683                )
684                .with_root_cause(
685                    RootCause::new("Missing value injection rate set too high")
686                        .with_explanation(
687                            "Data quality variations inject missing values to simulate \
688                             real-world data quality issues, but rates may be too aggressive.",
689                        )
690                        .with_evidence(format!(
691                            "Completeness: {:.1}% (threshold: {:.1}%)",
692                            completeness.overall_completeness * 100.0,
693                            self.thresholds.completeness_rate_min * 100.0
694                        ))
695                        .with_confidence(0.8),
696                )
697                .with_action(
698                    SuggestedAction::new("Reduce missing value injection rate")
699                        .with_config_change("data_quality.missing_values.overall_rate", "0.02")
700                        .with_effort("Low"),
701                )
702                .with_affected_metric("completeness_rate")
703                .with_expected_improvement("Completeness > 95%");
704
705                report.add(rec);
706            }
707        }
708    }
709
710    /// Analyze new coherence evaluators (enterprise process chains).
711    fn analyze_enterprise_coherence(
712        &mut self,
713        coherence: &crate::coherence::CoherenceEvaluation,
714        report: &mut EnhancementReport,
715    ) {
716        // HR/Payroll accuracy
717        if let Some(ref hr) = coherence.hr_payroll {
718            if !hr.passes {
719                let rec = Recommendation::new(
720                    self.next_id(),
721                    RecommendationPriority::High,
722                    RecommendationCategory::Coherence,
723                    "Payroll Calculation Errors",
724                )
725                .with_description(
726                    "Payroll calculations (gross-to-net, component sums) contain arithmetic errors.",
727                )
728                .with_root_cause(
729                    RootCause::new("Payroll arithmetic not enforced during generation")
730                        .with_explanation(
731                            "Real payroll systems enforce exact arithmetic: net = gross - deductions. \
732                             Generated data should maintain these invariants.",
733                        )
734                        .with_confidence(0.9),
735                )
736                .with_action(
737                    SuggestedAction::new("Ensure payroll calculation precision")
738                        .with_config_change("hr.payroll.calculation_precision", "exact")
739                        .with_effort("Low"),
740                )
741                .with_affected_metric("payroll_accuracy")
742                .with_expected_improvement("Payroll arithmetic accuracy > 99.9%");
743
744                report.add(rec);
745            }
746        }
747
748        // Manufacturing yield
749        if let Some(ref mfg) = coherence.manufacturing {
750            if !mfg.passes {
751                let rec = Recommendation::new(
752                    self.next_id(),
753                    RecommendationPriority::Medium,
754                    RecommendationCategory::Coherence,
755                    "Manufacturing Data Inconsistencies",
756                )
757                .with_description(
758                    "Manufacturing data shows inconsistencies in yield rates, \
759                     operation sequencing, or quality inspection calculations.",
760                )
761                .with_root_cause(
762                    RootCause::new("Manufacturing constraints not fully enforced")
763                        .with_explanation(
764                            "Production orders should have consistent yield calculations, \
765                             monotonically ordered operations, and valid quality metrics.",
766                        )
767                        .with_confidence(0.8),
768                )
769                .with_action(
770                    SuggestedAction::new("Enable manufacturing constraint validation")
771                        .with_config_change("manufacturing.validate_constraints", "true")
772                        .with_effort("Medium"),
773                )
774                .with_affected_metric("manufacturing_yield")
775                .with_expected_improvement("Yield consistency > 95%");
776
777                report.add(rec);
778            }
779        }
780
781        // Financial reporting tie-back
782        if let Some(ref fr) = coherence.financial_reporting {
783            if !fr.passes {
784                let rec = Recommendation::new(
785                    self.next_id(),
786                    RecommendationPriority::Critical,
787                    RecommendationCategory::Coherence,
788                    "Financial Statement Tie-Back Failures",
789                )
790                .with_description(
791                    "Financial statements do not reconcile to the trial balance. \
792                     This is a critical audit concern.",
793                )
794                .with_root_cause(
795                    RootCause::new("Statement generation not derived from GL data")
796                        .with_explanation(
797                            "Financial statements must tie back to trial balance totals. \
798                             Independent generation of statements and GL will cause discrepancies.",
799                        )
800                        .with_confidence(0.95),
801                )
802                .with_action(
803                    SuggestedAction::new("Enable statement-to-TB tie-back enforcement")
804                        .with_config_change("financial_reporting.tie_back_enforced", "true")
805                        .with_effort("Medium"),
806                )
807                .with_affected_metric("financial_reporting_tie_back")
808                .with_expected_improvement("Statement-TB tie-back rate > 99%");
809
810                report.add(rec);
811            }
812        }
813
814        // Sourcing chain
815        if let Some(ref sourcing) = coherence.sourcing {
816            if !sourcing.passes {
817                let rec = Recommendation::new(
818                    self.next_id(),
819                    RecommendationPriority::Medium,
820                    RecommendationCategory::Coherence,
821                    "Incomplete S2C Process Chain",
822                )
823                .with_description(
824                    "Source-to-Contract chain has incomplete flows: \
825                     projects missing RFx events, evaluations, or contracts.",
826                )
827                .with_root_cause(
828                    RootCause::new("S2C completion rates configured too low").with_confidence(0.7),
829                )
830                .with_action(
831                    SuggestedAction::new("Increase S2C completion rates")
832                        .with_config_change("source_to_pay.rfx_completion_rate", "0.95")
833                        .with_effort("Low"),
834                )
835                .with_affected_metric("s2c_chain_completion")
836                .with_expected_improvement("RFx completion rate > 90%");
837
838                report.add(rec);
839            }
840        }
841    }
842
843    /// Analyze ML readiness evaluation results.
844    fn analyze_ml_readiness(
845        &mut self,
846        ml: &crate::ml::MLReadinessEvaluation,
847        report: &mut EnhancementReport,
848    ) {
849        // Check anomaly rate
850        if let Some(ref labels) = ml.labels {
851            if labels.anomaly_rate < self.thresholds.anomaly_rate_min {
852                let rec = Recommendation::new(
853                    self.next_id(),
854                    RecommendationPriority::High,
855                    RecommendationCategory::MLReadiness,
856                    "Insufficient Anomaly Rate",
857                )
858                .with_description(
859                    "Too few anomalies for effective ML training. Anomaly detection \
860                     models need sufficient positive examples.",
861                )
862                .with_root_cause(
863                    RootCause::new("Anomaly injection rate set too low")
864                        .with_explanation(
865                            "ML models for anomaly detection typically need 1-10% anomaly rate \
866                             during training to learn effective patterns.",
867                        )
868                        .with_evidence(format!(
869                            "Anomaly rate: {:.2}% (minimum: {:.2}%)",
870                            labels.anomaly_rate * 100.0,
871                            self.thresholds.anomaly_rate_min * 100.0
872                        ))
873                        .with_confidence(0.9),
874                )
875                .with_action(
876                    SuggestedAction::new("Increase anomaly injection rate")
877                        .with_config_change("anomaly_injection.base_rate", "0.05")
878                        .with_effort("Low"),
879                )
880                .with_affected_metric("anomaly_rate")
881                .with_expected_improvement("Anomaly rate 1-10% for ML training");
882
883                report.add(rec);
884            } else if labels.anomaly_rate > self.thresholds.anomaly_rate_max {
885                let rec = Recommendation::new(
886                    self.next_id(),
887                    RecommendationPriority::Medium,
888                    RecommendationCategory::MLReadiness,
889                    "Excessive Anomaly Rate",
890                )
891                .with_description(
892                    "Too many anomalies may reduce model effectiveness and make \
893                     the data unrealistic for testing.",
894                )
895                .with_root_cause(
896                    RootCause::new("Anomaly injection rate set too high")
897                        .with_explanation(
898                            "While anomalies are needed for ML training, rates above 20% \
899                             are typically unrealistic and may confuse models.",
900                        )
901                        .with_evidence(format!(
902                            "Anomaly rate: {:.1}% (maximum: {:.1}%)",
903                            labels.anomaly_rate * 100.0,
904                            self.thresholds.anomaly_rate_max * 100.0
905                        ))
906                        .with_confidence(0.75),
907                )
908                .with_action(
909                    SuggestedAction::new("Reduce anomaly injection rate")
910                        .with_config_change("anomaly_injection.base_rate", "0.05")
911                        .with_effort("Low"),
912                )
913                .with_affected_metric("anomaly_rate")
914                .with_expected_improvement("Anomaly rate within 1-20% range");
915
916                report.add(rec);
917            }
918
919            // Check label coverage
920            if labels.label_coverage < self.thresholds.label_coverage_min {
921                let rec = Recommendation::new(
922                    self.next_id(),
923                    RecommendationPriority::High,
924                    RecommendationCategory::MLReadiness,
925                    "Incomplete Label Coverage",
926                )
927                .with_description(
928                    "Not all records have proper labels. Supervised ML requires \
929                     complete labels for training.",
930                )
931                .with_root_cause(
932                    RootCause::new("Label generation not capturing all anomalies")
933                        .with_explanation(
934                            "Every injected anomaly should have a corresponding label. \
935                             Missing labels indicate a labeling pipeline issue.",
936                        )
937                        .with_evidence(format!(
938                            "Label coverage: {:.1}% (threshold: {:.1}%)",
939                            labels.label_coverage * 100.0,
940                            self.thresholds.label_coverage_min * 100.0
941                        ))
942                        .with_confidence(0.85),
943                )
944                .with_action(
945                    SuggestedAction::new("Enable complete label generation")
946                        .with_config_change("anomaly_injection.label_all", "true")
947                        .with_effort("Low"),
948                )
949                .with_affected_metric("label_coverage")
950                .with_expected_improvement("Label coverage > 99%");
951
952                report.add(rec);
953            }
954        }
955
956        // Check ML enrichment evaluators
957        self.analyze_ml_enrichment(ml, report);
958
959        // Check graph connectivity
960        if let Some(ref graph) = ml.graph {
961            if graph.connectivity_score < self.thresholds.graph_connectivity_min {
962                let rec = Recommendation::new(
963                    self.next_id(),
964                    RecommendationPriority::Medium,
965                    RecommendationCategory::MLReadiness,
966                    "Low Graph Connectivity",
967                )
968                .with_description(
969                    "The transaction graph has isolated components, which may \
970                     reduce GNN model effectiveness.",
971                )
972                .with_root_cause(
973                    RootCause::new("Insufficient entity relationships in generated data")
974                        .with_explanation(
975                            "Graph neural networks benefit from well-connected graphs. \
976                             Isolated components receive no message passing.",
977                        )
978                        .with_evidence(format!(
979                            "Connectivity: {:.1}% (threshold: {:.1}%)",
980                            graph.connectivity_score * 100.0,
981                            self.thresholds.graph_connectivity_min * 100.0
982                        ))
983                        .with_confidence(0.7),
984                )
985                .with_action(
986                    SuggestedAction::new("Enable graph connectivity enforcement")
987                        .with_config_change("graph_export.ensure_connected", "true")
988                        .with_effort("Medium"),
989                )
990                .with_affected_metric("graph_connectivity")
991                .with_expected_improvement("Graph connectivity > 95%");
992
993                report.add(rec);
994            }
995        }
996    }
997
998    /// Analyze banking evaluation results.
999    fn analyze_banking(
1000        &mut self,
1001        banking: &crate::banking::BankingEvaluation,
1002        report: &mut EnhancementReport,
1003    ) {
1004        if let Some(ref kyc) = banking.kyc {
1005            if !kyc.passes {
1006                let rec = Recommendation::new(
1007                    self.next_id(),
1008                    RecommendationPriority::High,
1009                    RecommendationCategory::Coherence,
1010                    "Incomplete KYC Profiles",
1011                )
1012                .with_description(
1013                    "KYC profiles are missing required fields or beneficial owner data.",
1014                )
1015                .with_root_cause(
1016                    RootCause::new("KYC generation not populating all required fields")
1017                        .with_confidence(0.85),
1018                )
1019                .with_action(
1020                    SuggestedAction::new("Enable full KYC field generation")
1021                        .with_config_change("enterprise.banking.kyc_completeness", "full")
1022                        .with_effort("Low"),
1023                )
1024                .with_affected_metric("kyc_completeness");
1025
1026                report.add(rec);
1027            }
1028        }
1029
1030        if let Some(ref aml) = banking.aml {
1031            if !aml.passes {
1032                let rec = Recommendation::new(
1033                    self.next_id(),
1034                    RecommendationPriority::Medium,
1035                    RecommendationCategory::MLReadiness,
1036                    "Low AML Typology Detectability",
1037                )
1038                .with_description(
1039                    "AML typologies are not producing statistically detectable patterns, \
1040                     reducing ML training effectiveness.",
1041                )
1042                .with_root_cause(
1043                    RootCause::new("AML typology signal too weak")
1044                        .with_explanation(
1045                            "Each AML typology (structuring, layering, etc.) should produce \
1046                             patterns detectable above background noise.",
1047                        )
1048                        .with_confidence(0.75),
1049                )
1050                .with_action(
1051                    SuggestedAction::new("Increase AML typology intensity")
1052                        .with_config_change("enterprise.banking.aml_intensity", "medium")
1053                        .with_effort("Low"),
1054                )
1055                .with_affected_metric("aml_detectability");
1056
1057                report.add(rec);
1058            }
1059        }
1060    }
1061
1062    /// Analyze process mining evaluation results.
1063    fn analyze_process_mining(
1064        &mut self,
1065        pm: &crate::process_mining::ProcessMiningEvaluation,
1066        report: &mut EnhancementReport,
1067    ) {
1068        if let Some(ref es) = pm.event_sequence {
1069            if !es.passes {
1070                let rec = Recommendation::new(
1071                    self.next_id(),
1072                    RecommendationPriority::High,
1073                    RecommendationCategory::Coherence,
1074                    "Invalid Event Sequences",
1075                )
1076                .with_description(
1077                    "OCEL 2.0 event logs contain timestamp ordering violations or \
1078                     incomplete object lifecycles.",
1079                )
1080                .with_root_cause(
1081                    RootCause::new("Event generation not enforcing temporal ordering")
1082                        .with_confidence(0.9),
1083                )
1084                .with_action(
1085                    SuggestedAction::new("Enable strict event timestamp ordering")
1086                        .with_config_change("business_processes.ocel_strict_ordering", "true")
1087                        .with_effort("Low"),
1088                )
1089                .with_affected_metric("process_mining_coverage");
1090
1091                report.add(rec);
1092            }
1093        }
1094
1095        if let Some(ref va) = pm.variants {
1096            if !va.passes {
1097                let rec = Recommendation::new(
1098                    self.next_id(),
1099                    RecommendationPriority::Medium,
1100                    RecommendationCategory::MLReadiness,
1101                    "Low Process Variant Diversity",
1102                )
1103                .with_description(
1104                    "Process variants lack diversity - too many cases follow the happy path.",
1105                )
1106                .with_root_cause(
1107                    RootCause::new("Insufficient exception path generation").with_confidence(0.7),
1108                )
1109                .with_action(
1110                    SuggestedAction::new("Increase exception path probability")
1111                        .with_config_change("business_processes.exception_rate", "0.15")
1112                        .with_effort("Low"),
1113                )
1114                .with_affected_metric("variant_diversity");
1115
1116                report.add(rec);
1117            }
1118        }
1119    }
1120
1121    /// Analyze new ML enrichment evaluators.
1122    fn analyze_ml_enrichment(
1123        &mut self,
1124        ml: &crate::ml::MLReadinessEvaluation,
1125        report: &mut EnhancementReport,
1126    ) {
1127        if let Some(ref as_eval) = ml.anomaly_scoring {
1128            if !as_eval.passes {
1129                let rec = Recommendation::new(
1130                    self.next_id(),
1131                    RecommendationPriority::High,
1132                    RecommendationCategory::MLReadiness,
1133                    "Low Anomaly Separability",
1134                )
1135                .with_description(
1136                    "Injected anomalies are not sufficiently separable from normal records, \
1137                     reducing model training effectiveness.",
1138                )
1139                .with_root_cause(
1140                    RootCause::new("Anomaly injection intensity too low")
1141                        .with_explanation(
1142                            "Anomalies need to produce measurable statistical deviations. \
1143                             Subtle anomalies may be undetectable by ML models.",
1144                        )
1145                        .with_confidence(0.8),
1146                )
1147                .with_action(
1148                    SuggestedAction::new("Increase anomaly injection signal strength")
1149                        .with_config_change("anomaly_injection.base_rate", "0.05")
1150                        .with_effort("Low"),
1151                )
1152                .with_affected_metric("anomaly_separability")
1153                .with_expected_improvement("AUC-ROC > 0.70");
1154
1155                report.add(rec);
1156            }
1157        }
1158
1159        if let Some(ref dg_eval) = ml.domain_gap {
1160            if !dg_eval.passes {
1161                let rec = Recommendation::new(
1162                    self.next_id(),
1163                    RecommendationPriority::Medium,
1164                    RecommendationCategory::MLReadiness,
1165                    "Large Domain Gap",
1166                )
1167                .with_description(
1168                    "Synthetic data distributions diverge significantly from expected \
1169                     real-world distributions, which may reduce transfer learning effectiveness.",
1170                )
1171                .with_root_cause(
1172                    RootCause::new("Distribution parameters not calibrated to industry")
1173                        .with_confidence(0.7),
1174                )
1175                .with_action(
1176                    SuggestedAction::new("Use industry-specific distribution profile")
1177                        .with_config_change("distributions.industry_profile", "financial_services")
1178                        .with_effort("Low"),
1179                )
1180                .with_affected_metric("domain_gap_score")
1181                .with_expected_improvement("Domain gap < 0.25");
1182
1183                report.add(rec);
1184            }
1185        }
1186
1187        if let Some(ref gnn_eval) = ml.gnn_readiness {
1188            if !gnn_eval.passes {
1189                let rec = Recommendation::new(
1190                    self.next_id(),
1191                    RecommendationPriority::Medium,
1192                    RecommendationCategory::MLReadiness,
1193                    "GNN Training Readiness Issues",
1194                )
1195                .with_description(
1196                    "Graph structure may not be suitable for GNN training due to \
1197                     low feature completeness, high label leakage, or poor homophily.",
1198                )
1199                .with_root_cause(
1200                    RootCause::new("Graph structure not optimized for GNN training")
1201                        .with_confidence(0.7),
1202                )
1203                .with_action(
1204                    SuggestedAction::new("Enable graph connectivity and cross-process links")
1205                        .with_config_change("cross_process_links.enabled", "true")
1206                        .with_effort("Medium"),
1207                )
1208                .with_affected_metric("gnn_readiness_score")
1209                .with_expected_improvement("GNN readiness > 0.65");
1210
1211                report.add(rec);
1212            }
1213        }
1214    }
1215}
1216
1217impl Default for RecommendationEngine {
1218    fn default() -> Self {
1219        Self::new()
1220    }
1221}
1222
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A recommendation assembled through the builder keeps its id and
    /// priority and records one root cause and one action.
    #[test]
    fn test_recommendation_builder() {
        let cause = RootCause::new("Test cause").with_confidence(0.8);
        let action = SuggestedAction::new("Fix it").with_config_change("test.path", "value");

        let built = Recommendation::new(
            "REC-001",
            RecommendationPriority::High,
            RecommendationCategory::Statistical,
            "Test Issue",
        )
        .with_description("Test description")
        .with_root_cause(cause)
        .with_action(action);

        assert_eq!(built.id, "REC-001");
        assert_eq!(built.priority, RecommendationPriority::High);
        assert_eq!(built.root_causes.len(), 1);
        assert_eq!(built.actions.len(), 1);
    }

    /// A finalized report holding one critical and one low-priority item
    /// reports critical issues and a health score below 1.0.
    #[test]
    fn test_enhancement_report() {
        let critical = Recommendation::new(
            "REC-001",
            RecommendationPriority::Critical,
            RecommendationCategory::Coherence,
            "Critical Issue",
        );
        let minor = Recommendation::new(
            "REC-002",
            RecommendationPriority::Low,
            RecommendationCategory::DataQuality,
            "Minor Issue",
        );

        let mut summary = EnhancementReport::new();
        summary.add(critical);
        summary.add(minor);
        summary.finalize();

        assert!(summary.has_critical_issues());
        assert_eq!(summary.recommendations.len(), 2);
        assert!(summary.health_score < 1.0);
    }

    /// Running the engine over a freshly constructed evaluation yields an
    /// empty report with a perfect health score.
    #[test]
    fn test_recommendation_engine() {
        let evaluation = ComprehensiveEvaluation::new();
        let mut engine = RecommendationEngine::new();

        let outcome = engine.generate_report(&evaluation);

        // Empty evaluation should produce no recommendations
        assert!(outcome.recommendations.is_empty());
        assert_eq!(outcome.health_score, 1.0);
    }

    /// Each `with_evidence` call appends an entry, and the confidence passed
    /// to the builder is stored as given.
    #[test]
    fn test_root_cause_builder() {
        let built = RootCause::new("Test cause")
            .with_explanation("Detailed explanation")
            .with_evidence("Evidence 1")
            .with_evidence("Evidence 2")
            .with_confidence(0.9);

        assert_eq!(built.evidence.len(), 2);
        assert_eq!(built.confidence, 0.9);
    }

    /// An action built with a config change and an effort level is flagged
    /// auto-applicable and carries the config path.
    #[test]
    fn test_suggested_action() {
        let built = SuggestedAction::new("Do something")
            .with_config_change("path", "value")
            .with_effort("Low");

        assert!(built.auto_applicable);
        assert_eq!(built.config_path.as_deref(), Some("path"));
    }
}
datasynth_eval/enhancement/recommendation_engine.rs

datasynth_eval/enhancement/
recommendation_engine.rs