datasynth_eval/enhancement/
recommendation_engine.rs

//! Recommendation engine for providing prioritized enhancement suggestions.
//!
//! The recommendation engine performs root cause analysis on evaluation
//! failures and provides actionable, prioritized recommendations.
5
6use crate::{ComprehensiveEvaluation, EvaluationThresholds};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// Priority level for recommendations.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
12pub enum RecommendationPriority {
13    /// Critical issues that will cause data to fail validation.
14    Critical = 0,
15    /// High priority issues affecting data quality significantly.
16    High = 1,
17    /// Medium priority improvements.
18    Medium = 2,
19    /// Low priority enhancements.
20    Low = 3,
21    /// Informational only, no action required.
22    Info = 4,
23}
24
25impl RecommendationPriority {
26    /// Get display name.
27    pub fn name(&self) -> &'static str {
28        match self {
29            RecommendationPriority::Critical => "Critical",
30            RecommendationPriority::High => "High",
31            RecommendationPriority::Medium => "Medium",
32            RecommendationPriority::Low => "Low",
33            RecommendationPriority::Info => "Info",
34        }
35    }
36}
37
38/// Category of the recommendation.
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
40pub enum RecommendationCategory {
41    /// Statistical distribution issues.
42    Statistical,
43    /// Data coherence issues (balance, subledger, etc.).
44    Coherence,
45    /// Data quality issues (duplicates, missing, etc.).
46    DataQuality,
47    /// ML readiness issues.
48    MLReadiness,
49    /// Performance issues.
50    Performance,
51    /// Configuration issues.
52    Configuration,
53}
54
55impl RecommendationCategory {
56    /// Get display name.
57    pub fn name(&self) -> &'static str {
58        match self {
59            RecommendationCategory::Statistical => "Statistical Quality",
60            RecommendationCategory::Coherence => "Data Coherence",
61            RecommendationCategory::DataQuality => "Data Quality",
62            RecommendationCategory::MLReadiness => "ML Readiness",
63            RecommendationCategory::Performance => "Performance",
64            RecommendationCategory::Configuration => "Configuration",
65        }
66    }
67}
68
69/// Root cause identified for an issue.
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct RootCause {
72    /// Short description of the root cause.
73    pub description: String,
74    /// Detailed explanation.
75    pub explanation: String,
76    /// Evidence supporting this root cause.
77    pub evidence: Vec<String>,
78    /// Confidence level (0.0-1.0).
79    pub confidence: f64,
80}
81
82impl RootCause {
83    /// Create a new root cause.
84    pub fn new(description: impl Into<String>) -> Self {
85        Self {
86            description: description.into(),
87            explanation: String::new(),
88            evidence: Vec::new(),
89            confidence: 0.5,
90        }
91    }
92
93    /// Add explanation.
94    pub fn with_explanation(mut self, explanation: impl Into<String>) -> Self {
95        self.explanation = explanation.into();
96        self
97    }
98
99    /// Add evidence.
100    pub fn with_evidence(mut self, evidence: impl Into<String>) -> Self {
101        self.evidence.push(evidence.into());
102        self
103    }
104
105    /// Set confidence.
106    pub fn with_confidence(mut self, confidence: f64) -> Self {
107        self.confidence = confidence.clamp(0.0, 1.0);
108        self
109    }
110}
111
112/// A single recommendation.
113#[derive(Debug, Clone, Serialize, Deserialize)]
114pub struct Recommendation {
115    /// Unique identifier.
116    pub id: String,
117    /// Priority level.
118    pub priority: RecommendationPriority,
119    /// Category.
120    pub category: RecommendationCategory,
121    /// Short title.
122    pub title: String,
123    /// Detailed description.
124    pub description: String,
125    /// Root causes identified.
126    pub root_causes: Vec<RootCause>,
127    /// Suggested actions to take.
128    pub actions: Vec<SuggestedAction>,
129    /// Metrics affected.
130    pub affected_metrics: Vec<String>,
131    /// Expected improvement if addressed.
132    pub expected_improvement: String,
133}
134
135impl Recommendation {
136    /// Create a new recommendation.
137    pub fn new(
138        id: impl Into<String>,
139        priority: RecommendationPriority,
140        category: RecommendationCategory,
141        title: impl Into<String>,
142    ) -> Self {
143        Self {
144            id: id.into(),
145            priority,
146            category,
147            title: title.into(),
148            description: String::new(),
149            root_causes: Vec::new(),
150            actions: Vec::new(),
151            affected_metrics: Vec::new(),
152            expected_improvement: String::new(),
153        }
154    }
155
156    /// Add description.
157    pub fn with_description(mut self, description: impl Into<String>) -> Self {
158        self.description = description.into();
159        self
160    }
161
162    /// Add root cause.
163    pub fn with_root_cause(mut self, root_cause: RootCause) -> Self {
164        self.root_causes.push(root_cause);
165        self
166    }
167
168    /// Add action.
169    pub fn with_action(mut self, action: SuggestedAction) -> Self {
170        self.actions.push(action);
171        self
172    }
173
174    /// Add affected metric.
175    pub fn with_affected_metric(mut self, metric: impl Into<String>) -> Self {
176        self.affected_metrics.push(metric.into());
177        self
178    }
179
180    /// Set expected improvement.
181    pub fn with_expected_improvement(mut self, improvement: impl Into<String>) -> Self {
182        self.expected_improvement = improvement.into();
183        self
184    }
185}
186
187/// A suggested action to address an issue.
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct SuggestedAction {
190    /// Description of the action.
191    pub description: String,
192    /// Configuration path if applicable.
193    pub config_path: Option<String>,
194    /// Suggested value if applicable.
195    pub suggested_value: Option<String>,
196    /// Whether this can be automatically applied.
197    pub auto_applicable: bool,
198    /// Estimated effort (Low, Medium, High).
199    pub effort: String,
200}
201
202impl SuggestedAction {
203    /// Create a new action.
204    pub fn new(description: impl Into<String>) -> Self {
205        Self {
206            description: description.into(),
207            config_path: None,
208            suggested_value: None,
209            auto_applicable: false,
210            effort: "Medium".to_string(),
211        }
212    }
213
214    /// Set config change.
215    pub fn with_config_change(mut self, path: impl Into<String>, value: impl Into<String>) -> Self {
216        self.config_path = Some(path.into());
217        self.suggested_value = Some(value.into());
218        self.auto_applicable = true;
219        self
220    }
221
222    /// Set effort level.
223    pub fn with_effort(mut self, effort: impl Into<String>) -> Self {
224        self.effort = effort.into();
225        self
226    }
227
228    /// Mark as not auto-applicable.
229    pub fn manual_only(mut self) -> Self {
230        self.auto_applicable = false;
231        self
232    }
233}
234
235/// Enhancement report containing all recommendations.
236#[derive(Debug, Clone, Serialize, Deserialize)]
237pub struct EnhancementReport {
238    /// All recommendations.
239    pub recommendations: Vec<Recommendation>,
240    /// Summary by category.
241    pub category_summary: HashMap<String, usize>,
242    /// Summary by priority.
243    pub priority_summary: HashMap<String, usize>,
244    /// Overall health score (0.0-1.0).
245    pub health_score: f64,
246    /// Top issues to address.
247    pub top_issues: Vec<String>,
248    /// Quick wins (easy to fix with high impact).
249    pub quick_wins: Vec<String>,
250}
251
252impl EnhancementReport {
253    /// Create a new empty report.
254    pub fn new() -> Self {
255        Self {
256            recommendations: Vec::new(),
257            category_summary: HashMap::new(),
258            priority_summary: HashMap::new(),
259            health_score: 1.0,
260            top_issues: Vec::new(),
261            quick_wins: Vec::new(),
262        }
263    }
264
265    /// Add a recommendation.
266    pub fn add(&mut self, recommendation: Recommendation) {
267        // Update summaries
268        *self
269            .category_summary
270            .entry(recommendation.category.name().to_string())
271            .or_insert(0) += 1;
272        *self
273            .priority_summary
274            .entry(recommendation.priority.name().to_string())
275            .or_insert(0) += 1;
276
277        self.recommendations.push(recommendation);
278    }
279
280    /// Finalize the report (calculate scores, sort, etc.).
281    pub fn finalize(&mut self) {
282        // Sort recommendations by priority
283        self.recommendations.sort_by_key(|a| a.priority);
284
285        // Calculate health score
286        let critical_count = *self.priority_summary.get("Critical").unwrap_or(&0);
287        let high_count = *self.priority_summary.get("High").unwrap_or(&0);
288        let medium_count = *self.priority_summary.get("Medium").unwrap_or(&0);
289
290        let penalty =
291            critical_count as f64 * 0.3 + high_count as f64 * 0.1 + medium_count as f64 * 0.02;
292        self.health_score = (1.0 - penalty).max(0.0);
293
294        // Identify top issues (critical and high priority)
295        self.top_issues = self
296            .recommendations
297            .iter()
298            .filter(|r| {
299                r.priority == RecommendationPriority::Critical
300                    || r.priority == RecommendationPriority::High
301            })
302            .take(5)
303            .map(|r| r.title.clone())
304            .collect();
305
306        // Identify quick wins (auto-applicable actions)
307        self.quick_wins = self
308            .recommendations
309            .iter()
310            .filter(|r| r.actions.iter().any(|a| a.auto_applicable))
311            .take(5)
312            .map(|r| r.title.clone())
313            .collect();
314    }
315
316    /// Get recommendations by category.
317    pub fn by_category(&self, category: RecommendationCategory) -> Vec<&Recommendation> {
318        self.recommendations
319            .iter()
320            .filter(|r| r.category == category)
321            .collect()
322    }
323
324    /// Get recommendations by priority.
325    pub fn by_priority(&self, priority: RecommendationPriority) -> Vec<&Recommendation> {
326        self.recommendations
327            .iter()
328            .filter(|r| r.priority == priority)
329            .collect()
330    }
331
332    /// Check if there are critical issues.
333    pub fn has_critical_issues(&self) -> bool {
334        *self.priority_summary.get("Critical").unwrap_or(&0) > 0
335    }
336}
337
338impl Default for EnhancementReport {
339    fn default() -> Self {
340        Self::new()
341    }
342}
343
344/// Engine for generating recommendations from evaluation results.
345pub struct RecommendationEngine {
346    /// Thresholds for comparison.
347    thresholds: EvaluationThresholds,
348    /// Counter for generating unique IDs.
349    id_counter: u32,
350}
351
352impl RecommendationEngine {
353    /// Create a new recommendation engine.
354    pub fn new() -> Self {
355        Self::with_thresholds(EvaluationThresholds::default())
356    }
357
358    /// Create with specific thresholds.
359    pub fn with_thresholds(thresholds: EvaluationThresholds) -> Self {
360        Self {
361            thresholds,
362            id_counter: 0,
363        }
364    }
365
366    /// Generate an enhancement report from evaluation results.
367    pub fn generate_report(&mut self, evaluation: &ComprehensiveEvaluation) -> EnhancementReport {
368        let mut report = EnhancementReport::new();
369
370        // Analyze statistical issues
371        self.analyze_statistical(&evaluation.statistical, &mut report);
372
373        // Analyze coherence issues
374        self.analyze_coherence(&evaluation.coherence, &mut report);
375
376        // Analyze quality issues
377        self.analyze_quality(&evaluation.quality, &mut report);
378
379        // Analyze ML readiness issues
380        self.analyze_ml_readiness(&evaluation.ml_readiness, &mut report);
381
382        // Analyze banking issues
383        if let Some(ref banking) = evaluation.banking {
384            self.analyze_banking(banking, &mut report);
385        }
386
387        // Analyze process mining issues
388        if let Some(ref pm) = evaluation.process_mining {
389            self.analyze_process_mining(pm, &mut report);
390        }
391
392        // Finalize the report
393        report.finalize();
394
395        report
396    }
397
398    /// Generate a unique ID.
399    fn next_id(&mut self) -> String {
400        self.id_counter += 1;
401        format!("REC-{:04}", self.id_counter)
402    }
403
404    /// Analyze statistical evaluation results.
405    fn analyze_statistical(
406        &mut self,
407        stat: &crate::statistical::StatisticalEvaluation,
408        report: &mut EnhancementReport,
409    ) {
410        // Check Benford's Law
411        if let Some(ref benford) = stat.benford {
412            if benford.p_value < self.thresholds.benford_p_value_min {
413                let severity = if benford.p_value < 0.01 {
414                    RecommendationPriority::High
415                } else {
416                    RecommendationPriority::Medium
417                };
418
419                let rec = Recommendation::new(
420                    self.next_id(),
421                    severity,
422                    RecommendationCategory::Statistical,
423                    "Benford's Law Non-Conformance",
424                )
425                .with_description(
426                    "Generated transaction amounts do not follow Benford's Law, \
427                     which may indicate unrealistic data patterns.",
428                )
429                .with_root_cause(
430                    RootCause::new("Amount generation not using Benford-compliant distribution")
431                        .with_explanation(
432                            "Real financial data naturally follows Benford's Law for first digits. \
433                             Random or uniform distributions will fail this test.",
434                        )
435                        .with_evidence(format!("p-value: {:.4} (threshold: {:.4})", benford.p_value, self.thresholds.benford_p_value_min))
436                        .with_confidence(0.9),
437                )
438                .with_action(
439                    SuggestedAction::new("Enable Benford's Law compliance in amount generation")
440                        .with_config_change("transactions.amount.benford_compliance", "true")
441                        .with_effort("Low"),
442                )
443                .with_affected_metric("benford_p_value")
444                .with_expected_improvement("Statistical p-value should increase to > 0.05");
445
446                report.add(rec);
447            }
448        }
449
450        // Check temporal patterns
451        if let Some(ref temporal) = stat.temporal {
452            if temporal.pattern_correlation < self.thresholds.temporal_correlation_min {
453                let rec = Recommendation::new(
454                    self.next_id(),
455                    RecommendationPriority::Medium,
456                    RecommendationCategory::Statistical,
457                    "Weak Temporal Patterns",
458                )
459                .with_description(
460                    "Generated data lacks realistic temporal patterns such as \
461                     seasonality, month-end spikes, and weekday variations.",
462                )
463                .with_root_cause(
464                    RootCause::new("Insufficient temporal variation in generation")
465                        .with_explanation(
466                            "Real financial data shows strong temporal patterns including \
467                             month-end closing activity, seasonal variations, and weekday effects.",
468                        )
469                        .with_evidence(format!(
470                            "Correlation: {:.3} (threshold: {:.3})",
471                            temporal.pattern_correlation, self.thresholds.temporal_correlation_min
472                        ))
473                        .with_confidence(0.75),
474                )
475                .with_action(
476                    SuggestedAction::new("Increase seasonality strength")
477                        .with_config_change("transactions.temporal.seasonality_strength", "0.8")
478                        .with_effort("Low"),
479                )
480                .with_action(
481                    SuggestedAction::new("Enable month-end spike patterns")
482                        .with_config_change("transactions.temporal.month_end_spike", "true")
483                        .with_effort("Low"),
484                )
485                .with_affected_metric("temporal_correlation")
486                .with_expected_improvement("Better temporal pattern correlation (> 0.8)");
487
488                report.add(rec);
489            }
490        }
491    }
492
493    /// Analyze coherence evaluation results.
494    fn analyze_coherence(
495        &mut self,
496        coherence: &crate::coherence::CoherenceEvaluation,
497        report: &mut EnhancementReport,
498    ) {
499        // Check balance sheet
500        if let Some(ref balance) = coherence.balance {
501            if !balance.equation_balanced {
502                let rec = Recommendation::new(
503                    self.next_id(),
504                    RecommendationPriority::Critical,
505                    RecommendationCategory::Coherence,
506                    "Balance Sheet Imbalance",
507                )
508                .with_description(
509                    "The fundamental accounting equation (Assets = Liabilities + Equity) is violated. \
510                     This is a critical data integrity issue.",
511                )
512                .with_root_cause(
513                    RootCause::new("Unbalanced journal entries generated")
514                        .with_explanation(
515                            "Every journal entry must have equal debits and credits. \
516                             An imbalance indicates entries were created incorrectly.",
517                        )
518                        .with_evidence(format!("Max imbalance: {}", balance.max_imbalance))
519                        .with_confidence(0.95),
520                )
521                .with_action(
522                    SuggestedAction::new("Enable balance coherence validation")
523                        .with_config_change("balance.coherence_enabled", "true")
524                        .with_effort("Low"),
525                )
526                .with_action(
527                    SuggestedAction::new("Review JE generation logic for balance enforcement")
528                        .manual_only()
529                        .with_effort("High"),
530                )
531                .with_affected_metric("balance_equation")
532                .with_expected_improvement("Zero imbalance in trial balance");
533
534                report.add(rec);
535            }
536        }
537
538        // Check intercompany matching
539        if let Some(ref ic) = coherence.intercompany {
540            if ic.match_rate < self.thresholds.ic_match_rate_min {
541                let rec = Recommendation::new(
542                    self.next_id(),
543                    RecommendationPriority::High,
544                    RecommendationCategory::Coherence,
545                    "Intercompany Matching Issues",
546                )
547                .with_description(
548                    "Intercompany transactions are not fully matched between entities. \
549                     This will cause issues during consolidation.",
550                )
551                .with_root_cause(
552                    RootCause::new("IC transaction pairs not properly linked")
553                        .with_explanation(
554                            "Intercompany transactions should always have matching entries \
555                             in both the selling and buying entities.",
556                        )
557                        .with_evidence(format!(
558                            "Match rate: {:.1}% (threshold: {:.1}%)",
559                            ic.match_rate * 100.0,
560                            self.thresholds.ic_match_rate_min * 100.0
561                        ))
562                        .with_confidence(0.85),
563                )
564                .with_action(
565                    SuggestedAction::new("Increase IC matching precision")
566                        .with_config_change("intercompany.match_precision", "0.99")
567                        .with_effort("Low"),
568                )
569                .with_affected_metric("ic_match_rate")
570                .with_expected_improvement("IC match rate > 95%");
571
572                report.add(rec);
573            }
574        }
575
576        // Check enterprise process chain coherence
577        self.analyze_enterprise_coherence(coherence, report);
578
579        // Check document chains
580        if let Some(ref doc_chain) = coherence.document_chain {
581            let avg_completion =
582                (doc_chain.p2p_completion_rate + doc_chain.o2c_completion_rate) / 2.0;
583            if avg_completion < self.thresholds.document_chain_completion_min {
584                let rec = Recommendation::new(
585                    self.next_id(),
586                    RecommendationPriority::Medium,
587                    RecommendationCategory::Coherence,
588                    "Incomplete Document Chains",
589                )
590                .with_description(
591                    "Many document flows (P2P, O2C) do not complete to final payment/receipt. \
592                     This reduces realism for AP/AR aging analysis.",
593                )
594                .with_root_cause(
595                    RootCause::new("Document flow completion rates set too low")
596                        .with_explanation(
597                            "Real business processes typically complete most document flows. \
598                             Very low completion rates may not be realistic.",
599                        )
600                        .with_evidence(format!(
601                            "P2P: {:.1}%, O2C: {:.1}% (threshold: {:.1}%)",
602                            doc_chain.p2p_completion_rate * 100.0,
603                            doc_chain.o2c_completion_rate * 100.0,
604                            self.thresholds.document_chain_completion_min * 100.0
605                        ))
606                        .with_confidence(0.7),
607                )
608                .with_action(
609                    SuggestedAction::new("Increase P2P completion rate")
610                        .with_config_change("document_flows.p2p.completion_rate", "0.95")
611                        .with_effort("Low"),
612                )
613                .with_action(
614                    SuggestedAction::new("Increase O2C completion rate")
615                        .with_config_change("document_flows.o2c.completion_rate", "0.95")
616                        .with_effort("Low"),
617                )
618                .with_affected_metric("doc_chain_completion")
619                .with_expected_improvement("Document chain completion > 90%");
620
621                report.add(rec);
622            }
623        }
624    }
625
626    /// Analyze quality evaluation results.
627    fn analyze_quality(
628        &mut self,
629        quality: &crate::quality::QualityEvaluation,
630        report: &mut EnhancementReport,
631    ) {
632        // Check duplicates
633        if let Some(ref uniqueness) = quality.uniqueness {
634            if uniqueness.duplicate_rate > self.thresholds.duplicate_rate_max {
635                let rec = Recommendation::new(
636                    self.next_id(),
637                    RecommendationPriority::High,
638                    RecommendationCategory::DataQuality,
639                    "High Duplicate Rate",
640                )
641                .with_description(
642                    "Excessive duplicate records detected in the generated data. \
643                     This may cause issues in downstream processing.",
644                )
645                .with_root_cause(
646                    RootCause::new("Duplicate injection rate set too high")
647                        .with_explanation(
648                            "Data quality variations can inject duplicates, but \
649                             high rates may be unrealistic for most use cases.",
650                        )
651                        .with_evidence(format!(
652                            "Duplicate rate: {:.2}% (threshold: {:.2}%)",
653                            uniqueness.duplicate_rate * 100.0,
654                            self.thresholds.duplicate_rate_max * 100.0
655                        ))
656                        .with_confidence(0.9),
657                )
658                .with_action(
659                    SuggestedAction::new("Reduce duplicate injection rate")
660                        .with_config_change("data_quality.duplicates.exact_rate", "0.005")
661                        .with_effort("Low"),
662                )
663                .with_affected_metric("duplicate_rate")
664                .with_expected_improvement("Duplicate rate < 1%");
665
666                report.add(rec);
667            }
668        }
669
670        // Check completeness
671        if let Some(ref completeness) = quality.completeness {
672            if completeness.overall_completeness < self.thresholds.completeness_rate_min {
673                let rec = Recommendation::new(
674                    self.next_id(),
675                    RecommendationPriority::Medium,
676                    RecommendationCategory::DataQuality,
677                    "Low Data Completeness",
678                )
679                .with_description(
680                    "Many fields have missing values. While some missing data is realistic, \
681                     excessive missing values may reduce data utility.",
682                )
683                .with_root_cause(
684                    RootCause::new("Missing value injection rate set too high")
685                        .with_explanation(
686                            "Data quality variations inject missing values to simulate \
687                             real-world data quality issues, but rates may be too aggressive.",
688                        )
689                        .with_evidence(format!(
690                            "Completeness: {:.1}% (threshold: {:.1}%)",
691                            completeness.overall_completeness * 100.0,
692                            self.thresholds.completeness_rate_min * 100.0
693                        ))
694                        .with_confidence(0.8),
695                )
696                .with_action(
697                    SuggestedAction::new("Reduce missing value injection rate")
698                        .with_config_change("data_quality.missing_values.overall_rate", "0.02")
699                        .with_effort("Low"),
700                )
701                .with_affected_metric("completeness_rate")
702                .with_expected_improvement("Completeness > 95%");
703
704                report.add(rec);
705            }
706        }
707    }
708
709    /// Analyze new coherence evaluators (enterprise process chains).
710    fn analyze_enterprise_coherence(
711        &mut self,
712        coherence: &crate::coherence::CoherenceEvaluation,
713        report: &mut EnhancementReport,
714    ) {
715        // HR/Payroll accuracy
716        if let Some(ref hr) = coherence.hr_payroll {
717            if !hr.passes {
718                let rec = Recommendation::new(
719                    self.next_id(),
720                    RecommendationPriority::High,
721                    RecommendationCategory::Coherence,
722                    "Payroll Calculation Errors",
723                )
724                .with_description(
725                    "Payroll calculations (gross-to-net, component sums) contain arithmetic errors.",
726                )
727                .with_root_cause(
728                    RootCause::new("Payroll arithmetic not enforced during generation")
729                        .with_explanation(
730                            "Real payroll systems enforce exact arithmetic: net = gross - deductions. \
731                             Generated data should maintain these invariants.",
732                        )
733                        .with_confidence(0.9),
734                )
735                .with_action(
736                    SuggestedAction::new("Ensure payroll calculation precision")
737                        .with_config_change("hr.payroll.calculation_precision", "exact")
738                        .with_effort("Low"),
739                )
740                .with_affected_metric("payroll_accuracy")
741                .with_expected_improvement("Payroll arithmetic accuracy > 99.9%");
742
743                report.add(rec);
744            }
745        }
746
747        // Manufacturing yield
748        if let Some(ref mfg) = coherence.manufacturing {
749            if !mfg.passes {
750                let rec = Recommendation::new(
751                    self.next_id(),
752                    RecommendationPriority::Medium,
753                    RecommendationCategory::Coherence,
754                    "Manufacturing Data Inconsistencies",
755                )
756                .with_description(
757                    "Manufacturing data shows inconsistencies in yield rates, \
758                     operation sequencing, or quality inspection calculations.",
759                )
760                .with_root_cause(
761                    RootCause::new("Manufacturing constraints not fully enforced")
762                        .with_explanation(
763                            "Production orders should have consistent yield calculations, \
764                             monotonically ordered operations, and valid quality metrics.",
765                        )
766                        .with_confidence(0.8),
767                )
768                .with_action(
769                    SuggestedAction::new("Enable manufacturing constraint validation")
770                        .with_config_change("manufacturing.validate_constraints", "true")
771                        .with_effort("Medium"),
772                )
773                .with_affected_metric("manufacturing_yield")
774                .with_expected_improvement("Yield consistency > 95%");
775
776                report.add(rec);
777            }
778        }
779
780        // Financial reporting tie-back
781        if let Some(ref fr) = coherence.financial_reporting {
782            if !fr.passes {
783                let rec = Recommendation::new(
784                    self.next_id(),
785                    RecommendationPriority::Critical,
786                    RecommendationCategory::Coherence,
787                    "Financial Statement Tie-Back Failures",
788                )
789                .with_description(
790                    "Financial statements do not reconcile to the trial balance. \
791                     This is a critical audit concern.",
792                )
793                .with_root_cause(
794                    RootCause::new("Statement generation not derived from GL data")
795                        .with_explanation(
796                            "Financial statements must tie back to trial balance totals. \
797                             Independent generation of statements and GL will cause discrepancies.",
798                        )
799                        .with_confidence(0.95),
800                )
801                .with_action(
802                    SuggestedAction::new("Enable statement-to-TB tie-back enforcement")
803                        .with_config_change("financial_reporting.tie_back_enforced", "true")
804                        .with_effort("Medium"),
805                )
806                .with_affected_metric("financial_reporting_tie_back")
807                .with_expected_improvement("Statement-TB tie-back rate > 99%");
808
809                report.add(rec);
810            }
811        }
812
813        // Sourcing chain
814        if let Some(ref sourcing) = coherence.sourcing {
815            if !sourcing.passes {
816                let rec = Recommendation::new(
817                    self.next_id(),
818                    RecommendationPriority::Medium,
819                    RecommendationCategory::Coherence,
820                    "Incomplete S2C Process Chain",
821                )
822                .with_description(
823                    "Source-to-Contract chain has incomplete flows: \
824                     projects missing RFx events, evaluations, or contracts.",
825                )
826                .with_root_cause(
827                    RootCause::new("S2C completion rates configured too low").with_confidence(0.7),
828                )
829                .with_action(
830                    SuggestedAction::new("Increase S2C completion rates")
831                        .with_config_change("source_to_pay.rfx_completion_rate", "0.95")
832                        .with_effort("Low"),
833                )
834                .with_affected_metric("s2c_chain_completion")
835                .with_expected_improvement("RFx completion rate > 90%");
836
837                report.add(rec);
838            }
839        }
840    }
841
842    /// Analyze ML readiness evaluation results.
843    fn analyze_ml_readiness(
844        &mut self,
845        ml: &crate::ml::MLReadinessEvaluation,
846        report: &mut EnhancementReport,
847    ) {
848        // Check anomaly rate
849        if let Some(ref labels) = ml.labels {
850            if labels.anomaly_rate < self.thresholds.anomaly_rate_min {
851                let rec = Recommendation::new(
852                    self.next_id(),
853                    RecommendationPriority::High,
854                    RecommendationCategory::MLReadiness,
855                    "Insufficient Anomaly Rate",
856                )
857                .with_description(
858                    "Too few anomalies for effective ML training. Anomaly detection \
859                     models need sufficient positive examples.",
860                )
861                .with_root_cause(
862                    RootCause::new("Anomaly injection rate set too low")
863                        .with_explanation(
864                            "ML models for anomaly detection typically need 1-10% anomaly rate \
865                             during training to learn effective patterns.",
866                        )
867                        .with_evidence(format!(
868                            "Anomaly rate: {:.2}% (minimum: {:.2}%)",
869                            labels.anomaly_rate * 100.0,
870                            self.thresholds.anomaly_rate_min * 100.0
871                        ))
872                        .with_confidence(0.9),
873                )
874                .with_action(
875                    SuggestedAction::new("Increase anomaly injection rate")
876                        .with_config_change("anomaly_injection.base_rate", "0.05")
877                        .with_effort("Low"),
878                )
879                .with_affected_metric("anomaly_rate")
880                .with_expected_improvement("Anomaly rate 1-10% for ML training");
881
882                report.add(rec);
883            } else if labels.anomaly_rate > self.thresholds.anomaly_rate_max {
884                let rec = Recommendation::new(
885                    self.next_id(),
886                    RecommendationPriority::Medium,
887                    RecommendationCategory::MLReadiness,
888                    "Excessive Anomaly Rate",
889                )
890                .with_description(
891                    "Too many anomalies may reduce model effectiveness and make \
892                     the data unrealistic for testing.",
893                )
894                .with_root_cause(
895                    RootCause::new("Anomaly injection rate set too high")
896                        .with_explanation(
897                            "While anomalies are needed for ML training, rates above 20% \
898                             are typically unrealistic and may confuse models.",
899                        )
900                        .with_evidence(format!(
901                            "Anomaly rate: {:.1}% (maximum: {:.1}%)",
902                            labels.anomaly_rate * 100.0,
903                            self.thresholds.anomaly_rate_max * 100.0
904                        ))
905                        .with_confidence(0.75),
906                )
907                .with_action(
908                    SuggestedAction::new("Reduce anomaly injection rate")
909                        .with_config_change("anomaly_injection.base_rate", "0.05")
910                        .with_effort("Low"),
911                )
912                .with_affected_metric("anomaly_rate")
913                .with_expected_improvement("Anomaly rate within 1-20% range");
914
915                report.add(rec);
916            }
917
918            // Check label coverage
919            if labels.label_coverage < self.thresholds.label_coverage_min {
920                let rec = Recommendation::new(
921                    self.next_id(),
922                    RecommendationPriority::High,
923                    RecommendationCategory::MLReadiness,
924                    "Incomplete Label Coverage",
925                )
926                .with_description(
927                    "Not all records have proper labels. Supervised ML requires \
928                     complete labels for training.",
929                )
930                .with_root_cause(
931                    RootCause::new("Label generation not capturing all anomalies")
932                        .with_explanation(
933                            "Every injected anomaly should have a corresponding label. \
934                             Missing labels indicate a labeling pipeline issue.",
935                        )
936                        .with_evidence(format!(
937                            "Label coverage: {:.1}% (threshold: {:.1}%)",
938                            labels.label_coverage * 100.0,
939                            self.thresholds.label_coverage_min * 100.0
940                        ))
941                        .with_confidence(0.85),
942                )
943                .with_action(
944                    SuggestedAction::new("Enable complete label generation")
945                        .with_config_change("anomaly_injection.label_all", "true")
946                        .with_effort("Low"),
947                )
948                .with_affected_metric("label_coverage")
949                .with_expected_improvement("Label coverage > 99%");
950
951                report.add(rec);
952            }
953        }
954
955        // Check ML enrichment evaluators
956        self.analyze_ml_enrichment(ml, report);
957
958        // Check graph connectivity
959        if let Some(ref graph) = ml.graph {
960            if graph.connectivity_score < self.thresholds.graph_connectivity_min {
961                let rec = Recommendation::new(
962                    self.next_id(),
963                    RecommendationPriority::Medium,
964                    RecommendationCategory::MLReadiness,
965                    "Low Graph Connectivity",
966                )
967                .with_description(
968                    "The transaction graph has isolated components, which may \
969                     reduce GNN model effectiveness.",
970                )
971                .with_root_cause(
972                    RootCause::new("Insufficient entity relationships in generated data")
973                        .with_explanation(
974                            "Graph neural networks benefit from well-connected graphs. \
975                             Isolated components receive no message passing.",
976                        )
977                        .with_evidence(format!(
978                            "Connectivity: {:.1}% (threshold: {:.1}%)",
979                            graph.connectivity_score * 100.0,
980                            self.thresholds.graph_connectivity_min * 100.0
981                        ))
982                        .with_confidence(0.7),
983                )
984                .with_action(
985                    SuggestedAction::new("Enable graph connectivity enforcement")
986                        .with_config_change("graph_export.ensure_connected", "true")
987                        .with_effort("Medium"),
988                )
989                .with_affected_metric("graph_connectivity")
990                .with_expected_improvement("Graph connectivity > 95%");
991
992                report.add(rec);
993            }
994        }
995    }
996
997    /// Analyze banking evaluation results.
998    fn analyze_banking(
999        &mut self,
1000        banking: &crate::banking::BankingEvaluation,
1001        report: &mut EnhancementReport,
1002    ) {
1003        if let Some(ref kyc) = banking.kyc {
1004            if !kyc.passes {
1005                let rec = Recommendation::new(
1006                    self.next_id(),
1007                    RecommendationPriority::High,
1008                    RecommendationCategory::Coherence,
1009                    "Incomplete KYC Profiles",
1010                )
1011                .with_description(
1012                    "KYC profiles are missing required fields or beneficial owner data.",
1013                )
1014                .with_root_cause(
1015                    RootCause::new("KYC generation not populating all required fields")
1016                        .with_confidence(0.85),
1017                )
1018                .with_action(
1019                    SuggestedAction::new("Enable full KYC field generation")
1020                        .with_config_change("enterprise.banking.kyc_completeness", "full")
1021                        .with_effort("Low"),
1022                )
1023                .with_affected_metric("kyc_completeness");
1024
1025                report.add(rec);
1026            }
1027        }
1028
1029        if let Some(ref aml) = banking.aml {
1030            if !aml.passes {
1031                let rec = Recommendation::new(
1032                    self.next_id(),
1033                    RecommendationPriority::Medium,
1034                    RecommendationCategory::MLReadiness,
1035                    "Low AML Typology Detectability",
1036                )
1037                .with_description(
1038                    "AML typologies are not producing statistically detectable patterns, \
1039                     reducing ML training effectiveness.",
1040                )
1041                .with_root_cause(
1042                    RootCause::new("AML typology signal too weak")
1043                        .with_explanation(
1044                            "Each AML typology (structuring, layering, etc.) should produce \
1045                             patterns detectable above background noise.",
1046                        )
1047                        .with_confidence(0.75),
1048                )
1049                .with_action(
1050                    SuggestedAction::new("Increase AML typology intensity")
1051                        .with_config_change("enterprise.banking.aml_intensity", "medium")
1052                        .with_effort("Low"),
1053                )
1054                .with_affected_metric("aml_detectability");
1055
1056                report.add(rec);
1057            }
1058        }
1059    }
1060
1061    /// Analyze process mining evaluation results.
1062    fn analyze_process_mining(
1063        &mut self,
1064        pm: &crate::process_mining::ProcessMiningEvaluation,
1065        report: &mut EnhancementReport,
1066    ) {
1067        if let Some(ref es) = pm.event_sequence {
1068            if !es.passes {
1069                let rec = Recommendation::new(
1070                    self.next_id(),
1071                    RecommendationPriority::High,
1072                    RecommendationCategory::Coherence,
1073                    "Invalid Event Sequences",
1074                )
1075                .with_description(
1076                    "OCEL 2.0 event logs contain timestamp ordering violations or \
1077                     incomplete object lifecycles.",
1078                )
1079                .with_root_cause(
1080                    RootCause::new("Event generation not enforcing temporal ordering")
1081                        .with_confidence(0.9),
1082                )
1083                .with_action(
1084                    SuggestedAction::new("Enable strict event timestamp ordering")
1085                        .with_config_change("business_processes.ocel_strict_ordering", "true")
1086                        .with_effort("Low"),
1087                )
1088                .with_affected_metric("process_mining_coverage");
1089
1090                report.add(rec);
1091            }
1092        }
1093
1094        if let Some(ref va) = pm.variants {
1095            if !va.passes {
1096                let rec = Recommendation::new(
1097                    self.next_id(),
1098                    RecommendationPriority::Medium,
1099                    RecommendationCategory::MLReadiness,
1100                    "Low Process Variant Diversity",
1101                )
1102                .with_description(
1103                    "Process variants lack diversity - too many cases follow the happy path.",
1104                )
1105                .with_root_cause(
1106                    RootCause::new("Insufficient exception path generation").with_confidence(0.7),
1107                )
1108                .with_action(
1109                    SuggestedAction::new("Increase exception path probability")
1110                        .with_config_change("business_processes.exception_rate", "0.15")
1111                        .with_effort("Low"),
1112                )
1113                .with_affected_metric("variant_diversity");
1114
1115                report.add(rec);
1116            }
1117        }
1118    }
1119
1120    /// Analyze new ML enrichment evaluators.
1121    fn analyze_ml_enrichment(
1122        &mut self,
1123        ml: &crate::ml::MLReadinessEvaluation,
1124        report: &mut EnhancementReport,
1125    ) {
1126        if let Some(ref as_eval) = ml.anomaly_scoring {
1127            if !as_eval.passes {
1128                let rec = Recommendation::new(
1129                    self.next_id(),
1130                    RecommendationPriority::High,
1131                    RecommendationCategory::MLReadiness,
1132                    "Low Anomaly Separability",
1133                )
1134                .with_description(
1135                    "Injected anomalies are not sufficiently separable from normal records, \
1136                     reducing model training effectiveness.",
1137                )
1138                .with_root_cause(
1139                    RootCause::new("Anomaly injection intensity too low")
1140                        .with_explanation(
1141                            "Anomalies need to produce measurable statistical deviations. \
1142                             Subtle anomalies may be undetectable by ML models.",
1143                        )
1144                        .with_confidence(0.8),
1145                )
1146                .with_action(
1147                    SuggestedAction::new("Increase anomaly injection signal strength")
1148                        .with_config_change("anomaly_injection.base_rate", "0.05")
1149                        .with_effort("Low"),
1150                )
1151                .with_affected_metric("anomaly_separability")
1152                .with_expected_improvement("AUC-ROC > 0.70");
1153
1154                report.add(rec);
1155            }
1156        }
1157
1158        if let Some(ref dg_eval) = ml.domain_gap {
1159            if !dg_eval.passes {
1160                let rec = Recommendation::new(
1161                    self.next_id(),
1162                    RecommendationPriority::Medium,
1163                    RecommendationCategory::MLReadiness,
1164                    "Large Domain Gap",
1165                )
1166                .with_description(
1167                    "Synthetic data distributions diverge significantly from expected \
1168                     real-world distributions, which may reduce transfer learning effectiveness.",
1169                )
1170                .with_root_cause(
1171                    RootCause::new("Distribution parameters not calibrated to industry")
1172                        .with_confidence(0.7),
1173                )
1174                .with_action(
1175                    SuggestedAction::new("Use industry-specific distribution profile")
1176                        .with_config_change("distributions.industry_profile", "financial_services")
1177                        .with_effort("Low"),
1178                )
1179                .with_affected_metric("domain_gap_score")
1180                .with_expected_improvement("Domain gap < 0.25");
1181
1182                report.add(rec);
1183            }
1184        }
1185
1186        if let Some(ref gnn_eval) = ml.gnn_readiness {
1187            if !gnn_eval.passes {
1188                let rec = Recommendation::new(
1189                    self.next_id(),
1190                    RecommendationPriority::Medium,
1191                    RecommendationCategory::MLReadiness,
1192                    "GNN Training Readiness Issues",
1193                )
1194                .with_description(
1195                    "Graph structure may not be suitable for GNN training due to \
1196                     low feature completeness, high label leakage, or poor homophily.",
1197                )
1198                .with_root_cause(
1199                    RootCause::new("Graph structure not optimized for GNN training")
1200                        .with_confidence(0.7),
1201                )
1202                .with_action(
1203                    SuggestedAction::new("Enable graph connectivity and cross-process links")
1204                        .with_config_change("cross_process_links.enabled", "true")
1205                        .with_effort("Medium"),
1206                )
1207                .with_affected_metric("gnn_readiness_score")
1208                .with_expected_improvement("GNN readiness > 0.65");
1209
1210                report.add(rec);
1211            }
1212        }
1213    }
1214}
1215
1216impl Default for RecommendationEngine {
1217    fn default() -> Self {
1218        Self::new()
1219    }
1220}
1221
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// The recommendation builder keeps the id and priority it was created with
    /// and records one root cause and one action.
    #[test]
    fn test_recommendation_builder() {
        let cause = RootCause::new("Test cause").with_confidence(0.8);
        let action = SuggestedAction::new("Fix it").with_config_change("test.path", "value");

        let built = Recommendation::new(
            "REC-001",
            RecommendationPriority::High,
            RecommendationCategory::Statistical,
            "Test Issue",
        )
        .with_description("Test description")
        .with_root_cause(cause)
        .with_action(action);

        assert_eq!(built.id, "REC-001");
        assert_eq!(built.priority, RecommendationPriority::High);
        assert_eq!(built.root_causes.len(), 1);
        assert_eq!(built.actions.len(), 1);
    }

    /// A finalized report with a critical entry flags it, keeps both entries,
    /// and scores below a perfect health of 1.0.
    #[test]
    fn test_enhancement_report() {
        let critical = Recommendation::new(
            "REC-001",
            RecommendationPriority::Critical,
            RecommendationCategory::Coherence,
            "Critical Issue",
        );
        let minor = Recommendation::new(
            "REC-002",
            RecommendationPriority::Low,
            RecommendationCategory::DataQuality,
            "Minor Issue",
        );

        let mut summary = EnhancementReport::new();
        summary.add(critical);
        summary.add(minor);
        summary.finalize();

        assert!(summary.has_critical_issues());
        assert_eq!(summary.recommendations.len(), 2);
        assert!(summary.health_score < 1.0);
    }

    /// An empty evaluation yields no recommendations and a health score of 1.0.
    #[test]
    fn test_recommendation_engine() {
        let empty_evaluation = ComprehensiveEvaluation::new();
        let mut engine = RecommendationEngine::new();

        let outcome = engine.generate_report(&empty_evaluation);

        assert!(outcome.recommendations.is_empty());
        assert_eq!(outcome.health_score, 1.0);
    }

    /// Each `with_evidence` call adds an entry; `with_confidence` stores the value.
    #[test]
    fn test_root_cause_builder() {
        let built = RootCause::new("Test cause")
            .with_explanation("Detailed explanation")
            .with_evidence("Evidence 1")
            .with_evidence("Evidence 2")
            .with_confidence(0.9);

        assert_eq!(built.evidence.len(), 2);
        assert_eq!(built.confidence, 0.9);
    }

    /// Attaching a config change marks the action auto-applicable and records the path.
    #[test]
    fn test_suggested_action() {
        let built = SuggestedAction::new("Do something")
            .with_config_change("path", "value")
            .with_effort("Low");

        assert!(built.auto_applicable);
        assert_eq!(built.config_path, Some("path".to_string()));
    }
}
datasynth_eval/enhancement/recommendation_engine.rs

datasynth_eval/enhancement/
recommendation_engine.rs