Skip to main content

datasynth_eval/
lib.rs

1#![deny(clippy::unwrap_used)]
2// Allow some clippy lints that are common in test/evaluation code
3#![allow(clippy::field_reassign_with_default)]
4#![allow(clippy::too_many_arguments)]
5#![allow(clippy::upper_case_acronyms)] // MCAR, MAR, MNAR, ISO are standard abbreviations
6
7//! Synthetic Data Evaluation Framework
8//!
9//! This crate provides comprehensive evaluation capabilities for validating
10//! the quality and correctness of generated synthetic financial data.
11//!
12//! # Features
13//!
14//! - **Statistical Quality**: Benford's Law, amount distributions, line item patterns
15//! - **Semantic Coherence**: Balance sheet validation, subledger reconciliation
16//! - **Data Quality**: Uniqueness, completeness, format consistency
17//! - **ML-Readiness**: Feature distributions, label quality, graph structure
18//! - **Reporting**: HTML and JSON reports with pass/fail thresholds
19//!
20//! # Example
21//!
22//! ```ignore
23//! use datasynth_eval::{Evaluator, EvaluationConfig};
24//!
25//! let config = EvaluationConfig::default();
26//! let evaluator = Evaluator::new(config);
27//!
28//! // Evaluate generated data
29//! let result = evaluator.evaluate(&generation_result)?;
30//!
31//! // Generate report
32//! result.generate_html_report("evaluation_report.html")?;
33//! ```
34
35pub mod benchmarks;
36pub mod config;
37pub mod enhancement;
38pub mod error;
39pub mod gates;
40pub mod privacy;
41
42pub mod coherence;
43pub mod ml;
44pub mod quality;
45pub mod report;
46pub mod statistical;
47pub mod tuning;
48
49pub mod banking;
50pub mod causal;
51pub mod diff_engine;
52pub mod enrichment;
53pub mod process_mining;
54pub mod scenario_diff;
55
56// Re-exports
57pub use config::{EvaluationConfig, EvaluationThresholds, PrivacyEvaluationConfig};
58pub use error::{EvalError, EvalResult};
59
60pub use statistical::{
61    AmountDistributionAnalysis, AmountDistributionAnalyzer, AnomalyRealismEvaluation,
62    AnomalyRealismEvaluator, BenfordAnalysis, BenfordAnalyzer, BenfordConformity,
63    DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer, DriftDetectionEntry,
64    DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent, LabeledEventAnalysis,
65    LineItemAnalysis, LineItemAnalyzer, LineItemEntry, StatisticalEvaluation, TemporalAnalysis,
66    TemporalAnalyzer, TemporalEntry,
67};
68
69pub use coherence::{
70    AccountType, ApprovalLevelData, AuditEvaluation, AuditEvaluator, AuditFindingData,
71    AuditRiskData, AuditTrailEvaluation, AuditTrailGap, BalanceSheetEvaluation,
72    BalanceSheetEvaluator, BalanceSnapshot, BankReconciliationEvaluation,
73    BankReconciliationEvaluator, BidEvaluationData, BudgetVarianceData, CashPositionData,
74    CoherenceEvaluation, ConcentrationMetrics, CountryPackData, CountryPackEvaluation,
75    CountryPackEvaluator, CountryPackThresholds, CovenantData, CrossProcessEvaluation,
76    CrossProcessEvaluator, CycleCountData, DocumentChainEvaluation, DocumentChainEvaluator,
77    DocumentReferenceData, EarnedValueData, EntityReferenceData, EsgEvaluation, EsgEvaluator,
78    EsgThresholds, ExpenseReportData, FairValueEvaluation, FinancialReportingEvaluation,
79    FinancialReportingEvaluator, FinancialStatementData, FrameworkViolation, GovernanceData,
80    HedgeEffectivenessData, HolidayData, HrPayrollEvaluation, HrPayrollEvaluator, ICMatchingData,
81    ICMatchingEvaluation, ICMatchingEvaluator, ImpairmentEvaluation, IsaComplianceEvaluation,
82    KpiData, LeaseAccountingEvaluation, LeaseAccountingEvaluator, LeaseEvaluation,
83    ManufacturingEvaluation, ManufacturingEvaluator, MaterialityData, NettingData, NetworkEdge,
84    NetworkEvaluation, NetworkEvaluator, NetworkNode, NetworkThresholds, O2CChainData,
85    P2PChainData, PayrollHoursData, PayrollLineItemData, PayrollRunData, PcaobComplianceEvaluation,
86    PerformanceObligation, ProductionOrderData, ProjectAccountingEvaluation,
87    ProjectAccountingEvaluator, ProjectAccountingThresholds, ProjectRevenueData,
88    QualityInspectionData, QuoteLineData, ReconciliationData, ReferentialData,
89    ReferentialIntegrityEvaluation, ReferentialIntegrityEvaluator, RetainageData, RevenueContract,
90    RevenueRecognitionEvaluation, RevenueRecognitionEvaluator, RoutingOperationData,
91    SafetyMetricData, SalesQuoteData, SalesQuoteEvaluation, SalesQuoteEvaluator,
92    SalesQuoteThresholds, ScorecardCoverageData, SourcingEvaluation, SourcingEvaluator,
93    SourcingProjectData, SoxComplianceEvaluation, SpendAnalysisData, StandardsComplianceEvaluation,
94    StandardsThresholds, StrengthStats, SubledgerEvaluator, SubledgerReconciliationEvaluation,
95    SupplierEsgData, TaxEvaluation, TaxEvaluator, TaxLineData, TaxRateData, TaxReturnData,
96    TaxThresholds, TimeEntryData, TreasuryEvaluation, TreasuryEvaluator, TreasuryThresholds,
97    VariableConsideration, ViolationSeverity, WaterUsageData, WithholdingData, WorkpaperData,
98};
99
100pub use quality::{
101    CompletenessAnalysis, CompletenessAnalyzer, ConsistencyAnalysis, ConsistencyAnalyzer,
102    ConsistencyRule, DuplicateInfo, FieldCompleteness, FieldDefinition, FieldValue, FormatAnalysis,
103    FormatAnalyzer, FormatVariation, QualityEvaluation, UniqueRecord, UniquenessAnalysis,
104    UniquenessAnalyzer,
105};
106
107pub use ml::{
108    AnomalyScoringAnalysis, AnomalyScoringAnalyzer, CrossModalAnalysis, CrossModalAnalyzer,
109    DomainGapAnalysis, DomainGapAnalyzer, EmbeddingReadinessAnalysis, EmbeddingReadinessAnalyzer,
110    FeatureAnalysis, FeatureAnalyzer, FeatureQualityAnalysis, FeatureQualityAnalyzer, FeatureStats,
111    GnnReadinessAnalysis, GnnReadinessAnalyzer, GraphAnalysis, GraphAnalyzer, GraphMetrics,
112    LabelAnalysis, LabelAnalyzer, LabelDistribution, MLReadinessEvaluation,
113    SchemeDetectabilityAnalysis, SchemeDetectabilityAnalyzer, SplitAnalysis, SplitAnalyzer,
114    SplitMetrics, TemporalFidelityAnalysis, TemporalFidelityAnalyzer,
115};
116
117pub use report::{
118    BaselineComparison, ComparisonResult, EvaluationReport, HtmlReportGenerator,
119    JsonReportGenerator, MetricChange, ReportMetadata, ThresholdChecker, ThresholdResult,
120};
121
122pub use tuning::{
123    ConfigSuggestion, ConfigSuggestionGenerator, TuningAnalyzer, TuningCategory, TuningOpportunity,
124};
125
126pub use enhancement::{
127    AutoTuneResult, AutoTuner, ConfigPatch, EnhancementReport, Recommendation,
128    RecommendationCategory, RecommendationEngine, RecommendationPriority, RootCause,
129    SuggestedAction,
130};
131
132pub use privacy::{
133    LinkageAttack, LinkageConfig, LinkageResults, MembershipInferenceAttack, MiaConfig, MiaResults,
134    NistAlignmentReport, NistCriterion, PrivacyEvaluation, SynQPMatrix, SynQPQuadrant,
135};
136
137pub use benchmarks::{
138    // ACFE-calibrated benchmarks
139    acfe_calibrated_1k,
140    acfe_collusion_5k,
141    acfe_management_override_2k,
142    all_acfe_benchmarks,
143    all_benchmarks,
144    // Industry-specific benchmarks
145    all_industry_benchmarks,
146    anomaly_bench_1k,
147    data_quality_100k,
148    entity_match_5k,
149    financial_services_fraud_5k,
150    fraud_detect_10k,
151    get_benchmark,
152    get_industry_benchmark,
153    graph_fraud_10k,
154    healthcare_fraud_5k,
155    manufacturing_fraud_5k,
156    retail_fraud_10k,
157    technology_fraud_3k,
158    AcfeAlignment,
159    AcfeCalibration,
160    AcfeCategoryDistribution,
161    BaselineModelType,
162    BaselineResult,
163    BenchmarkBuilder,
164    BenchmarkSuite,
165    BenchmarkTaskType,
166    CostMatrix,
167    DatasetSpec,
168    EvaluationSpec,
169    FeatureSet,
170    IndustryBenchmarkAnalysis,
171    LeaderboardEntry,
172    MetricType,
173    SplitRatios,
174};
175
176pub use banking::{
177    AmlDetectabilityAnalysis, AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
178    KycCompletenessAnalysis, KycCompletenessAnalyzer, KycProfileData, TypologyData,
179};
180
181pub use process_mining::{
182    EventSequenceAnalysis, EventSequenceAnalyzer, ProcessEventData, ProcessMiningEvaluation,
183    VariantAnalysis, VariantAnalyzer, VariantData,
184};
185
186pub use causal::{CausalModelEvaluation, CausalModelEvaluator};
187
188pub use enrichment::{EnrichmentQualityEvaluation, EnrichmentQualityEvaluator};
189
190use serde::{Deserialize, Serialize};
191
192/// Comprehensive evaluation result combining all evaluation modules.
193#[derive(Debug, Clone, Serialize, Deserialize)]
194pub struct ComprehensiveEvaluation {
195    /// Statistical quality evaluation.
196    pub statistical: StatisticalEvaluation,
197    /// Semantic coherence evaluation.
198    pub coherence: CoherenceEvaluation,
199    /// Data quality evaluation.
200    pub quality: QualityEvaluation,
201    /// ML-readiness evaluation.
202    pub ml_readiness: MLReadinessEvaluation,
203    /// Privacy evaluation (optional — only populated when privacy testing is enabled).
204    #[serde(default, skip_serializing_if = "Option::is_none")]
205    pub privacy: Option<PrivacyEvaluation>,
206    /// Banking/KYC/AML evaluation (optional).
207    #[serde(default, skip_serializing_if = "Option::is_none")]
208    pub banking: Option<BankingEvaluation>,
209    /// OCEL 2.0 process mining evaluation (optional).
210    #[serde(default, skip_serializing_if = "Option::is_none")]
211    pub process_mining: Option<ProcessMiningEvaluation>,
212    /// Causal model evaluation (optional).
213    #[serde(default, skip_serializing_if = "Option::is_none")]
214    pub causal: Option<CausalModelEvaluation>,
215    /// LLM enrichment quality evaluation (optional).
216    #[serde(default, skip_serializing_if = "Option::is_none")]
217    pub enrichment_quality: Option<EnrichmentQualityEvaluation>,
218    /// Overall pass/fail status.
219    pub passes: bool,
220    /// Summary of all failures.
221    pub failures: Vec<String>,
222    /// Tuning opportunities identified.
223    pub tuning_opportunities: Vec<TuningOpportunity>,
224    /// Configuration suggestions.
225    pub config_suggestions: Vec<ConfigSuggestion>,
226}
227
228impl ComprehensiveEvaluation {
229    /// Create a new empty evaluation.
230    pub fn new() -> Self {
231        Self {
232            statistical: StatisticalEvaluation::default(),
233            coherence: CoherenceEvaluation::default(),
234            quality: QualityEvaluation::default(),
235            ml_readiness: MLReadinessEvaluation::default(),
236            privacy: None,
237            banking: None,
238            process_mining: None,
239            causal: None,
240            enrichment_quality: None,
241            passes: true,
242            failures: Vec::new(),
243            tuning_opportunities: Vec::new(),
244            config_suggestions: Vec::new(),
245        }
246    }
247
248    /// Check all evaluations against thresholds and update overall status.
249    pub fn check_all_thresholds(&mut self, thresholds: &EvaluationThresholds) {
250        self.failures.clear();
251
252        // Check statistical thresholds
253        self.statistical.check_thresholds(thresholds);
254        self.failures.extend(self.statistical.failures.clone());
255
256        // Check coherence thresholds
257        self.coherence.check_thresholds(thresholds);
258        self.failures.extend(self.coherence.failures.clone());
259
260        // Check quality thresholds
261        self.quality.check_thresholds(thresholds);
262        self.failures.extend(self.quality.failures.clone());
263
264        // Check ML thresholds
265        self.ml_readiness.check_thresholds(thresholds);
266        self.failures.extend(self.ml_readiness.failures.clone());
267
268        // Check privacy evaluation (if present)
269        if let Some(ref mut privacy) = self.privacy {
270            privacy.update_status();
271            self.failures.extend(privacy.failures.clone());
272        }
273
274        // Check banking evaluation
275        if let Some(ref mut banking) = self.banking {
276            banking.check_thresholds();
277            self.failures.extend(banking.issues.clone());
278        }
279
280        // Check process mining evaluation
281        if let Some(ref mut pm) = self.process_mining {
282            pm.check_thresholds();
283            self.failures.extend(pm.issues.clone());
284        }
285
286        // Check causal model evaluation
287        if let Some(ref causal) = self.causal {
288            if !causal.passes {
289                self.failures.extend(causal.issues.clone());
290            }
291        }
292
293        // Check enrichment quality evaluation
294        if let Some(ref enrichment) = self.enrichment_quality {
295            if !enrichment.passes {
296                self.failures.extend(enrichment.issues.clone());
297            }
298        }
299
300        self.passes = self.failures.is_empty();
301    }
302}
303
304impl Default for ComprehensiveEvaluation {
305    fn default() -> Self {
306        Self::new()
307    }
308}
309
310/// Main evaluator that coordinates all evaluation modules.
311pub struct Evaluator {
312    /// Evaluation configuration.
313    config: EvaluationConfig,
314}
315
316impl Evaluator {
317    /// Create a new evaluator with the given configuration.
318    pub fn new(config: EvaluationConfig) -> Self {
319        Self { config }
320    }
321
322    /// Create an evaluator with default configuration.
323    pub fn with_defaults() -> Self {
324        Self::new(EvaluationConfig::default())
325    }
326
327    /// Get the configuration.
328    pub fn config(&self) -> &EvaluationConfig {
329        &self.config
330    }
331
332    /// Run a comprehensive evaluation and return results.
333    ///
334    /// This is a placeholder - actual implementation would take
335    /// generation results as input.
336    pub fn run_evaluation(&self) -> ComprehensiveEvaluation {
337        let mut evaluation = ComprehensiveEvaluation::new();
338        evaluation.check_all_thresholds(&self.config.thresholds);
339        evaluation
340    }
341}
342
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A freshly constructed evaluation is passing and has no failures.
    #[test]
    fn test_comprehensive_evaluation_new() {
        let fresh = ComprehensiveEvaluation::new();

        assert!(fresh.passes);
        assert!(fresh.failures.is_empty());
    }

    /// The default-configured evaluator exposes a Benford p-value minimum of 0.05.
    #[test]
    fn test_evaluator_creation() {
        let subject = Evaluator::with_defaults();
        let thresholds = &subject.config().thresholds;

        assert_eq!(thresholds.benford_p_value_min, 0.05);
    }
}