Skip to main content

datasynth_eval/
lib.rs

1#![deny(clippy::unwrap_used)]
2// Allow some clippy lints that are common in test/evaluation code
3#![allow(clippy::field_reassign_with_default)]
4#![allow(clippy::too_many_arguments)]
5#![allow(clippy::upper_case_acronyms)] // MCAR, MAR, MNAR, ISO are standard abbreviations
6
7//! Synthetic Data Evaluation Framework
8//!
9//! This crate provides comprehensive evaluation capabilities for validating
10//! the quality and correctness of generated synthetic financial data.
11//!
12//! # Features
13//!
14//! - **Statistical Quality**: Benford's Law, amount distributions, line item patterns
15//! - **Semantic Coherence**: Balance sheet validation, subledger reconciliation
16//! - **Data Quality**: Uniqueness, completeness, format consistency
17//! - **ML-Readiness**: Feature distributions, label quality, graph structure
18//! - **Reporting**: HTML and JSON reports with pass/fail thresholds
19//!
20//! # Example
21//!
22//! ```ignore
23//! use datasynth_eval::{Evaluator, EvaluationConfig};
24//!
25//! let config = EvaluationConfig::default();
26//! let evaluator = Evaluator::new(config);
27//!
28//! // Evaluate generated data
29//! let result = evaluator.evaluate(&generation_result)?;
30//!
31//! // Generate report
32//! result.generate_html_report("evaluation_report.html")?;
33//! ```
34
35pub mod benchmarks;
36pub mod config;
37pub mod enhancement;
38pub mod error;
39pub mod gates;
40pub mod privacy;
41
42pub mod coherence;
43pub mod ml;
44pub mod quality;
45pub mod report;
46pub mod statistical;
47pub mod tuning;
48
49pub mod banking;
50pub mod causal;
51pub mod diff_engine;
52pub mod enrichment;
53pub mod process_mining;
54pub mod scenario_diff;
55
56// Re-exports
57pub use config::{EvaluationConfig, EvaluationThresholds, PrivacyEvaluationConfig};
58pub use error::{EvalError, EvalResult};
59
60pub use statistical::{
61    AmountDistributionAnalysis, AmountDistributionAnalyzer, AnomalyRealismEvaluation,
62    AnomalyRealismEvaluator, BenfordAnalysis, BenfordAnalyzer, BenfordConformity,
63    DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer, DriftDetectionEntry,
64    DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent, LabeledEventAnalysis,
65    LineItemAnalysis, LineItemAnalyzer, LineItemEntry, StatisticalEvaluation, TemporalAnalysis,
66    TemporalAnalyzer, TemporalEntry,
67};
68
69pub use coherence::{
70    AccountType, ApprovalLevelData, AuditEvaluation, AuditEvaluator, AuditFindingData,
71    AuditRiskData, AuditTrailEvaluation, AuditTrailGap, BalanceSheetEvaluation,
72    BalanceSheetEvaluator, BalanceSnapshot, BankReconciliationEvaluation,
73    BankReconciliationEvaluator, BidEvaluationData, BudgetVarianceData, CashPositionData,
74    CoherenceEvaluation, ConcentrationMetrics, CountryPackData, CountryPackEvaluation,
75    CountryPackEvaluator, CountryPackThresholds, CovenantData, CrossProcessEvaluation,
76    CrossProcessEvaluator, CycleCountData, DocumentChainEvaluation, DocumentChainEvaluator,
77    DocumentReferenceData, EarnedValueData, EntityReferenceData, EsgEvaluation, EsgEvaluator,
78    EsgThresholds, ExpenseReportData, FairValueEvaluation, FinancialReportingEvaluation,
79    FinancialReportingEvaluator, FinancialStatementData, FrameworkViolation, GovernanceData,
80    HedgeEffectivenessData, HolidayData, HrPayrollEvaluation, HrPayrollEvaluator, ICMatchingData,
81    ICMatchingEvaluation, ICMatchingEvaluator, ImpairmentEvaluation, IsaComplianceEvaluation,
82    KpiData, LeaseAccountingEvaluation, LeaseAccountingEvaluator, LeaseEvaluation,
83    ManufacturingEvaluation, ManufacturingEvaluator, MaterialityData, NettingData, NetworkEdge,
84    NetworkEvaluation, NetworkEvaluator, NetworkNode, NetworkThresholds, O2CChainData,
85    P2PChainData, PayrollHoursData, PayrollLineItemData, PayrollRunData, PcaobComplianceEvaluation,
86    PerformanceObligation, ProductionOrderData, ProjectAccountingEvaluation,
87    ProjectAccountingEvaluator, ProjectAccountingThresholds, ProjectRevenueData,
88    QualityInspectionData, QuoteLineData, ReconciliationData, ReferentialData,
89    ReferentialIntegrityEvaluation, ReferentialIntegrityEvaluator, RetainageData, RevenueContract,
90    RevenueRecognitionEvaluation, RevenueRecognitionEvaluator, RoutingOperationData,
91    SafetyMetricData, SalesQuoteData, SalesQuoteEvaluation, SalesQuoteEvaluator,
92    SalesQuoteThresholds, ScorecardCoverageData, SourcingEvaluation, SourcingEvaluator,
93    SourcingProjectData, SoxComplianceEvaluation, SpendAnalysisData, StandardsComplianceEvaluation,
94    StandardsThresholds, StrengthStats, SubledgerEvaluator, SubledgerReconciliationEvaluation,
95    SupplierEsgData, TaxEvaluation, TaxEvaluator, TaxLineData, TaxRateData, TaxReturnData,
96    TaxThresholds, TimeEntryData, TreasuryEvaluation, TreasuryEvaluator, TreasuryThresholds,
97    UnmatchedICItem, VariableConsideration, ViolationSeverity, WaterUsageData, WithholdingData,
98    WorkpaperData,
99};
100
101pub use quality::{
102    CompletenessAnalysis, CompletenessAnalyzer, ConsistencyAnalysis, ConsistencyAnalyzer,
103    ConsistencyRule, DuplicateInfo, FieldCompleteness, FieldDefinition, FieldValue, FormatAnalysis,
104    FormatAnalyzer, FormatVariation, QualityEvaluation, UniqueRecord, UniquenessAnalysis,
105    UniquenessAnalyzer,
106};
107
108pub use ml::{
109    AnomalyScoringAnalysis, AnomalyScoringAnalyzer, CrossModalAnalysis, CrossModalAnalyzer,
110    DomainGapAnalysis, DomainGapAnalyzer, EmbeddingReadinessAnalysis, EmbeddingReadinessAnalyzer,
111    FeatureAnalysis, FeatureAnalyzer, FeatureQualityAnalysis, FeatureQualityAnalyzer, FeatureStats,
112    GnnReadinessAnalysis, GnnReadinessAnalyzer, GraphAnalysis, GraphAnalyzer, GraphMetrics,
113    LabelAnalysis, LabelAnalyzer, LabelDistribution, MLReadinessEvaluation,
114    SchemeDetectabilityAnalysis, SchemeDetectabilityAnalyzer, SplitAnalysis, SplitAnalyzer,
115    SplitMetrics, TemporalFidelityAnalysis, TemporalFidelityAnalyzer,
116};
117
118pub use report::{
119    BaselineComparison, ComparisonResult, EvaluationReport, HtmlReportGenerator,
120    JsonReportGenerator, MetricChange, ReportMetadata, ThresholdChecker, ThresholdResult,
121};
122
123pub use tuning::{
124    ConfigSuggestion, ConfigSuggestionGenerator, TuningAnalyzer, TuningCategory, TuningOpportunity,
125};
126
127pub use enhancement::{
128    AutoTuneResult, AutoTuner, ConfigPatch, EnhancementReport, Recommendation,
129    RecommendationCategory, RecommendationEngine, RecommendationPriority, RootCause,
130    SuggestedAction,
131};
132
133pub use privacy::{
134    LinkageAttack, LinkageConfig, LinkageResults, MembershipInferenceAttack, MiaConfig, MiaResults,
135    NistAlignmentReport, NistCriterion, PrivacyEvaluation, SynQPMatrix, SynQPQuadrant,
136};
137
138pub use benchmarks::{
139    // ACFE-calibrated benchmarks
140    acfe_calibrated_1k,
141    acfe_collusion_5k,
142    acfe_management_override_2k,
143    all_acfe_benchmarks,
144    all_benchmarks,
145    // Industry-specific benchmarks
146    all_industry_benchmarks,
147    anomaly_bench_1k,
148    data_quality_100k,
149    entity_match_5k,
150    financial_services_fraud_5k,
151    fraud_detect_10k,
152    get_benchmark,
153    get_industry_benchmark,
154    graph_fraud_10k,
155    healthcare_fraud_5k,
156    manufacturing_fraud_5k,
157    retail_fraud_10k,
158    technology_fraud_3k,
159    AcfeAlignment,
160    AcfeCalibration,
161    AcfeCategoryDistribution,
162    BaselineModelType,
163    BaselineResult,
164    BenchmarkBuilder,
165    BenchmarkSuite,
166    BenchmarkTaskType,
167    CostMatrix,
168    DatasetSpec,
169    EvaluationSpec,
170    FeatureSet,
171    IndustryBenchmarkAnalysis,
172    LeaderboardEntry,
173    MetricType,
174    SplitRatios,
175};
176
177pub use banking::{
178    AmlDetectabilityAnalysis, AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
179    KycCompletenessAnalysis, KycCompletenessAnalyzer, KycProfileData, TypologyData,
180};
181
182pub use process_mining::{
183    EventSequenceAnalysis, EventSequenceAnalyzer, ProcessEventData, ProcessMiningEvaluation,
184    VariantAnalysis, VariantAnalyzer, VariantData,
185};
186
187pub use causal::{CausalModelEvaluation, CausalModelEvaluator};
188
189pub use enrichment::{EnrichmentQualityEvaluation, EnrichmentQualityEvaluator};
190
191use serde::{Deserialize, Serialize};
192
193/// Comprehensive evaluation result combining all evaluation modules.
194#[derive(Debug, Clone, Serialize, Deserialize)]
195pub struct ComprehensiveEvaluation {
196    /// Statistical quality evaluation.
197    pub statistical: StatisticalEvaluation,
198    /// Semantic coherence evaluation.
199    pub coherence: CoherenceEvaluation,
200    /// Data quality evaluation.
201    pub quality: QualityEvaluation,
202    /// ML-readiness evaluation.
203    pub ml_readiness: MLReadinessEvaluation,
204    /// Privacy evaluation (optional — only populated when privacy testing is enabled).
205    #[serde(default, skip_serializing_if = "Option::is_none")]
206    pub privacy: Option<PrivacyEvaluation>,
207    /// Banking/KYC/AML evaluation (optional).
208    #[serde(default, skip_serializing_if = "Option::is_none")]
209    pub banking: Option<BankingEvaluation>,
210    /// OCEL 2.0 process mining evaluation (optional).
211    #[serde(default, skip_serializing_if = "Option::is_none")]
212    pub process_mining: Option<ProcessMiningEvaluation>,
213    /// Causal model evaluation (optional).
214    #[serde(default, skip_serializing_if = "Option::is_none")]
215    pub causal: Option<CausalModelEvaluation>,
216    /// LLM enrichment quality evaluation (optional).
217    #[serde(default, skip_serializing_if = "Option::is_none")]
218    pub enrichment_quality: Option<EnrichmentQualityEvaluation>,
219    /// Overall pass/fail status.
220    pub passes: bool,
221    /// Summary of all failures.
222    pub failures: Vec<String>,
223    /// Tuning opportunities identified.
224    pub tuning_opportunities: Vec<TuningOpportunity>,
225    /// Configuration suggestions.
226    pub config_suggestions: Vec<ConfigSuggestion>,
227}
228
229impl ComprehensiveEvaluation {
230    /// Create a new empty evaluation.
231    pub fn new() -> Self {
232        Self {
233            statistical: StatisticalEvaluation::default(),
234            coherence: CoherenceEvaluation::default(),
235            quality: QualityEvaluation::default(),
236            ml_readiness: MLReadinessEvaluation::default(),
237            privacy: None,
238            banking: None,
239            process_mining: None,
240            causal: None,
241            enrichment_quality: None,
242            passes: true,
243            failures: Vec::new(),
244            tuning_opportunities: Vec::new(),
245            config_suggestions: Vec::new(),
246        }
247    }
248
249    /// Check all evaluations against thresholds and update overall status.
250    pub fn check_all_thresholds(&mut self, thresholds: &EvaluationThresholds) {
251        self.failures.clear();
252
253        // Check statistical thresholds
254        self.statistical.check_thresholds(thresholds);
255        self.failures.extend(self.statistical.failures.clone());
256
257        // Check coherence thresholds
258        self.coherence.check_thresholds(thresholds);
259        self.failures.extend(self.coherence.failures.clone());
260
261        // Check quality thresholds
262        self.quality.check_thresholds(thresholds);
263        self.failures.extend(self.quality.failures.clone());
264
265        // Check ML thresholds
266        self.ml_readiness.check_thresholds(thresholds);
267        self.failures.extend(self.ml_readiness.failures.clone());
268
269        // Check privacy evaluation (if present)
270        if let Some(ref mut privacy) = self.privacy {
271            privacy.update_status();
272            self.failures.extend(privacy.failures.clone());
273        }
274
275        // Check banking evaluation
276        if let Some(ref mut banking) = self.banking {
277            banking.check_thresholds();
278            self.failures.extend(banking.issues.clone());
279        }
280
281        // Check process mining evaluation
282        if let Some(ref mut pm) = self.process_mining {
283            pm.check_thresholds();
284            self.failures.extend(pm.issues.clone());
285        }
286
287        // Check causal model evaluation
288        if let Some(ref causal) = self.causal {
289            if !causal.passes {
290                self.failures.extend(causal.issues.clone());
291            }
292        }
293
294        // Check enrichment quality evaluation
295        if let Some(ref enrichment) = self.enrichment_quality {
296            if !enrichment.passes {
297                self.failures.extend(enrichment.issues.clone());
298            }
299        }
300
301        self.passes = self.failures.is_empty();
302    }
303}
304
305impl Default for ComprehensiveEvaluation {
306    fn default() -> Self {
307        Self::new()
308    }
309}
310
311/// Main evaluator that coordinates all evaluation modules.
312pub struct Evaluator {
313    /// Evaluation configuration.
314    config: EvaluationConfig,
315}
316
317impl Evaluator {
318    /// Create a new evaluator with the given configuration.
319    pub fn new(config: EvaluationConfig) -> Self {
320        Self { config }
321    }
322
323    /// Create an evaluator with default configuration.
324    pub fn with_defaults() -> Self {
325        Self::new(EvaluationConfig::default())
326    }
327
328    /// Get the configuration.
329    pub fn config(&self) -> &EvaluationConfig {
330        &self.config
331    }
332
333    /// Run a comprehensive evaluation and return results.
334    ///
335    /// This is a placeholder - actual implementation would take
336    /// generation results as input.
337    pub fn run_evaluation(&self) -> ComprehensiveEvaluation {
338        let mut evaluation = ComprehensiveEvaluation::new();
339        evaluation.check_all_thresholds(&self.config.thresholds);
340        evaluation
341    }
342}
343
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A freshly built evaluation passes and carries no failures.
    #[test]
    fn test_comprehensive_evaluation_new() {
        let fresh = ComprehensiveEvaluation::new();
        assert!(fresh.passes);
        assert!(fresh.failures.is_empty());
    }

    /// The default evaluator exposes the default Benford p-value threshold.
    #[test]
    fn test_evaluator_creation() {
        let evaluator = Evaluator::with_defaults();
        let benford_min = evaluator.config().thresholds.benford_p_value_min;
        assert_eq!(benford_min, 0.05);
    }
}