Skip to main content

datasynth_eval/
lib.rs

1#![deny(clippy::unwrap_used)]
2// Allow some clippy lints that are common in test/evaluation code
3#![allow(clippy::field_reassign_with_default)]
4#![allow(clippy::too_many_arguments)]
5#![allow(clippy::upper_case_acronyms)] // MCAR, MAR, MNAR, ISO are standard abbreviations
6
7//! Synthetic Data Evaluation Framework
8//!
9//! This crate provides comprehensive evaluation capabilities for validating
10//! the quality and correctness of generated synthetic financial data.
11//!
12//! # Features
13//!
14//! - **Statistical Quality**: Benford's Law, amount distributions, line item patterns
15//! - **Semantic Coherence**: Balance sheet validation, subledger reconciliation
16//! - **Data Quality**: Uniqueness, completeness, format consistency
17//! - **ML-Readiness**: Feature distributions, label quality, graph structure
18//! - **Reporting**: HTML and JSON reports with pass/fail thresholds
19//!
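//! # Example
//!
//! The snippet below is illustrative only (hence `ignore`); the entry point
//! implemented in this module is [`Evaluator::run_evaluation`].
//!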
22//! ```ignore
23//! use datasynth_eval::{Evaluator, EvaluationConfig};
24//!
25//! let config = EvaluationConfig::default();
26//! let evaluator = Evaluator::new(config);
27//!
28//! // Evaluate generated data
29//! let result = evaluator.evaluate(&generation_result)?;
30//!
31//! // Generate report
32//! result.generate_html_report("evaluation_report.html")?;
33//! ```
34
35pub mod benchmarks;
36pub mod config;
37pub mod enhancement;
38pub mod error;
39pub mod gates;
40pub mod privacy;
41
42pub mod coherence;
43pub mod ml;
44pub mod quality;
45pub mod report;
46pub mod statistical;
47pub mod tuning;
48
49pub mod banking;
50pub mod causal;
51pub mod enrichment;
52pub mod process_mining;
53
54// Re-exports
55pub use config::{EvaluationConfig, EvaluationThresholds, PrivacyEvaluationConfig};
56pub use error::{EvalError, EvalResult};
57
58pub use statistical::{
59    AmountDistributionAnalysis, AmountDistributionAnalyzer, AnomalyRealismEvaluation,
60    AnomalyRealismEvaluator, BenfordAnalysis, BenfordAnalyzer, BenfordConformity,
61    DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer, DriftDetectionEntry,
62    DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent, LabeledEventAnalysis,
63    LineItemAnalysis, LineItemAnalyzer, LineItemEntry, StatisticalEvaluation, TemporalAnalysis,
64    TemporalAnalyzer, TemporalEntry,
65};
66
67pub use coherence::{
68    AccountType, ApprovalLevelData, AuditEvaluation, AuditEvaluator, AuditFindingData,
69    AuditRiskData, AuditTrailEvaluation, AuditTrailGap, BalanceSheetEvaluation,
70    BalanceSheetEvaluator, BalanceSnapshot, BankReconciliationEvaluation,
71    BankReconciliationEvaluator, BidEvaluationData, BudgetVarianceData, CashPositionData,
72    CoherenceEvaluation, ConcentrationMetrics, CountryPackData, CountryPackEvaluation,
73    CountryPackEvaluator, CountryPackThresholds, CovenantData, CrossProcessEvaluation,
74    CrossProcessEvaluator, CycleCountData, DocumentChainEvaluation, DocumentChainEvaluator,
75    DocumentReferenceData, EarnedValueData, EntityReferenceData, EsgEvaluation, EsgEvaluator,
76    EsgThresholds, ExpenseReportData, FairValueEvaluation, FinancialReportingEvaluation,
77    FinancialReportingEvaluator, FinancialStatementData, FrameworkViolation, GovernanceData,
78    HedgeEffectivenessData, HolidayData, HrPayrollEvaluation, HrPayrollEvaluator, ICMatchingData,
79    ICMatchingEvaluation, ICMatchingEvaluator, ImpairmentEvaluation, IsaComplianceEvaluation,
80    KpiData, LeaseAccountingEvaluation, LeaseAccountingEvaluator, LeaseEvaluation,
81    ManufacturingEvaluation, ManufacturingEvaluator, MaterialityData, NettingData, NetworkEdge,
82    NetworkEvaluation, NetworkEvaluator, NetworkNode, NetworkThresholds, O2CChainData,
83    P2PChainData, PayrollHoursData, PayrollLineItemData, PayrollRunData, PcaobComplianceEvaluation,
84    PerformanceObligation, ProductionOrderData, ProjectAccountingEvaluation,
85    ProjectAccountingEvaluator, ProjectAccountingThresholds, ProjectRevenueData,
86    QualityInspectionData, QuoteLineData, ReconciliationData, ReferentialData,
87    ReferentialIntegrityEvaluation, ReferentialIntegrityEvaluator, RetainageData, RevenueContract,
88    RevenueRecognitionEvaluation, RevenueRecognitionEvaluator, RoutingOperationData,
89    SafetyMetricData, SalesQuoteData, SalesQuoteEvaluation, SalesQuoteEvaluator,
90    SalesQuoteThresholds, ScorecardCoverageData, SourcingEvaluation, SourcingEvaluator,
91    SourcingProjectData, SoxComplianceEvaluation, SpendAnalysisData, StandardsComplianceEvaluation,
92    StandardsThresholds, StrengthStats, SubledgerEvaluator, SubledgerReconciliationEvaluation,
93    SupplierEsgData, TaxEvaluation, TaxEvaluator, TaxLineData, TaxRateData, TaxReturnData,
94    TaxThresholds, TimeEntryData, TreasuryEvaluation, TreasuryEvaluator, TreasuryThresholds,
95    VariableConsideration, ViolationSeverity, WaterUsageData, WithholdingData, WorkpaperData,
96};
97
98pub use quality::{
99    CompletenessAnalysis, CompletenessAnalyzer, ConsistencyAnalysis, ConsistencyAnalyzer,
100    ConsistencyRule, DuplicateInfo, FieldCompleteness, FieldDefinition, FieldValue, FormatAnalysis,
101    FormatAnalyzer, FormatVariation, QualityEvaluation, UniqueRecord, UniquenessAnalysis,
102    UniquenessAnalyzer,
103};
104
105pub use ml::{
106    AnomalyScoringAnalysis, AnomalyScoringAnalyzer, CrossModalAnalysis, CrossModalAnalyzer,
107    DomainGapAnalysis, DomainGapAnalyzer, EmbeddingReadinessAnalysis, EmbeddingReadinessAnalyzer,
108    FeatureAnalysis, FeatureAnalyzer, FeatureQualityAnalysis, FeatureQualityAnalyzer, FeatureStats,
109    GnnReadinessAnalysis, GnnReadinessAnalyzer, GraphAnalysis, GraphAnalyzer, GraphMetrics,
110    LabelAnalysis, LabelAnalyzer, LabelDistribution, MLReadinessEvaluation,
111    SchemeDetectabilityAnalysis, SchemeDetectabilityAnalyzer, SplitAnalysis, SplitAnalyzer,
112    SplitMetrics, TemporalFidelityAnalysis, TemporalFidelityAnalyzer,
113};
114
115pub use report::{
116    BaselineComparison, ComparisonResult, EvaluationReport, HtmlReportGenerator,
117    JsonReportGenerator, MetricChange, ReportMetadata, ThresholdChecker, ThresholdResult,
118};
119
120pub use tuning::{
121    ConfigSuggestion, ConfigSuggestionGenerator, TuningAnalyzer, TuningCategory, TuningOpportunity,
122};
123
124pub use enhancement::{
125    AutoTuneResult, AutoTuner, ConfigPatch, EnhancementReport, Recommendation,
126    RecommendationCategory, RecommendationEngine, RecommendationPriority, RootCause,
127    SuggestedAction,
128};
129
130pub use privacy::{
131    LinkageAttack, LinkageConfig, LinkageResults, MembershipInferenceAttack, MiaConfig, MiaResults,
132    NistAlignmentReport, NistCriterion, PrivacyEvaluation, SynQPMatrix, SynQPQuadrant,
133};
134
135pub use benchmarks::{
136    // ACFE-calibrated benchmarks
137    acfe_calibrated_1k,
138    acfe_collusion_5k,
139    acfe_management_override_2k,
140    all_acfe_benchmarks,
141    all_benchmarks,
142    // Industry-specific benchmarks
143    all_industry_benchmarks,
144    anomaly_bench_1k,
145    data_quality_100k,
146    entity_match_5k,
147    financial_services_fraud_5k,
148    fraud_detect_10k,
149    get_benchmark,
150    get_industry_benchmark,
151    graph_fraud_10k,
152    healthcare_fraud_5k,
153    manufacturing_fraud_5k,
154    retail_fraud_10k,
155    technology_fraud_3k,
156    AcfeAlignment,
157    AcfeCalibration,
158    AcfeCategoryDistribution,
159    BaselineModelType,
160    BaselineResult,
161    BenchmarkBuilder,
162    BenchmarkSuite,
163    BenchmarkTaskType,
164    CostMatrix,
165    DatasetSpec,
166    EvaluationSpec,
167    FeatureSet,
168    IndustryBenchmarkAnalysis,
169    LeaderboardEntry,
170    MetricType,
171    SplitRatios,
172};
173
174pub use banking::{
175    AmlDetectabilityAnalysis, AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
176    KycCompletenessAnalysis, KycCompletenessAnalyzer, KycProfileData, TypologyData,
177};
178
179pub use process_mining::{
180    EventSequenceAnalysis, EventSequenceAnalyzer, ProcessEventData, ProcessMiningEvaluation,
181    VariantAnalysis, VariantAnalyzer, VariantData,
182};
183
184pub use causal::{CausalModelEvaluation, CausalModelEvaluator};
185
186pub use enrichment::{EnrichmentQualityEvaluation, EnrichmentQualityEvaluator};
187
188use serde::{Deserialize, Serialize};
189
190/// Comprehensive evaluation result combining all evaluation modules.
191#[derive(Debug, Clone, Serialize, Deserialize)]
192pub struct ComprehensiveEvaluation {
193    /// Statistical quality evaluation.
194    pub statistical: StatisticalEvaluation,
195    /// Semantic coherence evaluation.
196    pub coherence: CoherenceEvaluation,
197    /// Data quality evaluation.
198    pub quality: QualityEvaluation,
199    /// ML-readiness evaluation.
200    pub ml_readiness: MLReadinessEvaluation,
201    /// Privacy evaluation (optional — only populated when privacy testing is enabled).
202    #[serde(default, skip_serializing_if = "Option::is_none")]
203    pub privacy: Option<PrivacyEvaluation>,
204    /// Banking/KYC/AML evaluation (optional).
205    #[serde(default, skip_serializing_if = "Option::is_none")]
206    pub banking: Option<BankingEvaluation>,
207    /// OCEL 2.0 process mining evaluation (optional).
208    #[serde(default, skip_serializing_if = "Option::is_none")]
209    pub process_mining: Option<ProcessMiningEvaluation>,
210    /// Causal model evaluation (optional).
211    #[serde(default, skip_serializing_if = "Option::is_none")]
212    pub causal: Option<CausalModelEvaluation>,
213    /// LLM enrichment quality evaluation (optional).
214    #[serde(default, skip_serializing_if = "Option::is_none")]
215    pub enrichment_quality: Option<EnrichmentQualityEvaluation>,
216    /// Overall pass/fail status.
217    pub passes: bool,
218    /// Summary of all failures.
219    pub failures: Vec<String>,
220    /// Tuning opportunities identified.
221    pub tuning_opportunities: Vec<TuningOpportunity>,
222    /// Configuration suggestions.
223    pub config_suggestions: Vec<ConfigSuggestion>,
224}
225
226impl ComprehensiveEvaluation {
227    /// Create a new empty evaluation.
228    pub fn new() -> Self {
229        Self {
230            statistical: StatisticalEvaluation::default(),
231            coherence: CoherenceEvaluation::default(),
232            quality: QualityEvaluation::default(),
233            ml_readiness: MLReadinessEvaluation::default(),
234            privacy: None,
235            banking: None,
236            process_mining: None,
237            causal: None,
238            enrichment_quality: None,
239            passes: true,
240            failures: Vec::new(),
241            tuning_opportunities: Vec::new(),
242            config_suggestions: Vec::new(),
243        }
244    }
245
246    /// Check all evaluations against thresholds and update overall status.
247    pub fn check_all_thresholds(&mut self, thresholds: &EvaluationThresholds) {
248        self.failures.clear();
249
250        // Check statistical thresholds
251        self.statistical.check_thresholds(thresholds);
252        self.failures.extend(self.statistical.failures.clone());
253
254        // Check coherence thresholds
255        self.coherence.check_thresholds(thresholds);
256        self.failures.extend(self.coherence.failures.clone());
257
258        // Check quality thresholds
259        self.quality.check_thresholds(thresholds);
260        self.failures.extend(self.quality.failures.clone());
261
262        // Check ML thresholds
263        self.ml_readiness.check_thresholds(thresholds);
264        self.failures.extend(self.ml_readiness.failures.clone());
265
266        // Check privacy evaluation (if present)
267        if let Some(ref mut privacy) = self.privacy {
268            privacy.update_status();
269            self.failures.extend(privacy.failures.clone());
270        }
271
272        // Check banking evaluation
273        if let Some(ref mut banking) = self.banking {
274            banking.check_thresholds();
275            self.failures.extend(banking.issues.clone());
276        }
277
278        // Check process mining evaluation
279        if let Some(ref mut pm) = self.process_mining {
280            pm.check_thresholds();
281            self.failures.extend(pm.issues.clone());
282        }
283
284        // Check causal model evaluation
285        if let Some(ref causal) = self.causal {
286            if !causal.passes {
287                self.failures.extend(causal.issues.clone());
288            }
289        }
290
291        // Check enrichment quality evaluation
292        if let Some(ref enrichment) = self.enrichment_quality {
293            if !enrichment.passes {
294                self.failures.extend(enrichment.issues.clone());
295            }
296        }
297
298        self.passes = self.failures.is_empty();
299    }
300}
301
302impl Default for ComprehensiveEvaluation {
303    fn default() -> Self {
304        Self::new()
305    }
306}
307
308/// Main evaluator that coordinates all evaluation modules.
309pub struct Evaluator {
310    /// Evaluation configuration.
311    config: EvaluationConfig,
312}
313
314impl Evaluator {
315    /// Create a new evaluator with the given configuration.
316    pub fn new(config: EvaluationConfig) -> Self {
317        Self { config }
318    }
319
320    /// Create an evaluator with default configuration.
321    pub fn with_defaults() -> Self {
322        Self::new(EvaluationConfig::default())
323    }
324
325    /// Get the configuration.
326    pub fn config(&self) -> &EvaluationConfig {
327        &self.config
328    }
329
330    /// Run a comprehensive evaluation and return results.
331    ///
332    /// This is a placeholder - actual implementation would take
333    /// generation results as input.
334    pub fn run_evaluation(&self) -> ComprehensiveEvaluation {
335        let mut evaluation = ComprehensiveEvaluation::new();
336        evaluation.check_all_thresholds(&self.config.thresholds);
337        evaluation
338    }
339}
340
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A freshly built evaluation passes and records no failures.
    #[test]
    fn test_comprehensive_evaluation_new() {
        let fresh = ComprehensiveEvaluation::new();
        assert!(fresh.passes);
        assert!(fresh.failures.is_empty());
    }

    /// The default evaluator exposes the default Benford p-value threshold.
    #[test]
    fn test_evaluator_creation() {
        let default_evaluator = Evaluator::with_defaults();
        let thresholds = &default_evaluator.config().thresholds;
        assert_eq!(thresholds.benford_p_value_min, 0.05);
    }
}