Skip to main content

datasynth_eval/
lib.rs

1#![deny(clippy::unwrap_used)]
2// Allow some clippy lints that are common in test/evaluation code
3#![allow(clippy::field_reassign_with_default)]
4#![allow(clippy::too_many_arguments)]
5#![allow(clippy::upper_case_acronyms)] // MCAR, MAR, MNAR, ISO are standard abbreviations
6
7//! Synthetic Data Evaluation Framework
8//!
9//! This crate provides comprehensive evaluation capabilities for validating
10//! the quality and correctness of generated synthetic financial data.
11//!
12//! # Features
13//!
14//! - **Statistical Quality**: Benford's Law, amount distributions, line item patterns
15//! - **Semantic Coherence**: Balance sheet validation, subledger reconciliation
16//! - **Data Quality**: Uniqueness, completeness, format consistency
17//! - **ML-Readiness**: Feature distributions, label quality, graph structure
18//! - **Reporting**: HTML and JSON reports with pass/fail thresholds
19//!
20//! # Example
21//!
22//! ```ignore
23//! use datasynth_eval::{Evaluator, EvaluationConfig};
24//!
25//! let config = EvaluationConfig::default();
26//! let evaluator = Evaluator::new(config);
27//!
28//! // Evaluate generated data
29//! let result = evaluator.evaluate(&generation_result)?;
30//!
31//! // Generate report
32//! result.generate_html_report("evaluation_report.html")?;
33//! ```
34
35pub mod benchmarks;
36pub mod config;
37pub mod enhancement;
38pub mod error;
39pub mod gates;
40pub mod privacy;
41
42pub mod coherence;
43pub mod ml;
44pub mod quality;
45pub mod report;
46pub mod statistical;
47pub mod tuning;
48
49pub mod banking;
50pub mod causal;
51pub mod enrichment;
52pub mod process_mining;
53
54// Re-exports
55pub use config::{EvaluationConfig, EvaluationThresholds, PrivacyEvaluationConfig};
56pub use error::{EvalError, EvalResult};
57
58pub use statistical::{
59    AmountDistributionAnalysis, AmountDistributionAnalyzer, AnomalyRealismEvaluation,
60    AnomalyRealismEvaluator, BenfordAnalysis, BenfordAnalyzer, BenfordConformity,
61    DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer, DriftDetectionEntry,
62    DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent, LabeledEventAnalysis,
63    LineItemAnalysis, LineItemAnalyzer, LineItemEntry, StatisticalEvaluation, TemporalAnalysis,
64    TemporalAnalyzer, TemporalEntry,
65};
66
67pub use coherence::{
68    AccountType, AuditEvaluation, AuditEvaluator, AuditFindingData, AuditRiskData,
69    AuditTrailEvaluation, AuditTrailGap, BalanceSheetEvaluation, BalanceSheetEvaluator,
70    BalanceSnapshot, BankReconciliationEvaluation, BankReconciliationEvaluator, BidEvaluationData,
71    BudgetVarianceData, CoherenceEvaluation, ConcentrationMetrics, CrossProcessEvaluation,
72    CrossProcessEvaluator, CycleCountData, DocumentChainEvaluation, DocumentChainEvaluator,
73    DocumentReferenceData, EntityReferenceData, ExpenseReportData, FairValueEvaluation,
74    FinancialReportingEvaluation, FinancialReportingEvaluator, FinancialStatementData,
75    FrameworkViolation, HrPayrollEvaluation, HrPayrollEvaluator, ICMatchingData,
76    ICMatchingEvaluation, ICMatchingEvaluator, ImpairmentEvaluation, IsaComplianceEvaluation,
77    KpiData, LeaseAccountingEvaluation, LeaseAccountingEvaluator, LeaseEvaluation,
78    ManufacturingEvaluation, ManufacturingEvaluator, MaterialityData, NetworkEdge,
79    NetworkEvaluation, NetworkEvaluator, NetworkNode, NetworkThresholds, O2CChainData,
80    P2PChainData, PayrollHoursData, PayrollLineItemData, PayrollRunData, PcaobComplianceEvaluation,
81    PerformanceObligation, ProductionOrderData, QualityInspectionData, ReconciliationData,
82    ReferentialData, ReferentialIntegrityEvaluation, ReferentialIntegrityEvaluator,
83    RevenueContract, RevenueRecognitionEvaluation, RevenueRecognitionEvaluator,
84    RoutingOperationData, ScorecardCoverageData, SourcingEvaluation, SourcingEvaluator,
85    SourcingProjectData, SoxComplianceEvaluation, SpendAnalysisData, StandardsComplianceEvaluation,
86    StandardsThresholds, StrengthStats, SubledgerEvaluator, SubledgerReconciliationEvaluation,
87    TimeEntryData, VariableConsideration, ViolationSeverity, WorkpaperData,
88};
89
90pub use quality::{
91    CompletenessAnalysis, CompletenessAnalyzer, ConsistencyAnalysis, ConsistencyAnalyzer,
92    ConsistencyRule, DuplicateInfo, FieldCompleteness, FieldDefinition, FieldValue, FormatAnalysis,
93    FormatAnalyzer, FormatVariation, QualityEvaluation, UniqueRecord, UniquenessAnalysis,
94    UniquenessAnalyzer,
95};
96
97pub use ml::{
98    AnomalyScoringAnalysis, AnomalyScoringAnalyzer, CrossModalAnalysis, CrossModalAnalyzer,
99    DomainGapAnalysis, DomainGapAnalyzer, EmbeddingReadinessAnalysis, EmbeddingReadinessAnalyzer,
100    FeatureAnalysis, FeatureAnalyzer, FeatureQualityAnalysis, FeatureQualityAnalyzer, FeatureStats,
101    GnnReadinessAnalysis, GnnReadinessAnalyzer, GraphAnalysis, GraphAnalyzer, GraphMetrics,
102    LabelAnalysis, LabelAnalyzer, LabelDistribution, MLReadinessEvaluation,
103    SchemeDetectabilityAnalysis, SchemeDetectabilityAnalyzer, SplitAnalysis, SplitAnalyzer,
104    SplitMetrics, TemporalFidelityAnalysis, TemporalFidelityAnalyzer,
105};
106
107pub use report::{
108    BaselineComparison, ComparisonResult, EvaluationReport, HtmlReportGenerator,
109    JsonReportGenerator, MetricChange, ReportMetadata, ThresholdChecker, ThresholdResult,
110};
111
112pub use tuning::{
113    ConfigSuggestion, ConfigSuggestionGenerator, TuningAnalyzer, TuningCategory, TuningOpportunity,
114};
115
116pub use enhancement::{
117    AutoTuneResult, AutoTuner, ConfigPatch, EnhancementReport, Recommendation,
118    RecommendationCategory, RecommendationEngine, RecommendationPriority, RootCause,
119    SuggestedAction,
120};
121
122pub use privacy::{
123    LinkageAttack, LinkageConfig, LinkageResults, MembershipInferenceAttack, MiaConfig, MiaResults,
124    NistAlignmentReport, NistCriterion, PrivacyEvaluation, SynQPMatrix, SynQPQuadrant,
125};
126
127pub use benchmarks::{
128    // ACFE-calibrated benchmarks
129    acfe_calibrated_1k,
130    acfe_collusion_5k,
131    acfe_management_override_2k,
132    all_acfe_benchmarks,
133    all_benchmarks,
134    // Industry-specific benchmarks
135    all_industry_benchmarks,
136    anomaly_bench_1k,
137    data_quality_100k,
138    entity_match_5k,
139    financial_services_fraud_5k,
140    fraud_detect_10k,
141    get_benchmark,
142    get_industry_benchmark,
143    graph_fraud_10k,
144    healthcare_fraud_5k,
145    manufacturing_fraud_5k,
146    retail_fraud_10k,
147    technology_fraud_3k,
148    AcfeAlignment,
149    AcfeCalibration,
150    AcfeCategoryDistribution,
151    BaselineModelType,
152    BaselineResult,
153    BenchmarkBuilder,
154    BenchmarkSuite,
155    BenchmarkTaskType,
156    CostMatrix,
157    DatasetSpec,
158    EvaluationSpec,
159    FeatureSet,
160    IndustryBenchmarkAnalysis,
161    LeaderboardEntry,
162    MetricType,
163    SplitRatios,
164};
165
166pub use banking::{
167    AmlDetectabilityAnalysis, AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
168    KycCompletenessAnalysis, KycCompletenessAnalyzer, KycProfileData, TypologyData,
169};
170
171pub use process_mining::{
172    EventSequenceAnalysis, EventSequenceAnalyzer, ProcessEventData, ProcessMiningEvaluation,
173    VariantAnalysis, VariantAnalyzer, VariantData,
174};
175
176pub use causal::{CausalModelEvaluation, CausalModelEvaluator};
177
178pub use enrichment::{EnrichmentQualityEvaluation, EnrichmentQualityEvaluator};
179
180use serde::{Deserialize, Serialize};
181
182/// Comprehensive evaluation result combining all evaluation modules.
183#[derive(Debug, Clone, Serialize, Deserialize)]
184pub struct ComprehensiveEvaluation {
185    /// Statistical quality evaluation.
186    pub statistical: StatisticalEvaluation,
187    /// Semantic coherence evaluation.
188    pub coherence: CoherenceEvaluation,
189    /// Data quality evaluation.
190    pub quality: QualityEvaluation,
191    /// ML-readiness evaluation.
192    pub ml_readiness: MLReadinessEvaluation,
193    /// Privacy evaluation (optional — only populated when privacy testing is enabled).
194    #[serde(default, skip_serializing_if = "Option::is_none")]
195    pub privacy: Option<PrivacyEvaluation>,
196    /// Banking/KYC/AML evaluation (optional).
197    #[serde(default, skip_serializing_if = "Option::is_none")]
198    pub banking: Option<BankingEvaluation>,
199    /// OCEL 2.0 process mining evaluation (optional).
200    #[serde(default, skip_serializing_if = "Option::is_none")]
201    pub process_mining: Option<ProcessMiningEvaluation>,
202    /// Causal model evaluation (optional).
203    #[serde(default, skip_serializing_if = "Option::is_none")]
204    pub causal: Option<CausalModelEvaluation>,
205    /// LLM enrichment quality evaluation (optional).
206    #[serde(default, skip_serializing_if = "Option::is_none")]
207    pub enrichment_quality: Option<EnrichmentQualityEvaluation>,
208    /// Overall pass/fail status.
209    pub passes: bool,
210    /// Summary of all failures.
211    pub failures: Vec<String>,
212    /// Tuning opportunities identified.
213    pub tuning_opportunities: Vec<TuningOpportunity>,
214    /// Configuration suggestions.
215    pub config_suggestions: Vec<ConfigSuggestion>,
216}
217
218impl ComprehensiveEvaluation {
219    /// Create a new empty evaluation.
220    pub fn new() -> Self {
221        Self {
222            statistical: StatisticalEvaluation::default(),
223            coherence: CoherenceEvaluation::default(),
224            quality: QualityEvaluation::default(),
225            ml_readiness: MLReadinessEvaluation::default(),
226            privacy: None,
227            banking: None,
228            process_mining: None,
229            causal: None,
230            enrichment_quality: None,
231            passes: true,
232            failures: Vec::new(),
233            tuning_opportunities: Vec::new(),
234            config_suggestions: Vec::new(),
235        }
236    }
237
238    /// Check all evaluations against thresholds and update overall status.
239    pub fn check_all_thresholds(&mut self, thresholds: &EvaluationThresholds) {
240        self.failures.clear();
241
242        // Check statistical thresholds
243        self.statistical.check_thresholds(thresholds);
244        self.failures.extend(self.statistical.failures.clone());
245
246        // Check coherence thresholds
247        self.coherence.check_thresholds(thresholds);
248        self.failures.extend(self.coherence.failures.clone());
249
250        // Check quality thresholds
251        self.quality.check_thresholds(thresholds);
252        self.failures.extend(self.quality.failures.clone());
253
254        // Check ML thresholds
255        self.ml_readiness.check_thresholds(thresholds);
256        self.failures.extend(self.ml_readiness.failures.clone());
257
258        // Check privacy evaluation (if present)
259        if let Some(ref mut privacy) = self.privacy {
260            privacy.update_status();
261            self.failures.extend(privacy.failures.clone());
262        }
263
264        // Check banking evaluation
265        if let Some(ref mut banking) = self.banking {
266            banking.check_thresholds();
267            self.failures.extend(banking.issues.clone());
268        }
269
270        // Check process mining evaluation
271        if let Some(ref mut pm) = self.process_mining {
272            pm.check_thresholds();
273            self.failures.extend(pm.issues.clone());
274        }
275
276        // Check causal model evaluation
277        if let Some(ref causal) = self.causal {
278            if !causal.passes {
279                self.failures.extend(causal.issues.clone());
280            }
281        }
282
283        // Check enrichment quality evaluation
284        if let Some(ref enrichment) = self.enrichment_quality {
285            if !enrichment.passes {
286                self.failures.extend(enrichment.issues.clone());
287            }
288        }
289
290        self.passes = self.failures.is_empty();
291    }
292}
293
294impl Default for ComprehensiveEvaluation {
295    fn default() -> Self {
296        Self::new()
297    }
298}
299
300/// Main evaluator that coordinates all evaluation modules.
301pub struct Evaluator {
302    /// Evaluation configuration.
303    config: EvaluationConfig,
304}
305
306impl Evaluator {
307    /// Create a new evaluator with the given configuration.
308    pub fn new(config: EvaluationConfig) -> Self {
309        Self { config }
310    }
311
312    /// Create an evaluator with default configuration.
313    pub fn with_defaults() -> Self {
314        Self::new(EvaluationConfig::default())
315    }
316
317    /// Get the configuration.
318    pub fn config(&self) -> &EvaluationConfig {
319        &self.config
320    }
321
322    /// Run a comprehensive evaluation and return results.
323    ///
324    /// This is a placeholder - actual implementation would take
325    /// generation results as input.
326    pub fn run_evaluation(&self) -> ComprehensiveEvaluation {
327        let mut evaluation = ComprehensiveEvaluation::new();
328        evaluation.check_all_thresholds(&self.config.thresholds);
329        evaluation
330    }
331}
332
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A freshly created evaluation passes and records no failures.
    #[test]
    fn test_comprehensive_evaluation_new() {
        let fresh = ComprehensiveEvaluation::new();
        assert!(fresh.passes);
        assert!(fresh.failures.is_empty());
    }

    /// The default evaluator exposes the default Benford p-value threshold.
    #[test]
    fn test_evaluator_creation() {
        let default_evaluator = Evaluator::with_defaults();
        let thresholds = &default_evaluator.config().thresholds;
        assert_eq!(thresholds.benford_p_value_min, 0.05);
    }
}