Skip to main content

datasynth_eval/
lib.rs

1// Allow some clippy lints that are common in test/evaluation code
2#![allow(clippy::field_reassign_with_default)]
3#![allow(clippy::too_many_arguments)]
4#![allow(clippy::upper_case_acronyms)] // MCAR, MAR, MNAR, ISO are standard abbreviations
5
6//! Synthetic Data Evaluation Framework
7//!
8//! This crate provides comprehensive evaluation capabilities for validating
9//! the quality and correctness of generated synthetic financial data.
10//!
11//! # Features
12//!
13//! - **Statistical Quality**: Benford's Law, amount distributions, line item patterns
14//! - **Semantic Coherence**: Balance sheet validation, subledger reconciliation
15//! - **Data Quality**: Uniqueness, completeness, format consistency
16//! - **ML-Readiness**: Feature distributions, label quality, graph structure
17//! - **Reporting**: HTML and JSON reports with pass/fail thresholds
18//!
19//! # Example
20//!
21//! ```ignore
22//! use datasynth_eval::{Evaluator, EvaluationConfig};
23//!
24//! let config = EvaluationConfig::default();
25//! let evaluator = Evaluator::new(config);
26//!
27//! // Evaluate generated data
28//! let result = evaluator.evaluate(&generation_result)?;
29//!
30//! // Generate report
31//! result.generate_html_report("evaluation_report.html")?;
32//! ```
33
34pub mod benchmarks;
35pub mod config;
36pub mod enhancement;
37pub mod error;
38
39pub mod coherence;
40pub mod ml;
41pub mod quality;
42pub mod report;
43pub mod statistical;
44pub mod tuning;
45
46// Re-exports
47pub use config::{EvaluationConfig, EvaluationThresholds};
48pub use error::{EvalError, EvalResult};
49
50pub use statistical::{
51    AmountDistributionAnalysis, AmountDistributionAnalyzer, BenfordAnalysis, BenfordAnalyzer,
52    BenfordConformity, DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer,
53    DriftDetectionEntry, DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent,
54    LabeledEventAnalysis, LineItemAnalysis, LineItemAnalyzer, LineItemEntry, StatisticalEvaluation,
55    TemporalAnalysis, TemporalAnalyzer, TemporalEntry,
56};
57
58pub use coherence::{
59    AuditTrailEvaluation, AuditTrailGap, BalanceSheetEvaluation, BalanceSheetEvaluator,
60    CoherenceEvaluation, ConcentrationMetrics, DocumentChainEvaluation, DocumentChainEvaluator,
61    FairValueEvaluation, FrameworkViolation, ICMatchingEvaluation, ICMatchingEvaluator,
62    ImpairmentEvaluation, IsaComplianceEvaluation, LeaseAccountingEvaluation,
63    LeaseAccountingEvaluator, LeaseEvaluation, NetworkEdge, NetworkEvaluation, NetworkEvaluator,
64    NetworkNode, NetworkThresholds, PcaobComplianceEvaluation, PerformanceObligation,
65    ReferentialIntegrityEvaluation, ReferentialIntegrityEvaluator, RevenueContract,
66    RevenueRecognitionEvaluation, RevenueRecognitionEvaluator, SoxComplianceEvaluation,
67    StandardsComplianceEvaluation, StandardsThresholds, StrengthStats, SubledgerEvaluator,
68    SubledgerReconciliationEvaluation, VariableConsideration, ViolationSeverity,
69};
70
71pub use quality::{
72    CompletenessAnalysis, CompletenessAnalyzer, ConsistencyAnalysis, ConsistencyAnalyzer,
73    ConsistencyRule, DuplicateInfo, FieldCompleteness, FormatAnalysis, FormatAnalyzer,
74    FormatVariation, QualityEvaluation, UniquenessAnalysis, UniquenessAnalyzer,
75};
76
77pub use ml::{
78    FeatureAnalysis, FeatureAnalyzer, FeatureStats, GraphAnalysis, GraphAnalyzer, GraphMetrics,
79    LabelAnalysis, LabelAnalyzer, LabelDistribution, MLReadinessEvaluation, SplitAnalysis,
80    SplitAnalyzer, SplitMetrics,
81};
82
83pub use report::{
84    BaselineComparison, ComparisonResult, EvaluationReport, HtmlReportGenerator,
85    JsonReportGenerator, MetricChange, ReportMetadata, ThresholdChecker, ThresholdResult,
86};
87
88pub use tuning::{
89    ConfigSuggestion, ConfigSuggestionGenerator, TuningAnalyzer, TuningCategory, TuningOpportunity,
90};
91
92pub use enhancement::{
93    AutoTuneResult, AutoTuner, ConfigPatch, EnhancementReport, Recommendation,
94    RecommendationCategory, RecommendationEngine, RecommendationPriority, RootCause,
95    SuggestedAction,
96};
97
98pub use benchmarks::{
99    // ACFE-calibrated benchmarks
100    acfe_calibrated_1k,
101    acfe_collusion_5k,
102    acfe_management_override_2k,
103    all_acfe_benchmarks,
104    all_benchmarks,
105    // Industry-specific benchmarks
106    all_industry_benchmarks,
107    anomaly_bench_1k,
108    data_quality_100k,
109    entity_match_5k,
110    financial_services_fraud_5k,
111    fraud_detect_10k,
112    get_benchmark,
113    get_industry_benchmark,
114    graph_fraud_10k,
115    healthcare_fraud_5k,
116    manufacturing_fraud_5k,
117    retail_fraud_10k,
118    technology_fraud_3k,
119    AcfeAlignment,
120    AcfeCalibration,
121    AcfeCategoryDistribution,
122    BaselineModelType,
123    BaselineResult,
124    BenchmarkBuilder,
125    BenchmarkSuite,
126    BenchmarkTaskType,
127    CostMatrix,
128    DatasetSpec,
129    EvaluationSpec,
130    FeatureSet,
131    IndustryBenchmarkAnalysis,
132    LeaderboardEntry,
133    MetricType,
134    SplitRatios,
135};
136
137use serde::{Deserialize, Serialize};
138
/// Comprehensive evaluation result combining all evaluation modules.
///
/// Aggregates the per-module results (statistical, coherence, quality,
/// ML-readiness) together with the overall pass/fail status, the collected
/// failure messages, and any tuning/configuration suggestions. Populate the
/// per-module fields, then call
/// [`ComprehensiveEvaluation::check_all_thresholds`] to refresh `passes`
/// and `failures`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComprehensiveEvaluation {
    /// Statistical quality evaluation (Benford, distributions, temporal).
    pub statistical: StatisticalEvaluation,
    /// Semantic coherence evaluation (balances, reconciliation, compliance).
    pub coherence: CoherenceEvaluation,
    /// Data quality evaluation (uniqueness, completeness, format).
    pub quality: QualityEvaluation,
    /// ML-readiness evaluation (features, labels, graph, splits).
    pub ml_readiness: MLReadinessEvaluation,
    /// Overall pass/fail status; true only when `failures` is empty
    /// after the last threshold check.
    pub passes: bool,
    /// Summary of all failures gathered from the four module evaluations.
    pub failures: Vec<String>,
    /// Tuning opportunities identified.
    pub tuning_opportunities: Vec<TuningOpportunity>,
    /// Configuration suggestions.
    pub config_suggestions: Vec<ConfigSuggestion>,
}
159
160impl ComprehensiveEvaluation {
161    /// Create a new empty evaluation.
162    pub fn new() -> Self {
163        Self {
164            statistical: StatisticalEvaluation::default(),
165            coherence: CoherenceEvaluation::default(),
166            quality: QualityEvaluation::default(),
167            ml_readiness: MLReadinessEvaluation::default(),
168            passes: true,
169            failures: Vec::new(),
170            tuning_opportunities: Vec::new(),
171            config_suggestions: Vec::new(),
172        }
173    }
174
175    /// Check all evaluations against thresholds and update overall status.
176    pub fn check_all_thresholds(&mut self, thresholds: &EvaluationThresholds) {
177        self.failures.clear();
178
179        // Check statistical thresholds
180        self.statistical.check_thresholds(thresholds);
181        self.failures.extend(self.statistical.failures.clone());
182
183        // Check coherence thresholds
184        self.coherence.check_thresholds(thresholds);
185        self.failures.extend(self.coherence.failures.clone());
186
187        // Check quality thresholds
188        self.quality.check_thresholds(thresholds);
189        self.failures.extend(self.quality.failures.clone());
190
191        // Check ML thresholds
192        self.ml_readiness.check_thresholds(thresholds);
193        self.failures.extend(self.ml_readiness.failures.clone());
194
195        self.passes = self.failures.is_empty();
196    }
197}
198
/// `Default` delegates to [`ComprehensiveEvaluation::new`], so a defaulted
/// evaluation starts in the passing state with empty failure lists (rather
/// than the all-false/empty result a derived `Default` would produce).
impl Default for ComprehensiveEvaluation {
    fn default() -> Self {
        Self::new()
    }
}
204
/// Main evaluator that coordinates all evaluation modules.
///
/// Holds the [`EvaluationConfig`] (including thresholds) used when running
/// a comprehensive evaluation; construct via [`Evaluator::new`] or
/// [`Evaluator::with_defaults`].
pub struct Evaluator {
    /// Evaluation configuration, exposed read-only via [`Evaluator::config`].
    config: EvaluationConfig,
}
210
211impl Evaluator {
212    /// Create a new evaluator with the given configuration.
213    pub fn new(config: EvaluationConfig) -> Self {
214        Self { config }
215    }
216
217    /// Create an evaluator with default configuration.
218    pub fn with_defaults() -> Self {
219        Self::new(EvaluationConfig::default())
220    }
221
222    /// Get the configuration.
223    pub fn config(&self) -> &EvaluationConfig {
224        &self.config
225    }
226
227    /// Run a comprehensive evaluation and return results.
228    ///
229    /// This is a placeholder - actual implementation would take
230    /// generation results as input.
231    pub fn run_evaluation(&self) -> ComprehensiveEvaluation {
232        let mut evaluation = ComprehensiveEvaluation::new();
233        evaluation.check_all_thresholds(&self.config.thresholds);
234        evaluation
235    }
236}
237
238#[cfg(test)]
239mod tests {
240    use super::*;
241
242    #[test]
243    fn test_comprehensive_evaluation_new() {
244        let eval = ComprehensiveEvaluation::new();
245        assert!(eval.passes);
246        assert!(eval.failures.is_empty());
247    }
248
249    #[test]
250    fn test_evaluator_creation() {
251        let evaluator = Evaluator::with_defaults();
252        assert_eq!(evaluator.config().thresholds.benford_p_value_min, 0.05);
253    }
254}