
// datasynth_eval/lib.rs

// Allow some clippy lints that are common in test/evaluation code
#![allow(clippy::field_reassign_with_default)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::upper_case_acronyms)] // MCAR, MAR, MNAR, ISO are standard abbreviations

//! Synthetic Data Evaluation Framework
//!
//! This crate provides comprehensive evaluation capabilities for validating
//! the quality and correctness of generated synthetic financial data.
//!
//! # Features
//!
//! - **Statistical Quality**: Benford's Law, amount distributions, line item patterns
//! - **Semantic Coherence**: Balance sheet validation, subledger reconciliation
//! - **Data Quality**: Uniqueness, completeness, format consistency
//! - **ML-Readiness**: Feature distributions, label quality, graph structure
//! - **Reporting**: HTML and JSON reports with pass/fail thresholds
//!
//! # Example
//!
//! ```ignore
//! use datasynth_eval::{Evaluator, EvaluationConfig};
//!
//! let config = EvaluationConfig::default();
//! let evaluator = Evaluator::new(config);
//!
//! // Evaluate generated data
//! let result = evaluator.evaluate(&generation_result)?;
//!
//! // Generate report
//! result.generate_html_report("evaluation_report.html")?;
//! ```

pub mod benchmarks;
pub mod config;
pub mod enhancement;
pub mod error;

pub mod coherence;
pub mod ml;
pub mod quality;
pub mod report;
pub mod statistical;
pub mod tuning;

// Re-exports
pub use config::{EvaluationConfig, EvaluationThresholds};
pub use error::{EvalError, EvalResult};

pub use statistical::{
    AmountDistributionAnalysis, AmountDistributionAnalyzer, BenfordAnalysis, BenfordAnalyzer,
    BenfordConformity, LineItemAnalysis, LineItemAnalyzer, LineItemEntry, StatisticalEvaluation,
    TemporalAnalysis, TemporalAnalyzer, TemporalEntry,
};

pub use coherence::{
    AuditTrailEvaluation, AuditTrailGap, BalanceSheetEvaluation, BalanceSheetEvaluator,
    CoherenceEvaluation, DocumentChainEvaluation, DocumentChainEvaluator, FairValueEvaluation,
    FrameworkViolation, ICMatchingEvaluation, ICMatchingEvaluator, ImpairmentEvaluation,
    IsaComplianceEvaluation, LeaseAccountingEvaluation, LeaseAccountingEvaluator, LeaseEvaluation,
    PcaobComplianceEvaluation, PerformanceObligation, ReferentialIntegrityEvaluation,
    ReferentialIntegrityEvaluator, RevenueContract, RevenueRecognitionEvaluation,
    RevenueRecognitionEvaluator, SoxComplianceEvaluation, StandardsComplianceEvaluation,
    StandardsThresholds, SubledgerEvaluator, SubledgerReconciliationEvaluation,
    VariableConsideration, ViolationSeverity,
};

pub use quality::{
    CompletenessAnalysis, CompletenessAnalyzer, ConsistencyAnalysis, ConsistencyAnalyzer,
    ConsistencyRule, DuplicateInfo, FieldCompleteness, FormatAnalysis, FormatAnalyzer,
    FormatVariation, QualityEvaluation, UniquenessAnalysis, UniquenessAnalyzer,
};

pub use ml::{
    FeatureAnalysis, FeatureAnalyzer, FeatureStats, GraphAnalysis, GraphAnalyzer, GraphMetrics,
    LabelAnalysis, LabelAnalyzer, LabelDistribution, MLReadinessEvaluation, SplitAnalysis,
    SplitAnalyzer, SplitMetrics,
};

pub use report::{
    BaselineComparison, ComparisonResult, EvaluationReport, HtmlReportGenerator,
    JsonReportGenerator, MetricChange, ReportMetadata, ThresholdChecker, ThresholdResult,
};

pub use tuning::{
    ConfigSuggestion, ConfigSuggestionGenerator, TuningAnalyzer, TuningCategory, TuningOpportunity,
};

pub use enhancement::{
    AutoTuneResult, AutoTuner, ConfigPatch, EnhancementReport, Recommendation,
    RecommendationCategory, RecommendationEngine, RecommendationPriority, RootCause,
    SuggestedAction,
};

pub use benchmarks::{
    all_benchmarks, anomaly_bench_1k, data_quality_100k, entity_match_5k, fraud_detect_10k,
    get_benchmark, graph_fraud_10k, BaselineModelType, BaselineResult, BenchmarkBuilder,
    BenchmarkSuite, BenchmarkTaskType, CostMatrix, DatasetSpec, EvaluationSpec, FeatureSet,
    LeaderboardEntry, MetricType, SplitRatios,
};

use serde::{Deserialize, Serialize};

/// Comprehensive evaluation result combining all evaluation modules.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComprehensiveEvaluation {
    /// Statistical quality evaluation.
    pub statistical: StatisticalEvaluation,
    /// Semantic coherence evaluation.
    pub coherence: CoherenceEvaluation,
    /// Data quality evaluation.
    pub quality: QualityEvaluation,
    /// ML-readiness evaluation.
    pub ml_readiness: MLReadinessEvaluation,
    /// Overall pass/fail status.
    pub passes: bool,
    /// Summary of all failures.
    pub failures: Vec<String>,
    /// Tuning opportunities identified.
    pub tuning_opportunities: Vec<TuningOpportunity>,
    /// Configuration suggestions.
    pub config_suggestions: Vec<ConfigSuggestion>,
}

impl ComprehensiveEvaluation {
    /// Create a new empty evaluation.
    pub fn new() -> Self {
        Self {
            statistical: StatisticalEvaluation::default(),
            coherence: CoherenceEvaluation::default(),
            quality: QualityEvaluation::default(),
            ml_readiness: MLReadinessEvaluation::default(),
            passes: true,
            failures: Vec::new(),
            tuning_opportunities: Vec::new(),
            config_suggestions: Vec::new(),
        }
    }

    /// Check all evaluations against thresholds and update overall status.
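    ///
    /// A minimal sketch of the intended call pattern (assumes
    /// `EvaluationThresholds` implements `Default`, which is not shown in
    /// this file):
    ///
    /// ```ignore
    /// let mut eval = ComprehensiveEvaluation::new();
    /// eval.check_all_thresholds(&EvaluationThresholds::default());
    /// // `passes` is true exactly when no sub-evaluation recorded a failure.
    /// assert_eq!(eval.passes, eval.failures.is_empty());
    /// ```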
    pub fn check_all_thresholds(&mut self, thresholds: &EvaluationThresholds) {
        self.failures.clear();

        // Check statistical thresholds
        self.statistical.check_thresholds(thresholds);
        self.failures.extend(self.statistical.failures.clone());

        // Check coherence thresholds
        self.coherence.check_thresholds(thresholds);
        self.failures.extend(self.coherence.failures.clone());

        // Check quality thresholds
        self.quality.check_thresholds(thresholds);
        self.failures.extend(self.quality.failures.clone());

        // Check ML thresholds
        self.ml_readiness.check_thresholds(thresholds);
        self.failures.extend(self.ml_readiness.failures.clone());

        self.passes = self.failures.is_empty();
    }
}

impl Default for ComprehensiveEvaluation {
    fn default() -> Self {
        Self::new()
    }
}

/// Main evaluator that coordinates all evaluation modules.
pub struct Evaluator {
    /// Evaluation configuration.
    config: EvaluationConfig,
}

impl Evaluator {
    /// Create a new evaluator with the given configuration.
    pub fn new(config: EvaluationConfig) -> Self {
        Self { config }
    }

    /// Create an evaluator with default configuration.
    pub fn with_defaults() -> Self {
        Self::new(EvaluationConfig::default())
    }

    /// Get the configuration.
    pub fn config(&self) -> &EvaluationConfig {
        &self.config
    }

    /// Run a comprehensive evaluation and return results.
    ///
    /// This is a placeholder; the actual implementation would take
    /// generation results as input.
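    ///
    /// A minimal sketch using only items defined in this file:
    ///
    /// ```ignore
    /// let evaluator = Evaluator::with_defaults();
    /// let evaluation = evaluator.run_evaluation();
    /// // Overall status always mirrors the collected failure list.
    /// assert_eq!(evaluation.passes, evaluation.failures.is_empty());
    /// ```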
    pub fn run_evaluation(&self) -> ComprehensiveEvaluation {
        let mut evaluation = ComprehensiveEvaluation::new();
        evaluation.check_all_thresholds(&self.config.thresholds);
        evaluation
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_comprehensive_evaluation_new() {
        let eval = ComprehensiveEvaluation::new();
        assert!(eval.passes);
        assert!(eval.failures.is_empty());
    }

    #[test]
    fn test_evaluator_creation() {
        let evaluator = Evaluator::with_defaults();
        assert_eq!(evaluator.config().thresholds.benford_p_value_min, 0.05);
    }
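
    // Sketch of the invariant maintained by `check_all_thresholds`: whether a
    // default evaluation passes depends on the default thresholds, so this
    // test asserts only that `passes` agrees with the failure list.
    #[test]
    fn test_run_evaluation_invariant() {
        let evaluator = Evaluator::with_defaults();
        let evaluation = evaluator.run_evaluation();
        assert_eq!(evaluation.passes, evaluation.failures.is_empty());
    }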
}