datasynth_eval/
lib.rs

// Allow some clippy lints that are common in test/evaluation code
#![allow(clippy::field_reassign_with_default)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::upper_case_acronyms)] // MCAR, MAR, MNAR, ISO are standard abbreviations

//! Synthetic Data Evaluation Framework
//!
//! This crate provides comprehensive evaluation capabilities for validating
//! the quality and correctness of generated synthetic financial data.
//!
//! # Features
//!
//! - **Statistical Quality**: Benford's Law, amount distributions, line item patterns
//! - **Semantic Coherence**: balance sheet validation, subledger reconciliation
//! - **Data Quality**: uniqueness, completeness, format consistency
//! - **ML-Readiness**: feature distributions, label quality, graph structure
//! - **Reporting**: HTML and JSON reports with pass/fail thresholds
//!
//! # Example
//!
//! ```ignore
//! use datasynth_eval::{Evaluator, EvaluationConfig};
//!
//! let config = EvaluationConfig::default();
//! let evaluator = Evaluator::new(config);
//!
//! // Evaluate generated data
//! let result = evaluator.evaluate(&generation_result)?;
//!
//! // Generate report
//! result.generate_html_report("evaluation_report.html")?;
//! ```
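//!
//! Thresholds can be tightened or relaxed before constructing the evaluator.
//! A hypothetical sketch, assuming the threshold fields are publicly
//! writable (`benford_p_value_min` is the field exercised in this crate's
//! unit tests):
//!
//! ```ignore
//! let mut config = EvaluationConfig::default();
//! // Demand a stricter Benford's Law fit than the default 0.05 p-value floor.
//! config.thresholds.benford_p_value_min = 0.10;
//! let evaluator = Evaluator::new(config);
//! ```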

pub mod benchmarks;
pub mod config;
pub mod enhancement;
pub mod error;

pub mod coherence;
pub mod ml;
pub mod quality;
pub mod report;
pub mod statistical;
pub mod tuning;

// Re-exports
pub use config::{EvaluationConfig, EvaluationThresholds};
pub use error::{EvalError, EvalResult};

pub use statistical::{
    AmountDistributionAnalysis, AmountDistributionAnalyzer, BenfordAnalysis, BenfordAnalyzer,
    BenfordConformity, LineItemAnalysis, LineItemAnalyzer, LineItemEntry, StatisticalEvaluation,
    TemporalAnalysis, TemporalAnalyzer, TemporalEntry,
};

pub use coherence::{
    BalanceSheetEvaluation, BalanceSheetEvaluator, CoherenceEvaluation, DocumentChainEvaluation,
    DocumentChainEvaluator, ICMatchingEvaluation, ICMatchingEvaluator,
    ReferentialIntegrityEvaluation, ReferentialIntegrityEvaluator, SubledgerEvaluator,
    SubledgerReconciliationEvaluation,
};

pub use quality::{
    CompletenessAnalysis, CompletenessAnalyzer, ConsistencyAnalysis, ConsistencyAnalyzer,
    ConsistencyRule, DuplicateInfo, FieldCompleteness, FormatAnalysis, FormatAnalyzer,
    FormatVariation, QualityEvaluation, UniquenessAnalysis, UniquenessAnalyzer,
};

pub use ml::{
    FeatureAnalysis, FeatureAnalyzer, FeatureStats, GraphAnalysis, GraphAnalyzer, GraphMetrics,
    LabelAnalysis, LabelAnalyzer, LabelDistribution, MLReadinessEvaluation, SplitAnalysis,
    SplitAnalyzer, SplitMetrics,
};

pub use report::{
    BaselineComparison, ComparisonResult, EvaluationReport, HtmlReportGenerator,
    JsonReportGenerator, MetricChange, ReportMetadata, ThresholdChecker, ThresholdResult,
};

pub use tuning::{
    ConfigSuggestion, ConfigSuggestionGenerator, TuningAnalyzer, TuningCategory, TuningOpportunity,
};

pub use enhancement::{
    AutoTuneResult, AutoTuner, ConfigPatch, EnhancementReport, Recommendation,
    RecommendationCategory, RecommendationEngine, RecommendationPriority, RootCause,
    SuggestedAction,
};

pub use benchmarks::{
    all_benchmarks, anomaly_bench_1k, data_quality_100k, entity_match_5k, fraud_detect_10k,
    get_benchmark, graph_fraud_10k, BaselineModelType, BaselineResult, BenchmarkBuilder,
    BenchmarkSuite, BenchmarkTaskType, CostMatrix, DatasetSpec, EvaluationSpec, FeatureSet,
    LeaderboardEntry, MetricType, SplitRatios,
};

use serde::{Deserialize, Serialize};

/// Comprehensive evaluation result combining all evaluation modules.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComprehensiveEvaluation {
    /// Statistical quality evaluation.
    pub statistical: StatisticalEvaluation,
    /// Semantic coherence evaluation.
    pub coherence: CoherenceEvaluation,
    /// Data quality evaluation.
    pub quality: QualityEvaluation,
    /// ML-readiness evaluation.
    pub ml_readiness: MLReadinessEvaluation,
    /// Overall pass/fail status.
    pub passes: bool,
    /// Summary of all failures.
    pub failures: Vec<String>,
    /// Tuning opportunities identified.
    pub tuning_opportunities: Vec<TuningOpportunity>,
    /// Configuration suggestions.
    pub config_suggestions: Vec<ConfigSuggestion>,
}

impl ComprehensiveEvaluation {
    /// Create a new empty evaluation.
    pub fn new() -> Self {
        Self {
            statistical: StatisticalEvaluation::default(),
            coherence: CoherenceEvaluation::default(),
            quality: QualityEvaluation::default(),
            ml_readiness: MLReadinessEvaluation::default(),
            passes: true,
            failures: Vec::new(),
            tuning_opportunities: Vec::new(),
            config_suggestions: Vec::new(),
        }
    }

    /// Check all evaluations against thresholds and update overall status.
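    ///
    /// A minimal usage sketch (hypothetical flow; it assumes the
    /// sub-evaluations have already been populated by their analyzers):
    ///
    /// ```ignore
    /// let mut evaluation = ComprehensiveEvaluation::new();
    /// evaluation.check_all_thresholds(&EvaluationConfig::default().thresholds);
    /// if !evaluation.passes {
    ///     for failure in &evaluation.failures {
    ///         eprintln!("threshold failure: {failure}");
    ///     }
    /// }
    /// ```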
    pub fn check_all_thresholds(&mut self, thresholds: &EvaluationThresholds) {
        self.failures.clear();

        // Check statistical thresholds
        self.statistical.check_thresholds(thresholds);
        self.failures.extend(self.statistical.failures.clone());

        // Check coherence thresholds
        self.coherence.check_thresholds(thresholds);
        self.failures.extend(self.coherence.failures.clone());

        // Check quality thresholds
        self.quality.check_thresholds(thresholds);
        self.failures.extend(self.quality.failures.clone());

        // Check ML thresholds
        self.ml_readiness.check_thresholds(thresholds);
        self.failures.extend(self.ml_readiness.failures.clone());

        self.passes = self.failures.is_empty();
    }
}

impl Default for ComprehensiveEvaluation {
    fn default() -> Self {
        Self::new()
    }
}

/// Main evaluator that coordinates all evaluation modules.
pub struct Evaluator {
    /// Evaluation configuration.
    config: EvaluationConfig,
}

impl Evaluator {
    /// Create a new evaluator with the given configuration.
    pub fn new(config: EvaluationConfig) -> Self {
        Self { config }
    }

    /// Create an evaluator with default configuration.
    pub fn with_defaults() -> Self {
        Self::new(EvaluationConfig::default())
    }

    /// Get the configuration.
    pub fn config(&self) -> &EvaluationConfig {
        &self.config
    }

    /// Run a comprehensive evaluation and return results.
    ///
    /// This is a placeholder; the full implementation will take
    /// generation results as input.
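    ///
    /// A sketch of the intended call shape under the default configuration:
    ///
    /// ```ignore
    /// let evaluator = Evaluator::with_defaults();
    /// let evaluation = evaluator.run_evaluation();
    /// assert!(evaluation.passes);
    /// ```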
    pub fn run_evaluation(&self) -> ComprehensiveEvaluation {
        let mut evaluation = ComprehensiveEvaluation::new();
        evaluation.check_all_thresholds(&self.config.thresholds);
        evaluation
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_comprehensive_evaluation_new() {
        let eval = ComprehensiveEvaluation::new();
        assert!(eval.passes);
        assert!(eval.failures.is_empty());
    }

    #[test]
    fn test_evaluator_creation() {
        let evaluator = Evaluator::with_defaults();
        assert_eq!(evaluator.config().thresholds.benford_p_value_min, 0.05);
    }
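
    // A consistency sketch: `check_all_thresholds` derives `passes` from the
    // collected failures, so the two must agree after the call. Uses only the
    // API exercised above; no assumptions beyond the `Default` config.
    #[test]
    fn test_check_all_thresholds_consistency() {
        let mut eval = ComprehensiveEvaluation::new();
        let config = EvaluationConfig::default();
        eval.check_all_thresholds(&config.thresholds);
        assert_eq!(eval.passes, eval.failures.is_empty());
    }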
}