datasynth_eval/quality/
mod.rs1mod completeness;
7mod consistency;
8mod format;
9mod uniqueness;
10
11pub use completeness::{
12 CompletenessAnalysis, CompletenessAnalyzer, FieldCompleteness, FieldDefinition, FieldValue,
13};
14pub use consistency::{ConsistencyAnalysis, ConsistencyAnalyzer, ConsistencyRule};
15pub use format::{FormatAnalysis, FormatAnalyzer, FormatVariation};
16pub use uniqueness::{DuplicateInfo, UniqueRecord, UniquenessAnalysis, UniquenessAnalyzer};
17
18use serde::{Deserialize, Serialize};
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct QualityEvaluation {
23 pub uniqueness: Option<UniquenessAnalysis>,
25 pub completeness: Option<CompletenessAnalysis>,
27 pub format: Option<FormatAnalysis>,
29 pub consistency: Option<ConsistencyAnalysis>,
31 pub overall_score: f64,
33 pub passes: bool,
35 pub issues: Vec<String>,
37 pub failures: Vec<String>,
39}
40
41impl QualityEvaluation {
42 pub fn new() -> Self {
44 Self {
45 uniqueness: None,
46 completeness: None,
47 format: None,
48 consistency: None,
49 overall_score: 1.0,
50 passes: true,
51 issues: Vec::new(),
52 failures: Vec::new(),
53 }
54 }
55
56 pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
58 self.issues.clear();
59 self.failures.clear();
60 let mut scores = Vec::new();
61
62 if let Some(ref uniqueness) = self.uniqueness {
63 if uniqueness.duplicate_rate > thresholds.duplicate_rate_max {
64 self.issues.push(format!(
65 "Duplicate rate {} > {} (threshold)",
66 uniqueness.duplicate_rate, thresholds.duplicate_rate_max
67 ));
68 }
69 scores.push(1.0 - uniqueness.duplicate_rate);
70 }
71
72 if let Some(ref completeness) = self.completeness {
73 if completeness.overall_completeness < thresholds.completeness_rate_min {
74 self.issues.push(format!(
75 "Completeness {} < {} (threshold)",
76 completeness.overall_completeness, thresholds.completeness_rate_min
77 ));
78 }
79 scores.push(completeness.overall_completeness);
80 }
81
82 if let Some(ref format) = self.format {
83 if format.consistency_score < thresholds.format_consistency_min {
84 self.issues.push(format!(
85 "Format consistency {} < {} (threshold)",
86 format.consistency_score, thresholds.format_consistency_min
87 ));
88 }
89 scores.push(format.consistency_score);
90 }
91
92 if let Some(ref consistency) = self.consistency {
93 if consistency.pass_rate < thresholds.format_consistency_min {
95 self.issues.push(format!(
96 "Cross-field consistency {} < {} (threshold)",
97 consistency.pass_rate, thresholds.format_consistency_min
98 ));
99 }
100 scores.push(consistency.pass_rate);
101 }
102
103 self.overall_score = if scores.is_empty() {
104 1.0
105 } else {
106 scores.iter().sum::<f64>() / scores.len() as f64
107 };
108
109 self.failures = self.issues.clone();
111 self.passes = self.issues.is_empty();
112 }
113}
114
115impl Default for QualityEvaluation {
116 fn default() -> Self {
117 Self::new()
118 }
119}