datasynth_eval/coherence/
referential.rs1use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8
/// Combined result of a referential-integrity evaluation over every entity type.
///
/// One [`EntityIntegrity`] is reported per entity type, together with totals
/// aggregated across all of them and the final pass/fail verdict.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferentialIntegrityEvaluation {
    /// Integrity metrics for vendor references.
    pub vendor_integrity: EntityIntegrity,
    /// Integrity metrics for customer references.
    pub customer_integrity: EntityIntegrity,
    /// Integrity metrics for material references.
    pub material_integrity: EntityIntegrity,
    /// Integrity metrics for employee references.
    pub employee_integrity: EntityIntegrity,
    /// Integrity metrics for account references.
    pub account_integrity: EntityIntegrity,
    /// Integrity metrics for cost-center references.
    pub cost_center_integrity: EntityIntegrity,
    /// Valid references divided by all references, summed over every entity
    /// type; `1.0` when there are no references at all.
    pub overall_integrity_score: f64,
    /// Sum of `valid_references` over every entity type.
    pub total_valid_references: usize,
    /// Sum of `invalid_references` over every entity type.
    pub total_invalid_references: usize,
    /// Sum of `orphaned_entities` over every entity type.
    pub total_orphaned_entities: usize,
    /// `true` when the overall integrity score reaches the evaluator's minimum
    /// and every entity type that has at least one entity reaches the
    /// evaluator's minimum usage rate.
    pub passes: bool,
}
35
/// Referential-integrity metrics for a single entity type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityIntegrity {
    /// Label of the entity type these metrics describe (e.g. `"Vendor"`).
    pub entity_type: String,
    /// Number of known (valid) entity identifiers.
    pub total_entities: usize,
    /// Number of distinct known entities that are referenced at least once.
    pub entities_referenced: usize,
    /// Number of references that point at a known entity (duplicates counted).
    pub valid_references: usize,
    /// Number of references that point at an unknown identifier.
    pub invalid_references: usize,
    /// Number of known entities that are never referenced.
    pub orphaned_entities: usize,
    /// `valid_references / (valid_references + invalid_references)`;
    /// `1.0` when there are no references.
    pub integrity_score: f64,
    /// `entities_referenced / total_entities`; `1.0` when there are no entities.
    pub usage_rate: f64,
}
56
57impl Default for EntityIntegrity {
58 fn default() -> Self {
59 Self {
60 entity_type: String::new(),
61 total_entities: 0,
62 entities_referenced: 0,
63 valid_references: 0,
64 invalid_references: 0,
65 orphaned_entities: 0,
66 integrity_score: 1.0,
67 usage_rate: 1.0,
68 }
69 }
70}
71
/// Input to the referential-integrity evaluation: one
/// [`EntityReferenceData`] per supported entity type.
#[derive(Debug, Clone, Default)]
pub struct ReferentialData {
    /// Known vendors and the references made to them.
    pub vendors: EntityReferenceData,
    /// Known customers and the references made to them.
    pub customers: EntityReferenceData,
    /// Known materials and the references made to them.
    pub materials: EntityReferenceData,
    /// Known employees and the references made to them.
    pub employees: EntityReferenceData,
    /// Known accounts and the references made to them.
    pub accounts: EntityReferenceData,
    /// Known cost centers and the references made to them.
    pub cost_centers: EntityReferenceData,
}
88
/// Known identifiers and observed references for one entity type.
#[derive(Debug, Clone, Default)]
pub struct EntityReferenceData {
    /// Identifiers of the entities that exist; duplicates collapse.
    pub valid_ids: std::collections::HashSet<String>,
    /// Every observed reference, in insertion order, duplicates included.
    pub references: Vec<String>,
}

impl EntityReferenceData {
    /// Creates an empty data set with no entities and no references.
    pub fn new() -> Self {
        Self {
            valid_ids: std::collections::HashSet::new(),
            references: Vec::new(),
        }
    }

    /// Registers `id` as an existing entity.
    ///
    /// Registering the same identifier more than once has no further effect.
    pub fn add_entity(&mut self, id: String) {
        // The "was it newly inserted" flag is deliberately not reported.
        let _newly_inserted = self.valid_ids.insert(id);
    }

    /// Records one reference to `id`.
    ///
    /// The identifier is not checked against the known entities here, and
    /// repeated references are all kept.
    pub fn add_reference(&mut self, id: String) {
        self.references.push(id);
    }
}
114
/// Evaluates referential integrity against two pass thresholds.
pub struct ReferentialIntegrityEvaluator {
    /// Minimum overall integrity score (valid / total references) required to pass.
    min_integrity_score: f64,
    /// Minimum usage rate required of every entity type that has at least one entity.
    min_usage_rate: f64,
}
123
124impl ReferentialIntegrityEvaluator {
125 pub fn new(min_integrity_score: f64, min_usage_rate: f64) -> Self {
127 Self {
128 min_integrity_score,
129 min_usage_rate,
130 }
131 }
132
133 pub fn evaluate(&self, data: &ReferentialData) -> EvalResult<ReferentialIntegrityEvaluation> {
135 let vendor_integrity = self.evaluate_entity("Vendor", &data.vendors);
136 let customer_integrity = self.evaluate_entity("Customer", &data.customers);
137 let material_integrity = self.evaluate_entity("Material", &data.materials);
138 let employee_integrity = self.evaluate_entity("Employee", &data.employees);
139 let account_integrity = self.evaluate_entity("Account", &data.accounts);
140 let cost_center_integrity = self.evaluate_entity("CostCenter", &data.cost_centers);
141
142 let integrities = [
144 &vendor_integrity,
145 &customer_integrity,
146 &material_integrity,
147 &employee_integrity,
148 &account_integrity,
149 &cost_center_integrity,
150 ];
151
152 let total_valid_references: usize = integrities.iter().map(|i| i.valid_references).sum();
153 let total_invalid_references: usize =
154 integrities.iter().map(|i| i.invalid_references).sum();
155 let total_orphaned_entities: usize = integrities.iter().map(|i| i.orphaned_entities).sum();
156
157 let total_refs = total_valid_references + total_invalid_references;
159 let overall_integrity_score = if total_refs > 0 {
160 total_valid_references as f64 / total_refs as f64
161 } else {
162 1.0
163 };
164
165 let usage_ok = integrities
168 .iter()
169 .all(|i| i.total_entities == 0 || i.usage_rate >= self.min_usage_rate);
170
171 let passes = overall_integrity_score >= self.min_integrity_score && usage_ok;
172
173 Ok(ReferentialIntegrityEvaluation {
174 vendor_integrity,
175 customer_integrity,
176 material_integrity,
177 employee_integrity,
178 account_integrity,
179 cost_center_integrity,
180 overall_integrity_score,
181 total_valid_references,
182 total_invalid_references,
183 total_orphaned_entities,
184 passes,
185 })
186 }
187
188 fn evaluate_entity(&self, entity_type: &str, data: &EntityReferenceData) -> EntityIntegrity {
190 let total_entities = data.valid_ids.len();
191
192 let mut valid_references = 0;
194 let mut invalid_references = 0;
195 let mut referenced_ids = std::collections::HashSet::new();
196
197 for reference in &data.references {
198 if data.valid_ids.contains(reference) {
199 valid_references += 1;
200 referenced_ids.insert(reference.clone());
201 } else {
202 invalid_references += 1;
203 }
204 }
205
206 let entities_referenced = referenced_ids.len();
207 let orphaned_entities = total_entities.saturating_sub(entities_referenced);
208
209 let total_refs = valid_references + invalid_references;
210 let integrity_score = if total_refs > 0 {
211 valid_references as f64 / total_refs as f64
212 } else {
213 1.0
214 };
215
216 let usage_rate = if total_entities > 0 {
217 entities_referenced as f64 / total_entities as f64
218 } else {
219 1.0
220 };
221
222 EntityIntegrity {
223 entity_type: entity_type.to_string(),
224 total_entities,
225 entities_referenced,
226 valid_references,
227 invalid_references,
228 orphaned_entities,
229 integrity_score,
230 usage_rate,
231 }
232 }
233}
234
235impl Default for ReferentialIntegrityEvaluator {
236 fn default() -> Self {
237 Self::new(0.99, 0.80) }
239}
240
#[cfg(test)]
// Tests unwrap freely: a failed evaluation should abort the test with a panic.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds a data set in which only the vendor entity type is populated.
    fn vendor_data(entities: &[&str], references: &[&str]) -> ReferentialData {
        let mut data = ReferentialData::default();
        for id in entities {
            data.vendors.add_entity((*id).to_string());
        }
        for id in references {
            data.vendors.add_reference((*id).to_string());
        }
        data
    }

    /// Runs the default evaluator over `data` and returns its result.
    fn evaluate_with_defaults(data: &ReferentialData) -> ReferentialIntegrityEvaluation {
        ReferentialIntegrityEvaluator::default()
            .evaluate(data)
            .unwrap()
    }

    #[test]
    fn test_perfect_integrity() {
        // Two vendors, both referenced; V001 is referenced twice.
        let data = vendor_data(&["V001", "V002"], &["V001", "V002", "V001"]);
        let vendors = evaluate_with_defaults(&data).vendor_integrity;

        assert_eq!(vendors.integrity_score, 1.0);
        assert_eq!(vendors.valid_references, 3);
        assert_eq!(vendors.invalid_references, 0);
        assert_eq!(vendors.orphaned_entities, 0);
    }

    #[test]
    fn test_invalid_references() {
        // V999 is referenced but was never registered as a vendor.
        let data = vendor_data(&["V001"], &["V001", "V999"]);
        let vendors = evaluate_with_defaults(&data).vendor_integrity;

        assert_eq!(vendors.valid_references, 1);
        assert_eq!(vendors.invalid_references, 1);
        assert_eq!(vendors.integrity_score, 0.5);
    }

    #[test]
    fn test_orphaned_entities() {
        // Only one of the three vendors is ever referenced.
        let data = vendor_data(&["V001", "V002", "V003"], &["V001"]);
        let vendors = evaluate_with_defaults(&data).vendor_integrity;

        assert_eq!(vendors.entities_referenced, 1);
        assert_eq!(vendors.orphaned_entities, 2);
        assert!(vendors.usage_rate < 0.5);
    }

    #[test]
    fn test_empty_data() {
        let result = evaluate_with_defaults(&ReferentialData::default());

        assert_eq!(result.overall_integrity_score, 1.0);
        assert!(result.passes);
    }
}