datasynth_eval/coherence/
referential.rs1use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct ReferentialIntegrityEvaluation {
12 pub vendor_integrity: EntityIntegrity,
14 pub customer_integrity: EntityIntegrity,
16 pub material_integrity: EntityIntegrity,
18 pub employee_integrity: EntityIntegrity,
20 pub account_integrity: EntityIntegrity,
22 pub cost_center_integrity: EntityIntegrity,
24 pub overall_integrity_score: f64,
26 pub total_valid_references: usize,
28 pub total_invalid_references: usize,
30 pub total_orphaned_entities: usize,
32 pub passes: bool,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct EntityIntegrity {
39 pub entity_type: String,
41 pub total_entities: usize,
43 pub entities_referenced: usize,
45 pub valid_references: usize,
47 pub invalid_references: usize,
49 pub orphaned_entities: usize,
51 pub integrity_score: f64,
53 pub usage_rate: f64,
55}
56
57impl Default for EntityIntegrity {
58 fn default() -> Self {
59 Self {
60 entity_type: String::new(),
61 total_entities: 0,
62 entities_referenced: 0,
63 valid_references: 0,
64 invalid_references: 0,
65 orphaned_entities: 0,
66 integrity_score: 1.0,
67 usage_rate: 1.0,
68 }
69 }
70}
71
72#[derive(Debug, Clone, Default)]
74pub struct ReferentialData {
75 pub vendors: EntityReferenceData,
77 pub customers: EntityReferenceData,
79 pub materials: EntityReferenceData,
81 pub employees: EntityReferenceData,
83 pub accounts: EntityReferenceData,
85 pub cost_centers: EntityReferenceData,
87}
88
/// Known ids and observed references for one entity kind.
#[derive(Debug, Clone, Default)]
pub struct EntityReferenceData {
    /// Ids that exist for this entity kind; a reference is valid only if its
    /// id is contained in this set.
    pub valid_ids: std::collections::HashSet<String>,
    /// Every reference that was observed, in insertion order and including
    /// repeats.
    pub references: Vec<String>,
}
97
98impl EntityReferenceData {
99 pub fn new() -> Self {
101 Self::default()
102 }
103
104 pub fn add_entity(&mut self, id: String) {
106 self.valid_ids.insert(id);
107 }
108
109 pub fn add_reference(&mut self, id: String) {
111 self.references.push(id);
112 }
113}
114
/// Evaluator that scores how well references resolve to known entity ids.
///
/// Holds the thresholds the evaluation is judged against.
#[derive(Debug, Clone)]
pub struct ReferentialIntegrityEvaluator {
    /// Minimum overall integrity score required for an evaluation to pass.
    min_integrity_score: f64,
    /// Minimum share of known entities expected to be referenced.
    // The pass/fail decision currently compares only the integrity score, so
    // this threshold is stored but never read; the lint is silenced to keep
    // the constructor signature stable.
    #[allow(dead_code)]
    min_usage_rate: f64,
}
123
124impl ReferentialIntegrityEvaluator {
125 pub fn new(min_integrity_score: f64, min_usage_rate: f64) -> Self {
127 Self {
128 min_integrity_score,
129 min_usage_rate,
130 }
131 }
132
133 pub fn evaluate(&self, data: &ReferentialData) -> EvalResult<ReferentialIntegrityEvaluation> {
135 let vendor_integrity = self.evaluate_entity("Vendor", &data.vendors);
136 let customer_integrity = self.evaluate_entity("Customer", &data.customers);
137 let material_integrity = self.evaluate_entity("Material", &data.materials);
138 let employee_integrity = self.evaluate_entity("Employee", &data.employees);
139 let account_integrity = self.evaluate_entity("Account", &data.accounts);
140 let cost_center_integrity = self.evaluate_entity("CostCenter", &data.cost_centers);
141
142 let integrities = [
144 &vendor_integrity,
145 &customer_integrity,
146 &material_integrity,
147 &employee_integrity,
148 &account_integrity,
149 &cost_center_integrity,
150 ];
151
152 let total_valid_references: usize = integrities.iter().map(|i| i.valid_references).sum();
153 let total_invalid_references: usize =
154 integrities.iter().map(|i| i.invalid_references).sum();
155 let total_orphaned_entities: usize = integrities.iter().map(|i| i.orphaned_entities).sum();
156
157 let total_refs = total_valid_references + total_invalid_references;
159 let overall_integrity_score = if total_refs > 0 {
160 total_valid_references as f64 / total_refs as f64
161 } else {
162 1.0
163 };
164
165 let passes = overall_integrity_score >= self.min_integrity_score;
166
167 Ok(ReferentialIntegrityEvaluation {
168 vendor_integrity,
169 customer_integrity,
170 material_integrity,
171 employee_integrity,
172 account_integrity,
173 cost_center_integrity,
174 overall_integrity_score,
175 total_valid_references,
176 total_invalid_references,
177 total_orphaned_entities,
178 passes,
179 })
180 }
181
182 fn evaluate_entity(&self, entity_type: &str, data: &EntityReferenceData) -> EntityIntegrity {
184 let total_entities = data.valid_ids.len();
185
186 let mut valid_references = 0;
188 let mut invalid_references = 0;
189 let mut referenced_ids = std::collections::HashSet::new();
190
191 for reference in &data.references {
192 if data.valid_ids.contains(reference) {
193 valid_references += 1;
194 referenced_ids.insert(reference.clone());
195 } else {
196 invalid_references += 1;
197 }
198 }
199
200 let entities_referenced = referenced_ids.len();
201 let orphaned_entities = total_entities.saturating_sub(entities_referenced);
202
203 let total_refs = valid_references + invalid_references;
204 let integrity_score = if total_refs > 0 {
205 valid_references as f64 / total_refs as f64
206 } else {
207 1.0
208 };
209
210 let usage_rate = if total_entities > 0 {
211 entities_referenced as f64 / total_entities as f64
212 } else {
213 1.0
214 };
215
216 EntityIntegrity {
217 entity_type: entity_type.to_string(),
218 total_entities,
219 entities_referenced,
220 valid_references,
221 invalid_references,
222 orphaned_entities,
223 integrity_score,
224 usage_rate,
225 }
226 }
227}
228
229impl Default for ReferentialIntegrityEvaluator {
230 fn default() -> Self {
231 Self::new(0.99, 0.80) }
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Registers the given vendor ids and vendor references, runs the default
    /// evaluator and returns the vendor-level result.
    fn evaluate_vendors(entities: &[&str], references: &[&str]) -> EntityIntegrity {
        let mut data = ReferentialData::default();
        for &id in entities {
            data.vendors.add_entity(id.to_string());
        }
        for &id in references {
            data.vendors.add_reference(id.to_string());
        }

        let evaluator = ReferentialIntegrityEvaluator::default();
        evaluator.evaluate(&data).unwrap().vendor_integrity
    }

    /// Every reference resolves and every vendor is used at least once.
    #[test]
    fn test_perfect_integrity() {
        let vendors = evaluate_vendors(&["V001", "V002"], &["V001", "V002", "V001"]);

        assert_eq!(vendors.integrity_score, 1.0);
        assert_eq!(vendors.valid_references, 3);
        assert_eq!(vendors.invalid_references, 0);
        assert_eq!(vendors.orphaned_entities, 0);
    }

    /// One of two references points at an id that was never registered.
    #[test]
    fn test_invalid_references() {
        let vendors = evaluate_vendors(&["V001"], &["V001", "V999"]);

        assert_eq!(vendors.valid_references, 1);
        assert_eq!(vendors.invalid_references, 1);
        assert_eq!(vendors.integrity_score, 0.5);
    }

    /// Two of three registered vendors are never referenced.
    #[test]
    fn test_orphaned_entities() {
        let vendors = evaluate_vendors(&["V001", "V002", "V003"], &["V001"]);

        assert_eq!(vendors.entities_referenced, 1);
        assert_eq!(vendors.orphaned_entities, 2);
        assert!(vendors.usage_rate < 0.5);
    }

    /// With no entities and no references the evaluation scores 1.0 and passes.
    #[test]
    fn test_empty_data() {
        let empty = ReferentialData::default();
        let result = ReferentialIntegrityEvaluator::default()
            .evaluate(&empty)
            .unwrap();

        assert_eq!(result.overall_integrity_score, 1.0);
        assert!(result.passes);
    }
}