datasynth_eval/coherence/
referential.rs

1//! Referential integrity evaluation.
2//!
3//! Validates that all foreign key references point to valid master data entities
4//! and that created entities are actually used in transactions.
5
6use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8
9/// Results of referential integrity evaluation.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct ReferentialIntegrityEvaluation {
12    /// Vendor reference integrity.
13    pub vendor_integrity: EntityIntegrity,
14    /// Customer reference integrity.
15    pub customer_integrity: EntityIntegrity,
16    /// Material reference integrity.
17    pub material_integrity: EntityIntegrity,
18    /// Employee/User reference integrity.
19    pub employee_integrity: EntityIntegrity,
20    /// Account reference integrity.
21    pub account_integrity: EntityIntegrity,
22    /// Cost center reference integrity.
23    pub cost_center_integrity: EntityIntegrity,
24    /// Overall integrity score (0.0-1.0).
25    pub overall_integrity_score: f64,
26    /// Total valid references.
27    pub total_valid_references: usize,
28    /// Total invalid references.
29    pub total_invalid_references: usize,
30    /// Total orphaned entities (created but never used).
31    pub total_orphaned_entities: usize,
32    /// Passes integrity check.
33    pub passes: bool,
34}
35
36/// Integrity metrics for a single entity type.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct EntityIntegrity {
39    /// Entity type name.
40    pub entity_type: String,
41    /// Total entities defined.
42    pub total_entities: usize,
43    /// Entities actually referenced.
44    pub entities_referenced: usize,
45    /// Valid references count.
46    pub valid_references: usize,
47    /// Invalid references count.
48    pub invalid_references: usize,
49    /// Orphaned entities (defined but never used).
50    pub orphaned_entities: usize,
51    /// Integrity score (0.0-1.0).
52    pub integrity_score: f64,
53    /// Usage rate (entities_referenced / total_entities).
54    pub usage_rate: f64,
55}
56
57impl Default for EntityIntegrity {
58    fn default() -> Self {
59        Self {
60            entity_type: String::new(),
61            total_entities: 0,
62            entities_referenced: 0,
63            valid_references: 0,
64            invalid_references: 0,
65            orphaned_entities: 0,
66            integrity_score: 1.0,
67            usage_rate: 1.0,
68        }
69    }
70}
71
72/// Input data for referential integrity evaluation.
73#[derive(Debug, Clone, Default)]
74pub struct ReferentialData {
75    /// Vendor reference data.
76    pub vendors: EntityReferenceData,
77    /// Customer reference data.
78    pub customers: EntityReferenceData,
79    /// Material reference data.
80    pub materials: EntityReferenceData,
81    /// Employee reference data.
82    pub employees: EntityReferenceData,
83    /// Account reference data.
84    pub accounts: EntityReferenceData,
85    /// Cost center reference data.
86    pub cost_centers: EntityReferenceData,
87}
88
/// Defined IDs and observed references for one entity type.
#[derive(Debug, Clone, Default)]
pub struct EntityReferenceData {
    /// Every ID that counts as a valid target for this entity type.
    pub valid_ids: std::collections::HashSet<String>,
    /// Each reference made to this entity type, in the order it was added.
    /// Repeated references are kept as separate entries.
    pub references: Vec<String>,
}

impl EntityReferenceData {
    /// Build an empty container: no valid IDs, no references.
    pub fn new() -> Self {
        Self {
            valid_ids: std::collections::HashSet::new(),
            references: Vec::new(),
        }
    }

    /// Register `id` as a valid entity ID.
    ///
    /// Registering an ID that is already present leaves the set unchanged.
    pub fn add_entity(&mut self, id: String) {
        // `insert` reports whether the ID was new; that is deliberately ignored.
        let _ = self.valid_ids.insert(id);
    }

    /// Record one reference to `id`, appended after all earlier references.
    pub fn add_reference(&mut self, id: String) {
        self.references.push(id);
    }
}
114
/// Scores referential integrity against configurable thresholds.
pub struct ReferentialIntegrityEvaluator {
    /// Lowest integrity score that is still acceptable.
    min_integrity_score: f64,
    /// Lowest usage rate that is still acceptable.
    #[allow(dead_code)] // Reserved for usage rate validation
    min_usage_rate: f64,
}
123
124impl ReferentialIntegrityEvaluator {
125    /// Create a new evaluator with specified thresholds.
126    pub fn new(min_integrity_score: f64, min_usage_rate: f64) -> Self {
127        Self {
128            min_integrity_score,
129            min_usage_rate,
130        }
131    }
132
133    /// Evaluate referential integrity.
134    pub fn evaluate(&self, data: &ReferentialData) -> EvalResult<ReferentialIntegrityEvaluation> {
135        let vendor_integrity = self.evaluate_entity("Vendor", &data.vendors);
136        let customer_integrity = self.evaluate_entity("Customer", &data.customers);
137        let material_integrity = self.evaluate_entity("Material", &data.materials);
138        let employee_integrity = self.evaluate_entity("Employee", &data.employees);
139        let account_integrity = self.evaluate_entity("Account", &data.accounts);
140        let cost_center_integrity = self.evaluate_entity("CostCenter", &data.cost_centers);
141
142        // Aggregate totals
143        let integrities = [
144            &vendor_integrity,
145            &customer_integrity,
146            &material_integrity,
147            &employee_integrity,
148            &account_integrity,
149            &cost_center_integrity,
150        ];
151
152        let total_valid_references: usize = integrities.iter().map(|i| i.valid_references).sum();
153        let total_invalid_references: usize =
154            integrities.iter().map(|i| i.invalid_references).sum();
155        let total_orphaned_entities: usize = integrities.iter().map(|i| i.orphaned_entities).sum();
156
157        // Calculate overall integrity score (weighted by reference count)
158        let total_refs = total_valid_references + total_invalid_references;
159        let overall_integrity_score = if total_refs > 0 {
160            total_valid_references as f64 / total_refs as f64
161        } else {
162            1.0
163        };
164
165        let passes = overall_integrity_score >= self.min_integrity_score;
166
167        Ok(ReferentialIntegrityEvaluation {
168            vendor_integrity,
169            customer_integrity,
170            material_integrity,
171            employee_integrity,
172            account_integrity,
173            cost_center_integrity,
174            overall_integrity_score,
175            total_valid_references,
176            total_invalid_references,
177            total_orphaned_entities,
178            passes,
179        })
180    }
181
182    /// Evaluate a single entity type.
183    fn evaluate_entity(&self, entity_type: &str, data: &EntityReferenceData) -> EntityIntegrity {
184        let total_entities = data.valid_ids.len();
185
186        // Count valid and invalid references
187        let mut valid_references = 0;
188        let mut invalid_references = 0;
189        let mut referenced_ids = std::collections::HashSet::new();
190
191        for reference in &data.references {
192            if data.valid_ids.contains(reference) {
193                valid_references += 1;
194                referenced_ids.insert(reference.clone());
195            } else {
196                invalid_references += 1;
197            }
198        }
199
200        let entities_referenced = referenced_ids.len();
201        let orphaned_entities = total_entities.saturating_sub(entities_referenced);
202
203        let total_refs = valid_references + invalid_references;
204        let integrity_score = if total_refs > 0 {
205            valid_references as f64 / total_refs as f64
206        } else {
207            1.0
208        };
209
210        let usage_rate = if total_entities > 0 {
211            entities_referenced as f64 / total_entities as f64
212        } else {
213            1.0
214        };
215
216        EntityIntegrity {
217            entity_type: entity_type.to_string(),
218            total_entities,
219            entities_referenced,
220            valid_references,
221            invalid_references,
222            orphaned_entities,
223            integrity_score,
224            usage_rate,
225        }
226    }
227}
228
229impl Default for ReferentialIntegrityEvaluator {
230    fn default() -> Self {
231        Self::new(0.99, 0.80) // 99% integrity, 80% usage
232    }
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Build input in which only the vendor entity type is populated.
    fn vendor_only(entities: &[&str], references: &[&str]) -> ReferentialData {
        let mut data = ReferentialData::default();
        for &id in entities {
            data.vendors.add_entity(id.to_string());
        }
        for &id in references {
            data.vendors.add_reference(id.to_string());
        }
        data
    }

    /// Evaluate `data` with the default thresholds, panicking on error.
    fn run_default(data: &ReferentialData) -> ReferentialIntegrityEvaluation {
        ReferentialIntegrityEvaluator::default()
            .evaluate(data)
            .unwrap()
    }

    /// Every reference resolves and every vendor is used.
    #[test]
    fn test_perfect_integrity() {
        let data = vendor_only(&["V001", "V002"], &["V001", "V002", "V001"]);
        let vendors = run_default(&data).vendor_integrity;

        assert_eq!(vendors.integrity_score, 1.0);
        assert_eq!(vendors.valid_references, 3);
        assert_eq!(vendors.invalid_references, 0);
        assert_eq!(vendors.orphaned_entities, 0);
    }

    /// One resolvable reference and one dangling reference ("V999").
    #[test]
    fn test_invalid_references() {
        let data = vendor_only(&["V001"], &["V001", "V999"]);
        let vendors = run_default(&data).vendor_integrity;

        assert_eq!(vendors.valid_references, 1);
        assert_eq!(vendors.invalid_references, 1);
        assert_eq!(vendors.integrity_score, 0.5);
    }

    /// Three vendors defined, only one of them referenced.
    #[test]
    fn test_orphaned_entities() {
        let data = vendor_only(&["V001", "V002", "V003"], &["V001"]);
        let vendors = run_default(&data).vendor_integrity;

        assert_eq!(vendors.entities_referenced, 1);
        assert_eq!(vendors.orphaned_entities, 2);
        assert!(vendors.usage_rate < 0.5);
    }

    /// With no entities and no references the evaluation passes with a perfect score.
    #[test]
    fn test_empty_data() {
        let result = run_default(&ReferentialData::default());

        assert_eq!(result.overall_integrity_score, 1.0);
        assert!(result.passes);
    }
}