Skip to main content

datasynth_eval/coherence/
referential.rs

//! Referential integrity evaluation.
//!
//! Validates that all foreign key references point to valid master data entities
//! and that created entities are actually used in transactions.
5
6use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8
9/// Results of referential integrity evaluation.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct ReferentialIntegrityEvaluation {
12    /// Vendor reference integrity.
13    pub vendor_integrity: EntityIntegrity,
14    /// Customer reference integrity.
15    pub customer_integrity: EntityIntegrity,
16    /// Material reference integrity.
17    pub material_integrity: EntityIntegrity,
18    /// Employee/User reference integrity.
19    pub employee_integrity: EntityIntegrity,
20    /// Account reference integrity.
21    pub account_integrity: EntityIntegrity,
22    /// Cost center reference integrity.
23    pub cost_center_integrity: EntityIntegrity,
24    /// Overall integrity score (0.0-1.0).
25    pub overall_integrity_score: f64,
26    /// Total valid references.
27    pub total_valid_references: usize,
28    /// Total invalid references.
29    pub total_invalid_references: usize,
30    /// Total orphaned entities (created but never used).
31    pub total_orphaned_entities: usize,
32    /// Passes integrity check.
33    pub passes: bool,
34}
35
36/// Integrity metrics for a single entity type.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct EntityIntegrity {
39    /// Entity type name.
40    pub entity_type: String,
41    /// Total entities defined.
42    pub total_entities: usize,
43    /// Entities actually referenced.
44    pub entities_referenced: usize,
45    /// Valid references count.
46    pub valid_references: usize,
47    /// Invalid references count.
48    pub invalid_references: usize,
49    /// Orphaned entities (defined but never used).
50    pub orphaned_entities: usize,
51    /// Integrity score (0.0-1.0).
52    pub integrity_score: f64,
53    /// Usage rate (entities_referenced / total_entities).
54    pub usage_rate: f64,
55}
56
57impl Default for EntityIntegrity {
58    fn default() -> Self {
59        Self {
60            entity_type: String::new(),
61            total_entities: 0,
62            entities_referenced: 0,
63            valid_references: 0,
64            invalid_references: 0,
65            orphaned_entities: 0,
66            integrity_score: 1.0,
67            usage_rate: 1.0,
68        }
69    }
70}
71
72/// Input data for referential integrity evaluation.
73#[derive(Debug, Clone, Default)]
74pub struct ReferentialData {
75    /// Vendor reference data.
76    pub vendors: EntityReferenceData,
77    /// Customer reference data.
78    pub customers: EntityReferenceData,
79    /// Material reference data.
80    pub materials: EntityReferenceData,
81    /// Employee reference data.
82    pub employees: EntityReferenceData,
83    /// Account reference data.
84    pub accounts: EntityReferenceData,
85    /// Cost center reference data.
86    pub cost_centers: EntityReferenceData,
87}
88
/// Defined IDs and observed references for one entity type.
#[derive(Debug, Clone, Default)]
pub struct EntityReferenceData {
    /// IDs of all entities that exist for this type.
    pub valid_ids: std::collections::HashSet<String>,
    /// Every reference made to this entity type, one entry per reference
    /// (the same ID may therefore appear several times).
    pub references: Vec<String>,
}
97
98impl EntityReferenceData {
99    /// Create new entity reference data.
100    pub fn new() -> Self {
101        Self::default()
102    }
103
104    /// Add a valid entity ID.
105    pub fn add_entity(&mut self, id: String) {
106        self.valid_ids.insert(id);
107    }
108
109    /// Add a reference to this entity type.
110    pub fn add_reference(&mut self, id: String) {
111        self.references.push(id);
112    }
113}
114
/// Evaluator for referential integrity.
///
/// Holds the two thresholds an evaluation is judged against. Like the other
/// public types in this module it implements `Debug` and `Clone`, so it can be
/// logged and stored inside configuration structures that derive those traits.
#[derive(Debug, Clone)]
pub struct ReferentialIntegrityEvaluator {
    /// Minimum integrity score threshold.
    min_integrity_score: f64,
    /// Minimum usage rate threshold — entity types whose usage rate falls
    /// below this value cause the overall evaluation to fail.
    min_usage_rate: f64,
}
123
124impl ReferentialIntegrityEvaluator {
125    /// Create a new evaluator with specified thresholds.
126    pub fn new(min_integrity_score: f64, min_usage_rate: f64) -> Self {
127        Self {
128            min_integrity_score,
129            min_usage_rate,
130        }
131    }
132
133    /// Evaluate referential integrity.
134    pub fn evaluate(&self, data: &ReferentialData) -> EvalResult<ReferentialIntegrityEvaluation> {
135        let vendor_integrity = self.evaluate_entity("Vendor", &data.vendors);
136        let customer_integrity = self.evaluate_entity("Customer", &data.customers);
137        let material_integrity = self.evaluate_entity("Material", &data.materials);
138        let employee_integrity = self.evaluate_entity("Employee", &data.employees);
139        let account_integrity = self.evaluate_entity("Account", &data.accounts);
140        let cost_center_integrity = self.evaluate_entity("CostCenter", &data.cost_centers);
141
142        // Aggregate totals
143        let integrities = [
144            &vendor_integrity,
145            &customer_integrity,
146            &material_integrity,
147            &employee_integrity,
148            &account_integrity,
149            &cost_center_integrity,
150        ];
151
152        let total_valid_references: usize = integrities.iter().map(|i| i.valid_references).sum();
153        let total_invalid_references: usize =
154            integrities.iter().map(|i| i.invalid_references).sum();
155        let total_orphaned_entities: usize = integrities.iter().map(|i| i.orphaned_entities).sum();
156
157        // Calculate overall integrity score (weighted by reference count)
158        let total_refs = total_valid_references + total_invalid_references;
159        let overall_integrity_score = if total_refs > 0 {
160            total_valid_references as f64 / total_refs as f64
161        } else {
162            1.0
163        };
164
165        // Check usage rate: any entity type with defined entities must meet
166        // the minimum usage threshold.
167        let usage_ok = integrities
168            .iter()
169            .all(|i| i.total_entities == 0 || i.usage_rate >= self.min_usage_rate);
170
171        let passes = overall_integrity_score >= self.min_integrity_score && usage_ok;
172
173        Ok(ReferentialIntegrityEvaluation {
174            vendor_integrity,
175            customer_integrity,
176            material_integrity,
177            employee_integrity,
178            account_integrity,
179            cost_center_integrity,
180            overall_integrity_score,
181            total_valid_references,
182            total_invalid_references,
183            total_orphaned_entities,
184            passes,
185        })
186    }
187
188    /// Evaluate a single entity type.
189    fn evaluate_entity(&self, entity_type: &str, data: &EntityReferenceData) -> EntityIntegrity {
190        let total_entities = data.valid_ids.len();
191
192        // Count valid and invalid references
193        let mut valid_references = 0;
194        let mut invalid_references = 0;
195        let mut referenced_ids = std::collections::HashSet::new();
196
197        for reference in &data.references {
198            if data.valid_ids.contains(reference) {
199                valid_references += 1;
200                referenced_ids.insert(reference.clone());
201            } else {
202                invalid_references += 1;
203            }
204        }
205
206        let entities_referenced = referenced_ids.len();
207        let orphaned_entities = total_entities.saturating_sub(entities_referenced);
208
209        let total_refs = valid_references + invalid_references;
210        let integrity_score = if total_refs > 0 {
211            valid_references as f64 / total_refs as f64
212        } else {
213            1.0
214        };
215
216        let usage_rate = if total_entities > 0 {
217            entities_referenced as f64 / total_entities as f64
218        } else {
219            1.0
220        };
221
222        EntityIntegrity {
223            entity_type: entity_type.to_string(),
224            total_entities,
225            entities_referenced,
226            valid_references,
227            invalid_references,
228            orphaned_entities,
229            integrity_score,
230            usage_rate,
231        }
232    }
233}
234
235impl Default for ReferentialIntegrityEvaluator {
236    fn default() -> Self {
237        Self::new(0.99, 0.80) // 99% integrity, 80% usage
238    }
239}
240
#[cfg(test)]
// Tests unwrap the evaluation result directly: an unexpected `Err` should
// simply fail the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// All references valid and every entity used: perfect scores, passes.
    #[test]
    fn test_perfect_integrity() {
        let mut data = ReferentialData::default();

        // Add vendors
        data.vendors.add_entity("V001".to_string());
        data.vendors.add_entity("V002".to_string());

        // Add references to valid vendors
        data.vendors.add_reference("V001".to_string());
        data.vendors.add_reference("V002".to_string());
        data.vendors.add_reference("V001".to_string());

        let evaluator = ReferentialIntegrityEvaluator::default();
        let result = evaluator.evaluate(&data).unwrap();

        assert_eq!(result.vendor_integrity.integrity_score, 1.0);
        assert_eq!(result.vendor_integrity.valid_references, 3);
        assert_eq!(result.vendor_integrity.invalid_references, 0);
        assert_eq!(result.vendor_integrity.orphaned_entities, 0);
        assert_eq!(result.vendor_integrity.usage_rate, 1.0);
        assert_eq!(result.overall_integrity_score, 1.0);
        assert!(result.passes);
    }

    /// A dangling reference lowers the score below the default threshold.
    #[test]
    fn test_invalid_references() {
        let mut data = ReferentialData::default();

        data.vendors.add_entity("V001".to_string());

        // Reference both valid and invalid
        data.vendors.add_reference("V001".to_string());
        data.vendors.add_reference("V999".to_string()); // Invalid

        let evaluator = ReferentialIntegrityEvaluator::default();
        let result = evaluator.evaluate(&data).unwrap();

        assert_eq!(result.vendor_integrity.valid_references, 1);
        assert_eq!(result.vendor_integrity.invalid_references, 1);
        assert_eq!(result.vendor_integrity.integrity_score, 0.5);
        assert_eq!(result.total_invalid_references, 1);
        assert!(!result.passes);
    }

    /// Unused entities fail the usage-rate gate even when every reference is valid.
    #[test]
    fn test_orphaned_entities() {
        let mut data = ReferentialData::default();

        // Add vendors but only reference one
        data.vendors.add_entity("V001".to_string());
        data.vendors.add_entity("V002".to_string());
        data.vendors.add_entity("V003".to_string());

        data.vendors.add_reference("V001".to_string());

        let evaluator = ReferentialIntegrityEvaluator::default();
        let result = evaluator.evaluate(&data).unwrap();

        assert_eq!(result.vendor_integrity.entities_referenced, 1);
        assert_eq!(result.vendor_integrity.orphaned_entities, 2);
        assert!(result.vendor_integrity.usage_rate < 0.5);
        assert_eq!(result.total_orphaned_entities, 2);
        assert_eq!(result.overall_integrity_score, 1.0);
        assert!(!result.passes);
    }

    /// No entities and no references: nothing can be wrong, so it passes.
    #[test]
    fn test_empty_data() {
        let data = ReferentialData::default();
        let evaluator = ReferentialIntegrityEvaluator::default();
        let result = evaluator.evaluate(&data).unwrap();

        assert_eq!(result.overall_integrity_score, 1.0);
        assert_eq!(result.total_valid_references, 0);
        assert_eq!(result.total_invalid_references, 0);
        assert_eq!(result.total_orphaned_entities, 0);
        assert!(result.passes);
    }

    /// The same data can pass or fail depending on the configured thresholds.
    #[test]
    fn test_custom_thresholds() {
        let mut data = ReferentialData::default();

        data.vendors.add_entity("V001".to_string());
        data.vendors.add_entity("V002".to_string());
        data.vendors.add_entity("V003".to_string());

        data.vendors.add_reference("V001".to_string());
        data.vendors.add_reference("V999".to_string()); // Invalid

        // 50% integrity and one of three vendors used.
        let lenient = ReferentialIntegrityEvaluator::new(0.5, 0.3);
        assert!(lenient.evaluate(&data).unwrap().passes);

        let strict = ReferentialIntegrityEvaluator::default();
        assert!(!strict.evaluate(&data).unwrap().passes);
    }

    /// References to an entity type with no defined entities are all invalid.
    #[test]
    fn test_references_without_entities() {
        let mut data = ReferentialData::default();

        data.customers.add_reference("C001".to_string());

        let evaluator = ReferentialIntegrityEvaluator::default();
        let result = evaluator.evaluate(&data).unwrap();

        assert_eq!(result.customer_integrity.total_entities, 0);
        assert_eq!(result.customer_integrity.invalid_references, 1);
        assert_eq!(result.customer_integrity.integrity_score, 0.0);
        assert_eq!(result.customer_integrity.usage_rate, 1.0);
        assert_eq!(result.overall_integrity_score, 0.0);
        assert!(!result.passes);
    }
}
314}