Skip to main content

embeddenator_testkit/
integrity.rs

//! Data integrity validation utilities
//!
//! Provides tools for validating:
//! - Sparse vector invariants
//! - VSA operation properties (commutativity, self-inverse, etc.)
//! - Data corruption detection
//! - Algebraic invariants
8
9use embeddenator_vsa::SparseVec;
10use std::collections::HashSet;
11
/// Results from integrity validation.
///
/// A report is a set of plain counters plus a list of human-readable failure
/// messages. `Default` (and [`IntegrityReport::new`]) yield an empty report:
/// every counter is zero and `failures` is empty.
#[derive(Clone, Debug, Default)]
pub struct IntegrityReport {
    /// Total checks performed
    pub checks_total: u64,
    /// Checks that passed
    pub checks_passed: u64,
    /// Detected bitflips (single bit errors)
    pub bitflips_detected: u64,
    /// Multi-bit corruption events
    pub corruption_events: u64,
    /// Algebraic invariant violations
    pub invariant_violations: u64,
    /// Specific failure messages
    pub failures: Vec<String>,
}

impl IntegrityReport {
    /// Creates an empty report (identical to `IntegrityReport::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` when every recorded check passed and no failure
    /// message has been recorded. An empty report is considered OK.
    pub fn is_ok(&self) -> bool {
        self.failures.is_empty() && self.checks_passed == self.checks_total
    }

    /// Pass rate as a percentage in the range `0.0..=100.0`.
    ///
    /// A report with no checks yields `100.0` rather than dividing by zero.
    pub fn pass_rate(&self) -> f64 {
        if self.checks_total == 0 {
            return 100.0;
        }
        (self.checks_passed as f64 / self.checks_total as f64) * 100.0
    }

    /// Records a passed check: bumps both `checks_total` and `checks_passed`.
    pub fn pass(&mut self) {
        self.checks_total += 1;
        self.checks_passed += 1;
    }

    /// Records a failed check: bumps `checks_total` only and stores `msg`
    /// in `failures`.
    pub fn fail(&mut self, msg: impl Into<String>) {
        self.checks_total += 1;
        self.failures.push(msg.into());
    }

    /// Records a detected bitflip. Only the `bitflips_detected` counter
    /// changes; no check is counted and no message is stored.
    pub fn record_bitflip(&mut self) {
        self.bitflips_detected += 1;
    }

    /// Records a corruption event. Only the `corruption_events` counter
    /// changes; no check is counted and no message is stored.
    pub fn record_corruption(&mut self) {
        self.corruption_events += 1;
    }

    /// Records an algebraic invariant violation.
    ///
    /// The violation is stored in `failures` with an `INVARIANT: ` prefix and
    /// is counted as a failed check (`checks_total` grows, `checks_passed`
    /// does not). Without that, a report containing only violations would
    /// claim zero checks, zero failures and a 100% pass rate while `is_ok()`
    /// is false.
    pub fn record_invariant_violation(&mut self, msg: impl Into<String>) {
        self.checks_total += 1;
        self.invariant_violations += 1;
        self.failures.push(format!("INVARIANT: {}", msg.into()));
    }

    /// Renders a multi-line, human-readable summary of all counters.
    pub fn summary(&self) -> String {
        // The fields are public, so `checks_passed` may have been set above
        // `checks_total` by hand; saturate instead of panicking on underflow.
        let failed = self.checks_total.saturating_sub(self.checks_passed);

        format!(
            "Integrity Report:\n\
             - Total checks: {}\n\
             - Passed: {}\n\
             - Failed: {}\n\
             - Pass rate: {:.1}%\n\
             - Bitflips: {}\n\
             - Corruption events: {}\n\
             - Invariant violations: {}",
            self.checks_total,
            self.checks_passed,
            failed,
            self.pass_rate(),
            self.bitflips_detected,
            self.corruption_events,
            self.invariant_violations
        )
    }
}
97
98/// Validates data integrity for VSA operations
99pub struct IntegrityValidator {
100    /// Enable verbose logging
101    pub verbose: bool,
102}
103
104impl IntegrityValidator {
105    pub fn new() -> Self {
106        Self { verbose: false }
107    }
108
109    pub fn verbose(mut self) -> Self {
110        self.verbose = true;
111        self
112    }
113
114    /// Validate sparse vector invariants
115    ///
116    /// Checks:
117    /// - No overlap between pos and neg indices
118    /// - Indices are sorted
119    /// - No duplicate indices
120    pub fn validate_sparse(&self, v: &SparseVec) -> IntegrityReport {
121        let mut report = IntegrityReport::default();
122
123        // Check no overlap between pos and neg
124        let pos_set: HashSet<_> = v.pos.iter().collect();
125        let neg_set: HashSet<_> = v.neg.iter().collect();
126        if pos_set.intersection(&neg_set).count() > 0 {
127            report.record_corruption();
128            report.fail("Overlap between pos and neg indices");
129        } else {
130            report.pass();
131        }
132
133        // Check sorted
134        if !v.pos.windows(2).all(|w| w[0] < w[1]) {
135            report.fail("pos indices not sorted");
136        } else {
137            report.pass();
138        }
139
140        if !v.neg.windows(2).all(|w| w[0] < w[1]) {
141            report.fail("neg indices not sorted");
142        } else {
143            report.pass();
144        }
145
146        report
147    }
148
149    /// Validate algebraic invariants for bind operation
150    ///
151    /// Checks:
152    /// - Commutativity: A ⊙ B = B ⊙ A
153    pub fn validate_bind_invariants(&self, a: &SparseVec, b: &SparseVec) -> IntegrityReport {
154        let mut report = IntegrityReport::default();
155
156        // Commutativity check
157        let ab = a.bind(b);
158        let ba = b.bind(a);
159
160        if ab.pos != ba.pos || ab.neg != ba.neg {
161            report.record_invariant_violation("Commutativity violation: A⊙B ≠ B⊙A");
162        } else {
163            report.pass();
164        }
165
166        report
167    }
168
169    /// Validate bundle operation properties
170    pub fn validate_bundle_invariants(&self, a: &SparseVec, b: &SparseVec) -> IntegrityReport {
171        let mut report = IntegrityReport::default();
172
173        // Commutativity check
174        let ab = a.bundle(b);
175        let ba = b.bundle(a);
176
177        if ab.pos != ba.pos || ab.neg != ba.neg {
178            report.record_invariant_violation("Bundle commutativity violation: A⊕B ≠ B⊕A");
179        } else {
180            report.pass();
181        }
182
183        report
184    }
185
186    /// Detect potential corruption by comparing two vectors
187    pub fn detect_differences(&self, expected: &SparseVec, actual: &SparseVec) -> IntegrityReport {
188        let mut report = IntegrityReport::default();
189
190        // Compare pos indices
191        if expected.pos != actual.pos {
192            let diff_count = expected.pos.len().abs_diff(actual.pos.len());
193            report.record_corruption();
194            report.fail(format!("pos indices differ by {} elements", diff_count));
195        } else {
196            report.pass();
197        }
198
199        // Compare neg indices
200        if expected.neg != actual.neg {
201            let diff_count = expected.neg.len().abs_diff(actual.neg.len());
202            report.record_corruption();
203            report.fail(format!("neg indices differ by {} elements", diff_count));
204        } else {
205            report.pass();
206        }
207
208        report
209    }
210}
211
212impl Default for IntegrityValidator {
213    fn default() -> Self {
214        Self::new()
215    }
216}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// Pass/fail bookkeeping on a bare report.
    #[test]
    fn test_integrity_report() {
        let mut report = IntegrityReport::new();
        assert!(report.is_ok());

        report.pass();
        assert_eq!(report.checks_total, 1);
        assert_eq!(report.checks_passed, 1);

        report.fail("test failure");
        assert_eq!(report.checks_total, 2);
        assert_eq!(report.checks_passed, 1);
        assert!(!report.is_ok());
    }

    /// An empty report rates 100%; one pass plus one failure rates 50%.
    #[test]
    fn test_pass_rate() {
        let mut report = IntegrityReport::new();
        assert!((report.pass_rate() - 100.0).abs() < 1e-9);

        report.pass();
        report.fail("test failure");
        assert!((report.pass_rate() - 50.0).abs() < 1e-9);
    }

    /// A well-formed vector passes all three structural checks.
    #[test]
    fn test_validate_sparse() {
        let validator = IntegrityValidator::new();

        // Create a valid sparse vector
        let sparse = SparseVec {
            pos: vec![0, 10, 20],
            neg: vec![5, 15, 25],
        };

        let report = validator.validate_sparse(&sparse);
        assert!(report.is_ok());
        assert_eq!(report.checks_total, 3);
    }

    /// An index present in both `pos` and `neg` is flagged as corruption.
    #[test]
    fn test_validate_sparse_detects_overlap() {
        let validator = IntegrityValidator::new();

        let sparse = SparseVec {
            pos: vec![0, 10],
            neg: vec![10, 20],
        };

        let report = validator.validate_sparse(&sparse);
        assert!(!report.is_ok());
        assert_eq!(report.corruption_events, 1);
        assert_eq!(report.failures.len(), 1);
    }

    /// Out-of-order indices fail the sorted check.
    #[test]
    fn test_validate_sparse_detects_unsorted() {
        let validator = IntegrityValidator::new();

        let sparse = SparseVec {
            pos: vec![20, 10],
            neg: vec![5],
        };

        let report = validator.validate_sparse(&sparse);
        assert!(!report.is_ok());
        assert_eq!(report.failures, vec!["pos indices not sorted".to_string()]);
    }

    /// Repeated indices are rejected because the ordering check is strict.
    #[test]
    fn test_validate_sparse_detects_duplicates() {
        let validator = IntegrityValidator::new();

        let sparse = SparseVec {
            pos: vec![1, 2],
            neg: vec![7, 7],
        };

        let report = validator.validate_sparse(&sparse);
        assert!(!report.is_ok());
        assert_eq!(report.failures, vec!["neg indices not sorted".to_string()]);
    }

    /// Identical vectors pass; a changed `pos` list is reported as corruption.
    #[test]
    fn test_detect_differences() {
        let validator = IntegrityValidator::new();

        let expected = SparseVec {
            pos: vec![0, 10, 20],
            neg: vec![5, 15, 25],
        };
        let same = SparseVec {
            pos: vec![0, 10, 20],
            neg: vec![5, 15, 25],
        };
        let corrupted = SparseVec {
            pos: vec![0, 11, 20],
            neg: vec![5, 15, 25],
        };

        let clean = validator.detect_differences(&expected, &same);
        assert!(clean.is_ok());
        assert_eq!(clean.checks_passed, 2);

        let dirty = validator.detect_differences(&expected, &corrupted);
        assert!(!dirty.is_ok());
        assert_eq!(dirty.corruption_events, 1);
        assert_eq!(dirty.checks_passed, 1);
        assert_eq!(dirty.failures.len(), 1);
    }

    #[test]
    fn test_bind_invariants() {
        let validator = IntegrityValidator::new();

        let sparse_a = SparseVec {
            pos: vec![0, 10, 20],
            neg: vec![5, 15, 25],
        };
        let sparse_b = SparseVec {
            pos: vec![1, 11, 21],
            neg: vec![6, 16, 26],
        };

        let report = validator.validate_bind_invariants(&sparse_a, &sparse_b);
        // Should pass commutativity
        assert!(report.checks_passed > 0);
    }
}