ruvector_nervous_system/separate/
sparsification.rs1use serde::{Deserialize, Serialize};
7use std::collections::HashSet;
8
9#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
32pub struct SparseBitVector {
33 pub indices: Vec<u16>,
35
36 capacity: u16,
38}
39
40impl SparseBitVector {
41 pub fn new(capacity: u16) -> Self {
55 Self {
56 indices: Vec::new(),
57 capacity,
58 }
59 }
60
61 pub fn from_indices(mut indices: Vec<u16>, capacity: u16) -> Self {
76 indices.sort_unstable();
77 indices.dedup();
78 Self { indices, capacity }
79 }
80
81 pub fn set(&mut self, index: u16) {
91 assert!(index < self.capacity, "Index out of bounds");
92
93 match self.indices.binary_search(&index) {
95 Ok(_) => {} Err(pos) => self.indices.insert(pos, index),
97 }
98 }
99
100 pub fn is_set(&self, index: u16) -> bool {
110 self.indices.binary_search(&index).is_ok()
111 }
112
113 pub fn count(&self) -> usize {
115 self.indices.len()
116 }
117
118 pub fn capacity(&self) -> u16 {
120 self.capacity
121 }
122
123 pub fn intersection(&self, other: &Self) -> Self {
144 let mut result = Vec::new();
145 let mut i = 0;
146 let mut j = 0;
147
148 while i < self.indices.len() && j < other.indices.len() {
150 match self.indices[i].cmp(&other.indices[j]) {
151 std::cmp::Ordering::Equal => {
152 result.push(self.indices[i]);
153 i += 1;
154 j += 1;
155 }
156 std::cmp::Ordering::Less => i += 1,
157 std::cmp::Ordering::Greater => j += 1,
158 }
159 }
160
161 Self {
162 indices: result,
163 capacity: self.capacity,
164 }
165 }
166
167 pub fn union(&self, other: &Self) -> Self {
177 let mut result = Vec::new();
178 let mut i = 0;
179 let mut j = 0;
180
181 while i < self.indices.len() && j < other.indices.len() {
182 match self.indices[i].cmp(&other.indices[j]) {
183 std::cmp::Ordering::Equal => {
184 result.push(self.indices[i]);
185 i += 1;
186 j += 1;
187 }
188 std::cmp::Ordering::Less => {
189 result.push(self.indices[i]);
190 i += 1;
191 }
192 std::cmp::Ordering::Greater => {
193 result.push(other.indices[j]);
194 j += 1;
195 }
196 }
197 }
198
199 while i < self.indices.len() {
201 result.push(self.indices[i]);
202 i += 1;
203 }
204 while j < other.indices.len() {
205 result.push(other.indices[j]);
206 j += 1;
207 }
208
209 Self {
210 indices: result,
211 capacity: self.capacity,
212 }
213 }
214
215 pub fn jaccard_similarity(&self, other: &Self) -> f32 {
238 if self.indices.is_empty() && other.indices.is_empty() {
239 return 1.0;
240 }
241
242 let intersection_size = self.intersection_size(other);
243 let union_size = self.indices.len() + other.indices.len() - intersection_size;
244
245 if union_size == 0 {
246 return 0.0;
247 }
248
249 intersection_size as f32 / union_size as f32
250 }
251
252 pub fn hamming_distance(&self, other: &Self) -> u32 {
264 let intersection_size = self.intersection_size(other);
265 let total_active = self.indices.len() + other.indices.len();
266 (total_active - 2 * intersection_size) as u32
267 }
268
269 fn intersection_size(&self, other: &Self) -> usize {
271 let mut count = 0;
272 let mut i = 0;
273 let mut j = 0;
274
275 while i < self.indices.len() && j < other.indices.len() {
276 match self.indices[i].cmp(&other.indices[j]) {
277 std::cmp::Ordering::Equal => {
278 count += 1;
279 i += 1;
280 j += 1;
281 }
282 std::cmp::Ordering::Less => i += 1,
283 std::cmp::Ordering::Greater => j += 1,
284 }
285 }
286
287 count
288 }
289}
290
291#[cfg(test)]
292mod tests {
293 use super::*;
294
295 #[test]
296 fn test_sparse_bitvector_creation() {
297 let sparse = SparseBitVector::new(10000);
298 assert_eq!(sparse.count(), 0);
299 assert_eq!(sparse.capacity(), 10000);
300 }
301
302 #[test]
303 fn test_set_and_check() {
304 let mut sparse = SparseBitVector::new(100);
305 sparse.set(10);
306 sparse.set(20);
307 sparse.set(30);
308
309 assert!(sparse.is_set(10));
310 assert!(sparse.is_set(20));
311 assert!(sparse.is_set(30));
312 assert!(!sparse.is_set(15));
313 assert_eq!(sparse.count(), 3);
314 }
315
316 #[test]
317 fn test_from_indices() {
318 let sparse = SparseBitVector::from_indices(vec![30, 10, 20, 10], 100);
319 assert_eq!(sparse.count(), 3); assert!(sparse.is_set(10));
321 assert!(sparse.is_set(20));
322 assert!(sparse.is_set(30));
323 }
324
325 #[test]
326 fn test_intersection() {
327 let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
328 let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
329 let intersection = a.intersection(&b);
330
331 assert_eq!(intersection.count(), 2);
332 assert!(intersection.is_set(3));
333 assert!(intersection.is_set(4));
334 }
335
336 #[test]
337 fn test_union() {
338 let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
339 let b = SparseBitVector::from_indices(vec![3, 4, 5], 100);
340 let union = a.union(&b);
341
342 assert_eq!(union.count(), 5);
343 for i in 1..=5 {
344 assert!(union.is_set(i));
345 }
346 }
347
348 #[test]
349 fn test_jaccard_similarity() {
350 let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
351 let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
352
353 let sim = a.jaccard_similarity(&b);
357 assert!((sim - 0.333333).abs() < 0.001);
358 }
359
360 #[test]
361 fn test_jaccard_identical() {
362 let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
363 let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);
364
365 let sim = a.jaccard_similarity(&b);
366 assert_eq!(sim, 1.0);
367 }
368
369 #[test]
370 fn test_jaccard_disjoint() {
371 let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
372 let b = SparseBitVector::from_indices(vec![4, 5, 6], 100);
373
374 let sim = a.jaccard_similarity(&b);
375 assert_eq!(sim, 0.0);
376 }
377
378 #[test]
379 fn test_hamming_distance() {
380 let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
381 let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
382
383 let dist = a.hamming_distance(&b);
385 assert_eq!(dist, 4);
386 }
387
388 #[test]
389 fn test_hamming_identical() {
390 let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
391 let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);
392
393 let dist = a.hamming_distance(&b);
394 assert_eq!(dist, 0);
395 }
396
397 #[test]
398 #[should_panic(expected = "Index out of bounds")]
399 fn test_set_out_of_bounds() {
400 let mut sparse = SparseBitVector::new(100);
401 sparse.set(100); }
403}