ruvector_nervous_system/separate/
sparsification.rs

1//! Sparse bit vector for efficient k-winners-take-all representation
2//!
3//! Implements memory-efficient sparse bit vectors using index lists
4//! with fast set operations for similarity computation.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashSet;
8
9/// Sparse bit vector storing only active indices
10///
11/// Efficient representation for sparse binary vectors where only
12/// a small fraction of bits are set (active). Stores only the indices
13/// of active bits rather than the full bit array.
14///
15/// # Properties
16///
17/// - Memory: O(k) where k is number of active bits
18/// - Set operations: O(k1 + k2) for intersection/union
19/// - Typical k: 200-500 active bits out of 10000+ total
20///
21/// # Example
22///
23/// ```
24/// use ruvector_nervous_system::SparseBitVector;
25///
26/// let mut sparse = SparseBitVector::new(10000);
27/// sparse.set(42);
28/// sparse.set(100);
29/// sparse.set(500);
30/// ```
31#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
32pub struct SparseBitVector {
33    /// Sorted list of active bit indices
34    pub indices: Vec<u16>,
35
36    /// Total capacity (maximum index + 1)
37    capacity: u16,
38}
39
40impl SparseBitVector {
41    /// Create a new sparse bit vector with given capacity
42    ///
43    /// # Arguments
44    ///
45    /// * `capacity` - Maximum number of bits (max index + 1)
46    ///
47    /// # Example
48    ///
49    /// ```
50    /// use ruvector_nervous_system::SparseBitVector;
51    ///
52    /// let sparse = SparseBitVector::new(10000);
53    /// ```
54    pub fn new(capacity: u16) -> Self {
55        Self {
56            indices: Vec::new(),
57            capacity,
58        }
59    }
60
61    /// Create from a list of active indices
62    ///
63    /// # Arguments
64    ///
65    /// * `indices` - Vector of active bit indices
66    /// * `capacity` - Total capacity
67    ///
68    /// # Example
69    ///
70    /// ```
71    /// use ruvector_nervous_system::SparseBitVector;
72    ///
73    /// let sparse = SparseBitVector::from_indices(vec![10, 20, 30], 10000);
74    /// ```
75    pub fn from_indices(mut indices: Vec<u16>, capacity: u16) -> Self {
76        indices.sort_unstable();
77        indices.dedup();
78        Self { indices, capacity }
79    }
80
81    /// Set a bit to active
82    ///
83    /// # Arguments
84    ///
85    /// * `index` - Bit index to set
86    ///
87    /// # Panics
88    ///
89    /// Panics if index >= capacity
90    pub fn set(&mut self, index: u16) {
91        assert!(index < self.capacity, "Index out of bounds");
92
93        // Binary search for insertion point
94        match self.indices.binary_search(&index) {
95            Ok(_) => {} // Already present
96            Err(pos) => self.indices.insert(pos, index),
97        }
98    }
99
100    /// Check if a bit is active
101    ///
102    /// # Arguments
103    ///
104    /// * `index` - Bit index to check
105    ///
106    /// # Returns
107    ///
108    /// true if bit is set, false otherwise
109    pub fn is_set(&self, index: u16) -> bool {
110        self.indices.binary_search(&index).is_ok()
111    }
112
113    /// Get number of active bits
114    pub fn count(&self) -> usize {
115        self.indices.len()
116    }
117
118    /// Get capacity
119    pub fn capacity(&self) -> u16 {
120        self.capacity
121    }
122
123    /// Compute intersection with another sparse bit vector
124    ///
125    /// # Arguments
126    ///
127    /// * `other` - Other sparse bit vector
128    ///
129    /// # Returns
130    ///
131    /// New sparse bit vector containing intersection
132    ///
133    /// # Example
134    ///
135    /// ```
136    /// use ruvector_nervous_system::SparseBitVector;
137    ///
138    /// let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
139    /// let b = SparseBitVector::from_indices(vec![2, 3, 4], 100);
140    /// let intersection = a.intersection(&b);
141    /// assert_eq!(intersection.count(), 2); // {2, 3}
142    /// ```
143    pub fn intersection(&self, other: &Self) -> Self {
144        let mut result = Vec::new();
145        let mut i = 0;
146        let mut j = 0;
147
148        // Merge algorithm for sorted lists
149        while i < self.indices.len() && j < other.indices.len() {
150            match self.indices[i].cmp(&other.indices[j]) {
151                std::cmp::Ordering::Equal => {
152                    result.push(self.indices[i]);
153                    i += 1;
154                    j += 1;
155                }
156                std::cmp::Ordering::Less => i += 1,
157                std::cmp::Ordering::Greater => j += 1,
158            }
159        }
160
161        Self {
162            indices: result,
163            capacity: self.capacity,
164        }
165    }
166
167    /// Compute union with another sparse bit vector
168    ///
169    /// # Arguments
170    ///
171    /// * `other` - Other sparse bit vector
172    ///
173    /// # Returns
174    ///
175    /// New sparse bit vector containing union
176    pub fn union(&self, other: &Self) -> Self {
177        let mut result = Vec::new();
178        let mut i = 0;
179        let mut j = 0;
180
181        while i < self.indices.len() && j < other.indices.len() {
182            match self.indices[i].cmp(&other.indices[j]) {
183                std::cmp::Ordering::Equal => {
184                    result.push(self.indices[i]);
185                    i += 1;
186                    j += 1;
187                }
188                std::cmp::Ordering::Less => {
189                    result.push(self.indices[i]);
190                    i += 1;
191                }
192                std::cmp::Ordering::Greater => {
193                    result.push(other.indices[j]);
194                    j += 1;
195                }
196            }
197        }
198
199        // Add remaining elements
200        while i < self.indices.len() {
201            result.push(self.indices[i]);
202            i += 1;
203        }
204        while j < other.indices.len() {
205            result.push(other.indices[j]);
206            j += 1;
207        }
208
209        Self {
210            indices: result,
211            capacity: self.capacity,
212        }
213    }
214
215    /// Compute Jaccard similarity with another sparse bit vector
216    ///
217    /// Jaccard similarity = |A ∩ B| / |A ∪ B|
218    ///
219    /// # Arguments
220    ///
221    /// * `other` - Other sparse bit vector
222    ///
223    /// # Returns
224    ///
225    /// Similarity in range [0.0, 1.0]
226    ///
227    /// # Example
228    ///
229    /// ```
230    /// use ruvector_nervous_system::SparseBitVector;
231    ///
232    /// let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
233    /// let b = SparseBitVector::from_indices(vec![2, 3, 4], 100);
234    /// let sim = a.jaccard_similarity(&b);
235    /// assert!((sim - 0.5).abs() < 0.001); // 2/4 = 0.5
236    /// ```
237    pub fn jaccard_similarity(&self, other: &Self) -> f32 {
238        if self.indices.is_empty() && other.indices.is_empty() {
239            return 1.0;
240        }
241
242        let intersection_size = self.intersection_size(other);
243        let union_size = self.indices.len() + other.indices.len() - intersection_size;
244
245        if union_size == 0 {
246            return 0.0;
247        }
248
249        intersection_size as f32 / union_size as f32
250    }
251
252    /// Compute Hamming distance with another sparse bit vector
253    ///
254    /// Hamming distance = number of positions where bits differ
255    ///
256    /// # Arguments
257    ///
258    /// * `other` - Other sparse bit vector
259    ///
260    /// # Returns
261    ///
262    /// Hamming distance (number of differing bits)
263    pub fn hamming_distance(&self, other: &Self) -> u32 {
264        let intersection_size = self.intersection_size(other);
265        let total_active = self.indices.len() + other.indices.len();
266        (total_active - 2 * intersection_size) as u32
267    }
268
269    /// Helper: compute intersection size efficiently
270    fn intersection_size(&self, other: &Self) -> usize {
271        let mut count = 0;
272        let mut i = 0;
273        let mut j = 0;
274
275        while i < self.indices.len() && j < other.indices.len() {
276            match self.indices[i].cmp(&other.indices[j]) {
277                std::cmp::Ordering::Equal => {
278                    count += 1;
279                    i += 1;
280                    j += 1;
281                }
282                std::cmp::Ordering::Less => i += 1,
283                std::cmp::Ordering::Greater => j += 1,
284            }
285        }
286
287        count
288    }
289}
290
291#[cfg(test)]
292mod tests {
293    use super::*;
294
295    #[test]
296    fn test_sparse_bitvector_creation() {
297        let sparse = SparseBitVector::new(10000);
298        assert_eq!(sparse.count(), 0);
299        assert_eq!(sparse.capacity(), 10000);
300    }
301
302    #[test]
303    fn test_set_and_check() {
304        let mut sparse = SparseBitVector::new(100);
305        sparse.set(10);
306        sparse.set(20);
307        sparse.set(30);
308
309        assert!(sparse.is_set(10));
310        assert!(sparse.is_set(20));
311        assert!(sparse.is_set(30));
312        assert!(!sparse.is_set(15));
313        assert_eq!(sparse.count(), 3);
314    }
315
316    #[test]
317    fn test_from_indices() {
318        let sparse = SparseBitVector::from_indices(vec![30, 10, 20, 10], 100);
319        assert_eq!(sparse.count(), 3); // Deduped
320        assert!(sparse.is_set(10));
321        assert!(sparse.is_set(20));
322        assert!(sparse.is_set(30));
323    }
324
325    #[test]
326    fn test_intersection() {
327        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
328        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
329        let intersection = a.intersection(&b);
330
331        assert_eq!(intersection.count(), 2);
332        assert!(intersection.is_set(3));
333        assert!(intersection.is_set(4));
334    }
335
336    #[test]
337    fn test_union() {
338        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
339        let b = SparseBitVector::from_indices(vec![3, 4, 5], 100);
340        let union = a.union(&b);
341
342        assert_eq!(union.count(), 5);
343        for i in 1..=5 {
344            assert!(union.is_set(i));
345        }
346    }
347
348    #[test]
349    fn test_jaccard_similarity() {
350        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
351        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
352
353        // Intersection: {3, 4} = 2
354        // Union: {1, 2, 3, 4, 5, 6} = 6
355        // Jaccard = 2/6 = 0.333...
356        let sim = a.jaccard_similarity(&b);
357        assert!((sim - 0.333333).abs() < 0.001);
358    }
359
360    #[test]
361    fn test_jaccard_identical() {
362        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
363        let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);
364
365        let sim = a.jaccard_similarity(&b);
366        assert_eq!(sim, 1.0);
367    }
368
369    #[test]
370    fn test_jaccard_disjoint() {
371        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
372        let b = SparseBitVector::from_indices(vec![4, 5, 6], 100);
373
374        let sim = a.jaccard_similarity(&b);
375        assert_eq!(sim, 0.0);
376    }
377
378    #[test]
379    fn test_hamming_distance() {
380        let a = SparseBitVector::from_indices(vec![1, 2, 3, 4], 100);
381        let b = SparseBitVector::from_indices(vec![3, 4, 5, 6], 100);
382
383        // Symmetric difference: {1, 2, 5, 6} = 4
384        let dist = a.hamming_distance(&b);
385        assert_eq!(dist, 4);
386    }
387
388    #[test]
389    fn test_hamming_identical() {
390        let a = SparseBitVector::from_indices(vec![1, 2, 3], 100);
391        let b = SparseBitVector::from_indices(vec![1, 2, 3], 100);
392
393        let dist = a.hamming_distance(&b);
394        assert_eq!(dist, 0);
395    }
396
397    #[test]
398    #[should_panic(expected = "Index out of bounds")]
399    fn test_set_out_of_bounds() {
400        let mut sparse = SparseBitVector::new(100);
401        sparse.set(100); // Should panic
402    }
403}