ruvector_nervous_system/hdc/
vector.rs

1//! Hypervector data type and basic operations
2
3use super::{HYPERVECTOR_BITS, HYPERVECTOR_U64_LEN};
4use rand::Rng;
5use std::fmt;
6
7/// Error types for HDC operations
8#[derive(Debug, thiserror::Error)]
9pub enum HdcError {
10    #[error("Invalid hypervector dimension: expected {expected}, got {got}")]
11    InvalidDimension { expected: usize, got: usize },
12
13    #[error("Empty vector set provided")]
14    EmptyVectorSet,
15
16    #[error("Serialization error: {0}")]
17    SerializationError(String),
18}
19
20/// A binary hypervector with 10,000 bits packed into 156 u64 words
21///
22/// # Performance
23///
24/// - Memory: 156 * 8 = 1,248 bytes per vector
25/// - XOR binding: <50ns (single CPU cycle per u64)
26/// - Similarity: <100ns (SIMD popcount)
27///
28/// # Example
29///
30/// ```rust
31/// use ruvector_nervous_system::hdc::Hypervector;
32///
33/// let v1 = Hypervector::random();
34/// let v2 = Hypervector::random();
35/// let bound = v1.bind(&v2);
36/// let sim = v1.similarity(&v2);
37/// ```
38#[derive(Clone, PartialEq, Eq)]
39pub struct Hypervector {
40    pub(crate) bits: [u64; HYPERVECTOR_U64_LEN],
41}
42
43impl Hypervector {
44    /// Creates a new hypervector with all bits set to zero
45    ///
46    /// # Example
47    ///
48    /// ```rust
49    /// use ruvector_nervous_system::hdc::Hypervector;
50    ///
51    /// let zero = Hypervector::zero();
52    /// assert_eq!(zero.popcount(), 0);
53    /// ```
54    pub fn zero() -> Self {
55        Self {
56            bits: [0u64; HYPERVECTOR_U64_LEN],
57        }
58    }
59
60    /// Creates a random hypervector with ~50% bits set
61    ///
62    /// Uses thread-local RNG for performance.
63    ///
64    /// # Example
65    ///
66    /// ```rust
67    /// use ruvector_nervous_system::hdc::Hypervector;
68    ///
69    /// let random = Hypervector::random();
70    /// let count = random.popcount();
71    /// // Should be around 5000 ± 150
72    /// assert!(count > 4500 && count < 5500);
73    /// ```
74    pub fn random() -> Self {
75        let mut rng = rand::thread_rng();
76        let mut bits = [0u64; HYPERVECTOR_U64_LEN];
77
78        for word in bits.iter_mut() {
79            *word = rng.gen();
80        }
81
82        Self { bits }
83    }
84
85    /// Creates a hypervector from a seed for reproducibility
86    ///
87    /// # Example
88    ///
89    /// ```rust
90    /// use ruvector_nervous_system::hdc::Hypervector;
91    ///
92    /// let v1 = Hypervector::from_seed(42);
93    /// let v2 = Hypervector::from_seed(42);
94    /// assert_eq!(v1, v2);
95    /// ```
96    pub fn from_seed(seed: u64) -> Self {
97        use rand::SeedableRng;
98        let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
99        let mut bits = [0u64; HYPERVECTOR_U64_LEN];
100
101        for word in bits.iter_mut() {
102            *word = rng.gen();
103        }
104
105        Self { bits }
106    }
107
108    /// Binds two hypervectors using XOR
109    ///
110    /// Binding is associative, commutative, and self-inverse:
111    /// - `a.bind(b) == b.bind(a)`
112    /// - `a.bind(b).bind(b) == a`
113    ///
114    /// # Performance
115    ///
116    /// <50ns on modern CPUs (single cycle XOR per u64)
117    ///
118    /// # Example
119    ///
120    /// ```rust
121    /// use ruvector_nervous_system::hdc::Hypervector;
122    ///
123    /// let a = Hypervector::random();
124    /// let b = Hypervector::random();
125    /// let bound = a.bind(&b);
126    ///
127    /// // Self-inverse property
128    /// assert_eq!(bound.bind(&b), a);
129    /// ```
130    #[inline]
131    pub fn bind(&self, other: &Self) -> Self {
132        let mut result = Self::zero();
133
134        for i in 0..HYPERVECTOR_U64_LEN {
135            result.bits[i] = self.bits[i] ^ other.bits[i];
136        }
137
138        result
139    }
140
141    /// Computes similarity between two hypervectors
142    ///
143    /// Returns a value in [0.0, 1.0] where:
144    /// - 1.0 = identical vectors
145    /// - 0.5 = random/orthogonal vectors
146    /// - 0.0 = completely opposite vectors
147    ///
148    /// # Performance
149    ///
150    /// <100ns with SIMD popcount
151    ///
152    /// # Example
153    ///
154    /// ```rust
155    /// use ruvector_nervous_system::hdc::Hypervector;
156    ///
157    /// let a = Hypervector::random();
158    /// let b = a.clone();
159    /// assert!((a.similarity(&b) - 1.0).abs() < 0.001);
160    /// ```
161    #[inline]
162    pub fn similarity(&self, other: &Self) -> f32 {
163        let hamming = self.hamming_distance(other);
164        1.0 - (2.0 * hamming as f32 / HYPERVECTOR_BITS as f32)
165    }
166
167    /// Computes Hamming distance (number of differing bits)
168    ///
169    /// # Performance
170    ///
171    /// <50ns with SIMD popcount instruction and loop unrolling
172    ///
173    /// # Example
174    ///
175    /// ```rust
176    /// use ruvector_nervous_system::hdc::Hypervector;
177    ///
178    /// let a = Hypervector::random();
179    /// assert_eq!(a.hamming_distance(&a), 0);
180    /// ```
181    #[inline]
182    pub fn hamming_distance(&self, other: &Self) -> u32 {
183        // Unrolled loop for better instruction-level parallelism
184        // Process 4 u64s at a time to maximize CPU pipeline utilization
185        let mut d0 = 0u32;
186        let mut d1 = 0u32;
187        let mut d2 = 0u32;
188        let mut d3 = 0u32;
189
190        let chunks = HYPERVECTOR_U64_LEN / 4;
191        let remainder = HYPERVECTOR_U64_LEN % 4;
192
193        // Main unrolled loop (4 words per iteration)
194        for i in 0..chunks {
195            let base = i * 4;
196            d0 += (self.bits[base] ^ other.bits[base]).count_ones();
197            d1 += (self.bits[base + 1] ^ other.bits[base + 1]).count_ones();
198            d2 += (self.bits[base + 2] ^ other.bits[base + 2]).count_ones();
199            d3 += (self.bits[base + 3] ^ other.bits[base + 3]).count_ones();
200        }
201
202        // Handle remaining elements
203        let base = chunks * 4;
204        for i in 0..remainder {
205            d0 += (self.bits[base + i] ^ other.bits[base + i]).count_ones();
206        }
207
208        d0 + d1 + d2 + d3
209    }
210
211    /// Counts the number of set bits (population count)
212    ///
213    /// # Example
214    ///
215    /// ```rust
216    /// use ruvector_nervous_system::hdc::Hypervector;
217    ///
218    /// let zero = Hypervector::zero();
219    /// assert_eq!(zero.popcount(), 0);
220    ///
221    /// let random = Hypervector::random();
222    /// let count = random.popcount();
223    /// // Should be around 5000 for random vectors
224    /// assert!(count > 4500 && count < 5500);
225    /// ```
226    #[inline]
227    pub fn popcount(&self) -> u32 {
228        self.bits.iter().map(|&w| w.count_ones()).sum()
229    }
230
231    /// Bundles multiple vectors by majority voting on each bit
232    ///
233    /// # Performance
234    ///
235    /// Optimized word-level implementation: O(n * 157 words) instead of O(n * 10000 bits)
236    ///
237    /// # Example
238    ///
239    /// ```rust
240    /// use ruvector_nervous_system::hdc::Hypervector;
241    ///
242    /// let v1 = Hypervector::random();
243    /// let v2 = Hypervector::random();
244    /// let v3 = Hypervector::random();
245    ///
246    /// let bundled = Hypervector::bundle(&[v1.clone(), v2, v3]).unwrap();
247    /// // Bundled vector is similar to all inputs
248    /// assert!(bundled.similarity(&v1) > 0.3);
249    /// ```
250    pub fn bundle(vectors: &[Self]) -> Result<Self, HdcError> {
251        if vectors.is_empty() {
252            return Err(HdcError::EmptyVectorSet);
253        }
254
255        if vectors.len() == 1 {
256            return Ok(vectors[0].clone());
257        }
258
259        let n = vectors.len();
260        let threshold = n / 2;
261        let mut result = Self::zero();
262
263        // Process word by word (64 bits at a time)
264        for word_idx in 0..HYPERVECTOR_U64_LEN {
265            // Count bits at each position within this word using bit-parallel counting
266            let mut counts = [0u8; 64];
267
268            for vector in vectors {
269                let word = vector.bits[word_idx];
270                // Unroll inner loop for cache efficiency
271                for bit_pos in 0..64 {
272                    counts[bit_pos] += ((word >> bit_pos) & 1) as u8;
273                }
274            }
275
276            // Build result word from majority votes
277            let mut result_word = 0u64;
278            for (bit_pos, &count) in counts.iter().enumerate() {
279                if count as usize > threshold {
280                    result_word |= 1u64 << bit_pos;
281                }
282            }
283            result.bits[word_idx] = result_word;
284        }
285
286        Ok(result)
287    }
288
289    /// Fast bundle for exactly 3 vectors using bitwise majority
290    ///
291    /// # Performance
292    ///
293    /// Single-pass bitwise operation: ~500ns for 10,000 bits
294    #[inline]
295    pub fn bundle_3(a: &Self, b: &Self, c: &Self) -> Self {
296        let mut result = Self::zero();
297
298        // Majority of 3 bits: (a & b) | (b & c) | (a & c)
299        for i in 0..HYPERVECTOR_U64_LEN {
300            let wa = a.bits[i];
301            let wb = b.bits[i];
302            let wc = c.bits[i];
303            result.bits[i] = (wa & wb) | (wb & wc) | (wa & wc);
304        }
305
306        result
307    }
308
309    /// Returns the internal bit array (for advanced use cases)
310    #[inline]
311    pub fn bits(&self) -> &[u64; HYPERVECTOR_U64_LEN] {
312        &self.bits
313    }
314}
315
316impl fmt::Debug for Hypervector {
317    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
318        write!(
319            f,
320            "Hypervector {{ bits: {} set / {} total }}",
321            self.popcount(),
322            HYPERVECTOR_BITS
323        )
324    }
325}
326
327impl Default for Hypervector {
328    fn default() -> Self {
329        Self::zero()
330    }
331}
332
333#[cfg(test)]
334mod tests {
335    use super::*;
336
337    #[test]
338    fn test_zero_vector() {
339        let zero = Hypervector::zero();
340        assert_eq!(zero.popcount(), 0);
341        assert_eq!(zero.hamming_distance(&zero), 0);
342    }
343
344    #[test]
345    fn test_random_vector_properties() {
346        let v = Hypervector::random();
347        let count = v.popcount();
348
349        // Random vector should have ~50% bits set (±3 sigma)
350        assert!(count > 4500 && count < 5500, "popcount: {}", count);
351    }
352
353    #[test]
354    fn test_from_seed_deterministic() {
355        let v1 = Hypervector::from_seed(42);
356        let v2 = Hypervector::from_seed(42);
357        let v3 = Hypervector::from_seed(43);
358
359        assert_eq!(v1, v2);
360        assert_ne!(v1, v3);
361    }
362
363    #[test]
364    fn test_bind_commutative() {
365        let a = Hypervector::random();
366        let b = Hypervector::random();
367
368        assert_eq!(a.bind(&b), b.bind(&a));
369    }
370
371    #[test]
372    fn test_bind_self_inverse() {
373        let a = Hypervector::random();
374        let b = Hypervector::random();
375
376        let bound = a.bind(&b);
377        let unbound = bound.bind(&b);
378
379        assert_eq!(a, unbound);
380    }
381
382    #[test]
383    fn test_similarity_bounds() {
384        let a = Hypervector::random();
385        let b = Hypervector::random();
386
387        let sim = a.similarity(&b);
388        // Cosine similarity formula: 1 - 2*hamming/dim gives range [-1, 1]
389        assert!(
390            sim >= -1.0 && sim <= 1.0,
391            "similarity out of bounds: {}",
392            sim
393        );
394    }
395
396    #[test]
397    fn test_similarity_identical() {
398        let a = Hypervector::random();
399        let sim = a.similarity(&a);
400
401        assert!((sim - 1.0).abs() < 0.001);
402    }
403
404    #[test]
405    fn test_similarity_random_approximately_zero() {
406        let a = Hypervector::random();
407        let b = Hypervector::random();
408
409        let sim = a.similarity(&b);
410        // Random vectors have ~50% bit overlap, so similarity ≈ 0.0
411        // 1 - 2*(5000/10000) = 1 - 1 = 0
412        assert!(sim > -0.2 && sim < 0.2, "similarity: {}", sim);
413    }
414
415    #[test]
416    fn test_hamming_distance_identical() {
417        let a = Hypervector::random();
418        assert_eq!(a.hamming_distance(&a), 0);
419    }
420
421    #[test]
422    fn test_bundle_single_vector() {
423        let v = Hypervector::random();
424        let bundled = Hypervector::bundle(&[v.clone()]).unwrap();
425
426        assert_eq!(bundled, v);
427    }
428
429    #[test]
430    fn test_bundle_empty_error() {
431        let result = Hypervector::bundle(&[]);
432        assert!(matches!(result, Err(HdcError::EmptyVectorSet)));
433    }
434
435    #[test]
436    fn test_bundle_majority_vote() {
437        let v1 = Hypervector::from_seed(1);
438        let v2 = Hypervector::from_seed(2);
439        let v3 = Hypervector::from_seed(3);
440
441        let bundled = Hypervector::bundle(&[v1.clone(), v2.clone(), v3]).unwrap();
442
443        // Bundled should be similar to all inputs
444        assert!(bundled.similarity(&v1) > 0.3);
445        assert!(bundled.similarity(&v2) > 0.3);
446    }
447
448    #[test]
449    fn test_bundle_odd_count() {
450        let vectors: Vec<_> = (0..5).map(|i| Hypervector::from_seed(i)).collect();
451        let bundled = Hypervector::bundle(&vectors).unwrap();
452
453        for v in &vectors {
454            assert!(bundled.similarity(v) > 0.3);
455        }
456    }
457
458    #[test]
459    fn test_debug_format() {
460        let v = Hypervector::zero();
461        let debug = format!("{:?}", v);
462        assert!(debug.contains("bits: 0 set"));
463    }
464}