Skip to main content

scirs2_ndimage/hyperdimensional_computing/
vector_ops.rs

1//! Vector Operations for Hyperdimensional Computing
2//!
3//! This module implements the core mathematical operations for hypervectors,
4//! including creation, similarity computation, bundling, binding, and other
5//! fundamental operations that form the basis of HDC computations.
6
7use scirs2_core::ndarray::{Array1, ArrayView1};
8use scirs2_core::random::prelude::*;
9use std::collections::{BTreeMap, HashMap, HashSet};
10
11use crate::error::{NdimageError, NdimageResult};
12use crate::hyperdimensional_computing::types::{HDCConfig, Hypervector};
13
14impl Hypervector {
15    /// Create a new random hypervector
16    ///
17    /// # Arguments
18    ///
19    /// * `dim` - Dimensionality of the hypervector
20    /// * `sparsity` - Fraction of dimensions that are non-zero
21    ///
22    /// # Returns
23    ///
24    /// A new random hypervector with the specified dimensionality and sparsity
25    pub fn random(dim: usize, sparsity: f64) -> Self {
26        let num_nonzero = (dim as f64 * sparsity) as usize;
27        let mut sparse_data = Vec::new();
28        let mut rng = scirs2_core::random::rng();
29        let mut used_indices = HashSet::new();
30
31        while sparse_data.len() < num_nonzero {
32            let idx = rng.random_range(0..dim);
33            if !used_indices.contains(&idx) {
34                used_indices.insert(idx);
35                let value = if rng.random_bool(0.5) { 1.0 } else { -1.0 };
36                sparse_data.push((idx, value));
37            }
38        }
39
40        sparse_data.sort_by_key(|&(idx_, _)| idx_);
41
42        let norm = (sparse_data.len() as f64).sqrt();
43
44        Self {
45            sparse_data,
46            dimension: dim,
47            norm,
48        }
49    }
50
51    /// Create hypervector from dense array
52    ///
53    /// # Arguments
54    ///
55    /// * `data` - Dense array to convert
56    /// * `sparsity` - Target sparsity level
57    ///
58    /// # Returns
59    ///
60    /// A sparse hypervector representation of the dense data
61    pub fn from_dense(data: &Array1<f64>, sparsity: f64) -> Self {
62        let mut sparse_data = Vec::new();
63        let threshold = sparsity * data.iter().map(|x| x.abs()).sum::<f64>() / data.len() as f64;
64
65        for (i, &value) in data.iter().enumerate() {
66            if value.abs() > threshold {
67                sparse_data.push((i, value));
68            }
69        }
70
71        let norm = sparse_data.iter().map(|&(_, v)| v * v).sum::<f64>().sqrt();
72
73        Self {
74            sparse_data,
75            dimension: data.len(),
76            norm,
77        }
78    }
79
80    /// Convert to dense representation
81    ///
82    /// # Returns
83    ///
84    /// Dense array representation of the hypervector
85    pub fn to_dense(&self) -> Array1<f64> {
86        let mut dense = Array1::zeros(self.dimension);
87        for &(idx, value) in &self.sparse_data {
88            dense[idx] = value;
89        }
90        dense
91    }
92
93    /// Compute cosine similarity with another hypervector
94    ///
95    /// # Arguments
96    ///
97    /// * `other` - The other hypervector to compare with
98    ///
99    /// # Returns
100    ///
101    /// Cosine similarity score between 0.0 and 1.0
102    pub fn similarity(&self, other: &Self) -> f64 {
103        if self.dimension != other.dimension {
104            return 0.0;
105        }
106
107        let mut dot_product = 0.0;
108        let mut i = 0;
109        let mut j = 0;
110
111        while i < self.sparse_data.len() && j < other.sparse_data.len() {
112            let (self_idx, self_val) = self.sparse_data[i];
113            let (other_idx, other_val) = other.sparse_data[j];
114
115            if self_idx == other_idx {
116                dot_product += self_val * other_val;
117                i += 1;
118                j += 1;
119            } else if self_idx < other_idx {
120                i += 1;
121            } else {
122                j += 1;
123            }
124        }
125
126        if self.norm > 0.0 && other.norm > 0.0 {
127            dot_product / (self.norm * other.norm)
128        } else {
129            0.0
130        }
131    }
132
133    /// Bundle (superposition) with another hypervector
134    ///
135    /// Bundling creates a representation that is similar to both input vectors
136    /// and can be used to represent sets or unions of concepts.
137    ///
138    /// # Arguments
139    ///
140    /// * `other` - The other hypervector to bundle with
141    ///
142    /// # Returns
143    ///
144    /// A new hypervector representing the bundle of both inputs
145    pub fn bundle(&self, other: &Self) -> NdimageResult<Self> {
146        if self.dimension != other.dimension {
147            return Err(NdimageError::InvalidInput(format!(
148                "Dimension mismatch: {} vs {}",
149                self.dimension, other.dimension
150            )));
151        }
152
153        let mut result_map = BTreeMap::new();
154
155        // Add self values
156        for &(idx, value) in &self.sparse_data {
157            *result_map.entry(idx).or_insert(0.0) += value;
158        }
159
160        // Add other values
161        for &(idx, value) in &other.sparse_data {
162            *result_map.entry(idx).or_insert(0.0) += value;
163        }
164
165        let sparse_data: Vec<(usize, f64)> = result_map
166            .into_iter()
167            .filter(|&(_, v)| v.abs() > 1e-10)
168            .collect();
169
170        let norm = sparse_data.iter().map(|&(_, v)| v * v).sum::<f64>().sqrt();
171
172        Ok(Self {
173            sparse_data,
174            dimension: self.dimension,
175            norm,
176        })
177    }
178
179    /// Bind (convolution) with another hypervector
180    ///
181    /// Binding creates a representation that is dissimilar to both inputs
182    /// and is used to encode associations and relationships between concepts.
183    ///
184    /// # Arguments
185    ///
186    /// * `other` - The other hypervector to bind with
187    ///
188    /// # Returns
189    ///
190    /// A new hypervector representing the binding of both inputs
191    pub fn bind(&self, other: &Self) -> NdimageResult<Self> {
192        if self.dimension != other.dimension {
193            return Err(NdimageError::InvalidInput(format!(
194                "Dimension mismatch: {} vs {}",
195                self.dimension, other.dimension
196            )));
197        }
198
199        // For sparse binding, we use circular convolution approximation
200        let mut result_map = BTreeMap::new();
201
202        for &(self_idx, self_val) in &self.sparse_data {
203            for &(other_idx, other_val) in &other.sparse_data {
204                let result_idx = (self_idx + other_idx) % self.dimension;
205                *result_map.entry(result_idx).or_insert(0.0) += self_val * other_val;
206            }
207        }
208
209        let sparse_data: Vec<(usize, f64)> = result_map
210            .into_iter()
211            .filter(|&(_, v)| v.abs() > 1e-10)
212            .collect();
213
214        let norm = sparse_data.iter().map(|&(_, v)| v * v).sum::<f64>().sqrt();
215
216        Ok(Self {
217            sparse_data,
218            dimension: self.dimension,
219            norm,
220        })
221    }
222
223    /// Unbind (inverse binding) with another hypervector
224    ///
225    /// Unbinding recovers the original vector that was bound with the given vector.
226    /// If C = A ⊛ B, then A ≈ C ⊛ B⁻¹
227    ///
228    /// # Arguments
229    ///
230    /// * `other` - The hypervector to unbind from this one
231    ///
232    /// # Returns
233    ///
234    /// A new hypervector representing the unbinding result
235    pub fn unbind(&self, other: &Self) -> NdimageResult<Self> {
236        if self.dimension != other.dimension {
237            return Err(NdimageError::InvalidInput(format!(
238                "Dimension mismatch: {} vs {}",
239                self.dimension, other.dimension
240            )));
241        }
242
243        // Create inverse of other vector (circular shift by dimension - index)
244        let mut other_inverse_data = Vec::new();
245        for &(idx, val) in &other.sparse_data {
246            let inv_idx = if idx == 0 { 0 } else { self.dimension - idx };
247            other_inverse_data.push((inv_idx, val));
248        }
249
250        let other_inverse = Self {
251            sparse_data: other_inverse_data,
252            dimension: other.dimension,
253            norm: other.norm,
254        };
255
256        // Bind with the inverse
257        self.bind(&other_inverse)
258    }
259
260    /// Normalize the hypervector
261    ///
262    /// # Returns
263    ///
264    /// A new normalized hypervector
265    pub fn normalize(&self) -> Self {
266        if self.norm <= 0.0 {
267            return self.clone();
268        }
269
270        let normalized_data = self
271            .sparse_data
272            .iter()
273            .map(|&(idx, val)| (idx, val / self.norm))
274            .collect();
275
276        Self {
277            sparse_data: normalized_data,
278            dimension: self.dimension,
279            norm: 1.0,
280        }
281    }
282
283    /// Scale the hypervector by a constant
284    ///
285    /// # Arguments
286    ///
287    /// * `factor` - Scaling factor
288    ///
289    /// # Returns
290    ///
291    /// A new scaled hypervector
292    pub fn scale(&self, factor: f64) -> Self {
293        let scaled_data = self
294            .sparse_data
295            .iter()
296            .map(|&(idx, val)| (idx, val * factor))
297            .collect();
298
299        Self {
300            sparse_data: scaled_data,
301            dimension: self.dimension,
302            norm: self.norm * factor.abs(),
303        }
304    }
305
306    /// Threshold the hypervector, keeping only values above the threshold
307    ///
308    /// # Arguments
309    ///
310    /// * `threshold` - Minimum absolute value to keep
311    ///
312    /// # Returns
313    ///
314    /// A new thresholded hypervector
315    pub fn threshold(&self, threshold: f64) -> Self {
316        let thresholded_data: Vec<(usize, f64)> = self
317            .sparse_data
318            .iter()
319            .filter_map(|&(idx, val)| {
320                if val.abs() > threshold {
321                    Some((idx, val))
322                } else {
323                    None
324                }
325            })
326            .collect();
327
328        let norm = thresholded_data
329            .iter()
330            .map(|&(_, v)| v * v)
331            .sum::<f64>()
332            .sqrt();
333
334        Self {
335            sparse_data: thresholded_data,
336            dimension: self.dimension,
337            norm,
338        }
339    }
340
341    /// Get the sparsity level of the hypervector
342    ///
343    /// # Returns
344    ///
345    /// Fraction of non-zero dimensions
346    pub fn sparsity(&self) -> f64 {
347        self.sparse_data.len() as f64 / self.dimension as f64
348    }
349
350    /// Check if the hypervector is empty (all zeros)
351    ///
352    /// # Returns
353    ///
354    /// True if the hypervector has no non-zero elements
355    pub fn is_empty(&self) -> bool {
356        self.sparse_data.is_empty()
357    }
358
359    /// Get the L2 norm of the hypervector
360    ///
361    /// # Returns
362    ///
363    /// The L2 norm value
364    pub fn l2_norm(&self) -> f64 {
365        self.norm
366    }
367
368    /// Create a zero hypervector
369    ///
370    /// # Arguments
371    ///
372    /// * `dim` - Dimensionality of the zero vector
373    ///
374    /// # Returns
375    ///
376    /// A hypervector with all zeros
377    pub fn zeros(dim: usize) -> Self {
378        Self {
379            sparse_data: Vec::new(),
380            dimension: dim,
381            norm: 0.0,
382        }
383    }
384}
385
386/// Advanced vector operations
387impl Hypervector {
388    /// Compute Hamming distance with another hypervector
389    ///
390    /// # Arguments
391    ///
392    /// * `other` - The other hypervector to compare with
393    ///
394    /// # Returns
395    ///
396    /// Hamming distance (number of differing dimensions)
397    pub fn hamming_distance(&self, other: &Self) -> usize {
398        if self.dimension != other.dimension {
399            return self.dimension; // Maximum distance
400        }
401
402        let mut distance = 0;
403        let mut i = 0;
404        let mut j = 0;
405
406        // Count positions where both have values but they differ
407        while i < self.sparse_data.len() && j < other.sparse_data.len() {
408            let (self_idx, self_val) = self.sparse_data[i];
409            let (other_idx, other_val) = other.sparse_data[j];
410
411            if self_idx == other_idx {
412                if (self_val > 0.0) != (other_val > 0.0) {
413                    distance += 1;
414                }
415                i += 1;
416                j += 1;
417            } else if self_idx < other_idx {
418                distance += 1; // Self has value, other doesn't
419                i += 1;
420            } else {
421                distance += 1; // Other has value, self doesn't
422                j += 1;
423            }
424        }
425
426        // Count remaining elements
427        distance += (self.sparse_data.len() - i) + (other.sparse_data.len() - j);
428
429        distance
430    }
431
432    /// Compute overlap (intersection) with another hypervector
433    ///
434    /// # Arguments
435    ///
436    /// * `other` - The other hypervector to compute overlap with
437    ///
438    /// # Returns
439    ///
440    /// Number of dimensions where both vectors have non-zero values
441    pub fn overlap(&self, other: &Self) -> usize {
442        if self.dimension != other.dimension {
443            return 0;
444        }
445
446        let mut overlap_count = 0;
447        let mut i = 0;
448        let mut j = 0;
449
450        while i < self.sparse_data.len() && j < other.sparse_data.len() {
451            let (self_idx, _) = self.sparse_data[i];
452            let (other_idx, _) = other.sparse_data[j];
453
454            if self_idx == other_idx {
455                overlap_count += 1;
456                i += 1;
457                j += 1;
458            } else if self_idx < other_idx {
459                i += 1;
460            } else {
461                j += 1;
462            }
463        }
464
465        overlap_count
466    }
467
468    /// Create a permuted version of the hypervector
469    ///
470    /// # Arguments
471    ///
472    /// * `permutation` - Permutation mapping
473    ///
474    /// # Returns
475    ///
476    /// A new permuted hypervector
477    pub fn permute(&self, permutation: &[usize]) -> NdimageResult<Self> {
478        if permutation.len() != self.dimension {
479            return Err(NdimageError::InvalidInput(
480                "Permutation size must match vector dimension".into(),
481            ));
482        }
483
484        let mut permuted_data = Vec::new();
485        for &(idx, val) in &self.sparse_data {
486            if idx < permutation.len() {
487                permuted_data.push((permutation[idx], val));
488            }
489        }
490
491        permuted_data.sort_by_key(|&(idx, _)| idx);
492
493        Ok(Self {
494            sparse_data: permuted_data,
495            dimension: self.dimension,
496            norm: self.norm,
497        })
498    }
499}
500
501/// Utility functions for vector operations
502pub mod vector_utils {
503    use super::*;
504
505    /// Create a random permutation of given size
506    ///
507    /// # Arguments
508    ///
509    /// * `size` - Size of the permutation
510    ///
511    /// # Returns
512    ///
513    /// A random permutation vector
514    pub fn random_permutation(size: usize) -> Vec<usize> {
515        let mut perm: Vec<usize> = (0..size).collect();
516        let mut rng = scirs2_core::random::rng();
517        perm.shuffle(&mut rng);
518        perm
519    }
520
521    /// Bundle multiple hypervectors together
522    ///
523    /// # Arguments
524    ///
525    /// * `vectors` - Vector of hypervectors to bundle
526    ///
527    /// # Returns
528    ///
529    /// A new hypervector representing the bundle of all inputs
530    pub fn bundle_multiple(vectors: &[Hypervector]) -> NdimageResult<Hypervector> {
531        if vectors.is_empty() {
532            return Err(NdimageError::InvalidInput("No vectors to bundle".into()));
533        }
534
535        let mut result = vectors[0].clone();
536        for vector in vectors.iter().skip(1) {
537            result = result.bundle(vector)?;
538        }
539
540        Ok(result)
541    }
542
543    /// Compute the centroid of multiple hypervectors
544    ///
545    /// # Arguments
546    ///
547    /// * `vectors` - Vector of hypervectors
548    ///
549    /// # Returns
550    ///
551    /// The centroid hypervector
552    pub fn centroid(vectors: &[Hypervector]) -> NdimageResult<Hypervector> {
553        if vectors.is_empty() {
554            return Err(NdimageError::InvalidInput("No vectors provided".into()));
555        }
556
557        let bundled = bundle_multiple(vectors)?;
558        let scale_factor = 1.0 / vectors.len() as f64;
559        Ok(bundled.scale(scale_factor))
560    }
561
562    /// Weight a hypervector by a given factor
563    ///
564    /// # Arguments
565    ///
566    /// * `hv` - The hypervector to weight
567    /// * `weight` - The weighting factor
568    ///
569    /// # Returns
570    ///
571    /// A new weighted hypervector
572    pub fn weight_hypervector(hv: &Hypervector, weight: f64) -> Hypervector {
573        hv.scale(weight)
574    }
575}
576
577#[cfg(test)]
578mod tests {
579    use super::*;
580    use approx::assert_abs_diff_eq;
581
582    #[test]
583    fn test_hypervector_random_creation() {
584        let hv = Hypervector::random(1000, 0.1);
585        assert_eq!(hv.dimension, 1000);
586        assert!(hv.sparse_data.len() > 90); // Around 10% sparsity
587        assert!(hv.sparse_data.len() < 110);
588        assert!(hv.norm > 0.0);
589    }
590
591    #[test]
592    fn test_hypervector_from_dense() {
593        let dense = Array1::from(vec![1.0, 0.0, -1.0, 0.5, 0.0]);
594        let hv = Hypervector::from_dense(&dense, 0.1);
595
596        assert_eq!(hv.dimension, 5);
597        assert!(!hv.sparse_data.is_empty());
598        assert!(hv.norm > 0.0);
599    }
600
601    #[test]
602    fn test_hypervector_to_dense() {
603        let hv = Hypervector {
604            sparse_data: vec![(0, 1.0), (2, -1.0), (4, 0.5)],
605            dimension: 5,
606            norm: 1.5,
607        };
608
609        let dense = hv.to_dense();
610        assert_eq!(dense[0], 1.0);
611        assert_eq!(dense[1], 0.0);
612        assert_eq!(dense[2], -1.0);
613        assert_eq!(dense[3], 0.0);
614        assert_eq!(dense[4], 0.5);
615    }
616
617    #[test]
618    fn test_hypervector_similarity() {
619        let hv1 = Hypervector {
620            sparse_data: vec![(0, 1.0), (1, 1.0)],
621            dimension: 10,
622            norm: 2.0_f64.sqrt(),
623        };
624
625        let hv2 = Hypervector {
626            sparse_data: vec![(0, 1.0), (1, 1.0)],
627            dimension: 10,
628            norm: 2.0_f64.sqrt(),
629        };
630
631        let hv3 = Hypervector {
632            sparse_data: vec![(2, 1.0), (3, 1.0)],
633            dimension: 10,
634            norm: 2.0_f64.sqrt(),
635        };
636
637        assert_abs_diff_eq!(hv1.similarity(&hv2), 1.0, epsilon = 1e-10);
638        assert_abs_diff_eq!(hv1.similarity(&hv3), 0.0, epsilon = 1e-10);
639    }
640
641    #[test]
642    fn test_hypervector_bundle() {
643        let hv1 = Hypervector {
644            sparse_data: vec![(0, 1.0), (1, 1.0)],
645            dimension: 10,
646            norm: 2.0_f64.sqrt(),
647        };
648
649        let hv2 = Hypervector {
650            sparse_data: vec![(1, 1.0), (2, 1.0)],
651            dimension: 10,
652            norm: 2.0_f64.sqrt(),
653        };
654
655        let bundled = hv1.bundle(&hv2).expect("Operation failed");
656        assert_eq!(bundled.dimension, 10);
657        assert_eq!(bundled.sparse_data.len(), 3); // indices 0, 1, 2
658
659        // Check specific values
660        assert_eq!(bundled.sparse_data[0], (0, 1.0)); // Only from hv1
661        assert_eq!(bundled.sparse_data[1], (1, 2.0)); // From both
662        assert_eq!(bundled.sparse_data[2], (2, 1.0)); // Only from hv2
663    }
664
665    #[test]
666    fn test_hypervector_bind() {
667        let hv1 = Hypervector {
668            sparse_data: vec![(0, 1.0), (1, 1.0)],
669            dimension: 10,
670            norm: 2.0_f64.sqrt(),
671        };
672
673        let hv2 = Hypervector {
674            sparse_data: vec![(1, 1.0), (2, 1.0)],
675            dimension: 10,
676            norm: 2.0_f64.sqrt(),
677        };
678
679        let bound = hv1.bind(&hv2).expect("Operation failed");
680        assert_eq!(bound.dimension, 10);
681        assert!(!bound.sparse_data.is_empty());
682        assert!(bound.norm > 0.0);
683    }
684
685    #[test]
686    fn test_hypervector_normalize() {
687        let hv = Hypervector {
688            sparse_data: vec![(0, 2.0), (1, 2.0)],
689            dimension: 10,
690            norm: 8.0_f64.sqrt(),
691        };
692
693        let normalized = hv.normalize();
694        assert_abs_diff_eq!(normalized.norm, 1.0, epsilon = 1e-10);
695        assert_abs_diff_eq!(
696            normalized.sparse_data[0].1,
697            2.0 / 8.0_f64.sqrt(),
698            epsilon = 1e-10
699        );
700    }
701
702    #[test]
703    fn test_hypervector_scale() {
704        let hv = Hypervector {
705            sparse_data: vec![(0, 1.0), (1, 2.0)],
706            dimension: 10,
707            norm: 5.0_f64.sqrt(),
708        };
709
710        let scaled = hv.scale(2.0);
711        assert_eq!(scaled.sparse_data[0].1, 2.0);
712        assert_eq!(scaled.sparse_data[1].1, 4.0);
713        assert_abs_diff_eq!(scaled.norm, 2.0 * 5.0_f64.sqrt(), epsilon = 1e-10);
714    }
715
716    #[test]
717    fn test_hypervector_threshold() {
718        let hv = Hypervector {
719            sparse_data: vec![(0, 0.1), (1, 0.5), (2, 1.0)],
720            dimension: 10,
721            norm: 1.0,
722        };
723
724        let thresholded = hv.threshold(0.3);
725        assert_eq!(thresholded.sparse_data.len(), 2); // Only values >= 0.3
726        assert_eq!(thresholded.sparse_data[0], (1, 0.5));
727        assert_eq!(thresholded.sparse_data[1], (2, 1.0));
728    }
729
730    #[test]
731    fn test_hypervector_sparsity() {
732        let hv = Hypervector {
733            sparse_data: vec![(0, 1.0), (1, 1.0)],
734            dimension: 10,
735            norm: 1.0,
736        };
737
738        assert_abs_diff_eq!(hv.sparsity(), 0.2, epsilon = 1e-10); // 2/10
739    }
740
741    #[test]
742    fn test_hypervector_hamming_distance() {
743        let hv1 = Hypervector {
744            sparse_data: vec![(0, 1.0), (1, 1.0), (2, -1.0)],
745            dimension: 10,
746            norm: 1.0,
747        };
748
749        let hv2 = Hypervector {
750            sparse_data: vec![(0, 1.0), (1, -1.0), (3, 1.0)],
751            dimension: 10,
752            norm: 1.0,
753        };
754
755        let distance = hv1.hamming_distance(&hv2);
756        assert_eq!(distance, 3); // Different at positions 1, 2, 3
757    }
758
759    #[test]
760    fn test_hypervector_overlap() {
761        let hv1 = Hypervector {
762            sparse_data: vec![(0, 1.0), (1, 1.0), (2, -1.0)],
763            dimension: 10,
764            norm: 1.0,
765        };
766
767        let hv2 = Hypervector {
768            sparse_data: vec![(0, 1.0), (1, -1.0), (3, 1.0)],
769            dimension: 10,
770            norm: 1.0,
771        };
772
773        let overlap = hv1.overlap(&hv2);
774        assert_eq!(overlap, 2); // Both have values at positions 0 and 1
775    }
776
777    #[test]
778    fn test_bundle_multiple() {
779        use vector_utils::*;
780
781        let hv1 = Hypervector::random(100, 0.1);
782        let hv2 = Hypervector::random(100, 0.1);
783        let hv3 = Hypervector::random(100, 0.1);
784
785        let vectors = vec![hv1, hv2, hv3];
786        let bundled = bundle_multiple(&vectors).expect("Operation failed");
787
788        assert_eq!(bundled.dimension, 100);
789        assert!(bundled.norm > 0.0);
790    }
791
792    #[test]
793    fn test_centroid() {
794        use vector_utils::*;
795
796        let hv1 = Hypervector::random(100, 0.1);
797        let hv2 = Hypervector::random(100, 0.1);
798
799        let vectors = vec![hv1, hv2];
800        let centroid_hv = centroid(&vectors).expect("Operation failed");
801
802        assert_eq!(centroid_hv.dimension, 100);
803        assert!(centroid_hv.norm > 0.0);
804    }
805}