1use scirs2_core::ndarray::{Array1, ArrayView1};
8use scirs2_core::random::prelude::*;
9use std::collections::{BTreeMap, HashMap, HashSet};
10
11use crate::error::{NdimageError, NdimageResult};
12use crate::hyperdimensional_computing::types::{HDCConfig, Hypervector};
13
14impl Hypervector {
15 pub fn random(dim: usize, sparsity: f64) -> Self {
26 let num_nonzero = (dim as f64 * sparsity) as usize;
27 let mut sparse_data = Vec::new();
28 let mut rng = scirs2_core::random::rng();
29 let mut used_indices = HashSet::new();
30
31 while sparse_data.len() < num_nonzero {
32 let idx = rng.random_range(0..dim);
33 if !used_indices.contains(&idx) {
34 used_indices.insert(idx);
35 let value = if rng.random_bool(0.5) { 1.0 } else { -1.0 };
36 sparse_data.push((idx, value));
37 }
38 }
39
40 sparse_data.sort_by_key(|&(idx_, _)| idx_);
41
42 let norm = (sparse_data.len() as f64).sqrt();
43
44 Self {
45 sparse_data,
46 dimension: dim,
47 norm,
48 }
49 }
50
51 pub fn from_dense(data: &Array1<f64>, sparsity: f64) -> Self {
62 let mut sparse_data = Vec::new();
63 let threshold = sparsity * data.iter().map(|x| x.abs()).sum::<f64>() / data.len() as f64;
64
65 for (i, &value) in data.iter().enumerate() {
66 if value.abs() > threshold {
67 sparse_data.push((i, value));
68 }
69 }
70
71 let norm = sparse_data.iter().map(|&(_, v)| v * v).sum::<f64>().sqrt();
72
73 Self {
74 sparse_data,
75 dimension: data.len(),
76 norm,
77 }
78 }
79
80 pub fn to_dense(&self) -> Array1<f64> {
86 let mut dense = Array1::zeros(self.dimension);
87 for &(idx, value) in &self.sparse_data {
88 dense[idx] = value;
89 }
90 dense
91 }
92
93 pub fn similarity(&self, other: &Self) -> f64 {
103 if self.dimension != other.dimension {
104 return 0.0;
105 }
106
107 let mut dot_product = 0.0;
108 let mut i = 0;
109 let mut j = 0;
110
111 while i < self.sparse_data.len() && j < other.sparse_data.len() {
112 let (self_idx, self_val) = self.sparse_data[i];
113 let (other_idx, other_val) = other.sparse_data[j];
114
115 if self_idx == other_idx {
116 dot_product += self_val * other_val;
117 i += 1;
118 j += 1;
119 } else if self_idx < other_idx {
120 i += 1;
121 } else {
122 j += 1;
123 }
124 }
125
126 if self.norm > 0.0 && other.norm > 0.0 {
127 dot_product / (self.norm * other.norm)
128 } else {
129 0.0
130 }
131 }
132
133 pub fn bundle(&self, other: &Self) -> NdimageResult<Self> {
146 if self.dimension != other.dimension {
147 return Err(NdimageError::InvalidInput(format!(
148 "Dimension mismatch: {} vs {}",
149 self.dimension, other.dimension
150 )));
151 }
152
153 let mut result_map = BTreeMap::new();
154
155 for &(idx, value) in &self.sparse_data {
157 *result_map.entry(idx).or_insert(0.0) += value;
158 }
159
160 for &(idx, value) in &other.sparse_data {
162 *result_map.entry(idx).or_insert(0.0) += value;
163 }
164
165 let sparse_data: Vec<(usize, f64)> = result_map
166 .into_iter()
167 .filter(|&(_, v)| v.abs() > 1e-10)
168 .collect();
169
170 let norm = sparse_data.iter().map(|&(_, v)| v * v).sum::<f64>().sqrt();
171
172 Ok(Self {
173 sparse_data,
174 dimension: self.dimension,
175 norm,
176 })
177 }
178
179 pub fn bind(&self, other: &Self) -> NdimageResult<Self> {
192 if self.dimension != other.dimension {
193 return Err(NdimageError::InvalidInput(format!(
194 "Dimension mismatch: {} vs {}",
195 self.dimension, other.dimension
196 )));
197 }
198
199 let mut result_map = BTreeMap::new();
201
202 for &(self_idx, self_val) in &self.sparse_data {
203 for &(other_idx, other_val) in &other.sparse_data {
204 let result_idx = (self_idx + other_idx) % self.dimension;
205 *result_map.entry(result_idx).or_insert(0.0) += self_val * other_val;
206 }
207 }
208
209 let sparse_data: Vec<(usize, f64)> = result_map
210 .into_iter()
211 .filter(|&(_, v)| v.abs() > 1e-10)
212 .collect();
213
214 let norm = sparse_data.iter().map(|&(_, v)| v * v).sum::<f64>().sqrt();
215
216 Ok(Self {
217 sparse_data,
218 dimension: self.dimension,
219 norm,
220 })
221 }
222
223 pub fn unbind(&self, other: &Self) -> NdimageResult<Self> {
236 if self.dimension != other.dimension {
237 return Err(NdimageError::InvalidInput(format!(
238 "Dimension mismatch: {} vs {}",
239 self.dimension, other.dimension
240 )));
241 }
242
243 let mut other_inverse_data = Vec::new();
245 for &(idx, val) in &other.sparse_data {
246 let inv_idx = if idx == 0 { 0 } else { self.dimension - idx };
247 other_inverse_data.push((inv_idx, val));
248 }
249
250 let other_inverse = Self {
251 sparse_data: other_inverse_data,
252 dimension: other.dimension,
253 norm: other.norm,
254 };
255
256 self.bind(&other_inverse)
258 }
259
260 pub fn normalize(&self) -> Self {
266 if self.norm <= 0.0 {
267 return self.clone();
268 }
269
270 let normalized_data = self
271 .sparse_data
272 .iter()
273 .map(|&(idx, val)| (idx, val / self.norm))
274 .collect();
275
276 Self {
277 sparse_data: normalized_data,
278 dimension: self.dimension,
279 norm: 1.0,
280 }
281 }
282
283 pub fn scale(&self, factor: f64) -> Self {
293 let scaled_data = self
294 .sparse_data
295 .iter()
296 .map(|&(idx, val)| (idx, val * factor))
297 .collect();
298
299 Self {
300 sparse_data: scaled_data,
301 dimension: self.dimension,
302 norm: self.norm * factor.abs(),
303 }
304 }
305
306 pub fn threshold(&self, threshold: f64) -> Self {
316 let thresholded_data: Vec<(usize, f64)> = self
317 .sparse_data
318 .iter()
319 .filter_map(|&(idx, val)| {
320 if val.abs() > threshold {
321 Some((idx, val))
322 } else {
323 None
324 }
325 })
326 .collect();
327
328 let norm = thresholded_data
329 .iter()
330 .map(|&(_, v)| v * v)
331 .sum::<f64>()
332 .sqrt();
333
334 Self {
335 sparse_data: thresholded_data,
336 dimension: self.dimension,
337 norm,
338 }
339 }
340
341 pub fn sparsity(&self) -> f64 {
347 self.sparse_data.len() as f64 / self.dimension as f64
348 }
349
350 pub fn is_empty(&self) -> bool {
356 self.sparse_data.is_empty()
357 }
358
359 pub fn l2_norm(&self) -> f64 {
365 self.norm
366 }
367
368 pub fn zeros(dim: usize) -> Self {
378 Self {
379 sparse_data: Vec::new(),
380 dimension: dim,
381 norm: 0.0,
382 }
383 }
384}
385
386impl Hypervector {
388 pub fn hamming_distance(&self, other: &Self) -> usize {
398 if self.dimension != other.dimension {
399 return self.dimension; }
401
402 let mut distance = 0;
403 let mut i = 0;
404 let mut j = 0;
405
406 while i < self.sparse_data.len() && j < other.sparse_data.len() {
408 let (self_idx, self_val) = self.sparse_data[i];
409 let (other_idx, other_val) = other.sparse_data[j];
410
411 if self_idx == other_idx {
412 if (self_val > 0.0) != (other_val > 0.0) {
413 distance += 1;
414 }
415 i += 1;
416 j += 1;
417 } else if self_idx < other_idx {
418 distance += 1; i += 1;
420 } else {
421 distance += 1; j += 1;
423 }
424 }
425
426 distance += (self.sparse_data.len() - i) + (other.sparse_data.len() - j);
428
429 distance
430 }
431
432 pub fn overlap(&self, other: &Self) -> usize {
442 if self.dimension != other.dimension {
443 return 0;
444 }
445
446 let mut overlap_count = 0;
447 let mut i = 0;
448 let mut j = 0;
449
450 while i < self.sparse_data.len() && j < other.sparse_data.len() {
451 let (self_idx, _) = self.sparse_data[i];
452 let (other_idx, _) = other.sparse_data[j];
453
454 if self_idx == other_idx {
455 overlap_count += 1;
456 i += 1;
457 j += 1;
458 } else if self_idx < other_idx {
459 i += 1;
460 } else {
461 j += 1;
462 }
463 }
464
465 overlap_count
466 }
467
468 pub fn permute(&self, permutation: &[usize]) -> NdimageResult<Self> {
478 if permutation.len() != self.dimension {
479 return Err(NdimageError::InvalidInput(
480 "Permutation size must match vector dimension".into(),
481 ));
482 }
483
484 let mut permuted_data = Vec::new();
485 for &(idx, val) in &self.sparse_data {
486 if idx < permutation.len() {
487 permuted_data.push((permutation[idx], val));
488 }
489 }
490
491 permuted_data.sort_by_key(|&(idx, _)| idx);
492
493 Ok(Self {
494 sparse_data: permuted_data,
495 dimension: self.dimension,
496 norm: self.norm,
497 })
498 }
499}
500
501pub mod vector_utils {
503 use super::*;
504
505 pub fn random_permutation(size: usize) -> Vec<usize> {
515 let mut perm: Vec<usize> = (0..size).collect();
516 let mut rng = scirs2_core::random::rng();
517 perm.shuffle(&mut rng);
518 perm
519 }
520
521 pub fn bundle_multiple(vectors: &[Hypervector]) -> NdimageResult<Hypervector> {
531 if vectors.is_empty() {
532 return Err(NdimageError::InvalidInput("No vectors to bundle".into()));
533 }
534
535 let mut result = vectors[0].clone();
536 for vector in vectors.iter().skip(1) {
537 result = result.bundle(vector)?;
538 }
539
540 Ok(result)
541 }
542
543 pub fn centroid(vectors: &[Hypervector]) -> NdimageResult<Hypervector> {
553 if vectors.is_empty() {
554 return Err(NdimageError::InvalidInput("No vectors provided".into()));
555 }
556
557 let bundled = bundle_multiple(vectors)?;
558 let scale_factor = 1.0 / vectors.len() as f64;
559 Ok(bundled.scale(scale_factor))
560 }
561
562 pub fn weight_hypervector(hv: &Hypervector, weight: f64) -> Hypervector {
573 hv.scale(weight)
574 }
575}
576
577#[cfg(test)]
578mod tests {
579 use super::*;
580 use approx::assert_abs_diff_eq;
581
582 #[test]
583 fn test_hypervector_random_creation() {
584 let hv = Hypervector::random(1000, 0.1);
585 assert_eq!(hv.dimension, 1000);
586 assert!(hv.sparse_data.len() > 90); assert!(hv.sparse_data.len() < 110);
588 assert!(hv.norm > 0.0);
589 }
590
591 #[test]
592 fn test_hypervector_from_dense() {
593 let dense = Array1::from(vec![1.0, 0.0, -1.0, 0.5, 0.0]);
594 let hv = Hypervector::from_dense(&dense, 0.1);
595
596 assert_eq!(hv.dimension, 5);
597 assert!(!hv.sparse_data.is_empty());
598 assert!(hv.norm > 0.0);
599 }
600
601 #[test]
602 fn test_hypervector_to_dense() {
603 let hv = Hypervector {
604 sparse_data: vec![(0, 1.0), (2, -1.0), (4, 0.5)],
605 dimension: 5,
606 norm: 1.5,
607 };
608
609 let dense = hv.to_dense();
610 assert_eq!(dense[0], 1.0);
611 assert_eq!(dense[1], 0.0);
612 assert_eq!(dense[2], -1.0);
613 assert_eq!(dense[3], 0.0);
614 assert_eq!(dense[4], 0.5);
615 }
616
617 #[test]
618 fn test_hypervector_similarity() {
619 let hv1 = Hypervector {
620 sparse_data: vec![(0, 1.0), (1, 1.0)],
621 dimension: 10,
622 norm: 2.0_f64.sqrt(),
623 };
624
625 let hv2 = Hypervector {
626 sparse_data: vec![(0, 1.0), (1, 1.0)],
627 dimension: 10,
628 norm: 2.0_f64.sqrt(),
629 };
630
631 let hv3 = Hypervector {
632 sparse_data: vec![(2, 1.0), (3, 1.0)],
633 dimension: 10,
634 norm: 2.0_f64.sqrt(),
635 };
636
637 assert_abs_diff_eq!(hv1.similarity(&hv2), 1.0, epsilon = 1e-10);
638 assert_abs_diff_eq!(hv1.similarity(&hv3), 0.0, epsilon = 1e-10);
639 }
640
641 #[test]
642 fn test_hypervector_bundle() {
643 let hv1 = Hypervector {
644 sparse_data: vec![(0, 1.0), (1, 1.0)],
645 dimension: 10,
646 norm: 2.0_f64.sqrt(),
647 };
648
649 let hv2 = Hypervector {
650 sparse_data: vec![(1, 1.0), (2, 1.0)],
651 dimension: 10,
652 norm: 2.0_f64.sqrt(),
653 };
654
655 let bundled = hv1.bundle(&hv2).expect("Operation failed");
656 assert_eq!(bundled.dimension, 10);
657 assert_eq!(bundled.sparse_data.len(), 3); assert_eq!(bundled.sparse_data[0], (0, 1.0)); assert_eq!(bundled.sparse_data[1], (1, 2.0)); assert_eq!(bundled.sparse_data[2], (2, 1.0)); }
664
665 #[test]
666 fn test_hypervector_bind() {
667 let hv1 = Hypervector {
668 sparse_data: vec![(0, 1.0), (1, 1.0)],
669 dimension: 10,
670 norm: 2.0_f64.sqrt(),
671 };
672
673 let hv2 = Hypervector {
674 sparse_data: vec![(1, 1.0), (2, 1.0)],
675 dimension: 10,
676 norm: 2.0_f64.sqrt(),
677 };
678
679 let bound = hv1.bind(&hv2).expect("Operation failed");
680 assert_eq!(bound.dimension, 10);
681 assert!(!bound.sparse_data.is_empty());
682 assert!(bound.norm > 0.0);
683 }
684
685 #[test]
686 fn test_hypervector_normalize() {
687 let hv = Hypervector {
688 sparse_data: vec![(0, 2.0), (1, 2.0)],
689 dimension: 10,
690 norm: 8.0_f64.sqrt(),
691 };
692
693 let normalized = hv.normalize();
694 assert_abs_diff_eq!(normalized.norm, 1.0, epsilon = 1e-10);
695 assert_abs_diff_eq!(
696 normalized.sparse_data[0].1,
697 2.0 / 8.0_f64.sqrt(),
698 epsilon = 1e-10
699 );
700 }
701
702 #[test]
703 fn test_hypervector_scale() {
704 let hv = Hypervector {
705 sparse_data: vec![(0, 1.0), (1, 2.0)],
706 dimension: 10,
707 norm: 5.0_f64.sqrt(),
708 };
709
710 let scaled = hv.scale(2.0);
711 assert_eq!(scaled.sparse_data[0].1, 2.0);
712 assert_eq!(scaled.sparse_data[1].1, 4.0);
713 assert_abs_diff_eq!(scaled.norm, 2.0 * 5.0_f64.sqrt(), epsilon = 1e-10);
714 }
715
716 #[test]
717 fn test_hypervector_threshold() {
718 let hv = Hypervector {
719 sparse_data: vec![(0, 0.1), (1, 0.5), (2, 1.0)],
720 dimension: 10,
721 norm: 1.0,
722 };
723
724 let thresholded = hv.threshold(0.3);
725 assert_eq!(thresholded.sparse_data.len(), 2); assert_eq!(thresholded.sparse_data[0], (1, 0.5));
727 assert_eq!(thresholded.sparse_data[1], (2, 1.0));
728 }
729
730 #[test]
731 fn test_hypervector_sparsity() {
732 let hv = Hypervector {
733 sparse_data: vec![(0, 1.0), (1, 1.0)],
734 dimension: 10,
735 norm: 1.0,
736 };
737
738 assert_abs_diff_eq!(hv.sparsity(), 0.2, epsilon = 1e-10); }
740
741 #[test]
742 fn test_hypervector_hamming_distance() {
743 let hv1 = Hypervector {
744 sparse_data: vec![(0, 1.0), (1, 1.0), (2, -1.0)],
745 dimension: 10,
746 norm: 1.0,
747 };
748
749 let hv2 = Hypervector {
750 sparse_data: vec![(0, 1.0), (1, -1.0), (3, 1.0)],
751 dimension: 10,
752 norm: 1.0,
753 };
754
755 let distance = hv1.hamming_distance(&hv2);
756 assert_eq!(distance, 3); }
758
759 #[test]
760 fn test_hypervector_overlap() {
761 let hv1 = Hypervector {
762 sparse_data: vec![(0, 1.0), (1, 1.0), (2, -1.0)],
763 dimension: 10,
764 norm: 1.0,
765 };
766
767 let hv2 = Hypervector {
768 sparse_data: vec![(0, 1.0), (1, -1.0), (3, 1.0)],
769 dimension: 10,
770 norm: 1.0,
771 };
772
773 let overlap = hv1.overlap(&hv2);
774 assert_eq!(overlap, 2); }
776
777 #[test]
778 fn test_bundle_multiple() {
779 use vector_utils::*;
780
781 let hv1 = Hypervector::random(100, 0.1);
782 let hv2 = Hypervector::random(100, 0.1);
783 let hv3 = Hypervector::random(100, 0.1);
784
785 let vectors = vec![hv1, hv2, hv3];
786 let bundled = bundle_multiple(&vectors).expect("Operation failed");
787
788 assert_eq!(bundled.dimension, 100);
789 assert!(bundled.norm > 0.0);
790 }
791
792 #[test]
793 fn test_centroid() {
794 use vector_utils::*;
795
796 let hv1 = Hypervector::random(100, 0.1);
797 let hv2 = Hypervector::random(100, 0.1);
798
799 let vectors = vec![hv1, hv2];
800 let centroid_hv = centroid(&vectors).expect("Operation failed");
801
802 assert_eq!(centroid_hv.dimension, 100);
803 assert!(centroid_hv.norm > 0.0);
804 }
805}