// tenflowers_neural/hyperdimensional.rs
//! Hyperdimensional Computing (HDC) / Vector Symbolic Architectures (VSA).
//!
//! Implements brain-inspired, high-dimensional vector operations for
//! classification, sequence encoding, and associative memory — all in
//! pure Rust with no `unwrap()`.
//!
//! # Key Concepts
//!
//! - **Hypervector**: A very high-dimensional (≥10 000) binary or real-valued vector.
//! - **Bundle** (superposition): combines multiple hypervectors into one that is similar to all.
//! - **Bind** (binding): associates two hypervectors; result is dissimilar to both.
//! - **Permute**: cyclic shift used to encode position in sequences.
//!
//! # Quick Example
//!
//! ```rust,ignore
//! use tenflowers_neural::hyperdimensional::{BipolarHv, ItemMemory, HD_DIM};
//! use tenflowers_neural::hyperdimensional::{bind_bipolar, bundle_bipolar};
//!
//! let mut mem = ItemMemory::new(HD_DIM);
//! let a = mem.add_random("cat");
//! let b = mem.add_random("dog");
//! let bound = bind_bipolar(&a, &b);
//! if let Some((label, _score)) = mem.lookup(&bound) {
//!     println!("nearest: {}", label);
//! }
//! ```

use std::collections::HashMap;

use scirs2_core::random::{rngs::StdRng, Rng, SeedableRng};
use scirs2_core::RngExt;
use tenflowers_core::TensorError;

34// ─────────────────────────────────────────────────────────────────────────────
35// Seed helpers
36// ─────────────────────────────────────────────────────────────────────────────
37
38/// Build a deterministic (seeded) RNG from a u64 seed.
39#[inline]
40fn seeded_rng(seed: u64) -> StdRng {
41    StdRng::seed_from_u64(seed)
42}
43
44/// Build a non-deterministic RNG seeded from wall-clock nanoseconds.
45#[inline]
46fn nondeterministic_rng() -> StdRng {
47    use std::time::{SystemTime, UNIX_EPOCH};
48    let nanos = SystemTime::now()
49        .duration_since(UNIX_EPOCH)
50        .map(|d| d.subsec_nanos() as u64 ^ (d.as_secs().wrapping_mul(6_364_136_223_846_793_005)))
51        .unwrap_or(42);
52    StdRng::seed_from_u64(nanos)
53}
54
// ─────────────────────────────────────────────────────────────────────────────
// Constants
// ─────────────────────────────────────────────────────────────────────────────

/// Default hypervector dimensionality, the conventional choice in the HDC
/// literature (high enough that random vectors are quasi-orthogonal).
pub const HD_DIM: usize = 10_000;

// ─────────────────────────────────────────────────────────────────────────────
// HvType
// ─────────────────────────────────────────────────────────────────────────────

/// The algebraic flavour of a hypervector's components.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HvType {
    /// Components take the values 0 or 1.
    Binary,
    /// Components take the values +1 or -1.
    Bipolar,
    /// Components are continuous real numbers.
    Real,
}

// ─────────────────────────────────────────────────────────────────────────────
// BinaryHv
// ─────────────────────────────────────────────────────────────────────────────

/// A binary hypervector whose components are `true`/`false` bits.
#[derive(Debug, Clone, PartialEq)]
pub struct BinaryHv {
    /// Raw bit storage; its length is the vector's dimensionality.
    pub data: Vec<bool>,
}

88impl BinaryHv {
89    /// Create a random binary hypervector with each bit independently set to
90    /// `true` with probability 0.5.
91    pub fn random(dim: usize) -> Self {
92        let mut rng = nondeterministic_rng();
93        let data = (0..dim).map(|_| rng.random::<f64>() < 0.5).collect();
94        Self { data }
95    }
96
97    /// Create a seeded random binary hypervector (for reproducible tests).
98    pub fn random_seeded(dim: usize, seed: u64) -> Self {
99        let mut rng = seeded_rng(seed);
100        let data = (0..dim).map(|_| rng.random::<f64>() < 0.5).collect();
101        Self { data }
102    }
103
104    /// All-zeros binary hypervector.
105    pub fn zeros(dim: usize) -> Self {
106        Self {
107            data: vec![false; dim],
108        }
109    }
110
111    /// All-ones binary hypervector.
112    pub fn ones(dim: usize) -> Self {
113        Self {
114            data: vec![true; dim],
115        }
116    }
117
118    /// Dimensionality of this hypervector.
119    #[inline]
120    pub fn dim(&self) -> usize {
121        self.data.len()
122    }
123
124    /// Normalised Hamming distance in `[0, 1]`.
125    ///
126    /// Returns `0.0` for identical vectors and `1.0` for complementary vectors.
127    pub fn hamming_distance(&self, other: &BinaryHv) -> f64 {
128        let d = self.dim().min(other.dim());
129        if d == 0 {
130            return 0.0;
131        }
132        let differing = self
133            .data
134            .iter()
135            .zip(other.data.iter())
136            .filter(|(a, b)| a != b)
137            .count();
138        differing as f64 / d as f64
139    }
140
141    /// Cosine similarity between the ±1 representations of the two binary HVs.
142    ///
143    /// Maps each bit `b` to `+1` if `true` and `-1` if `false` before computing
144    /// cosine.  Returns a value in `[-1, 1]`.
145    pub fn cosine_similarity(&self, other: &BinaryHv) -> f64 {
146        let d = self.dim().min(other.dim());
147        if d == 0 {
148            return 0.0;
149        }
150        // cos = (sum of products) / d   — because both have unit L2 norm √d after mapping
151        let dot: f64 = self
152            .data
153            .iter()
154            .zip(other.data.iter())
155            .map(|(a, b)| {
156                let av = if *a { 1.0_f64 } else { -1.0_f64 };
157                let bv = if *b { 1.0_f64 } else { -1.0_f64 };
158                av * bv
159            })
160            .sum();
161        dot / d as f64
162    }
163}
164
// ─────────────────────────────────────────────────────────────────────────────
// BipolarHv
// ─────────────────────────────────────────────────────────────────────────────

/// A bipolar hypervector; by convention every component is exactly +1 or -1.
#[derive(Debug, Clone, PartialEq)]
pub struct BipolarHv {
    /// Raw component storage; its length is the vector's dimensionality.
    pub data: Vec<f64>,
}

176impl BipolarHv {
177    /// Create a random bipolar hypervector with each component independently
178    /// sampled from {+1, -1} with equal probability.
179    pub fn random(dim: usize) -> Self {
180        let mut rng = nondeterministic_rng();
181        let data = (0..dim)
182            .map(|_| {
183                if rng.random::<f64>() < 0.5 {
184                    1.0_f64
185                } else {
186                    -1.0_f64
187                }
188            })
189            .collect();
190        Self { data }
191    }
192
193    /// Seeded variant for reproducible construction.
194    pub fn random_seeded(dim: usize, seed: u64) -> Self {
195        let mut rng = seeded_rng(seed);
196        let data = (0..dim)
197            .map(|_| {
198                if rng.random::<f64>() < 0.5 {
199                    1.0_f64
200                } else {
201                    -1.0_f64
202                }
203            })
204            .collect();
205        Self { data }
206    }
207
208    /// Dimensionality.
209    #[inline]
210    pub fn dim(&self) -> usize {
211        self.data.len()
212    }
213
214    /// Dot product between two bipolar HVs.
215    pub fn dot(&self, other: &BipolarHv) -> f64 {
216        self.data
217            .iter()
218            .zip(other.data.iter())
219            .map(|(a, b)| a * b)
220            .sum()
221    }
222
223    /// Cosine similarity.  For unit bipolar HVs the squared norm is `dim`, so
224    /// `cos(u, v) = dot(u, v) / dim`.
225    pub fn cosine_similarity(&self, other: &BipolarHv) -> f64 {
226        let d = self.dim().min(other.dim());
227        if d == 0 {
228            return 0.0;
229        }
230        let dp: f64 = self
231            .data
232            .iter()
233            .zip(other.data.iter())
234            .map(|(a, b)| a * b)
235            .sum();
236        let norm_a: f64 = self.data.iter().map(|x| x * x).sum::<f64>().sqrt();
237        let norm_b: f64 = other.data.iter().map(|x| x * x).sum::<f64>().sqrt();
238        if norm_a == 0.0 || norm_b == 0.0 {
239            return 0.0;
240        }
241        dp / (norm_a * norm_b)
242    }
243
244    /// Convert to `BinaryHv` by mapping +1 → true, -1 → false.
245    pub fn to_binary(&self) -> BinaryHv {
246        BinaryHv {
247            data: self.data.iter().map(|&x| x > 0.0).collect(),
248        }
249    }
250}
251
// ─────────────────────────────────────────────────────────────────────────────
// RealHv
// ─────────────────────────────────────────────────────────────────────────────

/// A real-valued hypervector with continuous floating-point components.
#[derive(Debug, Clone, PartialEq)]
pub struct RealHv {
    /// Raw continuous-valued component storage.
    pub data: Vec<f64>,
}

263impl RealHv {
264    /// Create a random real hypervector with each component i.i.d. N(0, 1).
265    pub fn random(dim: usize) -> Self {
266        let mut rng = nondeterministic_rng();
267        let data = (0..dim)
268            .map(|_| {
269                // Box-Muller transform for N(0,1)
270                let u1: f64 = rng.random::<f64>().max(1e-15);
271                let u2: f64 = rng.random::<f64>();
272                (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
273            })
274            .collect();
275        Self { data }
276    }
277
278    /// Dimensionality.
279    #[inline]
280    pub fn dim(&self) -> usize {
281        self.data.len()
282    }
283
284    /// Cosine similarity.
285    pub fn cosine_similarity(&self, other: &RealHv) -> f64 {
286        let d = self.dim().min(other.dim());
287        if d == 0 {
288            return 0.0;
289        }
290        let dp: f64 = self.data[..d]
291            .iter()
292            .zip(other.data[..d].iter())
293            .map(|(a, b)| a * b)
294            .sum();
295        let na: f64 = self.data[..d].iter().map(|x| x * x).sum::<f64>().sqrt();
296        let nb: f64 = other.data[..d].iter().map(|x| x * x).sum::<f64>().sqrt();
297        if na == 0.0 || nb == 0.0 {
298            return 0.0;
299        }
300        dp / (na * nb)
301    }
302}
303
304// ─────────────────────────────────────────────────────────────────────────────
305// VSA Operations — Binary
306// ─────────────────────────────────────────────────────────────────────────────
307
308/// Bundle (superposition) of binary HVs via majority vote per position.
309///
310/// Tied votes (equal number of true and false inputs) are resolved to `true`
311/// (deterministic tie-break that preserves reproducibility).
312pub fn bundle_binary(hvs: &[&BinaryHv]) -> BinaryHv {
313    if hvs.is_empty() {
314        return BinaryHv::zeros(0);
315    }
316    let dim = hvs[0].dim();
317    let mut data = Vec::with_capacity(dim);
318    for pos in 0..dim {
319        let votes: i64 = hvs
320            .iter()
321            .map(|hv| {
322                if pos < hv.data.len() && hv.data[pos] {
323                    1
324                } else {
325                    -1
326                }
327            })
328            .sum();
329        // votes >= 0 → true (tie-break to true), votes < 0 → false
330        data.push(votes >= 0);
331    }
332    BinaryHv { data }
333}
334
335/// Bundle (superposition) of bipolar HVs: element-wise sum, then sign.
336///
337/// Zero-sum components are resolved to +1 (deterministic tie-break).
338/// This ensures that encoding the same inputs always yields the same output.
339pub fn bundle_bipolar(hvs: &[&BipolarHv]) -> BipolarHv {
340    if hvs.is_empty() {
341        return BipolarHv { data: vec![] };
342    }
343    let dim = hvs[0].dim();
344    let mut sums = vec![0.0_f64; dim];
345    for hv in hvs {
346        for (i, &v) in hv.data.iter().enumerate() {
347            if i < dim {
348                sums[i] += v;
349            }
350        }
351    }
352    let data = sums
353        .iter()
354        .map(|&s| if s >= 0.0 { 1.0 } else { -1.0 })
355        .collect();
356    BipolarHv { data }
357}
358
359/// Bind two binary HVs via element-wise XOR.
360pub fn bind_binary(a: &BinaryHv, b: &BinaryHv) -> BinaryHv {
361    let dim = a.dim().min(b.dim());
362    let data = a.data[..dim]
363        .iter()
364        .zip(b.data[..dim].iter())
365        .map(|(x, y)| x ^ y)
366        .collect();
367    BinaryHv { data }
368}
369
370/// Bind two bipolar HVs via element-wise product (Hadamard product).
371pub fn bind_bipolar(a: &BipolarHv, b: &BipolarHv) -> BipolarHv {
372    let dim = a.dim().min(b.dim());
373    let data = a.data[..dim]
374        .iter()
375        .zip(b.data[..dim].iter())
376        .map(|(x, y)| x * y)
377        .collect();
378    BipolarHv { data }
379}
380
381/// Permute a binary HV by a cyclic left shift of `k` positions.
382///
383/// `k` may be negative (right shift) or exceed `dim` (wraps around).
384pub fn permute_binary(hv: &BinaryHv, k: i64) -> BinaryHv {
385    let dim = hv.dim();
386    if dim == 0 {
387        return hv.clone();
388    }
389    let shift = ((k % dim as i64) + dim as i64) as usize % dim;
390    let mut data = vec![false; dim];
391    for i in 0..dim {
392        data[(i + dim - shift) % dim] = hv.data[i];
393    }
394    BinaryHv { data }
395}
396
397/// Permute a bipolar HV by a cyclic left shift of `k` positions.
398pub fn permute_bipolar(hv: &BipolarHv, k: i64) -> BipolarHv {
399    let dim = hv.dim();
400    if dim == 0 {
401        return hv.clone();
402    }
403    let shift = ((k % dim as i64) + dim as i64) as usize % dim;
404    let mut data = vec![0.0_f64; dim];
405    for i in 0..dim {
406        data[(i + dim - shift) % dim] = hv.data[i];
407    }
408    BipolarHv { data }
409}
410
411/// Unbind a binary HV (XOR is self-inverse: `unbind == bind`).
412#[inline]
413pub fn unbind_binary(bound: &BinaryHv, key: &BinaryHv) -> BinaryHv {
414    bind_binary(bound, key)
415}
416
417/// Unbind a bipolar HV (Hadamard product is self-inverse for ±1 vectors).
418#[inline]
419pub fn unbind_bipolar(bound: &BipolarHv, key: &BipolarHv) -> BipolarHv {
420    bind_bipolar(bound, key)
421}
422
423// ─────────────────────────────────────────────────────────────────────────────
424// Item Memory (Associative Memory)
425// ─────────────────────────────────────────────────────────────────────────────
426
427/// An associative item memory mapping string labels to bipolar hypervectors.
428///
429/// Lookup is nearest-neighbour cosine similarity.
430#[derive(Debug, Clone)]
431pub struct ItemMemory {
432    /// Stored label → hypervector pairs.
433    pub items: HashMap<String, BipolarHv>,
434    /// Dimensionality of stored hypervectors.
435    pub dim: usize,
436}
437
438impl ItemMemory {
439    /// Create an empty item memory with the given dimensionality.
440    pub fn new(dim: usize) -> Self {
441        Self {
442            items: HashMap::new(),
443            dim,
444        }
445    }
446
447    /// Store a hypervector under `label`.
448    pub fn add(&mut self, label: &str, hv: BipolarHv) {
449        self.items.insert(label.to_string(), hv);
450    }
451
452    /// Retrieve the stored hypervector for `label`, if present.
453    pub fn get(&self, label: &str) -> Option<&BipolarHv> {
454        self.items.get(label)
455    }
456
457    /// Find the label whose stored hypervector is most similar (cosine) to
458    /// `query`.  Returns `None` if the memory is empty.
459    pub fn lookup(&self, query: &BipolarHv) -> Option<(String, f64)> {
460        let mut best_label = None;
461        let mut best_score = f64::NEG_INFINITY;
462        for (label, hv) in &self.items {
463            let score = hv.cosine_similarity(query);
464            if score > best_score {
465                best_score = score;
466                best_label = Some(label.clone());
467            }
468        }
469        best_label.map(|l| (l, best_score))
470    }
471
472    /// Create a random hypervector, store it under `label`, and return a clone.
473    pub fn add_random(&mut self, label: &str) -> BipolarHv {
474        let hv = BipolarHv::random(self.dim);
475        self.items.insert(label.to_string(), hv.clone());
476        hv
477    }
478}
479
480// ─────────────────────────────────────────────────────────────────────────────
481// HD Encoding Strategies
482// ─────────────────────────────────────────────────────────────────────────────
483
484/// Level encoder: maps a continuous scalar to one of `n_levels` discrete
485/// bipolar hypervectors.
486///
487/// Consecutive level hypervectors differ by exactly `dim / n_levels` bits,
488/// giving them a graded similarity profile (thermometer-like in HV space).
489#[derive(Debug, Clone)]
490pub struct LevelEncoder {
491    /// Ordered level hypervectors (index 0 = lowest value).
492    pub levels: Vec<BipolarHv>,
493    /// Lower bound of the encoded range.
494    pub x_min: f64,
495    /// Upper bound of the encoded range.
496    pub x_max: f64,
497}
498
499impl LevelEncoder {
500    /// Build a new `LevelEncoder` with `n_levels` equally spaced levels.
501    ///
502    /// Construction: `levels[0]` is a random HV; each subsequent level flips
503    /// `dim / n_levels` randomly chosen positions (without replacement within
504    /// each step).
505    pub fn new(n_levels: usize, dim: usize, x_min: f64, x_max: f64) -> Self {
506        assert!(n_levels >= 1, "n_levels must be at least 1");
507        let mut rng = nondeterministic_rng();
508        // Build level 0 randomly.
509        let base: Vec<f64> = (0..dim)
510            .map(|_| if rng.random::<f64>() < 0.5 { 1.0 } else { -1.0 })
511            .collect();
512        let mut levels = vec![BipolarHv { data: base }];
513        // Determine how many bits to flip per step.
514        let flips_per_step = (dim / n_levels).max(1);
515        // Build a permutation of indices to deterministically choose flip positions.
516        let mut indices: Vec<usize> = (0..dim).collect();
517        // Shuffle once, then use slices of size `flips_per_step`.
518        for i in (1..dim).rev() {
519            let j = (rng.random::<f64>() * (i + 1) as f64) as usize;
520            indices.swap(i, j);
521        }
522        let mut current = levels[0].data.clone();
523        let mut flip_cursor = 0usize;
524        for _level in 1..n_levels {
525            // Flip the next `flips_per_step` indices.
526            for k in 0..flips_per_step {
527                let idx = indices[(flip_cursor + k) % dim];
528                current[idx] = -current[idx];
529            }
530            flip_cursor = (flip_cursor + flips_per_step) % dim;
531            levels.push(BipolarHv {
532                data: current.clone(),
533            });
534        }
535        Self {
536            levels,
537            x_min,
538            x_max,
539        }
540    }
541
542    /// Quantise `x` to the nearest level and return a reference to its HV.
543    pub fn encode(&self, x: f64) -> &BipolarHv {
544        let n = self.levels.len();
545        if n == 1 {
546            return &self.levels[0];
547        }
548        let clamped = x.max(self.x_min).min(self.x_max);
549        let t = (clamped - self.x_min) / (self.x_max - self.x_min);
550        let idx = ((t * (n - 1) as f64).round() as usize).min(n - 1);
551        &self.levels[idx]
552    }
553}
554
555/// Thermometer encoder: encodes a level index by flipping progressively more
556/// bits from a fixed base vector.
557///
558/// Level `k` has the first `k * dim / n_levels` bits flipped relative to the
559/// base, giving a monotone similarity ordering.
560#[derive(Debug, Clone)]
561pub struct ThermometerEncoder {
562    /// The base (all-random) hypervector for level 0.
563    pub base_hv: BipolarHv,
564    /// Ordered flip positions (length == dim).
565    pub flip_hvs: Vec<BipolarHv>,
566    /// Number of discrete levels.
567    pub n_levels: usize,
568}
569
570impl ThermometerEncoder {
571    /// Build a new `ThermometerEncoder` with `n_levels` levels and given `dim`.
572    pub fn new(n_levels: usize, dim: usize) -> Self {
573        let mut rng = nondeterministic_rng();
574        let base_data: Vec<f64> = (0..dim)
575            .map(|_| if rng.random::<f64>() < 0.5 { 1.0 } else { -1.0 })
576            .collect();
577        let base_hv = BipolarHv { data: base_data };
578        // Precompute intermediate flip HVs for each level (unused in encode_level
579        // but stored for transparency / inspection).
580        let mut flip_hvs = Vec::with_capacity(n_levels);
581        let flips_per_level = (dim / n_levels.max(1)).max(1);
582        let mut current = base_hv.data.clone();
583        for level in 0..n_levels {
584            let start = level * flips_per_level;
585            let end = ((level + 1) * flips_per_level).min(dim);
586            for i in start..end {
587                current[i] = -current[i];
588            }
589            flip_hvs.push(BipolarHv {
590                data: current.clone(),
591            });
592        }
593        Self {
594            base_hv,
595            flip_hvs,
596            n_levels,
597        }
598    }
599
600    /// Encode `level` (0-indexed) as a thermometer HV.
601    ///
602    /// Returns a new BipolarHv that has the first `level * dim / n_levels` bits
603    /// of the base flipped.
604    pub fn encode_level(&self, level: usize) -> BipolarHv {
605        let dim = self.base_hv.dim();
606        let n = self.n_levels.max(1);
607        let flips_per_level = (dim / n).max(1);
608        let n_flipped = (level * flips_per_level).min(dim);
609        let mut data = self.base_hv.data.clone();
610        for i in 0..n_flipped {
611            data[i] = -data[i];
612        }
613        BipolarHv { data }
614    }
615}
616
617/// ID encoder: maps integer IDs to random (maximally orthogonal) bipolar HVs.
618///
619/// Each new ID receives a freshly sampled random HV, which is then cached.
620#[derive(Debug, Clone)]
621pub struct IdEncoder {
622    /// Cached HV per integer ID.
623    pub ids: HashMap<usize, BipolarHv>,
624    /// HV dimensionality.
625    pub dim: usize,
626}
627
628impl IdEncoder {
629    /// Create an empty `IdEncoder`.
630    pub fn new(dim: usize) -> Self {
631        Self {
632            ids: HashMap::new(),
633            dim,
634        }
635    }
636
637    /// Return the HV for `id`, creating and caching a new random one if absent.
638    pub fn get_or_create(&mut self, id: usize) -> BipolarHv {
639        if !self.ids.contains_key(&id) {
640            let hv = BipolarHv::random(self.dim);
641            self.ids.insert(id, hv);
642        }
643        // Safe: we just inserted it above if missing.
644        match self.ids.get(&id) {
645            Some(hv) => hv.clone(),
646            None => BipolarHv::random(self.dim), // unreachable
647        }
648    }
649}
650
651// ─────────────────────────────────────────────────────────────────────────────
652// HD Classifier
653// ─────────────────────────────────────────────────────────────────────────────
654
655/// Online HD classifier that accumulates class prototype hypervectors.
656///
657/// Training bundles each new sample into the running class prototype; inference
658/// finds the most similar prototype by cosine similarity.
659#[derive(Debug, Clone)]
660pub struct HdClassifier {
661    /// Accumulated (un-normalised) prototype HV per class label.
662    pub class_hvs: HashMap<String, BipolarHv>,
663    /// HV dimensionality.
664    pub dim: usize,
665    /// Number of training samples added per class.
666    pub class_counts: HashMap<String, usize>,
667}
668
669impl HdClassifier {
670    /// Create a new (empty) classifier.
671    pub fn new(dim: usize) -> Self {
672        Self {
673            class_hvs: HashMap::new(),
674            dim,
675            class_counts: HashMap::new(),
676        }
677    }
678
679    /// Incorporate one labelled sample into the running class prototype.
680    pub fn train_one(&mut self, sample_hv: &BipolarHv, label: &str) {
681        let count = self.class_counts.entry(label.to_string()).or_insert(0);
682        *count += 1;
683        let entry = self
684            .class_hvs
685            .entry(label.to_string())
686            .or_insert_with(|| BipolarHv {
687                data: vec![0.0; self.dim],
688            });
689        // Accumulate raw sum — we binarise lazily at prediction time.
690        for (acc, &s) in entry.data.iter_mut().zip(sample_hv.data.iter()) {
691            *acc += s;
692        }
693    }
694
695    /// Predict the class label for `query`.  Returns `None` if no training has
696    /// occurred.
697    pub fn predict(&self, query: &BipolarHv) -> Option<String> {
698        self.predict_with_score(query).map(|(l, _)| l)
699    }
700
701    /// Predict class and return the cosine similarity score.
702    pub fn predict_with_score(&self, query: &BipolarHv) -> Option<(String, f64)> {
703        let mut best_label: Option<String> = None;
704        let mut best_score = f64::NEG_INFINITY;
705        for (label, acc_hv) in &self.class_hvs {
706            // Binarise the accumulated prototype on-the-fly.
707            let proto = binarise_accumulator(acc_hv);
708            let score = proto.cosine_similarity(query);
709            if score > best_score {
710                best_score = score;
711                best_label = Some(label.clone());
712            }
713        }
714        best_label.map(|l| (l, best_score))
715    }
716
717    /// Online error-corrective retraining.
718    ///
719    /// When a sample is predicted as `predicted` but the true label is
720    /// `true_label`, subtract the sample from the wrong prototype and add it
721    /// to the correct prototype.
722    pub fn retrain_wrong(&mut self, sample_hv: &BipolarHv, predicted: &str, true_label: &str) {
723        // Subtract from the wrong class.
724        if let Some(wrong_hv) = self.class_hvs.get_mut(predicted) {
725            for (acc, &s) in wrong_hv.data.iter_mut().zip(sample_hv.data.iter()) {
726                *acc -= s;
727            }
728        }
729        // Add to the correct class.
730        self.train_one(sample_hv, true_label);
731    }
732}
733
734/// Convert an accumulated (real-valued sum) HV to a bipolar HV.
735///
736/// Zero components are resolved to +1 (arbitrary but deterministic tie-break).
737fn binarise_accumulator(acc: &BipolarHv) -> BipolarHv {
738    let data = acc
739        .data
740        .iter()
741        .map(|&v| if v >= 0.0 { 1.0 } else { -1.0 })
742        .collect();
743    BipolarHv { data }
744}
745
// ─────────────────────────────────────────────────────────────────────────────
// Sparse Distributed Memory (Kanerva SDM)
// ─────────────────────────────────────────────────────────────────────────────

/// Configuration for a sparse distributed memory.
#[derive(Debug, Clone)]
pub struct SdmConfig {
    /// Number of bits in the address space.
    pub address_dim: usize,
    /// Width of each stored data word, in bits.
    pub data_dim: usize,
    /// Number of randomly placed hard locations.
    pub n_hard_locations: usize,
    /// Maximum Hamming distance at which a location activates.
    pub hamming_threshold: usize,
}

763/// Kanerva Sparse Distributed Memory with integer counters.
764///
765/// Each hard location has an address (random binary) and a data counter array
766/// (one integer per data bit).  Writing increments (if data bit = 1) or
767/// decrements (if data bit = 0) activated locations.  Reading sums counters
768/// and thresholds at 0.
769#[derive(Debug, Clone)]
770pub struct SparseSdm {
771    /// Random binary addresses: `[n_locations][address_dim]`.
772    pub addresses: Vec<Vec<bool>>,
773    /// Integer counter arrays: `[n_locations][data_dim]`.
774    pub counters: Vec<Vec<i32>>,
775    /// Configuration used at construction.
776    pub config: SdmConfig,
777}
778
779impl SparseSdm {
780    /// Create a new SDM with randomly sampled hard-location addresses.
781    pub fn new(config: SdmConfig) -> Self {
782        let mut rng = nondeterministic_rng();
783        let addresses: Vec<Vec<bool>> = (0..config.n_hard_locations)
784            .map(|_| {
785                (0..config.address_dim)
786                    .map(|_| rng.random::<f64>() < 0.5)
787                    .collect()
788            })
789            .collect();
790        let counters = vec![vec![0i32; config.data_dim]; config.n_hard_locations];
791        Self {
792            addresses,
793            counters,
794            config,
795        }
796    }
797
798    /// Return the indices of all hard locations activated by `address`.
799    pub fn activated_locations(&self, address: &[bool]) -> Vec<usize> {
800        self.addresses
801            .iter()
802            .enumerate()
803            .filter_map(|(i, loc_addr)| {
804                let dist = hamming_bool(address, loc_addr);
805                if dist <= self.config.hamming_threshold {
806                    Some(i)
807                } else {
808                    None
809                }
810            })
811            .collect()
812    }
813
814    /// Write `data` to all activated hard locations.
815    pub fn write(&mut self, address: &[bool], data: &[bool]) {
816        let activated = self.activated_locations(address);
817        for loc_idx in activated {
818            for (bit_idx, &data_bit) in data.iter().enumerate() {
819                if bit_idx < self.config.data_dim {
820                    if data_bit {
821                        self.counters[loc_idx][bit_idx] += 1;
822                    } else {
823                        self.counters[loc_idx][bit_idx] -= 1;
824                    }
825                }
826            }
827        }
828    }
829
830    /// Read from all activated hard locations, sum counters, threshold at 0.
831    pub fn read(&self, address: &[bool]) -> Vec<bool> {
832        let activated = self.activated_locations(address);
833        let mut sums = vec![0i32; self.config.data_dim];
834        for loc_idx in &activated {
835            for (bit_idx, &counter) in self.counters[*loc_idx].iter().enumerate() {
836                sums[bit_idx] += counter;
837            }
838        }
839        sums.iter().map(|&s| s >= 0).collect()
840    }
841}
842
/// Hamming distance between two boolean slices over their common prefix
/// (counts positions where the bits differ).
fn hamming_bool(a: &[bool], b: &[bool]) -> usize {
    let mut differing = 0;
    for (x, y) in a.iter().zip(b.iter()) {
        if x != y {
            differing += 1;
        }
    }
    differing
}

848// ─────────────────────────────────────────────────────────────────────────────
849// Online HDC (Incremental Learning)
850// ─────────────────────────────────────────────────────────────────────────────
851
852/// End-to-end online HDC pipeline: feature-level encoding → classifier.
853///
854/// Each feature dimension is represented by an ID hypervector; the scalar value
855/// of that feature is encoded by a level encoder.  The per-feature contribution
856/// is `bind(feature_id_hv, level_hv)` and contributions are bundled together.
857#[derive(Debug, Clone)]
858pub struct OnlineHdc {
859    /// Associative memory for feature IDs.
860    pub item_memory: ItemMemory,
861    /// Shared level encoder for all features.
862    pub level_encoder: LevelEncoder,
863    /// HD classifier accumulating class prototypes.
864    pub classifier: HdClassifier,
865    /// HV dimensionality.
866    pub dim: usize,
867    /// Number of features per sample.
868    pub n_features: usize,
869}
870
871impl OnlineHdc {
872    /// Construct a fresh pipeline.
873    ///
874    /// * `n_features`: number of input features.
875    /// * `n_levels`: quantisation resolution for the level encoder.
876    /// * `dim`: HV dimensionality.
877    ///
878    /// The level encoder covers the range `[-1.0, 1.0]` by default; normalise
879    /// your data if needed.
880    pub fn new(n_features: usize, n_levels: usize, dim: usize) -> Self {
881        let item_memory = ItemMemory::new(dim);
882        let level_encoder = LevelEncoder::new(n_levels, dim, -1.0, 1.0);
883        let classifier = HdClassifier::new(dim);
884        Self {
885            item_memory,
886            level_encoder,
887            classifier,
888            dim,
889            n_features,
890        }
891    }
892
893    /// Encode a feature vector into a single bipolar hypervector.
894    ///
895    /// For each feature `i`, retrieves (or creates) a random ID HV, encodes
896    /// `features[i]` via the level encoder, binds them, then bundles across
897    /// all features.
898    pub fn encode_sample(&mut self, features: &[f64]) -> BipolarHv {
899        let n = features.len().min(self.n_features);
900        let mut component_hvs: Vec<BipolarHv> = Vec::with_capacity(n);
901        for i in 0..n {
902            let id_label = format!("feature_{i}");
903            let id_hv = match self.item_memory.items.get(&id_label) {
904                Some(hv) => hv.clone(),
905                None => {
906                    let hv = BipolarHv::random(self.dim);
907                    self.item_memory.add(&id_label, hv.clone());
908                    hv
909                }
910            };
911            let level_hv = self.level_encoder.encode(features[i]).clone();
912            component_hvs.push(bind_bipolar(&id_hv, &level_hv));
913        }
914        if component_hvs.is_empty() {
915            return BipolarHv::random(self.dim);
916        }
917        let refs: Vec<&BipolarHv> = component_hvs.iter().collect();
918        bundle_bipolar(&refs)
919    }
920
921    /// Encode and train on a single labelled sample.
922    pub fn train(&mut self, features: &[f64], label: &str) {
923        let hv = self.encode_sample(features);
924        self.classifier.train_one(&hv, label);
925    }
926
927    /// Encode a query and predict its class label.
928    pub fn predict(&mut self, features: &[f64]) -> Option<String> {
929        let hv = self.encode_sample(features);
930        self.classifier.predict(&hv)
931    }
932
933    /// Compute accuracy over a slice of (features, label) pairs.
934    pub fn accuracy(&mut self, samples: &[(Vec<f64>, String)]) -> f64 {
935        if samples.is_empty() {
936            return 0.0;
937        }
938        let mut correct = 0usize;
939        for (features, true_label) in samples {
940            if let Some(pred) = self.predict(features) {
941                if &pred == true_label {
942                    correct += 1;
943                }
944            }
945        }
946        correct as f64 / samples.len() as f64
947    }
948}
949
950// ─────────────────────────────────────────────────────────────────────────────
951// Sequence Modeling with Permutation
952// ─────────────────────────────────────────────────────────────────────────────
953
/// Sequence encoder using permutation-based positional encoding.
///
/// A sequence `[a, b, c]` is encoded as:
/// `bundle(permute(hv_a, 2), permute(hv_b, 1), hv_c)`
///
/// This preserves order information: the same tokens in a different order
/// produce a dissimilar HV.
///
/// Token hypervectors are drawn at random on first use and cached in
/// `item_memory`, so re-encoding the same tokens within one encoder
/// instance is deterministic.
#[derive(Debug, Clone)]
pub struct HdSequenceEncoder {
    /// Backing item memory for token HVs (keyed by token string).
    pub item_memory: ItemMemory,
    /// HV dimensionality.
    pub dim: usize,
}
968
969impl HdSequenceEncoder {
970    /// Create a new sequence encoder.
971    pub fn new(dim: usize) -> Self {
972        Self {
973            item_memory: ItemMemory::new(dim),
974            dim,
975        }
976    }
977
978    /// Retrieve or create the HV for a token string.
979    fn get_or_add_token(&mut self, token: &str) -> BipolarHv {
980        match self.item_memory.items.get(token) {
981            Some(hv) => hv.clone(),
982            None => {
983                let hv = BipolarHv::random(self.dim);
984                self.item_memory.add(token, hv.clone());
985                hv
986            }
987        }
988    }
989
990    /// Encode a token sequence into a single bipolar HV.
991    ///
992    /// Position 0 (last token in the reversed indexing scheme) receives
993    /// permutation 0, position 1 receives permutation 1, etc.
994    pub fn encode_sequence(&mut self, tokens: &[&str]) -> BipolarHv {
995        let n = tokens.len();
996        if n == 0 {
997            return BipolarHv::random(self.dim);
998        }
999        let mut components: Vec<BipolarHv> = Vec::with_capacity(n);
1000        for (i, &token) in tokens.iter().enumerate() {
1001            let hv = self.get_or_add_token(token);
1002            // shift amount: last token gets 0, second-to-last gets 1, …, first gets n-1
1003            let shift = (n - 1 - i) as i64;
1004            components.push(permute_bipolar(&hv, shift));
1005        }
1006        let refs: Vec<&BipolarHv> = components.iter().collect();
1007        bundle_bipolar(&refs)
1008    }
1009
1010    /// Query whether `token` appears at `position` (0-indexed from the start).
1011    ///
1012    /// Returns the cosine similarity between the decoded vector and the token
1013    /// HV.  A high value (>0.5) indicates likely presence at that position.
1014    pub fn query_position(
1015        &self,
1016        sequence_hv: &BipolarHv,
1017        token: &str,
1018        position: usize,
1019        seq_len: usize,
1020    ) -> f64 {
1021        let token_hv = match self.item_memory.items.get(token) {
1022            Some(hv) => hv,
1023            None => return 0.0,
1024        };
1025        // The shift applied at encoding was (seq_len - 1 - position).
1026        let shift = (seq_len.saturating_sub(1).saturating_sub(position)) as i64;
1027        // Undo the permutation.
1028        let unshifted = permute_bipolar(sequence_hv, -shift);
1029        unshifted.cosine_similarity(token_hv)
1030    }
1031}
1032
1033// ─────────────────────────────────────────────────────────────────────────────
1034// HD Metrics
1035// ─────────────────────────────────────────────────────────────────────────────
1036
/// Summary statistics measuring the separability of class HVs.
#[derive(Debug, Clone)]
pub struct HdcStats {
    /// Mean intra-class cosine similarity (same class pairs).
    /// With a single prototype per class this is trivially 1.0.
    pub mean_similarity_same_class: f64,
    /// Mean inter-class cosine similarity (cross-class pairs);
    /// 0.0 when there are fewer than two classes.
    pub mean_similarity_diff_class: f64,
    /// Separation = `mean_same - mean_diff` (higher is better).
    pub separation: f64,
}
1047
1048/// Compute HDC statistics from a map of class prototype HVs.
1049///
1050/// For classes with a single prototype each, intra-class similarity is
1051/// trivially 1.0 (a vector with itself).
1052pub fn compute_hdc_stats(class_hvs: &HashMap<String, BipolarHv>) -> HdcStats {
1053    let labels: Vec<&String> = class_hvs.keys().collect();
1054    let n = labels.len();
1055    if n == 0 {
1056        return HdcStats {
1057            mean_similarity_same_class: 0.0,
1058            mean_similarity_diff_class: 0.0,
1059            separation: 0.0,
1060        };
1061    }
1062    // Intra-class: each prototype with itself = 1.0.
1063    let mean_same = 1.0_f64;
1064    // Inter-class: all distinct pairs.
1065    let mut cross_sum = 0.0_f64;
1066    let mut cross_count = 0usize;
1067    for i in 0..n {
1068        for j in (i + 1)..n {
1069            let hv_i = &class_hvs[labels[i]];
1070            let hv_j = &class_hvs[labels[j]];
1071            cross_sum += hv_i.cosine_similarity(hv_j);
1072            cross_count += 1;
1073        }
1074    }
1075    let mean_diff = if cross_count > 0 {
1076        cross_sum / cross_count as f64
1077    } else {
1078        0.0
1079    };
1080    HdcStats {
1081        mean_similarity_same_class: mean_same,
1082        mean_similarity_diff_class: mean_diff,
1083        separation: mean_same - mean_diff,
1084    }
1085}
1086
1087/// Compute mean pairwise cosine similarity among a set of HVs.
1088///
1089/// For a set of truly random bipolar HVs, the expected value is ≈ 0 (orthogonality).
1090pub fn orthogonality_test(hvs: &[BipolarHv]) -> f64 {
1091    let n = hvs.len();
1092    if n < 2 {
1093        return 0.0;
1094    }
1095    let mut sum = 0.0_f64;
1096    let mut count = 0usize;
1097    for i in 0..n {
1098        for j in (i + 1)..n {
1099            sum += hvs[i].cosine_similarity(&hvs[j]);
1100            count += 1;
1101        }
1102    }
1103    if count == 0 {
1104        0.0
1105    } else {
1106        sum / count as f64
1107    }
1108}
1109
1110// ─────────────────────────────────────────────────────────────────────────────
1111// Error type
1112// ─────────────────────────────────────────────────────────────────────────────
1113
/// Errors that can arise from HDC operations.
#[derive(Debug, Clone)]
pub enum HdcError {
    /// Dimension mismatch between two hypervectors.
    DimensionMismatch { expected: usize, got: usize },
    /// An empty collection was supplied where at least one element was required.
    EmptyInput,
    /// The item memory was queried but is empty.
    EmptyMemory,
}

impl std::fmt::Display for HdcError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fixed-text variants share a single `write_str`; the mismatch
        // variant formats its payload and returns early.
        let fixed = match self {
            HdcError::DimensionMismatch { expected, got } => {
                return write!(f, "HDC dimension mismatch: expected {expected}, got {got}");
            }
            HdcError::EmptyInput => "HDC operation requires at least one input",
            HdcError::EmptyMemory => "HDC item memory is empty",
        };
        f.write_str(fixed)
    }
}

// `Debug` + `Display` are all the `Error` trait needs here.
impl std::error::Error for HdcError {}
1138
/// Bridge into the tensor-level error type: the HDC error is flattened to
/// its `Display` text and reported as an invalid-argument condition.
impl From<HdcError> for TensorError {
    fn from(e: HdcError) -> Self {
        TensorError::invalid_argument(e.to_string())
    }
}
1144
1145// ─────────────────────────────────────────────────────────────────────────────
1146// Tests
1147// ─────────────────────────────────────────────────────────────────────────────
1148
1149#[cfg(test)]
1150mod tests {
1151    use super::*;
1152
1153    const DIM: usize = 1000; // smaller than HD_DIM for test speed
1154    const LARGE_DIM: usize = 5000;
1155
1156    // ── BinaryHv ─────────────────────────────────────────────────────────────
1157
1158    #[test]
1159    fn test_binary_random_approximately_half_ones() {
1160        let hv = BinaryHv::random_seeded(10_000, 0);
1161        let ones = hv.data.iter().filter(|&&b| b).count();
1162        let ratio = ones as f64 / 10_000.0;
1163        // Expect within 5% of 50%.
1164        assert!((ratio - 0.5).abs() < 0.05, "ratio={ratio}");
1165    }
1166
1167    #[test]
1168    fn test_binary_zeros_all_false() {
1169        let hv = BinaryHv::zeros(100);
1170        assert!(hv.data.iter().all(|&b| !b));
1171    }
1172
1173    #[test]
1174    fn test_binary_ones_all_true() {
1175        let hv = BinaryHv::ones(100);
1176        assert!(hv.data.iter().all(|&b| b));
1177    }
1178
1179    #[test]
1180    fn test_binary_hamming_identical() {
1181        let hv = BinaryHv::random_seeded(DIM, 1);
1182        assert!((hv.hamming_distance(&hv) - 0.0).abs() < 1e-9);
1183    }
1184
1185    #[test]
1186    fn test_binary_hamming_complement() {
1187        let hv = BinaryHv::random_seeded(100, 2);
1188        let comp = BinaryHv {
1189            data: hv.data.iter().map(|&b| !b).collect(),
1190        };
1191        let d = hv.hamming_distance(&comp);
1192        assert!((d - 1.0).abs() < 1e-9, "complement hamming={d}");
1193    }
1194
1195    #[test]
1196    fn test_binary_cosine_identical() {
1197        let hv = BinaryHv::random_seeded(DIM, 3);
1198        let cos = hv.cosine_similarity(&hv);
1199        assert!((cos - 1.0).abs() < 1e-9, "cos={cos}");
1200    }
1201
1202    #[test]
1203    fn test_binary_cosine_complement_is_minus_one() {
1204        let hv = BinaryHv::random_seeded(100, 4);
1205        let comp = BinaryHv {
1206            data: hv.data.iter().map(|&b| !b).collect(),
1207        };
1208        let cos = hv.cosine_similarity(&comp);
1209        assert!((cos + 1.0).abs() < 1e-9, "cos={cos}");
1210    }
1211
1212    // ── BipolarHv ─────────────────────────────────────────────────────────────
1213
1214    #[test]
1215    fn test_bipolar_random_approximately_half_positive() {
1216        let hv = BipolarHv::random_seeded(10_000, 5);
1217        let pos = hv.data.iter().filter(|&&v| v > 0.0).count();
1218        let ratio = pos as f64 / 10_000.0;
1219        assert!((ratio - 0.5).abs() < 0.05, "ratio={ratio}");
1220    }
1221
1222    #[test]
1223    fn test_bipolar_cosine_identical() {
1224        let hv = BipolarHv::random_seeded(DIM, 6);
1225        let cos = hv.cosine_similarity(&hv);
1226        assert!((cos - 1.0).abs() < 1e-9, "cos={cos}");
1227    }
1228
1229    #[test]
1230    fn test_bipolar_dot_product() {
1231        let hv = BipolarHv {
1232            data: vec![1.0, -1.0, 1.0, 1.0],
1233        };
1234        let other = BipolarHv {
1235            data: vec![1.0, 1.0, -1.0, 1.0],
1236        };
1237        let dot = hv.dot(&other);
1238        // 1*1 + (-1)*1 + 1*(-1) + 1*1 = 1 - 1 - 1 + 1 = 0
1239        assert!((dot - 0.0).abs() < 1e-9, "dot={dot}");
1240    }
1241
1242    // ── Bundle ────────────────────────────────────────────────────────────────
1243
1244    #[test]
1245    fn test_bundle_binary_identical_returns_same() {
1246        let hv = BinaryHv::random_seeded(DIM, 7);
1247        let refs: Vec<&BinaryHv> = vec![&hv, &hv, &hv];
1248        let result = bundle_binary(&refs);
1249        // Majority of identical = original
1250        assert_eq!(result.data, hv.data);
1251    }
1252
1253    #[test]
1254    fn test_bundle_bipolar_identical_returns_same() {
1255        let hv = BipolarHv::random_seeded(DIM, 8);
1256        let refs: Vec<&BipolarHv> = vec![&hv, &hv, &hv];
1257        let result = bundle_bipolar(&refs);
1258        assert_eq!(result.data, hv.data);
1259    }
1260
1261    #[test]
1262    fn test_bundle_bipolar_two_different_is_similar_to_both() {
1263        let a = BipolarHv::random_seeded(LARGE_DIM, 9);
1264        let b = BipolarHv::random_seeded(LARGE_DIM, 10);
1265        let bundled = bundle_bipolar(&[&a, &b]);
1266        let cos_a = bundled.cosine_similarity(&a);
1267        let cos_b = bundled.cosine_similarity(&b);
1268        // Bundle should be roughly equally similar to both (>0).
1269        assert!(cos_a > 0.0, "cos_a={cos_a}");
1270        assert!(cos_b > 0.0, "cos_b={cos_b}");
1271    }
1272
1273    // ── Bind / Unbind ─────────────────────────────────────────────────────────
1274
1275    #[test]
1276    fn test_bind_binary_self_inverse() {
1277        let a = BinaryHv::random_seeded(DIM, 11);
1278        let b = BinaryHv::random_seeded(DIM, 12);
1279        let bound = bind_binary(&a, &b);
1280        let recovered = unbind_binary(&bound, &b);
1281        assert_eq!(recovered.data, a.data);
1282    }
1283
1284    #[test]
1285    fn test_bind_bipolar_self_inverse() {
1286        let a = BipolarHv::random_seeded(DIM, 13);
1287        let b = BipolarHv::random_seeded(DIM, 14);
1288        let bound = bind_bipolar(&a, &b);
1289        let recovered = unbind_bipolar(&bound, &b);
1290        // After bind + unbind, recovered should be identical to a.
1291        let cos = recovered.cosine_similarity(&a);
1292        assert!((cos - 1.0).abs() < 1e-9, "cos={cos}");
1293    }
1294
1295    #[test]
1296    fn test_bind_bipolar_dissimilar_to_inputs() {
1297        let a = BipolarHv::random_seeded(LARGE_DIM, 15);
1298        let b = BipolarHv::random_seeded(LARGE_DIM, 16);
1299        let bound = bind_bipolar(&a, &b);
1300        let cos_a = bound.cosine_similarity(&a);
1301        let cos_b = bound.cosine_similarity(&b);
1302        // Bound should be nearly orthogonal to both inputs.
1303        assert!(cos_a.abs() < 0.2, "cos_a={cos_a}");
1304        assert!(cos_b.abs() < 0.2, "cos_b={cos_b}");
1305    }
1306
1307    // ── Permute ───────────────────────────────────────────────────────────────
1308
1309    #[test]
1310    fn test_permute_binary_zero_is_identity() {
1311        let hv = BinaryHv::random_seeded(DIM, 17);
1312        let perm = permute_binary(&hv, 0);
1313        assert_eq!(perm.data, hv.data);
1314    }
1315
1316    #[test]
1317    fn test_permute_bipolar_zero_is_identity() {
1318        let hv = BipolarHv::random_seeded(DIM, 18);
1319        let perm = permute_bipolar(&hv, 0);
1320        assert_eq!(perm.data, hv.data);
1321    }
1322
1323    #[test]
1324    fn test_permute_then_reverse_bipolar_is_identity() {
1325        let hv = BipolarHv::random_seeded(DIM, 19);
1326        let k = 137_i64;
1327        let perm = permute_bipolar(&hv, k);
1328        let back = permute_bipolar(&perm, -k);
1329        assert_eq!(back.data, hv.data);
1330    }
1331
1332    #[test]
1333    fn test_permute_then_reverse_binary_is_identity() {
1334        let hv = BinaryHv::random_seeded(DIM, 20);
1335        let k = 73_i64;
1336        let perm = permute_binary(&hv, k);
1337        let back = permute_binary(&perm, -k);
1338        assert_eq!(back.data, hv.data);
1339    }
1340
1341    #[test]
1342    fn test_permute_bipolar_nonzero_dissimilar() {
1343        let hv = BipolarHv::random_seeded(LARGE_DIM, 21);
1344        let perm = permute_bipolar(&hv, 1);
1345        let cos = hv.cosine_similarity(&perm);
1346        // A shift of 1 in 5000 dims → ~0 expected cosine.
1347        assert!(cos.abs() < 0.2, "cos={cos}");
1348    }
1349
1350    // ── ItemMemory ────────────────────────────────────────────────────────────
1351
1352    #[test]
1353    fn test_item_memory_lookup_finds_correct_label() {
1354        let mut mem = ItemMemory::new(DIM);
1355        let hv_a = mem.add_random("alpha");
1356        let _hv_b = mem.add_random("beta");
1357        let _hv_c = mem.add_random("gamma");
1358        let result = mem.lookup(&hv_a);
1359        assert!(result.is_some());
1360        let (label, _score) = result.expect("lookup must succeed");
1361        assert_eq!(label, "alpha");
1362    }
1363
1364    #[test]
1365    fn test_item_memory_get() {
1366        let mut mem = ItemMemory::new(DIM);
1367        mem.add_random("x");
1368        assert!(mem.get("x").is_some());
1369        assert!(mem.get("missing").is_none());
1370    }
1371
1372    #[test]
1373    fn test_item_memory_empty_lookup_returns_none() {
1374        let mem = ItemMemory::new(DIM);
1375        let query = BipolarHv::random_seeded(DIM, 22);
1376        assert!(mem.lookup(&query).is_none());
1377    }
1378
1379    // ── LevelEncoder ──────────────────────────────────────────────────────────
1380
1381    #[test]
1382    fn test_level_encoder_extreme_values_map_to_first_last_level() {
1383        let enc = LevelEncoder::new(10, DIM, 0.0, 1.0);
1384        let first = enc.encode(0.0);
1385        let last = enc.encode(1.0);
1386        // Should be the first and last level HVs.
1387        assert_eq!(first.data, enc.levels[0].data);
1388        assert_eq!(last.data, enc.levels[9].data);
1389    }
1390
1391    #[test]
1392    fn test_level_encoder_single_level() {
1393        let enc = LevelEncoder::new(1, DIM, -1.0, 1.0);
1394        let hv = enc.encode(0.0);
1395        assert_eq!(hv.data, enc.levels[0].data);
1396    }
1397
1398    #[test]
1399    fn test_level_encoder_monotone_similarity() {
1400        // Consecutive level HVs should be more similar than non-consecutive.
1401        let enc = LevelEncoder::new(20, DIM, 0.0, 1.0);
1402        let cos_adj = enc.levels[0].cosine_similarity(&enc.levels[1]);
1403        let cos_far = enc.levels[0].cosine_similarity(&enc.levels[10]);
1404        assert!(cos_adj > cos_far, "adj={cos_adj}, far={cos_far}");
1405    }
1406
1407    // ── ThermometerEncoder ────────────────────────────────────────────────────
1408
1409    #[test]
1410    fn test_thermometer_encoding_level_zero_near_base() {
1411        let enc = ThermometerEncoder::new(10, DIM);
1412        let l0 = enc.encode_level(0);
1413        // Level 0 should be the base_hv itself (no bits flipped).
1414        assert_eq!(l0.data, enc.base_hv.data);
1415    }
1416
1417    #[test]
1418    fn test_thermometer_encoding_monotone() {
1419        let enc = ThermometerEncoder::new(10, LARGE_DIM);
1420        // Higher levels should have lower similarity to level 0 (more bits differ).
1421        let cos_1 = enc.encode_level(0).cosine_similarity(&enc.encode_level(1));
1422        let cos_5 = enc.encode_level(0).cosine_similarity(&enc.encode_level(5));
1423        let cos_9 = enc.encode_level(0).cosine_similarity(&enc.encode_level(9));
1424        assert!(cos_1 > cos_5, "cos_1={cos_1}, cos_5={cos_5}");
1425        assert!(cos_5 > cos_9, "cos_5={cos_5}, cos_9={cos_9}");
1426    }
1427
1428    #[test]
1429    fn test_thermometer_different_levels_distinct() {
1430        let enc = ThermometerEncoder::new(5, DIM);
1431        let l0 = enc.encode_level(0);
1432        let l4 = enc.encode_level(4);
1433        // Should not be identical.
1434        assert_ne!(l0.data, l4.data);
1435    }
1436
1437    // ── IdEncoder ─────────────────────────────────────────────────────────────
1438
1439    #[test]
1440    fn test_id_encoder_same_id_returns_same_hv() {
1441        let mut enc = IdEncoder::new(DIM);
1442        let hv1 = enc.get_or_create(42);
1443        let hv2 = enc.get_or_create(42);
1444        assert_eq!(hv1.data, hv2.data);
1445    }
1446
1447    #[test]
1448    fn test_id_encoder_different_ids_orthogonal() {
1449        let mut enc = IdEncoder::new(LARGE_DIM);
1450        let hv0 = enc.get_or_create(0);
1451        let hv1 = enc.get_or_create(1);
1452        let cos = hv0.cosine_similarity(&hv1);
1453        assert!(cos.abs() < 0.2, "cos={cos}");
1454    }
1455
1456    // ── HdClassifier ──────────────────────────────────────────────────────────
1457
1458    #[test]
1459    fn test_classifier_learns_two_classes() {
1460        let mut clf = HdClassifier::new(LARGE_DIM);
1461        // Create two random class prototype HVs.
1462        let hv_a = BipolarHv::random_seeded(LARGE_DIM, 30);
1463        let hv_b = BipolarHv::random_seeded(LARGE_DIM, 31);
1464        // Train several samples per class.
1465        for _ in 0..5 {
1466            clf.train_one(&hv_a, "A");
1467            clf.train_one(&hv_b, "B");
1468        }
1469        assert_eq!(clf.predict(&hv_a).as_deref(), Some("A"));
1470        assert_eq!(clf.predict(&hv_b).as_deref(), Some("B"));
1471    }
1472
1473    #[test]
1474    fn test_classifier_predict_empty_returns_none() {
1475        let clf = HdClassifier::new(DIM);
1476        let query = BipolarHv::random_seeded(DIM, 32);
1477        assert!(clf.predict(&query).is_none());
1478    }
1479
1480    #[test]
1481    fn test_classifier_retrain_improves_score() {
1482        let mut clf = HdClassifier::new(LARGE_DIM);
1483        let hv_a = BipolarHv::random_seeded(LARGE_DIM, 33);
1484        let hv_b = BipolarHv::random_seeded(LARGE_DIM, 34);
1485        clf.train_one(&hv_a, "A");
1486        clf.train_one(&hv_b, "B");
1487        // Force a "misclassification" scenario: retrain hv_a from "B" to "A".
1488        clf.retrain_wrong(&hv_a, "B", "A");
1489        // After retraining, should still correctly identify A.
1490        let pred = clf.predict(&hv_a);
1491        assert_eq!(pred.as_deref(), Some("A"));
1492    }
1493
1494    #[test]
1495    fn test_classifier_predict_with_score_returns_valid_cosine() {
1496        let mut clf = HdClassifier::new(DIM);
1497        let hv = BipolarHv::random_seeded(DIM, 35);
1498        clf.train_one(&hv, "X");
1499        let (_, score) = clf.predict_with_score(&hv).expect("should have result");
1500        assert!(score > 0.0 && score <= 1.0 + 1e-9, "score={score}");
1501    }
1502
1503    // ── SparseSdm ─────────────────────────────────────────────────────────────
1504
1505    #[test]
1506    fn test_sdm_write_read_roundtrip() {
1507        // Use a generous hamming_threshold (45 out of 100 bits) so many hard
1508        // locations are activated, giving robust read-back signal.
1509        let config = SdmConfig {
1510            address_dim: 100,
1511            data_dim: 50,
1512            n_hard_locations: 500,
1513            hamming_threshold: 45,
1514        };
1515        let mut sdm = SparseSdm::new(config);
1516        // Build a simple address and data.
1517        let address: Vec<bool> = (0..100).map(|i| i % 2 == 0).collect();
1518        let data: Vec<bool> = (0..50).map(|i| i % 3 == 0).collect();
1519        // Write multiple times to reinforce.
1520        for _ in 0..15 {
1521            sdm.write(&address, &data);
1522        }
1523        let recovered = sdm.read(&address);
1524        // Count agreements.
1525        let agreement = recovered
1526            .iter()
1527            .zip(data.iter())
1528            .filter(|(r, d)| r == d)
1529            .count();
1530        let ratio = agreement as f64 / 50.0;
1531        assert!(ratio > 0.8, "agreement={ratio}");
1532    }
1533
1534    #[test]
1535    fn test_sdm_activated_locations_non_empty() {
1536        let config = SdmConfig {
1537            address_dim: 50,
1538            data_dim: 10,
1539            n_hard_locations: 500,
1540            hamming_threshold: 20,
1541        };
1542        let sdm = SparseSdm::new(config);
1543        let address: Vec<bool> = vec![false; 50];
1544        let activated = sdm.activated_locations(&address);
1545        // With threshold=20 out of 50 bits, expect many activations.
1546        assert!(!activated.is_empty(), "no locations activated");
1547    }
1548
1549    #[test]
1550    fn test_sdm_empty_write_does_not_panic() {
1551        let config = SdmConfig {
1552            address_dim: 20,
1553            data_dim: 10,
1554            n_hard_locations: 100,
1555            hamming_threshold: 5,
1556        };
1557        let mut sdm = SparseSdm::new(config);
1558        let addr: Vec<bool> = vec![true; 20];
1559        let data: Vec<bool> = vec![false; 10];
1560        // Should not panic.
1561        sdm.write(&addr, &data);
1562    }
1563
1564    // ── OnlineHdc ─────────────────────────────────────────────────────────────
1565
1566    #[test]
1567    fn test_online_hdc_same_sample_encodes_similarly() {
1568        let mut hdc = OnlineHdc::new(4, 10, LARGE_DIM);
1569        let sample = vec![0.1, -0.3, 0.7, -0.9];
1570        let hv1 = hdc.encode_sample(&sample);
1571        let hv2 = hdc.encode_sample(&sample);
1572        let cos = hv1.cosine_similarity(&hv2);
1573        assert!((cos - 1.0).abs() < 1e-9, "cos={cos}");
1574    }
1575
1576    #[test]
1577    fn test_online_hdc_train_predict_binary_classes() {
1578        let mut hdc = OnlineHdc::new(5, 20, LARGE_DIM);
1579        // Class A: positive features; Class B: negative features.
1580        for _ in 0..20 {
1581            hdc.train(&[0.9, 0.8, 0.7, 0.85, 0.75], "A");
1582            hdc.train(&[-0.9, -0.8, -0.7, -0.85, -0.75], "B");
1583        }
1584        let pred_a = hdc.predict(&[0.9, 0.8, 0.7, 0.85, 0.75]);
1585        let pred_b = hdc.predict(&[-0.9, -0.8, -0.7, -0.85, -0.75]);
1586        assert_eq!(pred_a.as_deref(), Some("A"), "pred_a={pred_a:?}");
1587        assert_eq!(pred_b.as_deref(), Some("B"), "pred_b={pred_b:?}");
1588    }
1589
1590    #[test]
1591    fn test_online_hdc_accuracy_on_trivial_dataset() {
1592        let mut hdc = OnlineHdc::new(3, 10, LARGE_DIM);
1593        let samples = vec![
1594            (vec![1.0, 0.9, 0.8], "pos".to_string()),
1595            (vec![-1.0, -0.9, -0.8], "neg".to_string()),
1596        ];
1597        for (f, l) in &samples {
1598            for _ in 0..30 {
1599                hdc.train(f, l);
1600            }
1601        }
1602        let acc = hdc.accuracy(&samples);
1603        assert!(acc > 0.5, "acc={acc}");
1604    }
1605
1606    // ── HdSequenceEncoder ─────────────────────────────────────────────────────
1607
1608    #[test]
1609    fn test_sequence_encoder_order_sensitive() {
1610        let mut enc = HdSequenceEncoder::new(LARGE_DIM);
1611        let ab = enc.encode_sequence(&["a", "b"]);
1612        let ba = enc.encode_sequence(&["b", "a"]);
1613        let cos = ab.cosine_similarity(&ba);
1614        // Different order → different HV (low cosine).
1615        assert!(cos < 0.8, "cos={cos}");
1616    }
1617
1618    #[test]
1619    fn test_sequence_encoder_same_sequence_same_hv() {
1620        let mut enc = HdSequenceEncoder::new(LARGE_DIM);
1621        let hv1 = enc.encode_sequence(&["x", "y", "z"]);
1622        let hv2 = enc.encode_sequence(&["x", "y", "z"]);
1623        let cos = hv1.cosine_similarity(&hv2);
1624        assert!((cos - 1.0).abs() < 1e-9, "cos={cos}");
1625    }
1626
1627    #[test]
1628    fn test_sequence_encoder_query_position_detects_presence() {
1629        let mut enc = HdSequenceEncoder::new(LARGE_DIM);
1630        let seq_hv = enc.encode_sequence(&["cat", "sat", "mat"]);
1631        // Query "cat" at position 0 (first in "cat sat mat").
1632        let score = enc.query_position(&seq_hv, "cat", 0, 3);
1633        // Should have positive similarity.
1634        assert!(score > 0.0, "score={score}");
1635    }
1636
1637    #[test]
1638    fn test_sequence_encoder_empty_sequence_no_panic() {
1639        let mut enc = HdSequenceEncoder::new(DIM);
1640        let _hv = enc.encode_sequence(&[]);
1641    }
1642
1643    // ── HD Metrics ────────────────────────────────────────────────────────────
1644
1645    #[test]
1646    fn test_orthogonality_test_random_hvs_near_zero() {
1647        let hvs: Vec<BipolarHv> = (0..10)
1648            .map(|i| BipolarHv::random_seeded(LARGE_DIM, i as u64 + 50))
1649            .collect();
1650        let mean_cos = orthogonality_test(&hvs);
1651        // 10 random HVs of dim 5000 should be close to orthogonal.
1652        assert!(mean_cos.abs() < 0.1, "mean_cos={mean_cos}");
1653    }
1654
1655    #[test]
1656    fn test_orthogonality_test_single_hv_returns_zero() {
1657        let hv = BipolarHv::random_seeded(DIM, 60);
1658        let result = orthogonality_test(&[hv]);
1659        assert!((result - 0.0).abs() < 1e-9);
1660    }
1661
1662    #[test]
1663    fn test_compute_hdc_stats_separation_positive() {
1664        // Construct two very different class HVs.
1665        let mut class_hvs = HashMap::new();
1666        class_hvs.insert("A".to_string(), BipolarHv::random_seeded(LARGE_DIM, 70));
1667        class_hvs.insert("B".to_string(), BipolarHv::random_seeded(LARGE_DIM, 71));
1668        let stats = compute_hdc_stats(&class_hvs);
1669        // With random orthogonal HVs, mean_same=1, mean_diff≈0 → separation≈1.
1670        assert!(stats.separation > 0.5, "sep={}", stats.separation);
1671    }
1672
1673    #[test]
1674    fn test_compute_hdc_stats_empty_returns_zeros() {
1675        let class_hvs: HashMap<String, BipolarHv> = HashMap::new();
1676        let stats = compute_hdc_stats(&class_hvs);
1677        assert!((stats.separation - 0.0).abs() < 1e-9);
1678    }
1679
1680    // ── Miscellaneous / Integration ────────────────────────────────────────────
1681
1682    #[test]
1683    fn test_hd_dim_constant() {
1684        assert_eq!(HD_DIM, 10_000);
1685    }
1686
1687    #[test]
1688    fn test_real_hv_cosine_identical() {
1689        let hv = RealHv::random(DIM);
1690        assert!((hv.cosine_similarity(&hv) - 1.0).abs() < 1e-9);
1691    }
1692
1693    #[test]
1694    fn test_binary_to_bipolar_conversion_via_to_binary() {
1695        let bip = BipolarHv::random_seeded(DIM, 80);
1696        let bin = bip.to_binary();
1697        // +1 → true, -1 → false.
1698        for (bip_v, bin_v) in bip.data.iter().zip(bin.data.iter()) {
1699            let expected = *bip_v > 0.0;
1700            assert_eq!(*bin_v, expected);
1701        }
1702    }
1703
1704    #[test]
1705    fn test_hdc_error_display() {
1706        let e = HdcError::DimensionMismatch {
1707            expected: 100,
1708            got: 200,
1709        };
1710        let s = format!("{e}");
1711        assert!(s.contains("100") && s.contains("200"), "msg={s}");
1712    }
1713
1714    #[test]
1715    fn test_bundle_binary_empty_returns_empty() {
1716        let result = bundle_binary(&[]);
1717        assert_eq!(result.dim(), 0);
1718    }
1719
1720    #[test]
1721    fn test_bundle_bipolar_empty_returns_empty() {
1722        let result = bundle_bipolar(&[]);
1723        assert_eq!(result.dim(), 0);
1724    }
1725
1726    #[test]
1727    fn test_permute_binary_full_cycle_is_identity() {
1728        let hv = BinaryHv::random_seeded(DIM, 90);
1729        let cycled = permute_binary(&hv, DIM as i64);
1730        assert_eq!(cycled.data, hv.data);
1731    }
1732
1733    #[test]
1734    fn test_permute_bipolar_full_cycle_is_identity() {
1735        let hv = BipolarHv::random_seeded(DIM, 91);
1736        let cycled = permute_bipolar(&hv, DIM as i64);
1737        assert_eq!(cycled.data, hv.data);
1738    }
1739
1740    #[test]
1741    fn test_item_memory_add_and_lookup_multiple() {
1742        let mut mem = ItemMemory::new(LARGE_DIM);
1743        let labels = ["red", "green", "blue", "yellow", "purple"];
1744        let mut hvs: Vec<BipolarHv> = Vec::new();
1745        for label in &labels {
1746            hvs.push(mem.add_random(label));
1747        }
1748        for (hv, &label) in hvs.iter().zip(labels.iter()) {
1749            let (found, _) = mem.lookup(hv).expect("must find");
1750            assert_eq!(found, label, "expected {label} got {found}");
1751        }
1752    }
1753
1754    #[test]
1755    fn test_level_encoder_clamping() {
1756        let enc = LevelEncoder::new(5, DIM, 0.0, 1.0);
1757        // Values outside range should clamp to boundary levels.
1758        let below = enc.encode(-5.0);
1759        let above = enc.encode(5.0);
1760        assert_eq!(below.data, enc.levels[0].data);
1761        assert_eq!(above.data, enc.levels[4].data);
1762    }
1763
1764    #[test]
1765    fn test_online_hdc_different_samples_differ() {
1766        let mut hdc = OnlineHdc::new(3, 10, LARGE_DIM);
1767        let hv_a = hdc.encode_sample(&[1.0, 1.0, 1.0]);
1768        let hv_b = hdc.encode_sample(&[-1.0, -1.0, -1.0]);
1769        let cos = hv_a.cosine_similarity(&hv_b);
1770        // Opposite features should produce dissimilar HVs.
1771        assert!(cos < 0.5, "cos={cos}");
1772    }
1773}