1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
use std::collections::HashMap;
use rayon::prelude::*;
use rand::{Rng};
use bit_vec::{BitVec};
use crate::vector::{ dot, random_unit_vector };
pub struct HyperIndex<K:Send> {
planes: Vec<Vec<f32>>,
groups: HashMap<BitVec, Vec<K>>,
dims: usize
}
impl<K:Send> HyperIndex<K> {
pub fn new<R : Rng + Sized>(dimension: usize, hyperplane_count: u8, mut rng: &mut R) -> HyperIndex<K>
{
let mut planes = Vec::<Vec<f32>>::with_capacity(hyperplane_count as usize);
for _ in 0..hyperplane_count {
planes.push(random_unit_vector(dimension, &mut rng));
}
return HyperIndex {
planes: planes,
groups: HashMap::new(),
dims: dimension
}
}
pub fn dimensions(&self) -> usize {
return self.dims;
}
pub fn groups_len(&self) -> usize {
return self.groups.len();
}
pub fn planes_len(&self) -> usize {
return self.planes.len();
}
pub fn key(&self, vector: &Vec<f32>) -> BitVec {
let mut key = BitVec::with_capacity(self.planes.len());
let bits:Vec<bool> = self.planes.par_iter().map(|plane| {
let d = dot(plane, vector);
return d > 0f32;
}).collect();
for bit in bits.iter() {
key.push(*bit);
}
return key;
}
pub fn add(&mut self, key: K, vector: &Vec<f32>) {
let bits = self.key(&vector);
self.groups
.entry(bits)
.or_insert(Vec::new())
.push(key);
}
pub fn group(&self, key: &BitVec) -> Option<&Vec<K>> {
return self.groups.get(&key);
}
}
#[cfg(test)]
mod tests
{
use rand::prelude::*;
use crate::hyperindex::{ HyperIndex };
use crate::vector::{ random_unit_vector, modified_cosine_distance };
#[test]
fn new_creates_index<'a>() {
let a = HyperIndex::<usize>::new(300, 10, &mut thread_rng());
assert_eq!(300, a.dimensions());
assert_eq!(0, a.groups_len());
assert_eq!(10, a.planes_len());
}
#[test]
fn add_adds_points<'a>() {
let mut a = HyperIndex::new(300, 10, &mut thread_rng());
let v = random_unit_vector(300, &mut thread_rng());
a.add(0, &v);
}
#[test]
fn it_works<'a>() {
let mut a = HyperIndex::new(300, 10, &mut thread_rng());
let mut vectors = Vec::new();
let mut rng = thread_rng();
for key in 0..1000usize {
let v = random_unit_vector(300, &mut rng);
a.add(key, &v);
vectors.push((key, v));
}
println!("Groups:{:?}", a.groups_len());
println!();
println!("Linear results:");
let query_point = vectors[0].clone();
let mut nearest_linear: Vec<(f32, &(usize, Vec<f32>))> = vectors.iter().map(|item| (modified_cosine_distance(&item.1, &query_point.1), item)).collect();
nearest_linear.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
for i in 0..20 {
println!("idx:{:?}\t\tdist:{:?}", (nearest_linear[i].1).0, nearest_linear[i].0);
}
println!();
println!("Index results:");
let near = a.group(&a.key(&query_point.1));
if near.is_none() {
panic!();
}
let near = near.unwrap();
let mut results: Vec<(f32, &(usize, Vec<f32>))> = near.iter().map(|i| &vectors[*i]).map(|item| (modified_cosine_distance(&item.1, &query_point.1), item)).collect();
results.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
for i in 0.. results.len().min(20) {
println!("idx:{:?}\t\tdist:{:?}", (results[i].1).0, results[i].0);
}
}
}