1use crate::data::Integer;
2use crate::{
3 constants::DESCRIBE_MAX,
4 data::Numeric,
5 prelude::*,
6 table::general::{Bucket, HashTables},
7 utils::{all_eq, increase_capacity},
8};
9use fnv::{FnvHashMap as HashMap, FnvHashSet};
10use serde::{Deserialize, Serialize};
11use std::iter::FromIterator;
12
13#[derive(Debug, Deserialize, Serialize)]
16pub struct VecStore<N> {
17 pub map: Vec<Vec<N>>,
18}
19
20impl<N: Numeric> VecStore<N> {
21 fn push(&mut self, d: Vec<N>) -> u32 {
22 self.map.push(d);
23 (self.map.len() - 1) as u32
24 }
25
26 fn position(&self, d: &[N]) -> Option<u32> {
27 self.map.iter().position(|x| all_eq(x, d)).map(|x| x as u32)
28 }
29
30 fn get(&self, idx: u32) -> &Vec<N> {
31 &self.map[idx as usize]
32 }
33
34 fn increase_storage(&mut self, size: usize) {
35 increase_capacity(size, &mut self.map);
36 }
37}
38
39#[derive(Deserialize, Serialize)]
41pub struct MemoryTable<N, K>
42where
43 N: Numeric,
44 K: Integer,
45{
46 hash_tables: Vec<HashMap<Vec<K>, Bucket>>,
47 n_hash_tables: usize,
48 pub vec_store: VecStore<N>,
49 only_index_storage: bool,
50 counter: u32,
51}
52
53impl<N, K> MemoryTable<N, K>
54where
55 N: Numeric,
56 K: Integer,
57{
58 fn remove_idx(&mut self, idx: u32, hash: &[K], hash_table: usize) -> Result<()> {
59 let tbl = &mut self.hash_tables[hash_table];
60 let bucket = tbl.get_mut(hash);
61 match bucket {
62 None => return Err(Error::NotFound),
63 Some(bucket) => {
64 bucket.remove(&idx);
65 Ok(())
66 }
67 }
68 }
69 fn insert_idx(&mut self, idx: u32, hash: Vec<K>, hash_table: usize) {
70 debug_assert!(hash_table < self.n_hash_tables);
71 let tbl = unsafe { self.hash_tables.get_unchecked_mut(hash_table) };
72 let bucket = tbl.entry(hash).or_insert_with(|| FnvHashSet::default());
73 bucket.insert(idx);
74 }
75}
76
77impl<N, K> HashTables<N, K> for MemoryTable<N, K>
78where
79 N: Numeric,
80 K: Integer,
81{
82 fn new(n_hash_tables: usize, only_index_storage: bool, _: &str) -> Result<Box<Self>> {
83 let hash_tables = vec![HashMap::default(); n_hash_tables];
87 let vector_store = VecStore { map: vec![] };
88 let m = MemoryTable {
89 hash_tables,
90 n_hash_tables,
91 vec_store: vector_store,
92 only_index_storage,
93 counter: 0,
94 };
95 Ok(Box::new(m))
96 }
97
98 fn put(&mut self, hash: Vec<K>, d: &[N], hash_table: usize) -> Result<u32> {
99 let idx = self.counter;
101 self.insert_idx(idx, hash, hash_table);
102
103 if (hash_table == 0) && (!self.only_index_storage) {
107 self.vec_store.push(d.to_vec());
108 } else if hash_table == self.n_hash_tables - 1 {
109 self.counter += 1
110 }
111 Ok(idx)
112 }
113
114 fn delete(&mut self, hash: &[K], d: &[N], hash_table: usize) -> Result<()> {
116 let idx = match self.vec_store.position(d) {
118 None => return Ok(()),
119 Some(idx) => idx,
120 };
121 self.remove_idx(idx, &hash, hash_table)
124 }
125
126 fn update_by_idx(
127 &mut self,
128 old_hash: &[K],
129 new_hash: Vec<K>,
130 idx: u32,
131 hash_table: usize,
132 ) -> Result<()> {
133 self.remove_idx(idx, old_hash, hash_table)?;
134 self.insert_idx(idx, new_hash, hash_table);
135 Ok(())
136 }
137
138 fn query_bucket(&self, hash: &[K], hash_table: usize) -> Result<Bucket> {
140 let tbl = &self.hash_tables[hash_table];
141 match tbl.get(hash) {
142 None => Err(Error::NotFound),
143 Some(bucket) => Ok(bucket.clone()),
144 }
145 }
146
147 fn idx_to_datapoint(&self, idx: u32) -> Result<&Vec<N>> {
148 Ok(self.vec_store.get(idx))
149 }
150
151 fn increase_storage(&mut self, size: usize) {
152 increase_capacity(size, &mut self.hash_tables);
153 self.vec_store.increase_storage(size);
154 }
155
156 fn describe(&self) -> Result<String> {
157 let mut lengths = vec![];
158 let mut max_len = 0;
159 let mut min_len = 1000000;
160 let mut set: FnvHashSet<i32> = FnvHashSet::default();
161 for map in self.hash_tables.iter() {
163 for ((k, v), _) in map.iter().zip(0..DESCRIBE_MAX) {
166 let len = v.len();
167 let hash_values: FnvHashSet<i32> =
168 FnvHashSet::from_iter(k.iter().map(|&k| k.to_i32().unwrap()));
169 set = set.union(&hash_values).copied().collect();
170 lengths.push(len);
171 if len > max_len {
172 max_len = len
173 }
174 if len < min_len {
175 min_len = len
176 }
177 }
178 }
179
180 let avg = lengths.iter().sum::<usize>() as f32 / lengths.len() as f32;
181 let var = lengths
182 .iter()
183 .map(|&v| (avg - v as f32).powf(2.))
184 .sum::<f32>()
185 / lengths.len() as f32;
186 let std_dev = var.powf(0.5);
187
188 let mut out = String::from(&format!("No. of tables: {}\n", self.n_hash_tables));
189 out.push_str(&format!("Unique hash values:\n{:?}\n", set));
190 out.push_str("\nHash collisions:\n");
191 out.push_str(&format!("avg:\t{:?}\n", avg));
192 out.push_str(&format!("std-dev:\t{:?}\n", std_dev));
193 out.push_str(&format!("min:\t{:?}\n", min_len));
194 out.push_str(&format!("max:\t{:?}\n", max_len));
195
196 Ok(out)
197 }
198
199 fn get_unique_hash_int(&self) -> FnvHashSet<i32> {
200 let mut hash_numbers = FnvHashSet::default();
201
202 for ht in &self.hash_tables {
203 for ((hash, _), _i) in ht.iter().zip(0..100) {
204 for &v in hash {
205 hash_numbers.insert(v.to_i32().unwrap());
206 }
207 }
208 }
209 hash_numbers
210 }
211}
212
213impl<N, K> std::fmt::Debug for MemoryTable<N, K>
214where
215 N: Numeric,
216 K: Integer,
217{
218 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
219 write!(f, "hash_tables:\nhash, \t buckets\n")?;
220 for ht in self.hash_tables.iter() {
221 write!(f, "{:?}\n", ht)?;
222 }
223 Ok(())
224 }
225}