// jupiter_rs/infograph/table.rs

1use std::collections::HashMap;
2
3use linked_hash_map::LinkedHashMap;
4
5use crate::infograph::docs::{Doc, Element, Query};
6use std::rc::Rc;
7
/// A lookup index over the top-level elements of a document.
///
/// Maps the string a query yields for an element to the position of that element below the
/// document root, and keeps usage statistics for this particular index.
#[derive(Debug)]
struct Index {
    /// Key string (the query result of an element) mapped to the element's position.
    index: HashMap<String, usize>,
    /// Number of lookups performed against this index.
    num_queries: usize,
    /// Number of lookups for which a matching key was found.
    num_hits: usize,
    /// Estimated number of bytes occupied by the index (table plus key contents).
    allocated_memory: usize,
}
14
/// A snapshot of the state and usage statistics of a single index of a table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexInfo {
    /// The query string the index was created for.
    pub name: String,
    /// Number of distinct keys stored in the index.
    pub num_entries: usize,
    /// Number of lookups performed against the index.
    pub num_queries: usize,
    /// Number of lookups for which a matching key was found.
    pub num_hits: usize,
    /// Estimated number of bytes occupied by the index.
    pub allocated_memory: usize,
}
22
23pub struct Table<'a> {
24    doc: &'a Doc,
25    query_cache: LinkedHashMap<String, Rc<Query>>,
26    auto_cache: LinkedHashMap<(String, String), Element<'a>>,
27    indices: HashMap<String, Index>,
28    num_queries: usize,
29    num_index_hits: usize,
30    num_auto_cache_hits: usize,
31    num_query_cache_hits: usize,
32    num_table_scans: usize,
33}
34
35impl<'a> Table<'a> {
36    pub fn new(doc: &'a Doc) -> Self {
37        Table {
38            doc,
39            query_cache: LinkedHashMap::with_capacity(128),
40            auto_cache: LinkedHashMap::with_capacity(1024),
41            indices: HashMap::with_capacity(3),
42            num_queries: 0,
43            num_index_hits: 0,
44            num_auto_cache_hits: 0,
45            num_query_cache_hits: 0,
46            num_table_scans: 0,
47        }
48    }
49
50    pub fn add_index(&mut self, query: impl AsRef<str>) {
51        let compiled_query = self.doc.compile(query.as_ref());
52        let mut index = Index {
53            index: HashMap::with_capacity(self.len()),
54            num_queries: 0,
55            num_hits: 0,
56            allocated_memory: 0,
57        };
58
59        index.allocated_memory += index.index.capacity() * 11 / 10
60            * (std::mem::size_of::<usize>() + std::mem::size_of::<String>());
61
62        let root = self.doc.root();
63
64        for position in 0..root.len() {
65            let element = root.at(position);
66            if let Some(key) = compiled_query.execute(element).as_str() {
67                index.allocated_memory += key.len();
68                index.index.insert(key.to_owned(), position);
69            }
70        }
71
72        self.indices.insert(query.as_ref().to_owned(), index);
73    }
74
75    pub fn query(&mut self, query: impl AsRef<str>, value: impl AsRef<str>) -> Option<Element> {
76        let query_string = query.as_ref();
77        let value_string = value.as_ref();
78
79        self.num_queries += 1;
80
81        if let Some(index) = self.indices.get_mut(query_string) {
82            Table::query_index(self.doc, index, value_string)
83        } else if let Some(result) = self
84            .auto_cache
85            .get_refresh(&(query_string.to_string(), value_string.to_string()))
86        {
87            self.num_auto_cache_hits += 1;
88            Some(result.clone())
89        } else {
90            self.num_table_scans += 1;
91            self.table_scan(query_string, value_string)
92        }
93    }
94
95    fn query_index(doc: &'a Doc, index: &mut Index, value_string: &str) -> Option<Element<'a>> {
96        index.num_queries += 1;
97
98        if let Some(position) = index.index.get(value_string) {
99            index.num_hits += 1;
100            Some(doc.root().at(*position))
101        } else {
102            None
103        }
104    }
105
106    fn compile_query(&mut self, query_string: &str) -> Rc<Query> {
107        if let Some(query) = self.query_cache.get_refresh(query_string) {
108            self.num_query_cache_hits += 1;
109
110            query.clone()
111        } else {
112            let query = Rc::new(self.doc.compile(query_string));
113            self.query_cache
114                .insert(query_string.to_owned(), query.clone());
115
116            if self.query_cache.len() == self.query_cache.capacity() {
117                self.query_cache.pop_front();
118            }
119
120            query
121        }
122    }
123
124    fn table_scan(&mut self, query_string: &str, value_string: &str) -> Option<Element> {
125        let query = self.compile_query(query_string);
126        for element in self.doc.root().iter() {
127            let child = query.execute(element);
128            if child.as_str().unwrap_or("") == value_string {
129                self.auto_cache.insert(
130                    (query_string.to_string(), value_string.to_string()),
131                    element,
132                );
133                if self.auto_cache.len() == self.auto_cache.capacity() {
134                    self.auto_cache.pop_front();
135                }
136
137                return Some(element);
138            }
139        }
140
141        None
142    }
143
144    pub fn len(&self) -> usize {
145        self.doc.root().len()
146    }
147
148    pub fn indices(&self) -> Vec<IndexInfo> {
149        self.indices
150            .iter()
151            .map(|(name, index)| IndexInfo {
152                name: name.clone(),
153                num_entries: index.index.len(),
154                num_queries: index.num_queries,
155                num_hits: index.num_hits,
156                allocated_memory: index.allocated_memory,
157            })
158            .collect()
159    }
160
161    pub fn num_queries(&self) -> usize {
162        self.num_queries
163    }
164
165    pub fn num_query_cache_hits(&self) -> usize {
166        self.num_query_cache_hits
167    }
168
169    pub fn num_query_cache_misses(&self) -> usize {
170        self.num_table_scans - self.num_query_cache_hits
171    }
172
173    pub fn query_cache_hit_ratio(&self) -> f32 {
174        if self.num_table_scans == 0 {
175            0.
176        } else {
177            100. * self.num_query_cache_hits as f32 / self.num_table_scans as f32
178        }
179    }
180
181    pub fn num_auto_cache_hits(&self) -> usize {
182        self.num_auto_cache_hits
183    }
184
185    pub fn num_auto_cache_misses(&self) -> usize {
186        self.num_queries - self.num_index_hits - self.num_auto_cache_hits
187    }
188
189    pub fn auto_cache_hit_ratio(&self) -> f32 {
190        let queries = self.num_queries - self.num_index_hits;
191        if queries == 0 {
192            0.
193        } else {
194            100. * self.num_query_cache_hits as f32 / queries as f32
195        }
196    }
197
198    pub fn num_index_hits(&self) -> usize {
199        self.num_index_hits
200    }
201
202    pub fn num_index_misses(&self) -> usize {
203        self.num_queries - self.num_index_hits
204    }
205
206    pub fn index_hit_ratio(&self) -> f32 {
207        if self.num_queries == 0 {
208            0.
209        } else {
210            100. * self.num_query_cache_hits as f32 / self.num_queries as f32
211        }
212    }
213
214    pub fn allocated_memory(&self) -> usize {
215        let allocated_index_memory: usize = self
216            .indices
217            .values()
218            .map(|index| index.allocated_memory)
219            .sum();
220        let allocated_auto_cache_content: usize = self
221            .auto_cache
222            .keys()
223            .map(|(path, key)| key.len() + path.len())
224            .sum();
225        let allocated_auto_cache_table: usize = self.auto_cache.capacity() * 11 / 10
226            * (std::mem::size_of::<(String, String)>() + std::mem::size_of::<&Element>());
227
228        self.doc.allocated_size()
229            + allocated_index_memory
230            + allocated_auto_cache_content
231            + allocated_auto_cache_table
232    }
233}