trs_dataframe/dataframe/column_store/
key_index.rs1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::ArrayView1;
4use serde::{Deserialize, Serialize};
5
6use crate::{error::Error, Key};
7
8#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
14pub struct KeyIndex {
15 pub keys: Vec<Key>,
16 indexes: HashMap<String, usize>,
17 pub alias: HashMap<String, String>,
18}
19
20impl KeyIndex {
21 pub fn new(keys: Vec<Key>) -> Self {
22 let mut indexes = HashMap::with_capacity(keys.len());
23 let mut removed = 0;
24 let mut actual_keys = Vec::with_capacity(keys.len());
25 for (idx, key) in keys.into_iter().enumerate() {
26 if indexes.contains_key(key.name()) {
27 removed += 1;
28 } else {
29 indexes.insert(key.name().to_string(), idx.saturating_sub(removed));
30 actual_keys.push(key)
31 }
32 }
33 Self {
34 keys: actual_keys,
35 indexes,
36 alias: HashMap::new(),
37 }
38 }
39
40 pub fn len(&self) -> usize {
41 self.keys.len()
42 }
43 pub fn is_empty(&self) -> bool {
44 self.keys.is_empty()
45 }
46
47 pub fn get_column_index(&self, key: &Key) -> Option<usize> {
48 self.get_column_index_by_name(key.name())
49 }
50
51 pub fn get_column_index_by_name(&self, key: &str) -> Option<usize> {
52 if let Some(f) = self.indexes.get(key) {
53 Some(*f)
54 } else {
55 self.alias
56 .get(key)
57 .and_then(|alias| self.indexes.get(alias).copied())
58 }
59 }
60
61 pub fn get_keys(&self) -> &[Key] {
62 &self.keys
63 }
64
65 pub fn get_key(&self, idx: usize) -> Option<Key> {
66 self.keys.get(idx).cloned()
67 }
68 pub fn get_complement_keys(&self, keys: &[Key]) -> Vec<Key> {
69 self.keys
70 .iter()
71 .filter(|key| !keys.contains(key))
72 .cloned()
73 .collect()
74 }
75
76 pub fn select(&self, keys: &[Key]) -> KeyIndex {
77 let mut new_keys = Vec::with_capacity(keys.len());
78 let mut new_indexes = HashMap::with_capacity(keys.len());
79
80 for key in keys.iter() {
81 if let Some(idx) = self.indexes.get(key.name()) {
82 new_indexes.insert(key.name().to_string(), *idx);
83 new_keys.push(key.to_owned());
84 } else if let Some(alias_key) = self.alias.get(key.name()) {
85 if let Some(idx) = self.indexes.get(alias_key) {
86 new_indexes.insert(key.name().to_string(), *idx);
87 new_keys.push(key.to_owned());
88 }
89 }
90 }
91 Self {
92 keys: new_keys,
93 indexes: new_indexes,
94 alias: HashMap::new(),
95 }
96 }
97
98 pub fn indexes(&self) -> Vec<usize> {
99 self.indexes.values().copied().collect()
100 }
101
102 pub fn store_key(&mut self, key: Key) {
103 if self.indexes.contains_key(key.name()) {
104 return;
105 }
106 self.keys.push(key.clone());
107 self.indexes
108 .insert(key.name().to_string(), self.keys.len() - 1);
109 }
110
111 pub fn remove_key(&mut self, key: &Key) -> Option<(Key, usize)> {
112 let idx = self.indexes.remove(key.name())?;
113 let current = self.keys.remove(idx);
114 Some((current, idx))
115 }
116
117 pub fn get_as_candidate(&self, row: ArrayView1<DataValue>) -> HashMap<Key, DataValue> {
118 let mut result = HashMap::with_capacity(self.keys.len());
119 for (key, idx) in self.indexes.iter() {
120 result.insert(key.into(), row[*idx].clone());
121 }
122 result
123 }
124
125 pub fn to_vec_row(&self, candidate: HashMap<Key, DataValue>) -> Vec<DataValue> {
126 self.keys
127 .iter()
128 .map(|key| candidate.get(key).cloned().unwrap_or_default())
129 .collect()
130 }
131
132 pub fn check_order_of_indexes(&self, other: &Self) -> Result<(), Error> {
133 for (self_key, other_key) in self.keys.iter().zip(other.keys.iter()) {
134 if self_key != other_key {
135 return Err(Error::IndexOutOfOrder(
136 self.keys.clone(),
137 other.keys.clone(),
138 ));
139 }
140 }
141 Ok(())
142 }
143
144 pub fn rename_key(&mut self, key: &str, new_key: Key) -> Result<(), Error> {
145 if let Some(idx) = self.indexes.remove(key) {
146 self.indexes.insert(new_key.to_string(), idx);
147 self.keys[idx] = new_key;
148 Ok(())
149 } else {
150 Err(Error::NotFound(key.into()))
151 }
152 }
153
154 pub fn add_alias(&mut self, key: &str, alias: &str) -> Result<(), Error> {
155 if !self.indexes.contains_key(key) {
156 return Err(Error::NotFound(key.into()));
157 }
158 self.alias.insert(alias.to_string(), key.to_string());
159 Ok(())
160 }
161}
162
163impl From<Vec<Key>> for KeyIndex {
164 fn from(keys: Vec<Key>) -> Self {
165 Self::new(keys)
166 }
167}
168
#[cfg(test)]
mod test {
    use crate::DataType;

    use super::*;
    use rstest::*;

    /// Shorthand for the `U32` keys used throughout these tests.
    fn u32_key(name: &str) -> Key {
        Key::new(name, DataType::U32)
    }

    /// An alias can only be added for an existing key and resolves to that key's column.
    #[rstest]
    fn test_alias() {
        let key = u32_key("a");
        let mut key_index = KeyIndex::new(vec![key.clone()]);

        assert_eq!(key_index.add_alias(key.name(), "alias"), Ok(()));
        assert!(key_index.add_alias("c", "alias").is_err());

        assert_eq!(key_index.alias.get("alias"), Some(&key.name().to_string()));
        assert_eq!(key_index.get_column_index(&key), Some(0));
        assert_eq!(key_index.get_column_index(&u32_key("alias")), Some(0));
    }

    /// Renaming keeps the column position; renaming an unknown key is an error.
    #[rstest]
    fn test_rename() {
        let mut key_index = KeyIndex::new(vec![u32_key("a"), u32_key("b")]);

        assert_eq!(key_index.rename_key("a", "new_key".into()), Ok(()));
        assert_eq!(key_index.get_column_index(&u32_key("new_key")), Some(0));
        assert!(key_index.rename_key("c", "alias".into()).is_err());
    }

    /// Construction keeps the given keys in order.
    #[rstest]
    #[case(
        vec![u32_key("a"), u32_key("b")],
        vec![u32_key("a"), u32_key("b")]
    )]
    fn test_key_index_new(#[case] keys: Vec<Key>, #[case] expected: Vec<Key>) {
        let key_index = KeyIndex::new(keys);
        assert_eq!(key_index.keys, expected);
        assert_eq!(key_index.get_keys(), expected);
    }

    /// Identical key order passes; swapped keys report both key lists.
    #[rstest]
    #[case(
        vec![u32_key("a"), u32_key("b")],
        vec![u32_key("a"), u32_key("b")],
        Ok(())
    )]
    #[case(
        vec![u32_key("a"), u32_key("b")],
        vec![u32_key("b"), u32_key("a")],
        Err(Error::IndexOutOfOrder(
            vec![u32_key("a"), u32_key("b")],
            vec![u32_key("b"), u32_key("a")]
        ))
    )]
    fn test_key_index_check_order_of_indexes(
        #[case] keys: Vec<Key>,
        #[case] other_keys: Vec<Key>,
        #[case] expected: Result<(), Error>,
    ) {
        let key_index: KeyIndex = keys.into();
        let other_key_index = KeyIndex::new(other_keys);
        assert_eq!(key_index.check_order_of_indexes(&other_key_index), expected);
    }

    /// Removing a key hands back the key and the position it occupied.
    #[rstest]
    #[case(
        KeyIndex::new(vec![u32_key("a"), u32_key("b")]),
        (u32_key("a"), 0)
    )]
    #[case(
        KeyIndex::new(vec![u32_key("a"), u32_key("b")]),
        (u32_key("b"), 1)
    )]
    fn test_key_index_remove_key(#[case] mut key_index: KeyIndex, #[case] expected: (Key, usize)) {
        let key = expected.0.clone();
        assert_eq!(key_index.remove_key(&key), Some(expected));
    }
}