Skip to main content

trs_dataframe/dataframe/column_store/
key_index.rs

1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::ArrayView1;
4use serde::{Deserialize, Serialize};
5
6use crate::{error::Error, Key};
7
8/// [`KeyIndex`] is used to store the keys for the [`super::ColumnFrame`]
9/// The keys are stored in the order they are added - the order is preserved
10/// The keys are stored in the [`Vec`] and the indexes are stored in the [`HashMap`]
11/// The indexes are used to access the data in the [`super::ColumnFrame`] by the column [`Key`]
12/// NOTE: The keys are unique - if the key is already present, it will be removed
13#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
14pub struct KeyIndex {
15    pub keys: Vec<Key>,
16    indexes: HashMap<String, usize>,
17    pub alias: HashMap<String, String>,
18}
19
20impl KeyIndex {
21    pub fn new(keys: Vec<Key>) -> Self {
22        let mut indexes = HashMap::with_capacity(keys.len());
23        let mut removed = 0;
24        let mut actual_keys = Vec::with_capacity(keys.len());
25        for (idx, key) in keys.into_iter().enumerate() {
26            if indexes.contains_key(key.name()) {
27                removed += 1;
28            } else {
29                indexes.insert(key.name().to_string(), idx.saturating_sub(removed));
30                actual_keys.push(key)
31            }
32        }
33        Self {
34            keys: actual_keys,
35            indexes,
36            alias: HashMap::new(),
37        }
38    }
39
40    pub fn len(&self) -> usize {
41        self.keys.len()
42    }
43    pub fn is_empty(&self) -> bool {
44        self.keys.is_empty()
45    }
46
47    pub fn get_column_index(&self, key: &Key) -> Option<usize> {
48        self.get_column_index_by_name(key.name())
49    }
50
51    pub fn get_column_index_by_name(&self, key: &str) -> Option<usize> {
52        if let Some(f) = self.indexes.get(key) {
53            Some(*f)
54        } else {
55            self.alias
56                .get(key)
57                .and_then(|alias| self.indexes.get(alias).copied())
58        }
59    }
60
61    pub fn get_keys(&self) -> &[Key] {
62        &self.keys
63    }
64    pub fn get_keys_mut(&mut self) -> &mut [Key] {
65        &mut self.keys
66    }
67
68    pub fn get_key(&self, idx: usize) -> Option<Key> {
69        self.keys.get(idx).cloned()
70    }
71    pub fn get_complement_keys(&self, keys: &[Key]) -> Vec<Key> {
72        self.keys
73            .iter()
74            .filter(|key| !keys.contains(key))
75            .cloned()
76            .collect()
77    }
78
79    pub fn select(&self, keys: &[Key]) -> KeyIndex {
80        let mut new_keys = Vec::with_capacity(keys.len());
81        let mut new_indexes = HashMap::with_capacity(keys.len());
82
83        for key in keys.iter() {
84            if let Some(idx) = self.indexes.get(key.name()) {
85                new_indexes.insert(key.name().to_string(), *idx);
86                new_keys.push(key.to_owned());
87            } else if let Some(alias_key) = self.alias.get(key.name()) {
88                if let Some(idx) = self.indexes.get(alias_key) {
89                    new_indexes.insert(key.name().to_string(), *idx);
90                    new_keys.push(key.to_owned());
91                }
92            }
93        }
94        Self {
95            keys: new_keys,
96            indexes: new_indexes,
97            alias: HashMap::new(),
98        }
99    }
100
101    pub fn indexes(&self) -> Vec<usize> {
102        self.indexes.values().copied().collect()
103    }
104
105    pub fn store_key(&mut self, key: Key) {
106        if self.indexes.contains_key(key.name()) {
107            return;
108        }
109        self.keys.push(key.clone());
110        self.indexes
111            .insert(key.name().to_string(), self.keys.len() - 1);
112    }
113
114    pub fn remove_key(&mut self, key: &Key) -> Option<(Key, usize)> {
115        let idx = self.indexes.remove(key.name())?;
116        let current = self.keys.remove(idx);
117
118        Some((current, idx))
119    }
120
121    pub fn get_as_candidate(&self, row: ArrayView1<DataValue>) -> HashMap<Key, DataValue> {
122        let mut result = HashMap::with_capacity(self.keys.len());
123        for (key, idx) in self.indexes.iter() {
124            result.insert(key.into(), row[*idx].clone());
125        }
126        result
127    }
128
129    pub fn to_vec_row(&self, candidate: HashMap<Key, DataValue>) -> Vec<DataValue> {
130        self.keys
131            .iter()
132            .map(|key| candidate.get(key).cloned().unwrap_or_default())
133            .collect()
134    }
135
136    pub fn check_order_of_indexes(&self, other: &Self) -> Result<(), Error> {
137        for (self_key, other_key) in self.keys.iter().zip(other.keys.iter()) {
138            if self_key != other_key {
139                return Err(Error::IndexOutOfOrder(
140                    self.keys.clone(),
141                    other.keys.clone(),
142                ));
143            }
144        }
145        Ok(())
146    }
147
148    pub fn rename_key(&mut self, key: &str, new_key: Key) -> Result<(), Error> {
149        if let Some(idx) = self.indexes.remove(key) {
150            self.indexes.insert(new_key.to_string(), idx);
151            self.keys[idx] = new_key;
152            Ok(())
153        } else {
154            Err(Error::NotFound(key.into()))
155        }
156    }
157
158    pub fn add_alias(&mut self, key: &str, alias: &str) -> Result<(), Error> {
159        if !self.indexes.contains_key(key) {
160            return Err(Error::NotFound(key.into()));
161        }
162        self.alias.insert(alias.to_string(), key.to_string());
163        Ok(())
164    }
165}
166
167impl From<Vec<Key>> for KeyIndex {
168    fn from(keys: Vec<Key>) -> Self {
169        Self::new(keys)
170    }
171}
172
#[cfg(test)]
mod test {
    use crate::DataType;

    use super::*;
    use rstest::*;

    /// An alias resolves to the same column position as the key it targets,
    /// and cannot be registered for a key that is not stored.
    #[rstest]
    fn test_alias() {
        let column = Key::new("a", DataType::U32);
        let mut index = KeyIndex::new(vec![column.clone()]);

        assert_eq!(index.add_alias(column.name(), "alias"), Ok(()));
        assert!(index.add_alias("c", "alias").is_err());

        let expected_target = column.name().to_string();
        assert_eq!(index.alias.get("alias"), Some(&expected_target));
        assert_eq!(index.get_column_index(&column), Some(0));

        let aliased = Key::new("alias", DataType::U32);
        assert_eq!(index.get_column_index(&aliased), Some(0));
    }

    /// Renaming keeps the column position and fails for an unknown key.
    #[rstest]
    fn test_rename() {
        let mut index = KeyIndex::new(vec![
            Key::new("a", DataType::U32),
            Key::new("b", DataType::U32),
        ]);

        assert_eq!(index.rename_key("a", "new_key".into()), Ok(()));

        let renamed = Key::new("new_key", DataType::U32);
        assert_eq!(index.get_column_index(&renamed), Some(0));
        assert!(index.rename_key("c", "alias".into()).is_err());
    }

    /// Construction keeps the given keys in their original order.
    #[rstest]
    #[case(
        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]
    )]
    fn test_key_index_new(#[case] input: Vec<Key>, #[case] expected: Vec<Key>) {
        let index = KeyIndex::new(input);
        assert_eq!(index.keys, expected);
        assert_eq!(index.get_keys(), expected);
    }

    /// Equal key order passes the check, swapped key order is reported.
    #[rstest]
    #[case(
        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
        Ok(())
    )]
    #[case(
        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
        vec![Key::new("b", DataType::U32), Key::new("a", DataType::U32)],
        Err(Error::IndexOutOfOrder(
            vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
            vec![Key::new("b", DataType::U32), Key::new("a", DataType::U32)]
        ))
    )]
    fn test_key_index_check_order_of_indexes(
        #[case] left_keys: Vec<Key>,
        #[case] right_keys: Vec<Key>,
        #[case] expected: Result<(), Error>,
    ) {
        // One side goes through `From`, the other through `new`.
        let left: KeyIndex = left_keys.into();
        let right = KeyIndex::new(right_keys);
        assert_eq!(left.check_order_of_indexes(&right), expected);
    }

    /// Removing a key returns it together with the position it occupied.
    #[rstest]
    #[case(
        KeyIndex::new(vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]),
        (Key::new("a", DataType::U32), 0)
    )]
    #[case(
        KeyIndex::new(vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]),
        (Key::new("b", DataType::U32), 1)
    )]
    fn test_key_index_remove_key(#[case] mut index: KeyIndex, #[case] expected: (Key, usize)) {
        let removed = index.remove_key(&expected.0);
        assert_eq!(removed, Some(expected));
    }
}