trs_dataframe/dataframe/column_store/
key_index.rs

1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::ArrayView1;
4use serde::{Deserialize, Serialize};
5
6use crate::{error::Error, Key};
7
8/// [`KeyIndex`] is used to store the keys for the [`super::ColumnFrame`]
9/// The keys are stored in the order they are added - the order is preserved
10/// The keys are stored in the [`Vec`] and the indexes are stored in the [`HashMap`]
11/// The indexes are used to access the data in the [`super::ColumnFrame`] by the column [`Key`]
12/// NOTE: The keys are unique - if the key is already present, it will be removed
13#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
14pub struct KeyIndex {
15    pub keys: Vec<Key>,
16    indexes: HashMap<String, usize>,
17    pub alias: HashMap<String, String>,
18}
19
20impl KeyIndex {
21    pub fn new(keys: Vec<Key>) -> Self {
22        let mut indexes = HashMap::with_capacity(keys.len());
23        let mut removed = 0;
24        let mut actual_keys = Vec::with_capacity(keys.len());
25        for (idx, key) in keys.into_iter().enumerate() {
26            if indexes.contains_key(key.name()) {
27                removed += 1;
28            } else {
29                indexes.insert(key.name().to_string(), idx.saturating_sub(removed));
30                actual_keys.push(key)
31            }
32        }
33        Self {
34            keys: actual_keys,
35            indexes,
36            alias: HashMap::new(),
37        }
38    }
39
40    pub fn len(&self) -> usize {
41        self.keys.len()
42    }
43    pub fn is_empty(&self) -> bool {
44        self.keys.is_empty()
45    }
46
47    pub fn get_column_index(&self, key: &Key) -> Option<usize> {
48        if let Some(f) = self.indexes.get(key.name()) {
49            Some(*f)
50        } else {
51            self.alias
52                .get(key.name())
53                .and_then(|alias| self.indexes.get(alias).copied())
54        }
55    }
56
57    pub fn get_keys(&self) -> &[Key] {
58        &self.keys
59    }
60
61    pub fn get_key(&self, idx: usize) -> Option<Key> {
62        self.keys.get(idx).cloned()
63    }
64    pub fn get_complement_keys(&self, keys: &[Key]) -> Vec<Key> {
65        self.keys
66            .iter()
67            .filter(|key| !keys.contains(key))
68            .cloned()
69            .collect()
70    }
71
72    pub fn select(&self, keys: &[Key]) -> KeyIndex {
73        let mut new_keys = Vec::with_capacity(keys.len());
74        let mut new_indexes = HashMap::with_capacity(keys.len());
75
76        for key in keys.iter() {
77            if let Some(idx) = self.indexes.get(key.name()) {
78                new_indexes.insert(key.name().to_string(), *idx);
79                new_keys.push(key.to_owned());
80            } else if let Some(alias_key) = self.alias.get(key.name()) {
81                if let Some(idx) = self.indexes.get(alias_key) {
82                    new_indexes.insert(key.name().to_string(), *idx);
83                    new_keys.push(key.to_owned());
84                }
85            }
86        }
87        Self {
88            keys: new_keys,
89            indexes: new_indexes,
90            alias: HashMap::new(),
91        }
92    }
93
94    pub fn indexes(&self) -> Vec<usize> {
95        self.indexes.values().copied().collect()
96    }
97
98    pub fn store_key(&mut self, key: Key) {
99        if self.indexes.contains_key(key.name()) {
100            return;
101        }
102        self.keys.push(key.clone());
103        self.indexes
104            .insert(key.name().to_string(), self.keys.len() - 1);
105    }
106
107    pub fn remove_key(&mut self, key: &Key) -> Option<(Key, usize)> {
108        let idx = self.indexes.remove(key.name())?;
109        let current = self.keys.remove(idx);
110        Some((current, idx))
111    }
112
113    pub fn get_as_candidate(&self, row: ArrayView1<DataValue>) -> HashMap<Key, DataValue> {
114        let mut result = HashMap::with_capacity(self.keys.len());
115        for (key, idx) in self.indexes.iter() {
116            result.insert(key.into(), row[*idx].clone());
117        }
118        result
119    }
120
121    pub fn to_vec_row(&self, candidate: HashMap<Key, DataValue>) -> Vec<DataValue> {
122        self.keys
123            .iter()
124            .map(|key| candidate.get(key).cloned().unwrap_or_default())
125            .collect()
126    }
127
128    pub fn check_order_of_indexes(&self, other: &Self) -> Result<(), Error> {
129        for (self_key, other_key) in self.keys.iter().zip(other.keys.iter()) {
130            if self_key != other_key {
131                return Err(Error::IndexOutOfOrder(
132                    self.keys.clone(),
133                    other.keys.clone(),
134                ));
135            }
136        }
137        Ok(())
138    }
139
140    pub fn rename_key(&mut self, key: &str, new_key: Key) -> Result<(), Error> {
141        if let Some(idx) = self.indexes.remove(key) {
142            self.indexes.insert(new_key.to_string(), idx);
143            self.keys[idx] = new_key;
144            Ok(())
145        } else {
146            Err(Error::NotFound(key.into()))
147        }
148    }
149
150    pub fn add_alias(&mut self, key: &str, alias: &str) -> Result<(), Error> {
151        if !self.indexes.contains_key(key) {
152            return Err(Error::NotFound(key.into()));
153        }
154        self.alias.insert(alias.to_string(), key.to_string());
155        Ok(())
156    }
157}
158
159impl From<Vec<Key>> for KeyIndex {
160    fn from(keys: Vec<Key>) -> Self {
161        Self::new(keys)
162    }
163}
164
165#[cfg(test)]
166mod test {
167    use crate::DataType;
168
169    use super::*;
170    use rstest::*;
171
172    #[rstest]
173    fn test_alias() {
174        let key = Key::new("a", DataType::U32);
175        let mut key_index = KeyIndex::new(vec![key.clone()]);
176        assert_eq!(key_index.add_alias(key.name(), "alias"), Ok(()));
177        assert!(key_index.add_alias("c", "alias").is_err());
178        assert_eq!(key_index.alias.get("alias"), Some(&key.name().to_string()));
179        assert_eq!(key_index.get_column_index(&key), Some(0));
180        assert_eq!(
181            key_index.get_column_index(&Key::new("alias", DataType::U32)),
182            Some(0)
183        );
184    }
185
186    #[rstest]
187    fn test_rename() {
188        let key = Key::new("a", DataType::U32);
189        let mut key_index = KeyIndex::new(vec![key.clone(), Key::new("b", DataType::U32)]);
190        assert_eq!(key_index.rename_key("a", "new_key".into()), Ok(()));
191        assert_eq!(
192            key_index.get_column_index(&Key::new("new_key", DataType::U32)),
193            Some(0)
194        );
195        assert!(key_index.rename_key("c", "alias".into()).is_err());
196    }
197
198    #[rstest]
199    #[case(
200        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
201        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]
202    )]
203    fn test_key_index_new(#[case] keys: Vec<Key>, #[case] expected: Vec<Key>) {
204        let key_index = KeyIndex::new(keys);
205        assert_eq!(key_index.keys, expected);
206        assert_eq!(key_index.get_keys(), expected);
207    }
208
209    #[rstest]
210    #[case(
211        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
212        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
213        Ok(())
214    )]
215    #[case(
216        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
217        vec![Key::new("b", DataType::U32), Key::new("a", DataType::U32)],
218        Err(Error::IndexOutOfOrder(
219            vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
220            vec![Key::new("b", DataType::U32), Key::new("a", DataType::U32)]
221        ))
222    )]
223    fn test_key_index_check_order_of_indexes(
224        #[case] keys: Vec<Key>,
225        #[case] other_keys: Vec<Key>,
226        #[case] expected: Result<(), Error>,
227    ) {
228        let key_index: KeyIndex = keys.into();
229        let other_key_index = KeyIndex::new(other_keys);
230        assert_eq!(key_index.check_order_of_indexes(&other_key_index), expected);
231    }
232
233    #[rstest]
234    #[case(
235        KeyIndex::new(vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]),
236        (Key::new("a", DataType::U32), 0)
237    )]
238    #[case(
239        KeyIndex::new(vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]),
240        (Key::new("b", DataType::U32), 1)
241    )]
242    fn test_key_index_remove_key(#[case] mut key_index: KeyIndex, #[case] expected: (Key, usize)) {
243        let key = expected.0.clone();
244        assert_eq!(key_index.remove_key(&key), Some(expected));
245    }
246}