trs_dataframe/dataframe/column_store/
key_index.rs

1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::ArrayView1;
4use serde::{Deserialize, Serialize};
5
6use crate::{error::Error, Key};
7
8/// [`KeyIndex`] is used to store the keys for the [`super::ColumnFrame`]
9/// The keys are stored in the order they are added - the order is preserved
10/// The keys are stored in the [`Vec`] and the indexes are stored in the [`HashMap`]
11/// The indexes are used to access the data in the [`super::ColumnFrame`] by the column [`Key`]
12/// NOTE: The keys are unique - if the key is already present, it will be removed
13#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
14pub struct KeyIndex {
15    pub keys: Vec<Key>,
16    indexes: HashMap<String, usize>,
17    pub alias: HashMap<String, String>,
18}
19
20impl KeyIndex {
21    pub fn new(keys: Vec<Key>) -> Self {
22        let mut indexes = HashMap::with_capacity(keys.len());
23        let mut removed = 0;
24        let mut actual_keys = Vec::with_capacity(keys.len());
25        for (idx, key) in keys.into_iter().enumerate() {
26            if indexes.contains_key(key.name()) {
27                removed += 1;
28            } else {
29                indexes.insert(key.name().to_string(), idx.saturating_sub(removed));
30                actual_keys.push(key)
31            }
32        }
33        Self {
34            keys: actual_keys,
35            indexes,
36            alias: HashMap::new(),
37        }
38    }
39
40    pub fn len(&self) -> usize {
41        self.keys.len()
42    }
43    pub fn is_empty(&self) -> bool {
44        self.keys.is_empty()
45    }
46
47    pub fn get_column_index(&self, key: &Key) -> Option<usize> {
48        if let Some(f) = self.indexes.get(key.name()) {
49            Some(*f)
50        } else {
51            self.alias
52                .get(key.name())
53                .and_then(|alias| self.indexes.get(alias).copied())
54        }
55    }
56
57    pub fn get_keys(&self) -> &[Key] {
58        &self.keys
59    }
60
61    pub fn get_key(&self, idx: usize) -> Option<Key> {
62        self.keys.get(idx).cloned()
63    }
64
65    pub fn get_complement_keys(&self, keys: &[Key]) -> Vec<Key> {
66        self.keys
67            .iter()
68            .filter(|key| !keys.contains(key))
69            .cloned()
70            .collect()
71    }
72
73    pub fn select(&self, keys: &[Key]) -> KeyIndex {
74        let mut new_keys = Vec::with_capacity(keys.len());
75        let mut new_indexes = HashMap::with_capacity(keys.len());
76
77        for key in keys.iter() {
78            if let Some(idx) = self.indexes.get(key.name()) {
79                new_indexes.insert(key.name().to_string(), *idx);
80                new_keys.push(key.to_owned());
81            } else if let Some(alias_key) = self.alias.get(key.name()) {
82                if let Some(idx) = self.indexes.get(alias_key) {
83                    new_indexes.insert(key.name().to_string(), *idx);
84                    new_keys.push(key.to_owned());
85                }
86            }
87        }
88        Self {
89            keys: new_keys,
90            indexes: new_indexes,
91            alias: HashMap::new(),
92        }
93    }
94
95    pub fn indexes(&self) -> Vec<usize> {
96        self.indexes.values().copied().collect()
97    }
98
99    pub fn store_key(&mut self, key: Key) {
100        if self.indexes.contains_key(key.name()) {
101            return;
102        }
103        self.keys.push(key.clone());
104        self.indexes
105            .insert(key.name().to_string(), self.keys.len() - 1);
106    }
107
108    pub fn remove_key(&mut self, key: &Key) -> Option<(Key, usize)> {
109        let idx = self.indexes.remove(key.name())?;
110        let current = self.keys.remove(idx);
111        Some((current, idx))
112    }
113
114    pub fn get_as_candidate(&self, row: ArrayView1<DataValue>) -> HashMap<Key, DataValue> {
115        let mut result = HashMap::with_capacity(self.keys.len());
116        for (key, idx) in self.indexes.iter() {
117            result.insert(key.into(), row[*idx].clone());
118        }
119        result
120    }
121
122    pub fn to_vec_row(&self, candidate: HashMap<Key, DataValue>) -> Vec<DataValue> {
123        self.keys
124            .iter()
125            .map(|key| candidate.get(key).cloned().unwrap_or_default())
126            .collect()
127    }
128
129    pub fn check_order_of_indexes(&self, other: &Self) -> Result<(), Error> {
130        for (self_key, other_key) in self.keys.iter().zip(other.keys.iter()) {
131            if self_key != other_key {
132                return Err(Error::IndexOutOfOrder(
133                    self.keys.clone(),
134                    other.keys.clone(),
135                ));
136            }
137        }
138        Ok(())
139    }
140
141    pub fn rename_key(&mut self, key: &str, new_key: Key) -> Result<(), Error> {
142        if let Some(idx) = self.indexes.remove(key) {
143            self.indexes.insert(new_key.to_string(), idx);
144            self.keys[idx] = new_key;
145            Ok(())
146        } else {
147            Err(Error::NotFound(key.into()))
148        }
149    }
150
151    pub fn add_alias(&mut self, key: &str, alias: &str) -> Result<(), Error> {
152        if !self.indexes.contains_key(key) {
153            return Err(Error::NotFound(key.into()));
154        }
155        self.alias.insert(alias.to_string(), key.to_string());
156        Ok(())
157    }
158}
159
160impl From<Vec<Key>> for KeyIndex {
161    fn from(keys: Vec<Key>) -> Self {
162        Self::new(keys)
163    }
164}
165
166#[cfg(test)]
167mod test {
168    use crate::DataType;
169
170    use super::*;
171    use rstest::*;
172
173    #[rstest]
174    fn test_alias() {
175        let key = Key::new("a", DataType::U32);
176        let mut key_index = KeyIndex::new(vec![key.clone()]);
177        assert_eq!(key_index.add_alias(key.name(), "alias"), Ok(()));
178        assert!(key_index.add_alias("c", "alias").is_err());
179        assert_eq!(key_index.alias.get("alias"), Some(&key.name().to_string()));
180        assert_eq!(key_index.get_column_index(&key), Some(0));
181        assert_eq!(
182            key_index.get_column_index(&Key::new("alias", DataType::U32)),
183            Some(0)
184        );
185    }
186
187    #[rstest]
188    fn test_rename() {
189        let key = Key::new("a", DataType::U32);
190        let mut key_index = KeyIndex::new(vec![key.clone(), Key::new("b", DataType::U32)]);
191        assert_eq!(key_index.rename_key("a", "new_key".into()), Ok(()));
192        assert_eq!(
193            key_index.get_column_index(&Key::new("new_key", DataType::U32)),
194            Some(0)
195        );
196        assert!(key_index.rename_key("c", "alias".into()).is_err());
197    }
198
199    #[rstest]
200    #[case(
201        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
202        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]
203    )]
204    fn test_key_index_new(#[case] keys: Vec<Key>, #[case] expected: Vec<Key>) {
205        let key_index = KeyIndex::new(keys);
206        assert_eq!(key_index.keys, expected);
207        assert_eq!(key_index.get_keys(), expected);
208    }
209
210    #[rstest]
211    #[case(
212        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
213        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
214        Ok(())
215    )]
216    #[case(
217        vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
218        vec![Key::new("b", DataType::U32), Key::new("a", DataType::U32)],
219        Err(Error::IndexOutOfOrder(
220            vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)],
221            vec![Key::new("b", DataType::U32), Key::new("a", DataType::U32)]
222        ))
223    )]
224    fn test_key_index_check_order_of_indexes(
225        #[case] keys: Vec<Key>,
226        #[case] other_keys: Vec<Key>,
227        #[case] expected: Result<(), Error>,
228    ) {
229        let key_index: KeyIndex = keys.into();
230        let other_key_index = KeyIndex::new(other_keys);
231        assert_eq!(key_index.check_order_of_indexes(&other_key_index), expected);
232    }
233
234    #[rstest]
235    #[case(
236        KeyIndex::new(vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]),
237        (Key::new("a", DataType::U32), 0)
238    )]
239    #[case(
240        KeyIndex::new(vec![Key::new("a", DataType::U32), Key::new("b", DataType::U32)]),
241        (Key::new("b", DataType::U32), 1)
242    )]
243    fn test_key_index_remove_key(#[case] mut key_index: KeyIndex, #[case] expected: (Key, usize)) {
244        let key = expected.0.clone();
245        assert_eq!(key_index.remove_key(&key), Some(expected));
246    }
247}