trs_dataframe/dataframe/column_store/
from.rs

1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::{Array1, Array2};
4
5use crate::{detect_dtype, detect_dtype_arr, Key, MLChefMap};
6
7use super::{ColumnFrame, KeyIndex};
8
9/// NOTE: Because of randomnes of the key order in the hashmap, the order of
10/// the keys are sorted!
11impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
12    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
13        let mut keys = dataframe
14            .iter()
15            .flat_map(|x| x.keys())
16            .cloned()
17            .collect::<Vec<_>>();
18        keys.sort();
19        let index = KeyIndex::new(keys);
20        let mut data_frame = Array2::default((dataframe.len(), index.len()));
21        for (idx, row) in dataframe.iter().enumerate() {
22            for (key, value) in row.iter() {
23                if let Some(column) = index.get_column_index(key) {
24                    if let Some(x) = data_frame.get_mut((idx, column)) {
25                        *x = value.clone();
26                    }
27                }
28            }
29        }
30        Self::new(index, data_frame)
31    }
32}
33
34/// NOTE: Because of randomnes of the key order in the hashmap, the order of
35/// the keys are sorted!
36impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
37    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
38        let mut keys = dataframe
39            .iter()
40            .flat_map(|x| x.keys())
41            .cloned()
42            .collect::<Vec<_>>();
43        keys.sort();
44        let index = KeyIndex::new(keys);
45        let mut data_frame = Array2::default((dataframe.len(), index.len()));
46        for (idx, row) in dataframe.iter().enumerate() {
47            for (key, value) in row.iter() {
48                if let Some(column) = index.get_column_index(key) {
49                    if let Some(x) = data_frame.get_mut((idx, column)) {
50                        *x = value.clone();
51                    }
52                }
53            }
54        }
55        Self::new(index, data_frame)
56    }
57}
58
59/// NOTE: Because of randomnes of the key order in the hashmap, the order of
60/// the keys are sorted!
61impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
62    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
63        let mut keys = dataframe
64            .keys()
65            .map(|x| x.as_str().into())
66            .collect::<Vec<_>>();
67        keys.sort();
68        let mut index = KeyIndex::new(keys);
69        let size = dataframe
70            .values()
71            .map(|x| x.len())
72            .max()
73            .unwrap_or_default();
74        let mut data_frame = Array2::default((size, index.len()));
75        for (key, value) in dataframe.into_iter() {
76            let dtype = detect_dtype_arr(&value);
77            let new_key = Key::new(&key, dtype);
78            for (idx, value) in value.into_iter().enumerate() {
79                if let Some(column) = index.get_column_index(&new_key) {
80                    if let Some(x) = data_frame.get_mut((idx, column)) {
81                        *x = value.clone();
82                    }
83                }
84            }
85            let _ = index.rename_key(key.as_str(), new_key);
86        }
87        Self::new(index, data_frame)
88    }
89}
90
91impl From<MLChefMap> for ColumnFrame {
92    fn from(dataframe: MLChefMap) -> Self {
93        let mut keys = dataframe
94            .keys()
95            .map(|x| x.as_str().into())
96            .collect::<Vec<_>>();
97        keys.sort();
98        let mut index = KeyIndex::new(keys);
99        let size = dataframe
100            .values()
101            .map(|x| x.len())
102            .max()
103            .unwrap_or_default();
104        let mut data_frame = Array2::default((size, index.len()));
105        for (key, value) in dataframe.into_iter() {
106            let dtype = detect_dtype_arr(&value);
107            let new_key = Key::new(&key, dtype);
108            for (idx, value) in value.into_iter().enumerate() {
109                if let Some(column) = index.get_column_index(&new_key) {
110                    if let Some(x) = data_frame.get_mut((idx, column)) {
111                        *x = value.clone();
112                    }
113                }
114            }
115            let _ = index.rename_key(key.as_str(), new_key);
116        }
117        Self::new(index, data_frame)
118    }
119}
120
121impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
122    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
123        if dataframe.is_empty() {
124            return Self::default();
125        }
126        let mut index = KeyIndex::new(vec![]);
127
128        let mut data_frame = Array2::default((
129            dataframe.first().expect("Expects data").1.len(),
130            dataframe.len(),
131        ));
132        for (key, value) in dataframe {
133            index.store_key(key.clone());
134            let column_index = index.get_column_index(&key).expect("BUG: Defined above!");
135            let mut res = data_frame.slice_mut(ndarray::s![.., column_index]);
136            res.assign(&Array1::from_vec(value));
137        }
138        Self::new(index, data_frame)
139    }
140}
141
142impl From<std::collections::HashMap<String, Array1<DataValue>>> for ColumnFrame {
143    fn from(mut dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
144        let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
145        let mut index = KeyIndex::new(keys);
146        let mut arr = Array2::default((
147            dataframe.values().next().map_or(0, |v| v.len()),
148            index.len(),
149        ));
150
151        for (column_index, key) in index.get_keys_mut().iter_mut().enumerate() {
152            if let Some(value) = dataframe.remove(key.name()) {
153                let mut res = arr.slice_mut(ndarray::s![.., column_index]);
154                key.ctype = detect_dtype(res.get(0).unwrap_or_else(|| &DataValue::Null));
155                res.assign(&value);
156            }
157        }
158        ColumnFrame::new(index, arr)
159    }
160}
161
/// Builds a [`ColumnFrame`] from a polars `DataFrame` (only compiled with the
/// `polars-df` feature).
///
/// Columns keep the order reported by `get_column_names`, and the frame has
/// `height()` rows. Each polars value is converted with
/// `crate::dataframe::from_polars_value`; the column data type is detected
/// from the converted values and stored in the column key. A column that
/// cannot be found in the index gets `DataType::Unknown`.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for ColumnFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let column_names = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect::<Vec<_>>();
        let mut index = KeyIndex::new(column_names);
        let shape = (dataframe.height(), index.len());

        let mut arr = Array2::default(shape);
        for column in dataframe.iter() {
            let mut key = Key::from(column.name().as_str());
            // Fill the column and report its detected type in one step.
            let dtype = match index.get_column_index(&key) {
                Some(position) => {
                    let converted: Vec<DataValue> = column
                        .iter()
                        .map(crate::dataframe::from_polars_value)
                        .collect();
                    let detected = detect_dtype_arr(&converted);
                    arr.slice_mut(ndarray::s![.., position])
                        .assign(&Array1::from_vec(converted));
                    detected
                }
                None => crate::DataType::Unknown,
            };
            key.ctype = dtype;
            // Swap the untyped key for the typed one; the result is ignored.
            let _ = index.rename_key(column.name(), key);
        }
        ColumnFrame::new(index, arr)
    }
}
192}