trs_dataframe/dataframe/column_store/
from.rs

1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::{Array1, Array2};
4
5use crate::{Key, MLChefMap};
6
7use super::{ColumnFrame, KeyIndex};
8
9/// NOTE: Because of randomnes of the key order in the hashmap, the order of
10/// the keys are sorted!
11impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
12    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
13        let mut keys = dataframe
14            .iter()
15            .flat_map(|x| x.keys())
16            .cloned()
17            .collect::<Vec<_>>();
18        keys.sort();
19        let index = KeyIndex::new(keys);
20        let mut data_frame = Array2::default((dataframe.len(), index.len()));
21        for (idx, row) in dataframe.iter().enumerate() {
22            for (key, value) in row.iter() {
23                if let Some(column) = index.get_column_index(key) {
24                    if let Some(x) = data_frame.get_mut((idx, column)) {
25                        *x = value.clone();
26                    }
27                }
28            }
29        }
30        Self::new(index, data_frame)
31    }
32}
33
34/// NOTE: Because of randomnes of the key order in the hashmap, the order of
35/// the keys are sorted!
36impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
37    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
38        let mut keys = dataframe
39            .iter()
40            .flat_map(|x| x.keys())
41            .cloned()
42            .collect::<Vec<_>>();
43        keys.sort();
44        let index = KeyIndex::new(keys);
45        let mut data_frame = Array2::default((dataframe.len(), index.len()));
46        for (idx, row) in dataframe.iter().enumerate() {
47            for (key, value) in row.iter() {
48                if let Some(column) = index.get_column_index(key) {
49                    if let Some(x) = data_frame.get_mut((idx, column)) {
50                        *x = value.clone();
51                    }
52                }
53            }
54        }
55        Self::new(index, data_frame)
56    }
57}
58
59/// NOTE: Because of randomnes of the key order in the hashmap, the order of
60/// the keys are sorted!
61impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
62    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
63        let mut keys = dataframe
64            .keys()
65            .map(|x| x.as_str().into())
66            .collect::<Vec<_>>();
67        keys.sort();
68        let index = KeyIndex::new(keys);
69        let size = dataframe
70            .values()
71            .map(|x| x.len())
72            .max()
73            .unwrap_or_default();
74        let mut data_frame = Array2::default((size, index.len()));
75        for (key, value) in dataframe.into_iter() {
76            let key = key.as_str().into();
77            for (idx, value) in value.into_iter().enumerate() {
78                if let Some(column) = index.get_column_index(&key) {
79                    if let Some(x) = data_frame.get_mut((idx, column)) {
80                        *x = value.clone();
81                    }
82                }
83            }
84        }
85        Self::new(index, data_frame)
86    }
87}
88
89impl From<MLChefMap> for ColumnFrame {
90    fn from(dataframe: MLChefMap) -> Self {
91        let mut keys = dataframe
92            .keys()
93            .map(|x| x.as_str().into())
94            .collect::<Vec<_>>();
95        keys.sort();
96        let index = KeyIndex::new(keys);
97        let size = dataframe
98            .values()
99            .map(|x| x.len())
100            .max()
101            .unwrap_or_default();
102        let mut data_frame = Array2::default((size, index.len()));
103        for (key, value) in dataframe.into_iter() {
104            let key = key.as_str().into();
105            for (idx, value) in value.into_iter().enumerate() {
106                if let Some(column) = index.get_column_index(&key) {
107                    if let Some(x) = data_frame.get_mut((idx, column)) {
108                        *x = value.clone();
109                    }
110                }
111            }
112        }
113        Self::new(index, data_frame)
114    }
115}
116
117impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
118    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
119        if dataframe.is_empty() {
120            return Self::default();
121        }
122        let mut index = KeyIndex::new(vec![]);
123
124        let mut data_frame = Array2::default((
125            dataframe.first().expect("Expects data").1.len(),
126            dataframe.len(),
127        ));
128        for (key, value) in dataframe {
129            index.store_key(key.clone());
130            let column_index = index.get_column_index(&key).expect("BUG: Defined above!");
131            let mut res = data_frame.slice_mut(ndarray::s![.., column_index]);
132            res.assign(&Array1::from_vec(value));
133        }
134        Self::new(index, data_frame)
135    }
136}
137
138impl From<std::collections::HashMap<String, Array1<DataValue>>> for ColumnFrame {
139    fn from(mut dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
140        let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
141        let index = KeyIndex::new(keys);
142        let mut arr = Array2::default((
143            dataframe.values().next().map_or(0, |v| v.len()),
144            index.len(),
145        ));
146
147        for key in index.get_keys() {
148            if let Some(value) = dataframe.remove(key.name()) {
149                let column_index = index.get_column_index(key).expect("BUG: Defined above!");
150                let mut res = arr.slice_mut(ndarray::s![.., column_index]);
151                res.assign(&value);
152            }
153        }
154        ColumnFrame::new(index, arr)
155    }
156}
157
/// Converts a polars `DataFrame` into a [`ColumnFrame`].
///
/// The index is built from the frame's column names in the order polars
/// reports them, and the array has `dataframe.height()` rows. Every value of
/// every column is converted with `crate::dataframe::from_polars_value`.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for ColumnFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let names: Vec<_> = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect();
        let index = KeyIndex::new(names);
        let mut table = Array2::default((dataframe.height(), index.len()));

        for series in dataframe.iter() {
            let key = Key::from(series.name().as_str());
            // Columns the index does not know are skipped silently.
            if let Some(position) = index.get_column_index(&key) {
                let values: Vec<_> = series
                    .iter()
                    .map(crate::dataframe::from_polars_value)
                    .collect();
                table
                    .column_mut(position)
                    .assign(&Array1::from_vec(values));
            }
        }
        ColumnFrame::new(index, table)
    }
}