Skip to main content

trs_dataframe/dataframe/column_store/
from.rs

1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::{Array1, Array2};
4
5use crate::{detect_dtype, detect_dtype_arr, Key, MLChefMap};
6
7use super::{ColumnFrame, KeyIndex};
8
9/// NOTE: Because of randomnes of the key order in the hashmap, the order of
10/// the keys are sorted!
11impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
12    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
13        let mut keys = dataframe
14            .iter()
15            .flat_map(|x| x.keys())
16            .cloned()
17            .collect::<Vec<_>>();
18        keys.sort();
19        keys.dedup();
20        let index = KeyIndex::new(keys);
21        let mut data_frame = Array2::default((dataframe.len(), index.len()));
22        for (idx, row) in dataframe.iter().enumerate() {
23            for (key, value) in row.iter() {
24                if let Some(column) = index.get_column_index(key) {
25                    if let Some(x) = data_frame.get_mut((idx, column)) {
26                        *x = value.clone();
27                    }
28                }
29            }
30        }
31        Self::new(index, data_frame)
32    }
33}
34
35/// NOTE: Because of randomnes of the key order in the hashmap, the order of
36/// the keys are sorted!
37impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
38    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
39        let mut keys = dataframe
40            .iter()
41            .flat_map(|x| x.keys())
42            .cloned()
43            .collect::<Vec<_>>();
44        keys.sort();
45        keys.dedup();
46        let index = KeyIndex::new(keys);
47        let mut data_frame = Array2::default((dataframe.len(), index.len()));
48        for (idx, row) in dataframe.iter().enumerate() {
49            for (key, value) in row.iter() {
50                if let Some(column) = index.get_column_index(key) {
51                    if let Some(x) = data_frame.get_mut((idx, column)) {
52                        *x = value.clone();
53                    }
54                }
55            }
56        }
57        Self::new(index, data_frame)
58    }
59}
60
61/// NOTE: Because of randomnes of the key order in the hashmap, the order of
62/// the keys are sorted!
63impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
64    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
65        let mut keys = dataframe
66            .keys()
67            .map(|x| x.as_str().into())
68            .collect::<Vec<_>>();
69        keys.sort();
70        keys.dedup();
71        let mut index = KeyIndex::new(keys);
72        let size = dataframe
73            .values()
74            .map(|x| x.len())
75            .max()
76            .unwrap_or_default();
77        let mut data_frame = Array2::default((size, index.len()));
78        for (key, value) in dataframe.into_iter() {
79            let dtype = detect_dtype_arr(&value);
80            let new_key = Key::new(&key, dtype);
81            for (idx, value) in value.into_iter().enumerate() {
82                if let Some(column) = index.get_column_index(&new_key) {
83                    if let Some(x) = data_frame.get_mut((idx, column)) {
84                        *x = value.clone();
85                    }
86                }
87            }
88            let _ = index.rename_key(key.as_str(), new_key);
89        }
90        Self::new(index, data_frame)
91    }
92}
93
94impl From<MLChefMap> for ColumnFrame {
95    fn from(dataframe: MLChefMap) -> Self {
96        let mut keys = dataframe
97            .keys()
98            .map(|x| x.as_str().into())
99            .collect::<Vec<_>>();
100        keys.sort();
101        keys.dedup();
102        let mut index = KeyIndex::new(keys);
103        let size = dataframe
104            .values()
105            .map(|x| x.len())
106            .max()
107            .unwrap_or_default();
108        let mut data_frame = Array2::default((size, index.len()));
109        for (key, value) in dataframe.into_iter() {
110            let dtype = detect_dtype_arr(&value);
111            let new_key = Key::new(&key, dtype);
112            for (idx, value) in value.into_iter().enumerate() {
113                if let Some(column) = index.get_column_index(&new_key) {
114                    if let Some(x) = data_frame.get_mut((idx, column)) {
115                        *x = value.clone();
116                    }
117                }
118            }
119            let _ = index.rename_key(key.as_str(), new_key);
120        }
121        Self::new(index, data_frame)
122    }
123}
124
125impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
126    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
127        if dataframe.is_empty() {
128            return Self::default();
129        }
130        let mut index = KeyIndex::new(vec![]);
131
132        let mut data_frame = Array2::default((
133            dataframe.first().expect("Expects data").1.len(),
134            dataframe.len(),
135        ));
136        for (key, value) in dataframe {
137            index.store_key(key.clone());
138            let column_index = index.get_column_index(&key).expect("BUG: Defined above!");
139            let mut res = data_frame.slice_mut(ndarray::s![.., column_index]);
140            res.assign(&Array1::from_vec(value));
141        }
142        Self::new(index, data_frame)
143    }
144}
145
146impl From<std::collections::HashMap<String, Array1<DataValue>>> for ColumnFrame {
147    fn from(mut dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
148        let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
149        let mut index = KeyIndex::new(keys);
150        let mut arr = Array2::default((
151            dataframe.values().next().map_or(0, |v| v.len()),
152            index.len(),
153        ));
154
155        for (column_index, key) in index.get_keys_mut().iter_mut().enumerate() {
156            if let Some(value) = dataframe.remove(key.name()) {
157                let mut res = arr.slice_mut(ndarray::s![.., column_index]);
158                key.ctype = detect_dtype(res.get(0).unwrap_or_else(|| &DataValue::Null));
159                res.assign(&value);
160            }
161        }
162        ColumnFrame::new(index, arr)
163    }
164}
165
/// Builds a [`ColumnFrame`] from a polars `DataFrame`, keeping the column
/// order reported by `get_column_names`.
///
/// Every polars value is converted with `crate::dataframe::from_polars_value`,
/// and each key is re-registered in the index with the data type detected by
/// `detect_dtype_arr` for its column (`DataType::Unknown` when the column is
/// not found in the index).
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for ColumnFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let keys = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect::<Vec<_>>();
        let mut index = KeyIndex::new(keys);

        let mut arr = Array2::default((dataframe.height(), index.len()));
        for column in dataframe.iter() {
            let name = column.name();
            let mut key = Key::from(name.as_str());
            // Fill the matching array column and report its detected type;
            // a column unknown to the index stays untyped.
            let dtype = match index.get_column_index(&key) {
                Some(column_index) => {
                    let values: Vec<DataValue> = column
                        .iter()
                        .map(crate::dataframe::from_polars_value)
                        .collect();
                    let detected = detect_dtype_arr(&values);
                    arr.slice_mut(ndarray::s![.., column_index])
                        .assign(&Array1::from_vec(values));
                    detected
                }
                None => crate::DataType::Unknown,
            };
            key.ctype = dtype;
            // Best effort: the result of the rename is intentionally ignored.
            let _ = index.rename_key(name, key);
        }
        ColumnFrame::new(index, arr)
    }
}