trs_dataframe/dataframe/column_store/
from.rs1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::{Array1, Array2};
4
5use crate::{detect_dtype, detect_dtype_arr, Key, MLChefMap};
6
7use super::{ColumnFrame, KeyIndex};
8
9impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
12 fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
13 let mut keys = dataframe
14 .iter()
15 .flat_map(|x| x.keys())
16 .cloned()
17 .collect::<Vec<_>>();
18 keys.sort();
19 let index = KeyIndex::new(keys);
20 let mut data_frame = Array2::default((dataframe.len(), index.len()));
21 for (idx, row) in dataframe.iter().enumerate() {
22 for (key, value) in row.iter() {
23 if let Some(column) = index.get_column_index(key) {
24 if let Some(x) = data_frame.get_mut((idx, column)) {
25 *x = value.clone();
26 }
27 }
28 }
29 }
30 Self::new(index, data_frame)
31 }
32}
33
34impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
37 fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
38 let mut keys = dataframe
39 .iter()
40 .flat_map(|x| x.keys())
41 .cloned()
42 .collect::<Vec<_>>();
43 keys.sort();
44 let index = KeyIndex::new(keys);
45 let mut data_frame = Array2::default((dataframe.len(), index.len()));
46 for (idx, row) in dataframe.iter().enumerate() {
47 for (key, value) in row.iter() {
48 if let Some(column) = index.get_column_index(key) {
49 if let Some(x) = data_frame.get_mut((idx, column)) {
50 *x = value.clone();
51 }
52 }
53 }
54 }
55 Self::new(index, data_frame)
56 }
57}
58
59impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
62 fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
63 let mut keys = dataframe
64 .keys()
65 .map(|x| x.as_str().into())
66 .collect::<Vec<_>>();
67 keys.sort();
68 let mut index = KeyIndex::new(keys);
69 let size = dataframe
70 .values()
71 .map(|x| x.len())
72 .max()
73 .unwrap_or_default();
74 let mut data_frame = Array2::default((size, index.len()));
75 for (key, value) in dataframe.into_iter() {
76 let dtype = detect_dtype_arr(&value);
77 let new_key = Key::new(&key, dtype);
78 for (idx, value) in value.into_iter().enumerate() {
79 if let Some(column) = index.get_column_index(&new_key) {
80 if let Some(x) = data_frame.get_mut((idx, column)) {
81 *x = value.clone();
82 }
83 }
84 }
85 let _ = index.rename_key(key.as_str(), new_key);
86 }
87 Self::new(index, data_frame)
88 }
89}
90
91impl From<MLChefMap> for ColumnFrame {
92 fn from(dataframe: MLChefMap) -> Self {
93 let mut keys = dataframe
94 .keys()
95 .map(|x| x.as_str().into())
96 .collect::<Vec<_>>();
97 keys.sort();
98 let mut index = KeyIndex::new(keys);
99 let size = dataframe
100 .values()
101 .map(|x| x.len())
102 .max()
103 .unwrap_or_default();
104 let mut data_frame = Array2::default((size, index.len()));
105 for (key, value) in dataframe.into_iter() {
106 let dtype = detect_dtype_arr(&value);
107 let new_key = Key::new(&key, dtype);
108 for (idx, value) in value.into_iter().enumerate() {
109 if let Some(column) = index.get_column_index(&new_key) {
110 if let Some(x) = data_frame.get_mut((idx, column)) {
111 *x = value.clone();
112 }
113 }
114 }
115 let _ = index.rename_key(key.as_str(), new_key);
116 }
117 Self::new(index, data_frame)
118 }
119}
120
121impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
122 fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
123 if dataframe.is_empty() {
124 return Self::default();
125 }
126 let mut index = KeyIndex::new(vec![]);
127
128 let mut data_frame = Array2::default((
129 dataframe.first().expect("Expects data").1.len(),
130 dataframe.len(),
131 ));
132 for (key, value) in dataframe {
133 index.store_key(key.clone());
134 let column_index = index.get_column_index(&key).expect("BUG: Defined above!");
135 let mut res = data_frame.slice_mut(ndarray::s![.., column_index]);
136 res.assign(&Array1::from_vec(value));
137 }
138 Self::new(index, data_frame)
139 }
140}
141
142impl From<std::collections::HashMap<String, Array1<DataValue>>> for ColumnFrame {
143 fn from(mut dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
144 let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
145 let mut index = KeyIndex::new(keys);
146 let mut arr = Array2::default((
147 dataframe.values().next().map_or(0, |v| v.len()),
148 index.len(),
149 ));
150
151 for (column_index, key) in index.get_keys_mut().iter_mut().enumerate() {
152 if let Some(value) = dataframe.remove(key.name()) {
153 let mut res = arr.slice_mut(ndarray::s![.., column_index]);
154 key.ctype = detect_dtype(res.get(0).unwrap_or_else(|| &DataValue::Null));
155 res.assign(&value);
156 }
157 }
158 ColumnFrame::new(index, arr)
159 }
160}
161
#[cfg(feature = "polars-df")]
/// Builds a [`ColumnFrame`] from a polars `DataFrame`.
///
/// The index is created from the frame's column names. Every column whose key
/// resolves in the index is converted value by value with `from_polars_value`,
/// written into the backing array, and its key is replaced through `rename_key` by
/// one carrying the detected dtype. A column that does not resolve keeps
/// `DataType::Unknown`.
impl From<polars::prelude::DataFrame> for ColumnFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let names = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect::<Vec<_>>();
        let mut index = KeyIndex::new(names);
        let mut arr = Array2::default((dataframe.height(), index.len()));

        for column in dataframe.iter() {
            let mut key = Key::from(column.name().as_str());
            let detected = match index.get_column_index(&key) {
                Some(column_index) => {
                    let values: Vec<DataValue> = column
                        .iter()
                        .map(crate::dataframe::from_polars_value)
                        .collect();
                    // Detect before the values are moved into the array.
                    let dtype = detect_dtype_arr(&values);
                    arr.slice_mut(ndarray::s![.., column_index])
                        .assign(&Array1::from_vec(values));
                    dtype
                }
                None => crate::DataType::Unknown,
            };
            key.ctype = detected;
            // Best effort: the outcome of the rename is deliberately ignored.
            let _ = index.rename_key(column.name(), key);
        }

        ColumnFrame::new(index, arr)
    }
}