trs_dataframe/dataframe/column_store/
from.rs1use data_value::DataValue;
2use halfbrown::HashMap;
3use ndarray::{Array1, Array2};
4
5use crate::{detect_dtype, detect_dtype_arr, Key, MLChefMap};
6
7use super::{ColumnFrame, KeyIndex};
8
9impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
12 fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
13 let mut keys = dataframe
14 .iter()
15 .flat_map(|x| x.keys())
16 .cloned()
17 .collect::<Vec<_>>();
18 keys.sort();
19 keys.dedup();
20 let index = KeyIndex::new(keys);
21 let mut data_frame = Array2::default((dataframe.len(), index.len()));
22 for (idx, row) in dataframe.iter().enumerate() {
23 for (key, value) in row.iter() {
24 if let Some(column) = index.get_column_index(key) {
25 if let Some(x) = data_frame.get_mut((idx, column)) {
26 *x = value.clone();
27 }
28 }
29 }
30 }
31 Self::new(index, data_frame)
32 }
33}
34
35impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
38 fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
39 let mut keys = dataframe
40 .iter()
41 .flat_map(|x| x.keys())
42 .cloned()
43 .collect::<Vec<_>>();
44 keys.sort();
45 keys.dedup();
46 let index = KeyIndex::new(keys);
47 let mut data_frame = Array2::default((dataframe.len(), index.len()));
48 for (idx, row) in dataframe.iter().enumerate() {
49 for (key, value) in row.iter() {
50 if let Some(column) = index.get_column_index(key) {
51 if let Some(x) = data_frame.get_mut((idx, column)) {
52 *x = value.clone();
53 }
54 }
55 }
56 }
57 Self::new(index, data_frame)
58 }
59}
60
61impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
64 fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
65 let mut keys = dataframe
66 .keys()
67 .map(|x| x.as_str().into())
68 .collect::<Vec<_>>();
69 keys.sort();
70 keys.dedup();
71 let mut index = KeyIndex::new(keys);
72 let size = dataframe
73 .values()
74 .map(|x| x.len())
75 .max()
76 .unwrap_or_default();
77 let mut data_frame = Array2::default((size, index.len()));
78 for (key, value) in dataframe.into_iter() {
79 let dtype = detect_dtype_arr(&value);
80 let new_key = Key::new(&key, dtype);
81 for (idx, value) in value.into_iter().enumerate() {
82 if let Some(column) = index.get_column_index(&new_key) {
83 if let Some(x) = data_frame.get_mut((idx, column)) {
84 *x = value.clone();
85 }
86 }
87 }
88 let _ = index.rename_key(key.as_str(), new_key);
89 }
90 Self::new(index, data_frame)
91 }
92}
93
94impl From<MLChefMap> for ColumnFrame {
95 fn from(dataframe: MLChefMap) -> Self {
96 let mut keys = dataframe
97 .keys()
98 .map(|x| x.as_str().into())
99 .collect::<Vec<_>>();
100 keys.sort();
101 keys.dedup();
102 let mut index = KeyIndex::new(keys);
103 let size = dataframe
104 .values()
105 .map(|x| x.len())
106 .max()
107 .unwrap_or_default();
108 let mut data_frame = Array2::default((size, index.len()));
109 for (key, value) in dataframe.into_iter() {
110 let dtype = detect_dtype_arr(&value);
111 let new_key = Key::new(&key, dtype);
112 for (idx, value) in value.into_iter().enumerate() {
113 if let Some(column) = index.get_column_index(&new_key) {
114 if let Some(x) = data_frame.get_mut((idx, column)) {
115 *x = value.clone();
116 }
117 }
118 }
119 let _ = index.rename_key(key.as_str(), new_key);
120 }
121 Self::new(index, data_frame)
122 }
123}
124
125impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
126 fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
127 if dataframe.is_empty() {
128 return Self::default();
129 }
130 let mut index = KeyIndex::new(vec![]);
131
132 let mut data_frame = Array2::default((
133 dataframe.first().expect("Expects data").1.len(),
134 dataframe.len(),
135 ));
136 for (key, value) in dataframe {
137 index.store_key(key.clone());
138 let column_index = index.get_column_index(&key).expect("BUG: Defined above!");
139 let mut res = data_frame.slice_mut(ndarray::s![.., column_index]);
140 res.assign(&Array1::from_vec(value));
141 }
142 Self::new(index, data_frame)
143 }
144}
145
146impl From<std::collections::HashMap<String, Array1<DataValue>>> for ColumnFrame {
147 fn from(mut dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
148 let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
149 let mut index = KeyIndex::new(keys);
150 let mut arr = Array2::default((
151 dataframe.values().next().map_or(0, |v| v.len()),
152 index.len(),
153 ));
154
155 for (column_index, key) in index.get_keys_mut().iter_mut().enumerate() {
156 if let Some(value) = dataframe.remove(key.name()) {
157 let mut res = arr.slice_mut(ndarray::s![.., column_index]);
158 key.ctype = detect_dtype(res.get(0).unwrap_or_else(|| &DataValue::Null));
159 res.assign(&value);
160 }
161 }
162 ColumnFrame::new(index, arr)
163 }
164}
165
/// Conversion from a polars `DataFrame` (only built with the `polars-df`
/// feature).
///
/// The index is created from the frame's column names and the array gets one
/// row per `dataframe.height()`. Every polars column is converted value by
/// value with `crate::dataframe::from_polars_value`, written to its column of
/// the array, and its key is re-registered with the dtype that
/// `detect_dtype_arr` reports for the converted values. A column the index
/// does not know is not copied and its key keeps `DataType::Unknown`; the
/// result of the rename is ignored either way.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for ColumnFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let names = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect::<Vec<_>>();
        let mut index = KeyIndex::new(names);

        let shape = (dataframe.height(), index.len());
        let mut arr = Array2::default(shape);
        for column in dataframe.iter() {
            let mut key = Key::from(column.name().as_str());
            // The dtype stays `Unknown` unless the column is present in the
            // index and its values could be inspected.
            let dtype = match index.get_column_index(&key) {
                Some(column_index) => {
                    let mut target = arr.slice_mut(ndarray::s![.., column_index]);
                    let converted: Vec<DataValue> = column
                        .iter()
                        .map(crate::dataframe::from_polars_value)
                        .collect();
                    let detected = crate::detect_dtype_arr(&converted);
                    target.assign(&Array1::from_vec(converted));
                    detected
                }
                None => crate::DataType::Unknown,
            };
            key.ctype = dtype;
            let _ = index.rename_key(column.name(), key);
        }
        ColumnFrame::new(index, arr)
    }
}