trs_dataframe/dataframe/column_store/
from.rs1use data_value::DataValue;
2use halfbrown::HashMap;
3
4use crate::{detect_dtype_arr, Key, MLChefMap};
5
6use super::{typed_array::TypedDataArray, ColumnFrame, KeyIndex};
7
8impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
11 fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
12 let mut keys = dataframe
13 .iter()
14 .flat_map(|x| x.keys())
15 .cloned()
16 .collect::<Vec<_>>();
17 keys.sort();
18 keys.dedup();
19 let index = KeyIndex::new(keys);
20 let nrows = dataframe.len();
21 let ncols = index.len();
22 let mut columns: Vec<Vec<DataValue>> = vec![vec![DataValue::default(); nrows]; ncols];
23 for (row_idx, row) in dataframe.iter().enumerate() {
24 for (key, value) in row.iter() {
25 if let Some(col_idx) = index.get_column_index(key) {
26 columns[col_idx][row_idx] = value.clone();
27 }
28 }
29 }
30 Self::new(index, columns)
31 }
32}
33
34impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
37 fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
38 let mut keys = dataframe
39 .iter()
40 .flat_map(|x| x.keys())
41 .cloned()
42 .collect::<Vec<_>>();
43 keys.sort();
44 keys.dedup();
45 let index = KeyIndex::new(keys);
46 let nrows = dataframe.len();
47 let ncols = index.len();
48 let mut columns: Vec<Vec<DataValue>> = vec![vec![DataValue::default(); nrows]; ncols];
49 for (row_idx, row) in dataframe.iter().enumerate() {
50 for (key, value) in row.iter() {
51 if let Some(col_idx) = index.get_column_index(key) {
52 columns[col_idx][row_idx] = value.clone();
53 }
54 }
55 }
56 Self::new(index, columns)
57 }
58}
59
60fn from_string_keyed<S: AsRef<str>>(
61 dataframe: impl IntoIterator<Item = (S, Vec<DataValue>)>,
62 sorted_keys: Vec<Key>,
63) -> ColumnFrame {
64 let mut index = KeyIndex::new(sorted_keys);
65 let entries: Vec<(S, Vec<DataValue>)> = dataframe.into_iter().collect();
66 let size = entries
67 .iter()
68 .map(|(_, v)| v.len())
69 .max()
70 .unwrap_or_default();
71 let ncols = index.len();
72 let mut data_frame: Vec<Vec<DataValue>> =
73 (0..ncols).map(|_| vec![DataValue::Null; size]).collect();
74 let mut detected: Vec<(String, Key)> = Vec::new();
79 for (key, mut value) in entries {
80 let key_ref: Key = key.as_ref().into();
81 if let Some(col_idx) = index.get_column_index(&key_ref) {
82 let dtype_from_input = detect_dtype_arr(&value);
83 value.resize(size, DataValue::default());
84 data_frame[col_idx] = value;
85 detected.push((
86 key.as_ref().to_string(),
87 Key::new(key.as_ref(), dtype_from_input),
88 ));
89 }
90 }
91 for (name, new_key) in detected {
92 let _ = index.rename_key(&name, new_key);
93 }
94 ColumnFrame::new(index, data_frame)
95}
96
97impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
100 fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
101 let mut keys = dataframe
102 .keys()
103 .map(|x| x.as_str().into())
104 .collect::<Vec<_>>();
105 keys.sort();
106 keys.dedup();
107 from_string_keyed(dataframe, keys)
108 }
109}
110
111impl From<MLChefMap> for ColumnFrame {
112 fn from(dataframe: MLChefMap) -> Self {
113 let mut keys = dataframe
114 .keys()
115 .map(|x| x.as_str().into())
116 .collect::<Vec<_>>();
117 keys.sort();
118 keys.dedup();
119 from_string_keyed(dataframe, keys)
120 }
121}
122
123impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
124 fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
125 if dataframe.is_empty() {
126 return Self::default();
127 }
128 let mut index = KeyIndex::new(vec![]);
129 let mut data_frame: Vec<TypedDataArray> = Vec::with_capacity(dataframe.len());
130 for (key, value) in dataframe {
131 let dtype = key.ctype;
132 index.store_key(key);
133 data_frame.push(TypedDataArray::new(dtype, value));
134 }
135 ColumnFrame::new(index, data_frame)
136 }
137}
138
139impl From<std::collections::HashMap<String, ndarray::Array1<DataValue>>> for ColumnFrame {
140 fn from(mut dataframe: std::collections::HashMap<String, ndarray::Array1<DataValue>>) -> Self {
141 let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
142 let index = KeyIndex::new(keys);
143 let nrows = dataframe.values().next().map_or(0, |v| v.len());
144 let ncols = index.len();
145 let mut data_frame: Vec<Vec<DataValue>> =
146 (0..ncols).map(|_| vec![DataValue::Null; nrows]).collect();
147
148 for (column_index, key) in index.get_keys().iter().enumerate() {
149 if let Some(value) = dataframe.remove(key.name()) {
150 data_frame[column_index] = value.to_vec();
151 }
152 }
153 ColumnFrame::new(index, data_frame)
154 }
155}
156
/// Builds a [`ColumnFrame`] from a polars `DataFrame`.
///
/// Keys are created from the column names in the order polars reports them.
/// Each column's values are converted with `from_polars_value`, the dtype is
/// detected from the converted values, and the key is then renamed so that
/// its `ctype` carries that dtype.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for ColumnFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let column_keys = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect::<Vec<_>>();
        let mut index = KeyIndex::new(column_keys);
        let ncols = index.len();
        let nrows = dataframe.height();

        // Start with one placeholder array per indexed key; the loop below
        // overwrites it with the real data.
        let mut typed_columns: Vec<TypedDataArray> = Vec::with_capacity(ncols);
        for position in 0..ncols {
            let placeholder = match index.get_keys().get(position) {
                Some(existing) => TypedDataArray::default_init(existing, nrows),
                None => TypedDataArray::default(),
            };
            typed_columns.push(placeholder);
        }

        for column in dataframe.iter() {
            let mut key = Key::from(column.name().as_str());
            let dtype = match index.get_column_index(&key) {
                Some(col_idx) => {
                    let values: Vec<DataValue> = column
                        .iter()
                        .map(crate::dataframe::from_polars_value)
                        .collect();
                    let detected = crate::detect_dtype_arr(&values);
                    typed_columns[col_idx] = TypedDataArray::new(detected, values);
                    detected
                }
                // A column missing from the index keeps the unknown dtype.
                None => crate::DataType::Unknown,
            };
            key.ctype = dtype;
            // The outcome of the rename is deliberately ignored.
            let _ = index.rename_key(column.name(), key);
        }
        ColumnFrame::new(index, typed_columns)
    }
}