use data_value::DataValue;
use halfbrown::HashMap;
use crate::{detect_dtype_arr, Key, MLChefMap};
use super::{typed_array::TypedDataArray, ColumnFrame, KeyIndex};
/// Builds a [`ColumnFrame`] from row-oriented data, one `std` hash map per row.
///
/// The column set is the sorted, de-duplicated union of every key that occurs
/// in any row. A cell whose row does not contain the column's key keeps the
/// `DataValue::default()` it was initialised with.
impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
        // Sort and de-duplicate on borrowed keys so that only the surviving
        // keys are cloned, instead of every key of every row.
        let mut key_refs: Vec<&Key> = dataframe.iter().flat_map(|row| row.keys()).collect();
        key_refs.sort();
        key_refs.dedup();
        let keys: Vec<Key> = key_refs.into_iter().cloned().collect();

        let index = KeyIndex::new(keys);
        let nrows = dataframe.len();
        let ncols = index.len();
        let mut columns: Vec<Vec<DataValue>> = vec![vec![DataValue::default(); nrows]; ncols];

        // The input is owned, so each cell is moved into place rather than cloned.
        for (row_idx, row) in dataframe.into_iter().enumerate() {
            for (key, value) in row {
                if let Some(col_idx) = index.get_column_index(&key) {
                    columns[col_idx][row_idx] = value;
                }
            }
        }
        Self::new(index, columns)
    }
}
/// Builds a [`ColumnFrame`] from row-oriented data, one `halfbrown` hash map per row.
///
/// The column set is the sorted, de-duplicated union of every key that occurs
/// in any row. A cell whose row does not contain the column's key keeps the
/// `DataValue::default()` it was initialised with.
impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
        // Sort and de-duplicate on borrowed keys so that only the surviving
        // keys are cloned, instead of every key of every row.
        let mut key_refs: Vec<&Key> = dataframe.iter().flat_map(|row| row.keys()).collect();
        key_refs.sort();
        key_refs.dedup();
        let keys: Vec<Key> = key_refs.into_iter().cloned().collect();

        let index = KeyIndex::new(keys);
        let nrows = dataframe.len();
        let ncols = index.len();
        let mut columns: Vec<Vec<DataValue>> = vec![vec![DataValue::default(); nrows]; ncols];

        // The input is owned, so each cell is moved into place rather than cloned.
        for (row_idx, row) in dataframe.into_iter().enumerate() {
            for (key, value) in row {
                if let Some(col_idx) = index.get_column_index(&key) {
                    columns[col_idx][row_idx] = value;
                }
            }
        }
        Self::new(index, columns)
    }
}
/// Assembles a [`ColumnFrame`] from `(name, values)` column pairs.
///
/// `sorted_keys` seeds the [`KeyIndex`]. A pair whose name has no column in
/// that index is skipped. Every stored column is resized to the length of the
/// longest input column, and its key is afterwards renamed to one carrying the
/// dtype that `detect_dtype_arr` reported for the values as they were supplied.
fn from_string_keyed<S: AsRef<str>>(
    dataframe: impl IntoIterator<Item = (S, Vec<DataValue>)>,
    sorted_keys: Vec<Key>,
) -> ColumnFrame {
    let mut index = KeyIndex::new(sorted_keys);
    let pairs: Vec<(S, Vec<DataValue>)> = dataframe.into_iter().collect();

    // The longest column decides the row count; zero when there are no columns.
    let nrows = pairs
        .iter()
        .map(|(_, values)| values.len())
        .max()
        .unwrap_or(0);

    // Columns that never receive data stay filled with `DataValue::Null`.
    let mut columns: Vec<Vec<DataValue>> = vec![vec![DataValue::Null; nrows]; index.len()];

    // Renames are queued and applied only after every column has been placed.
    let mut renames: Vec<(String, Key)> = Vec::new();
    for (raw_name, mut values) in pairs {
        let label: &str = raw_name.as_ref();
        let lookup: Key = label.into();
        let col_idx = match index.get_column_index(&lookup) {
            Some(position) => position,
            None => continue,
        };
        // Detect the dtype before resizing so added cells do not take part.
        let dtype = detect_dtype_arr(&values);
        values.resize(nrows, DataValue::default());
        columns[col_idx] = values;
        renames.push((label.to_string(), Key::new(label, dtype)));
    }

    for (old_name, typed_key) in renames {
        // The rename result is intentionally discarded.
        let _ = index.rename_key(&old_name, typed_key);
    }

    ColumnFrame::new(index, columns)
}
impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
let mut keys = dataframe
.keys()
.map(|x| x.as_str().into())
.collect::<Vec<_>>();
keys.sort();
keys.dedup();
from_string_keyed(dataframe, keys)
}
}
impl From<MLChefMap> for ColumnFrame {
fn from(dataframe: MLChefMap) -> Self {
let mut keys = dataframe
.keys()
.map(|x| x.as_str().into())
.collect::<Vec<_>>();
keys.sort();
keys.dedup();
from_string_keyed(dataframe, keys)
}
}
/// Converts an ordered list of `(key, values)` columns into a [`ColumnFrame`].
///
/// An empty list yields `ColumnFrame::default()`. Otherwise each key is stored
/// in a fresh [`KeyIndex`] in input order, and its values are wrapped in a
/// [`TypedDataArray`] tagged with the key's own `ctype`.
impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
        if dataframe.is_empty() {
            return Self::default();
        }

        let mut index = KeyIndex::new(Vec::new());
        let columns: Vec<TypedDataArray> = dataframe
            .into_iter()
            .map(|(key, values)| {
                // Read the dtype first: `store_key` takes the key by value.
                let dtype = key.ctype;
                index.store_key(key);
                TypedDataArray::new(dtype, values)
            })
            .collect();

        ColumnFrame::new(index, columns)
    }
}
impl From<std::collections::HashMap<String, ndarray::Array1<DataValue>>> for ColumnFrame {
fn from(mut dataframe: std::collections::HashMap<String, ndarray::Array1<DataValue>>) -> Self {
let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
let index = KeyIndex::new(keys);
let nrows = dataframe.values().next().map_or(0, |v| v.len());
let ncols = index.len();
let mut data_frame: Vec<Vec<DataValue>> =
(0..ncols).map(|_| vec![DataValue::Null; nrows]).collect();
for (column_index, key) in index.get_keys().iter().enumerate() {
if let Some(value) = dataframe.remove(key.name()) {
data_frame[column_index] = value.to_vec();
}
}
ColumnFrame::new(index, data_frame)
}
}
/// Converts a polars `DataFrame` into a [`ColumnFrame`] (only with the
/// `polars-df` feature).
///
/// Keys are created from the frame's column names in the order polars reports
/// them. Each column's values are converted with
/// `crate::dataframe::from_polars_value`, the dtype is detected with
/// `detect_dtype_arr`, and the key is renamed to one carrying that dtype.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for ColumnFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let column_keys: Vec<Key> = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect();
        let mut index = KeyIndex::new(column_keys);
        let nrows = dataframe.height();

        // Start with one placeholder array per indexed key; a slot with no key
        // falls back to `TypedDataArray::default()`.
        let mut data_frame: Vec<TypedDataArray> = (0..index.len())
            .map(|position| match index.get_keys().get(position) {
                Some(key) => TypedDataArray::default_init(key, nrows),
                None => TypedDataArray::default(),
            })
            .collect();

        for series in dataframe.iter() {
            let mut key = Key::from(series.name().as_str());
            // The dtype stays `Unknown` when the column is not in the index.
            let dtype = match index.get_column_index(&key) {
                Some(col_idx) => {
                    let values: Vec<DataValue> = series
                        .iter()
                        .map(crate::dataframe::from_polars_value)
                        .collect();
                    let detected = crate::detect_dtype_arr(&values);
                    data_frame[col_idx] = TypedDataArray::new(detected, values);
                    detected
                }
                None => crate::DataType::Unknown,
            };
            key.ctype = dtype;
            // The rename result is intentionally discarded.
            let _ = index.rename_key(series.name(), key);
        }

        ColumnFrame::new(index, data_frame)
    }
}