use data_value::DataValue;
use halfbrown::HashMap;
use ndarray::{Array1, Array2};
use crate::{detect_dtype, detect_dtype_arr, Key, MLChefMap};
use super::{ColumnFrame, KeyIndex};
impl From<Vec<std::collections::HashMap<Key, DataValue>>> for ColumnFrame {
    /// Converts a list of row maps into a [`ColumnFrame`].
    ///
    /// Every input map becomes one row, in input order. The columns are the
    /// sorted, de-duplicated union of the keys found in all rows. A cell whose
    /// key is absent from a row keeps the array's default value.
    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
        // Gather every key of every row, then reduce to a sorted unique list.
        let mut columns: Vec<Key> = Vec::new();
        for record in &dataframe {
            columns.extend(record.keys().cloned());
        }
        columns.sort();
        columns.dedup();
        let index = KeyIndex::new(columns);

        let mut cells = Array2::default((dataframe.len(), index.len()));
        for (row_no, record) in dataframe.iter().enumerate() {
            for (key, value) in record {
                if let Some(col_no) = index.get_column_index(key) {
                    // `get_mut` keeps an out-of-range position a silent no-op.
                    if let Some(cell) = cells.get_mut((row_no, col_no)) {
                        *cell = value.clone();
                    }
                }
            }
        }
        Self::new(index, cells)
    }
}
impl From<Vec<HashMap<Key, DataValue>>> for ColumnFrame {
    /// Converts a list of `halfbrown` row maps into a [`ColumnFrame`].
    ///
    /// Each map is one row (input order is preserved). The column set is the
    /// sorted, de-duplicated union of all row keys; cells that a row does not
    /// provide keep the array's default value.
    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
        // Union of all keys across rows, sorted and made unique.
        let mut header: Vec<Key> = Vec::new();
        for entry in dataframe.iter() {
            header.extend(entry.keys().cloned());
        }
        header.sort();
        header.dedup();
        let index = KeyIndex::new(header);

        let mut table = Array2::default((dataframe.len(), index.len()));
        for (row, entry) in dataframe.iter().enumerate() {
            for (key, value) in entry.iter() {
                if let Some(col) = index.get_column_index(key) {
                    // Positions outside the table are skipped, never a panic.
                    if let Some(slot) = table.get_mut((row, col)) {
                        *slot = value.clone();
                    }
                }
            }
        }
        Self::new(index, table)
    }
}
impl From<std::collections::HashMap<String, Vec<DataValue>>> for ColumnFrame {
    /// Converts a column-oriented map (`name -> values`) into a [`ColumnFrame`].
    ///
    /// Columns are ordered by the sorted, de-duplicated keys built from the
    /// map's names. The row count is the length of the longest column; shorter
    /// columns leave their remaining cells at the array's default value.
    /// For each column the dtype is detected with `detect_dtype_arr` and the
    /// index entry is renamed to a key carrying that dtype (the result of the
    /// rename is deliberately ignored).
    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
        let mut keys = dataframe
            .keys()
            .map(|x| x.as_str().into())
            .collect::<Vec<_>>();
        keys.sort();
        keys.dedup();
        let mut index = KeyIndex::new(keys);

        // Longest column decides the number of rows (0 for an empty map).
        let size = dataframe
            .values()
            .map(|x| x.len())
            .max()
            .unwrap_or_default();
        let mut data_frame = Array2::default((size, index.len()));

        for (key, values) in dataframe {
            let dtype = detect_dtype_arr(&values);
            let new_key = Key::new(&key, dtype);
            // NOTE(review): the index was built from keys converted from the plain
            // name, while this lookup uses the dtype-carrying `new_key` before the
            // rename below. This relies on `KeyIndex` matching keys regardless of
            // dtype -- confirm.
            // The column position does not change per row, so look it up once.
            if let Some(column) = index.get_column_index(&new_key) {
                for (idx, value) in values.into_iter().enumerate() {
                    if let Some(x) = data_frame.get_mut((idx, column)) {
                        // `value` is already owned here; move it instead of cloning.
                        *x = value;
                    }
                }
            }
            let _ = index.rename_key(key.as_str(), new_key);
        }
        Self::new(index, data_frame)
    }
}
impl From<MLChefMap> for ColumnFrame {
    /// Converts an [`MLChefMap`] (column name to values) into a [`ColumnFrame`].
    ///
    /// Column order follows the sorted, de-duplicated keys derived from the
    /// map's names, and the row count equals the longest column. Cells a
    /// shorter column does not cover keep the array's default value. Each
    /// index entry is renamed to a key carrying the dtype reported by
    /// `detect_dtype_arr`; the result of that rename is ignored.
    fn from(dataframe: MLChefMap) -> Self {
        let mut names = dataframe
            .keys()
            .map(|name| name.as_str().into())
            .collect::<Vec<_>>();
        names.sort();
        names.dedup();
        let mut index = KeyIndex::new(names);

        let row_count = dataframe
            .values()
            .map(|column| column.len())
            .max()
            .unwrap_or_default();
        let mut cells = Array2::default((row_count, index.len()));

        for (name, column) in dataframe.into_iter() {
            let typed_key = Key::new(&name, detect_dtype_arr(&column));
            // The column position is the same for every row of this column.
            if let Some(col_no) = index.get_column_index(&typed_key) {
                for (row_no, value) in column.into_iter().enumerate() {
                    if let Some(cell) = cells.get_mut((row_no, col_no)) {
                        // Clone kept as in the original: whether the iterator yields
                        // owned values is not visible from `MLChefMap` here.
                        *cell = value.clone();
                    }
                }
            }
            let _ = index.rename_key(name.as_str(), typed_key);
        }
        Self::new(index, cells)
    }
}
impl From<Vec<(Key, Vec<DataValue>)>> for ColumnFrame {
    /// Converts an ordered list of `(key, column values)` pairs into a
    /// [`ColumnFrame`].
    ///
    /// An empty list yields `Self::default()`. Otherwise the row count is the
    /// length of the first column, and each pair's key is stored in the index
    /// before its values are written into the column the index reports.
    ///
    /// # Panics
    ///
    /// Panics if a column's length cannot be broadcast to the row count
    /// (`ndarray`'s `assign`), or if a key cannot be found in the index right
    /// after being stored.
    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
        if dataframe.is_empty() {
            return Self::default();
        }

        // The first column fixes the number of rows for the whole frame.
        let row_count = dataframe.first().expect("Expects data").1.len();
        let column_count = dataframe.len();

        let mut index = KeyIndex::new(Vec::new());
        let mut cells = Array2::default((row_count, column_count));
        for (key, values) in dataframe {
            // `store_key` consumes the key, so hand it a copy and keep ours for the lookup.
            index.store_key(key.clone());
            let column_no = index.get_column_index(&key).expect("BUG: Defined above!");
            let column_data = Array1::from_vec(values);
            let mut target = cells.slice_mut(ndarray::s![.., column_no]);
            target.assign(&column_data);
        }
        Self::new(index, cells)
    }
}
impl From<std::collections::HashMap<String, Array1<DataValue>>> for ColumnFrame {
    /// Converts a map of named one-dimensional arrays into a [`ColumnFrame`].
    ///
    /// Columns follow the map's key iteration order. The row count is taken
    /// from whichever array the map's value iterator yields first (0 for an
    /// empty map). Each key's `ctype` is set from the first element of its
    /// column, falling back to `DataValue::Null` when the column is empty.
    ///
    /// # Panics
    ///
    /// Panics if a column's length cannot be broadcast to the row count
    /// (`ndarray`'s `assign`).
    fn from(mut dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
        let keys = dataframe.keys().map(|key| key.into()).collect::<Vec<_>>();
        let mut index = KeyIndex::new(keys);
        let mut arr = Array2::default((
            dataframe.values().next().map_or(0, |v| v.len()),
            index.len(),
        ));
        for (column_index, key) in index.get_keys_mut().iter_mut().enumerate() {
            if let Some(value) = dataframe.remove(key.name()) {
                // Detect the dtype from the incoming data. Reading the target
                // column before the assignment would only see the
                // default-initialised cell, never the real first value.
                key.ctype = detect_dtype(value.get(0).unwrap_or(&DataValue::Null));
                let mut res = arr.slice_mut(ndarray::s![.., column_index]);
                res.assign(&value);
            }
        }
        ColumnFrame::new(index, arr)
    }
}
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for ColumnFrame {
    /// Converts a polars `DataFrame` into a [`ColumnFrame`].
    ///
    /// The index is built from the frame's column names and the row count is
    /// the frame's height. Every value is converted with
    /// `crate::dataframe::from_polars_value`. Each index entry is then renamed
    /// to a key carrying the dtype reported by `detect_dtype_arr`, or
    /// `DataType::Unknown` when the column is not found in the index. The
    /// result of the rename is ignored.
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let names = dataframe
            .get_column_names()
            .iter()
            .map(|name| name.as_str().into())
            .collect::<Vec<_>>();
        let mut index = KeyIndex::new(names);
        let mut cells = Array2::default((dataframe.height(), index.len()));

        for column in dataframe.iter() {
            let mut key = Key::from(column.name().as_str());
            let detected = match index.get_column_index(&key) {
                Some(column_no) => {
                    let converted: Vec<DataValue> = column
                        .iter()
                        .map(crate::dataframe::from_polars_value)
                        .collect();
                    // Detect before the values are moved into the array.
                    let dtype = detect_dtype_arr(&converted);
                    let mut target = cells.slice_mut(ndarray::s![.., column_no]);
                    target.assign(&Array1::from_vec(converted));
                    dtype
                }
                None => crate::DataType::Unknown,
            };
            key.ctype = detected;
            let _ = index.rename_key(column.name(), key);
        }
        ColumnFrame::new(index, cells)
    }
}