use std::collections::HashMap;
use crate::column::{BooleanColumn, Column, ColumnTrait, Float64Column, Int64Column, StringColumn};
use crate::error::{Error, Result};
use crate::index::DataFrameIndex;
use crate::optimized::dataframe::OptimizedDataFrame;
use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;
use crate::DataValue;
/// Builds an [`OptimizedDataFrame`] from a standard `DataFrame`.
///
/// Every column that can be read through `get_column::<String>` is flattened
/// into a vector of strings (a missing cell becomes an empty string) and then
/// stored as the first column type that accepts *every* cell, tried in this
/// order: `Int64`, `Float64`, `Boolean`, and finally `String`. In the typed
/// columns an empty cell is stored as `0`, `0.0` or `false` respectively.
///
/// Columns for which `get_column::<String>` returns an error are skipped.
///
/// # Errors
///
/// Propagates any error returned while adding a column or setting an index.
pub(crate) fn from_standard_dataframe(
    df: &crate::dataframe::DataFrame,
) -> Result<OptimizedDataFrame> {
    // Reads every cell as an `i64`; an empty cell counts as `0`.
    // Yields `None` as soon as one non-empty cell is not an integer.
    fn parse_all_i64(cells: &[String]) -> Option<Vec<i64>> {
        cells
            .iter()
            .map(|cell| {
                if cell.is_empty() {
                    Some(0)
                } else {
                    cell.parse::<i64>().ok()
                }
            })
            .collect()
    }

    // Reads every cell as an `f64`; an empty cell counts as `0.0`.
    // Yields `None` as soon as one non-empty cell is not a float.
    fn parse_all_f64(cells: &[String]) -> Option<Vec<f64>> {
        cells
            .iter()
            .map(|cell| {
                if cell.is_empty() {
                    Some(0.0)
                } else {
                    cell.parse::<f64>().ok()
                }
            })
            .collect()
    }

    // Reads every cell as a boolean, case-insensitively: "true"/"1" are
    // `true`; "false"/"0"/"" are `false`. Any other cell yields `None`.
    fn parse_all_bool(cells: &[String]) -> Option<Vec<bool>> {
        cells
            .iter()
            .map(|cell| match cell.to_lowercase().as_str() {
                "true" | "1" => Some(true),
                "" | "false" | "0" => Some(false),
                _ => None,
            })
            .collect()
    }

    // Stage 1: infer a typed column for each source column.
    let mut staging = SplitDataFrame::new();
    for col_name in df.column_names() {
        // NOTE(review): a column that cannot be fetched as strings is dropped
        // without any error — confirm this is intended.
        let source = match df.get_column::<String>(&col_name) {
            Ok(source) => source,
            Err(_) => continue,
        };

        let values: Vec<String> = (0..source.len())
            .map(|row| match source.get(row) {
                Some(val) => ToString::to_string(&val),
                None => String::new(),
            })
            .collect();

        // The narrowest type that fits every cell wins; strings are the fallback.
        let typed = if let Some(ints) = parse_all_i64(&values) {
            Column::Int64(Int64Column::new(ints))
        } else if let Some(floats) = parse_all_f64(&values) {
            Column::Float64(Float64Column::new(floats))
        } else if let Some(flags) = parse_all_bool(&values) {
            Column::Boolean(BooleanColumn::new(flags))
        } else {
            Column::String(StringColumn::new(values))
        };
        staging.add_column(col_name.clone(), typed)?;
    }

    // Stage 2: mirror the source index onto the intermediate frame.
    match df.get_index() {
        DataFrameIndex::Simple(simple_index) => {
            staging.set_index_from_simple_index(simple_index.clone())?;
        }
        DataFrameIndex::Multi(multi_index) => {
            staging.set_index(DataFrameIndex::Multi(multi_index.clone()))?;
        }
    }

    // Stage 3: copy the typed columns into the frame that is returned.
    let mut opt_df = OptimizedDataFrame::new();
    for name in staging.column_names() {
        if let Ok(view) = staging.column(name) {
            let copied = view.column().clone();
            opt_df.add_column(name.clone(), copied)?;
        }
    }

    // NOTE(review): only a Simple index reaches the result; a Multi index set
    // on the intermediate frame above is not copied over — confirm intended.
    if let Some(DataFrameIndex::Simple(simple_index)) = staging.get_index() {
        // The outcome of installing the default index is deliberately ignored;
        // the simple index is applied right after it.
        let _ = opt_df.set_default_index();
        opt_df.set_index_from_simple_index(simple_index.clone())?;
    }

    Ok(opt_df)
}
/// Converts an [`OptimizedDataFrame`] back into a standard `DataFrame`.
///
/// The columns and the index of `df` are first copied into an intermediate
/// split data frame, and the result is then built from that copy. Each column
/// becomes a `Series` of optional boxed values: a cell whose `get` yields
/// `Ok(Some(_))` is kept, while anything else (an absent value or an error)
/// is stored as `None`.
///
/// # Errors
///
/// Propagates any error returned while looking up or adding a column,
/// constructing a series, or setting an index.
pub(crate) fn to_standard_dataframe(
    df: &OptimizedDataFrame,
) -> Result<crate::dataframe::DataFrame> {
    // Stage 1: mirror columns and index into an intermediate frame.
    let mut staging = SplitDataFrame::new();
    for col_name in df.column_names() {
        let view = df.column(col_name)?;
        staging.add_column(col_name.clone(), view.column().clone())?;
    }
    match df.get_index() {
        Some(DataFrameIndex::Simple(simple_index)) => {
            staging.set_index_from_simple_index(simple_index.clone())?;
        }
        Some(DataFrameIndex::Multi(multi_index)) => {
            staging.set_index(DataFrameIndex::Multi(multi_index.clone()))?;
        }
        None => {}
    }

    // Stage 2: turn every typed column into a series of optional boxed cells.
    let mut std_df = crate::dataframe::DataFrame::new();
    for col_name in staging.column_names() {
        let view = staging.column(col_name)?;
        match view.column() {
            Column::Int64(ints) => {
                let cells = (0..ints.len())
                    .map(|row| match ints.get(row) {
                        Ok(Some(v)) => Some(Box::new(v.clone())),
                        _ => None,
                    })
                    .collect();
                let series = crate::series::Series::new(cells, Some(col_name.clone()))?;
                std_df.add_column(col_name.clone(), series)?;
            }
            Column::Float64(floats) => {
                let cells = (0..floats.len())
                    .map(|row| match floats.get(row) {
                        Ok(Some(v)) => Some(Box::new(v.clone())),
                        _ => None,
                    })
                    .collect();
                let series = crate::series::Series::new(cells, Some(col_name.clone()))?;
                std_df.add_column(col_name.clone(), series)?;
            }
            Column::String(strings) => {
                let cells = (0..strings.len())
                    .map(|row| match strings.get(row) {
                        Ok(Some(s)) => Some(Box::new(s.to_string())),
                        _ => None,
                    })
                    .collect();
                let series = crate::series::Series::new(cells, Some(col_name.clone()))?;
                std_df.add_column(col_name.clone(), series)?;
            }
            Column::Boolean(flags) => {
                let cells = (0..flags.len())
                    .map(|row| match flags.get(row) {
                        Ok(Some(v)) => Some(Box::new(v.clone())),
                        _ => None,
                    })
                    .collect();
                let series = crate::series::Series::new(cells, Some(col_name.clone()))?;
                std_df.add_column(col_name.clone(), series)?;
            }
        }
    }

    // Stage 3: carry the index over, whichever kind it is.
    match staging.get_index() {
        Some(DataFrameIndex::Simple(simple_index)) => {
            std_df.set_index(simple_index.clone())?;
        }
        Some(DataFrameIndex::Multi(multi_index)) => {
            std_df.set_multi_index(multi_index.clone())?;
        }
        None => {}
    }

    Ok(std_df)
}
/// Converts a standard `DataFrame` into an [`OptimizedDataFrame`].
///
/// Public entry point that delegates directly to `from_standard_dataframe`.
///
/// # Errors
///
/// Returns whatever error `from_standard_dataframe` returns.
pub fn optimize_dataframe(df: &crate::dataframe::DataFrame) -> Result<OptimizedDataFrame> {
    from_standard_dataframe(df)
}
/// Converts an [`OptimizedDataFrame`] into a standard `DataFrame`.
///
/// Public entry point that delegates directly to `to_standard_dataframe`.
///
/// # Errors
///
/// Returns whatever error `to_standard_dataframe` returns.
pub fn standard_dataframe(df: &OptimizedDataFrame) -> Result<crate::dataframe::DataFrame> {
    to_standard_dataframe(df)
}