use std::collections::HashMap;
use std::path::Path;
use crate::column::{BooleanColumn, Column, Float64Column, Int64Column, StringColumn};
use crate::error::{Error, Result};
#[cfg(feature = "parquet")]
use crate::optimized::split_dataframe::io::ParquetCompression;
use super::core::{ColumnView, JsonOrient, OptimizedDataFrame};
#[cfg(feature = "excel")]
use simple_excel_writer::{Sheet, Workbook};
impl OptimizedDataFrame {
/// Builds an `OptimizedDataFrame` from the CSV file at `path`.
///
/// Reading is delegated to the split-dataframe implementation's `from_csv`
/// (`path` and `has_header` are forwarded unchanged). Every column of the
/// intermediate frame is then cloned into a fresh frame, followed by its
/// index when one is present.
///
/// # Errors
///
/// Returns any error produced by the underlying reader or by `add_column`.
/// A column whose lookup on the intermediate frame fails is skipped rather
/// than reported.
pub fn from_csv<P: AsRef<Path>>(path: P, has_header: bool) -> Result<Self> {
    use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;

    let source = SplitDataFrame::from_csv(path, has_header)?;
    let mut result = Self::new();

    for column_name in source.column_names() {
        match source.column(column_name) {
            Ok(view) => {
                result.add_column(column_name.to_string(), view.column.clone())?;
            }
            // Lookup failures are ignored; the column is simply left out.
            Err(_) => continue,
        }
    }

    // Only overwrite the index when the intermediate frame actually has one.
    if let Some(source_index) = source.get_index() {
        result.index = Some(source_index.clone());
    }

    Ok(result)
}
pub fn to_csv<P: AsRef<Path>>(&self, path: P, write_header: bool) -> Result<()> {
use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;
let mut split_df = SplitDataFrame::new();
for name in &self.column_names {
if let Ok(column_view) = self.column(name) {
let column = column_view.column;
split_df.add_column(name.clone(), column.clone())?;
}
}
if let Some(ref index) = self.index {
match index {
crate::index::DataFrameIndex::Simple(simple_index) => {
split_df.set_index_from_simple_index(simple_index.clone())?;
}
crate::index::DataFrameIndex::Multi(multi_index) => {
split_df.set_index(crate::index::DataFrameIndex::Multi(multi_index.clone()))?;
}
}
}
split_df.to_csv(path, write_header)
}
/// Builds an `OptimizedDataFrame` from an Excel file at `path`.
///
/// Reading is delegated to the split-dataframe implementation's
/// `from_excel`; `sheet_name`, `header`, `skip_rows` and `use_cols` are
/// forwarded unchanged. Every column of the intermediate frame is then
/// cloned into a fresh frame, followed by its index when one is present.
///
/// # Errors
///
/// Returns any error produced by the underlying reader or by `add_column`.
/// A column whose lookup on the intermediate frame fails is skipped rather
/// than reported.
#[cfg(feature = "excel")]
pub fn from_excel<P: AsRef<Path>>(
    path: P,
    sheet_name: Option<&str>,
    header: bool,
    skip_rows: usize,
    use_cols: Option<&[&str]>,
) -> Result<Self> {
    use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;

    let source = SplitDataFrame::from_excel(path, sheet_name, header, skip_rows, use_cols)?;
    let mut result = Self::new();

    for column_name in source.column_names() {
        match source.column(column_name) {
            Ok(view) => {
                result.add_column(column_name.to_string(), view.column.clone())?;
            }
            // Lookup failures are ignored; the column is simply left out.
            Err(_) => continue,
        }
    }

    // Only overwrite the index when the intermediate frame actually has one.
    if let Some(source_index) = source.get_index() {
        result.index = Some(source_index.clone());
    }

    Ok(result)
}
/// Writes this frame to an Excel file at `path`.
///
/// The columns and the index (if any) are cloned into a temporary
/// split-dataframe, whose `to_excel` performs the actual write; `path`,
/// `sheet_name` and `index` are forwarded unchanged.
///
/// # Errors
///
/// Returns any error from `add_column`, from installing the index on the
/// temporary frame, or from the underlying writer. A column whose lookup on
/// `self` fails is skipped rather than reported.
#[cfg(feature = "excel")]
pub fn to_excel<P: AsRef<Path>>(
    &self,
    path: P,
    sheet_name: Option<&str>,
    index: bool,
) -> Result<()> {
    use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;

    let mut split_df = SplitDataFrame::new();

    for name in &self.column_names {
        if let Ok(column_view) = self.column(name) {
            let column = column_view.column();
            split_df.add_column(name.clone(), column.clone())?;
        }
    }

    // Propagate index failures instead of discarding them, matching the
    // error handling of the other writers on this type; otherwise a rejected
    // index would silently produce a file without it.
    if let Some(ref frame_index) = self.index {
        split_df.set_index(frame_index.clone())?;
    }

    split_df.to_excel(path, sheet_name, index)
}
/// Writes this frame to a Parquet file at `path`.
///
/// The columns and the index (if any) are cloned into a temporary
/// split-dataframe, whose `to_parquet` performs the actual write; `path`
/// and `compression` are forwarded unchanged.
///
/// # Errors
///
/// Returns any error from `add_column`, from installing the index on the
/// temporary frame, or from the underlying writer. A column whose lookup on
/// `self` fails is skipped rather than reported.
#[cfg(feature = "parquet")]
pub fn to_parquet<P: AsRef<Path>>(
    &self,
    path: P,
    compression: Option<ParquetCompression>,
) -> Result<()> {
    use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;

    let mut split_df = SplitDataFrame::new();

    for name in &self.column_names {
        if let Ok(column_view) = self.column(name) {
            let column = column_view.column;
            split_df.add_column(name.clone(), column.clone())?;
        }
    }

    if let Some(ref index) = self.index {
        match index {
            crate::index::DataFrameIndex::Simple(simple_index) => {
                split_df.set_index_from_simple_index(simple_index.clone())?;
            }
            crate::index::DataFrameIndex::Multi(multi_index) => {
                split_df.set_index(crate::index::DataFrameIndex::Multi(multi_index.clone()))?;
            }
        }
    }

    // `ParquetCompression` is imported from `split_dataframe::io`, i.e. it is
    // already the writer's own enum, so it is passed through without a
    // variant-by-variant conversion.
    split_df.to_parquet(path, compression)
}
/// Builds an `OptimizedDataFrame` from the Parquet file at `path`.
///
/// Reading is delegated to the split-dataframe implementation's
/// `from_parquet`. Every column of the intermediate frame is then cloned
/// into a fresh frame, followed by its index when one is present.
///
/// # Errors
///
/// Returns any error produced by the underlying reader or by `add_column`.
/// A column whose lookup on the intermediate frame fails is skipped rather
/// than reported.
#[cfg(feature = "parquet")]
pub fn from_parquet<P: AsRef<Path>>(path: P) -> Result<Self> {
    use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;

    let source = SplitDataFrame::from_parquet(path)?;
    let mut result = Self::new();

    for column_name in source.column_names() {
        match source.column(column_name) {
            Ok(view) => {
                result.add_column(column_name.to_string(), view.column.clone())?;
            }
            // Lookup failures are ignored; the column is simply left out.
            Err(_) => continue,
        }
    }

    // Only overwrite the index when the intermediate frame actually has one.
    if let Some(source_index) = source.get_index() {
        result.index = Some(source_index.clone());
    }

    Ok(result)
}
/// Builds an `OptimizedDataFrame` from the JSON file at `path`.
///
/// Reading is delegated to the split-dataframe implementation's
/// `from_json`. Every column of the intermediate frame is then cloned into a
/// fresh frame, followed by its index when one is present.
///
/// # Errors
///
/// Returns any error produced by the underlying reader or by `add_column`.
/// A column whose lookup on the intermediate frame fails is skipped rather
/// than reported.
pub fn from_json<P: AsRef<Path>>(path: P) -> Result<Self> {
    use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;

    let split_df = SplitDataFrame::from_json(path)?;
    let mut df = Self::new();

    for name in split_df.column_names() {
        if let Ok(column_view) = split_df.column(name) {
            let column = column_view.column;
            df.add_column(name.to_string(), column.clone())?;
        }
    }

    // Only overwrite the index when the intermediate frame actually has one.
    if let Some(index) = split_df.get_index() {
        df.index = Some(index.clone());
    }

    Ok(df)
}
pub fn to_json<P: AsRef<Path>>(&self, path: P, orient: JsonOrient) -> Result<()> {
use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;
use crate::optimized::split_dataframe::serialize::JsonOrient as SplitJsonOrient;
let mut split_df = SplitDataFrame::new();
for name in &self.column_names {
if let Ok(column_view) = self.column(name) {
let column = column_view.column;
split_df.add_column(name.clone(), column.clone())?;
}
}
if let Some(ref index) = self.index {
match index {
crate::index::DataFrameIndex::Simple(simple_index) => {
split_df.set_index_from_simple_index(simple_index.clone())?;
}
crate::index::DataFrameIndex::Multi(multi_index) => {
split_df.set_index(crate::index::DataFrameIndex::Multi(multi_index.clone()))?;
}
}
}
let split_orient = match orient {
JsonOrient::Records => SplitJsonOrient::Records,
JsonOrient::Columns => SplitJsonOrient::Columns,
};
split_df.to_json(path, split_orient)
}
/// Infers the most specific column type for a slice of raw string cells and
/// builds the corresponding `Column`.
///
/// Candidate types are tried in this order: `Int64`, `Float64`, `Boolean`,
/// with `String` as the fallback. A candidate is chosen only when every
/// non-blank cell fits it. Leading and trailing whitespace is ignored for
/// all three detectors.
///
/// * Blank (empty or whitespace-only) cells never block a candidate and are
///   stored as `0`, `0.0` or `false` respectively.
/// * Boolean cells are matched case-insensitively against
///   `true/false/1/0/yes/no/t/f`. Because integers are tried first, a column
///   made only of `0`/`1` becomes `Int64`, not `Boolean`.
/// * An empty `data` slice yields an empty `String` column.
/// * The `String` fallback stores the cells exactly as given (untrimmed).
///
/// `name` is accepted for call-site compatibility but does not influence
/// the result.
pub(super) fn infer_and_create_column(data: &[String], name: &str) -> Column {
    // Explicitly mark the parameter as intentionally unused.
    let _ = name;

    if data.is_empty() {
        return Column::String(StringColumn::new(Vec::new()));
    }

    // Trim once so every detector sees the same text. `str::parse` for `i64`
    // and `f64` rejects surrounding whitespace, so parsing the raw cell would
    // make padded numbers such as " 42" fall through to later candidates.
    let cells: Vec<&str> = data.iter().map(|raw| raw.trim()).collect();

    if cells
        .iter()
        .all(|&cell| cell.is_empty() || cell.parse::<i64>().is_ok())
    {
        let int_data: Vec<i64> = cells
            .iter()
            .map(|&cell| cell.parse::<i64>().unwrap_or(0))
            .collect();
        return Column::Int64(Int64Column::new(int_data));
    }

    if cells
        .iter()
        .all(|&cell| cell.is_empty() || cell.parse::<f64>().is_ok())
    {
        let float_data: Vec<f64> = cells
            .iter()
            .map(|&cell| cell.parse::<f64>().unwrap_or(0.0))
            .collect();
        return Column::Float64(Float64Column::new(float_data));
    }

    // Single source of truth for the accepted boolean spellings.
    let parse_bool = |cell: &str| -> Option<bool> {
        match cell.to_lowercase().as_str() {
            "true" | "1" | "yes" | "t" => Some(true),
            "false" | "0" | "no" | "f" => Some(false),
            _ => None,
        }
    };

    if cells
        .iter()
        .all(|&cell| cell.is_empty() || parse_bool(cell).is_some())
    {
        let bool_data: Vec<bool> = cells
            .iter()
            .map(|&cell| parse_bool(cell).unwrap_or(false))
            .collect();
        return Column::Boolean(BooleanColumn::new(bool_data));
    }

    Column::String(StringColumn::new(data.to_vec()))
}
pub(super) fn from_standard_dataframe(df: &crate::dataframe::DataFrame) -> Result<Self> {
crate::optimized::convert::from_standard_dataframe(df)
}
pub fn from_dataframe(df: &crate::dataframe::DataFrame) -> Result<Self> {
Self::from_standard_dataframe(df)
}
pub(super) fn to_standard_dataframe(&self) -> Result<crate::dataframe::DataFrame> {
crate::optimized::convert::to_standard_dataframe(self)
}
}