use std::collections::HashMap;
use std::path::Path;
use crate::column::{BooleanColumn, Column, Float64Column, Int64Column, StringColumn};
use crate::dataframe::DataFrame;
use crate::error::{Error, Result};
use crate::io::xlsx;
use crate::optimized::split_dataframe::core::OptimizedDataFrame as SplitDataFrame;
use crate::optimized::OptimizedDataFrame;
use crate::series::Series;
#[derive(Debug, Clone)]
pub struct ExcelCell {
pub value: String,
pub formula: Option<String>,
pub data_type: String,
pub format: ExcelCellFormat,
}
#[derive(Debug, Clone)]
pub struct ExcelCellFormat {
pub font_bold: bool,
pub font_italic: bool,
pub font_color: Option<String>,
pub background_color: Option<String>,
pub number_format: Option<String>,
}
impl Default for ExcelCellFormat {
fn default() -> Self {
Self {
font_bold: false,
font_italic: false,
font_color: None,
background_color: None,
number_format: None,
}
}
}
#[derive(Debug, Clone)]
pub struct NamedRange {
pub name: String,
pub sheet_name: String,
pub range: String,
pub comment: Option<String>,
}
#[derive(Debug, Clone)]
pub struct ExcelReadOptions {
pub preserve_formulas: bool,
pub include_formatting: bool,
pub read_named_ranges: bool,
pub use_memory_map: bool,
pub optimize_memory: bool,
}
impl Default for ExcelReadOptions {
fn default() -> Self {
Self {
preserve_formulas: false,
include_formatting: false,
read_named_ranges: false,
use_memory_map: true,
optimize_memory: true,
}
}
}
#[derive(Debug, Clone)]
pub struct ExcelWriteOptions {
pub preserve_formulas: bool,
pub apply_formatting: bool,
pub write_named_ranges: bool,
pub protect_sheets: bool,
pub optimize_large_files: bool,
}
impl Default for ExcelWriteOptions {
fn default() -> Self {
Self {
preserve_formulas: false,
apply_formatting: false,
write_named_ranges: false,
protect_sheets: false,
optimize_large_files: false,
}
}
}
#[derive(Debug, Clone)]
pub struct ExcelWorkbookInfo {
pub sheet_names: Vec<String>,
pub sheet_count: usize,
pub total_cells: usize,
}
#[derive(Debug, Clone)]
pub struct ExcelSheetInfo {
pub name: String,
pub rows: usize,
pub columns: usize,
pub range: String,
}
#[derive(Debug, Clone)]
pub struct ExcelFileAnalysis {
pub workbook_info: ExcelWorkbookInfo,
pub formula_count: usize,
pub formatted_cell_count: usize,
pub named_range_count: usize,
pub complexity_score: f64,
}
pub fn read_excel<P: AsRef<Path>>(
path: P,
sheet_name: Option<&str>,
header: bool,
skip_rows: usize,
use_cols: Option<&[&str]>,
) -> Result<DataFrame> {
let split = xlsx::read_split_dataframe(path.as_ref(), sheet_name, header, skip_rows, use_cols)?;
split_to_standard(&split)
}
pub fn write_excel<P: AsRef<Path>>(
df: &OptimizedDataFrame,
path: P,
sheet_name: Option<&str>,
index: bool,
) -> Result<()> {
let split = optimized_to_split(df)?;
xlsx::write_split_dataframe(&split, path.as_ref(), sheet_name, index)
}
pub fn list_sheet_names<P: AsRef<Path>>(path: P) -> Result<Vec<String>> {
xlsx::list_sheets(path.as_ref())
}
pub fn get_workbook_info<P: AsRef<Path>>(path: P) -> Result<ExcelWorkbookInfo> {
let dims = xlsx::sheet_dimensions(path.as_ref())?;
let sheet_names: Vec<String> = dims.iter().map(|d| d.0.clone()).collect();
let total_cells = dims.iter().map(|d| d.1 * d.2).sum();
Ok(ExcelWorkbookInfo {
sheet_names: sheet_names.clone(),
sheet_count: sheet_names.len(),
total_cells,
})
}
pub fn get_sheet_info<P: AsRef<Path>>(path: P, sheet_name: &str) -> Result<ExcelSheetInfo> {
let dims = xlsx::sheet_dimensions(path.as_ref())?;
let (_, rows, cols) = dims
.iter()
.find(|(n, _, _)| n == sheet_name)
.cloned()
.ok_or_else(|| {
Error::IoError(format!("Could not find sheet '{sheet_name}' in workbook"))
})?;
let last_col_letter = if cols == 0 {
'A'
} else {
let letter_idx = cols.saturating_sub(1) as u32;
if letter_idx <= 25 {
char::from_u32(b'A' as u32 + letter_idx).unwrap_or('Z')
} else {
'Z'
}
};
let range = format!("A1:{last_col_letter}{rows}");
Ok(ExcelSheetInfo {
name: sheet_name.to_string(),
rows,
columns: cols,
range,
})
}
pub fn read_excel_sheets<P: AsRef<Path>>(
path: P,
sheet_names: Option<&[&str]>,
header: bool,
skip_rows: usize,
use_cols: Option<&[&str]>,
) -> Result<HashMap<String, DataFrame>> {
let mut all = xlsx::read_all_sheets(path.as_ref(), header, skip_rows, use_cols)?;
let mut out = HashMap::new();
let names: Vec<String> = match sheet_names {
Some(wanted) => {
for &n in wanted {
if !all.contains_key(n) {
return Err(Error::IoError(format!(
"Sheet '{n}' not found. Available sheets: {:?}",
all.keys().collect::<Vec<_>>()
)));
}
}
wanted.iter().map(|s| (*s).to_string()).collect()
}
None => all.keys().cloned().collect(),
};
for name in names {
if let Some(split) = all.remove(&name) {
out.insert(name, split_to_standard(&split)?);
}
}
Ok(out)
}
pub fn read_excel_with_info<P: AsRef<Path>>(
path: P,
sheet_name: Option<&str>,
header: bool,
skip_rows: usize,
use_cols: Option<&[&str]>,
) -> Result<(DataFrame, ExcelWorkbookInfo)> {
let df = read_excel(path.as_ref(), sheet_name, header, skip_rows, use_cols)?;
let info = get_workbook_info(path.as_ref())?;
Ok((df, info))
}
pub fn write_excel_sheets<P: AsRef<Path>>(
sheets: &HashMap<String, &OptimizedDataFrame>,
path: P,
index: bool,
) -> Result<()> {
let materialised: Vec<(String, SplitDataFrame)> = sheets
.iter()
.map(|(name, df)| {
let split = optimized_to_split(df)?;
Ok::<_, Error>((name.clone(), split))
})
.collect::<Result<Vec<_>>>()?;
let refs: Vec<(String, &SplitDataFrame)> =
materialised.iter().map(|(n, d)| (n.clone(), d)).collect();
xlsx::write_split_dataframe_sheets(&refs, path.as_ref(), index)
}
pub fn read_excel_enhanced<P: AsRef<Path>>(
path: P,
sheet_name: Option<&str>,
_options: ExcelReadOptions,
) -> Result<(DataFrame, Vec<ExcelCell>, Vec<NamedRange>)> {
let df = read_excel(path.as_ref(), sheet_name, true, 0, None)?;
Ok((df, Vec::new(), Vec::new()))
}
pub fn write_excel_enhanced<P: AsRef<Path>>(
df: &OptimizedDataFrame,
path: P,
sheet_name: Option<&str>,
_cells: &[ExcelCell],
_named_ranges: &[NamedRange],
_options: ExcelWriteOptions,
) -> Result<()> {
write_excel(df, path, sheet_name, false)
}
pub fn optimize_excel_file<P1: AsRef<Path>, P2: AsRef<Path>>(
input_path: P1,
output_path: P2,
_compression_level: u8,
) -> Result<()> {
let df = read_excel(input_path.as_ref(), None, true, 0, None)?;
let optimized_df = OptimizedDataFrame::from_dataframe(&df)?;
write_excel(&optimized_df, output_path.as_ref(), None, false)
}
pub fn analyze_excel_file<P: AsRef<Path>>(path: P) -> Result<ExcelFileAnalysis> {
let workbook_info = get_workbook_info(path.as_ref())?;
let complexity_score = workbook_info.total_cells as f64 * 0.1;
Ok(ExcelFileAnalysis {
workbook_info,
formula_count: 0,
formatted_cell_count: 0,
named_range_count: 0,
complexity_score,
})
}
fn split_to_standard(split: &SplitDataFrame) -> Result<DataFrame> {
let mut df = DataFrame::new();
for (col, col_name) in split.columns.iter().zip(split.column_names.iter()) {
let strings: Vec<String> = column_to_strings(col);
let series = Series::new(strings, Some(col_name.clone()))?;
df.add_column(col_name.clone(), series)?;
}
Ok(df)
}
fn column_to_strings(col: &Column) -> Vec<String> {
match col {
Column::Int64(c) => (0..c.len())
.map(|i| match c.get(i) {
Ok(Some(v)) => v.to_string(),
_ => String::new(),
})
.collect(),
Column::Float64(c) => (0..c.len())
.map(|i| match c.get(i) {
Ok(Some(v)) => v.to_string(),
_ => String::new(),
})
.collect(),
Column::String(c) => (0..c.len())
.map(|i| match c.get(i) {
Ok(Some(v)) => v.to_string(),
_ => String::new(),
})
.collect(),
Column::Boolean(c) => (0..c.len())
.map(|i| match c.get(i) {
Ok(Some(v)) => v.to_string(),
_ => String::new(),
})
.collect(),
}
}
fn optimized_to_split(df: &OptimizedDataFrame) -> Result<SplitDataFrame> {
let mut split = SplitDataFrame::new();
for name in df.column_names() {
let view = df.column(name)?;
split.add_column(name.clone(), view.column().clone())?;
}
Ok(split)
}
use crate::column::ColumnTrait;
#[allow(dead_code)]
fn _unused_helpers(_: &StringColumn, _: &Int64Column, _: &Float64Column, _: &BooleanColumn) {}