tuitab 0.4.2

Terminal tabular data explorer — CSV/JSON/Parquet/Excel/SQLite viewer with filtering, sorting, pivot tables, and charts
use crate::data::dataframe::DataFrame;
use crate::data::io::wrap_polars_df;
use color_eyre::{eyre::eyre, Result};
use polars::prelude::*;
use std::path::Path;

pub(super) fn load_excel(path: &Path) -> Result<DataFrame> {
    use calamine::{open_workbook_auto, Reader};

    let mut workbook = open_workbook_auto(path)?;
    let sheet_names = workbook.sheet_names().to_owned();
    if sheet_names.is_empty() {
        return Err(eyre!("Excel file is empty"));
    }
    let range = workbook
        .worksheet_range(&sheet_names[0])
        .ok_or_else(|| eyre!("Cannot read first sheet"))??;

    parse_excel_range(range)
}

pub fn load_excel_sheet_by_name(path: &Path, sheet_name: &str) -> Result<DataFrame> {
    use calamine::{open_workbook_auto, Reader};

    let mut workbook = open_workbook_auto(path)?;
    let range = workbook
        .worksheet_range(sheet_name)
        .ok_or_else(|| eyre!("Sheet '{}' not found", sheet_name))??;

    parse_excel_range(range)
}

pub fn load_excel_overview(path: &Path) -> Result<DataFrame> {
    let names = excel_sheet_names(path)?;
    if names.is_empty() {
        return Err(eyre!("Excel file has no sheets"));
    }
    let pdf =
        polars::prelude::DataFrame::new_infer_height(vec![
            Series::new("Sheet".into(), &names).into()
        ])?;
    let mut df = wrap_polars_df(pdf)?;
    if !df.columns.is_empty() {
        df.columns[0].width = 40;
    }
    Ok(df)
}

pub fn excel_sheet_names(path: &Path) -> Result<Vec<String>> {
    use calamine::{open_workbook_auto, Reader};
    let workbook = open_workbook_auto(path)?;
    Ok(workbook.sheet_names().to_owned())
}

pub(super) fn save_xlsx(df: &DataFrame, path: &Path) -> Result<()> {
    use rust_xlsxwriter::{Format, Workbook};

    let ordered_df = df.to_display_polars_df();
    let col_names: Vec<String> = ordered_df
        .get_column_names()
        .iter()
        .map(|s| s.to_string())
        .collect();

    let mut workbook = Workbook::new();
    let sheet = workbook.add_worksheet();
    let header_fmt = Format::new().set_bold();

    for (ci, name) in col_names.iter().enumerate() {
        sheet
            .write_string_with_format(0, ci as u16, name, &header_fmt)
            .map_err(|e| eyre!("{}", e))?;
    }

    let nrows = ordered_df.height();
    let ncols = col_names.len();
    for row_idx in 0..nrows {
        for ci in 0..ncols {
            let series = &ordered_df.columns()[ci];
            let cell_text = series
                .get(row_idx)
                .map(|v| {
                    let s = format!("{}", v);
                    if s.starts_with('"') && s.ends_with('"') {
                        s[1..s.len() - 1].to_string()
                    } else {
                        s
                    }
                })
                .unwrap_or_default();
            if let Ok(n) = cell_text.parse::<f64>() {
                sheet
                    .write_number((row_idx + 1) as u32, ci as u16, n)
                    .map_err(|e| eyre!("{}", e))?;
            } else {
                sheet
                    .write_string((row_idx + 1) as u32, ci as u16, &cell_text)
                    .map_err(|e| eyre!("{}", e))?;
            }
        }
    }

    workbook.save(path).map_err(|e| eyre!("{}", e))?;
    Ok(())
}

fn parse_excel_range(range: calamine::Range<calamine::DataType>) -> Result<DataFrame> {
    let all_rows: Vec<Vec<String>> = range
        .rows()
        .map(|row| row.iter().map(|c| c.to_string()).collect())
        .collect();

    let mut iter = all_rows.into_iter();
    let header_row = iter
        .next()
        .ok_or_else(|| eyre!("Excel sheet has no headers"))?;

    let mut seen: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
    let headers: Vec<String> = header_row
        .into_iter()
        .enumerate()
        .map(|(i, h)| {
            let base = if h.is_empty() {
                format!("column_{}", i + 1)
            } else {
                h
            };
            let count = seen.entry(base.clone()).or_insert(0);
            *count += 1;
            if *count == 1 {
                base
            } else {
                format!("{}_{}", base, count)
            }
        })
        .collect();

    let col_count = headers.len();
    let mut cols_data: Vec<Vec<String>> = vec![Vec::new(); col_count];

    for row in iter {
        for (i, cell) in row.into_iter().enumerate() {
            if i < col_count {
                cols_data[i].push(cell);
            }
        }
    }

    let mut series_vec = Vec::new();
    for (i, col_data) in cols_data.into_iter().enumerate() {
        series_vec.push(Series::new(headers[i].as_str().into(), &col_data).into());
    }

    let pdf = polars::prelude::DataFrame::new_infer_height(series_vec)?;
    wrap_polars_df(pdf)
}