#[cfg(feature = "python")]
mod python;
use std::{
fs::File,
io::{BufReader, Cursor},
};
use calamine::{
Data, HeaderRow, Range, Reader, Sheet as CalamineSheet, Sheets, Table, open_workbook_auto,
open_workbook_auto_from_rs,
};
#[cfg(feature = "python")]
use calamine::{DataRef, ReaderRef};
#[cfg(feature = "python")]
use pyo3::pyclass;
use crate::{
ExcelSheet, ExcelTable,
error::{ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult},
types::{
dtype::{DTypeCoercion, DTypes},
excelsheet::{SelectedColumns, SkipRows},
idx_or_name::IdxOrName,
},
};
use super::excelsheet::table::{extract_table_names, extract_table_range};
enum ExcelSheets {
File(Sheets<BufReader<File>>),
Bytes(Sheets<Cursor<Vec<u8>>>),
}
impl ExcelSheets {
fn worksheet_range(&mut self, name: &str) -> FastExcelResult<Range<Data>> {
match self {
Self::File(sheets) => sheets.worksheet_range(name),
Self::Bytes(sheets) => sheets.worksheet_range(name),
}
.map_err(|err| FastExcelErrorKind::CalamineError(err).into())
.with_context(|| format!("Error while loading sheet {name}"))
}
#[allow(dead_code)]
fn sheet_metadata(&self) -> &[CalamineSheet] {
match self {
ExcelSheets::File(sheets) => sheets.sheets_metadata(),
ExcelSheets::Bytes(sheets) => sheets.sheets_metadata(),
}
}
fn table_names(&mut self, sheet_name: Option<&str>) -> FastExcelResult<Vec<&str>> {
let names = match self {
Self::File(sheets) => extract_table_names(sheets, sheet_name),
Self::Bytes(sheets) => extract_table_names(sheets, sheet_name),
}?;
Ok(names.into_iter().map(String::as_str).collect())
}
fn defined_names(&mut self) -> FastExcelResult<Vec<DefinedName>> {
let defined_names = match self {
Self::File(sheets) => sheets.defined_names(),
Self::Bytes(sheets) => sheets.defined_names(),
}
.to_vec()
.into_iter()
.map(|(name, formula)| DefinedName { name, formula })
.collect();
Ok(defined_names)
}
#[cfg(feature = "python")]
fn supports_by_ref(&self) -> bool {
matches!(
self,
Self::File(Sheets::Xlsx(_)) | Self::Bytes(Sheets::Xlsx(_))
)
}
fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
match self {
Self::File(sheets) => {
sheets.with_header_row(header_row);
self
}
Self::Bytes(sheets) => {
sheets.with_header_row(header_row);
self
}
}
}
#[cfg(feature = "python")]
fn worksheet_range_ref(&mut self, name: &str) -> FastExcelResult<Range<DataRef<'_>>> {
match self {
ExcelSheets::File(Sheets::Xlsx(sheets)) => Ok(sheets.worksheet_range_ref(name)?),
ExcelSheets::Bytes(Sheets::Xlsx(sheets)) => Ok(sheets.worksheet_range_ref(name)?),
_ => Err(FastExcelErrorKind::Internal(
"sheets do not support worksheet_range_ref".to_string(),
)
.into()),
}
.with_context(|| format!("Error while loading sheet {name}"))
}
fn get_table(&mut self, name: &str) -> FastExcelResult<Table<Data>> {
match self {
Self::File(sheets) => extract_table_range(name, sheets),
Self::Bytes(sheets) => extract_table_range(name, sheets),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "python", pyclass(name = "DefinedName", skip_from_py_object))]
pub struct DefinedName {
pub name: String,
pub formula: String,
}
#[non_exhaustive]
#[derive(Debug)]
pub struct LoadSheetOrTableOptions {
pub header_row: Option<usize>,
pub column_names: Option<Vec<String>>,
pub skip_rows: SkipRows,
pub n_rows: Option<usize>,
pub schema_sample_rows: Option<usize>,
pub dtype_coercion: DTypeCoercion,
pub selected_columns: SelectedColumns,
pub dtypes: Option<DTypes>,
pub skip_whitespace_tail_rows: bool,
pub whitespace_as_null: bool,
}
impl LoadSheetOrTableOptions {
fn calamine_header_row(&self) -> HeaderRow {
match (self.header_row, &self.skip_rows) {
(None | Some(0), SkipRows::SkipEmptyRowsAtBeginning) => HeaderRow::FirstNonEmptyRow,
(None, _) => HeaderRow::Row(0),
(Some(row), _) => HeaderRow::Row(row as u32),
}
}
pub(crate) fn data_header_row(&self) -> Option<usize> {
self.header_row.and(Some(0))
}
pub fn new_for_sheet() -> Self {
Self {
header_row: Some(0),
column_names: Default::default(),
skip_rows: Default::default(),
n_rows: Default::default(),
schema_sample_rows: Default::default(),
dtype_coercion: Default::default(),
selected_columns: Default::default(),
dtypes: Default::default(),
skip_whitespace_tail_rows: Default::default(),
whitespace_as_null: Default::default(),
}
}
pub fn new_for_table() -> Self {
Self {
header_row: None,
column_names: Default::default(),
skip_rows: Default::default(),
n_rows: Default::default(),
schema_sample_rows: Default::default(),
dtype_coercion: Default::default(),
selected_columns: Default::default(),
dtypes: Default::default(),
skip_whitespace_tail_rows: Default::default(),
whitespace_as_null: Default::default(),
}
}
pub fn header_row(mut self, header_row: usize) -> Self {
self.header_row = Some(header_row);
self
}
pub fn no_header_row(mut self) -> Self {
self.header_row = None;
self
}
pub fn column_names<I: IntoIterator<Item = impl Into<String>>>(
mut self,
column_names: I,
) -> Self {
self.column_names = Some(column_names.into_iter().map(Into::into).collect());
self
}
pub fn skip_rows(mut self, skip_rows: SkipRows) -> Self {
self.skip_rows = skip_rows;
self
}
pub fn n_rows(mut self, n_rows: usize) -> Self {
self.n_rows = Some(n_rows);
self
}
pub fn schema_sample_rows(mut self, schema_sample_rows: usize) -> Self {
self.schema_sample_rows = Some(schema_sample_rows);
self
}
pub fn dtype_coercion(mut self, dtype_coercion: DTypeCoercion) -> Self {
self.dtype_coercion = dtype_coercion;
self
}
pub fn selected_columns(mut self, selected_columns: SelectedColumns) -> Self {
self.selected_columns = selected_columns;
self
}
pub fn with_dtypes(mut self, dtypes: DTypes) -> Self {
self.dtypes = Some(dtypes);
self
}
pub fn skip_whitespace_tail_rows(mut self, skip_whitespace_tail_rows: bool) -> Self {
self.skip_whitespace_tail_rows = skip_whitespace_tail_rows;
self
}
pub fn whitespace_as_null(mut self, whitespace_as_null: bool) -> Self {
self.whitespace_as_null = whitespace_as_null;
self
}
}
#[cfg_attr(feature = "python", pyclass(name = "_ExcelReader"))]
pub struct ExcelReader {
sheets: ExcelSheets,
sheet_metadata: Vec<CalamineSheet>,
#[cfg(feature = "python")]
source: String,
}
impl ExcelReader {
pub(crate) fn try_from_path(path: &str) -> FastExcelResult<Self> {
let sheets = open_workbook_auto(path)
.map_err(|err| FastExcelErrorKind::CalamineError(err).into())
.with_context(|| format!("Could not open workbook at {path}"))?;
let sheet_metadata = sheets.sheets_metadata().to_owned();
Ok(Self {
sheets: ExcelSheets::File(sheets),
sheet_metadata,
#[cfg(feature = "python")]
source: path.to_owned(),
})
}
fn find_sheet_meta(&self, idx_or_name: IdxOrName) -> FastExcelResult<&CalamineSheet> {
match idx_or_name {
IdxOrName::Name(name) => {
if let Some(sheet) = self.sheet_metadata.iter().find(|s| s.name == name) {
Ok(sheet)
} else {
Err(FastExcelErrorKind::SheetNotFound(IdxOrName::Name(name.clone())).into()).with_context(|| {
let available_sheets = self.sheet_metadata.iter().map(|s| format!("\"{}\"", s.name)).collect::<Vec<_>>().join(", ");
format!(
"Sheet \"{name}\" not found in file. Available sheets: {available_sheets}."
)
})
}
}
IdxOrName::Idx(idx) => self
.sheet_metadata
.get(idx)
.ok_or_else(|| FastExcelErrorKind::SheetNotFound(IdxOrName::Idx(idx)).into())
.with_context(|| {
format!(
"Sheet index {idx} is out of range. File has {} sheets.",
self.sheet_metadata.len()
)
}),
}
}
pub fn load_sheet(
&mut self,
idx_or_name: IdxOrName,
opts: LoadSheetOrTableOptions,
) -> FastExcelResult<ExcelSheet> {
let calamine_header_row = opts.calamine_header_row();
let sheet_meta = self.find_sheet_meta(idx_or_name)?.to_owned();
let range = self
.sheets
.with_header_row(calamine_header_row)
.worksheet_range(&sheet_meta.name)?;
ExcelSheet::try_new(sheet_meta, range.into(), opts)
}
pub fn load_table(
&mut self,
name: &str,
opts: LoadSheetOrTableOptions,
) -> FastExcelResult<ExcelTable> {
let table = self.sheets.get_table(name)?;
ExcelTable::try_new(table, opts)
}
pub fn sheet_names(&self) -> Vec<&str> {
self.sheet_metadata
.iter()
.map(|s| s.name.as_str())
.collect()
}
pub fn table_names(&mut self, sheet_name: Option<&str>) -> FastExcelResult<Vec<&str>> {
self.sheets.table_names(sheet_name)
}
pub fn defined_names(&mut self) -> FastExcelResult<Vec<DefinedName>> {
self.sheets.defined_names()
}
}
impl TryFrom<&[u8]> for ExcelReader {
type Error = FastExcelError;
fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
let cursor = Cursor::new(bytes.to_vec());
let sheets = open_workbook_auto_from_rs(cursor)
.map_err(|err| FastExcelErrorKind::CalamineError(err).into())
.with_context(|| "Could not open workbook from bytes")?;
let sheet_metadata = sheets.sheets_metadata().to_owned();
Ok(Self {
sheets: ExcelSheets::Bytes(sheets),
sheet_metadata,
#[cfg(feature = "python")]
source: "bytes".to_owned(),
})
}
}