use std::{collections::HashSet, sync::Arc};
use arrow_array::{RecordBatch, StructArray};
use arrow_schema::Field;
use pyo3::{
Borrowed, Bound, FromPyObject, IntoPyObject, Py, PyAny, PyErr, PyResult, Python, pyclass,
pymethods,
types::{PyAnyMethods, PyCapsule, PyList, PyListMethods, PyString, PyTuple},
};
use pyo3_arrow::ffi::{to_array_pycapsules, to_schema_pycapsule};
use crate::{
ExcelSheet,
data::{
ExcelSheetData, record_batch_from_data_and_columns_with_skip_rows,
selected_columns_to_schema,
},
error::{
ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult, py_errors::IntoPyResult,
},
types::{
dtype::DTypes,
excelsheet::{SelectedColumns, SheetVisible, SkipRows, column_info::ColumnInfo},
idx_or_name::IdxOrName,
},
};
impl TryFrom<&Bound<'_, PyList>> for SelectedColumns {
type Error = FastExcelError;
fn try_from(py_list: &Bound<'_, PyList>) -> FastExcelResult<Self> {
use FastExcelErrorKind::InvalidParameters;
if py_list.is_empty() {
Err(InvalidParameters("list of selected columns is empty".to_string()).into())
} else if let Ok(selection) = py_list.extract::<Vec<IdxOrName>>() {
Ok(Self::Selection(selection))
} else {
Err(
InvalidParameters(format!("expected list[int] | list[str], got {py_list:?}"))
.into(),
)
}
}
}
/// Determines the column selection from an optional, arbitrary Python object.
///
/// * `None` selects every column ([`SelectedColumns::All`]).
/// * A Python `str` is parsed through `SelectedColumns`' `FromStr` implementation.
/// * A Python `list` goes through the `TryFrom<&Bound<PyList>>` conversion.
/// * A callable is stored as-is in [`SelectedColumns::DynamicSelection`].
///
/// # Errors
///
/// Returns `InvalidParameters` for any other object type, and forwards the
/// errors of the string / list conversions. Every error produced for a provided
/// object is wrapped with a context message that includes that object.
impl TryFrom<Option<&Bound<'_, PyAny>>> for SelectedColumns {
    type Error = FastExcelError;

    fn try_from(py_any_opt: Option<&Bound<'_, PyAny>>) -> FastExcelResult<Self> {
        // No object at all means "no restriction".
        let Some(py_any) = py_any_opt else {
            return Ok(Self::All);
        };

        let selection: FastExcelResult<Self> = if let Ok(py_str) = py_any.extract::<String>() {
            py_str.parse()
        } else if let Ok(py_list) = py_any.cast::<PyList>() {
            py_list.try_into()
        } else if py_any.is_callable() {
            // Extracting `Py<PyAny>` succeeds for *every* Python object, which
            // made the error branch below unreachable and let non-callables
            // through. Check callability explicitly instead.
            Ok(Self::DynamicSelection(py_any.clone().unbind()))
        } else {
            Err(FastExcelErrorKind::InvalidParameters(format!(
                "unsupported object type {object_type}",
                object_type = py_any.get_type()
            ))
            .into())
        };

        selection.with_context(|| {
            format!("could not determine selected columns from provided object: {py_any}")
        })
    }
}
/// Converts a sheet visibility into a Python `str`: `"visible"`, `"hidden"`
/// or `"veryhidden"`.
impl<'py> IntoPyObject<'py> for &SheetVisible {
    type Target = PyString;
    type Output = Bound<'py, Self::Target>;
    type Error = FastExcelError;

    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let label = match self {
            SheetVisible::Visible => "visible",
            SheetVisible::Hidden => "hidden",
            SheetVisible::VeryHidden => "veryhidden",
        };
        // This conversion has no failing path; the `Result` only exists to
        // satisfy the trait signature.
        Ok(PyString::new(py, label))
    }
}
impl SkipRows {
    /// Tells whether the row at `row_idx` must be skipped under this policy.
    ///
    /// * `Simple(offset)`: rows whose index is strictly below `offset` are skipped.
    /// * `List(set)`: rows whose index is contained in `set` are skipped.
    /// * `Callable(func)`: `func(row_idx)` is called and its result is extracted as a `bool`.
    /// * `SkipEmptyRowsAtBeginning`: always `false` here.
    ///
    /// # Errors
    ///
    /// Returns `InvalidParameters` when the callable raises, or when its return
    /// value cannot be extracted as a `bool`.
    pub(crate) fn should_skip_row(&self, row_idx: usize, py: Python) -> FastExcelResult<bool> {
        let skip = match self {
            SkipRows::Simple(offset) => row_idx < *offset,
            SkipRows::List(skip_set) => skip_set.contains(&row_idx),
            // NOTE(review): no row is skipped by index for this variant;
            // presumably the empty-row handling lives elsewhere — confirm.
            SkipRows::SkipEmptyRowsAtBeginning => false,
            SkipRows::Callable(func) => {
                let returned = func.call1(py, (row_idx,)).map_err(|e| {
                    FastExcelErrorKind::InvalidParameters(format!(
                        "Error calling skip_rows function for row {row_idx}: {e}"
                    ))
                })?;
                returned.extract::<bool>(py).map_err(|e| {
                    FastExcelErrorKind::InvalidParameters(format!(
                        "skip_rows callable must return bool, got error: {e}"
                    ))
                })?
            }
        };
        Ok(skip)
    }
}
// Describes an error attached to a single cell; exposed to Python as a class
// with read-only attributes (`#[pyo3(get)]`).
// NOTE(review): presumably produced when a cell value cannot be converted —
// confirm against the code that builds these.
// Plain `//` comments are used on purpose: PyO3 exports `///` doc comments as
// Python docstrings.
#[derive(Debug, Clone)]
#[pyclass(skip_from_py_object)]
pub(crate) struct CellError {
// `(row, col)` of the cell, before `row_offset` is subtracted from the row.
#[pyo3(get)]
pub position: (usize, usize),
// Amount subtracted from the row of `position` by `offset_position`.
#[pyo3(get)]
pub row_offset: usize,
// Free-form description of the error.
#[pyo3(get)]
pub detail: String,
}
// Python-facing methods of `CellError`.
// Plain `//` comments are used on purpose: PyO3 exports `///` doc comments as
// Python docstrings.
#[pymethods]
impl CellError {
    // Python property: `position` with `row_offset` subtracted from its row;
    // the column is returned unchanged.
    // NOTE(review): plain subtraction — it underflows if `row_offset` exceeds
    // the row; confirm the code building `CellError` guarantees it cannot.
    #[getter]
    pub fn offset_position(&self) -> (usize, usize) {
        (self.position.0 - self.row_offset, self.position.1)
    }

    // Debug-style representation listing both positions, the offset and the
    // detail message (the latter formatted with `{:?}`, i.e. quoted).
    pub fn __repr__(&self) -> String {
        let (row, col) = self.position;
        let (offset_row, offset_col) = self.offset_position();
        let row_offset = self.row_offset;
        let detail = &self.detail;
        format!(
            "CellError(position=({row}, {col}), offset_position=({offset_row}, {offset_col}), row_offset={row_offset}, detail={detail:?})"
        )
    }
}
// Collection of `CellError` values, exposed to Python as a class.
// Plain `//` comments are used on purpose: PyO3 exports `///` doc comments as
// Python docstrings.
#[pyclass]
pub(crate) struct CellErrors {
// The wrapped errors, in the order they were stored.
pub errors: Vec<CellError>,
}
#[pymethods]
impl CellErrors {
#[getter]
pub fn errors<'p>(&'p self, _py: Python<'p>) -> Vec<CellError> {
self.errors.clone()
}
pub fn __repr__(&self) -> String {
let errors_repr: Vec<String> = self.errors.iter().map(|e| e.__repr__()).collect();
format!("CellErrors(errors=[{}])", errors_repr.join(", "))
}
}
/// Extracts a [`SkipRows`] policy from a Python object.
///
/// The checks run in this order, and the first match wins:
///
/// 1. `None` gives `SkipEmptyRowsAtBeginning`.
/// 2. A value extractable as `usize` gives `Simple`.
/// 3. A value extractable as `Vec<usize>` gives `List` (collected into a set).
/// 4. An object exposing a `__call__` attribute gives `Callable`.
///
/// # Errors
///
/// Any other object yields an `InvalidParameters` error converted to a `PyErr`.
impl<'a, 'py> FromPyObject<'a, 'py> for SkipRows {
    type Error = PyErr;

    fn extract(obj: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {
        if obj.is_none() {
            Ok(SkipRows::SkipEmptyRowsAtBeginning)
        } else if let Ok(skip_count) = obj.extract::<usize>() {
            Ok(SkipRows::Simple(skip_count))
        } else if let Ok(skip_list) = obj.extract::<Vec<usize>>() {
            // Duplicates in the Python list collapse when collected into the set.
            let skip_set: HashSet<usize> = skip_list.into_iter().collect();
            Ok(SkipRows::List(skip_set))
        } else if obj.hasattr("__call__").unwrap_or(false) {
            // A failing attribute lookup is treated the same as "not callable".
            Ok(SkipRows::Callable(Arc::new(obj.to_owned().into())))
        } else {
            Err(FastExcelErrorKind::InvalidParameters(
                "skip_rows must be int, list of int, callable, or None".to_string(),
            )
            .into())
            .into_pyresult()
        }
    }
}
/// Builds a `RecordBatch` from a sheet using its selected columns, skip-rows
/// policy, offset, limit and `whitespace_as_null` option.
///
/// # Errors
///
/// Forwards the error of `record_batch_from_data_and_columns_with_skip_rows`,
/// with a context message naming the sheet.
impl TryFrom<&ExcelSheet> for RecordBatch {
    type Error = FastExcelError;

    fn try_from(sheet: &ExcelSheet) -> FastExcelResult<Self> {
        let (offset, limit) = (sheet.offset(), sheet.limit());
        let columns = &sheet.selected_columns;
        let skip_rows = sheet.pagination.skip_rows();
        let whitespace_as_null = sheet.opts.whitespace_as_null;

        // Both arms make the same call; they only differ in the type of
        // `range`, which is why they cannot be merged into a single arm.
        let batch = match &sheet.data {
            ExcelSheetData::Owned(range) => record_batch_from_data_and_columns_with_skip_rows(
                columns,
                range,
                skip_rows,
                offset,
                limit,
                whitespace_as_null,
            ),
            ExcelSheetData::Ref(range) => record_batch_from_data_and_columns_with_skip_rows(
                columns,
                range,
                skip_rows,
                offset,
                limit,
                whitespace_as_null,
            ),
        };

        batch.with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name()))
    }
}
// Python-facing methods of `ExcelSheet`.
//
// Plain `//` comments are used throughout this block on purpose: PyO3 exports
// `///` doc comments on these items as Python docstrings, and the
// Python-visible surface is meant to stay as it is.
#[pymethods]
impl ExcelSheet {
    // Python property `width`; forwards to `ExcelSheet::width`.
    #[getter("width")]
    pub fn py_width(&mut self) -> usize {
        self.width()
    }

    // Python property `height`; forwards to `ExcelSheet::height`.
    #[getter("height")]
    pub fn py_height(&mut self) -> usize {
        self.height()
    }

    // Python property `total_height`; forwards to `ExcelSheet::total_height`.
    #[getter("total_height")]
    pub fn py_total_height(&mut self) -> usize {
        self.total_height()
    }

    // Python property `offset`; forwards to `ExcelSheet::offset`.
    #[getter("offset")]
    pub fn py_offset(&self) -> usize {
        self.offset()
    }

    // Python property `selected_columns`; returns an owned copy of the
    // selected column descriptions.
    #[getter("selected_columns")]
    pub fn py_selected_columns(&self) -> Vec<ColumnInfo> {
        self.selected_columns().to_vec()
    }

    // Python method `available_columns`; forwards to
    // `ExcelSheet::available_columns`, including its error.
    #[pyo3(name = "available_columns")]
    pub fn py_available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo>> {
        self.available_columns()
    }

    // Python property `specified_dtypes`; `None` when `specified_dtypes()`
    // returns nothing.
    #[getter("specified_dtypes")]
    pub fn py_specified_dtypes(&self) -> Option<&DTypes> {
        self.specified_dtypes()
    }

    // Python property `name`; forwards to `ExcelSheet::name`.
    #[getter("name")]
    pub fn py_name(&self) -> &str {
        self.name()
    }

    // Python property `visible`; the visibility converted to a Python string
    // through `IntoPyObject for &SheetVisible`.
    #[getter("visible")]
    pub fn py_visible<'py>(&'py self, py: Python<'py>) -> FastExcelResult<Bound<'py, PyString>> {
        let visibility: SheetVisible = self.visible();
        (&visibility).into_pyobject(py)
    }

    // Converts the sheet to a pyarrow object (compiled only with the
    // `pyarrow` feature).
    //
    // The `RecordBatch` is built inside `py.detach`, then handed to
    // `ToPyArrow::to_pyarrow`.
    // NOTE(review): the second `with_context` is applied to the result of the
    // whole chain, so it also wraps a failed `RecordBatch` creation — confirm
    // that is intended (`to_arrow_with_errors` scopes it to the conversion).
    #[cfg(feature = "pyarrow")]
    pub fn to_arrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
        use arrow_pyarrow::ToPyArrow;
        use pyo3::IntoPyObjectExt;

        use crate::error::py_errors::IntoPyResult;

        let record_batch = py.detach(|| RecordBatch::try_from(self)).with_context(|| {
            format!(
                "could not create RecordBatch from sheet \"{}\"",
                self.name()
            )
        });

        let py_batch = record_batch
            .and_then(|rb| {
                rb.to_pyarrow(py)
                    .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
            })
            .with_context(|| {
                format!(
                    "could not convert RecordBatch to pyarrow for sheet \"{}\"",
                    self.name()
                )
            });

        py_batch.into_pyresult()?.into_bound_py_any(py)
    }

    // Like `to_arrow`, but returns a Python tuple `(record_batch, errors)`
    // (compiled only with the `pyarrow` feature).
    //
    // The batch and the errors both come from
    // `record_batch_from_data_and_columns_with_errors`, called inside
    // `py.detach`. Unlike `to_arrow`, no skip-rows policy is passed to it.
    #[cfg(feature = "pyarrow")]
    pub fn to_arrow_with_errors<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
        use arrow_pyarrow::IntoPyArrow;
        use pyo3::IntoPyObjectExt;

        use crate::data::record_batch_from_data_and_columns_with_errors;

        let (offset, limit) = (self.offset(), self.limit());

        let built = py.detach(|| {
            record_batch_from_data_and_columns_with_errors(
                &self.selected_columns,
                self.data(),
                offset,
                limit,
                self.opts.whitespace_as_null,
            )
        });
        let (record_batch, errors) = built.with_context(|| {
            format!(
                "could not create RecordBatch from sheet \"{}\"",
                self.name()
            )
        })?;

        let py_batch = record_batch
            .into_pyarrow(py)
            .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
            .with_context(|| {
                format!(
                    "could not convert RecordBatch to pyarrow for sheet \"{}\"",
                    self.name()
                )
            })?;

        (py_batch, errors).into_bound_py_any(py)
    }

    // Arrow PyCapsule interface: exports the schema derived from the selected
    // columns as a capsule.
    pub fn __arrow_c_schema__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyCapsule>> {
        let schema = selected_columns_to_schema(&self.selected_columns);
        let capsule = to_schema_pycapsule(py, &schema)?;
        Ok(capsule)
    }

    // Arrow PyCapsule interface: exports the sheet as a struct array.
    //
    // The sheet is converted to a `RecordBatch`, which is then wrapped into a
    // `StructArray` described by an unnamed, non-nullable struct field built
    // from the batch's own fields. `requested_schema` is passed through to
    // `to_array_pycapsules` untouched.
    pub fn __arrow_c_array__<'py>(
        &self,
        py: Python<'py>,
        requested_schema: Option<Bound<'py, PyCapsule>>,
    ) -> PyResult<Bound<'py, PyTuple>> {
        let batch = RecordBatch::try_from(self)
            .with_context(|| {
                format!(
                    "could not create RecordBatch from sheet \"{}\"",
                    self.name()
                )
            })
            .into_pyresult()?;

        // The field must be built first: converting the batch into a
        // `StructArray` consumes it.
        let struct_field = Field::new_struct("", batch.schema_ref().fields().clone(), false);
        let struct_array = Arc::new(StructArray::from(batch));

        let capsules = to_array_pycapsules(
            py,
            struct_field.into(),
            struct_array.as_ref(),
            requested_schema,
        )?;
        Ok(capsules)
    }

    // Short representation: `ExcelSheet<NAME>`.
    pub fn __repr__(&self) -> String {
        format!("ExcelSheet<{}>", self.name())
    }
}