use arrow::array::{Array, ArrayRef};
use arrow::datatypes::DataType;
use crate::ErrorKind;
/// Describes how a logical type maps onto an Arrow array.
///
/// An implementor names the concrete array it reads from ([`Datatype::Typed`]),
/// the element type read out of that array ([`Datatype::Value`]) and the owned
/// element type consumed when building a new array ([`Datatype::Owned`]).
pub trait Datatype {
    /// Defaults to `false`; implementors may override it.
    ///
    /// NOTE(review): presumably `true` for logical types that admit nulls
    /// (e.g. `Option<…>` wrappers) — confirm against the implementors.
    const NULLABLE: bool = false;

    /// Concrete array representation returned by [`Datatype::downcast`].
    type Typed: Clone;

    /// Element type returned by [`Datatype::value`]; it may borrow from the
    /// typed array it was read from.
    type Value<'a>
    where
        Self: 'a;

    /// Owned element type consumed by [`Datatype::build`] and produced by
    /// [`Datatype::to_owned_value`].
    type Owned;

    /// Returns the Arrow [`DataType`] this logical type declares.
    fn datatype() -> DataType;

    /// Reports whether `actual` is acceptable for this logical type.
    ///
    /// The default implementation compares `actual` with
    /// [`Datatype::datatype`] using [`datatypes_compatible`].
    fn matches(actual: &DataType) -> bool {
        let declared = Self::datatype();
        datatypes_compatible(actual, &declared)
    }

    /// Converts a dynamically typed array into [`Datatype::Typed`].
    ///
    /// # Errors
    ///
    /// Returns a [`ColumnError`] when the conversion fails.
    fn downcast(array: &dyn Array) -> Result<Self::Typed, ColumnError>;

    /// Reports whether the element at `index` of `typed` is null.
    fn is_null(typed: &Self::Typed, index: usize) -> bool;

    /// Reads the element at `index` of `typed`.
    fn value(typed: &Self::Typed, index: usize) -> Self::Value<'_>;

    /// Builds a new array from optional owned values.
    ///
    /// # Errors
    ///
    /// Returns a [`ColumnError`] when the array cannot be built.
    fn build(values: impl Iterator<Item = Option<Self::Owned>>) -> Result<ArrayRef, ColumnError>;

    /// Converts a value read from an array into its owned form.
    fn to_owned_value(value: Self::Value<'_>) -> Self::Owned;
}
/// A [`Datatype`] whose typed array can expose its elements as a slice of
/// native values.
pub trait PrimitiveDatatype: Datatype {
    /// Element type of the slice returned by [`PrimitiveDatatype::values`].
    type Native;

    /// Borrows the elements of `typed` as a slice of
    /// [`PrimitiveDatatype::Native`].
    fn values(typed: &Self::Typed) -> &[Self::Native];
}
/// A [`Datatype`] whose elements can be handed out by reference.
pub trait RefDatatype: Datatype {
    /// Referenced element type; it may be unsized.
    type Ref: ?Sized;

    /// Borrows the element at `index` of `typed`.
    fn value_ref(typed: &Self::Typed, index: usize) -> &Self::Ref;
}
/// Marker trait (it declares no items) for datatypes that can be built
/// infallibly.
///
/// Per the diagnostic attached below, a type lacking this marker is expected
/// to be built through `Column::try_from_values` rather than `from_values`.
#[diagnostic::on_unimplemented(
    message = "`{Self}` cannot be built infallibly",
    note = "dictionary encoding can fail (key overflow): use `Column::try_from_values` instead of `from_values`"
)]
pub trait InfallibleBuild: Datatype {}
/// Failure raised while reading or building a single column.
///
/// The error itself carries no column name; [`ColumnError::for_column`]
/// attaches one.
#[derive(Debug, thiserror::Error)]
pub enum ColumnError {
    /// The array's datatype differs from the expected one.
    #[error("Expected datatype {expected:?}, found {actual:?}")]
    WrongDatatype {
        /// Datatype that was expected.
        expected: DataType,
        /// Datatype that was actually found.
        actual: DataType,
    },

    /// Nulls were found where none are allowed.
    #[error(
        "Found {null_count} null(s) at a non-nullable level. Use `Option<…>` in the logical type to allow nulls"
    )]
    UnexpectedNulls {
        /// Number of offending nulls.
        null_count: usize,
    },

    /// Arrow reported an error while the array was being built.
    #[error("Failed to build the array: {0}")]
    Build(arrow::error::ArrowError),
}
impl ColumnError {
pub fn for_column(self, column: String) -> ErrorKind {
match self {
Self::WrongDatatype { expected, actual } => ErrorKind::WrongDatatype {
column,
expected,
actual,
},
Self::UnexpectedNulls { null_count } => {
ErrorKind::UnexpectedNulls { column, null_count }
}
Self::Build(err) => ErrorKind::BuildRecordBatch(err),
}
}
}
impl From<ColumnError> for arrow::error::ArrowError {
fn from(err: ColumnError) -> Self {
Self::ExternalError(Box::new(err))
}
}
/// Downcasts `array` to the concrete array type `A` and returns a clone of it.
///
/// # Errors
///
/// Returns [`ColumnError::WrongDatatype`] when `array` is not an `A`. The
/// `actual` field is the array's own datatype.
///
/// NOTE(review): `expected` is always `DataType::Null` here, since this
/// function only knows the array type `A`, not a declared datatype — it looks
/// like a placeholder; confirm callers do not surface it as-is.
pub(crate) fn downcast_array<A: Array + Clone + 'static>(
    array: &dyn Array,
) -> Result<A, ColumnError> {
    match array.as_any().downcast_ref::<A>() {
        Some(concrete) => Ok(concrete.clone()),
        None => Err(ColumnError::WrongDatatype {
            expected: DataType::Null,
            actual: array.data_type().clone(),
        }),
    }
}
// Implements `Datatype` (plus the `InfallibleBuild` marker) for a Rust type
// that is also its own owned representation.
//
// Arguments, in order:
// - `$rust`: the logical type; also used as `Owned`.
// - `$array`: the concrete array type used as `Typed`.
// - `$value`: the element type returned by `value`; it is converted to
//   `$rust` with `Into`.
// - `$datatype`: expression yielding the declared `DataType`.
//
// The expansion names `Datatype`, `DataType`, `Array`, `ArrayRef` and
// `ColumnError` without a path, so they must be in scope at the call site.
macro_rules! impl_flat_datatype {
    ($rust:ty, $array:ty, $value:ty, $datatype:expr) => {
        impl Datatype for $rust {
            type Typed = $array;
            type Value<'a> = $value;
            type Owned = $rust;

            fn datatype() -> DataType {
                $datatype
            }

            fn downcast(array: &dyn Array) -> Result<Self::Typed, ColumnError> {
                // Fully qualified so the macro also expands in modules that do
                // not import `downcast_array` themselves.
                crate::datatype::downcast_array::<$array>(array).map_err(|err| match err {
                    // `downcast_array` only knows the array type and fills
                    // `expected` with a `DataType::Null` placeholder; report
                    // the datatype this impl actually declares instead.
                    ColumnError::WrongDatatype { actual, .. } => ColumnError::WrongDatatype {
                        expected: <Self as Datatype>::datatype(),
                        actual,
                    },
                    other => other,
                })
            }

            fn is_null(typed: &Self::Typed, index: usize) -> bool {
                typed.is_null(index)
            }

            fn value(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
                typed.value(index)
            }

            fn build(
                values: impl Iterator<Item = Option<Self::Owned>>,
            ) -> Result<ArrayRef, ColumnError> {
                // Collecting through the array's `from_iter` cannot fail, hence
                // the unconditional `Ok` and the `InfallibleBuild` marker below.
                Ok(std::sync::Arc::new(<$array>::from_iter(values)))
            }

            fn to_owned_value(value: Self::Value<'_>) -> Self::Owned {
                value.into()
            }
        }

        impl crate::datatype::InfallibleBuild for $rust {}
    };
}
pub(crate) use impl_flat_datatype;
// Implements `PrimitiveDatatype` and `RefDatatype` for `$logical`, with
// `$native` as both the native and the referenced element type.
//
// Both impls read through the `values()` accessor of `$logical`'s typed array.
macro_rules! impl_primitive_datatype {
    ($logical:ty, $native:ty) => {
        impl crate::datatype::PrimitiveDatatype for $logical {
            type Native = $native;

            fn values(typed: &Self::Typed) -> &[$native] {
                typed.values()
            }
        }

        impl crate::datatype::RefDatatype for $logical {
            type Ref = $native;

            fn value_ref(typed: &Self::Typed, index: usize) -> &$native {
                // Plain indexing: an out-of-range `index` panics.
                let natives = typed.values();
                &natives[index]
            }
        }
    };
}
pub(crate) use impl_primitive_datatype;
/// Reports whether the `actual` datatype is acceptable where `declared` is
/// expected.
///
/// Nested types are compared structurally:
/// - `List`/`LargeList` (same kind on both sides): only the element fields'
///   datatypes are compared, recursively; nothing else on the field is looked at.
/// - `FixedSizeList`: the sizes must be equal and the element datatypes
///   compatible.
/// - `Dictionary`: key and value datatypes must each be compatible.
///
/// Every other combination falls back to plain equality.
pub fn datatypes_compatible(actual: &DataType, declared: &DataType) -> bool {
    match (actual, declared) {
        (DataType::List(found), DataType::List(wanted))
        | (DataType::LargeList(found), DataType::LargeList(wanted)) => {
            datatypes_compatible(found.data_type(), wanted.data_type())
        }
        (
            DataType::FixedSizeList(found, found_len),
            DataType::FixedSizeList(wanted, wanted_len),
        ) => {
            if found_len != wanted_len {
                false
            } else {
                datatypes_compatible(found.data_type(), wanted.data_type())
            }
        }
        (
            DataType::Dictionary(found_key, found_value),
            DataType::Dictionary(wanted_key, wanted_value),
        ) => {
            let keys_agree = datatypes_compatible(found_key, wanted_key);
            keys_agree && datatypes_compatible(found_value, wanted_value)
        }
        _ => actual == declared,
    }
}
// Implements `Datatype` (plus the `InfallibleBuild` marker) for a marker type
// whose owned representation is a separate type.
//
// Arguments, in order:
// - `$marker`: the logical (marker) type receiving the impls.
// - `$array`: the concrete array type used as `Typed`.
// - `$value`: the element type returned by `value`; it is converted to
//   `$owned` with `Into`.
// - `$owned`: the owned element type consumed by `build`.
// - `$datatype`: expression yielding the declared `DataType`.
//
// The expansion names `Datatype`, `DataType`, `Array`, `ArrayRef` and
// `ColumnError` without a path, so they must be in scope at the call site.
macro_rules! impl_marker_datatype {
    ($marker:ty, $array:ty, $value:ty, $owned:ty, $datatype:expr) => {
        impl Datatype for $marker {
            type Typed = $array;
            type Value<'a> = $value;
            type Owned = $owned;

            fn datatype() -> DataType {
                $datatype
            }

            fn downcast(array: &dyn Array) -> Result<Self::Typed, ColumnError> {
                crate::datatype::downcast_array::<$array>(array).map_err(|err| match err {
                    // `downcast_array` only knows the array type and fills
                    // `expected` with a `DataType::Null` placeholder; report
                    // the datatype this impl actually declares instead.
                    ColumnError::WrongDatatype { actual, .. } => ColumnError::WrongDatatype {
                        expected: <Self as Datatype>::datatype(),
                        actual,
                    },
                    other => other,
                })
            }

            fn is_null(typed: &Self::Typed, index: usize) -> bool {
                typed.is_null(index)
            }

            fn value(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
                typed.value(index)
            }

            fn build(
                values: impl Iterator<Item = Option<Self::Owned>>,
            ) -> Result<ArrayRef, ColumnError> {
                // Collecting through the array's `from_iter` cannot fail, hence
                // the unconditional `Ok` and the `InfallibleBuild` marker below.
                Ok(std::sync::Arc::new(<$array>::from_iter(values)))
            }

            fn to_owned_value(value: Self::Value<'_>) -> Self::Owned {
                value.into()
            }
        }

        impl crate::datatype::InfallibleBuild for $marker {}
    };
}
pub(crate) use impl_marker_datatype;