use arrow::array::{Array, ArrayRef};
use arrow::datatypes::DataType;
use crate::ErrorKind;
/// Describes how a logical column type is read out of an Arrow array.
///
/// An implementor names three associated types: the concrete array
/// representation (`Typed`), the per-element view handed out on access
/// (`Value<'a>`), and the owned form of an element (`Owned`).
pub trait LogicalType {
/// Defaults to `false`.
///
/// NOTE(review): presumably tells whether nulls are permitted at this level
/// (cf. `ColumnError::UnexpectedNulls`) — confirm against the implementors.
const NULLABLE: bool = false;
/// Concrete array representation returned by [`LogicalType::downcast`].
type Typed: Clone;
/// Element view returned by [`LogicalType::value`]; it may borrow from the
/// typed array it was read from.
type Value<'a>
where
Self: 'a;
/// Owned counterpart of `Value`, produced by [`LogicalType::to_owned_value`].
type Owned;
/// Converts a type-erased Arrow array into this type's `Typed` representation.
///
/// # Errors
/// Returns a [`ColumnError`] when the array cannot be viewed as `Typed`.
fn downcast(array: &dyn Array) -> Result<Self::Typed, ColumnError>;
/// Reports whether the element at `index` is null.
fn is_null(typed: &Self::Typed, index: usize) -> bool;
/// Variant of [`LogicalType::is_null`] that implementors may override with an
/// access that skips bounds checks.
///
/// The default implementation simply forwards to the checked `is_null`.
///
/// # Safety
/// Overrides may skip bounds checks (the macro-generated ones in this file
/// do), so callers must guarantee that `index` is in bounds for `typed`.
unsafe fn is_null_unchecked(typed: &Self::Typed, index: usize) -> bool {
Self::is_null(typed, index)
}
/// Returns the element at `index` as a `Value` tied to the lifetime of `typed`.
fn value(typed: &Self::Typed, index: usize) -> Self::Value<'_>;
/// Variant of [`LogicalType::value`] that implementors may override with an
/// access that skips bounds checks.
///
/// The default implementation simply forwards to the checked `value`.
///
/// # Safety
/// Overrides may skip bounds checks (the macro-generated ones in this file
/// do), so callers must guarantee that `index` is in bounds for `typed`.
unsafe fn value_unchecked(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
Self::value(typed, index)
}
/// Converts an element view into its owned form.
fn to_owned_value(value: Self::Value<'_>) -> Self::Owned;
}
/// A [`LogicalType`] that has an associated Arrow [`DataType`] and can be
/// built into an array from owned values.
pub trait ConcreteType: LogicalType {
/// Returns the Arrow data type associated with this logical type.
fn datatype() -> DataType;
/// Builds an array from an iterator of optional owned values.
///
/// NOTE(review): `None` items presumably become null slots — confirm per
/// implementor.
///
/// # Errors
/// Returns a [`ColumnError`] when the array cannot be built.
fn build(values: impl Iterator<Item = Option<Self::Owned>>) -> Result<ArrayRef, ColumnError>;
}
/// A [`LogicalType`] whose typed array exposes its elements as a slice.
pub trait PrimitiveType: LogicalType {
/// Element type of the slice returned by [`PrimitiveType::values`].
type Native;
/// Borrows the elements of `typed` as a slice of `Native`.
///
/// NOTE(review): the slice presumably includes the slots of null entries as
/// well — confirm against the implementors.
fn values(typed: &Self::Typed) -> &[Self::Native];
}
/// A [`LogicalType`] whose elements can be borrowed by reference from the
/// typed array.
pub trait RefType: LogicalType {
/// Borrowed form of an element; may be unsized.
type Ref: ?Sized;
/// Returns a reference to the element at `index`, borrowed from `typed`.
///
/// The implementation generated by `impl_primitive_datatype!` in this file
/// uses checked slice indexing and therefore panics when `index` is out of
/// bounds.
fn value_ref(typed: &Self::Typed, index: usize) -> &Self::Ref;
}
/// Marker trait for [`ConcreteType`]s that are treated as buildable without
/// failure.
///
/// The trait has no items. The attached diagnostic replaces the compiler's
/// default "trait not implemented" message with a hint pointing to
/// `Column::try_from_values`.
///
/// NOTE(review): presumably `Column::from_values` is bounded on this trait —
/// confirm at its definition.
#[diagnostic::on_unimplemented(
message = "`{Self}` cannot be built infallibly",
note = "dictionary/run encoding can fail (overflow): use `Column::try_from_values` instead of `from_values`"
)]
pub trait InfallibleBuild: ConcreteType {}
/// Errors raised while downcasting or building a single column.
///
/// The variants carry no column name; [`ColumnError::for_column`] converts
/// them into a crate-level `ErrorKind` together with a column name.
#[derive(Debug, thiserror::Error)]
pub enum ColumnError {
/// The array handed to a downcast did not have the expected concrete type.
#[error("Expected {expected}, found {actual:?}")]
WrongDatatype {
/// Human-readable description of the expected type.
expected: String,
/// Data type reported by the array that was actually supplied.
actual: DataType,
},
/// Nulls were found at a level that does not allow them.
#[error(
"Found {null_count} null(s) at a non-nullable level. Use `Option<…>` in the logical type to allow nulls"
)]
UnexpectedNulls {
/// Number of nulls that were found.
null_count: usize },
/// Building the Arrow array failed; wraps the underlying Arrow error.
#[error("Failed to build the array: {0}")]
Build(arrow::error::ArrowError),
}
impl ColumnError {
pub fn for_column(self, column: String) -> ErrorKind {
match self {
Self::WrongDatatype { expected, actual } => ErrorKind::WrongDatatype {
column,
expected,
actual,
},
Self::UnexpectedNulls { null_count } => {
ErrorKind::UnexpectedNulls { column, null_count }
}
Self::Build(err) => ErrorKind::BuildRecordBatch(err),
}
}
}
impl From<ColumnError> for arrow::error::ArrowError {
fn from(err: ColumnError) -> Self {
Self::ExternalError(Box::new(err))
}
}
/// Downcasts a type-erased Arrow array to the concrete array type `A` and
/// returns a clone of it.
///
/// `expected` is only invoked on failure, to describe the type that was
/// wanted.
///
/// # Errors
/// Returns [`ColumnError::WrongDatatype`] carrying `expected()` and the
/// array's actual data type when `array` is not an `A`.
pub(crate) fn downcast_array<A: Array + Clone + 'static>(
    array: &dyn Array,
    expected: impl FnOnce() -> String,
) -> Result<A, ColumnError> {
    match array.as_any().downcast_ref::<A>() {
        Some(concrete) => Ok(concrete.clone()),
        None => Err(ColumnError::WrongDatatype {
            expected: expected(),
            actual: array.data_type().clone(),
        }),
    }
}
/// Reports whether slot `index` of `array` is null according to the buffer
/// returned by `array.nulls()`, without bounds-checking the lookup.
///
/// When `nulls()` returns `None` the result is always `false`.
///
/// # Safety
/// If the array has a null buffer, `index` must be within its bounds.
#[inline]
pub(crate) unsafe fn leaf_is_null_unchecked(array: &dyn Array, index: usize) -> bool {
    match array.nulls() {
        // The stored bit is negated to obtain "is null".
        // SAFETY: the caller guarantees `index` is in bounds (see `# Safety`).
        Some(validity) => !unsafe { validity.inner().value_unchecked(index) },
        None => false,
    }
}
/// Implements `LogicalType`, `ConcreteType` and `InfallibleBuild` for a Rust
/// type that serves as its own owned value.
///
/// Arguments, in order:
/// * `$rust` — the implementing type, also used as `Owned`;
/// * `$array` — the Arrow array type used as `Typed`;
/// * `$value` — the element view used as `Value<'a>`; must convert into
///   `$rust` via `Into`;
/// * `$datatype` — expression yielding the Arrow `DataType`.
///
/// The expansion names `LogicalType`, `ColumnError`, `Array`, `ArrayRef` and
/// `DataType` unqualified, so those must be in scope at the call site.
/// Crate-private helpers are referenced through `crate::datatype::…` and need
/// no import.
macro_rules! impl_flat_datatype {
    ($rust:ty, $array:ty, $value:ty, $datatype:expr) => {
        impl LogicalType for $rust {
            type Typed = $array;
            type Value<'a> = $value;
            type Owned = $rust;

            fn downcast(array: &dyn Array) -> Result<Self::Typed, ColumnError> {
                // Fully qualified: item paths inside `macro_rules!` resolve at
                // the call site, so an unqualified helper name would force
                // every invoking module to import it.
                crate::datatype::downcast_array::<$array>(array, || format!("{:?}", $datatype))
            }

            #[inline]
            fn is_null(typed: &Self::Typed, index: usize) -> bool {
                typed.is_null(index)
            }

            #[inline]
            unsafe fn is_null_unchecked(typed: &Self::Typed, index: usize) -> bool {
                // SAFETY: the caller of this `unsafe fn` guarantees `index` is
                // in bounds; that contract is forwarded unchanged.
                unsafe { crate::datatype::leaf_is_null_unchecked(typed, index) }
            }

            #[inline]
            fn value(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
                typed.value(index)
            }

            #[inline]
            unsafe fn value_unchecked(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
                // SAFETY: the caller of this `unsafe fn` guarantees `index` is
                // in bounds; that contract is forwarded unchanged.
                unsafe { typed.value_unchecked(index) }
            }

            fn to_owned_value(value: Self::Value<'_>) -> Self::Owned {
                value.into()
            }
        }

        impl crate::datatype::ConcreteType for $rust {
            fn datatype() -> DataType {
                $datatype
            }

            fn build(
                values: impl Iterator<Item = Option<Self::Owned>>,
            ) -> Result<ArrayRef, ColumnError> {
                // Collecting into the array type never yields an error here,
                // which is why the type is also marked `InfallibleBuild`.
                Ok(std::sync::Arc::new(<$array>::from_iter(values)))
            }
        }

        impl crate::datatype::InfallibleBuild for $rust {}
    };
}
pub(crate) use impl_flat_datatype;
/// Implements `PrimitiveType` and `RefType` for a logical type whose typed
/// array exposes its elements through a `values()` accessor.
///
/// Arguments, in order: the logical type, then the native element type used
/// both as `PrimitiveType::Native` and as `RefType::Ref`.
macro_rules! impl_primitive_datatype {
    ($ty:ty, $prim:ty) => {
        impl crate::datatype::RefType for $ty {
            type Ref = $prim;

            #[inline]
            fn value_ref(typed: &Self::Typed, index: usize) -> &$prim {
                // Checked indexing: panics when `index` is out of bounds.
                let slots = typed.values();
                &slots[index]
            }
        }

        impl crate::datatype::PrimitiveType for $ty {
            type Native = $prim;

            #[inline]
            fn values(typed: &Self::Typed) -> &[$prim] {
                typed.values()
            }
        }
    };
}
pub(crate) use impl_primitive_datatype;
/// Implements `LogicalType`, `ConcreteType` and `InfallibleBuild` for a marker
/// type whose owned value is a separate type.
///
/// Arguments, in order:
/// * the marker type receiving the implementations;
/// * the Arrow array type used as `Typed`;
/// * the element view used as `Value<'a>`; must convert into the owned type
///   via `Into`;
/// * the owned value type used as `Owned`;
/// * an expression yielding the Arrow `DataType`.
///
/// The expansion names `LogicalType`, `ColumnError`, `Array`, `ArrayRef` and
/// `DataType` unqualified, so those must be in scope at the call site.
macro_rules! impl_marker_datatype {
    ($tag:ty, $arr:ty, $view:ty, $own:ty, $dt:expr) => {
        impl LogicalType for $tag {
            type Typed = $arr;
            type Value<'a> = $view;
            type Owned = $own;

            fn downcast(array: &dyn Array) -> Result<Self::Typed, ColumnError> {
                // Only evaluated when the downcast fails.
                let describe_expected = || format!("{:?}", $dt);
                crate::datatype::downcast_array::<$arr>(array, describe_expected)
            }

            #[inline]
            fn is_null(typed: &Self::Typed, index: usize) -> bool {
                typed.is_null(index)
            }

            #[inline]
            unsafe fn is_null_unchecked(typed: &Self::Typed, index: usize) -> bool {
                // SAFETY: the caller of this `unsafe fn` guarantees `index` is
                // in bounds; that contract is forwarded unchanged.
                unsafe { crate::datatype::leaf_is_null_unchecked(typed, index) }
            }

            #[inline]
            fn value(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
                typed.value(index)
            }

            #[inline]
            unsafe fn value_unchecked(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
                // SAFETY: the caller of this `unsafe fn` guarantees `index` is
                // in bounds; that contract is forwarded unchanged.
                unsafe { typed.value_unchecked(index) }
            }

            fn to_owned_value(value: Self::Value<'_>) -> Self::Owned {
                value.into()
            }
        }

        impl crate::datatype::ConcreteType for $tag {
            fn datatype() -> DataType {
                $dt
            }

            fn build(
                values: impl Iterator<Item = Option<Self::Owned>>,
            ) -> Result<ArrayRef, ColumnError> {
                // Collect into the concrete array, then erase its type.
                let built: ArrayRef = std::sync::Arc::new(<$arr>::from_iter(values));
                Ok(built)
            }
        }

        impl crate::datatype::InfallibleBuild for $tag {}
    };
}
pub(crate) use impl_marker_datatype;