use half::f16;
use crate::{
datatypes::{
field_from_meta, DataType, Field, FieldMeta, IntervalUnit, MapMeta, RunEndEncodedMeta,
TimeUnit, UnionMode,
},
error::{fail, ErrorKind, Result},
types::{DayTimeInterval, MonthDayNanoInterval},
view::{
BitsWithOffset, BooleanView, BytesView, BytesViewView, DecimalView, DictionaryView,
FixedSizeBinaryView, FixedSizeListView, ListView, MapView, NullView, PrimitiveView,
RunEndEncodedView, StructView, TimeView, TimestampView, UnionView, View,
},
};
#[allow(unused)]
const _: () = {
    // Compile-time guard only: the build fails if `Array` ever stops implementing
    // one of the listed traits. Nothing here exists at runtime, hence the
    // `allow(unused)` above.
    trait RequiredArrayTraits: Clone + std::fmt::Debug + PartialEq + Send + Sync {}

    impl RequiredArrayTraits for Array {}
};
/// An owned array of one of the supported data types.
///
/// Every variant wraps the struct that owns the buffers for that type.
/// `Array::data_type` rebuilds the matching `DataType`, and `Array::as_view`
/// produces the `View` variant of the same name.
///
/// The enum is `#[non_exhaustive]`, so matches written in other crates need a
/// wildcard arm.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum Array {
// Null and boolean arrays.
Null(NullArray),
Boolean(BooleanArray),
// Signed integers.
Int8(PrimitiveArray<i8>),
Int16(PrimitiveArray<i16>),
Int32(PrimitiveArray<i32>),
Int64(PrimitiveArray<i64>),
// Unsigned integers.
UInt8(PrimitiveArray<u8>),
UInt16(PrimitiveArray<u16>),
UInt32(PrimitiveArray<u32>),
UInt64(PrimitiveArray<u64>),
// Floating point numbers.
Float16(PrimitiveArray<f16>),
Float32(PrimitiveArray<f32>),
Float64(PrimitiveArray<f64>),
// Dates reuse the plain primitive storage (`i32` / `i64`).
Date32(PrimitiveArray<i32>),
Date64(PrimitiveArray<i64>),
// Times, timestamps and durations additionally carry a `TimeUnit`.
Time32(TimeArray<i32>),
Time64(TimeArray<i64>),
Timestamp(TimestampArray),
Duration(TimeArray<i64>),
// Intervals, one variant per `IntervalUnit`.
YearMonthInterval(PrimitiveArray<i32>),
DayTimeInterval(PrimitiveArray<DayTimeInterval>),
MonthDayNanoInterval(PrimitiveArray<MonthDayNanoInterval>),
// Strings: `i32` offsets, `i64` offsets, and the view layout.
Utf8(BytesArray<i32>),
LargeUtf8(BytesArray<i64>),
Utf8View(BytesViewArray),
// Binary data uses the same storage structs as the string variants.
Binary(BytesArray<i32>),
LargeBinary(BytesArray<i64>),
FixedSizeBinary(FixedSizeBinaryArray),
BinaryView(BytesViewArray),
// Decimals stored as `i128`, with precision and scale kept on the array.
Decimal128(DecimalArray<i128>),
// Nested types; their children are `Array`s themselves.
Struct(StructArray),
List(ListArray<i32>),
LargeList(ListArray<i64>),
FixedSizeList(FixedSizeListArray),
Dictionary(DictionaryArray),
RunEndEncoded(RunEndEncodedArray),
Map(MapArray),
Union(UnionArray),
}
impl Array {
pub fn data_type(&self) -> DataType {
use DataType as T;
match self {
Self::Null(_) => T::Null,
Self::Boolean(_) => T::Boolean,
Self::Int8(_) => T::Int8,
Self::Int16(_) => T::Int16,
Self::Int32(_) => T::Int32,
Self::Int64(_) => T::Int64,
Self::UInt8(_) => T::UInt8,
Self::UInt16(_) => T::UInt16,
Self::UInt32(_) => T::UInt32,
Self::UInt64(_) => T::UInt64,
Self::Float16(_) => T::Float16,
Self::Float32(_) => T::Float32,
Self::Float64(_) => T::Float64,
Self::Decimal128(arr) => T::Decimal128(arr.precision, arr.scale),
Self::Date32(_) => T::Date32,
Self::Date64(_) => T::Date64,
Self::Time32(arr) => T::Time32(arr.unit),
Self::Time64(arr) => T::Time64(arr.unit),
Self::Timestamp(arr) => T::Timestamp(arr.unit, arr.timezone.clone()),
Self::Duration(arr) => T::Duration(arr.unit),
Self::DayTimeInterval(_) => T::Interval(IntervalUnit::DayTime),
Self::YearMonthInterval(_) => T::Interval(IntervalUnit::YearMonth),
Self::MonthDayNanoInterval(_) => T::Interval(IntervalUnit::MonthDayNano),
Self::Binary(_) => T::Binary,
Self::LargeBinary(_) => T::LargeBinary,
Self::FixedSizeBinary(arr) => T::FixedSizeBinary(arr.n),
Self::BinaryView(_) => T::BinaryView,
Self::Utf8(_) => T::Utf8,
Self::LargeUtf8(_) => T::LargeUtf8,
Self::Utf8View(_) => T::Utf8View,
Self::Dictionary(arr) => T::Dictionary(
Box::new(arr.keys.data_type()),
Box::new(arr.values.data_type()),
),
Self::List(arr) => T::List(Box::new(field_from_meta(
arr.elements.data_type(),
arr.meta.clone(),
))),
Self::LargeList(arr) => T::LargeList(Box::new(field_from_meta(
arr.elements.data_type(),
arr.meta.clone(),
))),
Self::FixedSizeList(arr) => T::FixedSizeList(
Box::new(field_from_meta(arr.elements.data_type(), arr.meta.clone())),
arr.n,
),
Self::Struct(arr) => T::Struct(
arr.fields
.iter()
.map(|(meta, field)| field_from_meta(field.data_type(), meta.clone()))
.collect(),
),
Self::Union(arr) => T::Union(
arr.fields
.iter()
.map(|(type_id, meta, field)| {
(*type_id, field_from_meta(field.data_type(), meta.clone()))
})
.collect(),
match arr.offsets {
Some(_) => UnionMode::Dense,
None => UnionMode::Sparse,
},
),
Self::Map(arr) => T::Map(
Box::new(Field {
name: arr.meta.entries_name.clone(),
data_type: DataType::Struct(vec![
field_from_meta(arr.keys.data_type(), arr.meta.keys.clone()),
field_from_meta(arr.values.data_type(), arr.meta.values.clone()),
]),
..Field::default()
}),
arr.meta.sorted,
),
Self::RunEndEncoded(arr) => T::RunEndEncoded(
Box::new(Field {
name: arr.meta.run_ends_name.clone(),
data_type: arr.run_ends.data_type(),
nullable: false,
metadata: Default::default(),
}),
Box::new(field_from_meta(
arr.values.data_type(),
arr.meta.values.clone(),
)),
),
}
}
pub fn as_view(&self) -> View<'_> {
match self {
Self::Null(array) => View::Null(array.as_view()),
Self::Boolean(array) => View::Boolean(array.as_view()),
Self::Int8(array) => View::Int8(array.as_view()),
Self::Int16(array) => View::Int16(array.as_view()),
Self::Int32(array) => View::Int32(array.as_view()),
Self::Int64(array) => View::Int64(array.as_view()),
Self::UInt8(array) => View::UInt8(array.as_view()),
Self::UInt16(array) => View::UInt16(array.as_view()),
Self::UInt32(array) => View::UInt32(array.as_view()),
Self::UInt64(array) => View::UInt64(array.as_view()),
Self::Float16(array) => View::Float16(array.as_view()),
Self::Float32(array) => View::Float32(array.as_view()),
Self::Float64(array) => View::Float64(array.as_view()),
Self::Decimal128(array) => View::Decimal128(array.as_view()),
Self::Date32(array) => View::Date32(array.as_view()),
Self::Date64(array) => View::Date64(array.as_view()),
Self::Time32(array) => View::Time32(array.as_view()),
Self::Time64(array) => View::Time64(array.as_view()),
Self::Timestamp(array) => View::Timestamp(array.as_view()),
Self::Duration(array) => View::Duration(array.as_view()),
Self::YearMonthInterval(array) => View::YearMonthInterval(array.as_view()),
Self::DayTimeInterval(array) => View::DayTimeInterval(array.as_view()),
Self::MonthDayNanoInterval(array) => View::MonthDayNanoInterval(array.as_view()),
Self::Binary(array) => View::Binary(array.as_view()),
Self::LargeBinary(array) => View::LargeBinary(array.as_view()),
Self::FixedSizeBinary(array) => View::FixedSizeBinary(array.as_view()),
Self::BinaryView(array) => View::BinaryView(array.as_view()),
Self::Utf8(array) => View::Utf8(array.as_view()),
Self::LargeUtf8(array) => View::LargeUtf8(array.as_view()),
Self::Utf8View(array) => View::Utf8View(array.as_view()),
Self::List(array) => View::List(array.as_view()),
Self::LargeList(array) => View::LargeList(array.as_view()),
Self::FixedSizeList(array) => View::FixedSizeList(array.as_view()),
Self::Struct(array) => View::Struct(array.as_view()),
Self::Map(array) => View::Map(array.as_view()),
Self::Dictionary(array) => View::Dictionary(array.as_view()),
Self::RunEndEncoded(array) => View::RunEndEncoded(array.as_view()),
Self::Union(array) => View::Union(array.as_view()),
}
}
}
/// Owned array of the null type; the length is the only thing stored.
#[derive(Clone, Debug, PartialEq)]
pub struct NullArray {
/// Length of the array.
pub len: usize,
}
impl NullArray {
    /// Creates a [`NullView`] with the same length as this array.
    pub fn as_view(&self) -> NullView {
        let len = self.len;
        NullView { len }
    }
}
/// Owned boolean array.
///
/// Both buffers are handed to views as `BitsWithOffset` with offset 0.
// NOTE(review): presumably both buffers are bit-packed (one bit per element),
// which would explain the separate `len` field - confirm against the view docs.
#[derive(Clone, Debug, PartialEq)]
pub struct BooleanArray {
/// Length of the array.
pub len: usize,
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// Buffer holding the boolean values.
pub values: Vec<u8>,
}
impl BooleanArray {
    /// Borrows this array as a [`BooleanView`].
    ///
    /// Both the optional validity buffer and the values buffer are wrapped in
    /// [`BitsWithOffset`] starting at offset 0.
    pub fn as_view(&self) -> BooleanView<'_> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });
        let values = BitsWithOffset {
            data: &self.values,
            offset: 0,
        };

        BooleanView {
            len: self.len,
            validity,
            values,
        }
    }
}
/// Owned array of plain values of type `T`.
///
/// `Array` uses it for the integer, float, date and interval variants.
#[derive(Clone, Debug, PartialEq)]
pub struct PrimitiveArray<T> {
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// The stored values.
pub values: Vec<T>,
}
impl<T> PrimitiveArray<T> {
    /// Borrows this array as a [`PrimitiveView`].
    ///
    /// The validity buffer, when present, is wrapped in [`BitsWithOffset`] at
    /// offset 0; the values are borrowed as they are.
    pub fn as_view(&self) -> PrimitiveView<'_, T> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        PrimitiveView {
            values: &self.values,
            validity,
        }
    }
}
/// Owned array of values of type `T` that are tagged with a `TimeUnit`.
///
/// `Array` uses it for the `Time32`, `Time64` and `Duration` variants.
#[derive(Debug, Clone, PartialEq)]
pub struct TimeArray<T> {
/// Unit of the stored values.
pub unit: TimeUnit,
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// The stored values.
pub values: Vec<T>,
}
impl<T> TimeArray<T> {
    /// Borrows this array as a [`TimeView`] with the same unit.
    ///
    /// The validity buffer, when present, is wrapped in [`BitsWithOffset`] at
    /// offset 0.
    pub fn as_view(&self) -> TimeView<'_, T> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        TimeView {
            values: &self.values,
            validity,
            unit: self.unit,
        }
    }
}
/// Owned timestamp array: `i64` values with a unit and an optional time zone.
#[derive(Debug, Clone, PartialEq)]
pub struct TimestampArray {
/// Unit of the stored values.
pub unit: TimeUnit,
/// Optional time zone, reported as part of `DataType::Timestamp`.
pub timezone: Option<String>,
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// The stored timestamp values.
pub values: Vec<i64>,
}
impl TimestampArray {
    /// Borrows this array as a [`TimestampView`].
    ///
    /// The buffers are borrowed, but the time zone string is cloned into the
    /// view, so this allocates whenever a time zone is set.
    pub fn as_view(&self) -> TimestampView<'_> {
        let timezone = self.timezone.clone();
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        TimestampView {
            values: &self.values,
            validity,
            timezone,
            unit: self.unit,
        }
    }
}
/// Owned struct array: a list of child arrays, each paired with its field
/// metadata.
#[derive(Clone, Debug, PartialEq)]
pub struct StructArray {
/// Length of the array.
pub len: usize,
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// Child arrays together with the metadata of the corresponding field.
pub fields: Vec<(FieldMeta, Array)>,
}
impl StructArray {
    /// Borrows this array as a [`StructView`].
    ///
    /// Each child array is turned into a view of its own; the metadata of
    /// every field is cloned into the result.
    pub fn as_view(&self) -> StructView<'_> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        StructView {
            len: self.len,
            validity,
            fields: self
                .fields
                .iter()
                .map(|(meta, child)| (meta.clone(), child.as_view()))
                .collect(),
        }
    }
}
/// Owned map array with separately stored keys and values children.
#[derive(Clone, Debug, PartialEq)]
pub struct MapArray {
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// Offsets buffer. `into_logical_array` rejects an empty buffer and uses the
/// last offset as the length of the entries struct.
pub offsets: Vec<i32>,
/// Map metadata: entries name, sorted flag, and the keys / values field
/// metadata.
pub meta: MapMeta,
/// Array holding the keys.
pub keys: Box<Array>,
/// Array holding the values.
pub values: Box<Array>,
}
impl MapArray {
    /// Borrows this array as a [`MapView`].
    ///
    /// Offsets are borrowed, keys and values are converted to boxed views, and
    /// the map metadata is cloned into the result.
    pub fn as_view(&self) -> MapView<'_> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });
        let keys = Box::new(self.keys.as_view());
        let values = Box::new(self.values.as_view());

        MapView {
            validity,
            offsets: &self.offsets,
            meta: self.meta.clone(),
            keys,
            values,
        }
    }
}
impl MapArray {
#[allow(clippy::type_complexity, unused)]
pub(crate) fn into_logical_array(
self,
) -> Result<(Array, String, bool, Option<Vec<u8>>, Vec<i32>)> {
let Some(&last_offset) = self.offsets.last() else {
fail!(ErrorKind::Unsupported, "Invalid map array");
};
let entries = Array::Struct(StructArray {
len: usize::try_from(last_offset)?,
validity: None,
fields: vec![
(self.meta.keys, *self.keys),
(self.meta.values, *self.values),
],
});
Ok((
entries,
self.meta.entries_name,
self.meta.sorted,
self.validity,
self.offsets,
))
}
}
/// Owned list array with offsets of type `O`.
///
/// `Array` uses `i32` offsets for `List` and `i64` offsets for `LargeList`.
#[derive(Clone, Debug, PartialEq)]
pub struct ListArray<O> {
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// Offsets buffer.
pub offsets: Vec<O>,
/// Metadata of the element field.
pub meta: FieldMeta,
/// Child array holding the list elements.
pub elements: Box<Array>,
}
impl<O> ListArray<O> {
    /// Borrows this array as a [`ListView`].
    ///
    /// Offsets are borrowed, the elements are converted to a boxed view, and
    /// the element metadata is cloned into the result.
    pub fn as_view(&self) -> ListView<'_, O> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });
        let elements = Box::new(self.elements.as_view());

        ListView {
            validity,
            offsets: &self.offsets,
            meta: self.meta.clone(),
            elements,
        }
    }
}
/// Owned fixed-size list array.
#[derive(Clone, Debug, PartialEq)]
pub struct FixedSizeListArray {
/// Length of the array.
pub len: usize,
/// List size, reported as the size parameter of `DataType::FixedSizeList`.
pub n: i32,
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// Metadata of the element field.
pub meta: FieldMeta,
/// Child array holding the list elements.
pub elements: Box<Array>,
}
impl FixedSizeListArray {
    /// Borrows this array as a [`FixedSizeListView`].
    ///
    /// Length and list size are copied, the elements are converted to a boxed
    /// view, and the element metadata is cloned into the result.
    pub fn as_view(&self) -> FixedSizeListView<'_> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });
        let elements = Box::new(self.elements.as_view());

        FixedSizeListView {
            len: self.len,
            n: self.n,
            validity,
            meta: self.meta.clone(),
            elements,
        }
    }
}
/// Owned array of byte sequences with offsets of type `O`.
///
/// `Array` uses it for `Utf8` / `Binary` (`i32` offsets) and for `LargeUtf8` /
/// `LargeBinary` (`i64` offsets).
#[derive(Clone, Debug, PartialEq)]
pub struct BytesArray<O> {
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// Offsets buffer.
pub offsets: Vec<O>,
/// Byte buffer shared by all elements.
pub data: Vec<u8>,
}
impl<O> BytesArray<O> {
    /// Borrows this array as a [`BytesView`].
    ///
    /// Offsets and data are borrowed directly; the validity buffer, when
    /// present, is wrapped in [`BitsWithOffset`] at offset 0.
    pub fn as_view(&self) -> BytesView<'_, O> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        BytesView {
            data: &self.data,
            offsets: &self.offsets,
            validity,
        }
    }
}
/// Owned array backing the `Utf8View` and `BinaryView` variants of `Array`.
// NOTE(review): `data` presumably holds one packed 16-byte view per element
// that may point into `buffers` (the Arrow view layout) - confirm.
#[derive(Clone, Debug, PartialEq)]
pub struct BytesViewArray {
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// The `u128` entries of the array.
pub data: Vec<u128>,
/// Additional byte buffers.
pub buffers: Vec<Vec<u8>>,
}
impl BytesViewArray {
    /// Borrows this array as a [`BytesViewView`].
    ///
    /// The `u128` entries are borrowed directly and every buffer is exposed as
    /// a byte slice.
    pub fn as_view(&self) -> BytesViewView<'_> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        BytesViewView {
            validity,
            data: &self.data,
            buffers: self.buffers.iter().map(Vec::as_slice).collect(),
        }
    }
}
/// Owned fixed-size binary array.
#[derive(Clone, Debug, PartialEq)]
pub struct FixedSizeBinaryArray {
/// Element size, reported as the parameter of `DataType::FixedSizeBinary`.
pub n: i32,
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// Byte buffer shared by all elements.
pub data: Vec<u8>,
}
impl FixedSizeBinaryArray {
    /// Borrows this array as a [`FixedSizeBinaryView`] with the same element
    /// size.
    pub fn as_view(&self) -> FixedSizeBinaryView<'_> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        FixedSizeBinaryView {
            data: &self.data,
            validity,
            n: self.n,
        }
    }
}
/// Owned decimal array with values stored as `T` (`i128` for
/// `Array::Decimal128`).
#[derive(Clone, Debug, PartialEq)]
pub struct DecimalArray<T> {
/// Precision, reported as the first parameter of `DataType::Decimal128`.
pub precision: u8,
/// Scale, reported as the second parameter of `DataType::Decimal128`.
pub scale: i8,
/// Optional validity buffer; `None` when no validity information is stored.
pub validity: Option<Vec<u8>>,
/// The stored values.
pub values: Vec<T>,
}
impl<T> DecimalArray<T> {
    /// Borrows this array as a [`DecimalView`] with the same precision and
    /// scale.
    pub fn as_view(&self) -> DecimalView<'_, T> {
        let validity = self
            .validity
            .as_ref()
            .map(|bits| BitsWithOffset { data: bits, offset: 0 });

        DecimalView {
            values: &self.values,
            validity,
            scale: self.scale,
            precision: self.precision,
        }
    }
}
/// Owned dictionary array made of a keys array and a values array.
///
/// It has no validity buffer of its own.
#[derive(Clone, Debug, PartialEq)]
pub struct DictionaryArray {
/// Array holding the keys.
pub keys: Box<Array>,
/// Array holding the values.
pub values: Box<Array>,
}
impl DictionaryArray {
    /// Borrows this array as a [`DictionaryView`] holding boxed views of the
    /// keys and the values.
    pub fn as_view(&self) -> DictionaryView<'_> {
        let keys = Box::new(self.keys.as_view());
        let values = Box::new(self.values.as_view());

        DictionaryView { keys, values }
    }
}
/// Owned union array.
///
/// `Array::data_type` reports `UnionMode::Dense` when `offsets` is present and
/// `UnionMode::Sparse` otherwise.
#[derive(Clone, Debug, PartialEq)]
pub struct UnionArray {
/// Buffer of `i8` type ids.
pub types: Vec<i8>,
/// Offsets buffer; its presence marks the union as dense.
pub offsets: Option<Vec<i32>>,
/// Child arrays, each with its type id and field metadata.
pub fields: Vec<(i8, FieldMeta, Array)>,
}
impl UnionArray {
    /// Borrows this array as a [`UnionView`].
    ///
    /// Type ids and offsets are borrowed; each child is converted to a view
    /// and its metadata is cloned into the result.
    pub fn as_view(&self) -> UnionView<'_> {
        let offsets = self.offsets.as_deref();

        UnionView {
            types: &self.types,
            offsets,
            fields: self
                .fields
                .iter()
                .map(|(type_id, meta, child)| (*type_id, meta.clone(), child.as_view()))
                .collect(),
        }
    }
}
/// Owned run-end encoded array made of a run-ends array and a values array.
#[derive(Clone, Debug, PartialEq)]
pub struct RunEndEncodedArray {
/// Metadata: the name of the run-ends field and the values field metadata.
pub meta: RunEndEncodedMeta,
/// Array holding the run ends.
pub run_ends: Box<Array>,
/// Array holding the values.
pub values: Box<Array>,
}
impl RunEndEncodedArray {
    /// Borrows this array as a [`RunEndEncodedView`].
    ///
    /// Run ends and values are converted to boxed views; the metadata is
    /// cloned into the result.
    pub fn as_view(&self) -> RunEndEncodedView<'_> {
        let run_ends = Box::new(self.run_ends.as_view());
        let values = Box::new(self.values.as_view());

        RunEndEncodedView {
            meta: self.meta.clone(),
            run_ends,
            values,
        }
    }
}