use half::f16;
use crate::{
datatypes::{
field_from_meta, DataType, Field, FieldMeta, IntervalUnit, MapMeta, RunEndEncodedMeta,
TimeUnit, UnionMode,
},
error::{fail, ErrorKind, Result},
types::{DayTimeInterval, MonthDayNanoInterval},
};
#[allow(unused)]
const _: () = {
trait AssertExpectedTraits: Clone + std::fmt::Debug + PartialEq + Send + Sync {}
impl AssertExpectedTraits for View<'_> {}
};
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum View<'a> {
Null(NullView),
Boolean(BooleanView<'a>),
Int8(PrimitiveView<'a, i8>),
Int16(PrimitiveView<'a, i16>),
Int32(PrimitiveView<'a, i32>),
Int64(PrimitiveView<'a, i64>),
UInt8(PrimitiveView<'a, u8>),
UInt16(PrimitiveView<'a, u16>),
UInt32(PrimitiveView<'a, u32>),
UInt64(PrimitiveView<'a, u64>),
Float16(PrimitiveView<'a, f16>),
Float32(PrimitiveView<'a, f32>),
Float64(PrimitiveView<'a, f64>),
Date32(PrimitiveView<'a, i32>),
Date64(PrimitiveView<'a, i64>),
Time32(TimeView<'a, i32>),
Time64(TimeView<'a, i64>),
Timestamp(TimestampView<'a>),
Duration(TimeView<'a, i64>),
YearMonthInterval(PrimitiveView<'a, i32>),
DayTimeInterval(PrimitiveView<'a, DayTimeInterval>),
MonthDayNanoInterval(PrimitiveView<'a, MonthDayNanoInterval>),
Utf8(BytesView<'a, i32>),
LargeUtf8(BytesView<'a, i64>),
Utf8View(BytesViewView<'a>),
Binary(BytesView<'a, i32>),
LargeBinary(BytesView<'a, i64>),
FixedSizeBinary(FixedSizeBinaryView<'a>),
BinaryView(BytesViewView<'a>),
Decimal128(DecimalView<'a, i128>),
Struct(StructView<'a>),
List(ListView<'a, i32>),
LargeList(ListView<'a, i64>),
FixedSizeList(FixedSizeListView<'a>),
Dictionary(DictionaryView<'a>),
RunEndEncoded(RunEndEncodedView<'a>),
Map(MapView<'a>),
Union(UnionView<'a>),
}
impl View<'_> {
pub fn data_type(&self) -> DataType {
use DataType as T;
match self {
Self::Null(_) => T::Null,
Self::Boolean(_) => T::Boolean,
Self::Int8(_) => T::Int8,
Self::Int16(_) => T::Int16,
Self::Int32(_) => T::Int32,
Self::Int64(_) => T::Int64,
Self::UInt8(_) => T::UInt8,
Self::UInt16(_) => T::UInt16,
Self::UInt32(_) => T::UInt32,
Self::UInt64(_) => T::UInt64,
Self::Float16(_) => T::Float16,
Self::Float32(_) => T::Float32,
Self::Float64(_) => T::Float64,
Self::Decimal128(arr) => T::Decimal128(arr.precision, arr.scale),
Self::Date32(_) => T::Date32,
Self::Date64(_) => T::Date64,
Self::Time32(arr) => T::Time32(arr.unit),
Self::Time64(arr) => T::Time64(arr.unit),
Self::Timestamp(arr) => T::Timestamp(arr.unit, arr.timezone.clone()),
Self::Duration(arr) => T::Duration(arr.unit),
Self::DayTimeInterval(_) => T::Interval(IntervalUnit::DayTime),
Self::YearMonthInterval(_) => T::Interval(IntervalUnit::YearMonth),
Self::MonthDayNanoInterval(_) => T::Interval(IntervalUnit::MonthDayNano),
Self::Binary(_) => T::Binary,
Self::LargeBinary(_) => T::LargeBinary,
Self::FixedSizeBinary(arr) => T::FixedSizeBinary(arr.n),
Self::BinaryView(_) => T::BinaryView,
Self::Utf8(_) => T::Utf8,
Self::LargeUtf8(_) => T::LargeUtf8,
Self::Utf8View(_) => T::Utf8View,
Self::Dictionary(arr) => T::Dictionary(
Box::new(arr.keys.data_type()),
Box::new(arr.values.data_type()),
),
Self::List(arr) => T::List(Box::new(field_from_meta(
arr.elements.data_type(),
arr.meta.clone(),
))),
Self::LargeList(arr) => T::LargeList(Box::new(field_from_meta(
arr.elements.data_type(),
arr.meta.clone(),
))),
Self::FixedSizeList(arr) => T::FixedSizeList(
Box::new(field_from_meta(arr.elements.data_type(), arr.meta.clone())),
arr.n,
),
Self::Struct(arr) => T::Struct(
arr.fields
.iter()
.map(|(meta, field)| field_from_meta(field.data_type(), meta.clone()))
.collect(),
),
Self::Union(arr) => T::Union(
arr.fields
.iter()
.map(|(type_id, meta, field)| {
(*type_id, field_from_meta(field.data_type(), meta.clone()))
})
.collect(),
match arr.offsets {
Some(_) => UnionMode::Dense,
None => UnionMode::Sparse,
},
),
Self::Map(arr) => T::Map(
Box::new(Field {
name: arr.meta.entries_name.clone(),
data_type: DataType::Struct(vec![
field_from_meta(arr.keys.data_type(), arr.meta.keys.clone()),
field_from_meta(arr.values.data_type(), arr.meta.values.clone()),
]),
..Field::default()
}),
arr.meta.sorted,
),
Self::RunEndEncoded(arr) => T::RunEndEncoded(
Box::new(Field {
name: arr.meta.run_ends_name.clone(),
data_type: arr.run_ends.data_type(),
nullable: false,
metadata: Default::default(),
}),
Box::new(field_from_meta(
arr.values.data_type(),
arr.meta.values.clone(),
)),
),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct BitsWithOffset<'a> {
pub offset: usize,
pub data: &'a [u8],
}
#[derive(Clone, Debug, PartialEq)]
pub struct NullView {
pub len: usize,
}
#[derive(Clone, Debug, PartialEq)]
pub struct BooleanView<'a> {
pub len: usize,
pub validity: Option<BitsWithOffset<'a>>,
pub values: BitsWithOffset<'a>,
}
#[derive(Clone, Debug, PartialEq)]
pub struct PrimitiveView<'a, T> {
pub validity: Option<BitsWithOffset<'a>>,
pub values: &'a [T],
}
#[derive(Debug, Clone, PartialEq)]
pub struct TimeView<'a, T> {
pub unit: TimeUnit,
pub validity: Option<BitsWithOffset<'a>>,
pub values: &'a [T],
}
#[derive(Debug, Clone, PartialEq)]
pub struct TimestampView<'a> {
pub unit: TimeUnit,
pub timezone: Option<String>,
pub validity: Option<BitsWithOffset<'a>>,
pub values: &'a [i64],
}
#[derive(Clone, Debug, PartialEq)]
pub struct StructView<'a> {
pub len: usize,
pub validity: Option<BitsWithOffset<'a>>,
pub fields: Vec<(FieldMeta, View<'a>)>,
}
#[derive(Clone, Debug, PartialEq)]
pub struct MapView<'a> {
pub validity: Option<BitsWithOffset<'a>>,
pub offsets: &'a [i32],
pub meta: MapMeta,
pub keys: Box<View<'a>>,
pub values: Box<View<'a>>,
}
impl<'a> MapView<'a> {
#[allow(unused)]
pub(crate) fn from_logical_view(
entries: View<'a>,
entries_name: String,
sorted: bool,
validity: Option<BitsWithOffset<'a>>,
offsets: &'a [i32],
) -> Result<Self> {
let View::Struct(entries) = entries else {
fail!(ErrorKind::Unsupported, "Expected struct array");
};
let Ok(entries_fields) = <[(FieldMeta, View); 2]>::try_from(entries.fields) else {
fail!(ErrorKind::Unsupported, "Expected two entries");
};
let [(keys_meta, keys_view), (values_meta, values_view)] = entries_fields;
Ok(MapView {
validity,
offsets,
meta: MapMeta {
entries_name,
sorted,
keys: keys_meta,
values: values_meta,
},
keys: Box::new(keys_view),
values: Box::new(values_view),
})
}
}
#[derive(Clone, Debug, PartialEq)]
pub struct ListView<'a, O> {
pub validity: Option<BitsWithOffset<'a>>,
pub offsets: &'a [O],
pub meta: FieldMeta,
pub elements: Box<View<'a>>,
}
#[derive(Clone, Debug, PartialEq)]
pub struct FixedSizeListView<'a> {
pub len: usize,
pub n: i32,
pub validity: Option<BitsWithOffset<'a>>,
pub meta: FieldMeta,
pub elements: Box<View<'a>>,
}
#[derive(Clone, Debug, PartialEq)]
pub struct BytesView<'a, O> {
pub validity: Option<BitsWithOffset<'a>>,
pub offsets: &'a [O],
pub data: &'a [u8],
}
#[derive(Clone, Debug, PartialEq)]
pub struct BytesViewView<'a> {
pub validity: Option<BitsWithOffset<'a>>,
pub data: &'a [u128],
pub buffers: Vec<&'a [u8]>,
}
#[derive(Clone, Debug, PartialEq)]
pub struct FixedSizeBinaryView<'a> {
pub n: i32,
pub validity: Option<BitsWithOffset<'a>>,
pub data: &'a [u8],
}
#[derive(Clone, Debug, PartialEq)]
pub struct DecimalView<'a, T> {
pub precision: u8,
pub scale: i8,
pub validity: Option<BitsWithOffset<'a>>,
pub values: &'a [T],
}
#[derive(Clone, Debug, PartialEq)]
pub struct DictionaryView<'a> {
pub keys: Box<View<'a>>,
pub values: Box<View<'a>>,
}
#[derive(Clone, Debug, PartialEq)]
pub struct UnionView<'a> {
pub types: &'a [i8],
pub offsets: Option<&'a [i32]>,
pub fields: Vec<(i8, FieldMeta, View<'a>)>,
}
#[derive(Clone, Debug, PartialEq)]
pub struct RunEndEncodedView<'a> {
pub meta: RunEndEncodedMeta,
pub run_ends: Box<View<'a>>,
pub values: Box<View<'a>>,
}