use arrow_schema::{DataType, IntervalUnit, UnionMode};
/// The primitive numeric representations carried by [`PhysicalType::Primitive`].
///
/// Several logical `DataType`s collapse onto the same variant in
/// `From<&DataType> for PhysicalType`; the per-variant notes below list them.
///
/// The derived `Ord`/`PartialOrd` compare by declaration order, so reordering
/// variants changes comparison results.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum PrimitiveType {
/// Produced for `DataType::Int8`.
Int8,
/// Produced for `DataType::Int16`.
Int16,
/// Produced for `DataType::Int32`, `Date32`, `Time32` and
/// `Interval(IntervalUnit::YearMonth)`.
Int32,
/// Produced for `DataType::Int64`, `Timestamp`, `Date64`, `Time64`,
/// `Duration` and `Interval(IntervalUnit::DayTime)`.
Int64,
/// Produced for `DataType::Decimal128` and
/// `Interval(IntervalUnit::MonthDayNano)`.
Int128,
/// Produced for `DataType::Decimal256`.
Int256,
/// Produced for `DataType::UInt8`.
UInt8,
/// Produced for `DataType::UInt16`.
UInt16,
/// Produced for `DataType::UInt32`.
UInt32,
/// Produced for `DataType::UInt64`.
UInt64,
/// Produced for `DataType::Float16`.
Float16,
/// Produced for `DataType::Float32`.
Float32,
/// Produced for `DataType::Float64`.
Float64,
}
/// The offset kind attached to [`PhysicalType::Bytes`] and [`PhysicalType::List`].
///
/// NOTE(review): the names suggest 32-bit vs 64-bit offsets into a
/// variable-length layout; the layout itself is not defined in this file.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum OffsetType {
/// Used for `DataType::Binary`, `Utf8`, `List` and `Map`.
Int32,
/// Used for `DataType::LargeBinary`, `LargeUtf8` and `LargeList`.
Int64,
}
/// Distinguishes the two byte-array flavours carried by [`PhysicalType::Bytes`].
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum BytesType {
/// Used for `DataType::Binary` and `DataType::LargeBinary`.
Binary,
/// Used for `DataType::Utf8` and `DataType::LargeUtf8`.
Utf8,
}
/// The key types accepted for `DataType::Dictionary`, carried by
/// [`PhysicalType::Dictionary`].
///
/// Each variant corresponds to the identically named `DataType`; converting a
/// dictionary whose key is any other `DataType` panics in
/// `From<&DataType> for PhysicalType`.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum DictionaryKeyType {
/// Key type `DataType::Int8`.
Int8,
/// Key type `DataType::Int16`.
Int16,
/// Key type `DataType::Int32`.
Int32,
/// Key type `DataType::Int64`.
Int64,
/// Key type `DataType::UInt8`.
UInt8,
/// Key type `DataType::UInt16`.
UInt16,
/// Key type `DataType::UInt32`.
UInt32,
/// Key type `DataType::UInt64`.
UInt64,
}
/// The run-end types accepted for `DataType::RunEndEncoded`, carried by
/// [`PhysicalType::Run`].
///
/// Each variant corresponds to the identically named `DataType`; a run-end
/// field of any other `DataType` panics in `From<&DataType> for PhysicalType`.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum RunEndType {
/// Run-end type `DataType::Int16`.
Int16,
/// Run-end type `DataType::Int32`.
Int32,
/// Run-end type `DataType::Int64`.
Int64,
}
/// The physical representation that a logical `DataType` maps onto.
///
/// Obtained through `PhysicalType::from(&DataType)`. Child/value types of
/// nested `DataType`s are not recorded here; only the information listed on
/// each variant is retained.
///
/// The derived `Ord`/`PartialOrd` compare by declaration order first, then by
/// payload.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum PhysicalType {
/// Produced for `DataType::Null`.
Null,
/// Produced for `DataType::Boolean`.
Boolean,
/// A primitive numeric representation; see [`PrimitiveType`].
Primitive(PrimitiveType),
/// Produced for `DataType::FixedSizeBinary`; the payload is its size cast to `usize`.
FixedSizeBinary(usize),
/// Produced for `Binary`/`LargeBinary`/`Utf8`/`LargeUtf8`, tagged with the
/// offset kind and whether the content is binary or UTF-8.
Bytes(OffsetType, BytesType),
/// Produced for `DataType::FixedSizeList`; the payload is its size cast to `usize`.
FixedSizeList(usize),
/// Produced for `List` and `Map` (`OffsetType::Int32`) and `LargeList`
/// (`OffsetType::Int64`).
List(OffsetType),
/// Produced for `DataType::Struct`.
Struct,
/// Produced for `DataType::Union`, keeping its `UnionMode`.
Union(UnionMode),
/// Produced for `DataType::Dictionary`, keeping only the key type.
Dictionary(DictionaryKeyType),
/// Produced for `DataType::RunEndEncoded`, keeping only the run-end type.
Run(RunEndType),
}
impl From<&DataType> for PhysicalType {
fn from(value: &DataType) -> Self {
match value {
DataType::Null => Self::Null,
DataType::Boolean => Self::Boolean,
DataType::Int8 => Self::Primitive(PrimitiveType::Int8),
DataType::Int16 => Self::Primitive(PrimitiveType::Int16),
DataType::Int32 => Self::Primitive(PrimitiveType::Int32),
DataType::Int64 => Self::Primitive(PrimitiveType::Int64),
DataType::UInt8 => Self::Primitive(PrimitiveType::UInt8),
DataType::UInt16 => Self::Primitive(PrimitiveType::UInt16),
DataType::UInt32 => Self::Primitive(PrimitiveType::UInt32),
DataType::UInt64 => Self::Primitive(PrimitiveType::UInt64),
DataType::Float16 => Self::Primitive(PrimitiveType::Float16),
DataType::Float32 => Self::Primitive(PrimitiveType::Float32),
DataType::Float64 => Self::Primitive(PrimitiveType::Float64),
DataType::Timestamp(_, _) => Self::Primitive(PrimitiveType::Int64),
DataType::Date32 => Self::Primitive(PrimitiveType::Int32),
DataType::Date64 => Self::Primitive(PrimitiveType::Int64),
DataType::Time32(_) => Self::Primitive(PrimitiveType::Int32),
DataType::Time64(_) => Self::Primitive(PrimitiveType::Int64),
DataType::Duration(_) => Self::Primitive(PrimitiveType::Int64),
DataType::Decimal128(_, _) => Self::Primitive(PrimitiveType::Int128),
DataType::Decimal256(_, _) => Self::Primitive(PrimitiveType::Int256),
DataType::Interval(IntervalUnit::YearMonth) => {
Self::Primitive(PrimitiveType::Int32)
}
DataType::Interval(IntervalUnit::DayTime) => {
Self::Primitive(PrimitiveType::Int64)
}
DataType::Interval(IntervalUnit::MonthDayNano) => {
Self::Primitive(PrimitiveType::Int128)
}
DataType::FixedSizeBinary(size) => Self::FixedSizeBinary(*size as usize),
DataType::Binary => Self::Bytes(OffsetType::Int32, BytesType::Binary),
DataType::LargeBinary => Self::Bytes(OffsetType::Int64, BytesType::Binary),
DataType::Utf8 => Self::Bytes(OffsetType::Int32, BytesType::Utf8),
DataType::LargeUtf8 => Self::Bytes(OffsetType::Int64, BytesType::Utf8),
DataType::List(_) => Self::List(OffsetType::Int32),
DataType::FixedSizeList(_, size) => Self::FixedSizeList(*size as usize),
DataType::LargeList(_) => Self::List(OffsetType::Int64),
DataType::Struct(_) => Self::Struct,
DataType::Union(_, _, mode) => Self::Union(*mode),
DataType::Dictionary(k, _) => match k.as_ref() {
DataType::Int8 => Self::Dictionary(DictionaryKeyType::Int8),
DataType::Int16 => Self::Dictionary(DictionaryKeyType::Int16),
DataType::Int32 => Self::Dictionary(DictionaryKeyType::Int32),
DataType::Int64 => Self::Dictionary(DictionaryKeyType::Int64),
DataType::UInt8 => Self::Dictionary(DictionaryKeyType::UInt8),
DataType::UInt16 => Self::Dictionary(DictionaryKeyType::UInt16),
DataType::UInt32 => Self::Dictionary(DictionaryKeyType::UInt32),
DataType::UInt64 => Self::Dictionary(DictionaryKeyType::UInt64),
d => panic!("illegal dictionary key data type {d}"),
},
DataType::Map(_, _) => Self::List(OffsetType::Int32),
DataType::RunEndEncoded(f, _) => match f.data_type() {
DataType::Int16 => Self::Run(RunEndType::Int16),
DataType::Int32 => Self::Run(RunEndType::Int32),
DataType::Int64 => Self::Run(RunEndType::Int64),
d => panic!("illegal run end data type {d}"),
},
}
}
}