#![deny(missing_docs)]
mod field;
mod physical_type;
mod schema;
pub use field::Field;
pub use physical_type::*;
pub use schema::Schema;
/// The set of logical data types known to this module.
///
/// Every variant maps to a [`PhysicalType`] through [`DataType::to_physical_type`].
/// Several variants share one physical representation (e.g. `Int32`, `Date32` and
/// `Time32` all map to `PrimitiveType::Int32`).
// NOTE(review): the variant names match the Apache Arrow logical types (this file also
// reads `ARROW:`-prefixed metadata keys). Payload meanings marked "presumably" below are
// inferred from that naming and are not established by this file — confirm before relying on them.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DataType {
/// Null type.
Null,
/// Boolean type.
Boolean,
/// Signed 8-bit integer.
Int8,
/// Signed 16-bit integer.
Int16,
/// Signed 32-bit integer.
Int32,
/// Signed 64-bit integer.
Int64,
/// Unsigned 8-bit integer.
UInt8,
/// Unsigned 16-bit integer.
UInt16,
/// Unsigned 32-bit integer.
UInt32,
/// Unsigned 64-bit integer.
UInt64,
/// 16-bit floating point number.
///
/// [`DataType::to_physical_type`] currently hits `unreachable!()` for this variant.
Float16,
/// 32-bit floating point number.
Float32,
/// 64-bit floating point number.
Float64,
/// A timestamp whose physical type is `PrimitiveType::Int64`.
// NOTE(review): presumably the `TimeUnit` is the resolution and the optional string a timezone — confirm.
Timestamp(TimeUnit, Option<String>),
/// A date whose physical type is `PrimitiveType::Int32`.
// NOTE(review): presumably days since the UNIX epoch — confirm.
Date32,
/// A date whose physical type is `PrimitiveType::Int64`.
// NOTE(review): presumably milliseconds since the UNIX epoch — confirm.
Date64,
/// A time value in the given [`TimeUnit`] whose physical type is `PrimitiveType::Int32`.
Time32(TimeUnit),
/// A time value in the given [`TimeUnit`] whose physical type is `PrimitiveType::Int64`.
Time64(TimeUnit),
/// A duration in the given [`TimeUnit`] whose physical type is `PrimitiveType::Int64`.
Duration(TimeUnit),
/// An interval; its physical type depends on the [`IntervalUnit`].
Interval(IntervalUnit),
/// Binary data (physical type `PhysicalType::Binary`).
Binary,
/// Binary data of a fixed size (physical type `PhysicalType::FixedSizeBinary`).
// NOTE(review): the `usize` is presumably the size of each value in bytes — confirm.
FixedSizeBinary(usize),
/// Binary data (physical type `PhysicalType::LargeBinary`).
LargeBinary,
/// UTF-8 string (physical type `PhysicalType::Utf8`).
Utf8,
/// UTF-8 string (physical type `PhysicalType::LargeUtf8`).
LargeUtf8,
/// A list whose items are described by the boxed [`Field`].
List(Box<Field>),
/// A list of fixed size whose items are described by the boxed [`Field`].
// NOTE(review): the `usize` is presumably the number of items per list — confirm.
FixedSizeList(Box<Field>, usize),
/// A list (physical type `PhysicalType::LargeList`) whose items are described by the boxed [`Field`].
LargeList(Box<Field>),
/// A struct made of the given [`Field`]s.
Struct(Vec<Field>),
/// A union of the given [`Field`]s in the given [`UnionMode`].
// NOTE(review): the optional `Vec<i32>` presumably holds the type ids of the fields — confirm.
Union(Vec<Field>, Option<Vec<i32>>, UnionMode),
/// A map whose entries are described by the boxed [`Field`].
// NOTE(review): the `bool` presumably states whether keys are sorted — confirm.
Map(Box<Field>, bool),
/// A dictionary-encoded type: the [`IntegerType`] of the keys and the type of the values.
///
/// The key type determines the physical type (`PhysicalType::Dictionary(key)`).
Dictionary(IntegerType, Box<DataType>),
/// A decimal whose physical type is `PrimitiveType::Int128`.
// NOTE(review): the two `usize` are presumably (precision, scale) — confirm.
Decimal(usize, usize),
/// An extension type wrapping another [`DataType`].
///
/// The boxed type is the one used by [`DataType::to_physical_type`] and
/// [`DataType::to_logical_type`].
// NOTE(review): the `String` and `Option<String>` presumably are the extension's name and
// metadata (same shape as the `Extension` alias in this file) — confirm.
Extension(String, Box<DataType>, Option<String>),
}
impl std::fmt::Display for DataType {
    /// Writes the type using its `Debug` representation.
    ///
    /// The `Debug` output is produced with default formatting options, so flags
    /// set on `f` (width, alternate form, ...) do not affect the result.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}
/// The mode of a [`DataType::Union`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnionMode {
    /// Dense union.
    Dense,
    /// Sparse union.
    Sparse,
}

impl UnionMode {
    /// Builds a [`UnionMode`] from a flag: [`UnionMode::Sparse`] when `is_sparse`
    /// is `true`, [`UnionMode::Dense`] otherwise.
    pub fn sparse(is_sparse: bool) -> Self {
        if !is_sparse {
            return Self::Dense;
        }
        Self::Sparse
    }

    /// Returns `true` when the mode is [`UnionMode::Sparse`].
    pub fn is_sparse(&self) -> bool {
        *self == Self::Sparse
    }

    /// Returns `true` when the mode is [`UnionMode::Dense`].
    pub fn is_dense(&self) -> bool {
        match self {
            Self::Dense => true,
            Self::Sparse => false,
        }
    }
}
/// The unit attached to the time-related [`DataType`] variants
/// (`Timestamp`, `Time32`, `Time64` and `Duration`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimeUnit {
/// Seconds.
Second,
/// Milliseconds.
Millisecond,
/// Microseconds.
Microsecond,
/// Nanoseconds.
Nanosecond,
}
/// The unit of a [`DataType::Interval`].
///
/// The unit selects the physical representation chosen by
/// [`DataType::to_physical_type`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
/// Year-month interval; physical type `PrimitiveType::Int32`.
YearMonth,
/// Day-time interval; physical type `PrimitiveType::DaysMs`.
DayTime,
/// Month-day-nanosecond interval; physical type `PrimitiveType::MonthDayNano`.
MonthDayNano,
}
impl DataType {
pub(crate) fn equals_datatype(&self, other: &DataType) -> bool {
match (&self, other) {
(DataType::List(a), DataType::List(b))
| (DataType::LargeList(a), DataType::LargeList(b)) => {
a.is_nullable() == b.is_nullable() && a.data_type().equals_datatype(b.data_type())
}
(DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => {
a_size == b_size
&& a.is_nullable() == b.is_nullable()
&& a.data_type().equals_datatype(b.data_type())
}
(DataType::Struct(a), DataType::Struct(b)) => {
a.len() == b.len()
&& a.iter().zip(b).all(|(a, b)| {
a.is_nullable() == b.is_nullable()
&& a.data_type().equals_datatype(b.data_type())
})
}
_ => self == other,
}
}
pub fn to_physical_type(&self) -> PhysicalType {
use DataType::*;
match self {
Null => PhysicalType::Null,
Boolean => PhysicalType::Boolean,
Int8 => PhysicalType::Primitive(PrimitiveType::Int8),
Int16 => PhysicalType::Primitive(PrimitiveType::Int16),
Int32 | Date32 | Time32(_) | Interval(IntervalUnit::YearMonth) => {
PhysicalType::Primitive(PrimitiveType::Int32)
}
Int64 | Date64 | Timestamp(_, _) | Time64(_) | Duration(_) => {
PhysicalType::Primitive(PrimitiveType::Int64)
}
Decimal(_, _) => PhysicalType::Primitive(PrimitiveType::Int128),
UInt8 => PhysicalType::Primitive(PrimitiveType::UInt8),
UInt16 => PhysicalType::Primitive(PrimitiveType::UInt16),
UInt32 => PhysicalType::Primitive(PrimitiveType::UInt32),
UInt64 => PhysicalType::Primitive(PrimitiveType::UInt64),
Float16 => unreachable!(),
Float32 => PhysicalType::Primitive(PrimitiveType::Float32),
Float64 => PhysicalType::Primitive(PrimitiveType::Float64),
Interval(IntervalUnit::DayTime) => PhysicalType::Primitive(PrimitiveType::DaysMs),
Interval(IntervalUnit::MonthDayNano) => {
PhysicalType::Primitive(PrimitiveType::MonthDayNano)
}
Binary => PhysicalType::Binary,
FixedSizeBinary(_) => PhysicalType::FixedSizeBinary,
LargeBinary => PhysicalType::LargeBinary,
Utf8 => PhysicalType::Utf8,
LargeUtf8 => PhysicalType::LargeUtf8,
List(_) => PhysicalType::List,
FixedSizeList(_, _) => PhysicalType::FixedSizeList,
LargeList(_) => PhysicalType::LargeList,
Struct(_) => PhysicalType::Struct,
Union(_, _, _) => PhysicalType::Union,
Map(_, _) => PhysicalType::Map,
Dictionary(key, _) => PhysicalType::Dictionary(*key),
Extension(_, key, _) => key.to_physical_type(),
}
}
pub fn to_logical_type(&self) -> &DataType {
use DataType::*;
match self {
Extension(_, key, _) => key.to_logical_type(),
_ => self,
}
}
}
impl From<IntegerType> for DataType {
    /// Converts an [`IntegerType`] into the [`DataType`] variant of the same name.
    fn from(item: IntegerType) -> Self {
        match item {
            // Unsigned variants.
            IntegerType::UInt8 => Self::UInt8,
            IntegerType::UInt16 => Self::UInt16,
            IntegerType::UInt32 => Self::UInt32,
            IntegerType::UInt64 => Self::UInt64,
            // Signed variants.
            IntegerType::Int8 => Self::Int8,
            IntegerType::Int16 => Self::Int16,
            IntegerType::Int32 => Self::Int32,
            IntegerType::Int64 => Self::Int64,
        }
    }
}
use std::collections::BTreeMap;
use std::sync::Arc;
/// A [`Schema`] behind an [`Arc`], i.e. a reference-counted handle to a shared schema.
pub type SchemaRef = Arc<Schema>;
/// Optional metadata: string keys mapped to string values, stored in a [`BTreeMap`].
pub type Metadata = Option<BTreeMap<String, String>>;
/// Extension information as returned by [`get_extension`]: the value stored under
/// `"ARROW:extension:name"` and, when present, the value stored under
/// `"ARROW:extension:metadata"`. `None` when there is no extension name.
pub type Extension = Option<(String, Option<String>)>;

/// Extracts the extension entries from `metadata`.
///
/// Returns `None` when `metadata` is `None` or has no `"ARROW:extension:name"`
/// key. Otherwise returns a clone of that name together with a clone of the
/// `"ARROW:extension:metadata"` value, if any.
pub fn get_extension(metadata: &Option<BTreeMap<String, String>>) -> Extension {
    let entries = metadata.as_ref()?;
    let name = entries.get("ARROW:extension:name")?.clone();
    let extension_metadata = entries.get("ARROW:extension:metadata").cloned();
    Some((name, extension_metadata))
}