#![deny(missing_docs)]
mod field;
mod physical_type;
mod schema;
pub use field::Field;
pub use physical_type::*;
pub use schema::Schema;
/// The logical data types known to this module.
///
/// Every variant is mapped to a physical representation by
/// `DataType::to_physical_type`; several logical types share the same
/// physical type (for example `Int32`, `Date32` and `Time32` all map to a
/// primitive `Int32`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DataType {
/// The null type; its physical type is `PhysicalType::Null`.
Null,
/// A boolean; its physical type is `PhysicalType::Boolean`.
Boolean,
/// A signed 8-bit integer.
Int8,
/// A signed 16-bit integer.
Int16,
/// A signed 32-bit integer.
Int32,
/// A signed 64-bit integer.
Int64,
/// An unsigned 8-bit integer.
UInt8,
/// An unsigned 16-bit integer.
UInt16,
/// An unsigned 32-bit integer.
UInt32,
/// An unsigned 64-bit integer.
UInt64,
/// A 16-bit floating point number.
///
/// Note: `DataType::to_physical_type` panics when called on this variant.
Float16,
/// A 32-bit floating point number.
Float32,
/// A 64-bit floating point number.
Float64,
/// A timestamp measured in the given [`TimeUnit`], physically a primitive `Int64`.
///
/// NOTE(review): the optional string is presumably a timezone — confirm
/// against the Arrow format specification.
Timestamp(TimeUnit, Option<String>),
/// A date that is physically a primitive `Int32`.
///
/// NOTE(review): presumably days elapsed since the UNIX epoch — confirm.
Date32,
/// A date that is physically a primitive `Int64`.
///
/// NOTE(review): presumably milliseconds elapsed since the UNIX epoch — confirm.
Date64,
/// A time of day in the given [`TimeUnit`], physically a primitive `Int32`.
Time32(TimeUnit),
/// A time of day in the given [`TimeUnit`], physically a primitive `Int64`.
Time64(TimeUnit),
/// An elapsed time in the given [`TimeUnit`], physically a primitive `Int64`.
Duration(TimeUnit),
/// A calendar interval whose physical layout depends on the [`IntervalUnit`].
Interval(IntervalUnit),
/// Binary data; its physical type is `PhysicalType::Binary`.
Binary,
/// Binary data of fixed size; its physical type is `PhysicalType::FixedSizeBinary`.
///
/// NOTE(review): the `i32` is presumably the width of every value in bytes — confirm.
FixedSizeBinary(i32),
/// Binary data; its physical type is `PhysicalType::LargeBinary`.
///
/// NOTE(review): presumably the 64-bit-offset counterpart of `Binary` — confirm.
LargeBinary,
/// A string type; its physical type is `PhysicalType::Utf8`.
Utf8,
/// A string type; its physical type is `PhysicalType::LargeUtf8`.
///
/// NOTE(review): presumably the 64-bit-offset counterpart of `Utf8` — confirm.
LargeUtf8,
/// A list whose items are described by the boxed [`Field`].
List(Box<Field>),
/// A list whose items are described by the boxed [`Field`], together with an
/// `i32` size (two such types only compare equal when their sizes match).
FixedSizeList(Box<Field>, i32),
/// A list whose items are described by the boxed [`Field`]; its physical type
/// is `PhysicalType::LargeList`.
///
/// NOTE(review): presumably the 64-bit-offset counterpart of `List` — confirm.
LargeList(Box<Field>),
/// A struct made of the given child [`Field`]s.
Struct(Vec<Field>),
/// A union over the given child [`Field`]s.
///
/// NOTE(review): the `Option<Vec<i32>>` is presumably the list of type ids
/// and the `bool` presumably selects sparse vs. dense layout — confirm.
Union(Vec<Field>, Option<Vec<i32>>, bool),
/// A map whose entries are described by the boxed [`Field`].
///
/// NOTE(review): the `bool` presumably states whether keys are sorted — confirm.
Map(Box<Field>, bool),
/// A dictionary-encoded type.
///
/// The first `DataType` is the key type and must be one of the integer
/// variants (otherwise `DataType::to_physical_type` panics); the second is
/// presumably the type of the dictionary values — confirm.
Dictionary(Box<DataType>, Box<DataType>),
/// A decimal number, physically a primitive `Int128`.
///
/// NOTE(review): the two `usize` are presumably precision and scale, in
/// that order — confirm.
Decimal(usize, usize),
/// An extension type wrapping another `DataType`.
///
/// `DataType::to_logical_type` and `DataType::to_physical_type` both
/// delegate to the boxed inner type. NOTE(review): the `String` and
/// `Option<String>` are presumably the extension's name and metadata —
/// confirm.
Extension(String, Box<DataType>, Option<String>),
}
/// Displays a [`DataType`] using its `Debug` representation.
impl std::fmt::Display for DataType {
    /// Writes `self` with a fresh `{:?}` specification.
    ///
    /// Because a new format specification is used, flags given by the caller
    /// (such as the alternate flag `#`) are not forwarded to the `Debug` output.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}
/// The unit of time used by the temporal [`DataType`] variants
/// (`Timestamp`, `Time32`, `Time64` and `Duration`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimeUnit {
/// Time in seconds.
Second,
/// Time in milliseconds.
Millisecond,
/// Time in microseconds.
Microsecond,
/// Time in nanoseconds.
Nanosecond,
}
/// The unit of a `DataType::Interval`; it determines the interval's physical type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
/// An interval that is physically a primitive `Int32`.
///
/// NOTE(review): presumably a number of whole months — confirm.
YearMonth,
/// An interval that is physically a primitive `DaysMs`.
///
/// NOTE(review): presumably a (days, milliseconds) pair — confirm.
DayTime,
/// An interval that is physically a primitive `MonthDayNano`.
///
/// NOTE(review): presumably a (months, days, nanoseconds) triple — confirm.
MonthDayNano,
}
impl DataType {
pub(crate) fn equals_datatype(&self, other: &DataType) -> bool {
match (&self, other) {
(DataType::List(a), DataType::List(b))
| (DataType::LargeList(a), DataType::LargeList(b)) => {
a.is_nullable() == b.is_nullable() && a.data_type().equals_datatype(b.data_type())
}
(DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => {
a_size == b_size
&& a.is_nullable() == b.is_nullable()
&& a.data_type().equals_datatype(b.data_type())
}
(DataType::Struct(a), DataType::Struct(b)) => {
a.len() == b.len()
&& a.iter().zip(b).all(|(a, b)| {
a.is_nullable() == b.is_nullable()
&& a.data_type().equals_datatype(b.data_type())
})
}
_ => self == other,
}
}
pub fn to_physical_type(&self) -> PhysicalType {
use DataType::*;
match self {
Null => PhysicalType::Null,
Boolean => PhysicalType::Boolean,
Int8 => PhysicalType::Primitive(PrimitiveType::Int8),
Int16 => PhysicalType::Primitive(PrimitiveType::Int16),
Int32 | Date32 | Time32(_) | Interval(IntervalUnit::YearMonth) => {
PhysicalType::Primitive(PrimitiveType::Int32)
}
Int64 | Date64 | Timestamp(_, _) | Time64(_) | Duration(_) => {
PhysicalType::Primitive(PrimitiveType::Int64)
}
Decimal(_, _) => PhysicalType::Primitive(PrimitiveType::Int128),
UInt8 => PhysicalType::Primitive(PrimitiveType::UInt8),
UInt16 => PhysicalType::Primitive(PrimitiveType::UInt16),
UInt32 => PhysicalType::Primitive(PrimitiveType::UInt32),
UInt64 => PhysicalType::Primitive(PrimitiveType::UInt64),
Float16 => unreachable!(),
Float32 => PhysicalType::Primitive(PrimitiveType::Float32),
Float64 => PhysicalType::Primitive(PrimitiveType::Float64),
Interval(IntervalUnit::DayTime) => PhysicalType::Primitive(PrimitiveType::DaysMs),
Interval(IntervalUnit::MonthDayNano) => {
PhysicalType::Primitive(PrimitiveType::MonthDayNano)
}
Binary => PhysicalType::Binary,
FixedSizeBinary(_) => PhysicalType::FixedSizeBinary,
LargeBinary => PhysicalType::LargeBinary,
Utf8 => PhysicalType::Utf8,
LargeUtf8 => PhysicalType::LargeUtf8,
List(_) => PhysicalType::List,
FixedSizeList(_, _) => PhysicalType::FixedSizeList,
LargeList(_) => PhysicalType::LargeList,
Struct(_) => PhysicalType::Struct,
Union(_, _, _) => PhysicalType::Union,
Map(_, _) => PhysicalType::Map,
Dictionary(key, _) => PhysicalType::Dictionary(to_dictionary_index_type(key.as_ref())),
Extension(_, key, _) => key.to_physical_type(),
}
}
pub fn to_logical_type(&self) -> &DataType {
use DataType::*;
match self {
Extension(_, key, _) => key.to_logical_type(),
_ => self,
}
}
}
/// Converts the key type of a dictionary into its [`DictionaryIndexType`].
///
/// # Panics
///
/// Panics when `data_type` is not one of the eight signed or unsigned
/// integer variants of [`DataType`].
fn to_dictionary_index_type(data_type: &DataType) -> DictionaryIndexType {
    match *data_type {
        // Unsigned keys.
        DataType::UInt8 => DictionaryIndexType::UInt8,
        DataType::UInt16 => DictionaryIndexType::UInt16,
        DataType::UInt32 => DictionaryIndexType::UInt32,
        DataType::UInt64 => DictionaryIndexType::UInt64,
        // Signed keys.
        DataType::Int8 => DictionaryIndexType::Int8,
        DataType::Int16 => DictionaryIndexType::Int16,
        DataType::Int32 => DictionaryIndexType::Int32,
        DataType::Int64 => DictionaryIndexType::Int64,
        // Every other type is rejected as a dictionary key.
        _ => ::core::unreachable!("A dictionary key type can only be of integer types"),
    }
}
use std::collections::BTreeMap;
use std::sync::Arc;
/// A [`Schema`] shared behind an atomically reference-counted pointer ([`Arc`]).
pub type SchemaRef = Arc<Schema>;
/// Optional string-to-string metadata, kept in a [`BTreeMap`].
pub type Metadata = Option<BTreeMap<String, String>>;
/// Optional extension information: a name plus optional metadata, as
/// returned by [`get_extension`].
pub type Extension = Option<(String, Option<String>)>;
/// Extracts extension information from a metadata map.
///
/// Returns `None` when `metadata` is `None` or has no
/// `"ARROW:extension:name"` entry. Otherwise returns the extension name
/// together with the value stored under `"ARROW:extension:metadata"`, if any.
/// Both strings are cloned out of the map.
pub fn get_extension(metadata: &Option<BTreeMap<String, String>>) -> Extension {
    let entries = metadata.as_ref()?;
    // Without a name there is no extension, whatever else the map contains.
    let name = entries.get("ARROW:extension:name")?.clone();
    let extension_metadata = entries.get("ARROW:extension:metadata").cloned();
    Some((name, extension_metadata))
}