pub use arrow_buffer::BooleanBufferBuilder;
pub use arrow_buffer::NullBufferBuilder;
mod boolean_builder;
pub use boolean_builder::*;
mod buffer_builder;
pub use buffer_builder::*;
mod fixed_size_binary_builder;
pub use fixed_size_binary_builder::*;
mod fixed_size_list_builder;
pub use fixed_size_list_builder::*;
mod fixed_size_binary_dictionary_builder;
pub use fixed_size_binary_dictionary_builder::*;
mod generic_bytes_builder;
pub use generic_bytes_builder::*;
mod generic_list_builder;
pub use generic_list_builder::*;
mod map_builder;
pub use map_builder::*;
mod null_builder;
pub use null_builder::*;
mod primitive_builder;
pub use primitive_builder::*;
mod primitive_dictionary_builder;
pub use primitive_dictionary_builder::*;
mod primitive_run_builder;
pub use primitive_run_builder::*;
mod struct_builder;
pub use struct_builder::*;
mod generic_bytes_dictionary_builder;
pub use generic_bytes_dictionary_builder::*;
mod generic_byte_run_builder;
pub use generic_byte_run_builder::*;
mod generic_bytes_view_builder;
pub use generic_bytes_view_builder::*;
mod generic_list_view_builder;
pub use generic_list_view_builder::*;
mod union_builder;
pub use union_builder::*;
use crate::types::{Int16Type, Int32Type, Int64Type, Int8Type};
use crate::ArrayRef;
use arrow_schema::{DataType, IntervalUnit, TimeUnit};
use std::any::Any;
/// Common, object-safe interface shared by the array builders in this module.
///
/// The `Any + Send + Sync` supertraits require every implementor to be
/// `'static` and usable across threads. `Any` is what allows a
/// `dyn ArrayBuilder` to be downcast back to its concrete builder type through
/// [`as_any`](Self::as_any), [`as_any_mut`](Self::as_any_mut) and
/// [`into_box_any`](Self::into_box_any).
pub trait ArrayBuilder: Any + Send + Sync {
/// Returns the current length of the builder.
///
/// NOTE(review): presumably the number of array slots appended so far —
/// confirm against the concrete implementations.
fn len(&self) -> usize;
/// Returns `true` when [`len`](Self::len) is zero.
///
/// Provided as a default method; implementors only need to supply `len`.
fn is_empty(&self) -> bool {
self.len() == 0
}
/// Builds an [`ArrayRef`] from the builder.
///
/// Takes `&mut self`, so an implementation is free to modify the builder
/// while producing the array.
///
/// NOTE(review): presumably the builder is reset/emptied by this call —
/// the trait itself does not show this; confirm against implementors.
fn finish(&mut self) -> ArrayRef;
/// Builds an [`ArrayRef`] from the builder through a shared reference.
///
/// Unlike [`finish`](Self::finish) this takes `&self`, so the builder cannot
/// be modified through this call (barring interior mutability).
fn finish_cloned(&self) -> ArrayRef;
/// Returns the builder as a `&dyn Any`, e.g. for `downcast_ref` to the
/// concrete builder type.
fn as_any(&self) -> &dyn Any;
/// Returns the builder as a `&mut dyn Any`, e.g. for `downcast_mut` to the
/// concrete builder type.
fn as_any_mut(&mut self) -> &mut dyn Any;
/// Converts a boxed builder into a `Box<dyn Any>`, consuming the box, so the
/// caller can `downcast` it to an owned concrete type.
fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
}
impl ArrayBuilder for Box<dyn ArrayBuilder> {
fn len(&self) -> usize {
(**self).len()
}
fn is_empty(&self) -> bool {
(**self).is_empty()
}
fn finish(&mut self) -> ArrayRef {
(**self).finish()
}
fn finish_cloned(&self) -> ArrayRef {
(**self).finish_cloned()
}
fn as_any(&self) -> &dyn Any {
(**self).as_any()
}
fn as_any_mut(&mut self) -> &mut dyn Any {
(**self).as_any_mut()
}
fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
self
}
}
/// [`GenericListBuilder`] instantiated with `i32`.
///
/// [`make_builder`] uses this builder for `DataType::List`.
pub type ListBuilder<T> = GenericListBuilder<i32, T>;
/// [`GenericListBuilder`] instantiated with `i64`.
///
/// [`make_builder`] uses this builder for `DataType::LargeList`.
pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
/// [`GenericListViewBuilder`] instantiated with `i32`.
///
/// [`make_builder`] uses this builder for `DataType::ListView`.
pub type ListViewBuilder<T> = GenericListViewBuilder<i32, T>;
/// [`GenericListViewBuilder`] instantiated with `i64`.
///
/// [`make_builder`] uses this builder for `DataType::LargeListView`.
pub type LargeListViewBuilder<T> = GenericListViewBuilder<i64, T>;
/// [`GenericBinaryBuilder`] instantiated with `i32`.
///
/// [`make_builder`] uses this builder for `DataType::Binary`.
pub type BinaryBuilder = GenericBinaryBuilder<i32>;
/// [`GenericBinaryBuilder`] instantiated with `i64`.
///
/// [`make_builder`] uses this builder for `DataType::LargeBinary`.
pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
/// [`GenericStringBuilder`] instantiated with `i32`.
///
/// [`make_builder`] uses this builder for `DataType::Utf8`.
pub type StringBuilder = GenericStringBuilder<i32>;
/// [`GenericStringBuilder`] instantiated with `i64`.
///
/// [`make_builder`] uses this builder for `DataType::LargeUtf8`.
pub type LargeStringBuilder = GenericStringBuilder<i64>;
/// Returns a boxed [`ArrayBuilder`] matching `datatype`.
///
/// `capacity` is forwarded to the selected builder's `with_capacity`
/// constructor (the `Null` builder takes no capacity). Nested types — lists,
/// list views, fixed-size lists and maps — create their child builders by
/// calling this function recursively on the child field's data type; struct
/// types are delegated to `StructBuilder::from_fields`.
///
/// The variable-length byte builders (`Binary`, `LargeBinary`, `Utf8`,
/// `LargeUtf8`) are additionally given a fixed second argument of `1024`, and
/// the dictionary builders fixed arguments of `256` and `1024`.
/// NOTE(review): these are presumably initial value/data-buffer capacities —
/// confirm against the respective `with_capacity` signatures.
///
/// `BinaryView` and `Utf8View` are served by `BinaryViewBuilder` and
/// `StringViewBuilder` respectively, so view-typed columns (including ones
/// nested inside lists or maps built here) no longer hit the "not currently
/// supported" panic.
///
/// # Panics
///
/// * `datatype` (or a nested child type reached through recursion) is not
///   covered by any arm below, e.g. `Time32` with a microsecond unit.
/// * `DataType::Map` whose entries field is not a `Struct`, or whose struct
///   has fewer than two children (the key/value fields are taken by index).
/// * `DataType::FixedSizeList` whose size does not fit in a `usize`
///   (i.e. is negative).
/// * `DataType::Dictionary` whose key type is not one of `Int8`, `Int16`,
///   `Int32`, `Int64`, or whose value type is not one of `Utf8`, `LargeUtf8`,
///   `Binary`, `LargeBinary`.
pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
    use crate::builder::*;
    match datatype {
        DataType::Null => Box::new(NullBuilder::new()),
        DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)),
        DataType::Int8 => Box::new(Int8Builder::with_capacity(capacity)),
        DataType::Int16 => Box::new(Int16Builder::with_capacity(capacity)),
        DataType::Int32 => Box::new(Int32Builder::with_capacity(capacity)),
        DataType::Int64 => Box::new(Int64Builder::with_capacity(capacity)),
        DataType::UInt8 => Box::new(UInt8Builder::with_capacity(capacity)),
        DataType::UInt16 => Box::new(UInt16Builder::with_capacity(capacity)),
        DataType::UInt32 => Box::new(UInt32Builder::with_capacity(capacity)),
        DataType::UInt64 => Box::new(UInt64Builder::with_capacity(capacity)),
        DataType::Float16 => Box::new(Float16Builder::with_capacity(capacity)),
        DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)),
        DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
        DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
        DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
        DataType::BinaryView => Box::new(BinaryViewBuilder::with_capacity(capacity)),
        DataType::FixedSizeBinary(len) => {
            Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
        }
        // Decimal builders must be told their precision/scale explicitly.
        DataType::Decimal128(p, s) => Box::new(
            Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)),
        ),
        DataType::Decimal256(p, s) => Box::new(
            Decimal256Builder::with_capacity(capacity).with_data_type(DataType::Decimal256(*p, *s)),
        ),
        DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
        DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
        DataType::Utf8View => Box::new(StringViewBuilder::with_capacity(capacity)),
        DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
        DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
        // Only the (Time32: s, ms) and (Time64: us, ns) combinations have
        // builders; any other unit falls through to the final panic.
        DataType::Time32(TimeUnit::Second) => {
            Box::new(Time32SecondBuilder::with_capacity(capacity))
        }
        DataType::Time32(TimeUnit::Millisecond) => {
            Box::new(Time32MillisecondBuilder::with_capacity(capacity))
        }
        DataType::Time64(TimeUnit::Microsecond) => {
            Box::new(Time64MicrosecondBuilder::with_capacity(capacity))
        }
        DataType::Time64(TimeUnit::Nanosecond) => {
            Box::new(Time64NanosecondBuilder::with_capacity(capacity))
        }
        // Timestamp builders are re-typed so the requested time zone is kept.
        DataType::Timestamp(TimeUnit::Second, tz) => Box::new(
            TimestampSecondBuilder::with_capacity(capacity)
                .with_data_type(DataType::Timestamp(TimeUnit::Second, tz.clone())),
        ),
        DataType::Timestamp(TimeUnit::Millisecond, tz) => Box::new(
            TimestampMillisecondBuilder::with_capacity(capacity)
                .with_data_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone())),
        ),
        DataType::Timestamp(TimeUnit::Microsecond, tz) => Box::new(
            TimestampMicrosecondBuilder::with_capacity(capacity)
                .with_data_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone())),
        ),
        DataType::Timestamp(TimeUnit::Nanosecond, tz) => Box::new(
            TimestampNanosecondBuilder::with_capacity(capacity)
                .with_data_type(DataType::Timestamp(TimeUnit::Nanosecond, tz.clone())),
        ),
        DataType::Interval(IntervalUnit::YearMonth) => {
            Box::new(IntervalYearMonthBuilder::with_capacity(capacity))
        }
        DataType::Interval(IntervalUnit::DayTime) => {
            Box::new(IntervalDayTimeBuilder::with_capacity(capacity))
        }
        DataType::Interval(IntervalUnit::MonthDayNano) => {
            Box::new(IntervalMonthDayNanoBuilder::with_capacity(capacity))
        }
        DataType::Duration(TimeUnit::Second) => {
            Box::new(DurationSecondBuilder::with_capacity(capacity))
        }
        DataType::Duration(TimeUnit::Millisecond) => {
            Box::new(DurationMillisecondBuilder::with_capacity(capacity))
        }
        DataType::Duration(TimeUnit::Microsecond) => {
            Box::new(DurationMicrosecondBuilder::with_capacity(capacity))
        }
        DataType::Duration(TimeUnit::Nanosecond) => {
            Box::new(DurationNanosecondBuilder::with_capacity(capacity))
        }
        // List-like types: build the child builder recursively, then attach
        // the original field so its name/nullability/metadata are preserved.
        DataType::List(field) => {
            let builder = make_builder(field.data_type(), capacity);
            Box::new(ListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
        }
        DataType::LargeList(field) => {
            let builder = make_builder(field.data_type(), capacity);
            Box::new(LargeListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
        }
        DataType::FixedSizeList(field, size) => {
            let size = *size;
            // Every list slot holds `size` child values, so the child builder
            // is sized for `capacity * size` values. Panics if `size` does
            // not fit in a `usize` (i.e. is negative).
            let values_builder_capacity = {
                let size: usize = size.try_into().unwrap();
                capacity * size
            };
            let builder = make_builder(field.data_type(), values_builder_capacity);
            Box::new(
                FixedSizeListBuilder::with_capacity(builder, size, capacity)
                    .with_field(field.clone()),
            )
        }
        DataType::ListView(field) => {
            let builder = make_builder(field.data_type(), capacity);
            Box::new(ListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()))
        }
        DataType::LargeListView(field) => {
            let builder = make_builder(field.data_type(), capacity);
            Box::new(
                LargeListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()),
            )
        }
        DataType::Map(field, _) => match field.data_type() {
            DataType::Struct(fields) => {
                // The entries struct is read positionally: child 0 is the
                // key, child 1 the value. Indexing panics if either is absent.
                let map_field_names = MapFieldNames {
                    key: fields[0].name().clone(),
                    value: fields[1].name().clone(),
                    entry: field.name().clone(),
                };
                let key_builder = make_builder(fields[0].data_type(), capacity);
                let value_builder = make_builder(fields[1].data_type(), capacity);
                Box::new(
                    MapBuilder::with_capacity(
                        Some(map_field_names),
                        key_builder,
                        value_builder,
                        capacity,
                    )
                    .with_keys_field(fields[0].clone())
                    .with_values_field(fields[1].clone()),
                )
            }
            t => panic!("The field of Map data type {t:?} should have a child Struct field"),
        },
        DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
        t @ DataType::Dictionary(key_type, value_type) => {
            // Expands to a match on the dictionary's value type that builds
            // the corresponding dictionary builder for the given key type.
            macro_rules! dict_builder {
                ($key_type:ty) => {
                    match &**value_type {
                        DataType::Utf8 => {
                            let dict_builder: StringDictionaryBuilder<$key_type> =
                                StringDictionaryBuilder::with_capacity(capacity, 256, 1024);
                            Box::new(dict_builder)
                        }
                        DataType::LargeUtf8 => {
                            let dict_builder: LargeStringDictionaryBuilder<$key_type> =
                                LargeStringDictionaryBuilder::with_capacity(capacity, 256, 1024);
                            Box::new(dict_builder)
                        }
                        DataType::Binary => {
                            let dict_builder: BinaryDictionaryBuilder<$key_type> =
                                BinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
                            Box::new(dict_builder)
                        }
                        DataType::LargeBinary => {
                            let dict_builder: LargeBinaryDictionaryBuilder<$key_type> =
                                LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
                            Box::new(dict_builder)
                        }
                        t => panic!("Dictionary value type {t:?} is not currently supported"),
                    }
                };
            }
            // Only signed integer key types are handled here.
            match &**key_type {
                DataType::Int8 => dict_builder!(Int8Type),
                DataType::Int16 => dict_builder!(Int16Type),
                DataType::Int32 => dict_builder!(Int32Type),
                DataType::Int64 => dict_builder!(Int64Type),
                _ => {
                    panic!("Data type {t:?} with key type {key_type:?} is not currently supported")
                }
            }
        }
        t => panic!("Data type {t:?} is not currently supported"),
    }
}