datafusion_table_providers/sql/arrow_sql_gen/
arrow.rs

1use datafusion::arrow::{
2    array::{
3        types::Int8Type, ArrayBuilder, BinaryBuilder, BooleanBuilder, Date32Builder, Date64Builder,
4        Decimal128Builder, Decimal256Builder, FixedSizeBinaryBuilder, FixedSizeListBuilder,
5        Float32Builder, Float64Builder, Int16Builder, Int32Builder, Int64Builder, Int8Builder,
6        IntervalMonthDayNanoBuilder, LargeBinaryBuilder, LargeStringBuilder, ListBuilder,
7        NullBuilder, StringBuilder, StringDictionaryBuilder, StructBuilder,
8        Time64NanosecondBuilder, TimestampMicrosecondBuilder, TimestampMillisecondBuilder,
9        TimestampNanosecondBuilder, TimestampSecondBuilder, UInt16Builder, UInt32Builder,
10        UInt64Builder, UInt8Builder,
11    },
12    datatypes::{DataType, TimeUnit, UInt16Type},
13};
14
15pub fn map_data_type_to_array_builder_optional(
16    data_type: Option<&DataType>,
17) -> Option<Box<dyn ArrayBuilder>> {
18    match data_type {
19        Some(data_type) => Some(map_data_type_to_array_builder(data_type)),
20        None => None,
21    }
22}
23
24#[allow(clippy::too_many_lines)]
25pub fn map_data_type_to_array_builder(data_type: &DataType) -> Box<dyn ArrayBuilder> {
26    match data_type {
27        DataType::Int8 => Box::new(Int8Builder::new()),
28        DataType::Int16 => Box::new(Int16Builder::new()),
29        DataType::Int32 => Box::new(Int32Builder::new()),
30        DataType::Int64 => Box::new(Int64Builder::new()),
31        DataType::UInt8 => Box::new(UInt8Builder::new()),
32        DataType::UInt16 => Box::new(UInt16Builder::new()),
33        DataType::UInt32 => Box::new(UInt32Builder::new()),
34        DataType::UInt64 => Box::new(UInt64Builder::new()),
35        DataType::Float32 => Box::new(Float32Builder::new()),
36        DataType::Float64 => Box::new(Float64Builder::new()),
37        DataType::Utf8 => Box::new(StringBuilder::new()),
38        DataType::LargeUtf8 => Box::new(LargeStringBuilder::new()),
39        DataType::Boolean => Box::new(BooleanBuilder::new()),
40        DataType::Binary => Box::new(BinaryBuilder::new()),
41        DataType::LargeBinary => Box::new(LargeBinaryBuilder::new()),
42        DataType::Interval(_) => Box::new(IntervalMonthDayNanoBuilder::new()),
43        DataType::Decimal128(precision, scale) => Box::new(
44            Decimal128Builder::new()
45                .with_precision_and_scale(*precision, *scale)
46                .unwrap_or_default(),
47        ),
48        DataType::Decimal256(precision, scale) => Box::new(
49            Decimal256Builder::new()
50                .with_precision_and_scale(*precision, *scale)
51                .unwrap_or_default(),
52        ),
53        DataType::Timestamp(time_unit, time_zone) => match time_unit {
54            TimeUnit::Microsecond => {
55                Box::new(TimestampMicrosecondBuilder::new().with_timezone_opt(time_zone.clone()))
56            }
57            TimeUnit::Second => {
58                Box::new(TimestampSecondBuilder::new().with_timezone_opt(time_zone.clone()))
59            }
60            TimeUnit::Millisecond => {
61                Box::new(TimestampMillisecondBuilder::new().with_timezone_opt(time_zone.clone()))
62            }
63            TimeUnit::Nanosecond => {
64                Box::new(TimestampNanosecondBuilder::new().with_timezone_opt(time_zone.clone()))
65            }
66        },
67        DataType::Dictionary(ref key_type, ref value_type) => match (&**key_type, &**value_type) {
68            (DataType::Int8, DataType::Utf8) => {
69                Box::new(StringDictionaryBuilder::<Int8Type>::new())
70            }
71            (DataType::UInt16, DataType::Utf8) => {
72                Box::new(StringDictionaryBuilder::<UInt16Type>::new())
73            }
74            _ => unimplemented!("Unimplemented dictionary type"),
75        },
76        DataType::Date32 => Box::new(Date32Builder::new()),
77        DataType::Date64 => Box::new(Date64Builder::new()),
78        // For time format, always use nanosecond
79        DataType::Time64(TimeUnit::Nanosecond) => Box::new(Time64NanosecondBuilder::new()),
80        DataType::FixedSizeBinary(s) => Box::new(FixedSizeBinaryBuilder::new(*s)),
81        // We can't recursively call map_data_type_to_array_builder here because downcasting will not work if the
82        // values_builder is boxed.
83        DataType::List(values_field) | DataType::LargeList(values_field) => {
84            match values_field.data_type() {
85                DataType::Int8 => Box::new(ListBuilder::new(Int8Builder::new())),
86                DataType::Int16 => Box::new(ListBuilder::new(Int16Builder::new())),
87                DataType::Int32 => Box::new(ListBuilder::new(Int32Builder::new())),
88                DataType::Int64 => Box::new(ListBuilder::new(Int64Builder::new())),
89                DataType::UInt32 => Box::new(ListBuilder::new(UInt32Builder::new())),
90                DataType::Float32 => Box::new(ListBuilder::new(Float32Builder::new())),
91                DataType::Float64 => Box::new(ListBuilder::new(Float64Builder::new())),
92                DataType::Utf8 => Box::new(ListBuilder::new(StringBuilder::new())),
93                DataType::Boolean => Box::new(ListBuilder::new(BooleanBuilder::new())),
94                DataType::Binary => Box::new(ListBuilder::new(BinaryBuilder::new())),
95                _ => unimplemented!("Unsupported list value data type {:?}", data_type),
96            }
97        }
98        DataType::FixedSizeList(values_field, size) => match values_field.data_type() {
99            DataType::Int8 => Box::new(FixedSizeListBuilder::new(
100                Int8Builder::new(),
101                size.to_owned(),
102            )),
103            DataType::Int16 => Box::new(FixedSizeListBuilder::new(
104                Int16Builder::new(),
105                size.to_owned(),
106            )),
107            DataType::Int32 => Box::new(FixedSizeListBuilder::new(
108                Int32Builder::new(),
109                size.to_owned(),
110            )),
111            DataType::Int64 => Box::new(FixedSizeListBuilder::new(
112                Int64Builder::new(),
113                size.to_owned(),
114            )),
115            DataType::UInt32 => Box::new(FixedSizeListBuilder::new(
116                UInt32Builder::new(),
117                size.to_owned(),
118            )),
119            DataType::Float32 => Box::new(FixedSizeListBuilder::new(
120                Float32Builder::new(),
121                size.to_owned(),
122            )),
123            DataType::Float64 => Box::new(FixedSizeListBuilder::new(
124                Float64Builder::new(),
125                size.to_owned(),
126            )),
127            DataType::Utf8 => Box::new(FixedSizeListBuilder::new(
128                StringBuilder::new(),
129                size.to_owned(),
130            )),
131            DataType::Boolean => Box::new(FixedSizeListBuilder::new(
132                BooleanBuilder::new(),
133                size.to_owned(),
134            )),
135            _ => unimplemented!("Unsupported list value data type {:?}", data_type),
136        },
137        DataType::Null => Box::new(NullBuilder::new()),
138        DataType::Struct(fields) => {
139            let mut field_builders = Vec::with_capacity(fields.len());
140            for field in fields {
141                field_builders.push(map_data_type_to_array_builder(field.data_type()));
142            }
143            Box::new(StructBuilder::new(fields.clone(), field_builders))
144        }
145        _ => unimplemented!("Unsupported data type {:?}", data_type),
146    }
147}