use crate::builder::{ArrayBuilder, BufferBuilder};
use crate::{Array, ArrayRef, MapArray, StructArray};
use arrow_buffer::Buffer;
use arrow_buffer::{NullBuffer, NullBufferBuilder};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
use std::any::Any;
use std::sync::Arc;
/// Builder for [`MapArray`] values, generic over the child builders used for
/// the map keys (`K`) and the map values (`V`).
///
/// Keys and values are appended through the child builders; each call to
/// `append` then closes one map slot by recording the current child length as
/// the slot's end offset together with its validity bit.
#[derive(Debug)]
pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
/// `i32` end offsets of the map slots. Seeded with a leading `0` on
/// construction and again after `finish`.
offsets_builder: BufferBuilder<i32>,
/// Validity bits, one per appended map slot; its length is the builder's
/// reported length.
null_buffer_builder: NullBufferBuilder,
/// Names given to the entry, key and value fields of the finished array.
field_names: MapFieldNames,
/// Child builder collecting the keys of all map slots.
key_builder: K,
/// Child builder collecting the values of all map slots.
value_builder: V,
}
/// The field names used for the nested fields of a map array: the entry
/// (struct) field and its key and value children.
#[derive(Debug, Clone)]
pub struct MapFieldNames {
    /// Name of the field holding the key/value entries.
    pub entry: String,
    /// Name of the key field inside each entry.
    pub key: String,
    /// Name of the value field inside each entry.
    pub value: String,
}

impl Default for MapFieldNames {
    /// Returns the names `"entries"`, `"keys"` and `"values"`.
    fn default() -> Self {
        let [entry, key, value] = ["entries", "keys", "values"].map(String::from);
        Self { entry, key, value }
    }
}
impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
let capacity = key_builder.len();
Self::with_capacity(field_names, key_builder, value_builder, capacity)
}
pub fn with_capacity(
field_names: Option<MapFieldNames>,
key_builder: K,
value_builder: V,
capacity: usize,
) -> Self {
let mut offsets_builder = BufferBuilder::<i32>::new(capacity + 1);
offsets_builder.append(0);
Self {
offsets_builder,
null_buffer_builder: NullBufferBuilder::new(capacity),
field_names: field_names.unwrap_or_default(),
key_builder,
value_builder,
}
}
pub fn keys(&mut self) -> &mut K {
&mut self.key_builder
}
pub fn values(&mut self) -> &mut V {
&mut self.value_builder
}
pub fn entries(&mut self) -> (&mut K, &mut V) {
(&mut self.key_builder, &mut self.value_builder)
}
#[inline]
pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> {
if self.key_builder.len() != self.value_builder.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Cannot append to a map builder when its keys and values have unequal lengths of {} and {}",
self.key_builder.len(),
self.value_builder.len()
)));
}
self.offsets_builder.append(self.key_builder.len() as i32);
self.null_buffer_builder.append(is_valid);
Ok(())
}
pub fn finish(&mut self) -> MapArray {
let len = self.len();
let keys_arr = self.key_builder.finish();
let values_arr = self.value_builder.finish();
let offset_buffer = self.offsets_builder.finish();
self.offsets_builder.append(0);
let null_bit_buffer = self.null_buffer_builder.finish();
self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
}
pub fn finish_cloned(&self) -> MapArray {
let len = self.len();
let keys_arr = self.key_builder.finish_cloned();
let values_arr = self.value_builder.finish_cloned();
let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
let nulls = self.null_buffer_builder.finish_cloned();
self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
}
fn finish_helper(
&self,
keys_arr: Arc<dyn Array>,
values_arr: Arc<dyn Array>,
offset_buffer: Buffer,
nulls: Option<NullBuffer>,
len: usize,
) -> MapArray {
assert!(
keys_arr.null_count() == 0,
"Keys array must have no null values, found {} null value(s)",
keys_arr.null_count()
);
let keys_field = Arc::new(Field::new(
self.field_names.key.as_str(),
keys_arr.data_type().clone(),
false, ));
let values_field = Arc::new(Field::new(
self.field_names.value.as_str(),
values_arr.data_type().clone(),
true,
));
let struct_array =
StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
let map_field = Arc::new(Field::new(
self.field_names.entry.as_str(),
struct_array.data_type().clone(),
false, ));
let array_data = ArrayData::builder(DataType::Map(map_field, false)) .len(len)
.add_buffer(offset_buffer)
.add_child_data(struct_array.into_data())
.nulls(nulls);
let array_data = unsafe { array_data.build_unchecked() };
MapArray::from(array_data)
}
}
impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
fn len(&self) -> usize {
self.null_buffer_builder.len()
}
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
fn as_any(&self) -> &dyn Any {
self
}
fn as_any_mut(&mut self) -> &mut dyn Any {
self
}
fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
self
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{make_builder, Int32Builder, StringBuilder};
    use crate::{Int32Array, StringArray};

    /// Finishing a map whose key builder holds a null must panic with the
    /// non-null-keys assertion message.
    #[test]
    #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
    fn test_map_builder_with_null_keys_panics() {
        let key_builder = StringBuilder::new();
        let value_builder = Int32Builder::new();
        let mut map_builder = MapBuilder::new(None, key_builder, value_builder);

        // A single entry: null key, value 42.
        map_builder.keys().append_null();
        map_builder.values().append_value(42);
        map_builder.append(true).unwrap();

        map_builder.finish();
    }

    /// The builder also works when the child builders are boxed trait objects
    /// that have to be downcast before use.
    #[test]
    fn test_boxed_map_builder() {
        let mut map_builder = MapBuilder::new(
            None,
            make_builder(&DataType::Utf8, 5),
            make_builder(&DataType::Int32, 5),
        );

        let string_keys = map_builder
            .keys()
            .as_any_mut()
            .downcast_mut::<StringBuilder>()
            .expect("should be an StringBuilder");
        string_keys.append_value("1");

        let int_values = map_builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder");
        int_values.append_value(42);

        map_builder.append(true).unwrap();
        let finished = map_builder.finish();

        let first_key = finished
            .keys()
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("should be an StringArray")
            .value(0);
        assert_eq!(first_key, "1");

        let first_value = finished
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("should be an Int32Array")
            .value(0);
        assert_eq!(first_value, 42);
    }
}