use std::sync::Arc;
use arrow::array::{ArrayRef, StructArray};
use arrow::datatypes::{Field, FieldRef, Fields};
use arrow_schema::DataType;
use datafusion_common::Result;
/// Creates one struct field per entry of `data_types`.
///
/// The field at position `i` is named `c{i}` (`c0`, `c1`, ...), carries the
/// `i`-th data type and is declared nullable. Field order follows the order
/// of `data_types`.
pub(super) fn build_struct_fields(data_types: &[DataType]) -> Result<Fields> {
    let mut fields = Vec::with_capacity(data_types.len());
    for (position, data_type) in data_types.iter().enumerate() {
        // Names are positional so they can be regenerated from the types alone.
        let name = format!("c{position}");
        fields.push(Field::new(name, data_type.clone(), true));
    }
    Ok(fields.into_iter().collect())
}
pub(super) fn build_struct_inlist_values(
join_key_arrays: &[ArrayRef],
) -> Result<Option<ArrayRef>> {
let source_array: ArrayRef = if join_key_arrays.len() == 1 {
Arc::clone(&join_key_arrays[0])
} else {
let fields = build_struct_fields(
&join_key_arrays
.iter()
.map(|arr| arr.data_type().clone())
.collect::<Vec<_>>(),
)?;
let arrays_with_fields: Vec<(FieldRef, ArrayRef)> = fields
.iter()
.cloned()
.zip(join_key_arrays.iter().cloned())
.collect();
Arc::new(StructArray::from(arrays_with_fields))
};
Ok(Some(source_array))
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{
        DictionaryArray, Int8Array, Int32Array, StringArray, StringDictionaryBuilder,
    };
    use arrow_schema::DataType;
    use std::sync::Arc;

    /// Calls `build_struct_inlist_values` and unwraps both the `Result` and
    /// the `Option`, panicking if either is empty.
    fn inlist_values(columns: &[ArrayRef]) -> ArrayRef {
        build_struct_inlist_values(columns).unwrap().unwrap()
    }

    /// Struct data type that `build_struct_fields` yields for `column_types`.
    fn struct_type_for(column_types: &[DataType]) -> DataType {
        DataType::Struct(build_struct_fields(column_types).unwrap())
    }

    /// A single key column comes back equal to the input column.
    #[test]
    fn test_build_single_column_inlist_array() {
        let column: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 2, 1]));

        let values = inlist_values(&[Arc::clone(&column)]);

        assert_eq!(&column, &values);
    }

    /// Two key columns are wrapped in a struct with fields `c0` and `c1`.
    #[test]
    fn test_build_multi_column_inlist() {
        let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 2, 1]));
        let strings: ArrayRef =
            Arc::new(StringArray::from(vec!["a", "b", "c", "b", "a"]));

        let values = inlist_values(&[ints, strings]);

        let expected_type = struct_type_for(&[DataType::Int32, DataType::Utf8]);
        assert_eq!(*values.data_type(), expected_type);
    }

    /// A dictionary-encoded key column keeps its dictionary type inside the
    /// resulting struct.
    #[test]
    fn test_build_multi_column_inlist_with_dictionary() {
        let mut dict_builder =
            StringDictionaryBuilder::<arrow::datatypes::Int8Type>::new();
        for _ in 0..3 {
            dict_builder.append_value("foo");
        }
        let dict_column: ArrayRef = Arc::new(dict_builder.finish());
        let int_column: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));

        let values = inlist_values(&[dict_column, int_column]);

        assert_eq!(values.len(), 3);
        let dictionary_type =
            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
        assert_eq!(
            *values.data_type(),
            struct_type_for(&[dictionary_type, DataType::Int32])
        );
    }

    /// A single dictionary-encoded key column keeps its length and data type.
    #[test]
    fn test_build_single_column_dictionary_inlist() {
        let dict_keys = Int8Array::from(vec![0i8, 0, 0]);
        let dict_values = Arc::new(StringArray::from(vec!["foo"]));
        let dict_column: ArrayRef =
            Arc::new(DictionaryArray::new(dict_keys, dict_values));

        let values = inlist_values(&[Arc::clone(&dict_column)]);

        assert_eq!(values.len(), 3);
        assert_eq!(values.data_type(), dict_column.data_type());
    }
}