use std::sync::Arc;
use arrow::array::*;
use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow::datatypes::*;
use tokio::io::AsyncReadExt;
use super::ClickHouseArrowDeserializer;
use crate::arrow::builder::TypedBuilder;
use crate::arrow::builder::list::TypedListBuilder;
use crate::arrow::types::LIST_ITEM_FIELD_NAME;
use crate::io::{ClickHouseBytesRead, ClickHouseRead};
use crate::{Error, Result, Type};
/// Reads `$rows` raw 8-byte list offsets from `$r` into `$rbuf`, preceded by 8 zero bytes.
///
/// After the call, `$rbuf` holds exactly `8 + $rows * 8` bytes: a zeroed leading slot (the
/// initial `0` offset) followed by the bytes read from `$r`. The macro evaluates to that total
/// byte count.
///
/// Two forms are provided:
/// - `bulk_offsets!(reader, buf, rows)` fills the buffer via `reader.try_copy_to_slice(..)`.
/// - `bulk_offsets!(tokio; reader, buf, rows)` fills it via `reader.read_exact(..).await`.
///
/// Both forms use `?`, so they must be expanded inside a function whose error type can absorb
/// the reader's error (and, for the `tokio` form, inside an async context).
macro_rules! bulk_offsets {
    ($r:expr, $rbuf:expr, $rows:expr) => {{
        $rbuf.clear();
        let total_bytes_needed = 8 + ($rows * 8);
        // `clear` + `resize(_, 0)` allocates as needed and leaves every byte zeroed, so the
        // leading 8-byte zero offset is already in place without a separate write.
        $rbuf.resize(total_bytes_needed, 0);
        let _ = $r.try_copy_to_slice(&mut $rbuf[8..total_bytes_needed])?;
        total_bytes_needed
    }};
    (tokio; $r:expr, $rbuf:expr, $rows:expr) => {{
        $rbuf.clear();
        let total_bytes_needed = 8 + ($rows * 8);
        // `clear` + `resize(_, 0)` allocates as needed and leaves every byte zeroed, so the
        // leading 8-byte zero offset is already in place without a separate write.
        $rbuf.resize(total_bytes_needed, 0);
        let _ = $r.read_exact(&mut $rbuf[8..total_bytes_needed]).await?;
        total_bytes_needed
    }};
}
pub(super) use bulk_offsets;
#[expect(clippy::cast_possible_wrap)]
#[expect(clippy::cast_possible_truncation)]
pub(crate) async fn deserialize_async<R: ClickHouseRead>(
inner_type: &Type,
builder: &mut TypedBuilder,
data_type: &DataType,
reader: &mut R,
rows: usize,
nulls: &[u8],
rbuffer: &mut Vec<u8>,
) -> Result<ArrayRef> {
type B = TypedListBuilder;
let (DataType::List(inner) | DataType::ListView(inner) | DataType::LargeList(inner)) =
data_type
else {
return Err(Error::ArrowDeserialize(format!("Unexpected list type: {data_type:?}")));
};
let TypedBuilder::List(list_builder) = builder else {
return Err(Error::ArrowDeserialize(format!(
"Unexpected builder type: {}",
builder.as_ref()
)));
};
let inner_data_type = inner.data_type();
let inner_nullable = inner_type.strip_low_cardinality().is_nullable();
macro_rules! list_deser {
($b:expr, $b_ty:ident, $t:ty) => {{
let offset_bytes = bulk_offsets!(tokio; reader, rbuffer, rows);
let offsets: &[u64] = bytemuck::cast_slice::<u8, u64>(&rbuffer[..offset_bytes]);
let offset_buffer =
OffsetBuffer::new(offsets.iter().map(|&o| o as $t).collect::<ScalarBuffer<_>>());
let total_values = *offsets.last().unwrap_or(&0) as usize;
let inner_array = inner_type.deserialize_arrow_async(
$b,
reader,
inner_data_type,
total_values,
&[],
rbuffer,
).await?;
let null_buffer = (!nulls.is_empty())
.then_some(NullBuffer::from(nulls.iter().map(|&n| n == 0).collect::<Vec<bool>>()));
let inner_dt = inner_array.data_type().clone();
let field = Arc::new(Field::new(LIST_ITEM_FIELD_NAME, inner_dt, inner_nullable));
let list_array = $b_ty::new(field, offset_buffer, inner_array, null_buffer);
if list_array.len() != rows {
return Err(Error::DeserializeError(format!(
"ListArray length {} does not match expected rows {rows}",
list_array.len()
)));
}
Ok(Arc::new(list_array))
}};
}
match list_builder {
B::List(b) => list_deser!(b, ListArray, i32),
B::LargeList(b) => list_deser!(b, LargeListArray, i64),
B::FixedList((size, b)) => {
let inner_array = inner_type
.deserialize_arrow_async(b, reader, inner_data_type, rows, &[], rbuffer)
.await?;
let null_buffer = (!nulls.is_empty())
.then_some(NullBuffer::from(nulls.iter().map(|&n| n == 0).collect::<Vec<bool>>()));
let inner_dt = inner_array.data_type().clone();
let field = Arc::new(Field::new(LIST_ITEM_FIELD_NAME, inner_dt, inner_nullable));
let list_array = FixedSizeListArray::new(field, *size, inner_array, null_buffer);
if list_array.len() != rows {
return Err(Error::DeserializeError(format!(
"ListArray length {} does not match expected rows {rows}",
list_array.len()
)));
}
Ok(Arc::new(list_array))
}
}
}
#[expect(clippy::cast_possible_truncation)]
#[expect(clippy::cast_possible_wrap)]
pub(super) fn deserialize<R: ClickHouseBytesRead>(
builder: &mut TypedListBuilder,
reader: &mut R,
inner_type: &Type,
data_type: &DataType,
rows: usize,
nulls: &[u8],
rbuffer: &mut Vec<u8>,
) -> Result<ArrayRef> {
type B = TypedListBuilder;
let (DataType::List(inner) | DataType::ListView(inner) | DataType::LargeList(inner)) =
data_type
else {
return Err(Error::ArrowDeserialize(format!("Unexpected list type: {data_type:?}")));
};
let inner_data_type = inner.data_type();
let inner_nullable = inner_type.strip_low_cardinality().is_nullable();
macro_rules! list_deser {
($b:expr, $b_ty:ident, $t:ty) => {{
let offset_bytes = bulk_offsets!(reader, rbuffer, rows);
let offsets: &[u64] = bytemuck::cast_slice::<u8, u64>(&rbuffer[..offset_bytes]);
let offset_buffer =
OffsetBuffer::new(offsets.iter().map(|&o| o as $t).collect::<ScalarBuffer<_>>());
let total_values = *offsets.last().unwrap_or(&0) as usize;
let inner_array = inner_type.deserialize_arrow(
$b,
reader,
inner_data_type,
total_values,
&[],
rbuffer,
)?;
let null_buffer = (!nulls.is_empty())
.then_some(NullBuffer::from(nulls.iter().map(|&n| n == 0).collect::<Vec<bool>>()));
let inner_dt = inner_array.data_type().clone();
let field = Arc::new(Field::new(LIST_ITEM_FIELD_NAME, inner_dt, inner_nullable));
let list_array = $b_ty::new(field, offset_buffer, inner_array, null_buffer);
if list_array.len() != rows {
return Err(Error::DeserializeError(format!(
"ListArray length {} does not match expected rows {rows}",
list_array.len()
)));
}
Ok(Arc::new(list_array))
}};
}
match builder {
B::List(b) => list_deser!(b, ListArray, i32),
B::LargeList(b) => list_deser!(b, LargeListArray, i64),
B::FixedList((size, b)) => {
let inner_array =
inner_type.deserialize_arrow(b, reader, inner_data_type, rows, &[], rbuffer)?;
let null_buffer = (!nulls.is_empty())
.then_some(NullBuffer::from(nulls.iter().map(|&n| n == 0).collect::<Vec<bool>>()));
let inner_dt = inner_array.data_type().clone();
let field = Arc::new(Field::new(LIST_ITEM_FIELD_NAME, inner_dt, inner_nullable));
let list_array = FixedSizeListArray::new(field, *size, inner_array, null_buffer);
if list_array.len() != rows {
return Err(Error::DeserializeError(format!(
"ListArray length {} does not match expected rows {rows}",
list_array.len()
)));
}
Ok(Arc::new(list_array))
}
}
}
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use arrow::array::*;
    use arrow::datatypes::*;
    use chrono_tz::Tz;

    use super::*;
    use crate::ArrowOptions;
    use crate::arrow::block::LIST_ITEM_FIELD_NAME;
    use crate::arrow::ch_to_arrow_type;
    use crate::native::types::Type;

    /// Builds an Arrow `List` type whose item field uses the crate's item field name.
    fn list_of(item: DataType, item_nullable: bool) -> DataType {
        DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, item, item_nullable)))
    }

    /// Runs `deserialize_async` over `wire` for a column of `Array(element)`.
    async fn decode(
        wire: Vec<u8>,
        element: &Type,
        data_type: &DataType,
        rows: usize,
        nulls: &[u8],
    ) -> Result<ArrayRef> {
        let mut reader = Cursor::new(wire);
        let mut builder =
            TypedBuilder::try_new(&Type::Array(Box::new(element.clone())), data_type).unwrap();
        deserialize_async(element, &mut builder, data_type, &mut reader, rows, nulls, &mut vec![])
            .await
    }

    /// Downcasts a deserialized column to `ListArray`, panicking on any other array type.
    fn as_list(array: &ArrayRef) -> &ListArray {
        array.as_any().downcast_ref::<ListArray>().unwrap()
    }

    /// Downcasts the child values of `list` to the concrete array type `T`.
    fn child<T: 'static>(list: &ListArray) -> &T {
        list.values().as_any().downcast_ref::<T>().unwrap()
    }

    /// Copies the offsets of `list` into a plain vector for comparison.
    fn offsets_of(list: &ListArray) -> Vec<i32> {
        list.offsets().iter().copied().collect()
    }

    #[tokio::test]
    async fn test_deserialize_list_int32() {
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Values (i32 LE): 1, 2, 3, 4, 5
            1, 0, 0, 0,
            2, 0, 0, 0,
            3, 0, 0, 0,
            4, 0, 0, 0,
            5, 0, 0, 0,
        ];
        let data_type = list_of(DataType::Int32, false);

        let array = decode(wire, &Type::Int32, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize List(Int32)");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(child::<Int32Array>(list), &Int32Array::from(vec![1, 2, 3, 4, 5]));
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_nullable_list_int32() {
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Values (i32 LE): 1, 2, 3, 4, 5
            1, 0, 0, 0,
            2, 0, 0, 0,
            3, 0, 0, 0,
            4, 0, 0, 0,
            5, 0, 0, 0,
        ];
        let data_type = list_of(DataType::Int32, false);

        // Row markers: the second row is null.
        let array = decode(wire, &Type::Int32, &data_type, 3, &[0, 1, 0])
            .await
            .expect("Failed to deserialize nullable List(Int32)");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(child::<Int32Array>(list), &Int32Array::from(vec![1, 2, 3, 4, 5]));
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls().unwrap().iter().collect::<Vec<bool>>(), vec![
            true, false, true
        ]);
    }

    #[tokio::test]
    async fn test_deserialize_list_nullable_int32() {
        let element = Type::Nullable(Box::new(Type::Int32));
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Element null markers: second and fourth elements are null
            0, 1, 0, 1, 0,
            // Values (i32 LE): 1, 0, 3, 0, 5
            1, 0, 0, 0,
            0, 0, 0, 0,
            3, 0, 0, 0,
            0, 0, 0, 0,
            5, 0, 0, 0,
        ];
        let data_type = list_of(DataType::Int32, true);

        let array = decode(wire, &element, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize List(Nullable(Int32))");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(
            child::<Int32Array>(list),
            &Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)])
        );
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_zero_rows() {
        // No rows means no bytes on the wire at all.
        let wire = Vec::new();
        let data_type = list_of(DataType::Int32, false);

        let array = decode(wire, &Type::Int32, &data_type, 0, &[])
            .await
            .expect("Failed to deserialize List(Int32) with zero rows");

        let list = as_list(&array);
        assert_eq!(list.len(), 0);
        assert_eq!(child::<Int32Array>(list), &Int32Array::from(Vec::<i32>::new()));
        assert_eq!(offsets_of(list), vec![0]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_string() {
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Strings as (length, bytes): "a", "b", "c", "d", "e"
            1, b'a',
            1, b'b',
            1, b'c',
            1, b'd',
            1, b'e',
        ];
        let data_type = list_of(DataType::Utf8, false);

        let array = decode(wire, &Type::String, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize Array(String)");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(child::<StringArray>(list), &StringArray::from(vec!["a", "b", "c", "d", "e"]));
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_nullable_string() {
        let element = Type::Nullable(Box::new(Type::String));
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Element null markers: second and fourth elements are null
            0, 1, 0, 1, 0,
            // Strings as (length, bytes): "a", "", "c", "", "e"
            1, b'a',
            0,
            1, b'c',
            0,
            1, b'e',
        ];
        let data_type = list_of(DataType::Utf8, true);

        let array = decode(wire, &element, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize Array(Nullable(String))");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(
            child::<StringArray>(list),
            &StringArray::from(vec![Some("a"), None, Some("c"), None, Some("e")])
        );
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_nested_list_int32() {
        let element = Type::Array(Box::new(Type::Int32));
        let wire = vec![
            // Outer end offsets (u64 LE): 2, 3
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            // Inner end offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Values (i32 LE): 1, 2, 3, 4, 5
            1, 0, 0, 0,
            2, 0, 0, 0,
            3, 0, 0, 0,
            4, 0, 0, 0,
            5, 0, 0, 0,
        ];
        let data_type = list_of(list_of(DataType::Int32, false), false);

        let array = decode(wire, &element, &data_type, 2, &[])
            .await
            .expect("Failed to deserialize Array(Array(Int32))");

        let list = as_list(&array);
        let inner = child::<ListArray>(list);
        assert_eq!(list.len(), 2);
        assert_eq!(inner.len(), 3);
        assert_eq!(child::<Int32Array>(inner), &Int32Array::from(vec![1, 2, 3, 4, 5]));
        assert_eq!(offsets_of(list), vec![0, 2, 3]);
        assert_eq!(offsets_of(inner), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_nullable_array_int32() {
        let element = Type::Nullable(Box::new(Type::Array(Box::new(Type::Int32))));
        let wire = vec![
            // Outer end offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Null markers for the 5 inner arrays: second and fourth are null
            0, 1, 0, 1, 0,
            // Inner end offsets (u64 LE): 2, 2, 3, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Values (i32 LE): 1, 2, 3, 4, 5
            1, 0, 0, 0,
            2, 0, 0, 0,
            3, 0, 0, 0,
            4, 0, 0, 0,
            5, 0, 0, 0,
        ];
        let data_type = list_of(list_of(DataType::Int32, false), true);

        let array = decode(wire, &element, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize Array(Nullable(Array(Int32)))");

        let list = as_list(&array);
        let inner = child::<ListArray>(list);
        assert_eq!(list.len(), 3);
        assert_eq!(inner.len(), 5);
        assert_eq!(child::<Int32Array>(inner), &Int32Array::from(vec![1, 2, 3, 4, 5]));
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(offsets_of(inner), vec![0, 2, 2, 3, 3, 5]);
        assert_eq!(
            inner.nulls().unwrap().iter().collect::<Vec<bool>>(),
            vec![true, false, true, false, true]
        );
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_array_nullable_int32() {
        let element = Type::Array(Box::new(Type::Nullable(Box::new(Type::Int32))));
        let wire = vec![
            // Outer end offsets (u64 LE): 2, 3
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            // Inner end offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Element null markers: second and fourth elements are null
            0, 1, 0, 1, 0,
            // Values (i32 LE): 1, 0, 3, 0, 5
            1, 0, 0, 0,
            0, 0, 0, 0,
            3, 0, 0, 0,
            0, 0, 0, 0,
            5, 0, 0, 0,
        ];
        let data_type = list_of(list_of(DataType::Int32, true), false);

        let array = decode(wire, &element, &data_type, 2, &[])
            .await
            .expect("Failed to deserialize Array(Array(Nullable(Int32)))");

        let list = as_list(&array);
        let inner = child::<ListArray>(list);
        assert_eq!(list.len(), 2);
        assert_eq!(inner.len(), 3);
        assert_eq!(
            child::<Int32Array>(inner),
            &Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)])
        );
        assert_eq!(offsets_of(list), vec![0, 2, 3]);
        assert_eq!(offsets_of(inner), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_nested_list_string() {
        let element = Type::Array(Box::new(Type::String));
        let wire = vec![
            // Outer end offsets (u64 LE): 2, 3
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            // Inner end offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Strings as (length, bytes): "a", "b", "c", "d", "e"
            1, b'a',
            1, b'b',
            1, b'c',
            1, b'd',
            1, b'e',
        ];
        let data_type = list_of(list_of(DataType::Utf8, false), false);

        let array = decode(wire, &element, &data_type, 2, &[])
            .await
            .expect("Failed to deserialize Array(Array(String))");

        let list = as_list(&array);
        let inner = child::<ListArray>(list);
        assert_eq!(list.len(), 2);
        assert_eq!(inner.len(), 3);
        assert_eq!(child::<StringArray>(inner), &StringArray::from(vec!["a", "b", "c", "d", "e"]));
        assert_eq!(offsets_of(list), vec![0, 2, 3]);
        assert_eq!(offsets_of(inner), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_nullable_array_string() {
        let element = Type::Nullable(Box::new(Type::Array(Box::new(Type::String))));
        let wire = vec![
            // Outer end offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Null markers for the 5 inner arrays: second and fourth are null
            0, 1, 0, 1, 0,
            // Inner end offsets (u64 LE): 2, 2, 3, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Strings as (length, bytes): "a", "b", "c", "d", "e"
            1, b'a',
            1, b'b',
            1, b'c',
            1, b'd',
            1, b'e',
        ];
        let data_type = list_of(list_of(DataType::Utf8, false), true);

        let array = decode(wire, &element, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize Array(Nullable(Array(String)))");

        let list = as_list(&array);
        let inner = child::<ListArray>(list);
        assert_eq!(list.len(), 3);
        assert_eq!(inner.len(), 5);
        assert_eq!(child::<StringArray>(inner), &StringArray::from(vec!["a", "b", "c", "d", "e"]));
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(offsets_of(inner), vec![0, 2, 2, 3, 3, 5]);
        assert_eq!(
            inner.nulls().unwrap().iter().collect::<Vec<bool>>(),
            vec![true, false, true, false, true]
        );
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_array_nullable_string() {
        let element = Type::Array(Box::new(Type::Nullable(Box::new(Type::String))));
        let wire = vec![
            // Outer end offsets (u64 LE): 2, 3
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            // Inner end offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Element null markers: second and fourth elements are null
            0, 1, 0, 1, 0,
            // Strings as (length, bytes): "a", "", "c", "", "e"
            1, b'a',
            0,
            1, b'c',
            0,
            1, b'e',
        ];
        let data_type = list_of(list_of(DataType::Utf8, true), false);

        let array = decode(wire, &element, &data_type, 2, &[])
            .await
            .expect("Failed to deserialize Array(Array(Nullable(String)))");

        let list = as_list(&array);
        let inner = child::<ListArray>(list);
        assert_eq!(list.len(), 2);
        assert_eq!(inner.len(), 3);
        assert_eq!(
            child::<StringArray>(inner),
            &StringArray::from(vec![Some("a"), None, Some("c"), None, Some("e")])
        );
        assert_eq!(offsets_of(list), vec![0, 2, 3]);
        assert_eq!(offsets_of(inner), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_empty_inner() {
        let wire = vec![
            // End offsets (u64 LE): 0, 0 - both rows are empty lists
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let data_type = list_of(DataType::Int32, false);

        let array = decode(wire, &Type::Int32, &data_type, 2, &[])
            .await
            .expect("Failed to deserialize Array(Int32) with empty inner arrays");

        let list = as_list(&array);
        assert_eq!(list.len(), 2);
        assert_eq!(child::<Int32Array>(list), &Int32Array::from(Vec::<i32>::new()));
        assert_eq!(offsets_of(list), vec![0, 0, 0]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_float64() {
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Values (f64 LE): 1.0, 2.0, 3.0, 4.0, 5.0
            0, 0, 0, 0, 0, 0, 240, 63,
            0, 0, 0, 0, 0, 0, 0, 64,
            0, 0, 0, 0, 0, 0, 8, 64,
            0, 0, 0, 0, 0, 0, 16, 64,
            0, 0, 0, 0, 0, 0, 20, 64,
        ];
        let data_type = list_of(DataType::Float64, false);

        let array = decode(wire, &Type::Float64, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize Array(Float64)");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(
            child::<Float64Array>(list),
            &Float64Array::from(vec![1.0, 2.0, 3.0, 4.0, 5.0])
        );
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_nullable_float64() {
        let element = Type::Nullable(Box::new(Type::Float64));
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Element null markers: second and fourth elements are null
            0, 1, 0, 1, 0,
            // Values (f64 LE): 1.0, 0.0, 3.0, 0.0, 5.0
            0, 0, 0, 0, 0, 0, 240, 63,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 8, 64,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 20, 64,
        ];
        let data_type = list_of(DataType::Float64, true);

        let array = decode(wire, &element, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize Array(Nullable(Float64))");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(
            child::<Float64Array>(list),
            &Float64Array::from(vec![Some(1.0), None, Some(3.0), None, Some(5.0)])
        );
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_datetime() {
        let element = Type::DateTime(Tz::UTC);
        let wire = vec![
            // End offsets (u64 LE): 2, 4
            2, 0, 0, 0, 0, 0, 0, 0,
            4, 0, 0, 0, 0, 0, 0, 0,
            // Values (4 bytes LE each): 1000, 2000, 3000, 4000
            232, 3, 0, 0,
            208, 7, 0, 0,
            184, 11, 0, 0,
            160, 15, 0, 0,
        ];
        let item_type = DataType::Timestamp(TimeUnit::Second, Some(Arc::from("UTC")));
        let data_type = list_of(item_type, false);

        let array = decode(wire, &element, &data_type, 2, &[])
            .await
            .expect("Failed to deserialize Array(DateTime)");

        let list = as_list(&array);
        assert_eq!(list.len(), 2);
        assert_eq!(
            child::<TimestampSecondArray>(list),
            &TimestampSecondArray::from(vec![1000, 2000, 3000, 4000])
                .with_timezone_opt(Some("UTC"))
        );
        assert_eq!(offsets_of(list), vec![0, 2, 4]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_nullable_datetime() {
        let element = Type::Nullable(Box::new(Type::DateTime(Tz::UTC)));
        let wire = vec![
            // End offsets (u64 LE): 2, 3, 5
            2, 0, 0, 0, 0, 0, 0, 0,
            3, 0, 0, 0, 0, 0, 0, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            // Element null markers: second and fourth elements are null
            0, 1, 0, 1, 0,
            // Values (4 bytes LE each): 1000, 0, 3000, 0, 5000
            232, 3, 0, 0,
            0, 0, 0, 0,
            184, 11, 0, 0,
            0, 0, 0, 0,
            136, 19, 0, 0,
        ];
        let item_type = DataType::Timestamp(TimeUnit::Second, Some(Arc::from("UTC")));
        let data_type = list_of(item_type, true);

        let array = decode(wire, &element, &data_type, 3, &[])
            .await
            .expect("Failed to deserialize Array(Nullable(DateTime))");

        let list = as_list(&array);
        assert_eq!(list.len(), 3);
        assert_eq!(
            child::<TimestampSecondArray>(list),
            &TimestampSecondArray::from(vec![Some(1000), None, Some(3000), None, Some(5000)],)
                .with_timezone_opt(Some("UTC"))
        );
        assert_eq!(offsets_of(list), vec![0, 2, 3, 5]);
        assert_eq!(list.nulls(), None);
    }

    #[tokio::test]
    async fn test_deserialize_list_low_cardinality_nullable_string() {
        let element = Type::LowCardinality(Box::new(Type::Nullable(Box::new(Type::String))));
        let rows = 5;
        let wire = vec![
            // End offsets (u64 LE): 2, 2, 4, 7, 8
            2, 0, 0, 0, 0, 0, 0, 0,
            2, 0, 0, 0, 0, 0, 0, 0,
            4, 0, 0, 0, 0, 0, 0, 0,
            7, 0, 0, 0, 0, 0, 0, 0,
            8, 0, 0, 0, 0, 0, 0, 0,
            // Presumably the LowCardinality flags word (8 bytes) - TODO confirm
            0, 2, 0, 0, 0, 0, 0, 0,
            // Presumably the dictionary size (u64 LE): 4 - TODO confirm
            4, 0, 0, 0, 0, 0, 0, 0,
            // Dictionary entries as (length, bytes): "", "low", "card", "test"
            0,
            3, b'l', b'o', b'w',
            4, b'c', b'a', b'r', b'd',
            4, b't', b'e', b's', b't',
            // Presumably the key count (u64 LE): 8 - TODO confirm
            8, 0, 0, 0, 0, 0, 0, 0,
            // Keys, one byte each
            1, 0, 1, 2, 1, 0, 3, 3,
        ];
        let opts = Some(ArrowOptions::default().with_strings_as_strings(true));
        let data_type = ch_to_arrow_type(&Type::Array(Box::new(element.clone())), opts).unwrap().0;

        let array = decode(wire, &element, &data_type, rows, &[])
            .await
            .expect("Failed to deserialize Array(LowCardinality(Nullable(String)))");

        let list = as_list(&array);
        let dictionary = child::<DictionaryArray<Int32Type>>(list);
        assert_eq!(list.len(), rows);
        assert_eq!(offsets_of(list), vec![0, 2, 2, 4, 7, 8]);
        assert_eq!(list.nulls(), None);

        let expected_keys = Int32Array::from(vec![
            Some(1),
            Some(0),
            Some(1),
            Some(2),
            Some(1),
            Some(0),
            Some(3),
            Some(3),
        ]);
        let expected_values =
            StringArray::from(vec![None, Some("low"), Some("card"), Some("test")]);
        let expected_dict =
            DictionaryArray::<Int32Type>::try_new(expected_keys, Arc::new(expected_values))
                .unwrap();
        assert_eq!(dictionary, &expected_dict);
    }
}
#[cfg(test)]
mod tests_sync {
use std::io::Cursor;
use arrow::array::*;
use arrow::datatypes::*;
use chrono_tz::Tz;
use super::*;
use crate::ArrowOptions;
use crate::arrow::block::LIST_ITEM_FIELD_NAME;
use crate::arrow::ch_to_arrow_type;
use crate::native::types::Type;
/// Runs the synchronous `deserialize` over `input` using a freshly built list builder and a
/// throwaway scratch buffer, returning whatever the deserializer returns.
fn test_list_deser(
    input: Vec<u8>,
    inner_type: &Type,
    data_type: &DataType,
    rows: usize,
    nulls: &[u8],
) -> Result<ArrayRef> {
    let mut scratch = Vec::new();
    let mut cursor = Cursor::new(input);
    let mut list_builder = TypedListBuilder::try_new(inner_type, data_type).unwrap();
    deserialize(&mut list_builder, &mut cursor, inner_type, data_type, rows, nulls, &mut scratch)
}
#[test]
fn test_deserialize_list_int32() {
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false);
    let data_type = DataType::List(Arc::new(item_field));
    let wire = vec![
        // End offsets (u64 LE): 2, 3, 5
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Values (i32 LE): 1, 2, 3, 4, 5
        1, 0, 0, 0,
        2, 0, 0, 0,
        3, 0, 0, 0,
        4, 0, 0, 0,
        5, 0, 0, 0,
    ];

    let array = test_list_deser(wire, &Type::Int32, &data_type, 3, &[])
        .expect("Failed to deserialize List(Int32)");

    let list = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = list.values().as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(list.len(), 3);
    assert_eq!(items, &Int32Array::from(vec![1, 2, 3, 4, 5]));
    assert_eq!(list.offsets().iter().copied().collect::<Vec<_>>(), vec![0, 2, 3, 5]);
    assert_eq!(list.nulls(), None);
}
#[test]
fn test_deserialize_nullable_list_int32() {
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false);
    let data_type = DataType::List(Arc::new(item_field));
    // Row markers: the second row is null.
    let row_nulls = [0, 1, 0];
    let wire = vec![
        // End offsets (u64 LE): 2, 3, 5
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Values (i32 LE): 1, 2, 3, 4, 5
        1, 0, 0, 0,
        2, 0, 0, 0,
        3, 0, 0, 0,
        4, 0, 0, 0,
        5, 0, 0, 0,
    ];

    let array = test_list_deser(wire, &Type::Int32, &data_type, 3, &row_nulls)
        .expect("Failed to deserialize nullable List(Int32)");

    let list = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = list.values().as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(list.len(), 3);
    assert_eq!(items, &Int32Array::from(vec![1, 2, 3, 4, 5]));
    assert_eq!(list.offsets().iter().copied().collect::<Vec<_>>(), vec![0, 2, 3, 5]);
    let validity: Vec<bool> = list.nulls().unwrap().iter().collect();
    assert_eq!(validity, vec![true, false, true]);
}
/// `Array(Nullable(Int32))`: the item null mask sits between the offsets and the item
/// values, and masked items must surface as `None` in the flattened child array.
#[test]
fn test_deserialize_list_nullable_int32() {
    let element_type = Type::Nullable(Box::new(Type::Int32));
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, true);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Item null mask, one byte per item (1 = null).
        0, 1, 0, 1, 0,
        // Five Int32 slots, 4 bytes each; masked slots carry 0.
        1, 0, 0, 0,
        0, 0, 0, 0,
        3, 0, 0, 0,
        0, 0, 0, 0,
        5, 0, 0, 0,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize List(Nullable(Int32))");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(lists.len(), 3);
    let expected_items = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 3, 5]);
    assert!(lists.nulls().is_none());
}
/// Zero rows and an empty payload: the result must be an empty list array whose offsets
/// buffer holds only the leading `0`.
#[test]
fn test_deserialize_list_zero_rows() {
    let element_type = Type::Int32;
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false);
    let list_type = DataType::List(Arc::new(item_field));

    let array = test_list_deser(Vec::new(), &element_type, &list_type, 0, &[])
        .expect("Failed to deserialize List(Int32) with zero rows");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(lists.len(), 0);
    let expected_items = Int32Array::from(Vec::<i32>::new());
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0]);
    assert!(lists.nulls().is_none());
}
/// Three-row `Array(String)` column of single-character strings, no null masks.
#[test]
fn test_deserialize_list_string() {
    let element_type = Type::String;
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Utf8, false);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Five strings, each a length byte followed by its bytes.
        1, b'a',
        1, b'b',
        1, b'c',
        1, b'd',
        1, b'e',
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize Array(String)");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<StringArray>().unwrap();

    assert_eq!(lists.len(), 3);
    let expected_items = StringArray::from(vec!["a", "b", "c", "d", "e"]);
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 3, 5]);
    assert!(lists.nulls().is_none());
}
/// `Array(Nullable(String))`: masked items are encoded as zero-length strings and must
/// come back as `None`.
#[test]
fn test_deserialize_list_nullable_string() {
    let element_type = Type::Nullable(Box::new(Type::String));
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Utf8, true);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Item null mask, one byte per item (1 = null).
        0, 1, 0, 1, 0,
        // Five length-prefixed strings; masked slots have length 0.
        1, b'a',
        0,
        1, b'c',
        0,
        1, b'e',
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize Array(Nullable(String))");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<StringArray>().unwrap();

    assert_eq!(lists.len(), 3);
    let expected_items = StringArray::from(vec![Some("a"), None, Some("c"), None, Some("e")]);
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 3, 5]);
    assert!(lists.nulls().is_none());
}
/// Two-row `Array(Array(Int32))`: outer offsets come first, then the inner offsets, then
/// the flattened Int32 items.
#[test]
fn test_deserialize_nested_list_int32() {
    let element_type = Type::Array(Box::new(Type::Int32));
    let leaf_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false);
    let inner_list_type = DataType::List(Arc::new(leaf_field));
    let outer_field = Field::new(LIST_ITEM_FIELD_NAME, inner_list_type, false);
    let list_type = DataType::List(Arc::new(outer_field));
    let bytes = vec![
        // Outer cumulative end offsets, 8 bytes each (little-endian): 2, 3.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        // Inner cumulative end offsets: 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Five Int32 items, 4 bytes each (little-endian): 1, 2, 3, 4, 5.
        1, 0, 0, 0,
        2, 0, 0, 0,
        3, 0, 0, 0,
        4, 0, 0, 0,
        5, 0, 0, 0,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 2, &[])
        .expect("Failed to deserialize Array(Array(Int32))");
    let outer = array.as_any().downcast_ref::<ListArray>().unwrap();
    let inner = outer.values().as_any().downcast_ref::<ListArray>().unwrap();
    let items = inner.values().as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(outer.len(), 2);
    assert_eq!(inner.len(), 3);
    let expected_items = Int32Array::from(vec![1, 2, 3, 4, 5]);
    assert_eq!(items, &expected_items);
    let outer_offsets: Vec<i32> = outer.offsets().iter().copied().collect();
    assert_eq!(outer_offsets, vec![0, 2, 3]);
    let inner_offsets: Vec<i32> = inner.offsets().iter().copied().collect();
    assert_eq!(inner_offsets, vec![0, 2, 3, 5]);
    assert!(outer.nulls().is_none());
}
/// `Array(Nullable(Array(Int32)))`: the inner lists carry a null mask, so null inner
/// lists appear as empty ranges in the inner offsets and as `false` in the inner validity.
#[test]
fn test_deserialize_list_nullable_array_int32() {
    let element_type = Type::Nullable(Box::new(Type::Array(Box::new(Type::Int32))));
    let leaf_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false);
    let inner_list_type = DataType::List(Arc::new(leaf_field));
    let outer_field = Field::new(LIST_ITEM_FIELD_NAME, inner_list_type, true);
    let list_type = DataType::List(Arc::new(outer_field));
    let bytes = vec![
        // Outer cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Null mask for the five inner lists (1 = null).
        0, 1, 0, 1, 0,
        // Inner cumulative end offsets: 2, 2, 3, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Five Int32 items, 4 bytes each (little-endian): 1, 2, 3, 4, 5.
        1, 0, 0, 0,
        2, 0, 0, 0,
        3, 0, 0, 0,
        4, 0, 0, 0,
        5, 0, 0, 0,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize Array(Nullable(Array(Int32)))");
    let outer = array.as_any().downcast_ref::<ListArray>().unwrap();
    let inner = outer.values().as_any().downcast_ref::<ListArray>().unwrap();
    let items = inner.values().as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(outer.len(), 3);
    assert_eq!(inner.len(), 5);
    let expected_items = Int32Array::from(vec![1, 2, 3, 4, 5]);
    assert_eq!(items, &expected_items);
    let outer_offsets: Vec<i32> = outer.offsets().iter().copied().collect();
    assert_eq!(outer_offsets, vec![0, 2, 3, 5]);
    let inner_offsets: Vec<i32> = inner.offsets().iter().copied().collect();
    assert_eq!(inner_offsets, vec![0, 2, 2, 3, 3, 5]);
    let inner_validity: Vec<bool> = inner.nulls().unwrap().iter().collect();
    assert_eq!(inner_validity, vec![true, false, true, false, true]);
    assert!(outer.nulls().is_none());
}
/// `Array(Array(Nullable(Int32)))`: nullability lives on the leaf items only, so the
/// mask follows both offset sections and precedes the Int32 slots.
#[test]
fn test_deserialize_list_array_nullable_int32() {
    let element_type = Type::Array(Box::new(Type::Nullable(Box::new(Type::Int32))));
    let leaf_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, true);
    let inner_list_type = DataType::List(Arc::new(leaf_field));
    let outer_field = Field::new(LIST_ITEM_FIELD_NAME, inner_list_type, false);
    let list_type = DataType::List(Arc::new(outer_field));
    let bytes = vec![
        // Outer cumulative end offsets, 8 bytes each (little-endian): 2, 3.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        // Inner cumulative end offsets: 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Leaf null mask, one byte per item (1 = null).
        0, 1, 0, 1, 0,
        // Five Int32 slots, 4 bytes each; masked slots carry 0.
        1, 0, 0, 0,
        0, 0, 0, 0,
        3, 0, 0, 0,
        0, 0, 0, 0,
        5, 0, 0, 0,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 2, &[])
        .expect("Failed to deserialize Array(Array(Nullable(Int32)))");
    let outer = array.as_any().downcast_ref::<ListArray>().unwrap();
    let inner = outer.values().as_any().downcast_ref::<ListArray>().unwrap();
    let items = inner.values().as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(outer.len(), 2);
    assert_eq!(inner.len(), 3);
    let expected_items = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
    assert_eq!(items, &expected_items);
    let outer_offsets: Vec<i32> = outer.offsets().iter().copied().collect();
    assert_eq!(outer_offsets, vec![0, 2, 3]);
    let inner_offsets: Vec<i32> = inner.offsets().iter().copied().collect();
    assert_eq!(inner_offsets, vec![0, 2, 3, 5]);
    assert!(outer.nulls().is_none());
}
/// Two-row `Array(Array(String))` with single-character strings and no null masks.
#[test]
fn test_deserialize_nested_list_string() {
    let element_type = Type::Array(Box::new(Type::String));
    let leaf_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Utf8, false);
    let inner_list_type = DataType::List(Arc::new(leaf_field));
    let outer_field = Field::new(LIST_ITEM_FIELD_NAME, inner_list_type, false);
    let list_type = DataType::List(Arc::new(outer_field));
    let bytes = vec![
        // Outer cumulative end offsets, 8 bytes each (little-endian): 2, 3.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        // Inner cumulative end offsets: 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Five strings, each a length byte followed by its bytes.
        1, b'a',
        1, b'b',
        1, b'c',
        1, b'd',
        1, b'e',
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 2, &[])
        .expect("Failed to deserialize Array(Array(String))");
    let outer = array.as_any().downcast_ref::<ListArray>().unwrap();
    let inner = outer.values().as_any().downcast_ref::<ListArray>().unwrap();
    let items = inner.values().as_any().downcast_ref::<StringArray>().unwrap();

    assert_eq!(outer.len(), 2);
    assert_eq!(inner.len(), 3);
    let expected_items = StringArray::from(vec!["a", "b", "c", "d", "e"]);
    assert_eq!(items, &expected_items);
    let outer_offsets: Vec<i32> = outer.offsets().iter().copied().collect();
    assert_eq!(outer_offsets, vec![0, 2, 3]);
    let inner_offsets: Vec<i32> = inner.offsets().iter().copied().collect();
    assert_eq!(inner_offsets, vec![0, 2, 3, 5]);
    assert!(outer.nulls().is_none());
}
/// `Array(Nullable(Array(String)))`: null inner lists show up as empty offset ranges and
/// as `false` entries in the inner validity buffer.
#[test]
fn test_deserialize_list_nullable_array_string() {
    let element_type = Type::Nullable(Box::new(Type::Array(Box::new(Type::String))));
    let leaf_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Utf8, false);
    let inner_list_type = DataType::List(Arc::new(leaf_field));
    let outer_field = Field::new(LIST_ITEM_FIELD_NAME, inner_list_type, true);
    let list_type = DataType::List(Arc::new(outer_field));
    let bytes = vec![
        // Outer cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Null mask for the five inner lists (1 = null).
        0, 1, 0, 1, 0,
        // Inner cumulative end offsets: 2, 2, 3, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Five strings, each a length byte followed by its bytes.
        1, b'a',
        1, b'b',
        1, b'c',
        1, b'd',
        1, b'e',
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize Array(Nullable(Array(String)))");
    let outer = array.as_any().downcast_ref::<ListArray>().unwrap();
    let inner = outer.values().as_any().downcast_ref::<ListArray>().unwrap();
    let items = inner.values().as_any().downcast_ref::<StringArray>().unwrap();

    assert_eq!(outer.len(), 3);
    assert_eq!(inner.len(), 5);
    let expected_items = StringArray::from(vec!["a", "b", "c", "d", "e"]);
    assert_eq!(items, &expected_items);
    let outer_offsets: Vec<i32> = outer.offsets().iter().copied().collect();
    assert_eq!(outer_offsets, vec![0, 2, 3, 5]);
    let inner_offsets: Vec<i32> = inner.offsets().iter().copied().collect();
    assert_eq!(inner_offsets, vec![0, 2, 2, 3, 3, 5]);
    let inner_validity: Vec<bool> = inner.nulls().unwrap().iter().collect();
    assert_eq!(inner_validity, vec![true, false, true, false, true]);
    assert!(outer.nulls().is_none());
}
/// `Array(Array(Nullable(String)))`: only the leaf strings are nullable; masked strings
/// are encoded with length 0 and must come back as `None`.
#[test]
fn test_deserialize_list_array_nullable_string() {
    let element_type = Type::Array(Box::new(Type::Nullable(Box::new(Type::String))));
    let leaf_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Utf8, true);
    let inner_list_type = DataType::List(Arc::new(leaf_field));
    let outer_field = Field::new(LIST_ITEM_FIELD_NAME, inner_list_type, false);
    let list_type = DataType::List(Arc::new(outer_field));
    let bytes = vec![
        // Outer cumulative end offsets, 8 bytes each (little-endian): 2, 3.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        // Inner cumulative end offsets: 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Leaf null mask, one byte per item (1 = null).
        0, 1, 0, 1, 0,
        // Five length-prefixed strings; masked slots have length 0.
        1, b'a',
        0,
        1, b'c',
        0,
        1, b'e',
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 2, &[])
        .expect("Failed to deserialize Array(Array(Nullable(String)))");
    let outer = array.as_any().downcast_ref::<ListArray>().unwrap();
    let inner = outer.values().as_any().downcast_ref::<ListArray>().unwrap();
    let items = inner.values().as_any().downcast_ref::<StringArray>().unwrap();

    assert_eq!(outer.len(), 2);
    assert_eq!(inner.len(), 3);
    let expected_items = StringArray::from(vec![Some("a"), None, Some("c"), None, Some("e")]);
    assert_eq!(items, &expected_items);
    let outer_offsets: Vec<i32> = outer.offsets().iter().copied().collect();
    assert_eq!(outer_offsets, vec![0, 2, 3]);
    let inner_offsets: Vec<i32> = inner.offsets().iter().copied().collect();
    assert_eq!(inner_offsets, vec![0, 2, 3, 5]);
    assert!(outer.nulls().is_none());
}
/// Two rows that are both empty arrays: every offset is 0 and there are no item bytes.
#[test]
fn test_deserialize_list_empty_inner() {
    let element_type = Type::Int32;
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each: 0, 0.
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 2, &[])
        .expect("Failed to deserialize Array(Int32) with empty inner arrays");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(lists.len(), 2);
    let expected_items = Int32Array::from(Vec::<i32>::new());
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 0, 0]);
    assert!(lists.nulls().is_none());
}
/// Three-row `Array(Float64)` column holding 1.0 through 5.0, no null masks.
#[test]
fn test_deserialize_list_float64() {
    let element_type = Type::Float64;
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Float64, false);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Five Float64 items, 8 bytes each (little-endian): 1.0, 2.0, 3.0, 4.0, 5.0.
        0, 0, 0, 0, 0, 0, 240, 63,
        0, 0, 0, 0, 0, 0, 0, 64,
        0, 0, 0, 0, 0, 0, 8, 64,
        0, 0, 0, 0, 0, 0, 16, 64,
        0, 0, 0, 0, 0, 0, 20, 64,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize Array(Float64)");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<Float64Array>().unwrap();

    assert_eq!(lists.len(), 3);
    let expected_items = Float64Array::from(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 3, 5]);
    assert!(lists.nulls().is_none());
}
/// `Array(Nullable(Float64))`: masked items carry all-zero payload bytes and must come
/// back as `None`.
#[test]
fn test_deserialize_list_nullable_float64() {
    let element_type = Type::Nullable(Box::new(Type::Float64));
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, DataType::Float64, true);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Item null mask, one byte per item (1 = null).
        0, 1, 0, 1, 0,
        // Five Float64 slots, 8 bytes each: 1.0, (null), 3.0, (null), 5.0.
        0, 0, 0, 0, 0, 0, 240, 63,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 8, 64,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 20, 64,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize Array(Nullable(Float64))");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<Float64Array>().unwrap();

    assert_eq!(lists.len(), 3);
    let expected_items = Float64Array::from(vec![Some(1.0), None, Some(3.0), None, Some(5.0)]);
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 3, 5]);
    assert!(lists.nulls().is_none());
}
/// Two-row `Array(DateTime)` in UTC: items are 4-byte second counts and must surface as
/// a UTC-tagged second-resolution timestamp array.
#[test]
fn test_deserialize_list_datetime() {
    let element_type = Type::DateTime(Tz::UTC);
    let timestamp_type = DataType::Timestamp(TimeUnit::Second, Some(Arc::from("UTC")));
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, timestamp_type, false);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 4.
        2, 0, 0, 0, 0, 0, 0, 0,
        4, 0, 0, 0, 0, 0, 0, 0,
        // Four timestamps, 4 bytes each (little-endian): 1000, 2000, 3000, 4000.
        232, 3, 0, 0,
        208, 7, 0, 0,
        184, 11, 0, 0,
        160, 15, 0, 0,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 2, &[])
        .expect("Failed to deserialize Array(DateTime)");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<TimestampSecondArray>().unwrap();

    assert_eq!(lists.len(), 2);
    let expected_items =
        TimestampSecondArray::from(vec![1000, 2000, 3000, 4000]).with_timezone_opt(Some("UTC"));
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 4]);
    assert!(lists.nulls().is_none());
}
/// `Array(Nullable(DateTime))` in UTC: masked timestamps carry zero bytes and must come
/// back as `None` in the UTC-tagged timestamp array.
#[test]
fn test_deserialize_list_nullable_datetime() {
    let element_type = Type::Nullable(Box::new(Type::DateTime(Tz::UTC)));
    let timestamp_type = DataType::Timestamp(TimeUnit::Second, Some(Arc::from("UTC")));
    let item_field = Field::new(LIST_ITEM_FIELD_NAME, timestamp_type, true);
    let list_type = DataType::List(Arc::new(item_field));
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 3, 5.
        2, 0, 0, 0, 0, 0, 0, 0,
        3, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        // Item null mask, one byte per item (1 = null).
        0, 1, 0, 1, 0,
        // Five timestamp slots, 4 bytes each: 1000, (null), 3000, (null), 5000.
        232, 3, 0, 0,
        0, 0, 0, 0,
        184, 11, 0, 0,
        0, 0, 0, 0,
        136, 19, 0, 0,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, 3, &[])
        .expect("Failed to deserialize Array(Nullable(DateTime))");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let items = lists.values().as_any().downcast_ref::<TimestampSecondArray>().unwrap();

    assert_eq!(lists.len(), 3);
    let expected_items =
        TimestampSecondArray::from(vec![Some(1000), None, Some(3000), None, Some(5000)])
            .with_timezone_opt(Some("UTC"));
    assert_eq!(items, &expected_items);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 3, 5]);
    assert!(lists.nulls().is_none());
}
/// Five-row `Array(LowCardinality(Nullable(String)))`: the child must be an
/// Int32-keyed dictionary whose first value is null, with keys in payload order.
///
/// The Arrow type is derived through `ch_to_arrow_type` with `strings_as_strings`
/// enabled rather than being built by hand.
#[test]
fn test_deserialize_list_low_cardinality_nullable_string() {
    let element_type = Type::LowCardinality(Box::new(Type::Nullable(Box::new(Type::String))));
    let rows = 5;
    let arrow_options = Some(ArrowOptions::default().with_strings_as_strings(true));
    let column_type = Type::Array(Box::new(element_type.clone()));
    let list_type = ch_to_arrow_type(&column_type, arrow_options).unwrap().0;
    let bytes = vec![
        // Per-row cumulative end offsets, 8 bytes each (little-endian): 2, 2, 4, 7, 8.
        2, 0, 0, 0, 0, 0, 0, 0,
        2, 0, 0, 0, 0, 0, 0, 0,
        4, 0, 0, 0, 0, 0, 0, 0,
        7, 0, 0, 0, 0, 0, 0, 0,
        8, 0, 0, 0, 0, 0, 0, 0,
        // 8-byte header word (presumably the LowCardinality flags; confirm against the
        // deserializer).
        0, 2, 0, 0, 0, 0, 0, 0,
        // Dictionary size: 4 entries.
        4, 0, 0, 0, 0, 0, 0, 0,
        // Dictionary entries, length-prefixed: "" (the null slot), "low", "card", "test".
        0,
        3, b'l', b'o', b'w',
        4, b'c', b'a', b'r', b'd',
        4, b't', b'e', b's', b't',
        // Key count: 8.
        8, 0, 0, 0, 0, 0, 0, 0,
        // One-byte keys into the dictionary.
        1, 0, 1, 2, 1, 0, 3, 3,
    ];

    let array = test_list_deser(bytes, &element_type, &list_type, rows, &[])
        .expect("Failed to deserialize Array(LowCardinality(Nullable(String)))");
    let lists = array.as_any().downcast_ref::<ListArray>().unwrap();
    let dictionary =
        lists.values().as_any().downcast_ref::<DictionaryArray<Int32Type>>().unwrap();

    assert_eq!(lists.len(), rows);
    let offsets: Vec<i32> = lists.offsets().iter().copied().collect();
    assert_eq!(offsets, vec![0, 2, 2, 4, 7, 8]);
    assert!(lists.nulls().is_none());

    let keys = Int32Array::from(vec![
        Some(1),
        Some(0),
        Some(1),
        Some(2),
        Some(1),
        Some(0),
        Some(3),
        Some(3),
    ]);
    let dictionary_values = StringArray::from(vec![None, Some("low"), Some("card"), Some("test")]);
    let expected_dictionary =
        DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary_values)).unwrap();
    assert_eq!(dictionary, &expected_dictionary);
}
}