use arrow2::array::*;
use arrow2::bitmap::Bitmap;
use arrow2::datatypes::{DataType, Field, IntegerType, TimeUnit, UnionMode};
use arrow2::offset::Offsets;
use arrow_array::ArrayRef;
use arrow_data::ArrayDataBuilder;
use proptest::num::i32;
/// Converts an arrow-rs array to arrow2 and back again, asserting that the
/// length, the contents and the data type are unchanged by the round trip.
///
/// Sparse unions are not compared with `==` directly. Instead the expected
/// `ArrayData` is rebuilt with the type-id buffer and every child sliced to
/// the input's `offset`/`len`, and that is compared against the result.
fn test_arrow2_roundtrip(array: &dyn arrow_array::Array) {
    let expected_len = array.len();

    let converted = Box::<dyn Array>::from(array);
    assert_eq!(converted.len(), expected_len);

    let restored = ArrayRef::from(converted);
    assert_eq!(restored.len(), expected_len);

    let data_type = array.data_type();
    let is_sparse_union = matches!(
        data_type,
        arrow_schema::DataType::Union(_, arrow_schema::UnionMode::Sparse)
    );

    if is_sparse_union {
        // NOTE(review): presumably direct equality does not hold for sliced
        // sparse unions, hence the manual normalisation — confirm.
        let source = array.to_data();
        let (offset, len) = (source.offset(), source.len());

        let type_ids = source.buffers()[0].slice_with_length(offset, len);
        let children: Vec<_> = source
            .child_data()
            .iter()
            .map(|child| child.slice(offset, len))
            .collect();

        let expected = ArrayDataBuilder::new(data_type.clone())
            .len(len)
            .buffers(vec![type_ids])
            .child_data(children)
            .build()
            .unwrap();

        assert_eq!(restored.to_data(), expected);
    } else {
        assert_eq!(array, restored.as_ref());
    }

    assert_eq!(array.data_type(), restored.data_type());
}
/// Converts an arrow2 array to arrow-rs and back again, asserting that the
/// length, the contents and the data type are unchanged by the round trip.
fn test_arrow_roundtrip(array: &dyn Array) {
    let expected_len = array.len();

    let converted = ArrayRef::from(array);
    assert_eq!(converted.len(), expected_len);

    let restored = Box::<dyn Array>::from(converted);
    assert_eq!(restored.len(), expected_len);

    assert_eq!(array, restored.as_ref());
    assert_eq!(array.data_type(), restored.data_type());
}
/// Runs both round-trip checks on `array` and on several slices of it.
///
/// The whole array is checked first. Non-empty arrays are then checked with
/// the first element dropped and with the last element dropped; arrays longer
/// than two elements are additionally checked with both ends dropped.
fn test_conversion(array: &dyn Array) {
    test_arrow_roundtrip(array);
    let to_arrow = ArrayRef::from(array);
    test_arrow2_roundtrip(to_arrow.as_ref());

    // Checks the same window on the arrow2 array and on its arrow-rs counterpart.
    let check_window = |offset: usize, length: usize| {
        test_arrow_roundtrip(array.sliced(offset, length).as_ref());
        test_arrow2_roundtrip(to_arrow.slice(offset, length).as_ref());
    };

    let len = array.len();

    if !array.is_empty() {
        check_window(1, len - 1);
        check_window(0, len - 1);
    }

    if len > 2 {
        check_window(1, len - 2);
    }
}
/// A seven-element `NullArray` survives conversion in both directions.
#[test]
fn test_null() {
    test_conversion(&NullArray::new(DataType::Null, 7));
}
/// Primitive arrays: a plain `Int32` array without validity and a
/// second-resolution UTC timestamp array with a null in the last slot.
#[test]
fn test_primitive() {
    let ints = PrimitiveArray::<i32>::new(DataType::Int32, vec![1, 2, 3].into(), None);
    test_conversion(&ints);

    let validity = Bitmap::from_iter([true, true, false]);
    let timestamps = PrimitiveArray::new(
        DataType::Timestamp(TimeUnit::Second, Some("UTC".into())),
        vec![1_i64, 24, 0].into(),
        Some(validity),
    );
    test_conversion(&timestamps);
}
/// A five-element boolean array with a validity bitmap.
#[test]
fn test_boolean() {
    let bits = Bitmap::from_iter([false, false, true, true, true]);
    let valid = Bitmap::from_iter([false, true, true, false, false]);

    let booleans = BooleanArray::new(DataType::Boolean, bits, Some(valid));
    test_conversion(&booleans);
}
/// UTF-8 arrays with 32-bit and 64-bit offsets, including nulls, embedded NUL
/// bytes and empty strings, plus an empty 32-bit array.
#[test]
fn test_utf8() {
    let small_offsets =
        Utf8Array::<i32>::from_iter([Some("asd\0"), None, Some("45\0848"), Some("")]);
    test_conversion(&small_offsets);

    let large_offsets =
        Utf8Array::<i64>::from_iter([Some("asd"), None, Some("45\n848"), Some("")]);
    test_conversion(&large_offsets);

    test_conversion(&Utf8Array::<i32>::new_empty(DataType::Utf8));
}
/// Binary arrays with 32-bit and 64-bit offsets, including non-UTF-8 bytes
/// and a trailing null, plus an empty 32-bit array.
#[test]
fn test_binary() {
    let small_offsets =
        BinaryArray::<i32>::from_iter([Some("s".as_bytes()), Some(b"sd\xFFfk\x23"), None]);
    test_conversion(&small_offsets);

    let large_offsets =
        BinaryArray::<i64>::from_iter([Some("45848".as_bytes()), Some(b"\x03\xFF"), None]);
    test_conversion(&large_offsets);

    test_conversion(&BinaryArray::<i32>::new_empty(DataType::Binary));
}
fn make_struct() -> StructArray {
let a1 = BinaryArray::<i32>::from_iter([Some("s".as_bytes()), Some(b"sd\xFFfk\x23"), None]);
let a2 = BinaryArray::<i64>::from_iter([Some("45848".as_bytes()), Some(b"\x03\xFF"), None]);
let data_type = DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".into()));
let nulls = Bitmap::from_iter([true, true, false]);
let a3 = PrimitiveArray::new(data_type, vec![1_i64, 24, 0].into(), Some(nulls));
let nulls = [true, true, false].into_iter().collect();
StructArray::new(
DataType::Struct(vec![
Field::new("a1", a1.data_type().clone(), true),
Field::new("a2", a2.data_type().clone(), true),
Field::new("a3", a3.data_type().clone(), true),
]),
vec![Box::new(a1), Box::new(a2), Box::new(a3)],
Some(nulls),
)
}
/// The struct array produced by `make_struct` survives conversion.
#[test]
fn test_struct() {
    test_conversion(&make_struct());
}
/// List arrays of nullable UTF-8 values, with 32-bit (`List`) and 64-bit
/// (`LargeList`) offsets. Each flavour is checked populated and empty.
#[test]
fn test_list() {
    let strings = Utf8Array::<i32>::from_iter([
        Some("asd\0"),
        None,
        Some("45\0848"),
        Some(""),
        Some("335"),
        Some("test"),
    ]);
    // Both list flavours share the same nullable UTF-8 element field.
    let element_field = || Box::new(Field::new("element", DataType::Utf8, true));

    // 32-bit offsets.
    // NOTE(review): the non-monotonic input suggests `try_from_iter` consumes
    // per-slot lengths rather than raw offsets — confirm against arrow2 docs.
    let list_type = DataType::List(element_field());
    let list_offsets = Offsets::<i32>::try_from_iter(vec![0, 2, 2, 2, 0]).unwrap();
    let list_validity = Bitmap::from_iter([true, true, false, false, true]);
    let populated = ListArray::<i32>::new(
        list_type.clone(),
        list_offsets.into(),
        Box::new(strings.clone()),
        Some(list_validity),
    );
    test_conversion(&populated);
    test_conversion(&ListArray::<i32>::new_empty(list_type));

    // 64-bit offsets.
    let large_type = DataType::LargeList(element_field());
    let large_offsets = Offsets::<i64>::try_from_iter(vec![0, 2, 2, 2, 0]).unwrap();
    let large_validity = Bitmap::from_iter([true, true, false, false, true]);
    let populated = ListArray::<i64>::new(
        large_type.clone(),
        large_offsets.into(),
        Box::new(strings),
        Some(large_validity),
    );
    test_conversion(&populated);
    test_conversion(&ListArray::<i64>::new_empty(large_type));
}
/// A 32-bit-offset list whose elements are the structs from `make_struct`.
#[test]
fn test_list_struct() {
    let structs = make_struct();

    let list_type = DataType::List(Box::new(Field::new(
        "element",
        structs.data_type().clone(),
        true,
    )));
    let list_offsets = Offsets::<i32>::try_from_iter(vec![0, 1, 0, 2]).unwrap();
    let list_validity = Bitmap::from_iter([true, true, false, true]);

    let list = ListArray::<i32>::new(
        list_type,
        list_offsets.into(),
        Box::new(structs),
        Some(list_validity),
    );
    test_conversion(&list);
}
/// A dictionary array with nullable `Int16` keys whose values are the structs
/// from `make_struct`.
#[test]
fn test_dictionary() {
    let key_validity = Bitmap::from_iter([true, false, true, true, true]);
    let keys = PrimitiveArray::new(
        DataType::Int16,
        vec![1_i16, 1, 0, 2, 2].into(),
        Some(key_validity),
    );

    let values = make_struct();
    let dictionary_type = DataType::Dictionary(
        IntegerType::Int16,
        Box::new(values.data_type().clone()),
        false,
    );

    let dictionary = DictionaryArray::try_new(dictionary_type, keys, Box::new(values)).unwrap();
    test_conversion(&dictionary);
}
/// A fixed-size binary array of width 2 built from the bytes `0..16`, with a
/// validity bitmap covering its eight slots.
#[test]
fn test_fixed_size_binary() {
    let bytes: Vec<u8> = (0..16).collect();
    let validity = Bitmap::from_iter([false, false, true, true, true, false, false, true]);

    let array = FixedSizeBinaryArray::new(
        DataType::FixedSizeBinary(2),
        bytes.into(),
        Some(validity),
    );
    test_conversion(&array);
}
/// A fixed-size list of width 2 over a nullable `Int64` child, with validity
/// on both the child values and the list slots.
#[test]
fn test_fixed_size_list() {
    let child_validity = Bitmap::from_iter([false, false, true, true, true, true, false, false]);
    let child = PrimitiveArray::new(
        DataType::Int64,
        vec![1_i64, 2, 3, 4, 5, 6, 7, 8].into(),
        Some(child_validity),
    );

    let list_type =
        DataType::FixedSizeList(Box::new(Field::new("element", DataType::Int64, true)), 2);
    let list_validity = Bitmap::from_iter([true, true, false, true]);

    let array = FixedSizeListArray::new(list_type, Box::new(child), Some(list_validity));
    test_conversion(&array);
}
/// A map array from UTF-8 keys to nullable `Int32` values, checked populated
/// and empty.
#[test]
fn test_map() {
    let keys = Utf8Array::<i32>::from_iter([
        Some("key1"),
        Some("key2"),
        Some("key3"),
        Some("key1"),
        Some("key2"),
    ]);
    let values = PrimitiveArray::<i32>::from_iter([Some(1), None, Some(3), Some(1), None]);

    // The key field is declared non-nullable and the entries struct itself
    // carries no validity bitmap.
    let entry_type = DataType::Struct(vec![
        Field::new("keys", DataType::Utf8, false),
        Field::new("values", DataType::Int32, true),
    ]);
    let entries = StructArray::new(entry_type, vec![Box::new(keys), Box::new(values)], None);

    let map_type = DataType::Map(
        Box::new(Field::new("entries", entries.data_type().clone(), true)),
        false,
    );
    let map_offsets = Offsets::try_from_iter(vec![0, 2, 0, 2]).unwrap();
    let map_validity = Bitmap::from_iter([true, true, false, false]);

    let populated = MapArray::new(
        map_type.clone(),
        map_offsets.into(),
        Box::new(entries),
        Some(map_validity),
    );
    test_conversion(&populated);

    test_conversion(&MapArray::new_empty(map_type));
}
/// Dense unions over an `Int32` and an `Int64` child, checked once with the
/// type ids `[0, 1]` and once with the type ids `[4, 1]`.
#[test]
fn test_dense_union() {
    let fields = vec![
        Field::new("a1", DataType::Int32, true),
        Field::new("a2", DataType::Int64, true),
    ];
    let a1 = PrimitiveArray::<i32>::from_iter([Some(2), None]);
    let a2 = PrimitiveArray::<i64>::from_iter([Some(2_i64), None, Some(3)]);

    // Each case pairs the declared type ids with the per-slot type buffer;
    // the offsets buffer is the same in both cases.
    let cases = [
        (vec![0, 1], vec![1, 0, 0, 1, 1]),
        (vec![4, 1], vec![1, 4, 4, 1, 1]),
    ];

    for (type_ids, types) in cases {
        let union = UnionArray::new(
            DataType::Union(fields.clone(), Some(type_ids), UnionMode::Dense),
            types.into(),
            vec![Box::new(a1.clone()), Box::new(a2.clone())],
            Some(vec![0, 0, 1, 1, 2].into()),
        );
        test_conversion(&union);
    }
}
/// Sparse unions over an `Int32` and an `Int64` child of equal length, checked
/// once with the type ids `[0, 1]` and once with the type ids `[4, 1]`.
#[test]
fn test_sparse_union() {
    let fields = vec![
        Field::new("a1", DataType::Int32, true),
        Field::new("a2", DataType::Int64, true),
    ];
    let a1 = PrimitiveArray::<i32>::from_iter([None, Some(2), None, None, None]);
    let a2 = PrimitiveArray::<i64>::from_iter([Some(2_i64), None, None, None, Some(3)]);

    // Each case pairs the declared type ids with the per-slot type buffer;
    // no offsets buffer is passed for sparse unions.
    let cases = [
        (vec![0, 1], vec![1, 0, 0, 1, 1]),
        (vec![4, 1], vec![1, 4, 4, 1, 1]),
    ];

    for (type_ids, types) in cases {
        let union = UnionArray::new(
            DataType::Union(fields.clone(), Some(type_ids), UnionMode::Sparse),
            types.into(),
            vec![Box::new(a1.clone()), Box::new(a2.clone())],
            None,
        );
        test_conversion(&union);
    }
}