use arrow_array::cast::AsArray;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::{ArrowNativeType, NullBuffer};
use arrow_schema::{ArrowError, DataType, SortOptions};
use std::{cmp::Ordering, collections::HashMap};
fn compare_run_end_encoded<R: RunEndIndexType>(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_run::<R>();
let right = right.as_run::<R>();
let c_opts = child_opts(opts);
let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
let l_run_ends = left.run_ends().clone();
let r_run_ends = right.run_ends().clone();
let f = compare(left, right, opts, move |i, j| {
let l_physical = l_run_ends.get_physical_index(i);
let r_physical = r_run_ends.get_physical_index(j);
cmp(l_physical, r_physical)
});
Ok(f)
}
pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
fn child_opts(opts: SortOptions) -> SortOptions {
SortOptions {
descending: false,
nulls_first: opts.nulls_first != opts.descending,
}
}
fn compare<A, F>(l: &A, r: &A, opts: SortOptions, cmp: F) -> DynComparator
where
A: Array + Clone,
F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
{
let l = l.logical_nulls().filter(|x| x.null_count() > 0);
let r = r.logical_nulls().filter(|x| x.null_count() > 0);
match (opts.nulls_first, opts.descending) {
(true, true) => compare_impl::<true, true, _>(l, r, cmp),
(true, false) => compare_impl::<true, false, _>(l, r, cmp),
(false, true) => compare_impl::<false, true, _>(l, r, cmp),
(false, false) => compare_impl::<false, false, _>(l, r, cmp),
}
}
fn compare_impl<const NULLS_FIRST: bool, const DESCENDING: bool, F>(
l: Option<NullBuffer>,
r: Option<NullBuffer>,
cmp: F,
) -> DynComparator
where
F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
{
let cmp = move |i, j| match DESCENDING {
true => cmp(i, j).reverse(),
false => cmp(i, j),
};
let (left_null, right_null) = match NULLS_FIRST {
true => (Ordering::Less, Ordering::Greater),
false => (Ordering::Greater, Ordering::Less),
};
match (l, r) {
(None, None) => Box::new(cmp),
(Some(l), None) => Box::new(move |i, j| match l.is_null(i) {
true => left_null,
false => cmp(i, j),
}),
(None, Some(r)) => Box::new(move |i, j| match r.is_null(j) {
true => right_null,
false => cmp(i, j),
}),
(Some(l), Some(r)) => Box::new(move |i, j| match (l.is_null(i), r.is_null(j)) {
(true, true) => Ordering::Equal,
(true, false) => left_null,
(false, true) => right_null,
(false, false) => cmp(i, j),
}),
}
}
fn compare_primitive<T: ArrowPrimitiveType>(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> DynComparator
where
T::Native: ArrowNativeTypeOp,
{
let left = left.as_primitive::<T>();
let right = right.as_primitive::<T>();
let l_values = left.values().clone();
let r_values = right.values().clone();
compare(&left, &right, opts, move |i, j| {
l_values[i].compare(r_values[j])
})
}
fn compare_boolean(left: &dyn Array, right: &dyn Array, opts: SortOptions) -> DynComparator {
let left = left.as_boolean();
let right = right.as_boolean();
let l_values = left.values().clone();
let r_values = right.values().clone();
compare(left, right, opts, move |i, j| {
l_values.value(i).cmp(&r_values.value(j))
})
}
fn compare_bytes<T: ByteArrayType>(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> DynComparator {
let left = left.as_bytes::<T>();
let right = right.as_bytes::<T>();
let l = left.clone();
let r = right.clone();
compare(left, right, opts, move |i, j| {
let l: &[u8] = l.value(i).as_ref();
let r: &[u8] = r.value(j).as_ref();
l.cmp(r)
})
}
fn compare_byte_view<T: ByteViewType>(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> DynComparator {
let left = left.as_byte_view::<T>();
let right = right.as_byte_view::<T>();
let l = left.clone();
let r = right.clone();
compare(left, right, opts, move |i, j| {
crate::cmp::compare_byte_view(&l, i, &r, j)
})
}
fn compare_dict<K: ArrowDictionaryKeyType>(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_dictionary::<K>();
let right = right.as_dictionary::<K>();
let c_opts = child_opts(opts);
let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
let left_keys = left.keys().values().clone();
let right_keys = right.keys().values().clone();
let f = compare(left, right, opts, move |i, j| {
let l = left_keys[i].as_usize();
let r = right_keys[j].as_usize();
cmp(l, r)
});
Ok(f)
}
fn compare_list<O: OffsetSizeTrait>(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_list::<O>();
let right = right.as_list::<O>();
let c_opts = child_opts(opts);
let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
let l_o = left.offsets().clone();
let r_o = right.offsets().clone();
let f = compare(left, right, opts, move |i, j| {
let l_end = l_o[i + 1].as_usize();
let l_start = l_o[i].as_usize();
let r_end = r_o[j + 1].as_usize();
let r_start = r_o[j].as_usize();
for (i, j) in (l_start..l_end).zip(r_start..r_end) {
match cmp(i, j) {
Ordering::Equal => continue,
r => return r,
}
}
(l_end - l_start).cmp(&(r_end - r_start))
});
Ok(f)
}
fn compare_fixed_list(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_fixed_size_list();
let right = right.as_fixed_size_list();
let c_opts = child_opts(opts);
let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
let l_size = left.value_length().to_usize().unwrap();
let r_size = right.value_length().to_usize().unwrap();
let size_cmp = l_size.cmp(&r_size);
let f = compare(left, right, opts, move |i, j| {
let l_start = i * l_size;
let l_end = l_start + l_size;
let r_start = j * r_size;
let r_end = r_start + r_size;
for (i, j) in (l_start..l_end).zip(r_start..r_end) {
match cmp(i, j) {
Ordering::Equal => continue,
r => return r,
}
}
size_cmp
});
Ok(f)
}
fn compare_list_view<O: OffsetSizeTrait>(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_list_view::<O>();
let right = right.as_list_view::<O>();
let c_opts = child_opts(opts);
let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
let l_offsets = left.offsets().clone();
let l_sizes = left.sizes().clone();
let r_offsets = right.offsets().clone();
let r_sizes = right.sizes().clone();
let f = compare(left, right, opts, move |i, j| {
let l_start = l_offsets[i].as_usize();
let l_len = l_sizes[i].as_usize();
let l_end = l_start + l_len;
let r_start = r_offsets[j].as_usize();
let r_len = r_sizes[j].as_usize();
let r_end = r_start + r_len;
for (i, j) in (l_start..l_end).zip(r_start..r_end) {
match cmp(i, j) {
Ordering::Equal => continue,
r => return r,
}
}
l_len.cmp(&r_len)
});
Ok(f)
}
fn compare_map(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_map();
let right = right.as_map();
let c_opts = child_opts(opts);
let cmp = make_comparator(left.entries(), right.entries(), c_opts)?;
let l_o = left.offsets().clone();
let r_o = right.offsets().clone();
let f = compare(left, right, opts, move |i, j| {
let l_end = l_o[i + 1].as_usize();
let l_start = l_o[i].as_usize();
let r_end = r_o[j + 1].as_usize();
let r_start = r_o[j].as_usize();
for (i, j) in (l_start..l_end).zip(r_start..r_end) {
match cmp(i, j) {
Ordering::Equal => continue,
r => return r,
}
}
(l_end - l_start).cmp(&(r_end - r_start))
});
Ok(f)
}
fn compare_struct(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_struct();
let right = right.as_struct();
if left.columns().len() != right.columns().len() {
return Err(ArrowError::InvalidArgumentError(
"Cannot compare StructArray with different number of columns".to_string(),
));
}
let c_opts = child_opts(opts);
let columns = left.columns().iter().zip(right.columns());
let comparators = columns
.map(|(l, r)| make_comparator(l, r, c_opts))
.collect::<Result<Vec<_>, _>>()?;
let f = compare(left, right, opts, move |i, j| {
for cmp in &comparators {
match cmp(i, j) {
Ordering::Equal => continue,
r => return r,
}
}
Ordering::Equal
});
Ok(f)
}
fn compare_union(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
let left = left.as_union();
let right = right.as_union();
let (left_fields, left_mode) = match left.data_type() {
DataType::Union(fields, mode) => (fields, mode),
_ => unreachable!(),
};
let (right_fields, right_mode) = match right.data_type() {
DataType::Union(fields, mode) => (fields, mode),
_ => unreachable!(),
};
if left_fields != right_fields {
return Err(ArrowError::InvalidArgumentError(format!(
"Cannot compare UnionArrays with different fields: left={:?}, right={:?}",
left_fields, right_fields
)));
}
if left_mode != right_mode {
return Err(ArrowError::InvalidArgumentError(format!(
"Cannot compare UnionArrays with different modes: left={:?}, right={:?}",
left_mode, right_mode
)));
}
let c_opts = child_opts(opts);
let mut field_comparators = HashMap::with_capacity(left_fields.len());
for (type_id, _field) in left_fields.iter() {
let left_child = left.child(type_id);
let right_child = right.child(type_id);
let cmp = make_comparator(left_child.as_ref(), right_child.as_ref(), c_opts)?;
field_comparators.insert(type_id, cmp);
}
let left_type_ids = left.type_ids().clone();
let right_type_ids = right.type_ids().clone();
let left_offsets = left.offsets().cloned();
let right_offsets = right.offsets().cloned();
let f = compare(left, right, opts, move |i, j| {
let left_type_id = left_type_ids[i];
let right_type_id = right_type_ids[j];
match left_type_id.cmp(&right_type_id) {
Ordering::Equal => {
let left_offset = left_offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
let right_offset = right_offsets.as_ref().map(|o| o[j] as usize).unwrap_or(j);
let cmp = field_comparators
.get(&left_type_id)
.expect("type id not found in field_comparators");
cmp(left_offset, right_offset)
}
other => other,
}
});
Ok(f)
}
pub fn make_comparator(
left: &dyn Array,
right: &dyn Array,
opts: SortOptions,
) -> Result<DynComparator, ArrowError> {
use arrow_schema::DataType::*;
macro_rules! primitive_helper {
($t:ty, $left:expr, $right:expr, $nulls_first:expr) => {
Ok(compare_primitive::<$t>($left, $right, $nulls_first))
};
}
downcast_primitive! {
left.data_type(), right.data_type() => (primitive_helper, left, right, opts),
(Boolean, Boolean) => Ok(compare_boolean(left, right, opts)),
(Utf8, Utf8) => Ok(compare_bytes::<Utf8Type>(left, right, opts)),
(LargeUtf8, LargeUtf8) => Ok(compare_bytes::<LargeUtf8Type>(left, right, opts)),
(Utf8View, Utf8View) => Ok(compare_byte_view::<StringViewType>(left, right, opts)),
(Binary, Binary) => Ok(compare_bytes::<BinaryType>(left, right, opts)),
(LargeBinary, LargeBinary) => Ok(compare_bytes::<LargeBinaryType>(left, right, opts)),
(BinaryView, BinaryView) => Ok(compare_byte_view::<BinaryViewType>(left, right, opts)),
(FixedSizeBinary(_), FixedSizeBinary(_)) => {
let left = left.as_fixed_size_binary();
let right = right.as_fixed_size_binary();
let l = left.clone();
let r = right.clone();
Ok(compare(left, right, opts, move |i, j| {
l.value(i).cmp(r.value(j))
}))
},
(List(_), List(_)) => compare_list::<i32>(left, right, opts),
(LargeList(_), LargeList(_)) => compare_list::<i64>(left, right, opts),
(ListView(_), ListView(_)) => compare_list_view::<i32>(left, right, opts),
(LargeListView(_), LargeListView(_)) => compare_list_view::<i64>(left, right, opts),
(FixedSizeList(_, _), FixedSizeList(_, _)) => compare_fixed_list(left, right, opts),
(Struct(_), Struct(_)) => compare_struct(left, right, opts),
(Dictionary(l_key, _), Dictionary(r_key, _)) => {
macro_rules! dict_helper {
($t:ty, $left:expr, $right:expr, $opts: expr) => {
compare_dict::<$t>($left, $right, $opts)
};
}
downcast_integer! {
l_key.as_ref(), r_key.as_ref() => (dict_helper, left, right, opts),
_ => unreachable!()
}
},
(RunEndEncoded(l_run_ends, _), RunEndEncoded(r_run_ends, _)) => {
macro_rules! run_end_helper {
($t:ty, $left:expr, $right:expr, $opts:expr) => {
compare_run_end_encoded::<$t>($left, $right, $opts)
};
}
downcast_run_end_index! {
l_run_ends.data_type(), r_run_ends.data_type() => (run_end_helper, left, right, opts),
_ => Err(ArrowError::InvalidArgumentError(format!(
"Cannot compare RunEndEncoded arrays with different run ends types: left={:?}, right={:?}",
l_run_ends.data_type(),
r_run_ends.data_type()
)))
}
},
(Map(_, _), Map(_, _)) => compare_map(left, right, opts),
(Null, Null) => Ok(Box::new(|_, _| Ordering::Equal)),
(Union(_, _), Union(_, _)) => compare_union(left, right, opts),
(lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs {
true => format!("The data type type {lhs:?} has no natural order"),
false => "Can't compare arrays of different types".to_string(),
}))
}
}
#[cfg(test)]
mod tests {
use super::*;
use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder};
use arrow_buffer::{IntervalDayTime, NullBuffer, OffsetBuffer, ScalarBuffer, i256};
use arrow_schema::{DataType, Field, Fields, UnionFields};
use half::f16;
use std::sync::Arc;
#[test]
fn test_fixed_size_binary() {
let items = vec![vec![1u8], vec![2u8]];
let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
}
#[test]
fn test_fixed_size_binary_fixed_size_binary() {
let items = vec![vec![1u8]];
let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
let items = vec![vec![2u8]];
let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
}
#[test]
fn test_i32() {
let array = Int32Array::from(vec![1, 2]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, (cmp)(0, 1));
}
#[test]
fn test_i32_i32() {
let array1 = Int32Array::from(vec![1]);
let array2 = Int32Array::from(vec![2]);
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
}
#[test]
fn test_f16() {
let array = Float16Array::from(vec![f16::from_f32(1.0), f16::from_f32(2.0)]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
}
#[test]
fn test_f64() {
let array = Float64Array::from(vec![1.0, 2.0]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
}
#[test]
fn test_f64_nan() {
let array = Float64Array::from(vec![1.0, f64::NAN]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Equal, cmp(1, 1));
}
#[test]
fn test_f64_zeros() {
let array = Float64Array::from(vec![-0.0, 0.0]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));
}
#[test]
fn test_interval_day_time() {
let array = IntervalDayTimeArray::from(vec![
IntervalDayTimeType::make_value(0, 1000),
IntervalDayTimeType::make_value(1, 2),
IntervalDayTimeType::make_value(0, 90_000_000),
]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));
assert_eq!(Ordering::Greater, cmp(1, 2));
assert_eq!(Ordering::Less, cmp(2, 1));
}
#[test]
fn test_interval_year_month() {
let array = IntervalYearMonthArray::from(vec![
IntervalYearMonthType::make_value(1, 0),
IntervalYearMonthType::make_value(0, 13),
IntervalYearMonthType::make_value(1, 1),
]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));
assert_eq!(Ordering::Equal, cmp(1, 2));
assert_eq!(Ordering::Equal, cmp(2, 1));
}
#[test]
fn test_interval_month_day_nano() {
let array = IntervalMonthDayNanoArray::from(vec![
IntervalMonthDayNanoType::make_value(0, 100, 0),
IntervalMonthDayNanoType::make_value(1, 0, 0),
IntervalMonthDayNanoType::make_value(0, 100, 2),
]);
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));
assert_eq!(Ordering::Greater, cmp(1, 2));
assert_eq!(Ordering::Less, cmp(2, 1));
}
#[test]
fn test_decimali32() {
let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
.into_iter()
.collect::<Decimal32Array>()
.with_precision_and_scale(8, 6)
.unwrap();
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Greater, cmp(0, 2));
}
#[test]
fn test_decimali64() {
let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
.into_iter()
.collect::<Decimal64Array>()
.with_precision_and_scale(16, 6)
.unwrap();
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Greater, cmp(0, 2));
}
#[test]
fn test_decimali128() {
let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
.into_iter()
.collect::<Decimal128Array>()
.with_precision_and_scale(23, 6)
.unwrap();
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Greater, cmp(0, 2));
}
#[test]
fn test_decimali256() {
let array = vec![
Some(i256::from_i128(5_i128)),
Some(i256::from_i128(2_i128)),
Some(i256::from_i128(3_i128)),
]
.into_iter()
.collect::<Decimal256Array>()
.with_precision_and_scale(53, 6)
.unwrap();
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Greater, cmp(0, 2));
}
#[test]
fn test_dict() {
let data = vec!["a", "b", "c", "a", "a", "c", "c"];
let array = data.into_iter().collect::<DictionaryArray<Int16Type>>();
let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Equal, cmp(3, 4));
assert_eq!(Ordering::Greater, cmp(2, 3));
}
#[test]
fn test_multiple_dict() {
let d1 = vec!["a", "b", "c", "d"];
let a1 = d1.into_iter().collect::<DictionaryArray<Int16Type>>();
let d2 = vec!["e", "f", "g", "a"];
let a2 = d2.into_iter().collect::<DictionaryArray<Int16Type>>();
let cmp = make_comparator(&a1, &a2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
assert_eq!(Ordering::Equal, cmp(0, 3));
assert_eq!(Ordering::Greater, cmp(1, 3));
}
#[test]
fn test_primitive_dict() {
let values = Int32Array::from(vec![1_i32, 0, 2, 5]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::new(keys, Arc::new(values));
let values = Int32Array::from(vec![2_i32, 3, 4, 5]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
assert_eq!(Ordering::Less, cmp(0, 3));
assert_eq!(Ordering::Equal, cmp(3, 3));
assert_eq!(Ordering::Greater, cmp(3, 1));
assert_eq!(Ordering::Greater, cmp(3, 2));
}
#[test]
fn test_float_dict() {
let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap();
let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
assert_eq!(Ordering::Less, cmp(0, 3));
assert_eq!(Ordering::Equal, cmp(3, 3));
assert_eq!(Ordering::Greater, cmp(3, 1));
assert_eq!(Ordering::Greater, cmp(3, 2));
}
#[test]
fn test_timestamp_dict() {
let values = TimestampSecondArray::from(vec![1, 0, 2, 5]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::new(keys, Arc::new(values));
let values = TimestampSecondArray::from(vec![2, 3, 4, 5]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
assert_eq!(Ordering::Less, cmp(0, 3));
assert_eq!(Ordering::Equal, cmp(3, 3));
assert_eq!(Ordering::Greater, cmp(3, 1));
assert_eq!(Ordering::Greater, cmp(3, 2));
}
#[test]
fn test_interval_dict() {
let v1 = IntervalDayTime::new(0, 1);
let v2 = IntervalDayTime::new(0, 2);
let v3 = IntervalDayTime::new(12, 2);
let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None, Some(v3)]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::new(keys, Arc::new(values));
let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None, Some(v1)]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0)); assert_eq!(Ordering::Equal, cmp(0, 3)); assert_eq!(Ordering::Greater, cmp(3, 3)); assert_eq!(Ordering::Greater, cmp(3, 1)); assert_eq!(Ordering::Greater, cmp(3, 2)); }
#[test]
fn test_duration_dict() {
let values = DurationSecondArray::from(vec![1, 0, 2, 5]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::new(keys, Arc::new(values));
let values = DurationSecondArray::from(vec![2, 3, 4, 5]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
assert_eq!(Ordering::Less, cmp(0, 3));
assert_eq!(Ordering::Equal, cmp(3, 3));
assert_eq!(Ordering::Greater, cmp(3, 1));
assert_eq!(Ordering::Greater, cmp(3, 2));
}
#[test]
fn test_decimal_dict() {
let values = Decimal128Array::from(vec![1, 0, 2, 5]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::new(keys, Arc::new(values));
let values = Decimal128Array::from(vec![2, 3, 4, 5]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
assert_eq!(Ordering::Less, cmp(0, 3));
assert_eq!(Ordering::Equal, cmp(3, 3));
assert_eq!(Ordering::Greater, cmp(3, 1));
assert_eq!(Ordering::Greater, cmp(3, 2));
}
#[test]
fn test_decimal256_dict() {
let values = Decimal256Array::from(vec![
i256::from_i128(1),
i256::from_i128(0),
i256::from_i128(2),
i256::from_i128(5),
]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::new(keys, Arc::new(values));
let values = Decimal256Array::from(vec![
i256::from_i128(2),
i256::from_i128(3),
i256::from_i128(4),
i256::from_i128(5),
]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 0));
assert_eq!(Ordering::Less, cmp(0, 3));
assert_eq!(Ordering::Equal, cmp(3, 3));
assert_eq!(Ordering::Greater, cmp(3, 1));
assert_eq!(Ordering::Greater, cmp(3, 2));
}
fn test_bytes_impl<T: ByteArrayType>() {
let offsets = OffsetBuffer::from_lengths([3, 3, 1]);
let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None);
let cmp = make_comparator(&a, &a, SortOptions::default()).unwrap();
assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(0, 2));
assert_eq!(Ordering::Equal, cmp(1, 1));
}
#[test]
fn test_bytes() {
test_bytes_impl::<Utf8Type>();
test_bytes_impl::<LargeUtf8Type>();
test_bytes_impl::<BinaryType>();
test_bytes_impl::<LargeBinaryType>();
}
fn assert_cmp_cases<A: Array>(
array1: &A,
array2: &A,
opts: SortOptions,
cases: &[(usize, usize, Ordering)],
) {
let cmp = make_comparator(array1, array2, opts).unwrap();
for (left, right, expected) in cases {
assert_eq!(cmp(*left, *right), *expected);
}
}
#[test]
fn test_lists() {
let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
a.extend([
Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
Some(vec![
Some(vec![Some(1), Some(2), Some(3)]),
Some(vec![Some(1)]),
]),
Some(vec![]),
]);
let a = a.finish();
let mut b = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
b.extend([
Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
Some(vec![
Some(vec![Some(1), Some(2), None]),
Some(vec![Some(1)]),
]),
Some(vec![
Some(vec![Some(1), Some(2), Some(3), Some(4)]),
Some(vec![Some(1)]),
]),
None,
]);
let b = b.finish();
assert_cmp_cases(
&a,
&b,
SortOptions {
descending: false,
nulls_first: true,
},
&[
(0, 0, Ordering::Equal),
(0, 1, Ordering::Less),
(0, 2, Ordering::Less),
(1, 2, Ordering::Less),
(1, 3, Ordering::Greater),
(2, 0, Ordering::Less),
],
);
assert_cmp_cases(
&a,
&b,
SortOptions {
descending: true,
nulls_first: true,
},
&[
(0, 0, Ordering::Equal),
(0, 1, Ordering::Less),
(0, 2, Ordering::Less),
(1, 2, Ordering::Greater),
(1, 3, Ordering::Greater),
(2, 0, Ordering::Greater),
],
);
assert_cmp_cases(
&a,
&b,
SortOptions {
descending: true,
nulls_first: false,
},
&[
(0, 0, Ordering::Equal),
(0, 1, Ordering::Greater),
(0, 2, Ordering::Greater),
(1, 2, Ordering::Greater),
(1, 3, Ordering::Less),
(2, 0, Ordering::Greater),
],
);
assert_cmp_cases(
&a,
&b,
SortOptions {
descending: false,
nulls_first: false,
},
&[
(0, 0, Ordering::Equal),
(0, 1, Ordering::Greater),
(0, 2, Ordering::Greater),
(1, 2, Ordering::Less),
(1, 3, Ordering::Less),
(2, 0, Ordering::Less),
],
);
}
fn list_view_array<O: OffsetSizeTrait>(
values: Vec<i32>,
offsets: &[usize],
sizes: &[usize],
valid: Option<&[bool]>,
) -> GenericListViewArray<O> {
let offsets = offsets
.iter()
.map(|v| O::from_usize(*v).unwrap())
.collect::<ScalarBuffer<O>>();
let sizes = sizes
.iter()
.map(|v| O::from_usize(*v).unwrap())
.collect::<ScalarBuffer<O>>();
let field = Arc::new(Field::new_list_field(DataType::Int32, true));
let values = Int32Array::from(values);
let nulls = valid.map(NullBuffer::from);
GenericListViewArray::new(field, offsets, sizes, Arc::new(values), nulls)
}
fn test_list_view_comparisons<O: OffsetSizeTrait>() {
let array = list_view_array::<O>(
vec![1, 2, 3, 4, 5],
&[0, 2, 1, 0, 3],
&[2, 2, 2, 0, 2],
Some(&[true, true, true, true, false]),
);
assert_cmp_cases(
&array,
&array,
SortOptions {
descending: false,
nulls_first: true,
},
&[
(0, 2, Ordering::Less), (1, 2, Ordering::Greater), (3, 0, Ordering::Less), (4, 0, Ordering::Less), ],
);
assert_cmp_cases(
&array,
&array,
SortOptions {
descending: false,
nulls_first: false,
},
&[
(0, 2, Ordering::Less),
(1, 2, Ordering::Greater),
(3, 0, Ordering::Less),
(4, 0, Ordering::Greater), ],
);
assert_cmp_cases(
&array,
&array,
SortOptions {
descending: true,
nulls_first: true,
},
&[
(0, 2, Ordering::Greater),
(1, 2, Ordering::Less),
(3, 0, Ordering::Greater),
(4, 0, Ordering::Less),
],
);
assert_cmp_cases(
&array,
&array,
SortOptions {
descending: true,
nulls_first: false,
},
&[
(0, 2, Ordering::Greater),
(1, 2, Ordering::Less),
(3, 0, Ordering::Greater),
(4, 0, Ordering::Greater),
],
);
}
#[test]
fn test_list_view() {
test_list_view_comparisons::<i32>();
}
#[test]
fn test_large_list_view() {
test_list_view_comparisons::<i64>();
}
#[test]
fn test_struct() {
let fields = Fields::from(vec![
Field::new("a", DataType::Int32, true),
Field::new_list("b", Field::new_list_field(DataType::Int32, true), true),
]);
let a = Int32Array::from(vec![Some(1), Some(2), None, None]);
let mut b = ListBuilder::new(Int32Builder::new());
b.extend([Some(vec![Some(1), Some(2)]), Some(vec![None]), None, None]);
let b = b.finish();
let nulls = Some(NullBuffer::from_iter([true, true, true, false]));
let values = vec![Arc::new(a) as _, Arc::new(b) as _];
let s1 = StructArray::new(fields.clone(), values, nulls);
let a = Int32Array::from(vec![None, Some(2), None]);
let mut b = ListBuilder::new(Int32Builder::new());
b.extend([None, None, Some(vec![])]);
let b = b.finish();
let values = vec![Arc::new(a) as _, Arc::new(b) as _];
let s2 = StructArray::new(fields.clone(), values, None);
let opts = SortOptions {
descending: false,
nulls_first: true,
};
let cmp = make_comparator(&s1, &s2, opts).unwrap();
assert_eq!(cmp(0, 1), Ordering::Less); assert_eq!(cmp(0, 0), Ordering::Greater); assert_eq!(cmp(1, 1), Ordering::Greater); assert_eq!(cmp(2, 2), Ordering::Less); assert_eq!(cmp(3, 0), Ordering::Less); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Less);
let opts = SortOptions {
descending: true,
nulls_first: true,
};
let cmp = make_comparator(&s1, &s2, opts).unwrap();
assert_eq!(cmp(0, 1), Ordering::Greater); assert_eq!(cmp(0, 0), Ordering::Greater); assert_eq!(cmp(1, 1), Ordering::Greater); assert_eq!(cmp(2, 2), Ordering::Less); assert_eq!(cmp(3, 0), Ordering::Less); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Less);
let opts = SortOptions {
descending: true,
nulls_first: false,
};
let cmp = make_comparator(&s1, &s2, opts).unwrap();
assert_eq!(cmp(0, 1), Ordering::Greater); assert_eq!(cmp(0, 0), Ordering::Less); assert_eq!(cmp(1, 1), Ordering::Less); assert_eq!(cmp(2, 2), Ordering::Greater); assert_eq!(cmp(3, 0), Ordering::Greater); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Greater);
let opts = SortOptions {
descending: false,
nulls_first: false,
};
let cmp = make_comparator(&s1, &s2, opts).unwrap();
assert_eq!(cmp(0, 1), Ordering::Less); assert_eq!(cmp(0, 0), Ordering::Less); assert_eq!(cmp(1, 1), Ordering::Less); assert_eq!(cmp(2, 2), Ordering::Greater); assert_eq!(cmp(3, 0), Ordering::Greater); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Greater); }
#[test]
fn test_map() {
let string_builder = StringBuilder::new();
let int_builder = Int32Builder::new();
let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
map1_builder.keys().append_value("a");
map1_builder.values().append_value(100);
map1_builder.keys().append_value("b");
map1_builder.values().append_value(1);
map1_builder.append(true).unwrap();
map1_builder.keys().append_value("b");
map1_builder.values().append_value(999);
map1_builder.keys().append_value("c");
map1_builder.values().append_value(1);
map1_builder.append(true).unwrap();
map1_builder.append(true).unwrap();
map1_builder.keys().append_value("x");
map1_builder.values().append_value(1);
map1_builder.append(true).unwrap();
let map1 = map1_builder.finish();
let string_builder = StringBuilder::new();
let int_builder = Int32Builder::new();
let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
map2_builder.keys().append_value("a");
map2_builder.values().append_value(1);
map2_builder.keys().append_value("c");
map2_builder.values().append_value(999);
map2_builder.append(true).unwrap();
map2_builder.keys().append_value("b");
map2_builder.values().append_value(1);
map2_builder.keys().append_value("d");
map2_builder.values().append_value(999);
map2_builder.append(true).unwrap();
map2_builder.keys().append_value("a");
map2_builder.values().append_value(1);
map2_builder.append(true).unwrap();
map2_builder.append(false).unwrap();
let map2 = map2_builder.finish();
let opts = SortOptions {
descending: false,
nulls_first: true,
};
let cmp = make_comparator(&map1, &map2, opts).unwrap();
assert_eq!(cmp(0, 0), Ordering::Greater);
assert_eq!(cmp(1, 1), Ordering::Greater);
assert_eq!(cmp(0, 1), Ordering::Less);
assert_eq!(cmp(2, 2), Ordering::Less);
assert_eq!(cmp(3, 3), Ordering::Greater);
assert_eq!(cmp(3, 0), Ordering::Greater);
assert_eq!(cmp(2, 0), Ordering::Less);
let opts = SortOptions {
descending: true,
nulls_first: true,
};
let cmp = make_comparator(&map1, &map2, opts).unwrap();
assert_eq!(cmp(0, 0), Ordering::Less); assert_eq!(cmp(1, 1), Ordering::Less); assert_eq!(cmp(0, 1), Ordering::Greater); assert_eq!(cmp(3, 3), Ordering::Greater); assert_eq!(cmp(2, 2), Ordering::Greater);
let opts = SortOptions {
descending: false,
nulls_first: false,
};
let cmp = make_comparator(&map1, &map2, opts).unwrap();
assert_eq!(cmp(0, 0), Ordering::Greater); assert_eq!(cmp(1, 1), Ordering::Greater); assert_eq!(cmp(3, 3), Ordering::Less); assert_eq!(cmp(2, 2), Ordering::Less); }
#[test]
fn test_map_vs_list_consistency() {
let string_builder = StringBuilder::new();
let int_builder = Int32Builder::new();
let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
map1_builder.keys().append_value("a");
map1_builder.values().append_value(1);
map1_builder.keys().append_value("b");
map1_builder.values().append_value(2);
map1_builder.append(true).unwrap();
map1_builder.keys().append_value("x");
map1_builder.values().append_value(10);
map1_builder.append(true).unwrap();
map1_builder.append(true).unwrap();
map1_builder.keys().append_value("c");
map1_builder.values().append_value(3);
map1_builder.append(true).unwrap();
let map1 = map1_builder.finish();
let string_builder = StringBuilder::new();
let int_builder = Int32Builder::new();
let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
map2_builder.keys().append_value("a");
map2_builder.values().append_value(1);
map2_builder.keys().append_value("b");
map2_builder.values().append_value(2);
map2_builder.append(true).unwrap();
map2_builder.keys().append_value("y");
map2_builder.values().append_value(20);
map2_builder.append(true).unwrap();
map2_builder.keys().append_value("d");
map2_builder.values().append_value(4);
map2_builder.append(true).unwrap();
map2_builder.append(false).unwrap();
let map2 = map2_builder.finish();
let list1: ListArray = map1.clone().into();
let list2: ListArray = map2.clone().into();
let test_cases = [
SortOptions {
descending: false,
nulls_first: true,
},
SortOptions {
descending: true,
nulls_first: true,
},
SortOptions {
descending: false,
nulls_first: false,
},
SortOptions {
descending: true,
nulls_first: false,
},
];
for opts in test_cases {
let map_cmp = make_comparator(&map1, &map2, opts).unwrap();
let list_cmp = make_comparator(&list1, &list2, opts).unwrap();
for i in 0..map1.len() {
for j in 0..map2.len() {
let map_result = map_cmp(i, j);
let list_result = list_cmp(i, j);
assert_eq!(
map_result, list_result,
"Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}"
);
}
}
}
}
#[test]
fn test_dense_union() {
let int_array = Int32Array::from(vec![1, 2, 3]);
let str_array = StringArray::from(vec!["b", "a"]);
let type_ids = [0, 1, 0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
let offsets = [0, 0, 1, 1, 2].into_iter().collect::<ScalarBuffer<i32>>();
let union_fields = [
(0, Arc::new(Field::new("A", DataType::Int32, false))),
(1, Arc::new(Field::new("B", DataType::Utf8, false))),
]
.into_iter()
.collect::<UnionFields>();
let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
let array1 =
UnionArray::try_new(union_fields.clone(), type_ids, Some(offsets), children).unwrap();
let int_array2 = Int32Array::from(vec![2, 1]);
let str_array2 = StringArray::from(vec!["a", "c"]);
let type_ids2 = [0, 1, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
let offsets2 = [0, 0, 1, 1].into_iter().collect::<ScalarBuffer<i32>>();
let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
let array2 =
UnionArray::try_new(union_fields, type_ids2, Some(offsets2), children2).unwrap();
let opts = SortOptions {
descending: false,
nulls_first: true,
};
let cmp = make_comparator(&array1, &array2, opts).unwrap();
assert_eq!(cmp(0, 0), Ordering::Less);
assert_eq!(cmp(0, 1), Ordering::Less);
assert_eq!(cmp(1, 1), Ordering::Greater);
assert_eq!(cmp(2, 0), Ordering::Equal);
assert_eq!(cmp(3, 1), Ordering::Equal);
assert_eq!(cmp(1, 3), Ordering::Less);
let opts_desc = SortOptions {
descending: true,
nulls_first: true,
};
let cmp_desc = make_comparator(&array1, &array2, opts_desc).unwrap();
assert_eq!(cmp_desc(0, 0), Ordering::Greater); assert_eq!(cmp_desc(0, 1), Ordering::Greater); assert_eq!(cmp_desc(1, 1), Ordering::Less); }
#[test]
fn test_sparse_union() {
let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
let str_array = StringArray::from(vec![None, Some("b"), None]);
let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
let union_fields = [
(0, Arc::new(Field::new("a", DataType::Int32, false))),
(1, Arc::new(Field::new("b", DataType::Utf8, false))),
]
.into_iter()
.collect::<UnionFields>();
let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();
let opts = SortOptions::default();
let cmp = make_comparator(&array, &array, opts).unwrap();
assert_eq!(cmp(0, 2), Ordering::Less); assert_eq!(cmp(0, 1), Ordering::Less); }
#[test]
#[should_panic(expected = "index out of bounds")]
fn test_union_out_of_bounds() {
let int_array = Int32Array::from(vec![1, 2]);
let str_array = StringArray::from(vec!["a"]);
let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
let offsets = [0, 0, 1].into_iter().collect::<ScalarBuffer<i32>>();
let union_fields = [
(0, Arc::new(Field::new("A", DataType::Int32, false))),
(1, Arc::new(Field::new("B", DataType::Utf8, false))),
]
.into_iter()
.collect::<UnionFields>();
let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
let array = UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap();
let opts = SortOptions::default();
let cmp = make_comparator(&array, &array, opts).unwrap();
cmp(0, 3);
}
#[test]
fn test_union_incompatible_fields() {
let int_array1 = Int32Array::from(vec![1, 2]);
let str_array1 = StringArray::from(vec!["a", "b"]);
let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
let union_fields1 = [
(0, Arc::new(Field::new("A", DataType::Int32, false))),
(1, Arc::new(Field::new("B", DataType::Utf8, false))),
]
.into_iter()
.collect::<UnionFields>();
let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
let array1 =
UnionArray::try_new(union_fields1, type_ids1, Some(offsets1), children1).unwrap();
let int_array2 = Int32Array::from(vec![3, 4]);
let float_array2 = Float64Array::from(vec![1.0, 2.0]);
let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
let offsets2 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
let union_fields2 = [
(0, Arc::new(Field::new("A", DataType::Int32, false))),
(1, Arc::new(Field::new("C", DataType::Float64, false))),
]
.into_iter()
.collect::<UnionFields>();
let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(float_array2)];
let array2 =
UnionArray::try_new(union_fields2, type_ids2, Some(offsets2), children2).unwrap();
let opts = SortOptions::default();
let Result::Err(ArrowError::InvalidArgumentError(out)) =
make_comparator(&array1, &array2, opts)
else {
panic!("expected error when making comparator of incompatible union arrays");
};
assert_eq!(
&out,
"Cannot compare UnionArrays with different fields: left=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"B\", data_type: Utf8 })], right=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"C\", data_type: Float64 })]"
);
}
#[test]
fn test_union_incompatible_modes() {
let int_array1 = Int32Array::from(vec![1, 2]);
let str_array1 = StringArray::from(vec!["a", "b"]);
let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
let union_fields1 = [
(0, Arc::new(Field::new("A", DataType::Int32, false))),
(1, Arc::new(Field::new("B", DataType::Utf8, false))),
]
.into_iter()
.collect::<UnionFields>();
let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
let array1 =
UnionArray::try_new(union_fields1.clone(), type_ids1, Some(offsets1), children1)
.unwrap();
let int_array2 = Int32Array::from(vec![Some(3), None]);
let str_array2 = StringArray::from(vec![None, Some("c")]);
let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
let array2 = UnionArray::try_new(union_fields1, type_ids2, None, children2).unwrap();
let opts = SortOptions::default();
let Result::Err(ArrowError::InvalidArgumentError(out)) =
make_comparator(&array1, &array2, opts)
else {
panic!("expected error when making comparator of union arrays with different modes");
};
assert_eq!(
&out,
"Cannot compare UnionArrays with different modes: left=Dense, right=Sparse"
);
}
#[test]
fn test_null_array_cmp() {
let a = NullArray::new(3);
let b = NullArray::new(3);
let cmp = make_comparator(&a, &b, SortOptions::default()).unwrap();
assert_eq!(cmp(0, 0), Ordering::Equal);
assert_eq!(cmp(0, 1), Ordering::Equal);
assert_eq!(cmp(2, 0), Ordering::Equal);
}
#[test]
fn test_run_end_encoded_int32() {
let run_ends1 = Int32Array::from(vec![2, 5, 6]);
let values1 = Int32Array::from(vec![1, 2, 3]);
let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
let run_ends2 = Int32Array::from(vec![1, 3, 6]);
let values2 = Int32Array::from(vec![1, 2, 3]);
let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(cmp(0, 0), Ordering::Equal);
assert_eq!(cmp(0, 1), Ordering::Less);
assert_eq!(cmp(2, 1), Ordering::Equal);
assert_eq!(cmp(5, 5), Ordering::Equal);
assert_eq!(cmp(1, 2), Ordering::Less);
assert_eq!(cmp(4, 4), Ordering::Less);
}
#[test]
fn test_run_end_encoded_with_nulls() {
let run_ends1 = Int32Array::from(vec![2, 4, 5]);
let values1 = Int32Array::from(vec![Some(1), None, Some(2)]);
let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
let run_ends2 = Int32Array::from(vec![1, 3, 4, 5]);
let values2 = Int32Array::from(vec![None, Some(1), Some(2), None]);
let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
let opts = SortOptions::default();
let cmp = make_comparator(&array1, &array2, opts).unwrap();
assert_eq!(cmp(0, 1), Ordering::Equal);
assert_eq!(cmp(2, 0), Ordering::Equal);
assert_eq!(cmp(0, 0), Ordering::Greater);
assert_eq!(cmp(2, 1), Ordering::Less);
}
#[test]
fn test_run_end_encoded_int16() {
let run_ends1 = Int16Array::from(vec![3_i16, 5, 6]);
let values1 = StringArray::from(vec!["a", "b", "c"]);
let array1 = RunArray::<Int16Type>::try_new(&run_ends1, &values1).unwrap();
let run_ends2 = Int16Array::from(vec![2_i16, 4, 6]);
let values2 = StringArray::from(vec!["a", "b", "c"]);
let array2 = RunArray::<Int16Type>::try_new(&run_ends2, &values2).unwrap();
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(cmp(0, 0), Ordering::Equal); assert_eq!(cmp(2, 2), Ordering::Less); assert_eq!(cmp(3, 2), Ordering::Equal); assert_eq!(cmp(5, 4), Ordering::Equal); }
#[test]
fn test_run_end_encoded_int64() {
let run_ends1 = Int64Array::from(vec![2_i64, 4, 6]);
let values1 = Int64Array::from(vec![10_i64, 20, 30]);
let array1 = RunArray::<Int64Type>::try_new(&run_ends1, &values1).unwrap();
let run_ends2 = Int64Array::from(vec![3_i64, 5, 6]);
let values2 = Int64Array::from(vec![10_i64, 20, 30]);
let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
assert_eq!(cmp(0, 0), Ordering::Equal); assert_eq!(cmp(1, 2), Ordering::Equal); assert_eq!(cmp(2, 3), Ordering::Equal); assert_eq!(cmp(4, 4), Ordering::Greater); }
#[test]
fn test_run_end_encoded_sliced() {
let run_ends = Int32Array::from(vec![2, 5, 7, 8]);
let values = Int32Array::from(vec![1, 2, 3, 4]);
let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
let slice1 = array.slice(1, 4);
let slice2 = array.slice(3, 4);
let cmp = make_comparator(&slice1, &slice2, SortOptions::default()).unwrap();
assert_eq!(cmp(0, 0), Ordering::Less);
assert_eq!(cmp(1, 0), Ordering::Equal);
assert_eq!(cmp(3, 2), Ordering::Less);
assert_eq!(cmp(1, 3), Ordering::Less);
let run_ends2 = Int32Array::from(vec![2, 4]);
let values2 = Int32Array::from(vec![1, 2]);
let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
let cmp = make_comparator(&slice1, &array2, SortOptions::default()).unwrap();
assert_eq!(cmp(0, 0), Ordering::Equal);
assert_eq!(cmp(1, 1), Ordering::Greater);
assert_eq!(cmp(3, 3), Ordering::Equal);
}
#[test]
fn test_run_end_encoded_sliced_with_nulls() {
let run_ends = Int32Array::from(vec![2, 4, 6, 7, 8]);
let values = Int32Array::from(vec![Some(1), None, Some(2), None, Some(3)]);
let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
let slice1 = array.slice(1, 5);
let slice2 = array.slice(3, 5);
let opts = SortOptions::default(); let cmp = make_comparator(&slice1, &slice2, opts).unwrap();
assert_eq!(cmp(0, 0), Ordering::Greater);
assert_eq!(cmp(1, 0), Ordering::Equal);
assert_eq!(cmp(1, 1), Ordering::Less);
assert_eq!(cmp(3, 1), Ordering::Equal);
assert_eq!(cmp(4, 4), Ordering::Less);
assert_eq!(cmp(3, 3), Ordering::Greater);
}
#[test]
fn test_run_end_encoded_different_types() {
let run_ends1 = Int32Array::from(vec![2, 4, 6]);
let values1 = Int32Array::from(vec![1, 2, 3]);
let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
let run_ends2 = Int64Array::from(vec![2_i64, 4, 6]);
let values2 = Int64Array::from(vec![1_i64, 2, 3]);
let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
let result = make_comparator(&array1, &array2, SortOptions::default());
assert!(result.is_err());
let err = match result {
Err(e) => e.to_string(),
Ok(_) => panic!("Expected error"),
};
assert!(err.contains("Cannot compare RunEndEncoded arrays"));
}
}