mod lex_sort;
mod row;
use arrow2::array::*;
use arrow2::compute::sort::*;
use arrow2::datatypes::*;
use arrow2::types::NativeType;
fn to_indices_boolean_arrays(data: &[Option<bool>], options: SortOptions, expected_data: &[i32]) {
let output = BooleanArray::from(data);
let expected = Int32Array::from_slice(expected_data);
let output = sort_to_indices(&output, &options, None).unwrap();
assert_eq!(output, expected)
}
/// Checks the values produced when sorting a primitive array.
///
/// Both `data` and `expected_data` are turned into `PrimitiveArray<T>`s that
/// are re-tagged with `data_type` via `.to(..)`. The input is then sorted with
/// `options` (passing `None` as the third argument) and the result is asserted
/// to equal the expected array.
///
/// Panics if the sort returns an error or the arrays differ.
fn primitive_arrays<T>(
    data: &[Option<T>],
    data_type: DataType,
    options: SortOptions,
    expected_data: &[Option<T>],
) where
    T: NativeType,
{
    // `data_type` is needed twice; clone for the first use, move for the second.
    let unsorted = PrimitiveArray::<T>::from(data).to(data_type.clone());
    let expected = PrimitiveArray::<T>::from(expected_data).to(data_type);

    let sorted = sort(&unsorted, &options, None).unwrap();
    assert_eq!(expected, sorted.as_ref());
}
fn to_indices_string_arrays(data: &[Option<&str>], options: SortOptions, expected_data: &[i32]) {
let input = Utf8Array::<i32>::from(data);
let expected = Int32Array::from_slice(expected_data);
let output = sort_to_indices(&input, &options, None).unwrap();
assert_eq!(output, expected)
}
/// Checks the values produced when sorting a UTF-8 array.
///
/// Builds `Utf8Array<i32>`s from `data` and `expected_data`, sorts the former
/// with `options` (passing `None` as the third argument) and asserts that the
/// sorted array equals the expected one.
///
/// Panics if the sort returns an error or the arrays differ.
fn string_arrays(data: &[Option<&str>], options: SortOptions, expected_data: &[Option<&str>]) {
    let unsorted = Utf8Array::<i32>::from(data);
    let expected = Utf8Array::<i32>::from(expected_data);

    let sorted = sort(&unsorted, &options, None).unwrap();
    assert_eq!(expected, sorted.as_ref());
}
/// Checks the values produced when sorting a dictionary-encoded string array.
///
/// `data` and `expected_data` are each pushed into a
/// `MutableDictionaryArray<i32, MutableUtf8Array<i32>>` and converted with
/// `into_arc`. The input is sorted with `options` (passing `None` as the third
/// argument) and the result is asserted to equal the expected array.
///
/// Panics if building either dictionary fails, the sort returns an error, or
/// the arrays differ.
fn string_dict_arrays(data: &[Option<&str>], options: SortOptions, expected_data: &[Option<&str>]) {
    // Both sides are built the same way, so share the construction.
    let build_dictionary = |values: &[Option<&str>]| {
        let mut builder = MutableDictionaryArray::<i32, MutableUtf8Array<i32>>::new();
        builder.try_extend(values.iter().copied()).unwrap();
        builder.into_arc()
    };

    let unsorted = build_dictionary(data);
    let expected = build_dictionary(expected_data);

    let sorted = sort(unsorted.as_ref(), &options, None).unwrap();
    assert_eq!(expected.as_ref(), sorted.as_ref());
}
/// Sorting booleans to indices for several `SortOptions` combinations.
#[test]
fn boolean() {
    // Every case sorts the same input.
    let values = [None, Some(false), Some(true), Some(true), Some(false), None];

    // (descending, nulls_first, expected indices)
    let cases: [(bool, bool, [i32; 6]); 3] = [
        (false, true, [0, 5, 1, 4, 2, 3]),
        (true, false, [2, 3, 1, 4, 5, 0]),
        (true, true, [5, 0, 2, 3, 1, 4]),
    ];

    for &(descending, nulls_first, expected) in &cases {
        to_indices_boolean_arrays(
            &values,
            SortOptions {
                descending,
                nulls_first,
            },
            &expected,
        );
    }
}
/// Sorting `f64` arrays that contain NaN values, with nulls placed first.
///
/// The expected arrays place NaN before all other non-null values when
/// descending and after them when ascending.
// NOTE(review): presumably ignored because NaN never compares equal to itself,
// so an `assert_eq!` over arrays containing NaN cannot pass even when the
// ordering is right — confirm before re-enabling.
#[test]
#[ignore]
fn test_nans() {
    let nan = f64::NAN;

    // The two inputs used below: one with nulls and a single NaN, one that is
    // mostly NaN with no nulls.
    let with_nulls = [None, Some(0.0), Some(2.0), Some(-1.0), Some(nan), None];
    let mostly_nan = [Some(nan), Some(nan), Some(nan), Some(1.0)];

    // All cases put nulls first; only the direction varies.
    let nulls_first = |descending| SortOptions {
        descending,
        nulls_first: true,
    };

    // Descending.
    primitive_arrays::<f64>(
        &with_nulls,
        DataType::Float64,
        nulls_first(true),
        &[None, None, Some(nan), Some(2.0), Some(0.0), Some(-1.0)],
    );
    primitive_arrays::<f64>(
        &mostly_nan,
        DataType::Float64,
        nulls_first(true),
        &[Some(nan), Some(nan), Some(nan), Some(1.0)],
    );

    // Ascending.
    primitive_arrays::<f64>(
        &with_nulls,
        DataType::Float64,
        nulls_first(false),
        &[None, None, Some(-1.0), Some(0.0), Some(2.0), Some(nan)],
    );
    primitive_arrays::<f64>(
        &mostly_nan,
        DataType::Float64,
        nulls_first(false),
        &[Some(1.0), Some(nan), Some(nan), Some(nan)],
    );
}
/// Sorting strings to indices for several `SortOptions` combinations.
#[test]
fn to_indices_strings() {
    // Every case sorts the same input.
    let words = [
        None,
        Some("bad"),
        Some("sad"),
        None,
        Some("glad"),
        Some("-ad"),
    ];

    // (descending, nulls_first, expected indices)
    // NOTE(review): the third case repeats the first one, so the combination
    // (ascending, nulls last) is not exercised here.
    let cases: [(bool, bool, [i32; 6]); 4] = [
        (false, true, [0, 3, 5, 1, 4, 2]),
        (true, false, [2, 4, 1, 5, 0, 3]),
        (false, true, [0, 3, 5, 1, 4, 2]),
        (true, true, [0, 3, 2, 4, 1, 5]),
    ];

    for &(descending, nulls_first, expected) in &cases {
        to_indices_string_arrays(
            &words,
            SortOptions {
                descending,
                nulls_first,
            },
            &expected,
        );
    }
}
/// Sorting string values for several `SortOptions` combinations.
#[test]
fn strings() {
    // Every case sorts the same input.
    let words = [
        None,
        Some("bad"),
        Some("sad"),
        None,
        Some("glad"),
        Some("-ad"),
    ];

    // (descending, nulls_first, expected sorted values)
    // NOTE(review): the third case repeats the first one, so the combination
    // (ascending, nulls last) is not exercised here.
    let cases: [(bool, bool, [Option<&str>; 6]); 4] = [
        (
            false,
            true,
            [
                None,
                None,
                Some("-ad"),
                Some("bad"),
                Some("glad"),
                Some("sad"),
            ],
        ),
        (
            true,
            false,
            [
                Some("sad"),
                Some("glad"),
                Some("bad"),
                Some("-ad"),
                None,
                None,
            ],
        ),
        (
            false,
            true,
            [
                None,
                None,
                Some("-ad"),
                Some("bad"),
                Some("glad"),
                Some("sad"),
            ],
        ),
        (
            true,
            true,
            [
                None,
                None,
                Some("sad"),
                Some("glad"),
                Some("bad"),
                Some("-ad"),
            ],
        ),
    ];

    for &(descending, nulls_first, expected) in &cases {
        string_arrays(
            &words,
            SortOptions {
                descending,
                nulls_first,
            },
            &expected,
        );
    }
}
/// Sorting dictionary-encoded strings for several `SortOptions` combinations.
#[test]
fn string_dicts() {
    // Every case sorts the same input.
    let words = [
        None,
        Some("bad"),
        Some("sad"),
        None,
        Some("glad"),
        Some("-ad"),
    ];

    // (descending, nulls_first, expected sorted values)
    // NOTE(review): the third case repeats the first one, so the combination
    // (ascending, nulls last) is not exercised here.
    let cases: [(bool, bool, [Option<&str>; 6]); 4] = [
        (
            false,
            true,
            [
                None,
                None,
                Some("-ad"),
                Some("bad"),
                Some("glad"),
                Some("sad"),
            ],
        ),
        (
            true,
            false,
            [
                Some("sad"),
                Some("glad"),
                Some("bad"),
                Some("-ad"),
                None,
                None,
            ],
        ),
        (
            false,
            true,
            [
                None,
                None,
                Some("-ad"),
                Some("bad"),
                Some("glad"),
                Some("sad"),
            ],
        ),
        (
            true,
            true,
            [
                None,
                None,
                Some("sad"),
                Some("glad"),
                Some("bad"),
                Some("-ad"),
            ],
        ),
    ];

    for &(descending, nulls_first, expected) in &cases {
        string_dict_arrays(
            &words,
            SortOptions {
                descending,
                nulls_first,
            },
            &expected,
        );
    }
}
/// `can_sort` must agree with `sort`: for each listed data type, sorting an
/// all-null array of that type succeeds exactly when `can_sort` reports the
/// type as sortable.
#[test]
fn consistency() {
    use arrow2::array::new_null_array;
    use arrow2::datatypes::DataType::*;
    use arrow2::datatypes::TimeUnit;

    let datatypes = vec![
        Null,
        Boolean,
        UInt8,
        UInt16,
        UInt32,
        UInt64,
        Int8,
        Int16,
        Int32,
        Int64,
        Float32,
        Float64,
        Timestamp(TimeUnit::Second, None),
        Timestamp(TimeUnit::Millisecond, None),
        Timestamp(TimeUnit::Microsecond, None),
        Timestamp(TimeUnit::Nanosecond, None),
        Time64(TimeUnit::Microsecond),
        Time64(TimeUnit::Nanosecond),
        Date32,
        Time32(TimeUnit::Second),
        Time32(TimeUnit::Millisecond),
        Date64,
        Utf8,
        LargeUtf8,
        Binary,
        LargeBinary,
        Duration(TimeUnit::Second),
        Duration(TimeUnit::Millisecond),
        Duration(TimeUnit::Microsecond),
        Duration(TimeUnit::Nanosecond),
    ];

    for data_type in datatypes {
        // Ask first, so the data type can then be moved into the array.
        let sortable = can_sort(&data_type);
        let nulls = new_null_array(data_type, 10);
        let options = SortOptions {
            descending: true,
            nulls_first: true,
        };

        let outcome = sort(nulls.as_ref(), &options, None);
        if sortable {
            assert!(outcome.is_ok());
        } else {
            assert!(outcome.is_err());
        }
    }
}