use std::cmp::{Ordering, Reverse};
use crate::array::*;
use crate::compute::take;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use TimeUnit::*;
/// Returns a new array containing the elements of `values` in sorted order.
///
/// The ordering is first computed as a list of indices by `sort_to_indices`
/// and then materialised with `take`.
///
/// # Errors
/// Propagates any error reported by `sort_to_indices` or by `take`.
pub fn sort(values: &ArrayRef, options: Option<SortOptions>) -> Result<ArrayRef> {
    sort_to_indices(values, options).and_then(|order| take(values, &order, None))
}
pub fn sort_to_indices(
values: &ArrayRef,
options: Option<SortOptions>,
) -> Result<UInt32Array> {
let options = options.unwrap_or_default();
let range = values.offset()..values.len();
let (v, n): (Vec<usize>, Vec<usize>) = if values.data_type() == &DataType::Float32 {
let array = values
.as_any()
.downcast_ref::<Float32Array>()
.expect("Unable to downcast array");
#[allow(clippy::cmp_nan)]
range.partition(|index| array.is_valid(*index) && array.value(*index) != f32::NAN)
} else if values.data_type() == &DataType::Float64 {
let array = values
.as_any()
.downcast_ref::<Float64Array>()
.expect("Unable to downcast array");
#[allow(clippy::cmp_nan)]
range.partition(|index| array.is_valid(*index) && array.value(*index) != f64::NAN)
} else {
range.partition(|index| values.is_valid(*index))
};
let n = n.into_iter().map(|i| i as u32).collect();
match values.data_type() {
DataType::Boolean => sort_primitive::<BooleanType>(values, v, n, &options),
DataType::Int8 => sort_primitive::<Int8Type>(values, v, n, &options),
DataType::Int16 => sort_primitive::<Int16Type>(values, v, n, &options),
DataType::Int32 => sort_primitive::<Int32Type>(values, v, n, &options),
DataType::Int64 => sort_primitive::<Int64Type>(values, v, n, &options),
DataType::UInt8 => sort_primitive::<UInt8Type>(values, v, n, &options),
DataType::UInt16 => sort_primitive::<UInt16Type>(values, v, n, &options),
DataType::UInt32 => sort_primitive::<UInt32Type>(values, v, n, &options),
DataType::UInt64 => sort_primitive::<UInt64Type>(values, v, n, &options),
DataType::Float32 => sort_primitive::<Float32Type>(values, v, n, &options),
DataType::Float64 => sort_primitive::<Float64Type>(values, v, n, &options),
DataType::Date32(_) => sort_primitive::<Date32Type>(values, v, n, &options),
DataType::Date64(_) => sort_primitive::<Date64Type>(values, v, n, &options),
DataType::Time32(Second) => {
sort_primitive::<Time32SecondType>(values, v, n, &options)
}
DataType::Time32(Millisecond) => {
sort_primitive::<Time32MillisecondType>(values, v, n, &options)
}
DataType::Time64(Microsecond) => {
sort_primitive::<Time64MicrosecondType>(values, v, n, &options)
}
DataType::Time64(Nanosecond) => {
sort_primitive::<Time64NanosecondType>(values, v, n, &options)
}
DataType::Timestamp(Second, _) => {
sort_primitive::<TimestampSecondType>(values, v, n, &options)
}
DataType::Timestamp(Millisecond, _) => {
sort_primitive::<TimestampMillisecondType>(values, v, n, &options)
}
DataType::Timestamp(Microsecond, _) => {
sort_primitive::<TimestampMicrosecondType>(values, v, n, &options)
}
DataType::Timestamp(Nanosecond, _) => {
sort_primitive::<TimestampNanosecondType>(values, v, n, &options)
}
DataType::Interval(IntervalUnit::YearMonth) => {
sort_primitive::<IntervalYearMonthType>(values, v, n, &options)
}
DataType::Interval(IntervalUnit::DayTime) => {
sort_primitive::<IntervalDayTimeType>(values, v, n, &options)
}
DataType::Duration(TimeUnit::Second) => {
sort_primitive::<DurationSecondType>(values, v, n, &options)
}
DataType::Duration(TimeUnit::Millisecond) => {
sort_primitive::<DurationMillisecondType>(values, v, n, &options)
}
DataType::Duration(TimeUnit::Microsecond) => {
sort_primitive::<DurationMicrosecondType>(values, v, n, &options)
}
DataType::Duration(TimeUnit::Nanosecond) => {
sort_primitive::<DurationNanosecondType>(values, v, n, &options)
}
DataType::Utf8 => sort_string(values, v, n, &options),
t => Err(ArrowError::ComputeError(format!(
"Sort not supported for data type {:?}",
t
))),
}
}
/// Options controlling how a single array is ordered.
#[derive(Clone, Copy, Debug)]
pub struct SortOptions {
    /// Sort from largest to smallest when `true`, smallest to largest otherwise.
    pub descending: bool,
    /// Place null entries before all valid values when `true`, after them otherwise.
    pub nulls_first: bool,
}

/// The default ordering is ascending with nulls placed first.
impl Default for SortOptions {
    fn default() -> Self {
        SortOptions {
            nulls_first: true,
            descending: false,
        }
    }
}
/// Sorts the valid entries of a primitive array and returns the resulting
/// index order, with the null indices placed before or after them.
///
/// * `values` - the array being sorted; must be a `PrimitiveArray<T>`.
/// * `value_indices` - positions of the entries to be ordered by value.
/// * `null_indices` - positions of the null entries; emitted as a block, in
///   reverse order when sorting descending.
/// * `options` - sort direction and null placement.
///
/// Values that cannot be compared with themselves (floating point NaN) are
/// ordered above every other value, so they come last when ascending and
/// first (among the valid entries) when descending. The sort is stable.
fn sort_primitive<T>(
    values: &ArrayRef,
    value_indices: Vec<usize>,
    null_indices: Vec<u32>,
    options: &SortOptions,
) -> Result<UInt32Array>
where
    T: ArrowPrimitiveType,
    T::Native: std::cmp::PartialOrd,
{
    /// Total order built on top of `PartialOrd`.
    ///
    /// `sort_by` requires a total order. The previous fallback answered
    /// `Greater` for both `cmp(NaN, x)` and `cmp(x, NaN)`, which is
    /// inconsistent. Here an incomparable pair is resolved by ranking any
    /// value that is not comparable with itself above all others.
    fn cmp_incomparable_last<N: PartialOrd>(a: &N, b: &N) -> Ordering {
        a.partial_cmp(b).unwrap_or_else(|| {
            let a_incomparable = a.partial_cmp(a).is_none();
            let b_incomparable = b.partial_cmp(b).is_none();
            match (a_incomparable, b_incomparable) {
                (true, false) => Ordering::Greater,
                (false, true) => Ordering::Less,
                _ => Ordering::Equal,
            }
        })
    }

    let values = as_primitive_array::<T>(values);
    // Pair each index with its value so the values are fetched only once.
    let mut valids = value_indices
        .into_iter()
        .map(|index| (index as u32, values.value(index)))
        .collect::<Vec<(u32, T::Native)>>();
    let mut nulls = null_indices;

    if options.descending {
        valids.sort_by(|a, b| cmp_incomparable_last(&a.1, &b.1).reverse());
        nulls.reverse();
    } else {
        valids.sort_by(|a, b| cmp_incomparable_last(&a.1, &b.1));
    }

    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect();
    if options.nulls_first {
        nulls.append(&mut valid_indices);
        return Ok(UInt32Array::from(nulls));
    }
    valid_indices.append(&mut nulls);
    Ok(UInt32Array::from(valid_indices))
}
/// Sorts the valid entries of a string array and returns the resulting index
/// order, with the null indices placed before or after them.
///
/// * `values` - the array being sorted; must be a string array.
/// * `value_indices` - positions of the entries to be ordered by value.
/// * `null_indices` - positions of the null entries; emitted as a block, in
///   reverse order when sorting descending.
/// * `options` - sort direction and null placement.
///
/// Strings are compared with `str`'s `Ord` implementation and the sort is
/// stable.
fn sort_string(
    values: &ArrayRef,
    value_indices: Vec<usize>,
    null_indices: Vec<u32>,
    options: &SortOptions,
) -> Result<UInt32Array> {
    let strings = as_string_array(values);

    // (index, value) pairs, so that sorting never has to go back to the array.
    let mut keyed: Vec<(u32, &str)> = Vec::with_capacity(value_indices.len());
    for index in value_indices {
        keyed.push((index as u32, strings.value(index)));
    }

    let mut null_part = null_indices;
    if options.descending {
        keyed.sort_by(|lhs, rhs| Reverse(lhs.1).cmp(&Reverse(rhs.1)));
        null_part.reverse();
    } else {
        keyed.sort_by(|lhs, rhs| lhs.1.cmp(rhs.1));
    }

    let valid_part = keyed.into_iter().map(|(index, _)| index);
    let ordered: Vec<u32> = if options.nulls_first {
        null_part.into_iter().chain(valid_part).collect()
    } else {
        valid_part.chain(null_part).collect()
    };
    Ok(UInt32Array::from(ordered))
}
/// One column taking part in a lexicographical sort.
#[derive(Clone, Debug)]
pub struct SortColumn {
/// The array holding this column's values.
pub values: ArrayRef,
/// Sort options for this column; `lexsort_to_indices` substitutes
/// `SortOptions::default()` when this is `None`.
pub options: Option<SortOptions>,
}
/// Sorts several columns lexicographically and returns every column
/// reordered accordingly, in the same order as `columns`.
///
/// The row order is computed once by `lexsort_to_indices` and then applied to
/// each column with `take`.
///
/// # Errors
/// Propagates the first error reported by `lexsort_to_indices` or `take`.
pub fn lexsort(columns: &[SortColumn]) -> Result<Vec<ArrayRef>> {
    let order = lexsort_to_indices(columns)?;
    let mut sorted = Vec::with_capacity(columns.len());
    for column in columns {
        sorted.push(take(&column.values, &order, None)?);
    }
    Ok(sorted)
}
/// Computes the row order that sorts `columns` lexicographically.
///
/// Rows are compared column by column, in the order given. A later column is
/// only consulted when all earlier columns tie (equal values, or both null).
/// Each column uses its own `SortOptions` (`SortOptions::default()` when
/// `None`) for direction and null placement. The sort is stable.
///
/// A single column is delegated to `sort_to_indices`.
///
/// # Errors
/// Returns `ArrowError::ComputeError` when `columns` is empty or when the
/// columns do not all have the same number of rows, and propagates any error
/// from `as_ordarray` or `sort_to_indices`.
pub fn lexsort_to_indices(columns: &[SortColumn]) -> Result<UInt32Array> {
    // Previously an empty slice panicked on an `unwrap` of the row count.
    if columns.is_empty() {
        return Err(ArrowError::ComputeError(
            "lexical sort requires at least one column".to_string(),
        ));
    }
    if columns.len() == 1 {
        let column = &columns[0];
        return sort_to_indices(&column.values, column.options);
    }

    // The row count is the logical length of each array. Subtracting
    // `offset()` under-counts the rows of a sliced array and underflows when
    // the offset exceeds the length.
    // NOTE(review): assumes `len()` is the logical length of the (possibly
    // sliced) array, as in Arrow's `Array` API — confirm.
    let row_count = columns[0].values.len();
    let flat_columns = columns
        .iter()
        .map(|column| -> Result<(&OrdArray, SortOptions)> {
            if column.values.len() != row_count {
                return Err(ArrowError::ComputeError(
                    "lexical sort columns have different row counts".to_string(),
                ));
            }
            Ok((
                as_ordarray(&column.values)?,
                column.options.unwrap_or_default(),
            ))
        })
        .collect::<Result<Vec<(&OrdArray, SortOptions)>>>()?;

    let lex_comparator = |a_idx: &usize, b_idx: &usize| -> Ordering {
        for column in flat_columns.iter() {
            let values = &column.0;
            let sort_option = column.1;
            match (values.is_valid(*a_idx), values.is_valid(*b_idx)) {
                (true, true) => match values.cmp_value(*a_idx, *b_idx) {
                    // Tie on this column: let the next column decide.
                    Ordering::Equal => continue,
                    order => {
                        return if sort_option.descending {
                            order.reverse()
                        } else {
                            order
                        };
                    }
                },
                // Exactly one side is null: placement depends only on
                // `nulls_first`, never on the sort direction.
                (false, true) => {
                    return if sort_option.nulls_first {
                        Ordering::Less
                    } else {
                        Ordering::Greater
                    };
                }
                (true, false) => {
                    return if sort_option.nulls_first {
                        Ordering::Greater
                    } else {
                        Ordering::Less
                    };
                }
                // Both null: tie on this column.
                (false, false) => continue,
            }
        }
        Ordering::Equal
    };

    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
    value_indices.sort_by(lex_comparator);
    Ok(UInt32Array::from(
        value_indices
            .into_iter()
            .map(|i| i as u32)
            .collect::<Vec<u32>>(),
    ))
}
#[cfg(test)]
mod tests {
use super::*;
use std::{convert::TryFrom, sync::Arc};
// Builds a primitive array from `data`, runs `sort_to_indices` with `options`
// and asserts that the returned indices equal `expected_data`.
fn test_sort_to_indices_primitive_arrays<T>(
    data: Vec<Option<T::Native>>,
    options: Option<SortOptions>,
    expected_data: Vec<u32>,
) where
    T: ArrowPrimitiveType,
    PrimitiveArray<T>: From<Vec<Option<T::Native>>> + ArrayEqual,
{
    let input: ArrayRef = Arc::new(PrimitiveArray::<T>::from(data));
    let actual = sort_to_indices(&input, options).unwrap();
    let expected = UInt32Array::from(expected_data);
    assert!(actual.equals(&expected))
}
// Builds a primitive array from `data`, runs `sort` with `options` and asserts
// that the sorted array equals one built from `expected_data`.
fn test_sort_primitive_arrays<T>(
    data: Vec<Option<T::Native>>,
    options: Option<SortOptions>,
    expected_data: Vec<Option<T::Native>>,
) where
    T: ArrowPrimitiveType,
    PrimitiveArray<T>: From<Vec<Option<T::Native>>> + ArrayEqual,
{
    let input: ArrayRef = Arc::new(PrimitiveArray::<T>::from(data));
    let sorted = sort(&input, options).unwrap();
    let actual = sorted.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
    let expected = PrimitiveArray::<T>::from(expected_data);
    assert!(actual.equals(&expected))
}
// Builds a string array from `data`, runs `sort_to_indices` with `options`
// and asserts that the returned indices equal `expected_data`.
fn test_sort_to_indices_string_arrays(
    data: Vec<Option<&str>>,
    options: Option<SortOptions>,
    expected_data: Vec<u32>,
) {
    let input: ArrayRef =
        Arc::new(StringArray::try_from(data).expect("Unable to create string array"));
    let actual = sort_to_indices(&input, options).unwrap();
    let expected = UInt32Array::from(expected_data);
    assert!(actual.equals(&expected))
}
// Builds a string array from `data`, runs `sort` with `options` and asserts
// that the sorted array equals one built from `expected_data`.
fn test_sort_string_arrays(
    data: Vec<Option<&str>>,
    options: Option<SortOptions>,
    expected_data: Vec<Option<&str>>,
) {
    let input: ArrayRef =
        Arc::new(StringArray::try_from(data).expect("Unable to create string array"));
    let expected =
        StringArray::try_from(expected_data).expect("Unable to create string array");
    let sorted = sort(&input, options).unwrap();
    let actual = sorted.as_any().downcast_ref::<StringArray>().unwrap();
    assert!(actual.equals(&expected))
}
// Runs `lexsort` on `input` and asserts that every sorted column equals the
// column at the same position in `expected_output`. Only Int64, UInt32 and
// Utf8 columns are supported; any other type panics.
fn test_lex_sort_arrays(input: Vec<SortColumn>, expected_output: Vec<ArrayRef>) {
    let sorted = lexsort(&input).unwrap();
    for (position, arr) in sorted.iter().enumerate() {
        let actual: Box<&dyn ArrayEqual> = match arr.data_type() {
            DataType::Int64 => Box::new(as_primitive_array::<Int64Type>(&arr)),
            DataType::UInt32 => Box::new(as_primitive_array::<UInt32Type>(&arr)),
            DataType::Utf8 => Box::new(as_string_array(&arr)),
            _ => panic!("unexpected array type"),
        };
        assert!(
            actual.equals(&(*expected_output[position])),
            "expect {:#?} to be: {:#?}",
            sorted,
            expected_output
        );
    }
}
// Checks the index order produced by `sort_to_indices` for integer, float and
// boolean arrays under three option sets. Equal values keep their original
// relative order, and null indices come out reversed when sorting descending.
#[test]
fn test_sort_to_indices_primitives() {
// Default options (`None`): ascending, nulls first.
test_sort_to_indices_primitive_arrays::<Int8Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
None,
vec![0, 5, 3, 1, 4, 2],
);
test_sort_to_indices_primitive_arrays::<Int16Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
None,
vec![0, 5, 3, 1, 4, 2],
);
test_sort_to_indices_primitive_arrays::<Int32Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
None,
vec![0, 5, 3, 1, 4, 2],
);
test_sort_to_indices_primitive_arrays::<Int64Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
None,
vec![0, 5, 3, 1, 4, 2],
);
test_sort_to_indices_primitive_arrays::<Float32Type>(
vec![
None,
Some(-0.05),
Some(2.225),
Some(-1.01),
Some(-0.05),
None,
],
None,
vec![0, 5, 3, 1, 4, 2],
);
test_sort_to_indices_primitive_arrays::<Float64Type>(
vec![
None,
Some(-0.05),
Some(2.225),
Some(-1.01),
Some(-0.05),
None,
],
None,
vec![0, 5, 3, 1, 4, 2],
);
// Descending, nulls last.
test_sort_to_indices_primitive_arrays::<Int8Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 1, 4, 3, 5, 0],
);
test_sort_to_indices_primitive_arrays::<Int16Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 1, 4, 3, 5, 0],
);
test_sort_to_indices_primitive_arrays::<Int32Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 1, 4, 3, 5, 0],
);
test_sort_to_indices_primitive_arrays::<Int64Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 1, 4, 3, 5, 0],
);
test_sort_to_indices_primitive_arrays::<Float32Type>(
vec![
None,
Some(0.005),
Some(20.22),
Some(-10.3),
Some(0.005),
None,
],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 1, 4, 3, 5, 0],
);
test_sort_to_indices_primitive_arrays::<Float64Type>(
vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 1, 4, 3, 5, 0],
);
// Descending, nulls first.
test_sort_to_indices_primitive_arrays::<Int8Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![5, 0, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Int16Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![5, 0, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Int32Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![5, 0, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Int64Type>(
vec![None, Some(0), Some(2), Some(-1), Some(0), None],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![5, 0, 2, 1, 4, 3],
);
// The float inputs below have no ties, unlike the integer inputs above.
test_sort_to_indices_primitive_arrays::<Float32Type>(
vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![5, 0, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Float64Type>(
vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![5, 0, 2, 1, 4, 3],
);
// Booleans: `false` orders before `true`.
test_sort_to_indices_primitive_arrays::<BooleanType>(
vec![None, Some(false), Some(true), Some(true), Some(false), None],
None,
vec![0, 5, 1, 4, 2, 3],
);
test_sort_to_indices_primitive_arrays::<BooleanType>(
vec![None, Some(false), Some(true), Some(true), Some(false), None],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 3, 1, 4, 5, 0],
);
test_sort_to_indices_primitive_arrays::<BooleanType>(
vec![None, Some(false), Some(true), Some(true), Some(false), None],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![5, 0, 2, 3, 1, 4],
);
}
// Checks the values produced by `sort` for unsigned, signed and float arrays
// under every option set used by the kernel, including NaN placement.
#[test]
fn test_sort_primitives() {
    // Default options (`None`): ascending, nulls first.
    test_sort_primitive_arrays::<UInt8Type>(
        vec![None, Some(3), Some(5), Some(2), Some(3), None],
        None,
        vec![None, None, Some(2), Some(3), Some(3), Some(5)],
    );
    test_sort_primitive_arrays::<UInt16Type>(
        vec![None, Some(3), Some(5), Some(2), Some(3), None],
        None,
        vec![None, None, Some(2), Some(3), Some(3), Some(5)],
    );
    test_sort_primitive_arrays::<UInt32Type>(
        vec![None, Some(3), Some(5), Some(2), Some(3), None],
        None,
        vec![None, None, Some(2), Some(3), Some(3), Some(5)],
    );
    test_sort_primitive_arrays::<UInt64Type>(
        vec![None, Some(3), Some(5), Some(2), Some(3), None],
        None,
        vec![None, None, Some(2), Some(3), Some(3), Some(5)],
    );

    // Descending, nulls last.
    test_sort_primitive_arrays::<Int8Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: false,
        }),
        vec![Some(2), Some(0), Some(0), Some(-1), None, None],
    );
    test_sort_primitive_arrays::<Int16Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: false,
        }),
        vec![Some(2), Some(0), Some(0), Some(-1), None, None],
    );
    test_sort_primitive_arrays::<Int32Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: false,
        }),
        vec![Some(2), Some(0), Some(0), Some(-1), None, None],
    );
    // This case used to repeat `Int16Type`, leaving Int64 untested for
    // "descending, nulls last".
    test_sort_primitive_arrays::<Int64Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: false,
        }),
        vec![Some(2), Some(0), Some(0), Some(-1), None, None],
    );

    // Descending, nulls first.
    test_sort_primitive_arrays::<Int8Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: true,
        }),
        vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
    );
    test_sort_primitive_arrays::<Int16Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: true,
        }),
        vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
    );
    test_sort_primitive_arrays::<Int32Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: true,
        }),
        vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
    );
    test_sort_primitive_arrays::<Int64Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: true,
        }),
        vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
    );
    test_sort_primitive_arrays::<Float32Type>(
        vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
        Some(SortOptions {
            descending: true,
            nulls_first: true,
        }),
        vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
    );
    // NaN is expected ahead of every other valid value when descending.
    test_sort_primitive_arrays::<Float64Type>(
        vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
        Some(SortOptions {
            descending: true,
            nulls_first: true,
        }),
        vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
    );

    // Ascending, nulls first (spelled out explicitly).
    test_sort_primitive_arrays::<Int8Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: false,
            nulls_first: true,
        }),
        vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
    );
    test_sort_primitive_arrays::<Int16Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: false,
            nulls_first: true,
        }),
        vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
    );
    test_sort_primitive_arrays::<Int32Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: false,
            nulls_first: true,
        }),
        vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
    );
    test_sort_primitive_arrays::<Int64Type>(
        vec![None, Some(0), Some(2), Some(-1), Some(0), None],
        Some(SortOptions {
            descending: false,
            nulls_first: true,
        }),
        vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
    );
    test_sort_primitive_arrays::<Float32Type>(
        vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
        Some(SortOptions {
            descending: false,
            nulls_first: true,
        }),
        vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
    );
    // NaN is expected after every other valid value when ascending.
    test_sort_primitive_arrays::<Float64Type>(
        vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
        Some(SortOptions {
            descending: false,
            nulls_first: true,
        }),
        vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
    );
}
// Checks the index order produced by `sort_to_indices` for a string array
// with two nulls (indices 0 and 3) under four option sets.
#[test]
fn test_sort_to_indices_strings() {
// Default options (`None`): ascending, nulls first.
test_sort_to_indices_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
None,
vec![0, 3, 5, 1, 4, 2],
);
// Descending, nulls last; the null indices come out reversed (3 before 0).
test_sort_to_indices_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![2, 4, 1, 5, 3, 0],
);
// Ascending, nulls first, spelled out explicitly: same result as `None`.
test_sort_to_indices_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
Some(SortOptions {
descending: false,
nulls_first: true,
}),
vec![0, 3, 5, 1, 4, 2],
);
// Descending, nulls first.
test_sort_to_indices_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![3, 0, 2, 4, 1, 5],
);
}
// Checks the values produced by `sort` for a string array with two nulls
// under four option sets.
#[test]
fn test_sort_strings() {
// Default options (`None`): ascending, nulls first.
test_sort_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
None,
vec![
None,
None,
Some("-ad"),
Some("bad"),
Some("glad"),
Some("sad"),
],
);
// Descending, nulls last.
test_sort_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
Some(SortOptions {
descending: true,
nulls_first: false,
}),
vec![
Some("sad"),
Some("glad"),
Some("bad"),
Some("-ad"),
None,
None,
],
);
// Ascending, nulls first, spelled out explicitly.
test_sort_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
Some(SortOptions {
descending: false,
nulls_first: true,
}),
vec![
None,
None,
Some("-ad"),
Some("bad"),
Some("glad"),
Some("sad"),
],
);
// Descending, nulls first.
test_sort_string_arrays(
vec![
None,
Some("bad"),
Some("sad"),
None,
Some("glad"),
Some("-ad"),
],
Some(SortOptions {
descending: true,
nulls_first: true,
}),
vec![
None,
None,
Some("sad"),
Some("glad"),
Some("bad"),
Some("-ad"),
],
);
}
// A single Int64 column with default options must come back in ascending order.
#[test]
fn test_lex_sort_single_column() {
    let unsorted: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
        Some(17),
        Some(2),
        Some(-1),
        Some(0),
    ]));
    let ascending: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
        Some(-1),
        Some(0),
        Some(2),
        Some(17),
    ]));

    let input = vec![SortColumn {
        values: unsorted,
        options: None,
    }];
    test_lex_sort_arrays(input, vec![ascending]);
}
// Columns with different row counts (two rows vs. one row) must be rejected.
#[test]
fn test_lex_sort_unaligned_rows() {
    let two_rows: ArrayRef =
        Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]));
    let one_row: ArrayRef = Arc::new(
        StringArray::try_from(vec![Some("foo")]).expect("Unable to create string array"),
    );

    let input = vec![
        SortColumn {
            values: two_rows,
            options: None,
        },
        SortColumn {
            values: one_row,
            options: None,
        },
    ];

    let outcome = lexsort(&input);
    assert!(
        outcome.is_err(),
        "lexsort should reject columns with different row counts"
    );
}
// Exercises `lexsort` on several columns of differing types, covering
// tie-breaking by later columns, nulls in one or both columns, and columns
// that use different sort options.
#[test]
fn test_lex_sort_mixed_types() {
// Case 1: three numeric columns, default options. The two rows that tie on
// 0 in the first column are ordered by the second column (101 before 102).
let input = vec![
SortColumn {
values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(0),
Some(2),
Some(-1),
Some(0),
])) as ArrayRef,
options: None,
},
SortColumn {
values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
Some(101),
Some(8),
Some(7),
Some(102),
])) as ArrayRef,
options: None,
},
SortColumn {
values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(-2),
Some(-3),
Some(-4),
])) as ArrayRef,
options: None,
},
];
let expected = vec![
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(0),
Some(0),
Some(2),
])) as ArrayRef,
Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
Some(7),
Some(101),
Some(102),
Some(8),
])) as ArrayRef,
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-3),
Some(-1),
Some(-4),
Some(-2),
])) as ArrayRef,
];
test_lex_sort_arrays(input, expected);
// Case 2: Int64 + Utf8, both descending. The tie on 0 is broken by the
// strings in descending order ("foo" before "bar").
let input = vec![
SortColumn {
values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(0),
Some(2),
Some(-1),
Some(0),
])) as ArrayRef,
options: Some(SortOptions {
descending: true,
nulls_first: true,
}),
},
SortColumn {
values: Arc::new(
StringArray::try_from(vec![
Some("foo"),
Some("9"),
Some("7"),
Some("bar"),
])
.expect("Unable to create string array"),
) as ArrayRef,
options: Some(SortOptions {
descending: true,
nulls_first: true,
}),
},
];
let expected = vec![
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(2),
Some(0),
Some(0),
Some(-1),
])) as ArrayRef,
Arc::new(
StringArray::try_from(vec![
Some("9"),
Some("foo"),
Some("bar"),
Some("7"),
])
.expect("Unable to create string array"),
) as ArrayRef,
];
test_lex_sort_arrays(input, expected);
// Case 3: nulls in both columns, descending with nulls first. The two rows
// that are null in the first column are ordered by the second column,
// where the null string comes first.
let input = vec![
SortColumn {
values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
None,
Some(-1),
Some(2),
None,
])) as ArrayRef,
options: Some(SortOptions {
descending: true,
nulls_first: true,
}),
},
SortColumn {
values: Arc::new(
StringArray::try_from(vec![
Some("foo"),
Some("world"),
Some("hello"),
None,
])
.expect("Unable to create string array"),
) as ArrayRef,
options: Some(SortOptions {
descending: true,
nulls_first: true,
}),
},
];
let expected = vec![
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
None,
None,
Some(2),
Some(-1),
])) as ArrayRef,
Arc::new(
StringArray::try_from(vec![
None,
Some("foo"),
Some("hello"),
Some("world"),
])
.expect("Unable to create string array"),
) as ArrayRef,
];
test_lex_sort_arrays(input, expected);
// Case 4: same data as case 3, descending with nulls last in both columns.
let input = vec![
SortColumn {
values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
None,
Some(-1),
Some(2),
None,
])) as ArrayRef,
options: Some(SortOptions {
descending: true,
nulls_first: false,
}),
},
SortColumn {
values: Arc::new(
StringArray::try_from(vec![
Some("foo"),
Some("world"),
Some("hello"),
None,
])
.expect("Unable to create string array"),
) as ArrayRef,
options: Some(SortOptions {
descending: true,
nulls_first: false,
}),
},
];
let expected = vec![
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(2),
Some(-1),
None,
None,
])) as ArrayRef,
Arc::new(
StringArray::try_from(vec![
Some("hello"),
Some("world"),
Some("foo"),
None,
])
.expect("Unable to create string array"),
) as ArrayRef,
];
test_lex_sort_arrays(input, expected);
// Case 5: per-column options differ. The first column is ascending with
// nulls last; the second is descending with nulls first and breaks the
// ties on -1 and on null.
let input = vec![
SortColumn {
values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
None,
Some(-1),
Some(2),
Some(-1),
None,
])) as ArrayRef,
options: Some(SortOptions {
descending: false,
nulls_first: false,
}),
},
SortColumn {
values: Arc::new(
StringArray::try_from(vec![
Some("foo"),
Some("bar"),
Some("world"),
Some("hello"),
None,
])
.expect("Unable to create string array"),
) as ArrayRef,
options: Some(SortOptions {
descending: true,
nulls_first: true,
}),
},
];
let expected = vec![
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(-1),
Some(2),
None,
None,
])) as ArrayRef,
Arc::new(
StringArray::try_from(vec![
Some("hello"),
Some("bar"),
Some("world"),
None,
Some("foo"),
])
.expect("Unable to create string array"),
) as ArrayRef,
];
test_lex_sort_arrays(input, expected);
}
}