use regex::Regex;
use std::collections::HashMap;
use std::sync::Arc;
use crate::array::*;
use crate::buffer::{Buffer, MutableBuffer};
use crate::compute::util::combine_option_bitmap;
use crate::datatypes::{ArrowNumericType, DataType};
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
/// Compares two arrays element by element with `$op` and evaluates to
/// `Result<BooleanArray>`.
///
/// Returns early from the enclosing function with a `ComputeError` when the
/// two arrays differ in length. The validity bitmap of the result is whatever
/// `combine_option_bitmap` produces for the two inputs.
macro_rules! compare_op {
    ($left: expr, $right:expr, $op:expr) => {{
        let len = $left.len();
        if len != $right.len() {
            return Err(ArrowError::ComputeError(
                "Cannot perform comparison operation on arrays of different length"
                    .to_string(),
            ));
        }
        let validity = combine_option_bitmap($left.data_ref(), $right.data_ref(), len)?;

        // One bit per element; capacity is rounded up to a multiple of 64 bytes.
        let num_bytes = bit_util::ceil(len, 8);
        let mut bits = MutableBuffer::new(bit_util::round_upto_multiple_of_64(num_bytes));
        bits.resize(num_bytes);
        let bits_ptr = bits.as_mut_ptr();

        for idx in 0..len {
            if !$op($left.value(idx), $right.value(idx)) {
                continue;
            }
            // SAFETY: `idx < len` and the buffer was resized to `ceil(len, 8)`
            // bytes above, so bit `idx` lies inside the buffer.
            unsafe { bit_util::set_bit_raw(bits_ptr, idx) };
        }

        Ok(BooleanArray::from(Arc::new(ArrayData::new(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![bits.into()],
            vec![],
        ))))
    }};
}
/// Compares every element of an array against a single scalar with `$op` and
/// evaluates to `Result<BooleanArray>`.
///
/// The result carries the validity bitmap of `$left`. The bitmap is re-sliced
/// with the array's offset because the output array is built with offset 0;
/// cloning the raw buffer would misalign the null bits for sliced inputs.
macro_rules! compare_op_scalar {
    ($left: expr, $right:expr, $op:expr) => {{
        let len = $left.len();
        // Align validity bits with the (offset-free) output array.
        let null_bit_buffer = $left
            .data_ref()
            .null_buffer()
            .map(|b| b.bit_slice($left.offset(), len));

        // One bit per element; capacity is rounded up to a multiple of 64 bytes.
        let byte_capacity = bit_util::ceil(len, 8);
        let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity);
        let mut buffer = MutableBuffer::new(actual_capacity);
        buffer.resize(byte_capacity);
        let data = buffer.as_mut_ptr();

        for i in 0..len {
            if $op($left.value(i), $right) {
                // SAFETY: `i < len` and the buffer was resized to
                // `ceil(len, 8)` bytes above, so bit `i` lies inside it.
                unsafe {
                    bit_util::set_bit_raw(data, i);
                }
            }
        }

        let data = ArrayData::new(
            DataType::Boolean,
            len,
            None,
            null_bit_buffer,
            0,
            vec![buffer.into()],
            vec![],
        );
        Ok(BooleanArray::from(Arc::new(data)))
    }};
}
/// Evaluates `op(left[i], right[i])` for every index using the scalar
/// `compare_op!` implementation (no SIMD path is taken here).
///
/// # Errors
///
/// Returns `ArrowError::ComputeError` when `left` and `right` differ in length.
pub fn no_simd_compare_op<T, F>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
op: F,
) -> Result<BooleanArray>
where
T: ArrowNumericType,
F: Fn(T::Native, T::Native) -> bool,
{
compare_op!(left, right, op)
}
/// Evaluates `op(left[i], right)` for every index using the scalar
/// `compare_op_scalar!` implementation (no SIMD path is taken here).
pub fn no_simd_compare_op_scalar<T, F>(
left: &PrimitiveArray<T>,
right: T::Native,
op: F,
) -> Result<BooleanArray>
where
T: ArrowNumericType,
F: Fn(T::Native, T::Native) -> bool,
{
compare_op_scalar!(left, right, op)
}
pub fn like_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
let mut map = HashMap::new();
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
"Cannot perform comparison operation on arrays of different length"
.to_string(),
));
}
let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
let mut result = BooleanBufferBuilder::new(left.len());
for i in 0..left.len() {
let haystack = left.value(i);
let pat = right.value(i);
let re = if let Some(ref regex) = map.get(pat) {
regex
} else {
let re_pattern = pat.replace("%", ".*").replace("_", ".");
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::ComputeError(format!(
"Unable to build regex from LIKE pattern: {}",
e
))
})?;
map.insert(pat, re);
map.get(pat).unwrap()
};
result.append(re.is_match(haystack));
}
let data = ArrayData::new(
DataType::Boolean,
left.len(),
None,
null_bit_buffer,
0,
vec![result.finish()],
vec![],
);
Ok(BooleanArray::from(Arc::new(data)))
}
/// Returns `true` when `c` is one of the two LIKE wildcard characters,
/// `%` or `_`.
fn is_like_pattern(c: char) -> bool {
    match c {
        '%' | '_' => true,
        _ => false,
    }
}
pub fn like_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
let null_bit_buffer = left.data().null_buffer().cloned();
let mut result = BooleanBufferBuilder::new(left.len());
if !right.contains(is_like_pattern) {
for i in 0..left.len() {
result.append(left.value(i) == right);
}
} else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
{
for i in 0..left.len() {
result.append(left.value(i).starts_with(&right[..right.len() - 1]));
}
} else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
for i in 0..left.len() {
result.append(left.value(i).ends_with(&right[1..]));
}
} else {
let re_pattern = right.replace("%", ".*").replace("_", ".");
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::ComputeError(format!(
"Unable to build regex from LIKE pattern: {}",
e
))
})?;
for i in 0..left.len() {
let haystack = left.value(i);
result.append(re.is_match(haystack));
}
};
let data = ArrayData::new(
DataType::Boolean,
left.len(),
None,
null_bit_buffer,
0,
vec![result.finish()],
vec![],
);
Ok(BooleanArray::from(Arc::new(data)))
}
pub fn nlike_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
let mut map = HashMap::new();
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
"Cannot perform comparison operation on arrays of different length"
.to_string(),
));
}
let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
let mut result = BooleanBufferBuilder::new(left.len());
for i in 0..left.len() {
let haystack = left.value(i);
let pat = right.value(i);
let re = if let Some(ref regex) = map.get(pat) {
regex
} else {
let re_pattern = pat.replace("%", ".*").replace("_", ".");
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::ComputeError(format!(
"Unable to build regex from LIKE pattern: {}",
e
))
})?;
map.insert(pat, re);
map.get(pat).unwrap()
};
result.append(!re.is_match(haystack));
}
let data = ArrayData::new(
DataType::Boolean,
left.len(),
None,
null_bit_buffer,
0,
vec![result.finish()],
vec![],
);
Ok(BooleanArray::from(Arc::new(data)))
}
pub fn nlike_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
let null_bit_buffer = left.data().null_buffer().cloned();
let mut result = BooleanBufferBuilder::new(left.len());
if !right.contains(is_like_pattern) {
for i in 0..left.len() {
result.append(left.value(i) != right);
}
} else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
{
for i in 0..left.len() {
result.append(!left.value(i).starts_with(&right[..right.len() - 1]));
}
} else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
for i in 0..left.len() {
result.append(!left.value(i).ends_with(&right[1..]));
}
} else {
let re_pattern = right.replace("%", ".*").replace("_", ".");
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::ComputeError(format!(
"Unable to build regex from LIKE pattern: {}",
e
))
})?;
for i in 0..left.len() {
let haystack = left.value(i);
result.append(!re.is_match(haystack));
}
}
let data = ArrayData::new(
DataType::Boolean,
left.len(),
None,
null_bit_buffer,
0,
vec![result.finish()],
vec![],
);
Ok(BooleanArray::from(Arc::new(data)))
}
/// Element-wise `left == right` on two string arrays.
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length.
pub fn eq_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
compare_op!(left, right, |a, b| a == b)
}
/// Element-wise `left == right` between a string array and a single string.
pub fn eq_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
compare_op_scalar!(left, right, |a, b| a == b)
}
/// Element-wise `left != right` on two string arrays.
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length.
pub fn neq_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
compare_op!(left, right, |a, b| a != b)
}
/// Element-wise `left != right` between a string array and a single string.
pub fn neq_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
compare_op_scalar!(left, right, |a, b| a != b)
}
/// Element-wise `left < right` on two string arrays, using `&str` ordering.
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length.
pub fn lt_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
compare_op!(left, right, |a, b| a < b)
}
/// Element-wise `left < right` between a string array and a single string,
/// using `&str` ordering.
pub fn lt_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
compare_op_scalar!(left, right, |a, b| a < b)
}
/// Element-wise `left <= right` on two string arrays, using `&str` ordering.
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length.
pub fn lt_eq_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
compare_op!(left, right, |a, b| a <= b)
}
/// Element-wise `left <= right` between a string array and a single string,
/// using `&str` ordering.
pub fn lt_eq_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
compare_op_scalar!(left, right, |a, b| a <= b)
}
/// Element-wise `left > right` on two string arrays, using `&str` ordering.
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length.
pub fn gt_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
compare_op!(left, right, |a, b| a > b)
}
/// Element-wise `left > right` between a string array and a single string,
/// using `&str` ordering.
pub fn gt_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
compare_op_scalar!(left, right, |a, b| a > b)
}
/// Element-wise `left >= right` on two string arrays, using `&str` ordering.
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length.
pub fn gt_eq_utf8(left: &StringArray, right: &StringArray) -> Result<BooleanArray> {
compare_op!(left, right, |a, b| a >= b)
}
/// Element-wise `left >= right` between a string array and a single string,
/// using `&str` ordering.
pub fn gt_eq_utf8_scalar(left: &StringArray, right: &str) -> Result<BooleanArray> {
compare_op_scalar!(left, right, |a, b| a >= b)
}
/// SIMD-accelerated element-wise comparison of two primitive arrays.
///
/// Full chunks of `T::lanes()` values are compared with `simd_op`, and the
/// resulting mask is written `lanes / 8` bytes at a time into the output
/// bitmap. The trailing values that do not fill a chunk are compared one by
/// one with `scalar_op`.
///
/// # Errors
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length.
///
/// # Panics
///
/// Panics if `T::lanes()` is not a multiple of 8.
#[cfg(simd)]
fn simd_compare_op<T, SIMD_OP, SCALAR_OP>(
    left: &PrimitiveArray<T>,
    right: &PrimitiveArray<T>,
    simd_op: SIMD_OP,
    scalar_op: SCALAR_OP,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
{
    use std::borrow::BorrowMut;

    let len = left.len();
    if len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let null_bit_buffer = combine_option_bitmap(left.data_ref(), right.data_ref(), len)?;

    let lanes = T::lanes();
    let buffer_size = bit_util::ceil(len, 8);
    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);

    // Each chunk writes `lanes / 8` whole bytes, so lanes must be byte-aligned.
    assert!(
        lanes % 8 == 0,
        "Number of vector lanes must be multiple of 8"
    );
    let mut left_chunks = left.values().chunks_exact(lanes);
    let mut right_chunks = right.values().chunks_exact(lanes);

    // The fold threads the not-yet-written tail of the output through the
    // chunks; what is left afterwards is the slot for the remainder bits.
    let result_remainder = left_chunks
        .borrow_mut()
        .zip(right_chunks.borrow_mut())
        .fold(
            result.typed_data_mut(),
            |result_slice, (left_slice, right_slice)| {
                let simd_left = T::load(left_slice);
                let simd_right = T::load(right_slice);
                let simd_result = simd_op(simd_left, simd_right);

                let bitmask = T::mask_to_u64(&simd_result);
                let bytes = bitmask.to_le_bytes();
                result_slice[0..lanes / 8].copy_from_slice(&bytes[0..lanes / 8]);

                &mut result_slice[lanes / 8..]
            },
        );

    let left_remainder = left_chunks.remainder();
    let right_remainder = right_chunks.remainder();

    assert_eq!(left_remainder.len(), right_remainder.len());

    // Pack the scalar results for the tail into the low bits of a u64.
    let remainder_bitmask = left_remainder
        .iter()
        .zip(right_remainder.iter())
        .enumerate()
        .fold(0_u64, |mut mask, (i, (scalar_left, scalar_right))| {
            let bit = if scalar_op(*scalar_left, *scalar_right) {
                1_u64
            } else {
                0_u64
            };
            mask |= bit << i;
            mask
        });
    let remainder_mask_as_bytes =
        &remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
    result_remainder.copy_from_slice(remainder_mask_as_bytes);

    let data = ArrayData::new(
        DataType::Boolean,
        len,
        None,
        null_bit_buffer,
        0,
        vec![result.into()],
        vec![],
    );
    Ok(BooleanArray::from(Arc::new(data)))
}
/// SIMD-accelerated element-wise comparison of a primitive array against a
/// single scalar.
///
/// The scalar is broadcast with `T::init`. Full chunks of `T::lanes()` values
/// are compared with `simd_op`, and the resulting mask is written `lanes / 8`
/// bytes at a time into the output bitmap. The trailing values that do not
/// fill a chunk are compared one by one with `scalar_op`. The result reuses
/// the validity bitmap of `left`, sliced with the array's offset.
///
/// # Panics
///
/// Panics if `T::lanes()` is not a multiple of 8.
#[cfg(simd)]
fn simd_compare_op_scalar<T, SIMD_OP, SCALAR_OP>(
    left: &PrimitiveArray<T>,
    right: T::Native,
    simd_op: SIMD_OP,
    scalar_op: SCALAR_OP,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
{
    use std::borrow::BorrowMut;

    let len = left.len();

    let lanes = T::lanes();
    let buffer_size = bit_util::ceil(len, 8);
    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);

    // Each chunk writes `lanes / 8` whole bytes, so lanes must be byte-aligned.
    assert!(
        lanes % 8 == 0,
        "Number of vector lanes must be multiple of 8"
    );
    let mut left_chunks = left.values().chunks_exact(lanes);
    let simd_right = T::init(right);

    // The fold threads the not-yet-written tail of the output through the
    // chunks; what is left afterwards is the slot for the remainder bits.
    let result_remainder = left_chunks.borrow_mut().fold(
        result.typed_data_mut(),
        |result_slice, left_slice| {
            let simd_left = T::load(left_slice);
            let simd_result = simd_op(simd_left, simd_right);

            let bitmask = T::mask_to_u64(&simd_result);
            let bytes = bitmask.to_le_bytes();
            result_slice[0..lanes / 8].copy_from_slice(&bytes[0..lanes / 8]);

            &mut result_slice[lanes / 8..]
        },
    );

    let left_remainder = left_chunks.remainder();

    // Pack the scalar results for the tail into the low bits of a u64.
    let remainder_bitmask =
        left_remainder
            .iter()
            .enumerate()
            .fold(0_u64, |mut mask, (i, scalar_left)| {
                let bit = if scalar_op(*scalar_left, right) {
                    1_u64
                } else {
                    0_u64
                };
                mask |= bit << i;
                mask
            });
    let remainder_mask_as_bytes =
        &remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
    result_remainder.copy_from_slice(remainder_mask_as_bytes);

    // Re-slice the validity bits so they line up with the offset-0 output.
    let null_bit_buffer = left
        .data_ref()
        .null_buffer()
        .map(|b| b.bit_slice(left.offset(), left.len()));

    let null_count = left.null_count();

    let data = ArrayData::new(
        DataType::Boolean,
        len,
        Some(null_count),
        null_bit_buffer,
        0,
        vec![result.into()],
        vec![],
    );
    Ok(BooleanArray::from(Arc::new(data)))
}
/// Element-wise `left == right` on two primitive arrays.
///
/// Uses `simd_compare_op` when built with `cfg(simd)`, otherwise the scalar
/// `compare_op!` loop. Both paths return `ArrowError::ComputeError` when the
/// arrays differ in length.
pub fn eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op(left, right, T::eq, |a, b| a == b);
#[cfg(not(simd))]
return compare_op!(left, right, |a, b| a == b);
}
/// Element-wise `left == right` between a primitive array and a scalar.
///
/// Uses `simd_compare_op_scalar` when built with `cfg(simd)`, otherwise the
/// scalar `compare_op_scalar!` loop.
pub fn eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op_scalar(left, right, T::eq, |a, b| a == b);
#[cfg(not(simd))]
return compare_op_scalar!(left, right, |a, b| a == b);
}
/// Element-wise `left != right` on two primitive arrays.
///
/// Uses `simd_compare_op` when built with `cfg(simd)`, otherwise the scalar
/// `compare_op!` loop. Both paths return `ArrowError::ComputeError` when the
/// arrays differ in length.
pub fn neq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op(left, right, T::ne, |a, b| a != b);
#[cfg(not(simd))]
return compare_op!(left, right, |a, b| a != b);
}
/// Element-wise `left != right` between a primitive array and a scalar.
///
/// Uses `simd_compare_op_scalar` when built with `cfg(simd)`, otherwise the
/// scalar `compare_op_scalar!` loop.
pub fn neq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op_scalar(left, right, T::ne, |a, b| a != b);
#[cfg(not(simd))]
return compare_op_scalar!(left, right, |a, b| a != b);
}
/// Element-wise `left < right` on two primitive arrays.
///
/// Uses `simd_compare_op` when built with `cfg(simd)`, otherwise the scalar
/// `compare_op!` loop. Both paths return `ArrowError::ComputeError` when the
/// arrays differ in length.
pub fn lt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op(left, right, T::lt, |a, b| a < b);
#[cfg(not(simd))]
return compare_op!(left, right, |a, b| a < b);
}
/// Element-wise `left < right` between a primitive array and a scalar.
///
/// Uses `simd_compare_op_scalar` when built with `cfg(simd)`, otherwise the
/// scalar `compare_op_scalar!` loop.
pub fn lt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op_scalar(left, right, T::lt, |a, b| a < b);
#[cfg(not(simd))]
return compare_op_scalar!(left, right, |a, b| a < b);
}
/// Element-wise `left <= right` on two primitive arrays.
///
/// Uses `simd_compare_op` when built with `cfg(simd)`, otherwise the scalar
/// `compare_op!` loop. Both paths return `ArrowError::ComputeError` when the
/// arrays differ in length.
pub fn lt_eq<T>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op(left, right, T::le, |a, b| a <= b);
#[cfg(not(simd))]
return compare_op!(left, right, |a, b| a <= b);
}
/// Element-wise `left <= right` between a primitive array and a scalar.
///
/// Uses `simd_compare_op_scalar` when built with `cfg(simd)`, otherwise the
/// scalar `compare_op_scalar!` loop.
pub fn lt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op_scalar(left, right, T::le, |a, b| a <= b);
#[cfg(not(simd))]
return compare_op_scalar!(left, right, |a, b| a <= b);
}
/// Element-wise `left > right` on two primitive arrays.
///
/// Uses `simd_compare_op` when built with `cfg(simd)`, otherwise the scalar
/// `compare_op!` loop. Both paths return `ArrowError::ComputeError` when the
/// arrays differ in length.
pub fn gt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op(left, right, T::gt, |a, b| a > b);
#[cfg(not(simd))]
return compare_op!(left, right, |a, b| a > b);
}
/// Element-wise `left > right` between a primitive array and a scalar.
///
/// Uses `simd_compare_op_scalar` when built with `cfg(simd)`, otherwise the
/// scalar `compare_op_scalar!` loop.
pub fn gt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op_scalar(left, right, T::gt, |a, b| a > b);
#[cfg(not(simd))]
return compare_op_scalar!(left, right, |a, b| a > b);
}
/// Element-wise `left >= right` on two primitive arrays.
///
/// Uses `simd_compare_op` when built with `cfg(simd)`, otherwise the scalar
/// `compare_op!` loop. Both paths return `ArrowError::ComputeError` when the
/// arrays differ in length.
pub fn gt_eq<T>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op(left, right, T::ge, |a, b| a >= b);
#[cfg(not(simd))]
return compare_op!(left, right, |a, b| a >= b);
}
/// Element-wise `left >= right` between a primitive array and a scalar.
///
/// Uses `simd_compare_op_scalar` when built with `cfg(simd)`, otherwise the
/// scalar `compare_op_scalar!` loop.
pub fn gt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
T: ArrowNumericType,
{
#[cfg(simd)]
return simd_compare_op_scalar(left, right, T::ge, |a, b| a >= b);
#[cfg(not(simd))]
return compare_op_scalar!(left, right, |a, b| a >= b);
}
/// Checks, for every index `i`, whether the primitive value `left[i]` occurs
/// among the valid elements of the list `right[i]`.
///
/// The result has no validity bitmap: a slot where either `left[i]` or
/// `right[i]` is null is reported as `false`, and null elements inside a list
/// never match.
///
/// # Errors
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length, or
/// when a list's values are not a `PrimitiveArray<T>`.
pub fn contains<T, OffsetSize>(
    left: &PrimitiveArray<T>,
    right: &GenericListArray<OffsetSize>,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    OffsetSize: OffsetSizeTrait,
{
    let left_len = left.len();
    if left_len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let num_bytes = bit_util::ceil(left_len, 8);

    // Bit `i` is set only when both `left[i]` and `right[i]` are valid.
    let both_valid_buffer =
        match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
            Some(buff) => buff,
            None => new_all_set_buffer(num_bytes),
        };
    let both_valid_bitmap = both_valid_buffer.as_slice();

    let mut bool_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
    let bool_slice = bool_buf.as_slice_mut();

    for i in 0..left_len {
        if !bit_util::get_bit(both_valid_bitmap, i) {
            continue;
        }
        let list = right.value(i);
        let list = list
            .as_any()
            .downcast_ref::<PrimitiveArray<T>>()
            .ok_or_else(|| {
                ArrowError::ComputeError(
                    "List values do not match the type of the left array".to_string(),
                )
            })?;

        let needle = left.value(i);
        // `any` stops scanning the list at the first match.
        let found = (0..list.len()).any(|j| list.is_valid(j) && needle == list.value(j));
        if found {
            bit_util::set_bit(bool_slice, i);
        }
    }

    let data = ArrayData::new(
        DataType::Boolean,
        left.len(),
        None,
        None,
        0,
        vec![bool_buf.into()],
        vec![],
    );
    Ok(BooleanArray::from(Arc::new(data)))
}
/// Checks, for every index `i`, whether the string `left[i]` occurs among the
/// valid elements of the list `right[i]`.
///
/// The result has no validity bitmap: a slot where either `left[i]` or
/// `right[i]` is null is reported as `false`, and null elements inside a list
/// never match.
///
/// # Errors
///
/// Returns `ArrowError::ComputeError` when the arrays differ in length, or
/// when a list's values are not a `GenericStringArray<OffsetSize>`.
pub fn contains_utf8<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &ListArray,
) -> Result<BooleanArray>
where
    OffsetSize: StringOffsetSizeTrait,
{
    let left_len = left.len();
    if left_len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let num_bytes = bit_util::ceil(left_len, 8);

    // Bit `i` is set only when both `left[i]` and `right[i]` are valid.
    let both_valid_buffer =
        match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
            Some(buff) => buff,
            None => new_all_set_buffer(num_bytes),
        };
    let both_valid_bitmap = both_valid_buffer.as_slice();

    let mut bool_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
    let bool_slice = bool_buf.as_slice_mut();

    for i in 0..left_len {
        if !bit_util::get_bit(both_valid_bitmap, i) {
            continue;
        }
        let list = right.value(i);
        let list = list
            .as_any()
            .downcast_ref::<GenericStringArray<OffsetSize>>()
            .ok_or_else(|| {
                ArrowError::ComputeError(
                    "List values do not match the type of the left array".to_string(),
                )
            })?;

        let needle = left.value(i);
        // `any` stops scanning the list at the first match.
        let found = (0..list.len()).any(|j| list.is_valid(j) && needle == list.value(j));
        if found {
            bit_util::set_bit(bool_slice, i);
        }
    }

    let data = ArrayData::new(
        DataType::Boolean,
        left.len(),
        None,
        None,
        0,
        vec![bool_buf.into()],
        vec![],
    );
    Ok(BooleanArray::from(Arc::new(data)))
}
/// Builds a `Buffer` of `len` bytes in which every bit is set.
#[inline]
fn new_all_set_buffer(len: usize) -> Buffer {
    MutableBuffer::new(len).with_bitset(len, true).into()
}
// Unit tests for the comparison kernels defined in this file.
#[rustfmt::skip::macros(vec)]
#[cfg(test)]
mod tests {
use super::*;
use crate::datatypes::{Int8Type, ToByteSlice};
use crate::{array::Int32Array, array::Int64Array, datatypes::Field};
// Builds two `Int64Array`s, runs an array/array kernel on them and compares
// the outcome with the expected `BooleanArray`.
macro_rules! cmp_i64 {
($KERNEL:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
let a = Int64Array::from($A_VEC);
let b = Int64Array::from($B_VEC);
let c = $KERNEL(&a, &b).unwrap();
assert_eq!(BooleanArray::from($EXPECTED), c);
};
}
// Same as `cmp_i64!`, but for array/scalar kernels.
macro_rules! cmp_i64_scalar {
($KERNEL:ident, $A_VEC:expr, $B:literal, $EXPECTED:expr) => {
let a = Int64Array::from($A_VEC);
let c = $KERNEL(&a, $B).unwrap();
assert_eq!(BooleanArray::from($EXPECTED), c);
};
}
#[test]
fn test_primitive_array_eq() {
cmp_i64!(
eq,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
}
#[test]
fn test_primitive_array_eq_scalar() {
cmp_i64_scalar!(
eq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![false, false, true, false, false, false, false, true, false, false]
);
}
// Compares a sliced array (non-zero offset) against an unsliced one.
#[test]
fn test_primitive_array_eq_with_slice() {
let a = Int32Array::from(vec![6, 7, 8, 8, 10]);
let b = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
let b_slice = b.slice(5, 5);
let c = b_slice.as_any().downcast_ref().unwrap();
let d = eq(&c, &a).unwrap();
assert_eq!(true, d.value(0));
assert_eq!(true, d.value(1));
assert_eq!(true, d.value(2));
assert_eq!(false, d.value(3));
assert_eq!(true, d.value(4));
}
#[test]
fn test_primitive_array_neq() {
cmp_i64!(
neq,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, false, true, true, true, true, false, true, true]
);
}
#[test]
fn test_primitive_array_neq_scalar() {
cmp_i64_scalar!(
neq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![true, true, false, true, true, true, true, false, true, true]
);
}
#[test]
fn test_primitive_array_lt() {
cmp_i64!(
lt,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, false, true, true, false, false, false, true, true]
);
}
#[test]
fn test_primitive_array_lt_scalar() {
cmp_i64_scalar!(
lt_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![true, true, false, false, false, true, true, false, false, false]
);
}
// A null on either side yields a null result slot.
#[test]
fn test_primitive_array_lt_nulls() {
cmp_i64!(
lt,
vec![None, None, Some(1), Some(1), None, None, Some(2), Some(2),],
vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
vec![None, None, None, Some(false), None, None, None, Some(true)]
);
}
#[test]
fn test_primitive_array_lt_scalar_nulls() {
cmp_i64_scalar!(
lt_scalar,
vec![None, Some(1), Some(2), Some(3), None, Some(1), Some(2), Some(3), Some(2), None],
2,
vec![None, Some(true), Some(false), Some(false), None, Some(true), Some(false), Some(false), Some(false), None]
);
}
#[test]
fn test_primitive_array_lt_eq() {
cmp_i64!(
lt_eq,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, true, true, false, false, true, true, true]
);
}
#[test]
fn test_primitive_array_lt_eq_scalar() {
cmp_i64_scalar!(
lt_eq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![true, true, true, false, false, true, true, true, false, false]
);
}
#[test]
fn test_primitive_array_lt_eq_nulls() {
cmp_i64!(
lt_eq,
vec![None, None, Some(1), None, None, Some(1), None, None, Some(1)],
vec![None, Some(1), Some(0), None, Some(1), Some(2), None, None, Some(3)],
vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
);
}
#[test]
fn test_primitive_array_lt_eq_scalar_nulls() {
cmp_i64_scalar!(
lt_eq_scalar,
vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
1,
vec![None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false)]
);
}
#[test]
fn test_primitive_array_gt() {
cmp_i64!(
gt,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, false, false, false, true, true, false, false, false]
);
}
#[test]
fn test_primitive_array_gt_scalar() {
cmp_i64_scalar!(
gt_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![false, false, false, true, true, false, false, false, true, true]
);
}
#[test]
fn test_primitive_array_gt_nulls() {
cmp_i64!(
gt,
vec![None, None, Some(1), None, None, Some(2), None, None, Some(3)],
vec![None, Some(1), Some(1), None, Some(1), Some(1), None, Some(1), Some(1)],
vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
);
}
#[test]
fn test_primitive_array_gt_scalar_nulls() {
cmp_i64_scalar!(
gt_scalar,
vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
1,
vec![None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false), Some(true)]
);
}
#[test]
fn test_primitive_array_gt_eq() {
cmp_i64!(
gt_eq,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, true, false, false, true, true, true, false, false]
);
}
#[test]
fn test_primitive_array_gt_eq_scalar() {
cmp_i64_scalar!(
gt_eq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![false, false, true, true, true, false, false, true, true, true]
);
}
#[test]
fn test_primitive_array_gt_eq_nulls() {
cmp_i64!(
gt_eq,
vec![None, None, Some(1), None, Some(1), Some(2), None, None, Some(1)],
vec![None, Some(1), None, None, Some(1), Some(1), None, Some(2), Some(2)],
vec![None, None, None, None, Some(true), Some(true), None, None, Some(false)]
);
}
#[test]
fn test_primitive_array_gt_eq_scalar_nulls() {
cmp_i64_scalar!(
gt_eq_scalar,
vec![None, Some(1), Some(2), None, Some(2), Some(3), None, Some(3), Some(4)],
2,
vec![None, Some(false), Some(true), None, Some(true), Some(true), None, Some(true), Some(true)]
);
}
// Both inputs are slices with a non-zero offset.
#[test]
fn test_primitive_array_compare_slice() {
let a: Int32Array = (0..100).map(Some).collect();
let a = a.slice(50, 50);
let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
let b: Int32Array = (100..200).map(Some).collect();
let b = b.slice(50, 50);
let b = b.as_any().downcast_ref::<Int32Array>().unwrap();
let actual = lt(&a, &b).unwrap();
let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
assert_eq!(expected, actual);
}
// Scalar comparison against a slice with a non-zero offset.
#[test]
fn test_primitive_array_compare_scalar_slice() {
let a: Int32Array = (0..100).map(Some).collect();
let a = a.slice(50, 50);
let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
let actual = lt_scalar(&a, 200).unwrap();
let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
assert_eq!(expected, actual);
}
// The kernel's value buffer must be as long as that of a `BooleanArray`
// built directly from the same number of items.
#[test]
fn test_length_of_result_buffer() {
let item_count = 130;
let select_mask: BooleanArray = vec![true; item_count].into();
let array_a: PrimitiveArray<Int8Type> = vec![1; item_count].into();
let array_b: PrimitiveArray<Int8Type> = vec![2; item_count].into();
let result_mask = gt_eq(&array_a, &array_b).unwrap();
assert_eq!(
result_mask.data().buffers()[0].len(),
select_mask.data().buffers()[0].len()
);
}
// Offsets [0, 3, 6, 6, 9] with validity 0b00001011 give the lists
// [0, 1, 2], [3, 4, 5], null, [6, null, 7].
#[test]
fn test_contains() {
let value_data = Int32Array::from(vec![
Some(0),
Some(1),
Some(2),
Some(3),
Some(4),
Some(5),
Some(6),
None,
Some(7),
])
.data();
let value_offsets = Buffer::from(&[0i64, 3, 6, 6, 9].to_byte_slice());
let list_data_type =
DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(4)
.add_buffer(value_offsets)
.add_child_data(value_data)
.null_bit_buffer(Buffer::from([0b00001011]))
.build();
let list_array = LargeListArray::from(list_data);
let nulls = Int32Array::from(vec![None, None, None, None]);
let nulls_result = contains(&nulls, &list_array).unwrap();
assert_eq!(
nulls_result
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap(),
&BooleanArray::from(vec![false, false, false, false]),
);
let values = Int32Array::from(vec![Some(0), Some(0), Some(0), Some(0)]);
let values_result = contains(&values, &list_array).unwrap();
assert_eq!(
values_result
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap(),
&BooleanArray::from(vec![true, false, false, false]),
);
}
// Lists built below: ["Lorem", "ipsum", null], ["sit", "amet", "Lorem"],
// null, ["ipsum"].
#[test]
fn test_contains_utf8() {
let values_builder = StringBuilder::new(10);
let mut builder = ListBuilder::new(values_builder);
builder.values().append_value("Lorem").unwrap();
builder.values().append_value("ipsum").unwrap();
builder.values().append_null().unwrap();
builder.append(true).unwrap();
builder.values().append_value("sit").unwrap();
builder.values().append_value("amet").unwrap();
builder.values().append_value("Lorem").unwrap();
builder.append(true).unwrap();
builder.append(false).unwrap();
builder.values().append_value("ipsum").unwrap();
builder.append(true).unwrap();
let list_array = builder.finish();
let nulls = StringArray::from(vec![None, None, None, None]);
let nulls_result = contains_utf8(&nulls, &list_array).unwrap();
assert_eq!(
nulls_result
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap(),
&BooleanArray::from(vec![false, false, false, false]),
);
let values = StringArray::from(vec![
Some("Lorem"),
Some("Lorem"),
Some("Lorem"),
Some("Lorem"),
]);
let values_result = contains_utf8(&values, &list_array).unwrap();
assert_eq!(
values_result
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap(),
&BooleanArray::from(vec![true, true, false, false]),
);
}
// Generates a `#[test]` that runs an array/array string kernel and checks
// each result value against `$expected`.
macro_rules! test_utf8 {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let left = StringArray::from($left);
let right = StringArray::from($right);
let res = $op(&left, &right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(v, expected[i]);
}
}
};
}
// Generates a `#[test]` that runs an array/scalar string kernel and checks
// each result value against `$expected`.
macro_rules! test_utf8_scalar {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let left = StringArray::from($left);
let res = $op(&left, $right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(
v,
expected[i],
"unexpected result when comparing {} at position {} to {} ",
left.value(i),
i,
$right
);
}
}
};
}
// LIKE / NOT LIKE kernels.
test_utf8!(
test_utf8_array_like,
vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
like_utf8,
vec![true, true, true, false, false, true, false]
);
test_utf8_scalar!(
test_utf8_array_like_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"%ar%",
like_utf8_scalar,
vec![true, true, false, false]
);
test_utf8_scalar!(
test_utf8_array_like_scalar_start,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow%",
like_utf8_scalar,
vec![true, false, true, false]
);
test_utf8_scalar!(
test_utf8_array_like_scalar_end,
vec!["arrow", "parrow", "arrows", "arr"],
"%arrow",
like_utf8_scalar,
vec![true, true, false, false]
);
test_utf8_scalar!(
test_utf8_array_like_scalar_equals,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow",
like_utf8_scalar,
vec![true, false, false, false]
);
test_utf8_scalar!(
test_utf8_array_like_scalar_one,
vec!["arrow", "arrows", "parrow", "arr"],
"arrow_",
like_utf8_scalar,
vec![false, true, false, false]
);
test_utf8!(
test_utf8_array_nlike,
vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
nlike_utf8,
vec![false, false, false, true, true, false, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"%ar%",
nlike_utf8_scalar,
vec![false, false, true, true]
);
// Equality and ordering kernels on strings (interleaved with the remaining
// NOT LIKE scalar cases).
test_utf8!(
test_utf8_array_eq,
vec!["arrow", "arrow", "arrow", "arrow"],
vec!["arrow", "parquet", "datafusion", "flight"],
eq_utf8,
vec![true, false, false, false]
);
test_utf8_scalar!(
test_utf8_array_eq_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"arrow",
eq_utf8_scalar,
vec![true, false, false, false]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_start,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow%",
nlike_utf8_scalar,
vec![false, true, false, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_end,
vec!["arrow", "parrow", "arrows", "arr"],
"%arrow",
nlike_utf8_scalar,
vec![false, false, true, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_equals,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow",
nlike_utf8_scalar,
vec![false, true, true, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_one,
vec!["arrow", "arrows", "parrow", "arr"],
"arrow_",
nlike_utf8_scalar,
vec![true, false, true, true]
);
test_utf8!(
test_utf8_array_neq,
vec!["arrow", "arrow", "arrow", "arrow"],
vec!["arrow", "parquet", "datafusion", "flight"],
neq_utf8,
vec![false, true, true, true]
);
test_utf8_scalar!(
test_utf8_array_neq_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"arrow",
neq_utf8_scalar,
vec![false, true, true, true]
);
test_utf8!(
test_utf8_array_lt,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
lt_utf8,
vec![true, true, false, false]
);
test_utf8_scalar!(
test_utf8_array_lt_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
lt_utf8_scalar,
vec![true, true, false, false]
);
test_utf8!(
test_utf8_array_lt_eq,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
lt_eq_utf8,
vec![true, true, true, false]
);
test_utf8_scalar!(
test_utf8_array_lt_eq_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
lt_eq_utf8_scalar,
vec![true, true, true, false]
);
test_utf8!(
test_utf8_array_gt,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
gt_utf8,
vec![false, false, false, true]
);
test_utf8_scalar!(
test_utf8_array_gt_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
gt_utf8_scalar,
vec![false, false, false, true]
);
test_utf8!(
test_utf8_array_gt_eq,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
gt_eq_utf8,
vec![false, false, true, true]
);
test_utf8_scalar!(
test_utf8_array_gt_eq_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
gt_eq_utf8_scalar,
vec![false, false, true, true]
);
}