/// A trait representing an immutable Arrow array, used as a trait object (`dyn Array`).
pub trait Array: Send + Sync + DynClone + 'static {
Show 13 methods
/// Converts itself to a reference of `Any`, which enables downcasting to concrete types.
fn as_any(&self) -> &dyn Any;
/// Converts itself to a mutable reference of `Any`, which enables mutable downcasting.
fn as_any_mut(&mut self) -> &mut dyn Any;
/// The length of the array, i.e. the number of elements (slots).
fn len(&self) -> usize;
/// The `DataType` of the array; combined with `as_any` it is used to downcast `dyn Array`.
fn data_type(&self) -> &DataType;
// NOTE(review): not described on this page; presumably the validity (null) bitmap,
// `None` when the array carries none — confirm.
fn validity(&self) -> Option<&Bitmap>;
/// Slices the array, returning a new `Box<dyn Array>`.
/// Panics iff `offset + length > self.len()`.
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array>;
/// Slices the array, returning a new `Box<dyn Array>`.
/// Safety: the caller must ensure that `offset + length <= self.len()`.
unsafe fn slice_unchecked(
&self,
offset: usize,
length: usize
) -> Box<dyn Array>;
// NOTE(review): not described on this page; presumably returns a copy of this array
// with its validity replaced by `validity` — confirm.
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
// NOTE(review): not described on this page; presumably boxes a clone of this array — confirm.
fn to_boxed(&self) -> Box<dyn Array>;
/// Whether the array is empty.
fn is_empty(&self) -> bool { ... }
/// The number of null slots on this array; documented as `O(1)` because it is pre-computed.
fn null_count(&self) -> usize { ... }
// NOTE(review): not described on this page; presumably whether slot `i` is null — confirm.
fn is_null(&self, i: usize) -> bool { ... }
// NOTE(review): not described on this page; presumably whether slot `i` is non-null — confirm.
fn is_valid(&self, i: usize) -> bool { ... }
}
Expand description
A trait representing an immutable Arrow array. Arrow arrays are trait objects
that are infallibly downcast to concrete types according to `Array::data_type`.
Required Methods§
sourcefn as_any(&self) -> &dyn Any
fn as_any(&self) -> &dyn Any
Converts itself to a reference of `Any`, which enables downcasting to concrete types.
sourcefn as_any_mut(&mut self) -> &mut dyn Any
fn as_any_mut(&mut self) -> &mut dyn Any
Converts itself to a mutable reference of `Any`, which enables mutable downcasting to concrete types.
sourcefn len(&self) -> usize
fn len(&self) -> usize
The length of the `Array`. Every array has a length corresponding to the number of
elements (slots).
sourcefn data_type(&self) -> &DataType
fn data_type(&self) -> &DataType
The `DataType` of the `Array`. In combination with `Array::as_any`, this can be
used to downcast trait objects (`dyn Array`) to concrete arrays.
sourcefn slice(&self, offset: usize, length: usize) -> Box<dyn Array>
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array>
Slices the `Array`, returning a new `Box<dyn Array>`.
Implementation
This operation is `O(1)` over `len`, as it amounts to increasing two ref counts
and moving the struct to the heap.
Panic
This function panics iff `offset + length > self.len()`.
sourceunsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array>
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array>
Slices the `Array`, returning a new `Box<dyn Array>`.
Implementation
This operation is `O(1)` over `len`, as it amounts to increasing two ref counts
and moving the struct to the heap.
Safety
The caller must ensure that `offset + length <= self.len()`.
Provided Methods§
sourcefn is_empty(&self) -> bool
fn is_empty(&self) -> bool
Whether the array is empty.
Examples found in repository?
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
/// Returns whether at least one slot of `array` is `Some(true)`.
///
/// An empty array yields `false`. When a validity bitmap is present the slots
/// are scanned one by one; otherwise the answer is derived from the number of
/// unset bits reported by `array.values()`.
pub fn any(array: &BooleanArray) -> bool {
    if array.is_empty() {
        return false;
    }
    match array.validity() {
        // Nulls may be present: only a slot that is exactly `Some(true)` counts.
        Some(_) => array.into_iter().any(|slot| slot == Some(true)),
        // No validity: at least one bit is set unless every bit is unset.
        None => {
            let bits = array.values();
            bits.unset_bits() != bits.len()
        }
    }
}
/// Check if all of the values in the array are `true`.
///
/// Returns `false` when the array is empty or when it contains at least one
/// null slot; otherwise returns whether `array.values()` has no unset bit.
pub fn all(array: &BooleanArray) -> bool {
    if array.is_empty() {
        return false;
    }
    if array.null_count() > 0 {
        return false;
    }
    array.values().unset_bits() == 0
}
More examples
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
/// Returns whether any slot of `array` holds `Some(true)`.
///
/// `false` is returned for an empty array. With a validity bitmap the slots are
/// inspected individually; without one, the result is `true` unless every bit
/// of `array.values()` is unset.
pub fn any(array: &BooleanArray) -> bool {
    if array.is_empty() {
        return false;
    }
    if array.validity().is_none() {
        let bits = array.values();
        return bits.unset_bits() != bits.len();
    }
    array.into_iter().any(|slot| slot == Some(true))
}
/// Returns whether all values in the array are `true`.
///
/// An empty array, or an array with at least one null slot, yields `false`.
pub fn all(array: &BooleanArray) -> bool {
    let rejected = array.is_empty() || array.null_count() > 0;
    if rejected {
        return false;
    }
    // No nulls and at least one slot: true iff no bit is unset.
    array.values().unset_bits() == 0
}
sourcefn null_count(&self) -> usize
fn null_count(&self) -> usize
The number of null slots on this `Array`.
Implementation
This is `O(1)` since the number of null elements is pre-computed.
Examples found in repository?
More examples
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
/// Sums the values of a [`PrimitiveArray`].
///
/// Returns `None` when every slot is null (this includes the empty array).
/// Otherwise the work is delegated to `null_sum` when a validity bitmap is
/// present and to `nonnull_sum` when there is none.
pub fn sum_primitive<T>(array: &PrimitiveArray<T>) -> Option<T>
where
    T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
    T::Simd: Add<Output = T::Simd> + Sum<T>,
{
    if array.null_count() == array.len() {
        return None;
    }
    let total = if let Some(validity) = array.validity() {
        null_sum(array.values(), validity)
    } else {
        nonnull_sum(array.values())
    };
    Some(total)
}
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
/// Returns the minimum value of a [`PrimitiveArray`].
///
/// Returns `None` when every slot is null (this includes the empty array).
/// Otherwise delegates to `null_min_primitive` when a validity bitmap is
/// present and to `nonnull_min_primitive` when there is none.
pub fn min_primitive<T>(array: &PrimitiveArray<T>) -> Option<T>
where
    T: NativeType + Simd,
    T::Simd: SimdOrd<T>,
{
    // Also true for `array.len() == 0`, where both sides are zero.
    if array.null_count() == array.len() {
        return None;
    }
    let values = array.values();
    let minimum = match array.validity() {
        Some(validity) => null_min_primitive(values, validity),
        None => nonnull_min_primitive(values),
    };
    Some(minimum)
}
/// Returns the maximum value in the array, according to the natural order.
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
///
/// Returns `None` when every slot is null (this includes the empty array).
pub fn max_primitive<T>(array: &PrimitiveArray<T>) -> Option<T>
where
    T: NativeType + Simd,
    T::Simd: SimdOrd<T>,
{
    // Also true for `array.len() == 0`, where both sides are zero.
    if array.null_count() == array.len() {
        return None;
    }
    let values = array.values();
    let maximum = match array.validity() {
        Some(validity) => null_max_primitive(values, validity),
        None => nonnull_max_primitive(values),
    };
    Some(maximum)
}
/// Helper to compute min/max of [`BinaryArray`] and [`Utf8Array`]
///
/// Expands to an `Option` of the selected value. `$cmp(v1, v2)` decides whether the
/// running value `v1` is replaced by the next value `v2`: when it returns `true`, `v2`
/// is kept, otherwise `v1` is kept.
///
/// Both `$array` and `$cmp` are pasted several times in the expansion, so they should
/// be cheap, side-effect-free expressions.
macro_rules! min_max_binary_utf8 {
($array: expr, $cmp: expr) => {
// Every slot is null (also covers a zero-length array): there is nothing to select.
if $array.null_count() == $array.len() {
None
} else if $array.validity().is_some() {
// A validity bitmap is present: reduce over `Option` items so that null slots
// never replace a non-null running value.
$array
.iter()
.reduce(|v1, v2| match (v1, v2) {
(None, v2) => v2,
(v1, None) => v1,
(Some(v1), Some(v2)) => {
if $cmp(v1, v2) {
Some(v2)
} else {
Some(v1)
}
}
})
// `reduce` yields `Option<Option<_>>`; collapse the outer layer.
.unwrap_or(None)
} else {
// No validity bitmap: reduce directly over the values.
$array
.values_iter()
.reduce(|v1, v2| if $cmp(v1, v2) { v2 } else { v1 })
}
};
}
/// Returns the maximum value in the binary array, according to the natural order.
///
/// Yields `None` when every slot is null, which includes the empty array.
pub fn max_binary<O: Offset>(array: &BinaryArray<O>) -> Option<&[u8]> {
    // Replace the running value whenever the candidate is strictly greater.
    min_max_binary_utf8!(array, |current, candidate| current < candidate)
}
/// Returns the minimum value in the binary array, according to the natural order.
///
/// Yields `None` when every slot is null, which includes the empty array.
pub fn min_binary<O: Offset>(array: &BinaryArray<O>) -> Option<&[u8]> {
    // Replace the running value whenever the candidate is strictly smaller.
    min_max_binary_utf8!(array, |current, candidate| current > candidate)
}
/// Returns the maximum value in the string array, according to the natural order.
///
/// Yields `None` when every slot is null, which includes the empty array.
pub fn max_string<O: Offset>(array: &Utf8Array<O>) -> Option<&str> {
    // Replace the running value whenever the candidate is strictly greater.
    min_max_binary_utf8!(array, |current, candidate| current < candidate)
}
/// Returns the minimum value in the string array, according to the natural order.
///
/// Yields `None` when every slot is null, which includes the empty array.
pub fn min_string<O: Offset>(array: &Utf8Array<O>) -> Option<&str> {
    // Replace the running value whenever the candidate is strictly smaller.
    min_max_binary_utf8!(array, |current, candidate| current > candidate)
}
/// Returns the minimum value in the boolean array.
///
/// `None` is returned when every slot is null, which includes the empty array.
///
/// ```
/// use arrow2::{
///   array::BooleanArray,
///   compute::aggregate::min_boolean,
/// };
///
/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
/// assert_eq!(min_boolean(&a), Some(false))
/// ```
pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
    let nulls = array.null_count();
    // All nulls / zero-length array: there is no minimum.
    if nulls == array.len() {
        return None;
    }
    // No nulls: the minimum is `true` only when no bit is unset.
    if nulls == 0 {
        return Some(array.values().unset_bits() == 0);
    }
    // The minimum bool is false (0), so stop scanning at the first valid `false`.
    let saw_false = array.iter().any(|slot| slot == Some(false));
    Some(!saw_false)
}
/// Returns the maximum value in the boolean array
///
/// `None` is returned when every slot is null, which includes the empty array.
///
/// ```
/// use arrow2::{
///   array::BooleanArray,
///   compute::aggregate::max_boolean,
/// };
///
/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
/// assert_eq!(max_boolean(&a), Some(true))
/// ```
pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
    let nulls = array.null_count();
    // All nulls / zero-length array: there is no maximum.
    if nulls == array.len() {
        return None;
    }
    // No nulls: the maximum is `true` as soon as not every bit is unset.
    if nulls == 0 {
        return Some(array.values().unset_bits() < array.len());
    }
    // The maximum bool is true (1), so stop scanning at the first valid `true`.
    let saw_true = array.iter().any(|slot| slot == Some(true));
    Some(saw_true)
}
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
/// Divides `lhs` by `rhs` element-wise, producing an array with `lhs`'s data type.
///
/// When `rhs` has no null slots the work is delegated to `binary`. Otherwise each
/// pair of slots is divided individually and the result slot is null whenever
/// either operand is null.
///
/// # Panics
/// On the path where `rhs` contains nulls, panics if `check_same_len(lhs, rhs)`
/// returns an error.
pub fn div<T>(lhs: &PrimitiveArray<T>, rhs: &PrimitiveArray<T>) -> PrimitiveArray<T>
where
    T: NativeArithmetics + Div<Output = T>,
{
    let data_type = lhs.data_type().clone();
    if rhs.null_count() == 0 {
        return binary(lhs, rhs, data_type, |a, b| a / b);
    }

    check_same_len(lhs, rhs).unwrap();
    let quotients = lhs
        .iter()
        .zip(rhs.iter())
        .map(|(l, r)| l.zip(r).map(|(l, r)| *l / *r));
    PrimitiveArray::from_trusted_len_iter(quotients).to(data_type)
}
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
/// Tries to build a new instance from a data type, its keys and its values.
///
/// # Errors
/// Propagates the error of `check_data_type` and, unless every key is null, the
/// error of the index check of `keys.values()` against `values.len()`.
pub fn try_new(
    data_type: DataType,
    keys: PrimitiveArray<K>,
    values: Box<dyn Array>,
) -> Result<Self, Error> {
    check_data_type(K::KEY_TYPE, &data_type, values.data_type())?;

    let every_key_is_null = keys.null_count() == keys.len();
    if !every_key_is_null {
        let values_len = values.len();
        if K::always_fits_usize() {
            // SAFETY: we just checked that conversion to `usize` always
            // succeeds
            unsafe { check_indexes_unchecked(keys.values(), values_len) }?;
        } else {
            check_indexes(keys.values(), values_len)?;
        }
    }

    Ok(Self {
        data_type,
        keys,
        values,
    })
}
88 89 90 91 92 93 94 95 96 97 98 99 100 101
/// Takes the slots of `values` selected by `indices`, returning a new [`BooleanArray`]
/// with the same data type as `values`.
///
/// One of four helpers is chosen depending on whether `values` and/or `indices`
/// contain null slots.
pub fn take<I: Index>(values: &BooleanArray, indices: &PrimitiveArray<I>) -> BooleanArray {
    let data_type = values.data_type().clone();
    let indices_nullable = indices.null_count() > 0;
    let values_nullable = values.null_count() > 0;

    let (taken, validity) = if values_nullable {
        if indices_nullable {
            take_values_indices_validity(values, indices)
        } else {
            take_values_validity(values, indices.values())
        }
    } else if indices_nullable {
        take_indices_validity(values.values(), indices)
    } else {
        take_no_validity(values.values(), indices.values())
    };

    BooleanArray::new(data_type, taken, validity)
}
- src/compute/take/primitive.rs
- src/array/growable/utf8.rs
- src/array/growable/boolean.rs
- src/compute/take/binary.rs
- src/array/growable/fixed_binary.rs
- src/compute/take/utf8.rs
- src/io/parquet/write/fixed_len_bytes.rs
- src/array/growable/binary.rs
- src/io/parquet/write/boolean/basic.rs
- src/io/parquet/write/primitive/basic.rs
- src/compute/cast/dictionary_to.rs
- src/array/growable/list.rs
- src/compute/boolean.rs
- src/array/growable/primitive.rs
- src/io/parquet/write/boolean/nested.rs
- src/io/parquet/write/utf8/nested.rs
- src/io/parquet/write/binary/nested.rs
- src/array/growable/fixed_size_list.rs
- src/io/parquet/write/dictionary.rs
- src/array/growable/dictionary.rs
- src/array/growable/structure.rs
- src/io/parquet/write/primitive/nested.rs
- src/io/parquet/write/utf8/basic.rs
- src/io/parquet/write/binary/basic.rs
- src/ffi/array.rs
- src/compute/if_then_else.rs
- src/io/ipc/write/serialize.rs
sourcefn is_null(&self, i: usize) -> bool
fn is_null(&self, i: usize) -> bool
Examples found in repository?
More examples
102 103 104 105 106 107 108 109 110 111 112 113 114
/// Returns a boxed function that writes the slot at a given row of `array` to a writer.
///
/// Null slots are written as `null`; every other slot is delegated to the function
/// obtained from `get_value_display`.
pub fn get_display<'a, F: Write + 'a>(
    array: &'a dyn Array,
    null: &'static str,
) -> Box<dyn Fn(&mut F, usize) -> Result + 'a> {
    let render_value = get_value_display(array, null);
    Box::new(move |out, index| {
        if !array.is_null(index) {
            return render_value(out, index);
        }
        out.write_str(null)
    })
}
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
/// Builds a streaming iterator over the bytes of the UTF-8 values referenced by the
/// keys of a dictionary array.
///
/// For a null key, or a key pointing at a null value, nothing is appended to the buffer.
///
/// # Panics
/// Panics if `array` is not a `DictionaryArray<K>` or if its values are not a
/// `Utf8Array<O>`.
fn serialize_utf8_dict<'a, K: DictionaryKey, O: Offset>(
    array: &'a dyn Any,
) -> Box<dyn StreamingIterator<Item = [u8]> + 'a> {
    let dictionary = array.downcast_ref::<DictionaryArray<K>>().unwrap();
    let values = dictionary
        .values()
        .as_any()
        .downcast_ref::<Utf8Array<O>>()
        .unwrap();

    let append_value = move |key, buf| match key {
        Some(index) if !values.is_null(index) => {
            buf.extend_from_slice(values.value(index).as_bytes());
        }
        _ => {}
    };

    Box::new(BufStreamingIterator::new(
        dictionary.keys_iter(),
        append_value,
        vec![],
    ))
}
sourcefn is_valid(&self, i: usize) -> bool
fn is_valid(&self, i: usize) -> bool
Examples found in repository?
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/// Writes the slot at `index` of a dictionary array to `f`.
///
/// When the key at `index` is null, `null` is written. Otherwise the key is resolved
/// with `key_value` and the matching entry of the values array is written through
/// `get_display`.
pub fn write_value<K: DictionaryKey, W: Write>(
    array: &DictionaryArray<K>,
    index: usize,
    null: &'static str,
    f: &mut W,
) -> Result {
    if !array.keys().is_valid(index) {
        return write!(f, "{}", null);
    }
    let values = array.values();
    let key = array.key_value(index);
    get_display(values.as_ref(), null)(f, key)
}
More examples
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/// Extends this growable with `len` slots of the array at `index`, starting at `start`.
///
/// The validity is extended first. If the source array has no nulls, every child is
/// extended with the whole range at once; otherwise the range is walked slot by slot,
/// extending the children with the slot when it is valid and with one null otherwise.
fn extend(&mut self, index: usize, start: usize, len: usize) {
    (self.extend_null_bits[index])(&mut self.validity, start, len);

    let source = self.arrays[index];
    if source.null_count() == 0 {
        for child in self.values.iter_mut() {
            child.extend(index, start, len);
        }
        return;
    }

    for slot in start..start + len {
        let slot_is_valid = source.is_valid(slot);
        for child in self.values.iter_mut() {
            if slot_is_valid {
                child.extend(index, slot, 1);
            } else {
                child.extend_validity(1);
            }
        }
    }
}
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
/// Builds a comparator of rows across several arrays, over one or more sort columns.
///
/// Each entry of `pairs` is one sort column: the arrays whose rows are compared and the
/// [`SortOptions`] of that column. `build_compare_fn` builds the comparator of _values_
/// for a given pair of arrays; any error it returns is propagated.
///
/// The returned closure takes `(left_index, left_row, right_index, right_row)` and
/// compares the columns in order, returning the first ordering that is not `Equal`
/// (or `Equal` when all columns tie).
///
/// # Panics
/// * indexes `pairs[0]`, so `pairs` must not be empty;
/// * the returned closure unwraps the lookup of `(left_index, right_index)`, so it
///   panics for a pair of array indices that was not prepared here.
pub fn build_comparator_impl<'a>(
pairs: &'a [(&'a [&'a dyn Array], &SortOptions)],
build_compare_fn: &dyn Fn(&dyn Array, &dyn Array) -> Result<DynComparator>,
) -> Result<Comparator<'a>> {
// prepare the comparison function of _values_ between all pairs of arrays
// The number of arrays is taken from the first column only.
// NOTE(review): presumably every column holds the same number of arrays — confirm.
let indices_pairs = (0..pairs[0].0.len())
.combinations(2)
.map(|indices| (indices[0], indices[1]));
let data = indices_pairs
.map(|(lhs_index, rhs_index)| {
// One (is-valid, is-valid, value comparator) triple per sort column.
let multi_column_comparator = pairs
.iter()
.map(move |(arrays, _)| {
Ok((
Box::new(move |row| arrays[lhs_index].is_valid(row)) as IsValid<'a>,
Box::new(move |row| arrays[rhs_index].is_valid(row)) as IsValid<'a>,
build_compare_fn(arrays[lhs_index], arrays[rhs_index])?,
))
})
.collect::<Result<Vec<_>>>()?;
Ok(((lhs_index, rhs_index), multi_column_comparator))
})
.collect::<Result<AHashMap<(usize, usize), Vec<(IsValid, IsValid, DynComparator)>>>>()?;
// prepare a comparison function taking into account _nulls_ and sort options
let cmp = move |left_index, left_row, right_index, right_row| {
let data = data.get(&(left_index, right_index)).unwrap();
//data.iter().zip(pairs.iter()).for_each()
for c in 0..pairs.len() {
let descending = pairs[c].1.descending;
let null_first = pairs[c].1.nulls_first;
let (l_is_valid, r_is_valid, value_comparator) = &data[c];
let result = match ((l_is_valid)(left_row), (r_is_valid)(right_row)) {
(true, true) => {
// Both valid: compare the values; `descending` only flips this case.
let result = (value_comparator)(left_row, right_row);
match descending {
true => result.reverse(),
false => result,
}
}
// Exactly one side is null: the order is decided by `nulls_first` alone.
(false, true) => {
if null_first {
Ordering::Less
} else {
Ordering::Greater
}
}
(true, false) => {
if null_first {
Ordering::Greater
} else {
Ordering::Less
}
}
// Both null: this column ties, move on to the next one.
(false, false) => Ordering::Equal,
};
if result != Ordering::Equal {
// we found a relevant comparison => short-circuit and return it
return result;
}
}
Ordering::Equal
};
Ok(Box::new(cmp))
}
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
/// Creates a new boxed [`Scalar`] from the slot at `index` of `array`.
///
/// The array is downcast according to the physical type of its data type. For the
/// types that check validity, a null slot yields a scalar holding `None`.
///
/// # Panics
/// * if a downcast to the concrete array type fails (the downcasts are unwrapped);
/// * for the `Map` physical type, which is `todo!()`.
pub fn new_scalar(array: &dyn Array, index: usize) -> Box<dyn Scalar> {
use PhysicalType::*;
match array.data_type().to_physical_type() {
Null => Box::new(NullScalar::new()),
Boolean => {
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
let value = if array.is_valid(index) {
Some(array.value(index))
} else {
None
};
Box::new(BooleanScalar::new(value))
}
Primitive(primitive) => with_match_primitive_type!(primitive, |$T| {
let array = array
.as_any()
.downcast_ref::<PrimitiveArray<$T>>()
.unwrap();
let value = if array.is_valid(index) {
Some(array.value(index))
} else {
None
};
Box::new(PrimitiveScalar::new(array.data_type().clone(), value))
}),
Utf8 => dyn_new_utf8!(array, index, i32),
LargeUtf8 => dyn_new_utf8!(array, index, i64),
Binary => dyn_new_binary!(array, index, i32),
LargeBinary => dyn_new_binary!(array, index, i64),
List => dyn_new_list!(array, index, i32),
LargeList => dyn_new_list!(array, index, i64),
Struct => {
let array = array.as_any().downcast_ref::<StructArray>().unwrap();
if array.is_valid(index) {
// Build one scalar per child array, all taken at the same `index`.
let values = array
.values()
.iter()
.map(|x| new_scalar(x.as_ref(), index))
.collect();
Box::new(StructScalar::new(array.data_type().clone(), Some(values)))
} else {
Box::new(StructScalar::new(array.data_type().clone(), None))
}
}
FixedSizeBinary => {
let array = array
.as_any()
.downcast_ref::<FixedSizeBinaryArray>()
.unwrap();
let value = if array.is_valid(index) {
Some(array.value(index))
} else {
None
};
Box::new(FixedSizeBinaryScalar::new(array.data_type().clone(), value))
}
FixedSizeList => {
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
let value = if array.is_valid(index) {
Some(array.value(index))
} else {
None
};
Box::new(FixedSizeListScalar::new(array.data_type().clone(), value))
}
Union => {
// Unlike the other branches, no validity check is performed here.
let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
Box::new(UnionScalar::new(
array.data_type().clone(),
array.types()[index],
array.value(index),
))
}
// Not implemented: reaching this branch panics.
Map => todo!(),
Dictionary(key_type) => match_integer_type!(key_type, |$T| {
let array = array
.as_any()
.downcast_ref::<DictionaryArray<$T>>()
.unwrap();
let value = if array.is_valid(index) {
Some(array.value(index).into())
} else {
None
};
Box::new(DictionaryScalar::<$T>::new(
array.data_type().clone(),
value,
))
}),
}
}
Trait Implementations§
source§impl<O: Offset> PartialEq<&(dyn Array + 'static)> for BinaryArray<O>
impl<O: Offset> PartialEq<&(dyn Array + 'static)> for BinaryArray<O>
source§impl PartialEq<&(dyn Array + 'static)> for BooleanArray
impl PartialEq<&(dyn Array + 'static)> for BooleanArray
source§impl<K: DictionaryKey> PartialEq<&(dyn Array + 'static)> for DictionaryArray<K>
impl<K: DictionaryKey> PartialEq<&(dyn Array + 'static)> for DictionaryArray<K>
source§impl PartialEq<&(dyn Array + 'static)> for FixedSizeBinaryArray
impl PartialEq<&(dyn Array + 'static)> for FixedSizeBinaryArray
source§impl PartialEq<&(dyn Array + 'static)> for FixedSizeListArray
impl PartialEq<&(dyn Array + 'static)> for FixedSizeListArray
source§impl<O: Offset> PartialEq<&(dyn Array + 'static)> for ListArray<O>
impl<O: Offset> PartialEq<&(dyn Array + 'static)> for ListArray<O>
source§impl PartialEq<&(dyn Array + 'static)> for MapArray
impl PartialEq<&(dyn Array + 'static)> for MapArray
source§impl PartialEq<&(dyn Array + 'static)> for NullArray
impl PartialEq<&(dyn Array + 'static)> for NullArray
source§impl<T: NativeType> PartialEq<&(dyn Array + 'static)> for PrimitiveArray<T>
impl<T: NativeType> PartialEq<&(dyn Array + 'static)> for PrimitiveArray<T>
source§impl PartialEq<&(dyn Array + 'static)> for StructArray
impl PartialEq<&(dyn Array + 'static)> for StructArray
source§impl PartialEq<&(dyn Array + 'static)> for UnionArray
impl PartialEq<&(dyn Array + 'static)> for UnionArray
source§impl<O: Offset> PartialEq<&(dyn Array + 'static)> for Utf8Array<O>
impl<O: Offset> PartialEq<&(dyn Array + 'static)> for Utf8Array<O>
source§impl<O: Offset> PartialEq<BinaryArray<O>> for &dyn Array
impl<O: Offset> PartialEq<BinaryArray<O>> for &dyn Array
source§fn eq(&self, other: &BinaryArray<O>) -> bool
fn eq(&self, other: &BinaryArray<O>) -> bool
This method tests for `self` and `other` values to be equal, and is used by `==`.
source§impl<T: NativeType> PartialEq<PrimitiveArray<T>> for &dyn Array
impl<T: NativeType> PartialEq<PrimitiveArray<T>> for &dyn Array
source§fn eq(&self, other: &PrimitiveArray<T>) -> bool
fn eq(&self, other: &PrimitiveArray<T>) -> bool
This method tests for `self` and `other` values to be equal, and is used by `==`.