use std::fmt::{self, Debug, Display, Formatter};
use std::sync::OnceLock;
use crate::enums::error::MinarrowError;
use crate::enums::shape_dim::ShapeDim;
use crate::traits::concatenate::Concatenate;
use crate::traits::print::MAX_PREVIEW;
#[cfg(feature = "select")]
use crate::traits::selection::{DataSelector, RowSelection};
use crate::traits::shape::Shape;
use crate::{Array, BitmaskV, FieldArray, MaskedArray, TextArray};
#[derive(Clone, PartialEq)]
pub struct ArrayV {
pub array: Array, pub offset: usize,
len: usize,
null_count: OnceLock<usize>,
}
impl ArrayV {
#[inline]
pub fn new(array: Array, offset: usize, len: usize) -> Self {
assert!(
offset + len <= array.len(),
"ArrayView: window out of bounds (offset + len = {}, array.len = {})",
offset + len,
array.len()
);
Self {
array,
offset,
len,
null_count: OnceLock::new(),
}
}
#[inline]
pub fn new_nc(array: Array, offset: usize, len: usize, null_count: usize) -> Self {
assert!(
offset + len <= array.len(),
"ArrayView: window out of bounds (offset + len = {}, array.len = {})",
offset + len,
array.len()
);
let lock = OnceLock::new();
let _ = lock.set(null_count); Self {
array,
offset,
len,
null_count: lock,
}
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
pub fn get<T: MaskedArray + 'static>(&self, i: usize) -> Option<T::CopyType> {
if i >= self.len {
return None;
}
self.array.inner::<T>().get(self.offset + i)
}
#[inline]
pub fn get_unchecked<T: MaskedArray + 'static>(&self, i: usize) -> Option<T::CopyType> {
unsafe { self.array.inner::<T>().get_unchecked(self.offset + i) }
}
#[inline]
pub fn get_str(&self, i: usize) -> Option<&str> {
if i >= self.len {
return None;
}
match &self.array {
Array::TextArray(TextArray::String32(arr)) => arr.get_str(self.offset + i),
#[cfg(feature = "large_string")]
Array::TextArray(TextArray::String64(arr)) => arr.get_str(self.offset + i),
#[cfg(feature = "default_categorical_8")]
Array::TextArray(TextArray::Categorical8(arr)) => arr.get_str(self.offset + i),
#[cfg(feature = "extended_categorical")]
Array::TextArray(TextArray::Categorical16(arr)) => arr.get_str(self.offset + i),
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
Array::TextArray(TextArray::Categorical32(arr)) => arr.get_str(self.offset + i),
#[cfg(feature = "extended_categorical")]
Array::TextArray(TextArray::Categorical64(arr)) => arr.get_str(self.offset + i),
_ => None,
}
}
#[inline]
pub unsafe fn get_str_unchecked(&self, i: usize) -> Option<&str> {
match &self.array {
Array::TextArray(TextArray::String32(arr)) => {
if arr.is_null(self.offset + i) {
None
} else {
Some(unsafe { arr.get_str_unchecked(self.offset + i) })
}
}
#[cfg(feature = "large_string")]
Array::TextArray(TextArray::String64(arr)) => {
if arr.is_null(self.offset + i) {
None
} else {
Some(unsafe { arr.get_str_unchecked(self.offset + i) })
}
}
#[cfg(feature = "default_categorical_8")]
Array::TextArray(TextArray::Categorical8(arr)) => {
if arr.is_null(self.offset + i) {
None
} else {
Some(unsafe { arr.get_str_unchecked(self.offset + i) })
}
}
#[cfg(feature = "extended_categorical")]
Array::TextArray(TextArray::Categorical16(arr)) => {
if arr.is_null(self.offset + i) {
None
} else {
Some(unsafe { arr.get_str_unchecked(self.offset + i) })
}
}
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
Array::TextArray(TextArray::Categorical32(arr)) => {
if arr.is_null(self.offset + i) {
None
} else {
Some(unsafe { arr.get_str_unchecked(self.offset + i) })
}
}
#[cfg(feature = "extended_categorical")]
Array::TextArray(TextArray::Categorical64(arr)) => {
if arr.is_null(self.offset + i) {
None
} else {
Some(unsafe { arr.get_str_unchecked(self.offset + i) })
}
}
_ => None,
}
}
#[cfg(feature = "scalar_type")]
#[inline]
pub fn get_scalar(&self, i: usize) -> Option<crate::Scalar> {
if i >= self.len {
return None;
}
self.array.get_scalar(self.offset + i)
}
#[inline]
pub fn slice(&self, offset: usize, len: usize) -> Self {
assert!(offset + len <= self.len, "ArrayView::slice: out of bounds");
Self {
array: self.array.clone(), offset: self.offset + offset,
len,
null_count: OnceLock::new(),
}
}
#[inline]
pub fn to_array(&self) -> Array {
self.array.slice_clone(self.offset, self.len)
}
pub fn to_typed_vec<T: crate::Numeric>(
&self,
) -> Result<crate::Vec64<T>, crate::enums::error::KernelError> {
use crate::enums::error::KernelError;
use crate::{NumericArray, Vec64};
use num_traits::NumCast;
let offset = self.offset;
let len = self.len;
macro_rules! cast_slice {
($arr:expr) => {{
let slice = &$arr.data.as_slice()[offset..offset + len];
slice
.iter()
.map(|&v| {
NumCast::from(v).ok_or_else(|| {
KernelError::UnsupportedType("numeric cast failed".into())
})
})
.collect::<Result<Vec64<T>, _>>()
}};
}
match &self.array {
Array::NumericArray(num) => match num {
NumericArray::Int32(a) => cast_slice!(a),
NumericArray::Int64(a) => cast_slice!(a),
NumericArray::UInt32(a) => cast_slice!(a),
NumericArray::UInt64(a) => cast_slice!(a),
NumericArray::Float32(a) => cast_slice!(a),
NumericArray::Float64(a) => cast_slice!(a),
#[cfg(feature = "extended_numeric_types")]
NumericArray::Int8(a) => cast_slice!(a),
#[cfg(feature = "extended_numeric_types")]
NumericArray::Int16(a) => cast_slice!(a),
#[cfg(feature = "extended_numeric_types")]
NumericArray::UInt8(a) => cast_slice!(a),
#[cfg(feature = "extended_numeric_types")]
NumericArray::UInt16(a) => cast_slice!(a),
NumericArray::Null => {
Err(KernelError::UnsupportedType("null numeric array".into()))
}
},
Array::BooleanArray(a) => {
(0..len)
.map(|i| {
let v = a.get(offset + i).unwrap_or(false);
NumCast::from(if v { 1u8 } else { 0u8 }).ok_or_else(|| {
KernelError::UnsupportedType("bool to numeric cast failed".into())
})
})
.collect::<Result<Vec64<T>, _>>()
}
_ => Err(KernelError::UnsupportedType(
"to_typed_vec requires a numeric array".into(),
)),
}
}
#[cfg(feature = "select")]
pub fn gather_indices(&self, indices: &[usize]) -> Array {
use crate::{
BooleanArray, CategoricalArray, FloatArray, IntegerArray, NumericArray, StringArray,
TextArray,
};
#[cfg(feature = "datetime")]
use crate::{DatetimeArray, TemporalArray};
match &self.array {
Array::Null => Array::Null,
Array::NumericArray(num_arr) => match num_arr {
NumericArray::Int32(_) => {
let mut new_arr = IntegerArray::<i32>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<i32>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_int32(new_arr)
}
NumericArray::Int64(_) => {
let mut new_arr = IntegerArray::<i64>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<i64>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_int64(new_arr)
}
NumericArray::Float32(_) => {
let mut new_arr = FloatArray::<f32>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<FloatArray<f32>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_float32(new_arr)
}
NumericArray::Float64(_) => {
let mut new_arr = FloatArray::<f64>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<FloatArray<f64>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_float64(new_arr)
}
NumericArray::UInt32(_) => {
let mut new_arr = IntegerArray::<u32>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<u32>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_uint32(new_arr)
}
NumericArray::UInt64(_) => {
let mut new_arr = IntegerArray::<u64>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<u64>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_uint64(new_arr)
}
#[cfg(feature = "extended_numeric_types")]
NumericArray::Int8(_) => {
let mut new_arr = IntegerArray::<i8>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<i8>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_int8(new_arr)
}
#[cfg(feature = "extended_numeric_types")]
NumericArray::Int16(_) => {
let mut new_arr = IntegerArray::<i16>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<i16>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_int16(new_arr)
}
#[cfg(feature = "extended_numeric_types")]
NumericArray::UInt8(_) => {
let mut new_arr = IntegerArray::<u8>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<u8>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_uint8(new_arr)
}
#[cfg(feature = "extended_numeric_types")]
NumericArray::UInt16(_) => {
let mut new_arr = IntegerArray::<u16>::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<IntegerArray<u16>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_uint16(new_arr)
}
NumericArray::Null => Array::Null,
},
Array::TextArray(text_arr) => match text_arr {
TextArray::String32(_) => {
let mut values: Vec<&str> = Vec::with_capacity(indices.len());
for &idx in indices {
if let Some(val) = self.get_str(idx) {
values.push(val);
} else {
values.push("");
}
}
let mut new_arr = StringArray::<u32>::from_vec(values, None);
for (i, &idx) in indices.iter().enumerate() {
if self.get_str(idx).is_none() {
new_arr.set_null(i);
}
}
Array::from_string32(new_arr)
}
#[cfg(feature = "large_string")]
TextArray::String64(_) => {
let mut values: Vec<&str> = Vec::with_capacity(indices.len());
for &idx in indices {
if let Some(val) = self.get_str(idx) {
values.push(val);
} else {
values.push("");
}
}
let mut new_arr = StringArray::<u64>::from_vec(values, None);
for (i, &idx) in indices.iter().enumerate() {
if self.get_str(idx).is_none() {
new_arr.set_null(i);
}
}
Array::from_string64(new_arr)
}
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
TextArray::Categorical32(_) => {
let mut values: Vec<&str> = Vec::with_capacity(indices.len());
for &idx in indices {
if let Some(val) = self.get_str(idx) {
values.push(val);
} else {
values.push("");
}
}
let mut new_arr = CategoricalArray::<u32>::from_vec(values, None);
for (i, &idx) in indices.iter().enumerate() {
if self.get_str(idx).is_none() {
new_arr.set_null(i);
}
}
Array::from_categorical32(new_arr)
}
#[cfg(feature = "default_categorical_8")]
TextArray::Categorical8(_) => {
let mut values: Vec<&str> = Vec::with_capacity(indices.len());
for &idx in indices {
if let Some(val) = self.get_str(idx) {
values.push(val);
} else {
values.push("");
}
}
let mut new_arr = CategoricalArray::<u8>::from_vec(values, None);
for (i, &idx) in indices.iter().enumerate() {
if self.get_str(idx).is_none() {
new_arr.set_null(i);
}
}
Array::from_categorical8(new_arr)
}
#[cfg(feature = "extended_categorical")]
TextArray::Categorical16(_) => {
let mut values: Vec<&str> = Vec::with_capacity(indices.len());
for &idx in indices {
if let Some(val) = self.get_str(idx) {
values.push(val);
} else {
values.push("");
}
}
let mut new_arr = CategoricalArray::<u16>::from_vec(values, None);
for (i, &idx) in indices.iter().enumerate() {
if self.get_str(idx).is_none() {
new_arr.set_null(i);
}
}
Array::from_categorical16(new_arr)
}
#[cfg(feature = "extended_categorical")]
TextArray::Categorical64(_) => {
let mut values: Vec<&str> = Vec::with_capacity(indices.len());
for &idx in indices {
if let Some(val) = self.get_str(idx) {
values.push(val);
} else {
values.push("");
}
}
let mut new_arr = CategoricalArray::<u64>::from_vec(values, None);
for (i, &idx) in indices.iter().enumerate() {
if self.get_str(idx).is_none() {
new_arr.set_null(i);
}
}
Array::from_categorical64(new_arr)
}
TextArray::Null => Array::Null,
},
Array::BooleanArray(_) => {
let mut new_arr = BooleanArray::with_capacity(indices.len(), true);
for &idx in indices {
if let Some(val) = self.get::<BooleanArray<()>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_bool(new_arr)
}
#[cfg(feature = "datetime")]
Array::TemporalArray(temp_arr) => match temp_arr {
TemporalArray::Datetime32(arr) => {
let mut new_arr = DatetimeArray::<i32>::with_capacity(
indices.len(),
true,
Some(arr.time_unit),
);
for &idx in indices {
if let Some(val) = self.get::<DatetimeArray<i32>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_datetime_i32(new_arr)
}
TemporalArray::Datetime64(arr) => {
let mut new_arr = DatetimeArray::<i64>::with_capacity(
indices.len(),
true,
Some(arr.time_unit),
);
for &idx in indices {
if let Some(val) = self.get::<DatetimeArray<i64>>(idx) {
new_arr.push(val);
} else {
new_arr.push_null();
}
}
Array::from_datetime_i64(new_arr)
}
TemporalArray::Null => Array::Null,
},
}
}
#[inline]
pub fn data_ptr_and_byte_len(&self) -> (*const u8, usize, usize) {
let (ptr, _total_len, elem_size) = self.array.data_ptr_and_byte_len();
let windowed_ptr = unsafe { ptr.add(self.offset * elem_size) };
(windowed_ptr, self.len, elem_size)
}
#[inline]
pub fn end(&self) -> usize {
self.offset + self.len
}
#[inline]
pub fn as_tuple(&self) -> (Array, usize, usize) {
(self.array.clone(), self.offset, self.len) }
#[inline]
pub fn as_tuple_ref(&self) -> (&Array, usize, usize) {
(&self.array, self.offset, self.len)
}
#[inline]
pub fn null_count(&self) -> usize {
*self
.null_count
.get_or_init(|| match self.array.null_mask() {
Some(mask) => mask.view(self.offset, self.len).count_zeros(),
None => 0,
})
}
#[inline]
pub fn null_mask_view(&self) -> Option<BitmaskV> {
self.array
.null_mask()
.map(|mask| mask.view(self.offset, self.len))
}
#[inline]
pub fn set_null_count(&self, count: usize) -> Result<(), usize> {
self.null_count.set(count).map_err(|_| count)
}
}
impl From<Array> for ArrayV {
fn from(array: Array) -> Self {
let len = array.len();
let null_count = array.null_count();
ArrayV {
array,
offset: 0,
len,
null_count: null_count.into(),
}
}
}
impl From<FieldArray> for ArrayV {
fn from(field_array: FieldArray) -> Self {
let len = field_array.len();
let null_count = field_array.null_count();
ArrayV {
array: field_array.array,
offset: 0,
len,
null_count: null_count.into(),
}
}
}
impl From<&FieldArray> for ArrayV {
fn from(field_array: &FieldArray) -> Self {
let len = field_array.len();
let null_count = field_array.null_count();
ArrayV {
array: field_array.array.clone(),
offset: 0,
len,
null_count: null_count.into(),
}
}
}
#[cfg(feature = "views")]
impl From<crate::NumericArrayV> for ArrayV {
fn from(view: crate::NumericArrayV) -> Self {
let len = view.len();
ArrayV::new(Array::NumericArray(view.array), view.offset, len)
}
}
#[cfg(feature = "views")]
impl From<crate::TextArrayV> for ArrayV {
fn from(view: crate::TextArrayV) -> Self {
let len = view.len();
ArrayV::new(Array::TextArray(view.array), view.offset, len)
}
}
#[cfg(all(feature = "views", feature = "datetime"))]
impl From<crate::TemporalArrayV> for ArrayV {
fn from(view: crate::TemporalArrayV) -> Self {
let len = view.len();
ArrayV::new(Array::TemporalArray(view.array), view.offset, len)
}
}
#[cfg(feature = "views")]
impl From<crate::BooleanArrayV> for ArrayV {
fn from(view: crate::BooleanArrayV) -> Self {
let len = view.len();
ArrayV::new(Array::BooleanArray(view.array), view.offset, len)
}
}
#[cfg(feature = "scalar_type")]
impl From<crate::Scalar> for ArrayV {
fn from(scalar: crate::Scalar) -> Self {
let array = scalar.array_from_value(1);
ArrayV::new(array, 0, 1)
}
}
impl Debug for ArrayV {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_struct("ArrayView")
.field("offset", &self.offset)
.field("len", &self.len)
.field("array", &self.array)
.field("cached_null_count", &self.null_count.get())
.finish()
}
}
impl Display for ArrayV {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let nulls = self.null_count();
let head_len = self.len.min(MAX_PREVIEW);
writeln!(
f,
"ArrayView [{} values] (offset: {}, nulls: {})",
self.len, self.offset, nulls
)?;
let sliced_array = self.array.slice_clone(self.offset, head_len);
for line in format!("{}", sliced_array).lines() {
writeln!(f, " {line}")?;
}
if self.len > MAX_PREVIEW {
writeln!(f, " ... ({} more rows)", self.len - MAX_PREVIEW)?;
}
Ok(())
}
}
impl Shape for ArrayV {
fn shape(&self) -> ShapeDim {
ShapeDim::Rank1(self.len())
}
}
impl Concatenate for ArrayV {
fn concat(self, other: Self) -> Result<Self, MinarrowError> {
let self_array = self.to_array();
let other_array = other.to_array();
let concatenated = self_array.concat(other_array)?;
Ok(ArrayV::from(concatenated))
}
}
#[cfg(feature = "select")]
impl RowSelection for ArrayV {
type View = ArrayV;
fn r<S: DataSelector>(&self, selection: S) -> ArrayV {
if selection.is_contiguous() {
let indices = selection.resolve_indices(self.len());
if indices.is_empty() {
return ArrayV::new(self.array.clone(), self.offset, 0);
}
let new_offset = self.offset + indices[0];
let new_len = indices.len();
ArrayV::new(self.array.clone(), new_offset, new_len)
} else {
let indices = selection.resolve_indices(self.len());
let gathered_array = self.gather_indices(&indices);
ArrayV::new(gathered_array, 0, indices.len())
}
}
fn get_row_count(&self) -> usize {
self.len
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::{Array, Bitmask, IntegerArray, NumericArray, vec64};
#[test]
fn test_array_view_basic_indexing_and_slice() {
let mut arr = IntegerArray::<i32>::default();
arr.push(11);
arr.push(22);
arr.push(33);
arr.push(44);
let array = Array::NumericArray(NumericArray::Int32(Arc::new(arr)));
let view = ArrayV::new(array, 1, 2);
assert_eq!(view.len(), 2);
assert_eq!(view.offset, 1);
assert_eq!(view.get::<IntegerArray<i32>>(0), Some(22));
assert_eq!(view.get::<IntegerArray<i32>>(1), Some(33));
assert_eq!(view.get::<IntegerArray<i32>>(2), None);
let sub = view.slice(1, 1);
assert_eq!(sub.len(), 1);
assert_eq!(sub.get::<IntegerArray<i32>>(0), Some(33));
assert_eq!(sub.get::<IntegerArray<i32>>(1), None);
}
#[test]
fn test_array_view_null_count_and_cache() {
let mut arr = IntegerArray::<i32>::default();
arr.push(1);
arr.push(2);
arr.push(3);
arr.push(4);
let mut mask = Bitmask::new_set_all(4, true);
mask.set(2, false);
arr.null_mask = Some(mask);
let array = Array::NumericArray(NumericArray::Int32(Arc::new(arr)));
let view = ArrayV::new(array, 0, 4);
assert_eq!(view.null_count(), 1, "Null count should detect one null");
assert_eq!(view.null_count(), 1);
let view2 = view.slice(0, 2);
assert_eq!(view2.null_count(), 0);
let view3 = view.slice(2, 2);
assert_eq!(view3.null_count(), 1);
}
#[test]
fn test_array_view_with_supplied_null_count() {
let mut arr = IntegerArray::<i32>::default();
arr.push(5);
arr.push(6);
let array = Array::NumericArray(NumericArray::Int32(Arc::new(arr)));
let view = ArrayV::new_nc(array, 0, 2, 99);
assert_eq!(view.null_count(), 99);
assert!(view.set_null_count(101).is_err());
assert_eq!(view.null_count(), 99);
}
#[test]
fn test_array_view_to_array_and_as_tuple() {
let mut arr = IntegerArray::<i32>::default();
for v in 10..20 {
arr.push(v);
}
let array = Array::NumericArray(NumericArray::Int32(Arc::new(arr)));
let view = ArrayV::new(array.clone(), 4, 3);
let arr2 = view.to_array();
if let Array::NumericArray(NumericArray::Int32(a2)) = arr2 {
assert_eq!(a2.data, vec64![14, 15, 16]);
} else {
panic!("Unexpected variant");
}
let tup = view.as_tuple();
assert_eq!(&tup.0, &array);
assert_eq!(tup.1, 4);
assert_eq!(tup.2, 3);
}
#[test]
fn test_array_view_null_mask_view() {
let mut arr = IntegerArray::<i32>::default();
arr.push(2);
arr.push(4);
arr.push(6);
let mut mask = Bitmask::new_set_all(3, true);
mask.set(0, false);
arr.null_mask = Some(mask);
let array = Array::NumericArray(NumericArray::Int32(Arc::new(arr)));
let view = ArrayV::new(array.clone(), 1, 2);
let mask_view = view.null_mask_view().expect("Should have mask");
assert_eq!(mask_view.len(), 2);
assert!(mask_view.get(0));
assert!(mask_view.get(1));
}
}