use std::fmt::{Display, Formatter};
use std::iter::FromIterator;
use std::sync::Arc;
#[cfg(feature = "views")]
use crate::ArrayV;
#[cfg(feature = "views")]
use crate::SuperArrayV;
use crate::enums::{error::MinarrowError, shape_dim::ShapeDim};
use crate::ffi::arrow_dtype::ArrowType;
#[cfg(feature = "size")]
use crate::traits::byte_size::ByteSize;
use crate::traits::consolidate::Consolidate;
use crate::traits::{concatenate::Concatenate, shape::Shape};
use crate::{Array, Field, FieldArray};
/// Strategy that tells [`SuperArray::rechunk`] how to choose the target
/// number of rows per chunk.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum RechunkStrategy {
/// Fixed number of rows per chunk. `rechunk` rejects a value of `0`.
Count(usize),
/// Approximate byte budget per chunk. `rechunk` converts it to a row count
/// using the array's estimated byte size, with a minimum of one row.
#[cfg(feature = "size")]
Memory(usize),
/// Use the default chosen by `rechunk` (8192 rows per chunk).
Auto,
}
/// A logical array stored as an ordered list of [`Array`] chunks that share
/// one `ArrowType`, with optional field metadata and per-chunk null counts.
#[derive(Clone, Debug, PartialEq)]
pub struct SuperArray {
/// The chunks, in logical row order.
pub chunks: Vec<Array>,
/// Optional field metadata (name, dtype, nullability) shared by all chunks.
pub field: Option<Arc<Field>>,
/// Optional null count per chunk; when present, entry `i` describes
/// `chunks[i]`. Operations that restructure chunks reset this to `None`.
pub null_counts: Option<Vec<usize>>,
}
impl SuperArray {
/// Creates an empty `SuperArray` with no chunks, no field metadata and no
/// null counts.
#[inline]
pub fn new() -> Self {
    let chunks = Vec::new();
    Self {
        chunks,
        field: None,
        null_counts: None,
    }
}
/// Builds a field-less `SuperArray` from raw chunks.
///
/// # Panics
/// Panics if any chunk's `ArrowType` differs from that of the first chunk.
pub fn from_arrays(chunks: Vec<Array>) -> Self {
    if let Some((head, rest)) = chunks.split_first() {
        let dtype = head.arrow_type();
        // Chunk 0 is the reference; numbering of the rest starts at 1.
        for (i, chunk) in (1..).zip(rest) {
            assert_eq!(
                chunk.arrow_type(),
                dtype,
                "Chunk {i} ArrowType mismatch (expected {:?}, got {:?})",
                dtype,
                chunk.arrow_type()
            );
        }
    }
    Self {
        chunks,
        field: None,
        null_counts: None,
    }
}
/// Builds a `SuperArray` from raw chunks and attaches `field` as metadata.
///
/// # Panics
/// Panics if any chunk's `ArrowType` differs from `field.dtype`.
pub fn from_arrays_with_field(chunks: Vec<Array>, field: impl Into<Arc<Field>>) -> Self {
    let field: Arc<Field> = field.into();
    chunks.iter().enumerate().for_each(|(i, chunk)| {
        assert_eq!(
            chunk.arrow_type(),
            field.dtype,
            "Chunk {i} ArrowType mismatch (expected {:?}, got {:?})",
            field.dtype,
            chunk.arrow_type()
        );
    });
    Self {
        chunks,
        field: Some(field),
        null_counts: None,
    }
}
/// Builds a field-less `SuperArray` from raw chunks together with a
/// caller-supplied null count for each chunk.
///
/// # Panics
/// Panics if `null_counts` and `chunks` differ in length, or if any chunk's
/// `ArrowType` differs from that of the first chunk.
pub fn from_arrays_nc(chunks: Vec<Array>, null_counts: Vec<usize>) -> Self {
    assert_eq!(
        chunks.len(),
        null_counts.len(),
        "null_counts length ({}) must match chunks length ({})",
        null_counts.len(),
        chunks.len()
    );
    if let Some((head, rest)) = chunks.split_first() {
        let dtype = head.arrow_type();
        // Chunk 0 is the reference; numbering of the rest starts at 1.
        for (i, chunk) in (1..).zip(rest) {
            assert_eq!(
                chunk.arrow_type(),
                dtype,
                "Chunk {i} ArrowType mismatch (expected {:?}, got {:?})",
                dtype,
                chunk.arrow_type()
            );
        }
    }
    Self {
        chunks,
        field: None,
        null_counts: Some(null_counts),
    }
}
pub fn from_field_array_chunks(chunks: Vec<FieldArray>) -> Self {
assert!(
!chunks.is_empty(),
"from_field_array_chunks: input chunks cannot be empty"
);
let field = chunks[0].field.clone();
for (i, fa) in chunks.iter().enumerate().skip(1) {
assert_eq!(
fa.field.dtype, field.dtype,
"Chunk {i} ArrowType mismatch (expected {:?}, got {:?})",
field.dtype, fa.field.dtype
);
assert_eq!(
fa.field.nullable, field.nullable,
"Chunk {i} nullability mismatch"
);
assert_eq!(
fa.field.name, field.name,
"Chunk {i} field name mismatch (expected '{}', got '{}')",
field.name, fa.field.name
);
}
let null_counts: Vec<usize> = chunks.iter().map(|fa| fa.null_count).collect();
let arrays = chunks.into_iter().map(|fa| fa.array).collect();
Self {
chunks: arrays,
field: Some(field),
null_counts: Some(null_counts),
}
}
pub fn from_chunks(chunks: Vec<FieldArray>) -> Self {
Self::from_field_array_chunks(chunks)
}
/// Materialises a `SuperArray` from array views, producing one chunk per
/// view via `slice_clone` and recording each view's null count.
///
/// # Panics
/// Panics if `slices` is empty, or if any view's underlying array disagrees
/// with `field` on dtype or nullability.
#[cfg(feature = "views")]
pub fn from_slices(slices: &[ArrayV], field: Arc<Field>) -> Self {
    assert!(!slices.is_empty(), "from_slices requires non-empty slice");
    let (arrays, null_counts): (Vec<Array>, Vec<usize>) = slices
        .iter()
        .enumerate()
        .map(|(i, view)| {
            assert_eq!(
                view.array.arrow_type(),
                field.dtype,
                "Slice {i} ArrowType does not match field"
            );
            assert_eq!(
                view.array.is_nullable(),
                field.nullable,
                "Slice {i} nullability does not match field"
            );
            let materialised = view.array.slice_clone(view.offset, view.len());
            (materialised, view.null_count())
        })
        .unzip();
    Self {
        chunks: arrays,
        field: Some(field),
        null_counts: Some(null_counts),
    }
}
/// Returns a chunked view over the logical row range `offset..offset + len`.
///
/// The view holds one `ArrayV` per chunk that the range touches, in order.
///
/// # Panics
/// Panics with "slice out of bounds" if `offset + len` exceeds `self.len()`
/// (including when the sum overflows `usize`), and panics if the array has
/// no field metadata.
#[cfg(feature = "views")]
pub fn slice(&self, offset: usize, len: usize) -> SuperArrayV {
    // `checked_add` keeps a wrapped sum from slipping past the bounds check
    // in release builds.
    let in_bounds = offset
        .checked_add(len)
        .map_or(false, |end| end <= self.len());
    assert!(in_bounds, "slice out of bounds");
    let field = self.field.clone().expect("slice() requires field metadata");
    let mut remaining = len;
    let mut off = offset;
    let mut slices = Vec::new();
    for chunk in &self.chunks {
        let this_len = chunk.len();
        // Skip chunks that lie entirely before the requested start.
        if off >= this_len {
            off -= this_len;
            continue;
        }
        let take = remaining.min(this_len - off);
        slices.push(ArrayV::new(chunk.clone(), off, take));
        remaining -= take;
        if remaining == 0 {
            break;
        }
        // Every chunk after the first touched one is read from its start.
        off = 0;
    }
    SuperArrayV { slices, len, field }
}
/// Returns the field metadata, if any.
#[inline]
pub fn field(&self) -> Option<&Field> {
    self.field.as_ref().map(|arc| &**arc)
}
/// Returns the field metadata by reference.
///
/// # Panics
/// Panics if the array has no field metadata.
#[inline]
pub fn field_ref(&self) -> &Field {
    match self.field {
        Some(ref field) => &**field,
        None => panic!("field_ref() called but SuperArray has no field metadata"),
    }
}
/// Returns `true` when field metadata is attached.
#[inline]
pub fn has_field(&self) -> bool {
    matches!(self.field, Some(_))
}
/// Returns the shared handle to the field metadata, if any.
#[inline]
pub fn field_arc(&self) -> Option<&Arc<Field>> {
    match &self.field {
        Some(arc) => Some(arc),
        None => None,
    }
}
#[inline]
pub fn arrow_type(&self) -> ArrowType {
if let Some(chunk) = self.chunks.first() {
chunk.arrow_type()
} else if let Some(ref field) = self.field {
field.dtype.clone()
} else {
panic!("arrow_type() called on empty SuperArray with no field")
}
}
#[inline]
pub fn is_nullable(&self) -> bool {
self.field_ref().nullable
}
#[inline]
pub fn n_chunks(&self) -> usize {
self.chunks.len()
}
/// Returns the total number of rows across all chunks.
pub fn len(&self) -> usize {
    self.chunks
        .iter()
        .fold(0, |total, chunk| total + chunk.len())
}
/// Returns `true` when the array holds no rows, i.e. there are no chunks
/// or every chunk has length zero.
#[inline]
pub fn is_empty(&self) -> bool {
    // `all` over an empty chunk list is `true`, covering the no-chunk case.
    self.chunks.iter().all(|chunk| chunk.len() == 0)
}
/// Returns the chunk at position `idx`, or `None` if out of range.
#[inline]
pub fn chunk(&self, idx: usize) -> Option<&Array> {
    self.chunks.as_slice().get(idx)
}
/// Returns the recorded null count for chunk `idx`, or `None` when null
/// counts are not tracked or `idx` is out of range.
#[inline]
pub fn chunk_null_count(&self, idx: usize) -> Option<usize> {
    let counts = self.null_counts.as_ref()?;
    counts.get(idx).copied()
}
pub fn push(&mut self, chunk: Array) {
if let Some(first) = self.chunks.first() {
assert_eq!(
chunk.arrow_type(),
first.arrow_type(),
"Chunk ArrowType mismatch"
);
} else if let Some(ref field) = self.field {
assert_eq!(
chunk.arrow_type(),
field.dtype,
"Chunk ArrowType mismatch with field"
);
}
if let Some(ref mut nc) = self.null_counts {
nc.push(chunk.null_count());
}
self.chunks.push(chunk);
}
pub fn push_with_null_count(&mut self, chunk: Array, null_count: usize) {
if let Some(first) = self.chunks.first() {
assert_eq!(
chunk.arrow_type(),
first.arrow_type(),
"Chunk ArrowType mismatch"
);
} else if let Some(ref field) = self.field {
assert_eq!(
chunk.arrow_type(),
field.dtype,
"Chunk ArrowType mismatch with field"
);
}
self.chunks.push(chunk);
if let Some(ref mut nc) = self.null_counts {
nc.push(null_count);
} else {
self.null_counts = Some(vec![null_count]);
}
}
pub fn push_field_array(&mut self, chunk: FieldArray) {
if let Some(ref field) = self.field {
assert_eq!(chunk.field.dtype, field.dtype, "Chunk ArrowType mismatch");
assert_eq!(
chunk.field.nullable, field.nullable,
"Chunk nullability mismatch"
);
assert_eq!(chunk.field.name, field.name, "Chunk field name mismatch");
} else if !self.chunks.is_empty() {
assert_eq!(
chunk.array.arrow_type(),
self.chunks[0].arrow_type(),
"Chunk ArrowType mismatch"
);
}
if self.field.is_none() {
self.field = Some(chunk.field.clone());
}
self.chunks.push(chunk.array);
if let Some(ref mut nc) = self.null_counts {
nc.push(chunk.null_count);
} else {
self.null_counts = Some(vec![chunk.null_count]);
}
}
/// Inserts every chunk of `other` at logical row `index`.
///
/// * If `index` falls on a chunk boundary, the new chunks are spliced in
///   between existing chunks without copying row data.
/// * Otherwise the chunk containing `index` is split in two and the new
///   chunks are placed between the halves.
/// * If `self` has no chunks, it adopts `other`'s chunks and null counts,
///   and `other`'s field when one is present (an existing field on `self`
///   is kept when `other` carries none).
///
/// Per-chunk null counts are reset to `None` whenever chunks are spliced.
///
/// # Errors
/// * `MinarrowError::IndexError` if `index > self.len()`.
/// * `MinarrowError::IncompatibleTypeError` if the two arrays hold
///   different `ArrowType`s.
/// * Any error returned by `Array::split`.
///
/// On error `self` is left unmodified.
pub fn insert_rows(
    &mut self,
    index: usize,
    other: impl Into<SuperArray>,
) -> Result<(), MinarrowError> {
    let other = other.into();
    let total_len = self.len();
    if index > total_len {
        return Err(MinarrowError::IndexError(format!(
            "Index {} out of bounds for SuperArray of length {}",
            index, total_len
        )));
    }
    if other.is_empty() {
        return Ok(());
    }
    if let (Some(mine), Some(theirs)) = (self.chunks.first(), other.chunks.first()) {
        let self_type = mine.arrow_type();
        let other_type = theirs.arrow_type();
        if self_type != other_type {
            return Err(MinarrowError::IncompatibleTypeError {
                from: "SuperArray",
                to: "SuperArray",
                message: Some(format!(
                    "Type mismatch: {:?} vs {:?}",
                    self_type, other_type
                )),
            });
        }
    }
    if self.chunks.is_empty() {
        self.chunks = other.chunks;
        // Keep our own field metadata when `other` has none to offer.
        if other.field.is_some() {
            self.field = other.field;
        }
        self.null_counts = other.null_counts;
        return Ok(());
    }

    // Find the first chunk whose row range (end inclusive) contains `index`.
    let mut cumulative = 0;
    let mut location = None;
    for (i, chunk) in self.chunks.iter().enumerate() {
        let chunk_len = chunk.len();
        if index <= cumulative + chunk_len {
            location = Some((i, index - cumulative, chunk_len));
            break;
        }
        cumulative += chunk_len;
    }
    let (target_idx, local_index, target_chunk_len) = location.ok_or_else(|| {
        MinarrowError::IndexError(format!("Failed to find chunk for index {}", index))
    })?;

    if local_index == 0 {
        // Boundary before the target chunk.
        drop(self.chunks.splice(target_idx..target_idx, other.chunks));
    } else if local_index == target_chunk_len {
        // Boundary after the target chunk.
        let after = target_idx + 1;
        drop(self.chunks.splice(after..after, other.chunks));
    } else {
        let field = self.field.clone().unwrap_or_else(|| {
            Arc::new(Field::new(
                "data",
                self.chunks[0].arrow_type(),
                self.chunks[0].is_nullable(),
                None,
            ))
        });
        // Split a clone first so a failed split cannot drop the chunk.
        let split = self.chunks[target_idx].clone().split(local_index, &field)?;
        let mut halves = split.chunks.into_iter();
        let (left, right) = match (halves.next(), halves.next()) {
            (Some(left), Some(right)) => (left, right),
            _ => {
                return Err(MinarrowError::IndexError(format!(
                    "Failed to split chunk {} at offset {}",
                    target_idx, local_index
                )));
            }
        };
        let replacement = std::iter::once(left)
            .chain(other.chunks)
            .chain(std::iter::once(right));
        drop(self.chunks.splice(target_idx..=target_idx, replacement));
    }
    self.null_counts = None;
    Ok(())
}
pub fn rechunk(&mut self, strategy: RechunkStrategy) -> Result<(), MinarrowError> {
if self.chunks.is_empty() || self.len() == 0 {
return Ok(());
}
let chunk_size = match strategy {
RechunkStrategy::Count(size) => {
if size == 0 {
return Err(MinarrowError::IndexError(
"Count chunk size must be greater than 0".to_string(),
));
}
size
}
RechunkStrategy::Auto => 8192,
#[cfg(feature = "size")]
RechunkStrategy::Memory(bytes_per_chunk) => {
let total_bytes = self.est_bytes();
let total_len = self.len();
if total_bytes == 0 {
return Err(MinarrowError::IndexError(
"Cannot rechunk: array has 0 estimated bytes".to_string(),
));
}
((bytes_per_chunk * total_len) / total_bytes).max(1)
}
};
if self.chunks.len() == 1 && self.chunks[0].len() == chunk_size {
return Ok(());
}
let field = self.field.clone().unwrap_or_else(|| {
Arc::new(Field::new(
"data",
self.chunks[0].arrow_type(),
self.chunks[0].is_nullable(),
None,
))
});
let mut new_chunks: Vec<Array> = Vec::new();
let mut accumulator: Option<Array> = None;
for chunk in self.chunks.drain(..) {
let mut remaining = chunk;
while remaining.len() > 0 {
if let Some(ref mut acc) = accumulator {
let acc_len = acc.len();
let needed = chunk_size - acc_len;
if remaining.len() <= needed {
*acc = acc.clone().concat(remaining)?;
if acc.len() == chunk_size {
new_chunks.push(accumulator.take().unwrap());
}
break; } else {
let split_result = remaining.split(needed, &field)?;
let parts = split_result.chunks;
let to_add = parts[0].clone();
remaining = parts[1].clone();
*acc = acc.clone().concat(to_add)?;
new_chunks.push(accumulator.take().unwrap());
}
} else {
if remaining.len() == chunk_size {
new_chunks.push(remaining);
break;
} else if remaining.len() > chunk_size {
let split_result = remaining.split(chunk_size, &field)?;
let parts = split_result.chunks;
new_chunks.push(parts[0].clone());
remaining = parts[1].clone();
} else {
accumulator = Some(remaining);
break;
}
}
}
}
if let Some(final_chunk) = accumulator {
new_chunks.push(final_chunk);
}
self.chunks = new_chunks;
self.null_counts = None;
Ok(())
}
/// Rechunks only the first `up_to_index` rows with `strategy`, leaving the
/// rows from `up_to_index` onward in their existing chunks.
///
/// If `up_to_index` falls inside a chunk, that chunk is split and only its
/// leading part is rechunked. If it falls exactly on a chunk boundary, the
/// chunk starting at that boundary is kept untouched. Per-chunk null counts
/// are reset to `None` when chunks are rewritten.
///
/// # Errors
/// * `MinarrowError::IndexError` if `up_to_index > self.len()`.
/// * Any error returned by `Array::split` or by `rechunk`.
///
/// On error `self` is left unmodified.
pub fn rechunk_to(
    &mut self,
    up_to_index: usize,
    strategy: RechunkStrategy,
) -> Result<(), MinarrowError> {
    let total_len = self.len();
    if up_to_index > total_len {
        return Err(MinarrowError::IndexError(format!(
            "rechunk_to index {} out of bounds for array of length {}",
            up_to_index, total_len
        )));
    }
    if up_to_index == 0 || self.chunks.is_empty() {
        return Ok(());
    }
    if up_to_index == total_len {
        return self.rechunk(strategy);
    }
    let field = self.field.clone().unwrap_or_else(|| {
        Arc::new(Field::new(
            "data",
            self.chunks[0].arrow_type(),
            self.chunks[0].is_nullable(),
            None,
        ))
    });

    // Partition clones of the chunks into the part to rechunk (`head`) and
    // the part to keep (`tail`); `self` is only updated after every
    // fallible step has succeeded.
    let mut head: Vec<Array> = Vec::new();
    let mut tail: Vec<Array> = Vec::new();
    let mut consumed = 0usize;
    for chunk in &self.chunks {
        let chunk_end = consumed + chunk.len();
        if consumed >= up_to_index {
            // Starts at or after the cut: keep as-is.
            tail.push(chunk.clone());
        } else if chunk_end <= up_to_index {
            // Lies entirely before the cut.
            head.push(chunk.clone());
        } else {
            // Straddles the cut: split it.
            let split = chunk.clone().split(up_to_index - consumed, &field)?;
            let mut parts = split.chunks.into_iter();
            match (parts.next(), parts.next()) {
                (Some(left), Some(right)) => {
                    head.push(left);
                    tail.push(right);
                }
                _ => {
                    return Err(MinarrowError::IndexError(format!(
                        "Failed to split chunk at index {}",
                        up_to_index
                    )));
                }
            }
        }
        consumed = chunk_end;
    }

    let mut prefix = SuperArray::from_arrays(head);
    prefix.field = self.field.clone();
    prefix.rechunk(strategy)?;

    let mut result = prefix.chunks;
    result.extend(tail);
    self.chunks = result;
    self.null_counts = None;
    Ok(())
}
#[inline]
pub fn into_chunks(self) -> Vec<Array> {
self.chunks
}
/// Returns the chunks as a slice, in logical row order.
#[inline]
pub fn chunks(&self) -> &[Array] {
    self.chunks.as_slice()
}
}
impl Default for SuperArray {
fn default() -> Self {
Self::new()
}
}
impl From<Vec<FieldArray>> for SuperArray {
fn from(arrays: Vec<FieldArray>) -> Self {
SuperArray::from_field_array_chunks(arrays)
}
}
/// Collects `FieldArray` chunks from an iterator and builds the array via
/// `SuperArray::from_field_array_chunks`, including its panics.
impl FromIterator<FieldArray> for SuperArray {
    fn from_iter<T: IntoIterator<Item = FieldArray>>(iter: T) -> Self {
        Self::from_field_array_chunks(iter.into_iter().collect::<Vec<FieldArray>>())
    }
}
/// Wraps a single `FieldArray` as a one-chunk `SuperArray`, carrying over
/// its field and null count.
impl From<FieldArray> for SuperArray {
    fn from(fa: FieldArray) -> Self {
        let null_counts = vec![fa.null_count];
        let field = fa.field;
        let chunks = vec![fa.array];
        SuperArray {
            chunks,
            field: Some(field),
            null_counts: Some(null_counts),
        }
    }
}
/// Wraps a single `Array` as a one-chunk `SuperArray` with no field
/// metadata and no null counts.
impl From<Array> for SuperArray {
    fn from(array: Array) -> Self {
        let chunks = vec![array];
        SuperArray {
            chunks,
            field: None,
            null_counts: None,
        }
    }
}
impl From<Vec<Array>> for SuperArray {
fn from(arrays: Vec<Array>) -> Self {
SuperArray::from_arrays(arrays)
}
}
/// Collects raw chunks from an iterator and builds the array via
/// `SuperArray::from_arrays`, including its type-mismatch panic.
impl FromIterator<Array> for SuperArray {
    fn from_iter<T: IntoIterator<Item = Array>>(iter: T) -> Self {
        Self::from_arrays(iter.into_iter().collect::<Vec<Array>>())
    }
}
/// A `SuperArray` is rank-1: its shape is its total row count.
impl Shape for SuperArray {
    fn shape(&self) -> ShapeDim {
        let rows = self.len();
        ShapeDim::Rank1(rows)
    }
}
/// Merges all chunks into a single [`Array`].
///
/// Both implementations it dispatches to assert that at least one chunk
/// exists, so consolidating a chunk-less `SuperArray` panics.
impl Consolidate for SuperArray {
type Output = Array;
fn consolidate(self) -> Array {
// Compile-time dispatch: exactly one of the two blocks below is built,
// depending on whether the `arena` feature is enabled.
#[cfg(feature = "arena")]
{
self.consolidate_arena()
}
#[cfg(not(feature = "arena"))]
{
self.consolidate_concat()
}
}
}
impl SuperArray {
    /// Consolidates by concatenating the chunks pairwise, left to right.
    ///
    /// # Panics
    /// Panics if there are no chunks or if any concatenation fails.
    #[cfg_attr(feature = "arena", allow(dead_code))]
    fn consolidate_concat(self) -> Array {
        assert!(
            !self.chunks.is_empty(),
            "consolidate() called on empty SuperArray"
        );
        let mut rest = self.chunks.into_iter();
        let first = rest.next().expect("Expected at least one array");
        rest.fold(first, |merged, next| {
            merged.concat(next).expect("Failed to concatenate arrays")
        })
    }

    /// Consolidates through the arena helper; a single chunk is returned
    /// as-is without going through the arena.
    ///
    /// # Panics
    /// Panics if there are no chunks.
    #[cfg(feature = "arena")]
    fn consolidate_arena(self) -> Array {
        assert!(
            !self.chunks.is_empty(),
            "consolidate() called on empty SuperArray"
        );
        let mut chunks = self.chunks;
        if chunks.len() == 1 {
            return chunks.pop().unwrap();
        }
        let dtype = chunks[0].arrow_type();
        let refs: Vec<&Array> = chunks.iter().collect();
        crate::structs::arena::consolidate_array_arena(&refs, &dtype)
    }
}
/// Concatenation appends `other`'s chunks after `self`'s without merging
/// them.
///
/// * Two chunk-less operands yield a fresh empty array.
/// * If only one operand has chunks, that operand is returned unchanged.
/// * Null counts are kept only when both operands track them.
/// * The field is taken from `self`, falling back to `other`.
///
/// Returns `MinarrowError::IncompatibleTypeError` if the first chunks of the
/// two operands have different `ArrowType`s.
impl Concatenate for SuperArray {
    fn concat(self, other: Self) -> Result<Self, MinarrowError> {
        match (self.chunks.is_empty(), other.chunks.is_empty()) {
            (true, true) => return Ok(SuperArray::new()),
            (true, false) => return Ok(other),
            (false, true) => return Ok(self),
            (false, false) => {}
        }

        let self_type = self.chunks[0].arrow_type();
        let other_type = other.chunks[0].arrow_type();
        if self_type != other_type {
            return Err(MinarrowError::IncompatibleTypeError {
                from: "SuperArray",
                to: "SuperArray",
                message: Some(format!(
                    "Type mismatch: {:?} vs {:?}",
                    self_type, other_type
                )),
            });
        }

        let SuperArray {
            chunks: mut merged,
            field: lhs_field,
            null_counts: lhs_counts,
        } = self;
        let SuperArray {
            chunks: rhs_chunks,
            field: rhs_field,
            null_counts: rhs_counts,
        } = other;

        merged.extend(rhs_chunks);
        let null_counts = lhs_counts.zip(rhs_counts).map(|(mut lhs, rhs)| {
            lhs.extend(rhs);
            lhs
        });
        Ok(SuperArray {
            chunks: merged,
            field: lhs_field.or(rhs_field),
            null_counts,
        })
    }
}
impl Display for SuperArray {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let name = self
.field
.as_ref()
.map(|f| f.name.as_str())
.unwrap_or("<unnamed>");
let dtype = if let Some(chunk) = self.chunks.first() {
format!("{:?}", chunk.arrow_type())
} else if let Some(ref field) = self.field {
format!("{:?}", field.dtype)
} else {
"<empty>".to_string()
};
writeln!(
f,
"SuperArray \"{}\" [{} rows, {} chunks] (dtype: {})",
name,
self.len(),
self.n_chunks(),
dtype
)?;
for (i, chunk) in self.chunks.iter().enumerate() {
let null_count = self
.null_counts
.as_ref()
.and_then(|nc| nc.get(i).copied())
.map(|n| n.to_string())
.unwrap_or_else(|| "?".to_string());
writeln!(
f,
" ├─ Chunk {i}: {} rows, nulls: {}",
chunk.len(),
null_count
)?;
let indent = " │ ";
for line in format!("{}", chunk).lines() {
writeln!(f, "{indent}{line}")?;
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Imported unconditionally: the rechunk tests below match on
    // `NumericArray` and are not gated behind the `views` feature.
    use crate::NumericArray;
    use crate::ffi::arrow_dtype::ArrowType;
    use crate::{Array, Field, Vec64, fa_i32};

    /// Builds a `Field` with default metadata.
    #[allow(dead_code)]
    fn field(name: &str, dtype: ArrowType, nullable: bool) -> Field {
        Field {
            name: name.to_string(),
            dtype,
            nullable,
            metadata: Default::default(),
        }
    }

    /// Builds a non-nullable Int32 `Array` from a slice.
    fn int_array(data: &[i32]) -> Array {
        Array::from_int32(crate::IntegerArray::<i32> {
            data: Vec64::from_slice(data).into(),
            null_mask: None,
        })
    }

    /// Pushing raw arrays grows both the chunk count and the row count.
    #[test]
    fn test_new_and_push_array() {
        let mut ca = SuperArray::new();
        assert_eq!(ca.n_chunks(), 0);
        ca.push(int_array(&[1, 2, 3]));
        assert_eq!(ca.n_chunks(), 1);
        assert_eq!(ca.len(), 3);
        ca.push(int_array(&[4, 5]));
        assert_eq!(ca.n_chunks(), 2);
        assert_eq!(ca.len(), 5);
    }

    /// Pushing field arrays adopts the field from the first chunk.
    #[test]
    fn test_new_and_push_field_array() {
        let mut ca = SuperArray::new();
        assert_eq!(ca.n_chunks(), 0);
        ca.push_field_array(fa_i32!("a", 1, 2, 3));
        assert_eq!(ca.n_chunks(), 1);
        assert_eq!(ca.len(), 3);
        assert!(ca.field().is_some());
        ca.push_field_array(fa_i32!("a", 4, 5));
        assert_eq!(ca.n_chunks(), 2);
        assert_eq!(ca.len(), 5);
    }

    /// Pushing a chunk of a different type panics.
    #[test]
    #[should_panic(expected = "Chunk ArrowType mismatch")]
    fn test_type_mismatch() {
        let mut ca = SuperArray::new();
        ca.push(int_array(&[1, 2, 3]));
        let wrong = Array::from_float64(crate::FloatArray::<f64> {
            data: Vec64::from_slice(&[1.0, 2.0]).into(),
            null_mask: None,
        });
        ca.push(wrong);
    }

    /// Pushing a field array with a different field name panics.
    #[test]
    #[should_panic(expected = "Chunk field name mismatch")]
    fn test_name_mismatch() {
        let mut ca = SuperArray::new();
        ca.push_field_array(fa_i32!("a", 1, 2, 3));
        ca.push_field_array(fa_i32!("b", 4, 5));
    }

    #[test]
    fn test_from_field_array_chunks() {
        let c1 = fa_i32!("a", 1, 2, 3);
        let c2 = fa_i32!("a", 4);
        let ca = SuperArray::from_field_array_chunks(vec![c1.clone(), c2.clone()]);
        assert_eq!(ca.n_chunks(), 2);
        assert_eq!(ca.len(), 4);
        assert_eq!(ca.field().unwrap().name, "a");
    }

    #[test]
    fn test_from_arrays() {
        let a1 = int_array(&[1, 2, 3]);
        let a2 = int_array(&[4, 5]);
        let ca = SuperArray::from_arrays(vec![a1, a2]);
        assert_eq!(ca.n_chunks(), 2);
        assert_eq!(ca.len(), 5);
        assert!(ca.field().is_none());
    }

    #[test]
    #[should_panic(expected = "from_field_array_chunks: input chunks cannot be empty")]
    fn test_from_field_array_chunks_empty() {
        let _ = SuperArray::from_field_array_chunks(Vec::new());
    }

    /// A slice spanning two chunks consolidates to the expected rows.
    #[cfg(feature = "views")]
    #[test]
    fn test_slice_and_materialise() {
        let c1 = fa_i32!("a", 10, 20, 30);
        let c2 = fa_i32!("a", 40, 50);
        let ca = SuperArray::from_field_array_chunks(vec![c1.clone(), c2.clone()]);
        let sl = ca.slice(2, 3);
        assert_eq!(sl.len, 3);
        let arr = sl.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = arr {
            assert_eq!(&*ia.data, &[30, 40, 50]);
        } else {
            panic!("Expected Int32");
        }
    }

    #[cfg(feature = "views")]
    #[test]
    fn test_from_slices() {
        let c1 = fa_i32!("a", 10, 20, 30);
        let c2 = fa_i32!("a", 40, 50);
        let ca = SuperArray::from_field_array_chunks(vec![c1.clone(), c2.clone()]);
        let sl = ca.slice(1, 4);
        let slices = &sl.slices;
        let field = c1.field.clone();
        let ca2 = SuperArray::from_slices(slices, field);
        assert_eq!(ca2.n_chunks(), 2);
        assert_eq!(ca2.len(), 4);
    }

    #[test]
    fn test_is_empty_and_default() {
        let ca = SuperArray::default();
        assert!(ca.is_empty());
        let ca2 = SuperArray::from_chunks(vec![fa_i32!("a", 1)]);
        assert!(!ca2.is_empty());
    }

    #[test]
    fn test_metadata_accessors() {
        let ca = SuperArray::from_chunks(vec![fa_i32!("z", 1, 2, 3, 4)]);
        assert_eq!(ca.arrow_type(), ArrowType::Int32);
        assert!(!ca.is_nullable());
        assert_eq!(ca.field().unwrap().name, "z");
        assert_eq!(ca.chunks().len(), 1);
    }

    /// Inserting mid-chunk splits the first chunk around the new rows.
    #[cfg(feature = "views")]
    #[test]
    fn test_insert_rows_into_first_chunk() {
        let mut ca = SuperArray::from_chunks(vec![fa_i32!("a", 1, 2, 3), fa_i32!("a", 4, 5)]);
        let other = SuperArray::from_arrays(vec![int_array(&[99, 88])]);
        ca.insert_rows(1, other).unwrap();
        assert_eq!(ca.len(), 7);
        assert_eq!(ca.n_chunks(), 4);
        let result = ca.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = result {
            assert_eq!(&*ia.data, &[1, 99, 88, 2, 3, 4, 5]);
        } else {
            panic!("Expected Int32");
        }
    }

    /// Inserting mid-chunk in the second chunk splits that chunk.
    #[cfg(feature = "views")]
    #[test]
    fn test_insert_rows_into_second_chunk() {
        let mut ca = SuperArray::from_chunks(vec![fa_i32!("a", 1, 2), fa_i32!("a", 3, 4, 5)]);
        let other = SuperArray::from_arrays(vec![int_array(&[99])]);
        ca.insert_rows(3, other).unwrap();
        assert_eq!(ca.len(), 6);
        assert_eq!(ca.n_chunks(), 4);
        let result = ca.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = result {
            assert_eq!(&*ia.data, &[1, 2, 3, 99, 4, 5]);
        } else {
            panic!("Expected Int32");
        }
    }

    #[cfg(feature = "views")]
    #[test]
    fn test_insert_rows_prepend() {
        let mut ca = SuperArray::from_chunks(vec![fa_i32!("a", 1, 2, 3)]);
        let other = SuperArray::from_arrays(vec![int_array(&[99])]);
        ca.insert_rows(0, other).unwrap();
        assert_eq!(ca.len(), 4);
        let result = ca.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = result {
            assert_eq!(&*ia.data, &[99, 1, 2, 3]);
        } else {
            panic!("Expected Int32");
        }
    }

    #[cfg(feature = "views")]
    #[test]
    fn test_insert_rows_append() {
        let mut ca = SuperArray::from_chunks(vec![fa_i32!("a", 1, 2, 3)]);
        let other = SuperArray::from_arrays(vec![int_array(&[99])]);
        ca.insert_rows(3, other).unwrap();
        assert_eq!(ca.len(), 4);
        let result = ca.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = result {
            assert_eq!(&*ia.data, &[1, 2, 3, 99]);
        } else {
            panic!("Expected Int32");
        }
    }

    #[test]
    fn test_insert_rows_type_mismatch() {
        let mut ca = SuperArray::from_arrays(vec![int_array(&[1, 2, 3])]);
        let wrong_type = Array::from_float64(crate::FloatArray::<f64> {
            data: Vec64::from_slice(&[99.0]).into(),
            null_mask: None,
        });
        let other = SuperArray::from_arrays(vec![wrong_type]);
        let result = ca.insert_rows(0, other);
        assert!(result.is_err());
    }

    #[test]
    fn test_insert_rows_out_of_bounds() {
        let mut ca = SuperArray::from_arrays(vec![int_array(&[1, 2, 3])]);
        let other = SuperArray::from_arrays(vec![int_array(&[99])]);
        let result = ca.insert_rows(10, other);
        assert!(result.is_err());
    }

    /// Rechunking 3+2+4 rows by count 3 yields three chunks of 3 rows.
    #[test]
    fn test_rechunk_uniform() {
        let mut ca = SuperArray::from_chunks(vec![
            fa_i32!("a", 1, 2, 3),
            fa_i32!("a", 4, 5),
            fa_i32!("a", 6, 7, 8, 9),
        ]);
        ca.rechunk(RechunkStrategy::Count(3)).unwrap();
        assert_eq!(ca.n_chunks(), 3);
        assert_eq!(ca.len(), 9);
        assert_eq!(ca.chunks[0].len(), 3);
        assert_eq!(ca.chunks[1].len(), 3);
        assert_eq!(ca.chunks[2].len(), 3);
        let result = ca.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = result {
            assert_eq!(&*ia.data, &[1, 2, 3, 4, 5, 6, 7, 8, 9]);
        } else {
            panic!("Expected Int32");
        }
    }

    /// With fewer rows than the auto chunk size, everything merges into one.
    #[test]
    fn test_rechunk_auto() {
        let mut ca = SuperArray::from_chunks(vec![fa_i32!("a", 1, 2, 3), fa_i32!("a", 4, 5)]);
        ca.rechunk(RechunkStrategy::Auto).unwrap();
        assert_eq!(ca.n_chunks(), 1);
        assert_eq!(ca.len(), 5);
    }

    #[test]
    #[cfg(feature = "size")]
    fn test_rechunk_by_memory() {
        let mut ca = SuperArray::from_chunks(vec![
            fa_i32!("a", 1, 2, 3, 4, 5, 6, 7, 8),
            fa_i32!("a", 9, 10, 11, 12),
        ]);
        ca.rechunk(RechunkStrategy::Memory(16)).unwrap();
        assert_eq!(ca.len(), 12);
        assert!(ca.n_chunks() >= 3);
        let result = ca.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = result {
            assert_eq!(&*ia.data, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
        } else {
            panic!("Expected Int32");
        }
    }

    #[test]
    fn test_rechunk_uniform_zero_error() {
        let mut ca = SuperArray::from_chunks(vec![fa_i32!("a", 1, 2, 3)]);
        let result = ca.rechunk(RechunkStrategy::Count(0));
        assert!(result.is_err());
    }

    #[test]
    fn test_rechunk_empty_array() {
        let mut ca = SuperArray::new();
        ca.rechunk(RechunkStrategy::Auto).unwrap();
        assert_eq!(ca.n_chunks(), 0);
    }

    #[test]
    fn test_rechunk_preserves_data_order() {
        let mut ca = SuperArray::from_chunks(vec![
            fa_i32!("a", 10, 20),
            fa_i32!("a", 30),
            fa_i32!("a", 40, 50, 60),
        ]);
        ca.rechunk(RechunkStrategy::Count(2)).unwrap();
        let result = ca.consolidate();
        if let Array::NumericArray(NumericArray::Int32(ia)) = result {
            assert_eq!(&*ia.data, &[10, 20, 30, 40, 50, 60]);
        } else {
            panic!("Expected Int32");
        }
    }
}