use crate::{
bitmap::Bitmap,
buffer::Buffer,
datatypes::DataType,
error::{ArrowError, Result},
};
use super::{
specification::{try_check_offsets, try_check_offsets_bounds},
Array, GenericBinaryArray, Offset,
};
mod ffi;
pub(super) mod fmt;
mod iterator;
pub use iterator::*;
mod from;
mod mutable;
pub use mutable::*;
/// An array of variable-length byte slices, generic over the offset type `O`.
///
/// Element `i` is the byte range `values[offsets[i]..offsets[i + 1]]`, which is why
/// `offsets` holds one more entry than the array has elements.
#[derive(Clone)]
pub struct BinaryArray<O: Offset> {
// Logical type of the array; the constructors require its physical type to match
// that of `Self::default_data_type()`.
data_type: DataType,
// Start/end positions of every element inside `values` (`len() + 1` entries).
offsets: Buffer<O>,
// Bytes of all elements; each element is located through `offsets`.
values: Buffer<u8>,
// Optional validity bitmap; when present, its length equals the array's length.
validity: Option<Bitmap>,
}
impl<O: Offset> BinaryArray<O> {
    /// Builds a [`BinaryArray`] from its parts, validating them first.
    ///
    /// # Errors
    /// Returns an error when, in this order:
    /// * `try_check_offsets` rejects `offsets` against `values.len()`;
    /// * `validity` is `Some` and its length differs from `offsets.len() - 1`;
    /// * the physical type of `data_type` differs from the physical type of
    ///   [`Self::default_data_type`].
    pub fn try_new(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Result<Self> {
        try_check_offsets(&offsets, values.len())?;

        // One validity bit is expected per element, i.e. `offsets.len() - 1` bits.
        match validity.as_ref() {
            Some(bitmap) if bitmap.len() != offsets.len() - 1 => {
                return Err(ArrowError::oos(
                    "validity mask length must match the number of values",
                ));
            }
            _ => {}
        }

        let actual = data_type.to_physical_type();
        let expected = Self::default_data_type().to_physical_type();
        if actual != expected {
            return Err(ArrowError::oos(
                "BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary",
            ));
        }

        Ok(Self {
            data_type,
            offsets,
            values,
            validity,
        })
    }

    /// Infallible counterpart of [`Self::try_new`].
    ///
    /// # Panics
    /// Panics whenever [`Self::try_new`] would return an error.
    pub fn new(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Self {
        let built = Self::try_new(data_type, offsets, values, validity);
        built.unwrap()
    }

    /// Alias of [`Self::new`]; takes the same arguments and panics under the same
    /// conditions.
    pub fn from_data(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Self {
        Self::new(data_type, offsets, values, validity)
    }

    /// Creates an array with zero elements: a single zero offset, no value bytes and
    /// no validity bitmap.
    ///
    /// # Panics
    /// Panics if `data_type` is rejected by [`Self::try_new`].
    pub fn new_empty(data_type: DataType) -> Self {
        let offsets: Buffer<O> = Buffer::from(vec![O::zero()]);
        let values: Buffer<u8> = Buffer::new();
        Self::new(data_type, offsets, values, None)
    }

    /// Creates an array of `length` elements backed by `length + 1` zeroed offsets,
    /// no value bytes and a zeroed validity bitmap of `length` bits (every slot is
    /// therefore marked as null).
    ///
    /// # Panics
    /// Panics if `data_type` is rejected by [`Self::try_new`].
    #[inline]
    pub fn new_null(data_type: DataType, length: usize) -> Self {
        let offsets: Buffer<O> = Buffer::new_zeroed(length + 1);
        let values: Buffer<u8> = Buffer::new();
        let validity = Bitmap::new_zeroed(length);
        Self::new(data_type, offsets, values, Some(validity))
    }

    /// Returns the [`DataType`] matching the offset type `O`:
    /// [`DataType::LargeBinary`] when `O::is_large()`, [`DataType::Binary`] otherwise.
    pub fn default_data_type() -> DataType {
        match O::is_large() {
            true => DataType::LargeBinary,
            false => DataType::Binary,
        }
    }
}
impl<O: Offset> BinaryArray<O> {
    /// Builds a [`BinaryArray`] from its parts, validating `offsets` only through
    /// `try_check_offsets_bounds` rather than `try_check_offsets`.
    ///
    /// # Errors
    /// Returns an error when, in this order:
    /// * `try_check_offsets_bounds` rejects `offsets` against `values.len()`;
    /// * `validity` is `Some` and its length differs from `offsets.len() - 1`;
    /// * the physical type of `data_type` differs from the physical type of
    ///   [`Self::default_data_type`].
    ///
    /// # Safety
    /// NOTE(review): presumably the caller must guarantee whatever
    /// `try_check_offsets` verifies beyond `try_check_offsets_bounds` (likely that
    /// the offsets never decrease) - confirm against those helpers.
    pub unsafe fn try_new_unchecked(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Result<Self> {
        try_check_offsets_bounds(&offsets, values.len())?;

        // One validity bit is expected per element, i.e. `offsets.len() - 1` bits.
        match validity.as_ref() {
            Some(bitmap) if bitmap.len() != offsets.len() - 1 => {
                return Err(ArrowError::oos(
                    "validity mask length must match the number of values",
                ));
            }
            _ => {}
        }

        let actual = data_type.to_physical_type();
        let expected = Self::default_data_type().to_physical_type();
        if actual != expected {
            return Err(ArrowError::oos(
                "BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary",
            ));
        }

        Ok(Self {
            data_type,
            offsets,
            values,
            validity,
        })
    }

    /// Infallible counterpart of [`Self::try_new_unchecked`].
    ///
    /// # Panics
    /// Panics whenever [`Self::try_new_unchecked`] would return an error.
    ///
    /// # Safety
    /// Same contract as [`Self::try_new_unchecked`].
    pub unsafe fn new_unchecked(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Self {
        let built = Self::try_new_unchecked(data_type, offsets, values, validity);
        built.unwrap()
    }

    /// Alias of [`Self::new_unchecked`]; takes the same arguments and panics under
    /// the same conditions.
    ///
    /// # Safety
    /// Same contract as [`Self::new_unchecked`].
    pub unsafe fn from_data_unchecked(
        data_type: DataType,
        offsets: Buffer<O>,
        values: Buffer<u8>,
        validity: Option<Bitmap>,
    ) -> Self {
        Self::new_unchecked(data_type, offsets, values, validity)
    }
}
impl<O: Offset> BinaryArray<O> {
    /// Returns a new array covering `length` elements of `self`, starting at element
    /// `offset`.
    ///
    /// Only the offsets and the validity bitmap are narrowed; `values` is cloned
    /// unchanged.
    ///
    /// # Panics
    /// Panics if `offset + length` overflows `usize` or is greater than `self.len()`.
    #[must_use]
    pub fn slice(&self, offset: usize, length: usize) -> Self {
        // A plain `offset + length` wraps around when overflow checks are disabled
        // (e.g. `offset == usize::MAX`, `length == 1`), which would let an
        // out-of-range request pass the check and reach `slice_unchecked`.
        // `checked_add` turns that case into the same panic as any other
        // out-of-bounds slice.
        let in_bounds = offset
            .checked_add(length)
            .map_or(false, |end| end <= self.len());
        assert!(
            in_bounds,
            "the offset of the new Buffer cannot exceed the existing length"
        );
        // SAFETY: `offset + length <= self.len()` was verified just above.
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Same as [`Self::slice`], without checking the requested range.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= self.len()`.
    #[must_use]
    pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
        let validity = self
            .validity
            .clone()
            .map(|bitmap| bitmap.slice_unchecked(offset, length));
        // `length` elements are delimited by `length + 1` offsets.
        let offsets = self.offsets.clone().slice_unchecked(offset, length + 1);
        Self {
            data_type: self.data_type.clone(),
            offsets,
            values: self.values.clone(),
            validity,
        }
    }

    /// Returns a clone of this array whose validity bitmap is replaced by `validity`.
    ///
    /// # Panics
    /// Panics if `validity` is `Some` and its length differs from `self.len()`.
    #[must_use]
    pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
        if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
            panic!("validity's length must be equal to the array's length")
        }
        let mut arr = self.clone();
        arr.validity = validity;
        arr
    }
}
impl<O: Offset> BinaryArray<O> {
    /// Number of elements in the array, i.e. one less than the number of offsets.
    #[inline]
    pub fn len(&self) -> usize {
        self.offsets.len() - 1
    }

    /// Returns the bytes of element `i`.
    ///
    /// # Panics
    /// Panics if `i` or `i + 1` is not a valid index into the offsets.
    pub fn value(&self, i: usize) -> &[u8] {
        // Both offset lookups are bounds-checked.
        let byte_range = self.offsets[i].to_usize()..self.offsets[i + 1].to_usize();
        // SAFETY: relies on the struct invariant that stored offsets delimit valid
        // ranges of `values` - checked by the validating constructors, and part of
        // the caller's contract for the `unsafe` ones.
        unsafe { self.values.get_unchecked(byte_range) }
    }

    /// Returns the bytes of element `i` without bounds-checking the offset lookups.
    ///
    /// # Safety
    /// The caller must ensure that `i < self.len()`.
    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
        let byte_range =
            self.offsets.get_unchecked(i).to_usize()..self.offsets.get_unchecked(i + 1).to_usize();
        self.values.get_unchecked(byte_range)
    }

    /// The optional validity bitmap.
    #[inline]
    pub fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }

    /// The offsets buffer (`len() + 1` entries).
    #[inline]
    pub fn offsets(&self) -> &Buffer<O> {
        &self.offsets
    }

    /// The buffer holding the bytes of all elements.
    #[inline]
    pub fn values(&self) -> &Buffer<u8> {
        &self.values
    }
}
// Trait-object view of `BinaryArray`. Every method forwards to the inherent method
// of the same name; the explicit `Self::method(self, ..)` form makes it visible that
// the inherent implementation is the one being called, not the trait method itself.
impl<O: Offset> Array for BinaryArray<O> {
    #[inline]
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    #[inline]
    fn len(&self) -> usize {
        Self::len(self)
    }

    #[inline]
    fn data_type(&self) -> &DataType {
        &self.data_type
    }

    fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }

    fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
        let sliced = Self::slice(self, offset, length);
        Box::new(sliced)
    }

    unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
        let sliced = Self::slice_unchecked(self, offset, length);
        Box::new(sliced)
    }

    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
        let updated = Self::with_validity(self, validity);
        Box::new(updated)
    }
}
// Exposes the raw value bytes and offsets as plain slices. The buffers are borrowed
// directly and deref-coerced to slices at the return site.
unsafe impl<O: Offset> GenericBinaryArray<O> for BinaryArray<O> {
    #[inline]
    fn values(&self) -> &[u8] {
        &self.values
    }

    #[inline]
    fn offsets(&self) -> &[O] {
        &self.offsets
    }
}