use crate::{
bitmap::{
utils::{BitmapIter, ZipValidity},
Bitmap,
},
buffer::Buffer,
datatypes::DataType,
error::Error,
offset::{Offset, Offsets, OffsetsBuffer},
trusted_len::TrustedLen,
};
use either::Either;
use super::{specification::try_check_offsets_bounds, Array, GenericBinaryArray};
mod ffi;
pub(super) mod fmt;
mod iterator;
pub use iterator::*;
mod from;
mod mutable_values;
pub use mutable_values::*;
mod mutable;
pub use mutable::*;
/// An array of variable-length byte strings whose offsets are stored as `O`.
///
/// The value at slot `i` is the range of `values` delimited by the `i`-th pair of
/// consecutive entries in `offsets` (see `value_unchecked`). Instances are validated by
/// `try_new`, which checks the offsets against `values`, the validity length and the
/// physical data type.
#[derive(Clone)]
pub struct BinaryArray<O: Offset> {
// Logical type of the array; `try_new` requires its physical type to equal that of
// `default_data_type()`.
data_type: DataType,
// Offsets delimiting each value inside `values`.
offsets: OffsetsBuffer<O>,
// Bytes of all values, addressed through `offsets`.
values: Buffer<u8>,
// Optional bitmap; when present its length must equal `offsets.len()` (checked in `try_new`).
validity: Option<Bitmap>,
}
impl<O: Offset> BinaryArray<O> {
/// Fallibly assembles a [`BinaryArray`] from its parts.
///
/// # Errors
/// The checks run in this order and the first failure is returned:
/// * `try_check_offsets_bounds` rejects `offsets` for a buffer of `values.len()` bytes;
/// * `validity` is `Some` and its length differs from `offsets.len()`;
/// * the physical type of `data_type` differs from that of [`Self::default_data_type`].
pub fn try_new(
    data_type: DataType,
    offsets: OffsetsBuffer<O>,
    values: Buffer<u8>,
    validity: Option<Bitmap>,
) -> Result<Self, Error> {
    try_check_offsets_bounds(&offsets, values.len())?;

    // A missing validity is always acceptable; a present one must cover every slot.
    if let Some(bitmap) = &validity {
        if bitmap.len() != offsets.len() {
            return Err(Error::oos(
                "validity mask length must match the number of values",
            ));
        }
    }

    // Only the physical representation is compared, not the logical type itself.
    let expected_physical = Self::default_data_type().to_physical_type();
    if data_type.to_physical_type() != expected_physical {
        return Err(Error::oos(
            "BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary",
        ));
    }

    Ok(Self {
        data_type,
        offsets,
        values,
        validity,
    })
}
/// Builds a [`BinaryArray`] without validity from a slice of byte-like items.
///
/// Delegates to [`Self::from_trusted_len_values_iter`] over the slice's iterator.
pub fn from_slice<T: AsRef<[u8]>, P: AsRef<[T]>>(slice: P) -> Self {
    let items: &[T] = slice.as_ref();
    Self::from_trusted_len_values_iter(items.iter())
}
/// Builds a [`BinaryArray`] from a slice of optional byte-like items.
///
/// The slice is first collected into a [`MutableBinaryArray`], which is then converted.
pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
    let mutable = MutableBinaryArray::<O>::from(slice);
    mutable.into()
}
/// Returns an iterator that pairs [`Self::values_iter`] with the optional validity bitmap.
pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<O>, BitmapIter> {
    let values = self.values_iter();
    let validity = self.validity.as_ref();
    ZipValidity::new_with_validity(values, validity)
}
/// Returns a [`BinaryValueIter`] over this array.
///
/// The validity bitmap is not passed here; [`Self::iter`] is the method that combines this
/// iterator with the validity.
pub fn values_iter(&self) -> BinaryValueIter<O> {
BinaryValueIter::new(self)
}
/// Returns the length of this array, as reported by `self.offsets.len()`.
///
/// This is the same quantity `try_new` compares the validity length against.
#[inline]
pub fn len(&self) -> usize {
self.offsets.len()
}
/// Returns the bytes stored at slot `i`.
///
/// # Panics
/// Panics if `i >= self.len()`.
#[inline]
pub fn value(&self, i: usize) -> &[u8] {
assert!(i < self.len());
// SAFETY: `i` was just checked to be below `self.len()`.
unsafe { self.value_unchecked(i) }
}
/// Returns the bytes stored at slot `i` without any bounds checking.
///
/// # Safety
/// The caller must ensure `i < self.len()`; this is the condition the safe wrapper
/// `value` asserts before calling this method.
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
// Start and end positions of slot `i` inside `values`.
let (start, end) = self.offsets.start_end_unchecked(i);
// No bounds check here: relies on the offsets having been validated against `values`
// at construction (see `try_new`).
self.values.get_unchecked(start..end)
}
/// Returns a reference to the [`DataType`] this array was created with.
#[inline]
pub fn data_type(&self) -> &DataType {
&self.data_type
}
/// Returns a reference to the underlying values buffer.
///
/// `slice_unchecked` clones this buffer unchanged, so on a sliced array it is not
/// restricted to the bytes of the visible slots.
#[inline]
pub fn values(&self) -> &Buffer<u8> {
&self.values
}
/// Returns a reference to the offsets buffer that delimits each value inside `values`.
#[inline]
pub fn offsets(&self) -> &OffsetsBuffer<O> {
&self.offsets
}
/// Returns the validity bitmap, or `None` when the array has none.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}
/// Returns a new array covering `length` slots starting at slot `offset`.
///
/// The range check uses checked arithmetic: a plain `offset + length` wraps around in
/// builds without overflow checks, which would let an out-of-range request pass the
/// assertion and reach the unchecked slicing code.
///
/// # Panics
/// Panics if `offset + length` overflows `usize` or exceeds `self.len()`.
#[must_use]
pub fn slice(&self, offset: usize, length: usize) -> Self {
    let in_bounds = offset
        .checked_add(length)
        .map_or(false, |end| end <= self.len());
    assert!(
        in_bounds,
        "the offset of the new Buffer cannot exceed the existing length"
    );
    // SAFETY: the assertion above guarantees `offset + length <= self.len()` with no
    // arithmetic overflow.
    unsafe { self.slice_unchecked(offset, length) }
}
/// Returns a new array covering `length` slots starting at slot `offset`, without
/// checking the range.
///
/// The values buffer is cloned as-is; only the offsets and the validity are narrowed.
/// A sliced validity with no unset bits is dropped (replaced by `None`).
///
/// # Safety
/// The caller must ensure `offset + length <= self.len()`, as the safe wrapper `slice`
/// asserts.
#[must_use]
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
    let validity = match &self.validity {
        Some(bitmap) => {
            let sliced = bitmap.clone().slice_unchecked(offset, length);
            // Keep the bitmap only when the window actually contains an unset bit.
            if sliced.unset_bits() > 0 {
                Some(sliced)
            } else {
                None
            }
        }
        None => None,
    };

    // One more offset than slots, since each slot is delimited by two consecutive entries.
    let offsets = self.offsets.clone().slice_unchecked(offset, length + 1);

    Self {
        data_type: self.data_type.clone(),
        offsets,
        values: self.values.clone(),
        validity,
    }
}
/// Moves this array into a `Box<dyn Array>`.
pub fn boxed(self) -> Box<dyn Array> {
Box::new(self)
}
/// Moves this array into a `std::sync::Arc<dyn Array>`.
pub fn arced(self) -> std::sync::Arc<dyn Array> {
std::sync::Arc::new(self)
}
/// Consumes the array and returns it with its validity replaced by `validity`.
///
/// # Panics
/// Panics under the same condition as `set_validity`: `validity` is `Some` and its
/// length differs from `self.len()`.
#[must_use]
pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
self.set_validity(validity);
self
}
/// Replaces the validity of this array in place.
///
/// # Panics
/// Panics if `validity` is `Some` and its length differs from `self.len()`.
pub fn set_validity(&mut self, validity: Option<Bitmap>) {
    if let Some(bitmap) = &validity {
        if bitmap.len() != self.len() {
            panic!("validity must be equal to the array's length")
        }
    }
    self.validity = validity;
}
/// Attempts to convert this array into a [`MutableBinaryArray`].
///
/// Returns `Right(mutable)` only when the validity (if any), the values and the offsets
/// could all be obtained in mutable form; in every other case returns `Left` with a
/// [`BinaryArray`] rebuilt from the same parts.
// NOTE(review): `Bitmap::into_mut`, `Buffer::get_mut` and `OffsetsBuffer::get_mut`
// presumably succeed only when the underlying allocation is not shared — confirm against
// their definitions.
pub fn into_mut(mut self) -> Either<Self, MutableBinaryArray<O>> {
use Either::*;
if let Some(bitmap) = self.validity {
match bitmap.into_mut() {
// The validity could not be made mutable: return early, before touching the values
// or offsets, with the array rebuilt unchanged.
Left(bitmap) => Left(BinaryArray::new(
self.data_type,
self.offsets,
self.values,
Some(bitmap),
)),
// The validity is mutable; now try the values and the offsets.
Right(mutable_bitmap) => match (
// When `get_mut` succeeds the contents are moved out with `mem::take`, leaving
// `self.values` holding a default value; arms that receive `Some(..)` here must
// therefore use the taken values rather than `self.values`.
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut(),
) {
// Neither part is mutable: rebuild from the untouched fields.
(None, None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
self.values,
Some(mutable_bitmap.into()),
)),
// Only the offsets are mutable: convert them back and rebuild.
(None, Some(offsets)) => Left(BinaryArray::new(
self.data_type,
offsets.into(),
self.values,
Some(mutable_bitmap.into()),
)),
// Only the values are mutable: put the taken values back and rebuild.
(Some(mutable_values), None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
mutable_values.into(),
Some(mutable_bitmap.into()),
)),
// Everything is mutable: build the mutable array; `unwrap` panics if
// `MutableBinaryArray::try_new` rejects the parts.
(Some(values), Some(offsets)) => Right(
MutableBinaryArray::try_new(
self.data_type,
offsets,
values,
Some(mutable_bitmap),
)
.unwrap(),
),
},
}
} else {
// No validity: same four cases as above, with `None` as the validity.
match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut(),
) {
(None, None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
self.values,
None,
)),
(None, Some(offsets)) => Left(BinaryArray::new(
self.data_type,
offsets.into(),
self.values,
None,
)),
(Some(values), None) => Left(BinaryArray::new(
self.data_type,
self.offsets,
values.into(),
None,
)),
(Some(values), Some(offsets)) => Right(
MutableBinaryArray::try_new(self.data_type, offsets, values, None).unwrap(),
),
}
}
}
/// Creates an array from a fresh `OffsetsBuffer::new()`, an empty values buffer and no
/// validity.
///
/// # Panics
/// Panics if `data_type` is rejected by `try_new` (this goes through `Self::new`).
pub fn new_empty(data_type: DataType) -> Self {
Self::new(data_type, OffsetsBuffer::new(), Buffer::new(), None)
}
#[inline]
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
Offsets::new_zeroed(length).into(),
Buffer::new(),
Some(Bitmap::new_zeroed(length)),
)
}
/// Returns the default [`DataType`] for this offset width: `LargeBinary` when
/// `O::IS_LARGE` is set, `Binary` otherwise.
pub fn default_data_type() -> DataType {
    match O::IS_LARGE {
        true => DataType::LargeBinary,
        false => DataType::Binary,
    }
}
/// Panicking counterpart of `try_new`: builds a [`BinaryArray`] from its parts.
///
/// # Panics
/// Panics (via `unwrap`) whenever `try_new` returns an error for the same arguments.
pub fn new(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self::try_new(data_type, offsets, values, validity).unwrap()
}
/// Builds a [`BinaryArray`] from a [`TrustedLen`] iterator of byte-like values.
///
/// The items are first collected into a [`MutableBinaryArray`], which is then converted.
#[inline]
pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(
    iterator: I,
) -> Self {
    let mutable = MutableBinaryArray::<O>::from_trusted_len_values_iter(iterator);
    mutable.into()
}
/// Builds a [`BinaryArray`] from an arbitrary iterator of byte-like values.
///
/// The items are first collected into a [`MutableBinaryArray`], which is then converted.
pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
    let mutable = MutableBinaryArray::<O>::from_iter_values(iterator);
    mutable.into()
}
/// Builds a [`BinaryArray`] from an iterator of optional byte-like values, delegating to
/// `MutableBinaryArray::from_trusted_len_iter_unchecked`.
///
/// # Safety
/// NOTE(review): the iterator presumably must report an exact length — the safe wrapper
/// `from_trusted_len_iter` requires `TrustedLen` before calling this. Confirm against
/// `MutableBinaryArray::from_trusted_len_iter_unchecked`.
#[inline]
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
where
P: AsRef<[u8]>,
I: Iterator<Item = Option<P>>,
{
MutableBinaryArray::<O>::from_trusted_len_iter_unchecked(iterator).into()
}
/// Builds a [`BinaryArray`] from a [`TrustedLen`] iterator of optional byte-like values.
#[inline]
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
where
P: AsRef<[u8]>,
I: TrustedLen<Item = Option<P>>,
{
// SAFETY: relies on the `TrustedLen` bound on `I` to satisfy the contract of the
// unchecked variant.
unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
}
/// Builds a [`BinaryArray`] from a fallible iterator of optional byte-like values,
/// delegating to `MutableBinaryArray::try_from_trusted_len_iter_unchecked`.
///
/// # Errors
/// Returns the error produced by the delegated constructor.
///
/// # Safety
/// NOTE(review): the iterator presumably must report an exact length — the safe wrapper
/// `try_from_trusted_len_iter` requires `TrustedLen` before calling this. Confirm against
/// `MutableBinaryArray::try_from_trusted_len_iter_unchecked`.
#[inline]
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>
where
    P: AsRef<[u8]>,
    I: IntoIterator<Item = Result<Option<P>, E>>,
{
    let mutable = MutableBinaryArray::<O>::try_from_trusted_len_iter_unchecked(iterator)?;
    Ok(mutable.into())
}
/// Builds a [`BinaryArray`] from a fallible [`TrustedLen`] iterator of optional byte-like
/// values, returning whatever error the unchecked variant returns.
#[inline]
pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Self, E>
where
P: AsRef<[u8]>,
I: TrustedLen<Item = Result<Option<P>, E>>,
{
// SAFETY: relies on the `TrustedLen` bound on `I` to satisfy the contract of the
// unchecked variant.
unsafe { Self::try_from_trusted_len_iter_unchecked(iter) }
}
}
// `Array` implementation for `BinaryArray`.
//
// Several methods share their name with an inherent method of `BinaryArray`. Inherent
// methods take precedence in method resolution, so calls such as `self.len()` or
// `self.slice(..)` below reach the inherent implementations and do not recurse.
impl<O: Offset> Array for BinaryArray<O> {
#[inline]
fn as_any(&self) -> &dyn std::any::Any {
self
}
#[inline]
fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
self
}
// Delegates to the inherent `BinaryArray::len`.
#[inline]
fn len(&self) -> usize {
self.len()
}
#[inline]
fn data_type(&self) -> &DataType {
&self.data_type
}
fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}
// Boxes the result of the inherent `slice`, which panics when the range is out of bounds.
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
// Boxes the result of the inherent `slice_unchecked`; the caller must uphold that
// method's safety contract.
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice_unchecked(offset, length))
}
// Clones the array, then replaces the clone's validity through the inherent
// `with_validity`, which panics on a validity length mismatch.
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Box::new(self.clone().with_validity(validity))
}
fn to_boxed(&self) -> Box<dyn Array> {
Box::new(self.clone())
}
}
// Exposes the raw values and offsets of `BinaryArray` through `GenericBinaryArray`.
// NOTE(review): this is an `unsafe impl`; the invariants the trait expects are not
// visible here — presumably the ones enforced by `try_new`. Confirm against the trait.
unsafe impl<O: Offset> GenericBinaryArray<O> for BinaryArray<O> {
// Calls the inherent `values` accessor; the returned buffer reference is coerced to a
// byte slice by the return type.
#[inline]
fn values(&self) -> &[u8] {
self.values()
}
// Calls the inherent `offsets` accessor and returns the result of its `buffer()` method.
#[inline]
fn offsets(&self) -> &[O] {
self.offsets().buffer()
}
}