use std::{iter::FromIterator, sync::Arc};
use crate::{
array::{specification::check_offsets, Array, MutableArray, Offset, TryExtend, TryPush},
bitmap::MutableBitmap,
datatypes::DataType,
error::{ArrowError, Result},
trusted_len::TrustedLen,
};
use super::BinaryArray;
use crate::array::physical_binary::*;
/// A growable array of optional, variable-length byte strings.
///
/// This is the mutable counterpart of [`BinaryArray`]: it can be converted into one
/// through the `From` implementation in this file.
#[derive(Debug)]
pub struct MutableBinaryArray<O: Offset> {
// Logical type of the array. `from_data` only accepts types whose physical type
// matches that of `BinaryArray::<O>::default_data_type()`.
data_type: DataType,
// End offsets into `values`. The constructors in this file seed it with a single
// `O::default()` entry, so slot `i` covers `values[offsets[i]..offsets[i + 1]]`
// and the number of slots is `offsets.len() - 1`.
offsets: Vec<O>,
// Concatenated bytes of every non-null slot.
values: Vec<u8>,
// Per-slot validity. Stays `None` until the first null is pushed.
validity: Option<MutableBitmap>,
}
impl<O: Offset> From<MutableBinaryArray<O>> for BinaryArray<O> {
fn from(other: MutableBinaryArray<O>) -> Self {
BinaryArray::<O>::new(
other.data_type,
other.offsets.into(),
other.values.into(),
other.validity.map(|x| x.into()),
)
}
}
impl<O: Offset> Default for MutableBinaryArray<O> {
    /// Returns an empty array with no reserved slots (what `new()` produces).
    fn default() -> Self {
        Self::with_capacity(0)
    }
}
impl<O: Offset> MutableBinaryArray<O> {
    /// Creates an empty [`MutableBinaryArray`].
    pub fn new() -> Self {
        Self::with_capacity(0)
    }

    /// Assembles a [`MutableBinaryArray`] from its raw parts.
    ///
    /// # Panics
    /// * if `validity` is provided and its length differs from `offsets.len() - 1`;
    /// * if the physical type of `data_type` differs from the physical type of the
    ///   default data type for this offset width.
    ///
    /// `offsets` is first handed to `check_offsets` together with `values.len()`; that
    /// helper is defined elsewhere and returns nothing that is inspected here, so it
    /// presumably panics on inconsistent offsets (confirm against its definition).
    pub fn from_data(
        data_type: DataType,
        offsets: Vec<O>,
        values: Vec<u8>,
        validity: Option<MutableBitmap>,
    ) -> Self {
        check_offsets(&offsets, values.len());

        if let Some(bitmap) = validity.as_ref() {
            assert_eq!(offsets.len() - 1, bitmap.len());
        }

        let physical_matches =
            data_type.to_physical_type() == Self::default_data_type().to_physical_type();
        if !physical_matches {
            panic!("MutableBinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary")
        }

        Self {
            data_type,
            offsets,
            values,
            validity,
        }
    }

    /// The data type used by the constructors that do not take one explicitly.
    fn default_data_type() -> DataType {
        BinaryArray::<O>::default_data_type()
    }

    /// Creates an empty array with room for `capacity` slots and no pre-allocated
    /// space for the bytes themselves.
    pub fn with_capacity(capacity: usize) -> Self {
        // A zero-capacity `Vec` does not allocate, exactly like `Vec::new()`.
        Self::with_capacities(capacity, 0)
    }

    /// Creates an empty array with room for `capacity` slots and `values` bytes.
    pub fn with_capacities(capacity: usize, values: usize) -> Self {
        // One more offset than slots: the leading entry marks the start of slot 0.
        let mut offsets = Vec::<O>::with_capacity(capacity + 1);
        offsets.push(O::default());

        Self {
            data_type: Self::default_data_type(),
            offsets,
            values: Vec::<u8>::with_capacity(values),
            validity: None,
        }
    }

    /// Reserves room for `additional` more slots in the offsets and, when present,
    /// in the validity bitmap. The bytes buffer is left untouched.
    pub fn reserve(&mut self, additional: usize) {
        self.offsets.reserve(additional);
        if let Some(bitmap) = &mut self.validity {
            bitmap.reserve(additional)
        }
    }

    /// The most recently recorded offset.
    ///
    /// Panics if `offsets` is empty.
    #[inline]
    fn last_offset(&self) -> O {
        self.offsets.last().copied().unwrap()
    }

    /// Appends one slot: `Some` bytes or a null.
    ///
    /// # Panics
    /// Panics if `try_push` reports an error.
    pub fn push<T: AsRef<[u8]>>(&mut self, value: Option<T>) {
        let outcome = self.try_push(value);
        outcome.unwrap()
    }

    /// Builds an array by pushing every item of `iter`, stopping at the first error.
    /// The lower bound of the iterator's `size_hint` is used to pre-size the offsets.
    fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(iter: I) -> Result<Self> {
        let items = iter.into_iter();
        let mut array = Self::with_capacity(items.size_hint().0);
        for item in items {
            array.try_push(item.as_ref())?;
        }
        Ok(array)
    }

    /// Materializes the validity bitmap when the first null is pushed: every slot is
    /// marked valid except the last one (the slot whose offset was just pushed).
    fn init_validity(&mut self) {
        let mut bitmap = MutableBitmap::with_capacity(self.offsets.capacity());
        let len = self.len();
        bitmap.extend_constant(len, true);
        bitmap.set(len - 1, false);
        self.validity = Some(bitmap)
    }

    /// Converts itself into a reference-counted [`Array`].
    pub fn into_arc(self) -> Arc<dyn Array> {
        Arc::new(BinaryArray::<O>::from(self))
    }

    /// Shrinks the bytes, the offsets and (when present) the validity to fit their
    /// current contents.
    pub fn shrink_to_fit(&mut self) {
        self.values.shrink_to_fit();
        self.offsets.shrink_to_fit();
        if let Some(bitmap) = self.validity.as_mut() {
            bitmap.shrink_to_fit()
        }
    }
}
impl<O: Offset> MutableBinaryArray<O> {
    /// Borrows the concatenated bytes of all slots.
    pub fn values(&self) -> &Vec<u8> {
        let Self { values, .. } = self;
        values
    }

    /// Borrows the offsets delimiting each slot inside [`Self::values`].
    pub fn offsets(&self) -> &Vec<O> {
        let Self { offsets, .. } = self;
        offsets
    }
}
impl<O: Offset> MutableArray for MutableBinaryArray<O> {
    /// Number of slots, i.e. one less than the number of offsets.
    fn len(&self) -> usize {
        self.offsets.len() - 1
    }

    /// The validity bitmap, if any null has been recorded.
    fn validity(&self) -> Option<&MutableBitmap> {
        self.validity.as_ref()
    }

    /// Moves the accumulated contents into a boxed [`BinaryArray`].
    ///
    /// `self` is left empty but valid: the offsets are reset to the single leading
    /// `O::default()` entry rather than to an empty vector, so `len()` is `0` and
    /// further pushes keep working. The data type is preserved.
    fn as_box(&mut self) -> Box<dyn Array> {
        Box::new(BinaryArray::new(
            self.data_type.clone(),
            // Keep the "offsets is never empty" invariant on the drained array.
            std::mem::replace(&mut self.offsets, vec![O::default()]).into(),
            std::mem::take(&mut self.values).into(),
            std::mem::take(&mut self.validity).map(|x| x.into()),
        ))
    }

    /// Moves the accumulated contents into a reference-counted [`BinaryArray`].
    ///
    /// `self` is left empty but valid, exactly as for [`MutableArray::as_box`].
    fn as_arc(&mut self) -> Arc<dyn Array> {
        Arc::new(BinaryArray::new(
            self.data_type.clone(),
            // Keep the "offsets is never empty" invariant on the drained array.
            std::mem::replace(&mut self.offsets, vec![O::default()]).into(),
            std::mem::take(&mut self.values).into(),
            std::mem::take(&mut self.validity).map(|x| x.into()),
        ))
    }

    /// The logical data type of the array.
    fn data_type(&self) -> &DataType {
        &self.data_type
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
        self
    }

    /// Appends a null slot.
    #[inline]
    fn push_null(&mut self) {
        self.push::<&[u8]>(None)
    }

    /// Shrinks all buffers to fit their contents.
    fn shrink_to_fit(&mut self) {
        // Not recursive: method resolution picks the inherent `shrink_to_fit`
        // over this trait method.
        self.shrink_to_fit()
    }
}
impl<O: Offset, P: AsRef<[u8]>> FromIterator<Option<P>> for MutableBinaryArray<O> {
    /// Collects optional byte strings into a new array.
    ///
    /// # Panics
    /// Panics if pushing any item fails (see `try_from_iter`).
    fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
        let built = Self::try_from_iter(iter);
        built.unwrap()
    }
}
impl<O: Offset> MutableBinaryArray<O> {
    /// Builds an array from an iterator of optional byte strings whose length is
    /// trusted.
    ///
    /// # Safety
    /// The iterator must satisfy whatever `trusted_len_unzip` (defined elsewhere)
    /// requires; the safe wrapper expresses this through a `TrustedLen` bound.
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
    where
        P: AsRef<[u8]>,
        I: Iterator<Item = Option<P>>,
    {
        let (validity, offsets, values) = trusted_len_unzip(iterator);
        let data_type = Self::default_data_type();
        Self::from_data(data_type, offsets, values, validity)
    }

    /// Builds an array from a [`TrustedLen`] iterator of optional byte strings.
    #[inline]
    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
    where
        P: AsRef<[u8]>,
        I: TrustedLen<Item = Option<P>>,
    {
        // SAFETY: the iterator is `TrustedLen`, which is the bound the unchecked
        // variant stands in for.
        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
    }

    /// Builds an array without nulls from an iterator of byte strings whose length
    /// is trusted.
    ///
    /// # Safety
    /// The iterator must satisfy whatever `trusted_len_values_iter` (defined
    /// elsewhere) requires; the safe wrapper expresses this through a `TrustedLen`
    /// bound.
    #[inline]
    pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<[u8]>, I: Iterator<Item = T>>(
        iterator: I,
    ) -> Self {
        let (offsets, values) = unsafe { trusted_len_values_iter(iterator) };
        let data_type = Self::default_data_type();
        Self::from_data(data_type, offsets, values, None)
    }

    /// Builds an array without nulls from a [`TrustedLen`] iterator of byte strings.
    #[inline]
    pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(
        iterator: I,
    ) -> Self {
        // SAFETY: the iterator is `TrustedLen`, which is the bound the unchecked
        // variant stands in for.
        unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
    }

    /// Fallible counterpart of [`Self::from_trusted_len_iter_unchecked`]: the first
    /// `Err` produced by the iterator is returned as-is.
    ///
    /// The validity bitmap is dropped when it records no nulls.
    ///
    /// # Safety
    /// The iterator must satisfy whatever `try_trusted_len_unzip` (defined elsewhere)
    /// requires; the safe wrapper expresses this through a `TrustedLen` bound.
    ///
    /// # Panics
    /// Panics if `try_trusted_len_unzip` returns no validity bitmap.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
        iterator: I,
    ) -> std::result::Result<Self, E>
    where
        P: AsRef<[u8]>,
        I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
    {
        let (mut validity, offsets, values) = try_trusted_len_unzip(iterator.into_iter())?;

        let null_free = validity.as_mut().unwrap().null_count() == 0;
        let validity = if null_free { None } else { validity };

        let data_type = Self::default_data_type();
        Ok(Self::from_data(data_type, offsets, values, validity))
    }

    /// Fallible counterpart of [`Self::from_trusted_len_iter`].
    #[inline]
    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
    where
        P: AsRef<[u8]>,
        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
    {
        // SAFETY: the iterator is `TrustedLen`, which is the bound the unchecked
        // variant stands in for.
        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
    }

    /// Appends every byte string of a [`TrustedLen`] iterator as a non-null slot.
    #[inline]
    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
    where
        P: AsRef<[u8]>,
        I: TrustedLen<Item = P>,
    {
        // SAFETY: the iterator is `TrustedLen`, which is the bound the unchecked
        // variant stands in for.
        unsafe { self.extend_trusted_len_values_unchecked(iterator) }
    }

    /// Appends every byte string of `iterator` as a non-null slot.
    ///
    /// When a validity bitmap exists it is extended with as many `true` bits as the
    /// upper bound of the iterator's `size_hint`.
    ///
    /// # Safety
    /// The upper bound of `size_hint` must be the exact number of items, and the
    /// iterator must satisfy whatever `extend_from_trusted_len_values_iter` (defined
    /// elsewhere) requires.
    ///
    /// # Panics
    /// Panics if the iterator's `size_hint` has no upper bound.
    #[inline]
    pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
    where
        P: AsRef<[u8]>,
        I: Iterator<Item = P>,
    {
        // Read the length before the iterator is consumed below.
        let additional = iterator
            .size_hint()
            .1
            .expect("extend_trusted_len_values requires an upper limit");

        extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);

        if let Some(bitmap) = &mut self.validity {
            bitmap.extend_constant(additional, true);
        }
    }

    /// Appends every optional byte string of a [`TrustedLen`] iterator.
    #[inline]
    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
    where
        P: AsRef<[u8]>,
        I: TrustedLen<Item = Option<P>>,
    {
        // SAFETY: the iterator is `TrustedLen`, which is the bound the unchecked
        // variant stands in for.
        unsafe { self.extend_trusted_len_unchecked(iterator) }
    }

    /// Appends every optional byte string of `iterator`.
    ///
    /// A validity bitmap is created on demand (all existing slots valid) and is
    /// dropped again afterwards if it ends up recording no nulls.
    ///
    /// # Safety
    /// The iterator must satisfy whatever `extend_from_trusted_len_iter` (defined
    /// elsewhere) requires; the safe wrapper expresses this through a `TrustedLen`
    /// bound.
    #[inline]
    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
    where
        P: AsRef<[u8]>,
        I: Iterator<Item = Option<P>>,
    {
        if self.validity.is_none() {
            // Every slot pushed so far was valid, otherwise the bitmap would exist.
            let mut all_valid = MutableBitmap::new();
            all_valid.extend_constant(self.len(), true);
            self.validity = Some(all_valid);
        }

        let bitmap = self.validity.as_mut().unwrap();
        extend_from_trusted_len_iter(
            &mut self.offsets,
            &mut self.values,
            &mut *bitmap,
            iterator,
        );

        if bitmap.null_count() == 0 {
            self.validity = None;
        }
    }

    /// Builds an array without nulls from any iterator of byte strings.
    pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
        let (offsets, values) = values_iter(iterator);
        let data_type = Self::default_data_type();
        Self::from_data(data_type, offsets, values, None)
    }
}
impl<O: Offset, T: AsRef<[u8]>> Extend<Option<T>> for MutableBinaryArray<O> {
    /// Appends every item of `iter`.
    ///
    /// # Panics
    /// Panics if `try_extend` reports an error.
    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
        let outcome = self.try_extend(iter);
        outcome.unwrap();
    }
}
impl<O: Offset, T: AsRef<[u8]>> TryExtend<Option<T>> for MutableBinaryArray<O> {
    /// Appends every item of `iter`, stopping at (and returning) the first error.
    /// Items pushed before the failure stay in the array.
    fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> Result<()> {
        let items = iter.into_iter();
        // Pre-size for at least the number of items the iterator promises.
        let (lower, _) = items.size_hint();
        self.reserve(lower);
        for item in items {
            self.try_push(item)?;
        }
        Ok(())
    }
}
impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {
    /// Appends one slot.
    ///
    /// * `Some(bytes)`: the bytes are appended to the values and the new end offset
    ///   is recorded.
    /// * `None`: the previous offset is repeated (an empty slot) and the slot is
    ///   marked invalid, creating the validity bitmap if this is the first null.
    ///
    /// # Errors
    /// Returns `ArrowError::Overflow` when the new end offset does not fit in `O`;
    /// nothing is modified in that case.
    fn try_push(&mut self, value: Option<T>) -> Result<()> {
        if let Some(value) = value {
            let bytes: &[u8] = value.as_ref();

            // Validate the new end offset before touching any buffer.
            let end = self.values.len() + bytes.len();
            let end = O::from_usize(end).ok_or(ArrowError::Overflow)?;

            self.values.extend_from_slice(bytes);
            self.offsets.push(end);

            if let Some(validity) = self.validity.as_mut() {
                validity.push(true);
            }
        } else {
            let previous = self.last_offset();
            self.offsets.push(previous);

            match self.validity.as_mut() {
                Some(validity) => validity.push(false),
                // First null: build the bitmap with this slot already unset.
                None => self.init_validity(),
            }
        }
        Ok(())
    }
}