use std::sync::Arc;
use vortex_buffer::{Buffer, ByteBuffer};
use vortex_dtype::{DType, Nullability};
use vortex_error::{
VortexExpect, VortexResult, vortex_bail, vortex_ensure, vortex_err, vortex_panic,
};
use crate::arrays::binary_view::BinaryView;
use crate::builders::{ArrayBuilder, VarBinViewBuilder};
use crate::stats::ArrayStats;
use crate::validity::Validity;
/// A variable-length UTF-8 or binary array whose values are described by
/// [`BinaryView`]s over a shared set of byte buffers.
///
/// A view either carries its bytes inline or references a
/// `(buffer_index, offset, size)` range inside one of `buffers`. The
/// invariants checked at construction time live in
/// [`VarBinViewArray::validate`].
#[derive(Clone, Debug)]
pub struct VarBinViewArray {
/// Logical type of the array; validation only accepts `DType::Utf8` and
/// `DType::Binary`.
pub(super) dtype: DType,
/// Data buffers that non-inlined views index into by `buffer_index`.
pub(super) buffers: Arc<[ByteBuffer]>,
/// One view per element.
pub(super) views: Buffer<BinaryView>,
/// Validity of the elements; its nullability must agree with `dtype`.
pub(super) validity: Validity,
/// Statistics holder, default-initialised by `new_unchecked`.
// NOTE(review): presumably a cache of computed array statistics — confirm
// against `ArrayStats`.
pub(super) stats_set: ArrayStats,
}
impl VarBinViewArray {
/// Builds a [`VarBinViewArray`] from its components, validating them first.
///
/// This is the infallible-looking counterpart of [`Self::try_new`]: the same
/// validation runs, but a failure is surfaced through `vortex_expect`
/// instead of being returned.
///
/// # Panics
///
/// Panics if [`Self::try_new`] rejects the components.
pub fn new(
    views: Buffer<BinaryView>,
    buffers: Arc<[ByteBuffer]>,
    dtype: DType,
    validity: Validity,
) -> Self {
    let constructed = Self::try_new(views, buffers, dtype, validity);
    constructed.vortex_expect("VarBinViewArray construction failed")
}
/// Builds a [`VarBinViewArray`] from its components, returning an error if
/// they are inconsistent.
///
/// # Errors
///
/// Returns whatever error [`Self::validate`] reports for the given
/// `views`, `buffers`, `dtype` and `validity`.
pub fn try_new(
    views: Buffer<BinaryView>,
    buffers: Arc<[ByteBuffer]>,
    dtype: DType,
    validity: Validity,
) -> VortexResult<Self> {
    Self::validate(&views, &buffers, &dtype, &validity).map(|()| {
        // SAFETY: `validate` has just accepted exactly these components.
        unsafe { Self::new_unchecked(views, buffers, dtype, validity) }
    })
}
pub unsafe fn new_unchecked(
views: Buffer<BinaryView>,
buffers: Arc<[ByteBuffer]>,
dtype: DType,
validity: Validity,
) -> Self {
#[cfg(debug_assertions)]
Self::validate(&views, &buffers, &dtype, &validity)
.vortex_expect("[Debug Assertion]: Invalid `VarBinViewArray` parameters");
Self {
dtype,
buffers,
views,
validity,
stats_set: Default::default(),
}
}
/// Checks that the given components form a well-formed [`VarBinViewArray`].
///
/// The checks are, in order:
/// 1. the nullability of `validity` equals the nullability of `dtype`;
/// 2. `dtype` is `DType::Utf8` or `DType::Binary`;
/// 3. every non-null view passes the per-view checks in `validate_views`,
///    with UTF-8 validation of the bytes applied for `DType::Utf8` only.
///
/// # Errors
///
/// Returns an error describing the first violated check.
// NOTE(review): no check here compares the length of `validity` with the
// number of views — confirm whether that is enforced elsewhere.
pub fn validate(
    views: &Buffer<BinaryView>,
    buffers: &Arc<[ByteBuffer]>,
    dtype: &DType,
    validity: &Validity,
) -> VortexResult<()> {
    vortex_ensure!(
        validity.nullability() == dtype.nullability(),
        "validity {:?} incompatible with nullability {:?}",
        validity,
        dtype.nullability()
    );

    match dtype {
        // Binary payloads carry no content constraint.
        DType::Binary(_) => Self::validate_views(views, buffers, validity, |_| true),
        DType::Utf8(_) => {
            let is_utf8 = |bytes: &[u8]| simdutf8::basic::from_utf8(bytes).is_ok();
            Self::validate_views(views, buffers, validity, is_utf8)
        }
        _ => vortex_bail!("invalid DType {dtype} for `VarBinViewArray`"),
    }
}
/// Validates every non-null view against `buffers` and the `validator`
/// predicate.
///
/// Views at positions that `validity` reports as null are skipped. For the
/// remaining views:
/// * an inlined view must have inline bytes accepted by `validator`;
/// * any other view must name an existing buffer, lie within that buffer's
///   bounds, have a `prefix` equal to the first four referenced bytes, and
///   reference bytes accepted by `validator`.
///
/// # Errors
///
/// Returns an error for the first view that violates one of these rules.
fn validate_views<F>(
    views: &Buffer<BinaryView>,
    buffers: &Arc<[ByteBuffer]>,
    validity: &Validity,
    validator: F,
) -> VortexResult<()>
where
    F: Fn(&[u8]) -> bool,
{
    for (idx, &entry) in views.iter().enumerate() {
        // Null slots are not inspected at all.
        if validity.is_null(idx) {
            continue;
        }

        if !entry.is_inlined() {
            // The view points into one of the data buffers.
            let reference = entry.as_view();
            let buf_index = reference.buffer_index as usize;
            let start_offset = reference.offset as usize;
            // Saturate so an overflowing range fails the bounds check below
            // instead of wrapping around.
            let end_offset = start_offset.saturating_add(reference.size as usize);

            let missing_buffer = || {
                vortex_err!("view at index {idx} references invalid buffer: {buf_index} out of bounds for VarBinViewArray with {} buffers",
                    buffers.len())
            };
            let buf = buffers.get(buf_index).ok_or_else(missing_buffer)?;

            vortex_ensure!(
                start_offset < buf.len(),
                "start offset {start_offset} out of bounds for buffer {buf_index} with size {}",
                buf.len(),
            );
            vortex_ensure!(
                end_offset <= buf.len(),
                "end offset {end_offset} out of bounds for buffer {buf_index} with size {}",
                buf.len(),
            );

            let bytes = &buf[start_offset..end_offset];

            // The prefix stored in the view must mirror the referenced data.
            vortex_ensure!(
                reference.prefix == bytes[..4],
                "VarBinView prefix does not match full string"
            );
            vortex_ensure!(
                validator(bytes),
                "view at index {idx}: outlined bytes fails utf-8 validation"
            );
        } else {
            // SAFETY: `is_inlined()` returned true for this view, so reading
            // the inline representation is presumably the valid
            // interpretation of it — confirm against `BinaryView`.
            let inlined = unsafe { entry.inlined };
            let bytes = &inlined.data[..entry.len() as usize];
            vortex_ensure!(
                validator(bytes),
                "view at index {idx}: inlined bytes failed utf-8 validation"
            );
        }
    }

    Ok(())
}
pub fn nbuffers(&self) -> usize {
self.buffers.len()
}
#[inline]
pub fn views(&self) -> &Buffer<BinaryView> {
&self.views
}
/// Returns the bytes of the element at `index` as a [`ByteBuffer`].
///
/// For an inlined view the result is a slice of the views buffer itself
/// (reinterpreted as bytes). Otherwise it is a slice of the data buffer the
/// view references. Validity is not consulted.
///
/// # Panics
///
/// Panics if `index` is out of bounds for the views buffer, or if the view
/// references a buffer index rejected by [`Self::buffer`].
#[inline]
pub fn bytes_at(&self, index: usize) -> ByteBuffer {
    let all_views = self.views();
    let view = &all_views[index];

    if view.is_inlined() {
        // The payload lives inside the view, so hand out a slice of the
        // views buffer's own bytes.
        let backing = all_views.clone().into_byte_buffer();
        backing.slice_ref(view.as_inlined().value())
    } else {
        let view_ref = view.as_view();
        let buffer_idx = view_ref.buffer_index() as usize;
        self.buffer(buffer_idx).slice(view_ref.as_range())
    }
}
/// Borrows the data buffer at position `idx`.
///
/// # Panics
///
/// Panics (via `vortex_panic!`) if `idx` is not smaller than
/// [`Self::nbuffers`].
#[inline]
pub fn buffer(&self, idx: usize) -> &ByteBuffer {
    match self.buffers.get(idx) {
        Some(found) => found,
        None => vortex_panic!(
            "{idx} buffer index out of bounds, there are {} buffers",
            self.nbuffers()
        ),
    }
}
#[inline]
pub fn buffers(&self) -> &Arc<[ByteBuffer]> {
&self.buffers
}
/// Builds an array of the given `dtype` from an iterator of optional byte
/// values, appending a null for every `None`.
///
/// The builder is pre-sized using the lower bound of the iterator's size
/// hint.
// This inherent method shares its name with `FromIterator::from_iter`, which
// this type also implements, hence the lint exemption.
#[allow(clippy::same_name_method)]
pub fn from_iter<T: AsRef<[u8]>, I: IntoIterator<Item = Option<T>>>(
    iter: I,
    dtype: DType,
) -> Self {
    let items = iter.into_iter();
    let (lower_bound, _) = items.size_hint();
    let mut builder = VarBinViewBuilder::with_capacity(dtype, lower_bound);

    for item in items {
        if let Some(value) = item {
            builder.append_value(value);
        } else {
            builder.append_null();
        }
    }

    builder.finish_into_varbinview()
}
/// Builds a non-nullable UTF-8 array from an iterator of string-like values.
///
/// The builder is pre-sized using the lower bound of the iterator's size
/// hint.
pub fn from_iter_str<T: AsRef<str>, I: IntoIterator<Item = T>>(iter: I) -> Self {
    let strings = iter.into_iter();
    let (lower_bound, _) = strings.size_hint();
    let mut builder =
        VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::NonNullable), lower_bound);

    strings.for_each(|s| {
        builder.append_value(s.as_ref());
    });

    builder.finish_into_varbinview()
}
/// Builds a nullable UTF-8 array from an iterator of optional string-like
/// values, appending a null for every `None`.
///
/// The builder is pre-sized using the lower bound of the iterator's size
/// hint.
pub fn from_iter_nullable_str<T: AsRef<str>, I: IntoIterator<Item = Option<T>>>(
    iter: I,
) -> Self {
    let entries = iter.into_iter();
    let (lower_bound, _) = entries.size_hint();
    let mut builder =
        VarBinViewBuilder::with_capacity(DType::Utf8(Nullability::Nullable), lower_bound);

    for entry in entries {
        if let Some(text) = entry {
            builder.append_value(text.as_ref());
        } else {
            builder.append_null();
        }
    }

    builder.finish_into_varbinview()
}
/// Builds a non-nullable binary array from an iterator of byte-like values.
///
/// The builder is pre-sized using the lower bound of the iterator's size
/// hint.
pub fn from_iter_bin<T: AsRef<[u8]>, I: IntoIterator<Item = T>>(iter: I) -> Self {
    let blobs = iter.into_iter();
    let (lower_bound, _) = blobs.size_hint();
    let mut builder =
        VarBinViewBuilder::with_capacity(DType::Binary(Nullability::NonNullable), lower_bound);

    blobs.for_each(|blob| {
        builder.append_value(blob.as_ref());
    });

    builder.finish_into_varbinview()
}
/// Builds a nullable binary array from an iterator of optional byte-like
/// values, appending a null for every `None`.
///
/// The builder is pre-sized using the lower bound of the iterator's size
/// hint.
pub fn from_iter_nullable_bin<T: AsRef<[u8]>, I: IntoIterator<Item = Option<T>>>(
    iter: I,
) -> Self {
    let entries = iter.into_iter();
    let (lower_bound, _) = entries.size_hint();
    let mut builder =
        VarBinViewBuilder::with_capacity(DType::Binary(Nullability::Nullable), lower_bound);

    for entry in entries {
        if let Some(blob) = entry {
            builder.append_value(blob.as_ref());
        } else {
            builder.append_null();
        }
    }

    builder.finish_into_varbinview()
}
}
impl<'a> FromIterator<Option<&'a [u8]>> for VarBinViewArray {
fn from_iter<T: IntoIterator<Item = Option<&'a [u8]>>>(iter: T) -> Self {
Self::from_iter_nullable_bin(iter)
}
}
impl FromIterator<Option<Vec<u8>>> for VarBinViewArray {
fn from_iter<T: IntoIterator<Item = Option<Vec<u8>>>>(iter: T) -> Self {
Self::from_iter_nullable_bin(iter)
}
}
impl FromIterator<Option<String>> for VarBinViewArray {
fn from_iter<T: IntoIterator<Item = Option<String>>>(iter: T) -> Self {
Self::from_iter_nullable_str(iter)
}
}
impl<'a> FromIterator<Option<&'a str>> for VarBinViewArray {
fn from_iter<T: IntoIterator<Item = Option<&'a str>>>(iter: T) -> Self {
Self::from_iter_nullable_str(iter)
}
}