use crate::data::types::{BytesType, OffsetType};
use crate::data::ArrayDataLayout;
use crate::{ArrayDataBuilder, Buffers};
use arrow_buffer::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_schema::DataType;
use std::marker::PhantomData;
// The traits in this module are used as supertraits of the public `Bytes` and
// `BytesOffset` traits. The module itself is not `pub`, so the traits cannot be
// named (and hence not implemented) from outside the enclosing module.
mod private {
use super::*;
/// Sealed half of [`Bytes`]: unchecked byte reinterpretation plus conversions
/// between [`ArrayDataBytes`] and [`ArrayDataBytesOffset`].
pub trait BytesSealed {
/// Reinterprets `b` as `Self` without validating its contents.
///
/// # Safety
///
/// `b` must already be a valid encoding of `Self`. The `str` implementation
/// in this file forwards to `std::str::from_utf8_unchecked`, so for `str`
/// the bytes must be valid UTF-8.
unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self;
/// Borrows the [`ArrayDataBytesOffset`] held by `data` if its variant
/// corresponds to `Self`, otherwise returns `None`.
fn downcast_ref(data: &ArrayDataBytes) -> Option<&ArrayDataBytesOffset<Self>>
where
Self: Bytes;
/// Owned counterpart of `downcast_ref`: consumes `data` and returns its
/// payload if the variant corresponds to `Self`, otherwise `None`.
fn downcast(data: ArrayDataBytes) -> Option<ArrayDataBytesOffset<Self>>
where
Self: Bytes;
/// Wraps `v` in the [`ArrayDataBytes`] variant that corresponds to `Self`.
fn upcast(v: ArrayDataBytesOffset<Self>) -> ArrayDataBytes
where
Self: Bytes;
}
/// Sealed half of [`BytesOffset`]: conversions between
/// [`ArrayDataBytesOffset`] and the fully typed [`BytesArrayData`].
pub trait BytesOffsetSealed {
/// Borrows the [`BytesArrayData`] held by `data` if its variant corresponds
/// to `Self`, otherwise returns `None`.
fn downcast_ref<B: Bytes + ?Sized>(
data: &ArrayDataBytesOffset<B>,
) -> Option<&BytesArrayData<Self, B>>
where
Self: BytesOffset;
/// Owned counterpart of `downcast_ref`: consumes `data` and returns its
/// payload if the variant corresponds to `Self`, otherwise `None`.
fn downcast<B: Bytes + ?Sized>(
data: ArrayDataBytesOffset<B>,
) -> Option<BytesArrayData<Self, B>>
where
Self: BytesOffset;
/// Wraps `v` in the [`ArrayDataBytesOffset`] variant that corresponds to
/// `Self`.
fn upcast<B: Bytes + ?Sized>(
v: BytesArrayData<Self, B>,
) -> ArrayDataBytesOffset<B>
where
Self: BytesOffset;
}
}
/// Value types that a variable-length byte array can hold; implemented in this
/// file for `[u8]` and `str`.
///
/// The `private::BytesSealed` supertrait lives in a non-public module, so this
/// trait cannot be implemented outside the enclosing module.
pub trait Bytes: private::BytesSealed + std::fmt::Debug {
/// The [`BytesType`] tag identifying this value type.
const TYPE: BytesType;
}
/// Raw byte slices are tagged as [`BytesType::Binary`].
impl Bytes for [u8] {
const TYPE: BytesType = BytesType::Binary;
}
/// `[u8]` values correspond to the `Binary` variant of [`ArrayDataBytes`].
impl private::BytesSealed for [u8] {
    /// Returns `b` unchanged: every byte slice is a valid `[u8]`.
    ///
    /// # Safety
    ///
    /// This implementation imposes no requirement on `b`; the function is
    /// `unsafe` only because the trait method is.
    unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self {
        b
    }

    /// Borrows the `Binary` payload, or returns `None` for `Utf8`.
    fn downcast_ref(data: &ArrayDataBytes) -> Option<&ArrayDataBytesOffset<Self>> {
        match data {
            ArrayDataBytes::Utf8(_) => None,
            ArrayDataBytes::Binary(binary) => Some(binary),
        }
    }

    /// Extracts the `Binary` payload by value, or returns `None` for `Utf8`.
    fn downcast(data: ArrayDataBytes) -> Option<ArrayDataBytesOffset<Self>> {
        match data {
            ArrayDataBytes::Utf8(_) => None,
            ArrayDataBytes::Binary(binary) => Some(binary),
        }
    }

    /// Wraps `v` in [`ArrayDataBytes::Binary`].
    fn upcast(v: ArrayDataBytesOffset<Self>) -> ArrayDataBytes {
        ArrayDataBytes::Binary(v)
    }
}
/// String slices are tagged as [`BytesType::Utf8`].
impl Bytes for str {
const TYPE: BytesType = BytesType::Utf8;
}
/// `str` values correspond to the `Utf8` variant of [`ArrayDataBytes`].
impl private::BytesSealed for str {
    /// Reinterprets `b` as a `str` without checking that it is UTF-8.
    ///
    /// # Safety
    ///
    /// `b` must contain valid UTF-8, as required by
    /// `std::str::from_utf8_unchecked`.
    unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self {
        // SAFETY: validity of `b` as UTF-8 is the caller's obligation (see above).
        std::str::from_utf8_unchecked(b)
    }

    /// Borrows the `Utf8` payload, or returns `None` for `Binary`.
    fn downcast_ref(data: &ArrayDataBytes) -> Option<&ArrayDataBytesOffset<Self>> {
        match data {
            ArrayDataBytes::Utf8(utf8) => Some(utf8),
            ArrayDataBytes::Binary(_) => None,
        }
    }

    /// Extracts the `Utf8` payload by value, or returns `None` for `Binary`.
    fn downcast(data: ArrayDataBytes) -> Option<ArrayDataBytesOffset<Self>> {
        match data {
            ArrayDataBytes::Utf8(utf8) => Some(utf8),
            ArrayDataBytes::Binary(_) => None,
        }
    }

    /// Wraps `v` in [`ArrayDataBytes::Utf8`].
    fn upcast(v: ArrayDataBytesOffset<Self>) -> ArrayDataBytes {
        ArrayDataBytes::Utf8(v)
    }
}
/// Integer types usable as offsets of a variable-length byte array;
/// implemented in this file for `i32` and `i64`.
///
/// The `private::BytesOffsetSealed` supertrait lives in a non-public module,
/// so this trait cannot be implemented outside the enclosing module.
pub trait BytesOffset: private::BytesOffsetSealed + ArrowNativeType {
/// The [`OffsetType`] tag identifying this offset width.
const TYPE: OffsetType;
}
/// `i32` offsets are tagged as [`OffsetType::Int32`].
impl BytesOffset for i32 {
const TYPE: OffsetType = OffsetType::Int32;
}
/// `i32` offsets correspond to the `Small` variant of [`ArrayDataBytesOffset`].
impl private::BytesOffsetSealed for i32 {
    /// Borrows the `Small` payload, or returns `None` for `Large`.
    fn downcast_ref<B: Bytes + ?Sized>(
        data: &ArrayDataBytesOffset<B>,
    ) -> Option<&BytesArrayData<Self, B>> {
        match data {
            ArrayDataBytesOffset::Large(_) => None,
            ArrayDataBytesOffset::Small(small) => Some(small),
        }
    }

    /// Extracts the `Small` payload by value, or returns `None` for `Large`.
    fn downcast<B: Bytes + ?Sized>(
        data: ArrayDataBytesOffset<B>,
    ) -> Option<BytesArrayData<Self, B>> {
        match data {
            ArrayDataBytesOffset::Large(_) => None,
            ArrayDataBytesOffset::Small(small) => Some(small),
        }
    }

    /// Wraps `v` in [`ArrayDataBytesOffset::Small`].
    fn upcast<B: Bytes + ?Sized>(v: BytesArrayData<Self, B>) -> ArrayDataBytesOffset<B> {
        ArrayDataBytesOffset::Small(v)
    }
}
/// `i64` offsets are tagged as [`OffsetType::Int64`].
impl BytesOffset for i64 {
const TYPE: OffsetType = OffsetType::Int64;
}
/// `i64` offsets correspond to the `Large` variant of [`ArrayDataBytesOffset`].
impl private::BytesOffsetSealed for i64 {
    /// Borrows the `Large` payload, or returns `None` for `Small`.
    fn downcast_ref<B: Bytes + ?Sized>(
        data: &ArrayDataBytesOffset<B>,
    ) -> Option<&BytesArrayData<Self, B>> {
        match data {
            ArrayDataBytesOffset::Large(large) => Some(large),
            ArrayDataBytesOffset::Small(_) => None,
        }
    }

    /// Extracts the `Large` payload by value, or returns `None` for `Small`.
    fn downcast<B: Bytes + ?Sized>(
        data: ArrayDataBytesOffset<B>,
    ) -> Option<BytesArrayData<Self, B>> {
        match data {
            ArrayDataBytesOffset::Large(large) => Some(large),
            ArrayDataBytesOffset::Small(_) => None,
        }
    }

    /// Wraps `v` in [`ArrayDataBytesOffset::Large`].
    fn upcast<B: Bytes + ?Sized>(v: BytesArrayData<Self, B>) -> ArrayDataBytesOffset<B> {
        ArrayDataBytesOffset::Large(v)
    }
}
/// Runs `$op` against the innermost payload of an `ArrayDataBytes`.
///
/// `$array` must be an identifier bound to an `ArrayDataBytes` (or a reference
/// to one). Inside `$op` the same identifier is rebound to the payload of the
/// matching `ArrayDataBytesOffset` variant, so `$op` is expanded once for each
/// of the four value-type / offset-width combinations and must type-check for
/// every one of them.
macro_rules! bytes_op {
    ($array:ident, $op:block) => {
        match $array {
            ArrayDataBytes::Binary(ArrayDataBytesOffset::Small($array)) => $op
            ArrayDataBytes::Binary(ArrayDataBytesOffset::Large($array)) => $op
            ArrayDataBytes::Utf8(ArrayDataBytesOffset::Small($array)) => $op
            ArrayDataBytes::Utf8(ArrayDataBytesOffset::Large($array)) => $op
        }
    };
}
/// Variable-length byte array data with the value type erased into an enum:
/// either raw bytes or strings. Each variant holds an [`ArrayDataBytesOffset`],
/// which in turn erases the offset width.
#[derive(Debug, Clone)]
pub enum ArrayDataBytes {
/// Values are `[u8]`.
Binary(ArrayDataBytesOffset<[u8]>),
/// Values are `str`.
Utf8(ArrayDataBytesOffset<str>),
}
/// Downcasting, slicing and raw construction for [`ArrayDataBytes`].
impl ArrayDataBytes {
    /// Borrows the fully typed [`BytesArrayData`] if this value holds value
    /// type `B` with offset type `O`; returns `None` on any mismatch.
    pub fn downcast_ref<O: BytesOffset, B: Bytes + ?Sized>(
        &self,
    ) -> Option<&BytesArrayData<O, B>> {
        // First narrow the value type, then the offset width.
        let by_value_type = B::downcast_ref(self)?;
        O::downcast_ref(by_value_type)
    }

    /// Consumes `self` and returns the fully typed [`BytesArrayData`] if it
    /// holds value type `B` with offset type `O`; returns `None` on any
    /// mismatch.
    pub fn downcast<O: BytesOffset, B: Bytes + ?Sized>(
        self,
    ) -> Option<BytesArrayData<O, B>> {
        let by_value_type = B::downcast(self)?;
        O::downcast(by_value_type)
    }

    /// Slices the underlying [`BytesArrayData`] with `offset` and `len` and
    /// re-wraps the result in the same variants.
    pub fn slice(&self, offset: usize, len: usize) -> Self {
        let array = self;
        bytes_op!(array, { Self::from(array.slice(offset, len)) })
    }

    /// Returns the [`ArrayDataLayout`] of the underlying [`BytesArrayData`].
    pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
        let array = self;
        bytes_op!(array, { array.layout() })
    }

    /// Builds the variant selected by `bytes` and `offset` from `builder`.
    ///
    /// # Safety
    ///
    /// `builder` is forwarded unchecked to `BytesArrayData::from_raw`, so it
    /// must satisfy that function's requirements for the selected value and
    /// offset types.
    pub(crate) unsafe fn from_raw(
        builder: ArrayDataBuilder,
        offset: OffsetType,
        bytes: BytesType,
    ) -> Self {
        match (bytes, offset) {
            (BytesType::Binary, OffsetType::Int32) => {
                Self::Binary(ArrayDataBytesOffset::Small(BytesArrayData::from_raw(builder)))
            }
            (BytesType::Binary, OffsetType::Int64) => {
                Self::Binary(ArrayDataBytesOffset::Large(BytesArrayData::from_raw(builder)))
            }
            (BytesType::Utf8, OffsetType::Int32) => {
                Self::Utf8(ArrayDataBytesOffset::Small(BytesArrayData::from_raw(builder)))
            }
            (BytesType::Utf8, OffsetType::Int64) => {
                Self::Utf8(ArrayDataBytesOffset::Large(BytesArrayData::from_raw(builder)))
            }
        }
    }
}
/// Byte array data for a known value type `B` with the offset width erased
/// into an enum.
#[derive(Debug)]
pub enum ArrayDataBytesOffset<B: Bytes + ?Sized> {
/// Offsets are `i32`.
Small(BytesArrayData<i32, B>),
/// Offsets are `i64`.
Large(BytesArrayData<i64, B>),
}
/// Hand-written `Clone`: `B` may be unsized (`[u8]`, `str`), so it cannot be
/// required to implement `Clone` itself.
impl<B: Bytes + ?Sized> Clone for ArrayDataBytesOffset<B> {
    /// Clones the payload and re-wraps it in the same variant.
    fn clone(&self) -> Self {
        match *self {
            Self::Large(ref inner) => Self::Large(inner.clone()),
            Self::Small(ref inner) => Self::Small(inner.clone()),
        }
    }
}
/// Type-erases a fully typed [`BytesArrayData`] into [`ArrayDataBytes`].
impl<O: BytesOffset, B: Bytes + ?Sized> From<BytesArrayData<O, B>> for ArrayDataBytes {
    /// Erases the offset width first, then the value type.
    fn from(value: BytesArrayData<O, B>) -> Self {
        let offset_erased = O::upcast(value);
        B::upcast(offset_erased)
    }
}
/// Variable-length byte array data with offset type `O` and value type `B`.
#[derive(Debug)]
pub struct BytesArrayData<O: BytesOffset, B: Bytes + ?Sized> {
/// The data type reported by `data_type()`.
data_type: DataType,
/// Offsets buffer; `len()` reports one less than its number of entries.
offsets: OffsetBuffer<O>,
/// Raw value bytes; `values()` reinterprets them as `B` without checking.
values: Buffer,
/// Optional null buffer.
nulls: Option<NullBuffer>,
/// Ties the value type `B` to the struct without storing a `B`.
phantom: PhantomData<B>,
}
impl<O: BytesOffset, B: Bytes + ?Sized> Clone for BytesArrayData<O, B> {
fn clone(&self) -> Self {
Self {
data_type: self.data_type.clone(),
nulls: self.nulls.clone(),
offsets: self.offsets.clone(),
values: self.values.clone(),
phantom: Default::default(),
}
}
}
impl<O: BytesOffset, B: Bytes + ?Sized> BytesArrayData<O, B> {
pub unsafe fn new_unchecked(
data_type: DataType,
offsets: OffsetBuffer<O>,
values: Buffer,
nulls: Option<NullBuffer>,
) -> Self {
Self {
data_type,
nulls,
offsets,
values,
phantom: Default::default(),
}
}
pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
let mut iter = builder.buffers.into_iter();
let offsets = iter.next().unwrap();
let values = iter.next().unwrap();
let offsets = match builder.len {
0 => OffsetBuffer::new_empty(),
_ => OffsetBuffer::new_unchecked(ScalarBuffer::new(
offsets,
builder.offset,
builder.len + 1,
)),
};
Self {
values,
offsets,
data_type: builder.data_type,
nulls: builder.nulls,
phantom: Default::default(),
}
}
#[inline]
pub fn len(&self) -> usize {
self.offsets.len().wrapping_sub(1)
}
#[inline]
pub fn is_empty(&self) -> bool {
self.offsets.len() <= 1
}
#[inline]
pub fn values(&self) -> &B {
unsafe { B::from_bytes_unchecked(self.values.as_slice()) }
}
#[inline]
pub fn offsets(&self) -> &OffsetBuffer<O> {
&self.offsets
}
#[inline]
pub fn nulls(&self) -> Option<&NullBuffer> {
self.nulls.as_ref()
}
#[inline]
pub fn data_type(&self) -> &DataType {
&self.data_type
}
pub fn into_parts(self) -> (DataType, OffsetBuffer<O>, Buffer, Option<NullBuffer>) {
(self.data_type, self.offsets, self.values, self.nulls)
}
pub fn slice(&self, offset: usize, len: usize) -> Self {
Self {
values: self.values.clone(),
offsets: self.offsets.slice(offset, len),
data_type: self.data_type.clone(),
nulls: self.nulls().as_ref().map(|x| x.slice(offset, len)),
phantom: Default::default(),
}
}
pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
ArrayDataLayout {
data_type: &self.data_type,
len: self.offsets.len().wrapping_sub(1),
offset: 0,
nulls: self.nulls.as_ref(),
buffers: Buffers::two(self.offsets.inner().inner(), &self.values),
child_data: &[],
}
}
}
/// Array data for binary values that all have the same byte width.
#[derive(Debug, Clone)]
pub struct FixedSizeBinaryArrayData {
/// The data type reported by `data_type()`.
data_type: DataType,
/// Number of elements.
len: usize,
/// Width of one element in bytes (`slice` and `from_raw` multiply element
/// counts by this to obtain byte ranges).
element_size: usize,
/// Raw value bytes.
values: Buffer,
/// Optional null buffer.
nulls: Option<NullBuffer>,
}
/// Construction, accessors and slicing for [`FixedSizeBinaryArrayData`].
impl FixedSizeBinaryArrayData {
    /// Assembles a [`FixedSizeBinaryArrayData`] from its parts without any
    /// validation.
    ///
    /// # Safety
    ///
    /// The arguments are stored verbatim. Callers must ensure they are
    /// mutually consistent — presumably `values` holds `len * element_size`
    /// bytes (which is what `from_raw` and `slice` produce) and `nulls`, if
    /// present, covers `len` elements; confirm against callers.
    pub unsafe fn new_unchecked(
        data_type: DataType,
        len: usize,
        element_size: usize,
        values: Buffer,
        nulls: Option<NullBuffer>,
    ) -> Self {
        Self {
            data_type,
            nulls,
            values,
            len,
            element_size,
        }
    }

    /// Builds a [`FixedSizeBinaryArrayData`] from an [`ArrayDataBuilder`],
    /// where `size` is the width of one element in bytes.
    ///
    /// The first buffer of `builder` is narrowed to the byte range
    /// `builder.offset * size .. (builder.offset + builder.len) * size`.
    ///
    /// # Panics
    ///
    /// Panics if `builder.buffers` is empty, or with `"overflow"` if either
    /// byte-range product overflows `usize`.
    ///
    /// # Safety
    ///
    /// `builder` is not validated beyond the checks performed by
    /// `Buffer::slice_with_length`; it must describe data that is valid for a
    /// fixed-size binary array of element width `size`.
    pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder, size: usize) -> Self {
        // Checked arithmetic, matching `slice`: a product that wraps in a
        // release build would silently select the wrong byte range.
        let byte_offset = builder.offset.checked_mul(size).expect("overflow");
        let byte_len = builder.len.checked_mul(size).expect("overflow");
        let values = builder.buffers[0].slice_with_length(byte_offset, byte_len);
        Self {
            values,
            data_type: builder.data_type,
            len: builder.len,
            element_size: size,
            nulls: builder.nulls,
        }
    }

    /// Returns the number of elements.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if there are no elements.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns the width of one element in bytes.
    #[inline]
    pub fn element_size(&self) -> usize {
        self.element_size
    }

    /// Returns the raw value bytes.
    #[inline]
    pub fn values(&self) -> &[u8] {
        self.values.as_slice()
    }

    /// Returns the null buffer, if there is one.
    #[inline]
    pub fn nulls(&self) -> Option<&NullBuffer> {
        self.nulls.as_ref()
    }

    /// Returns the data type.
    #[inline]
    pub fn data_type(&self) -> &DataType {
        &self.data_type
    }

    /// Decomposes `self` into `(data_type, values, nulls)`.
    ///
    /// `len` and `element_size` are not part of the returned tuple.
    pub fn into_parts(self) -> (DataType, Buffer, Option<NullBuffer>) {
        (self.data_type, self.values, self.nulls)
    }

    /// Returns a slice of `len` elements starting at element `offset`.
    ///
    /// # Panics
    ///
    /// Panics with `"overflow"` if `offset * element_size` or
    /// `len * element_size` overflows `usize`.
    pub fn slice(&self, offset: usize, len: usize) -> Self {
        let byte_offset = offset.checked_mul(self.element_size).expect("overflow");
        let byte_len = len.checked_mul(self.element_size).expect("overflow");
        let values = self.values.slice_with_length(byte_offset, byte_len);
        Self {
            len,
            values,
            data_type: self.data_type.clone(),
            element_size: self.element_size,
            nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
        }
    }

    /// Returns an [`ArrayDataLayout`] view of this data.
    ///
    /// The reported `offset` is always zero: `from_raw` and `slice` narrow
    /// the values buffer (and `slice` the null buffer) themselves.
    pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
        ArrayDataLayout {
            data_type: &self.data_type,
            len: self.len,
            offset: 0,
            nulls: self.nulls.as_ref(),
            buffers: Buffers::one(&self.values),
            child_data: &[],
        }
    }
}