use std::fmt::Debug;
use std::iter::FromIterator;
use std::ptr::NonNull;
use std::sync::Arc;
use std::{convert::AsRef, usize};
use crate::alloc::{Allocation, Deallocation};
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
use crate::{bytes::Bytes, native::ArrowNativeType};
use super::ops::bitwise_unary_op_helper;
use super::MutableBuffer;
#[derive(Clone, PartialEq, Debug)]
pub struct Buffer {
data: Arc<Bytes>,
offset: usize,
length: usize,
}
impl Buffer {
#[inline]
pub fn from_bytes(bytes: Bytes) -> Self {
let length = bytes.len();
Buffer {
data: Arc::new(bytes),
offset: 0,
length,
}
}
pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
let slice = items.as_ref();
let capacity = slice.len() * std::mem::size_of::<U>();
let mut buffer = MutableBuffer::with_capacity(capacity);
buffer.extend_from_slice(slice);
buffer.into()
}
pub unsafe fn from_raw_parts(ptr: NonNull<u8>, len: usize, capacity: usize) -> Self {
assert!(len <= capacity);
Buffer::build_with_arguments(ptr, len, Deallocation::Arrow(capacity))
}
pub unsafe fn from_custom_allocation(
ptr: NonNull<u8>,
len: usize,
owner: Arc<dyn Allocation>,
) -> Self {
Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner))
}
unsafe fn build_with_arguments(
ptr: NonNull<u8>,
len: usize,
deallocation: Deallocation,
) -> Self {
let bytes = Bytes::new(ptr, len, deallocation);
Buffer {
data: Arc::new(bytes),
offset: 0,
length: len,
}
}
#[inline]
pub fn len(&self) -> usize {
self.length
}
#[inline]
pub fn capacity(&self) -> usize {
self.data.capacity()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.length == 0
}
pub fn as_slice(&self) -> &[u8] {
&self.data[self.offset..(self.offset + self.length)]
}
pub fn slice(&self, offset: usize) -> Self {
assert!(
offset <= self.len(),
"the offset of the new Buffer cannot exceed the existing length"
);
Self {
data: self.data.clone(),
offset: self.offset + offset,
length: self.length - offset,
}
}
pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
assert!(
offset + length <= self.len(),
"the offset of the new Buffer cannot exceed the existing length"
);
Self {
data: self.data.clone(),
offset: self.offset + offset,
length,
}
}
#[inline]
pub fn as_ptr(&self) -> *const u8 {
unsafe { self.data.ptr().as_ptr().add(self.offset) }
}
pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
assert!(prefix.is_empty() && suffix.is_empty());
offsets
}
pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
if offset % 8 == 0 {
return self.slice(offset / 8);
}
bitwise_unary_op_helper(self, offset, len, |a| a)
}
pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
BitChunks::new(self.as_slice(), offset, len)
}
#[deprecated(note = "use count_set_bits_offset instead")]
pub fn count_set_bits(&self) -> usize {
let len_in_bits = self.len() * 8;
self.count_set_bits_offset(0, len_in_bits)
}
pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
}
pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
let offset_ptr = self.as_ptr();
let offset = self.offset;
let length = self.length;
Arc::try_unwrap(self.data)
.and_then(|bytes| {
assert_eq!(offset_ptr, bytes.ptr().as_ptr());
MutableBuffer::from_bytes(bytes).map_err(Arc::new)
})
.map_err(|bytes| Buffer {
data: bytes,
offset,
length,
})
}
}
impl<T: AsRef<[u8]>> From<T> for Buffer {
fn from(p: T) -> Self {
let slice = p.as_ref();
let len = slice.len();
let mut buffer = MutableBuffer::new(len);
buffer.extend_from_slice(slice);
buffer.into()
}
}
impl std::iter::FromIterator<bool> for Buffer {
fn from_iter<I>(iter: I) -> Self
where
I: IntoIterator<Item = bool>,
{
MutableBuffer::from_iter(iter).into()
}
}
impl std::ops::Deref for Buffer {
type Target = [u8];
fn deref(&self) -> &[u8] {
unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
}
}
impl From<MutableBuffer> for Buffer {
#[inline]
fn from(buffer: MutableBuffer) -> Self {
buffer.into_buffer()
}
}
impl Buffer {
#[inline]
pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
iterator: I,
) -> Self {
MutableBuffer::from_trusted_len_iter(iterator).into()
}
#[inline]
pub unsafe fn try_from_trusted_len_iter<
E,
T: ArrowNativeType,
I: Iterator<Item = std::result::Result<T, E>>,
>(
iterator: I,
) -> std::result::Result<Self, E> {
Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
}
}
impl<T: ArrowNativeType> FromIterator<T> for Buffer {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
let mut iterator = iter.into_iter();
let size = std::mem::size_of::<T>();
let mut buffer = match iterator.next() {
None => MutableBuffer::new(0),
Some(element) => {
let (lower, _) = iterator.size_hint();
let mut buffer = MutableBuffer::new(lower.saturating_add(1) * size);
unsafe {
std::ptr::write(buffer.as_mut_ptr() as *mut T, element);
buffer.set_len(size);
}
buffer
}
};
buffer.extend_from_iter(iterator);
buffer.into()
}
}
#[cfg(test)]
mod tests {
use std::panic::{RefUnwindSafe, UnwindSafe};
use std::thread;
use super::*;
#[test]
fn test_buffer_data_equality() {
let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
assert_eq!(buf1, buf2);
let buf3 = buf1.slice(2);
assert_ne!(buf1, buf3);
let buf4 = buf2.slice_with_length(2, 3);
assert_eq!(buf3, buf4);
let mut buf2 = MutableBuffer::new(65);
buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);
let buf2 = buf2.into();
assert_eq!(buf1, buf2);
let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
assert_ne!(buf1, buf2);
let buf2 = Buffer::from(&[0, 1, 2, 3]);
assert_ne!(buf1, buf2);
}
#[test]
fn test_from_raw_parts() {
let buf = Buffer::from(&[0, 1, 2, 3, 4]);
assert_eq!(5, buf.len());
assert!(!buf.as_ptr().is_null());
assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
}
#[test]
fn test_from_vec() {
let buf = Buffer::from(&[0, 1, 2, 3, 4]);
assert_eq!(5, buf.len());
assert!(!buf.as_ptr().is_null());
assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
}
#[test]
fn test_copy() {
let buf = Buffer::from(&[0, 1, 2, 3, 4]);
let buf2 = buf;
assert_eq!(5, buf2.len());
assert_eq!(64, buf2.capacity());
assert!(!buf2.as_ptr().is_null());
assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
}
#[test]
fn test_slice() {
let buf = Buffer::from(&[2, 4, 6, 8, 10]);
let buf2 = buf.slice(2);
assert_eq!([6, 8, 10], buf2.as_slice());
assert_eq!(3, buf2.len());
assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());
let buf3 = buf2.slice_with_length(1, 2);
assert_eq!([8, 10], buf3.as_slice());
assert_eq!(2, buf3.len());
assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());
let buf4 = buf.slice(5);
let empty_slice: [u8; 0] = [];
assert_eq!(empty_slice, buf4.as_slice());
assert_eq!(0, buf4.len());
assert!(buf4.is_empty());
assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
}
#[test]
#[should_panic(
expected = "the offset of the new Buffer cannot exceed the existing length"
)]
fn test_slice_offset_out_of_bound() {
let buf = Buffer::from(&[2, 4, 6, 8, 10]);
buf.slice(6);
}
#[test]
fn test_access_concurrently() {
let buffer = Buffer::from(vec![1, 2, 3, 4, 5]);
let buffer2 = buffer.clone();
assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());
let buffer_copy = thread::spawn(move || {
buffer
})
.join();
assert!(buffer_copy.is_ok());
assert_eq!(buffer2, buffer_copy.ok().unwrap());
}
macro_rules! check_as_typed_data {
($input: expr, $native_t: ty) => {{
let buffer = Buffer::from_slice_ref($input);
let slice: &[$native_t] = buffer.typed_data::<$native_t>();
assert_eq!($input, slice);
}};
}
#[test]
#[allow(clippy::float_cmp)]
fn test_as_typed_data() {
check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
}
#[test]
fn test_count_bits() {
assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
assert_eq!(
6,
Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
);
assert_eq!(
16,
Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
);
}
#[test]
fn test_count_bits_slice() {
assert_eq!(
0,
Buffer::from(&[0b11111111, 0b00000000])
.slice(1)
.count_set_bits_offset(0, 8)
);
assert_eq!(
8,
Buffer::from(&[0b11111111, 0b11111111])
.slice_with_length(1, 1)
.count_set_bits_offset(0, 8)
);
assert_eq!(
3,
Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
.slice(2)
.count_set_bits_offset(0, 8)
);
assert_eq!(
6,
Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
.slice_with_length(1, 2)
.count_set_bits_offset(0, 16)
);
assert_eq!(
16,
Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
.slice(2)
.count_set_bits_offset(0, 16)
);
}
#[test]
fn test_count_bits_offset_slice() {
assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
assert_eq!(
16,
Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
);
assert_eq!(
10,
Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
);
assert_eq!(
10,
Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
);
assert_eq!(
8,
Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
);
assert_eq!(
5,
Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
);
assert_eq!(
0,
Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
);
assert_eq!(
2,
Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
);
assert_eq!(
4,
Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
);
}
#[test]
fn test_unwind_safe() {
fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
assert_unwind_safe::<Buffer>()
}
#[test]
fn test_from_foreign_vec() {
let mut vector = vec![1_i32, 2, 3, 4, 5];
let buffer = unsafe {
Buffer::from_custom_allocation(
NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
vector.len() * std::mem::size_of::<i32>(),
Arc::new(vector),
)
};
let slice = buffer.typed_data::<i32>();
assert_eq!(slice, &[1, 2, 3, 4, 5]);
let buffer = buffer.slice(std::mem::size_of::<i32>());
let slice = buffer.typed_data::<i32>();
assert_eq!(slice, &[2, 3, 4, 5]);
}
}