use std::hash::Hash;
use arrow::array::*;
use arrow::bitmap::{Bitmap, BitmapBuilder};
use arrow::compute::arity::unary;
use arrow::datatypes::{ArrowDataType, TimeUnit};
use arrow::offset::{Offset, Offsets};
use arrow::types::NativeType;
use num_traits::AsPrimitive;
#[cfg(feature = "dtype-decimal")]
use num_traits::{Float, ToPrimitive};
use polars_error::PolarsResult;
use polars_utils::float16::pf16;
use polars_utils::pl_str::PlSmallStr;
use polars_utils::vec::PushUnchecked;
use super::CastOptionsImpl;
use super::temporal::*;
#[cfg(feature = "dtype-decimal")]
use crate::decimal::{dec128_verify_prec_scale, f64_to_dec128, i128_to_dec128};
/// Text serialization of a single primitive value into a byte buffer.
///
/// The implementations visible in this file append the formatted
/// representation of the value to the buffer and return how many bytes were
/// appended.
pub trait SerPrimitive {
/// Writes `val` into `f` and returns a byte count.
///
/// Callers in this file use the returned count to advance string offsets, so
/// it is expected to equal the number of bytes appended to `f`.
fn write(f: &mut Vec<u8>, val: Self) -> usize
where
Self: Sized;
}
macro_rules! impl_ser_primitive {
($ptype:ident) => {
impl SerPrimitive for $ptype {
fn write(f: &mut Vec<u8>, val: Self) -> usize
where
Self: Sized,
{
let mut buffer = itoa::Buffer::new();
let value = buffer.format(val);
f.extend_from_slice(value.as_bytes());
value.len()
}
}
};
}
impl_ser_primitive!(i8);
impl_ser_primitive!(i16);
impl_ser_primitive!(i32);
impl_ser_primitive!(i64);
impl_ser_primitive!(i128);
impl_ser_primitive!(u8);
impl_ser_primitive!(u16);
impl_ser_primitive!(u32);
impl_ser_primitive!(u64);
impl_ser_primitive!(u128);
impl SerPrimitive for pf16 {
    /// Serializes a `pf16` by converting it to `f32` and delegating to the
    /// `f32` implementation of [`SerPrimitive`].
    fn write(f: &mut Vec<u8>, val: Self) -> usize
    where
        Self: Sized,
    {
        let as_single = AsPrimitive::<f32>::as_(val);
        <f32 as SerPrimitive>::write(f, as_single)
    }
}
impl SerPrimitive for f32 {
    /// Formats `val` with a `zmij::Buffer`, appends the text to `f` and
    /// returns the length of the appended text in bytes.
    fn write(f: &mut Vec<u8>, val: Self) -> usize
    where
        Self: Sized,
    {
        let mut formatter = zmij::Buffer::new();
        let text = formatter.format(val);
        let bytes = text.as_bytes();
        f.extend_from_slice(bytes);
        bytes.len()
    }
}
impl SerPrimitive for f64 {
    /// Formats `val` with a `zmij::Buffer`, appends the text to `f` and
    /// returns the length of the appended text in bytes.
    fn write(f: &mut Vec<u8>, val: Self) -> usize
    where
        Self: Sized,
    {
        let mut formatter = zmij::Buffer::new();
        let text = formatter.format(val);
        let bytes = text.as_bytes();
        f.extend_from_slice(bytes);
        bytes.len()
    }
}
/// Applies `op` to every value of `array`, nulling out the slots for which
/// `fail` returns `true`.
///
/// `op` is still evaluated for failing slots (its result is stored but masked
/// by the validity bitmap), so it must not panic on such inputs.
///
/// If no value fails, the input validity is reused as-is. Otherwise a bitmap
/// of the successful slots is built and AND-ed with the input validity, when
/// there is one.
fn fallible_unary<I, F, G, O>(
    array: &PrimitiveArray<I>,
    op: F,
    fail: G,
    dtype: ArrowDataType,
) -> PrimitiveArray<O>
where
    I: NativeType,
    O: NativeType,
    F: Fn(I) -> O,
    G: Fn(I) -> bool,
{
    let total = array.len();
    let input = array.values();
    let mut converted: Vec<O> = Vec::with_capacity(total);

    // Fast path: convert the leading run of values that do not fail, without
    // allocating a bitmap.
    for &value in input.iter() {
        if fail(value) {
            break;
        }
        // SAFETY: capacity for `total` elements was reserved above and at most
        // one element is pushed per input value.
        unsafe { converted.push_unchecked(op(value)) };
    }

    let clean_prefix = converted.len();
    if clean_prefix == total {
        return PrimitiveArray::<O>::new(dtype, converted.into(), array.validity().cloned());
    }

    // Slow path: at least one value failed. Everything converted so far is
    // valid; the remainder is checked slot by slot.
    let mut succeeded = BitmapBuilder::with_capacity(total);
    succeeded.extend_constant(clean_prefix, true);
    for &value in &input[clean_prefix..] {
        // SAFETY: both containers were created with capacity `total`, and the
        // prefix plus this tail covers each input value exactly once.
        unsafe {
            converted.push_unchecked(op(value));
            succeeded.push_unchecked(!fail(value));
        }
    }
    debug_assert_eq!(converted.len(), total);
    debug_assert_eq!(succeeded.len(), total);

    let succeeded = succeeded.freeze();
    let validity = if let Some(existing) = array.validity() {
        arrow::bitmap::and(&succeeded, existing)
    } else {
        succeeded
    };

    PrimitiveArray::<O>::new(dtype, converted.into(), Some(validity))
}
/// Serializes every slot of `from` with [`SerPrimitive::write`] into one
/// contiguous byte buffer and records the running end position of each slot.
///
/// All slots of `from.values()` are serialized; the validity of `from` is not
/// consulted here.
fn primitive_to_values_and_offsets<T: NativeType + SerPrimitive, O: Offset>(
    from: &PrimitiveArray<T>,
) -> (Vec<u8>, Offsets<O>) {
    let slots = from.len();
    // One byte per slot is only a starting capacity.
    let mut bytes: Vec<u8> = Vec::with_capacity(slots);
    let mut ends: Vec<O> = Vec::with_capacity(slots + 1);
    ends.push(O::default());

    let mut written: usize = 0;

    // SAFETY:
    // - `written` is the sum of the lengths reported by `T::write`; this
    //   relies on each impl reporting exactly the bytes it appended, as the
    //   impls in this file do.
    // - `ends` starts at `O::default()` and every later entry is a running
    //   total that never decreases, which is what `new_unchecked` expects.
    unsafe {
        for item in from.values().iter().copied() {
            written += T::write(&mut bytes, item);
            ends.push(O::from_as_usize(written));
        }
        bytes.set_len(written);
        bytes.shrink_to_fit();
        (bytes, Offsets::new_unchecked(ends))
    }
}
/// Casts a primitive array to a boolean array.
///
/// A slot becomes `true` when its value differs from `T::default()` and
/// `false` otherwise. The validity of `from` is carried over unchanged.
pub fn primitive_to_boolean<T: NativeType>(
    from: &PrimitiveArray<T>,
    to_type: ArrowDataType,
) -> BooleanArray {
    let bits = Bitmap::from_trusted_len_iter(
        from.values().iter().map(|value| *value != T::default()),
    );
    let validity = from.validity().cloned();
    BooleanArray::new(to_type, bits, validity)
}
pub(super) fn primitive_to_boolean_dyn<T>(
from: &dyn Array,
to_type: ArrowDataType,
) -> PolarsResult<Box<dyn Array>>
where
T: NativeType,
{
let from = from.as_any().downcast_ref().unwrap();
Ok(Box::new(primitive_to_boolean::<T>(from, to_type)))
}
/// Casts a primitive array to a `Utf8Array` holding the textual form of each
/// value, keeping the validity of `from`.
pub(super) fn primitive_to_utf8<T: NativeType + SerPrimitive, O: Offset>(
    from: &PrimitiveArray<T>,
) -> Utf8Array<O> {
    let (bytes, offsets) = primitive_to_values_and_offsets::<T, O>(from);
    let dtype = Utf8Array::<O>::default_dtype();
    let validity = from.validity().cloned();

    // SAFETY: `offsets` and `bytes` were built together by
    // `primitive_to_values_and_offsets`. The bytes come from
    // `SerPrimitive::write`, which is presumed to emit valid UTF-8 (the impls
    // in this file write `itoa`/`zmij` output).
    unsafe { Utf8Array::<O>::new_unchecked(dtype, offsets.into(), bytes.into(), validity) }
}
pub(super) fn primitive_to_utf8_dyn<T, O>(from: &dyn Array) -> PolarsResult<Box<dyn Array>>
where
O: Offset,
T: NativeType + SerPrimitive,
{
let from = from.as_any().downcast_ref().unwrap();
Ok(Box::new(primitive_to_utf8::<T, O>(from)))
}
/// Type-erased numeric cast between two primitive types.
///
/// With `options.wrapped` set the cast goes through
/// [`primitive_as_primitive`]; otherwise it goes through
/// [`primitive_to_primitive`], which nulls out values that do not fit.
///
/// # Panics
///
/// Panics if `from` is not a `PrimitiveArray<I>`.
pub(super) fn primitive_to_primitive_dyn<I, O>(
    from: &dyn Array,
    to_type: &ArrowDataType,
    options: CastOptionsImpl,
) -> PolarsResult<Box<dyn Array>>
where
    I: NativeType + num_traits::NumCast + num_traits::AsPrimitive<O>,
    O: NativeType + num_traits::NumCast,
{
    let typed = from.as_any().downcast_ref::<PrimitiveArray<I>>().unwrap();
    let converted: Box<dyn Array> = if options.wrapped {
        Box::new(primitive_as_primitive::<I, O>(typed, to_type))
    } else {
        Box::new(primitive_to_primitive::<I, O>(typed, to_type))
    };
    Ok(converted)
}
/// Checked numeric cast from `I` to `O`.
///
/// Null slots stay null, and a value for which `num_traits::cast` returns
/// `None` becomes null as well.
pub fn primitive_to_primitive<I, O>(
    from: &PrimitiveArray<I>,
    to_type: &ArrowDataType,
) -> PrimitiveArray<O>
where
    I: NativeType + num_traits::NumCast,
    O: NativeType + num_traits::NumCast,
{
    let casted = from.iter().map(|slot| match slot {
        Some(value) => num_traits::cast::cast::<I, O>(*value),
        None => None,
    });
    let target = to_type.clone();
    PrimitiveArray::<O>::from_trusted_len_iter(casted).to(target)
}
#[cfg(feature = "dtype-decimal")]
/// Casts an integer array to a 128-bit decimal array with the given precision
/// and scale.
///
/// A slot becomes null when it is null in `from`, when the value cannot be
/// converted to `i128`, or when `i128_to_dec128` returns `None` for it.
///
/// # Panics
///
/// Panics if `dec128_verify_prec_scale` rejects `to_precision`/`to_scale`.
pub fn integer_to_decimal<T: NativeType + ToPrimitive>(
    from: &PrimitiveArray<T>,
    to_precision: usize,
    to_scale: usize,
) -> PrimitiveArray<i128> {
    assert!(dec128_verify_prec_scale(to_precision, to_scale).is_ok());

    let scaled = from.iter().map(|slot| {
        slot.and_then(|value| value.to_i128())
            .and_then(|wide| i128_to_dec128(wide, to_precision, to_scale))
    });
    let dtype = ArrowDataType::Decimal(to_precision, to_scale);
    PrimitiveArray::<i128>::from_trusted_len_iter(scaled).to(dtype)
}
#[cfg(feature = "dtype-decimal")]
/// Type-erased wrapper around [`integer_to_decimal`].
///
/// # Panics
///
/// Panics if `from` is not a `PrimitiveArray<T>`, or under the same
/// conditions as [`integer_to_decimal`].
pub(super) fn integer_to_decimal_dyn<T>(
    from: &dyn Array,
    precision: usize,
    scale: usize,
) -> PolarsResult<Box<dyn Array>>
where
    T: NativeType + ToPrimitive,
{
    let typed = from.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
    let converted = integer_to_decimal::<T>(typed, precision, scale);
    Ok(Box::new(converted))
}
#[cfg(feature = "dtype-decimal")]
/// Casts a float array to a 128-bit decimal array with the given precision
/// and scale.
///
/// Each value is first converted to `f64`. A slot becomes null when it is
/// null in `from` or when `f64_to_dec128` returns `None` for it.
///
/// # Panics
///
/// Panics if `dec128_verify_prec_scale` rejects `to_precision`/`to_scale`.
pub fn float_to_decimal<T: NativeType + Float + AsPrimitive<f64>>(
    from: &PrimitiveArray<T>,
    to_precision: usize,
    to_scale: usize,
) -> PrimitiveArray<i128> {
    assert!(dec128_verify_prec_scale(to_precision, to_scale).is_ok());

    let scaled = from.iter().map(|slot| {
        let as_double = AsPrimitive::<f64>::as_(*slot?);
        f64_to_dec128(as_double, to_precision, to_scale)
    });
    let dtype = ArrowDataType::Decimal(to_precision, to_scale);
    PrimitiveArray::<i128>::from_trusted_len_iter(scaled).to(dtype)
}
#[cfg(feature = "dtype-decimal")]
/// Type-erased wrapper around [`float_to_decimal`].
///
/// # Panics
///
/// Panics if `from` is not a `PrimitiveArray<T>`, or under the same
/// conditions as [`float_to_decimal`].
pub(super) fn float_to_decimal_dyn<T: NativeType + Float + AsPrimitive<f64>>(
    from: &dyn Array,
    precision: usize,
    scale: usize,
) -> PolarsResult<Box<dyn Array>> {
    let typed = from.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
    let converted = float_to_decimal::<T>(typed, precision, scale);
    Ok(Box::new(converted))
}
/// Unchecked numeric cast from `I` to `O` using `AsPrimitive::as_` on every
/// value. No slot is nulled by this conversion.
pub fn primitive_as_primitive<I, O>(
    from: &PrimitiveArray<I>,
    to_type: &ArrowDataType,
) -> PrimitiveArray<O>
where
    I: NativeType + num_traits::AsPrimitive<O>,
    O: NativeType,
{
    let target = to_type.clone();
    unary(
        from,
        |value: I| -> O { num_traits::AsPrimitive::<O>::as_(value) },
        target,
    )
}
/// Re-tags a primitive array with a different logical `ArrowDataType`.
///
/// The values buffer and validity are cloned from `from` without touching
/// any element.
pub fn primitive_to_same_primitive<T>(
    from: &PrimitiveArray<T>,
    to_type: &ArrowDataType,
) -> PrimitiveArray<T>
where
    T: NativeType,
{
    let dtype = to_type.clone();
    let values = from.values().clone();
    let validity = from.validity().cloned();
    PrimitiveArray::<T>::new(dtype, values, validity)
}
pub(super) fn primitive_to_same_primitive_dyn<T>(
from: &dyn Array,
to_type: &ArrowDataType,
) -> PolarsResult<Box<dyn Array>>
where
T: NativeType,
{
let from = from.as_any().downcast_ref().unwrap();
Ok(Box::new(primitive_to_same_primitive::<T>(from, to_type)))
}
pub(super) fn primitive_to_dictionary_dyn<T: NativeType + Eq + Hash, K: DictionaryKey>(
from: &dyn Array,
) -> PolarsResult<Box<dyn Array>> {
let from = from.as_any().downcast_ref().unwrap();
primitive_to_dictionary::<T, K>(from).map(|x| Box::new(x) as Box<dyn Array>)
}
/// Dictionary-encodes a primitive array with keys of type `K`.
///
/// The dictionary values keep the data type of `from`.
///
/// # Errors
///
/// Returns the error raised by `MutableDictionaryArray::try_empty` or
/// `try_extend` if either fails.
pub fn primitive_to_dictionary<T: NativeType + Eq + Hash, K: DictionaryKey>(
    from: &PrimitiveArray<T>,
) -> PolarsResult<DictionaryArray<K>> {
    let empty_values = MutablePrimitiveArray::<T>::from(from.dtype().clone());
    let mut builder = MutableDictionaryArray::<K, _>::try_empty(empty_values)?;

    builder.reserve(from.len());
    builder.try_extend(from.iter().map(|slot| slot.copied()))?;

    Ok(builder.into())
}
/// Re-tags `from` as `dtype`, nulling out every slot whose value is rejected
/// by the predicate `f`.
///
/// The values buffer is shared with `from`. If `f` accepts every value, the
/// validity of `from` is reused; otherwise the predicate results are combined
/// with the existing validity (when there is one) using bitwise AND.
///
/// # Safety
///
/// The result is built with `PrimitiveArray::new_unchecked`, so the caller
/// must pass a `dtype` that is valid for a `PrimitiveArray<T>`.
pub unsafe fn primitive_map_is_valid<T: NativeType>(
    from: &PrimitiveArray<T>,
    f: impl Fn(T) -> bool,
    dtype: ArrowDataType,
) -> PrimitiveArray<T> {
    let values = from.values().clone();
    let accepted: Bitmap = values.iter().map(|&value| f(value)).collect();

    let validity = if accepted.unset_bits() == 0 {
        // Nothing was rejected: keep whatever validity the input had.
        from.validity().cloned()
    } else {
        Some(match from.validity() {
            Some(existing) => existing & &accepted,
            None => accepted,
        })
    };

    // SAFETY: `validity` has one bit per value, and the caller guarantees
    // `dtype` is valid for `T`.
    unsafe { PrimitiveArray::new_unchecked(dtype, values, validity) }
}
/// Casts an `i32` array to `Time32(Second)`.
///
/// Values outside `0..SECONDS_IN_DAY` become null.
pub fn int32_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
    let within_day = 0..SECONDS_IN_DAY as i32;
    let dtype = ArrowDataType::Time32(TimeUnit::Second);
    // SAFETY: relies on `Time32` being a valid data type for an `i32`
    // primitive array (32-bit time in the Arrow format).
    unsafe { primitive_map_is_valid(from, |secs| within_day.contains(&secs), dtype) }
}
/// Casts an `i32` array to `Time32(Millisecond)`.
///
/// Values outside `0..MILLISECONDS_IN_DAY` become null.
pub fn int32_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
    let within_day = 0..MILLISECONDS_IN_DAY as i32;
    let dtype = ArrowDataType::Time32(TimeUnit::Millisecond);
    // SAFETY: relies on `Time32` being a valid data type for an `i32`
    // primitive array (32-bit time in the Arrow format).
    unsafe { primitive_map_is_valid(from, |millis| within_day.contains(&millis), dtype) }
}
/// Casts an `i64` array to `Time64(Microsecond)`.
///
/// Values outside `0..MICROSECONDS_IN_DAY` become null.
pub fn int64_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
    // SAFETY: relies on `Time64` being a valid data type for an `i64`
    // primitive array (64-bit time in the Arrow format). The previous
    // `Time32(Microsecond)` tag paired a 32-bit logical type with 64-bit
    // storage, and `new_unchecked` would not have rejected it.
    unsafe {
        primitive_map_is_valid(
            from,
            |v| (0..MICROSECONDS_IN_DAY).contains(&v),
            ArrowDataType::Time64(TimeUnit::Microsecond),
        )
    }
}
/// Casts an `i64` array to `Time64(Nanosecond)`.
///
/// Values outside `0..NANOSECONDS_IN_DAY` become null.
pub fn int64_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
    let within_day = 0..NANOSECONDS_IN_DAY;
    let dtype = ArrowDataType::Time64(TimeUnit::Nanosecond);
    // SAFETY: relies on `Time64` being a valid data type for an `i64`
    // primitive array (64-bit time in the Arrow format).
    unsafe { primitive_map_is_valid(from, |nanos| within_day.contains(&nanos), dtype) }
}
/// Casts `Date32` to `Date64` by widening each value to `i64` and multiplying
/// it by `MILLISECONDS_IN_DAY`.
pub fn date32_to_date64(from: &PrimitiveArray<i32>) -> PrimitiveArray<i64> {
    let days_to_millis = |days: i32| i64::from(days) * MILLISECONDS_IN_DAY;
    unary(from, days_to_millis, ArrowDataType::Date64)
}
/// Casts `Date64` to `Date32` by dividing each value by
/// `MILLISECONDS_IN_DAY`.
///
/// The division truncates toward zero, and the quotient is narrowed with
/// `as i32`, which does not check for out-of-range values.
pub fn date64_to_date32(from: &PrimitiveArray<i64>) -> PrimitiveArray<i32> {
    let millis_to_days = |millis: i64| {
        let days = millis / MILLISECONDS_IN_DAY;
        days as i32
    };
    unary(from, millis_to_days, ArrowDataType::Date32)
}
/// Casts `Time32(Second)` to `Time32(Millisecond)`.
///
/// Values whose product with 1000 overflows `i32` become null.
pub fn time32s_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
    const FACTOR: i32 = 1000;
    let scale = |secs: i32| secs.wrapping_mul(FACTOR);
    let overflows = |secs: i32| secs.checked_mul(FACTOR).is_none();
    let dtype = ArrowDataType::Time32(TimeUnit::Millisecond);
    fallible_unary(from, scale, overflows, dtype)
}
/// Casts `Time32(Millisecond)` to `Time32(Second)` by integer division by
/// 1000.
pub fn time32ms_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
    let dtype = ArrowDataType::Time32(TimeUnit::Second);
    unary(from, |millis: i32| millis / 1000, dtype)
}
/// Casts `Time64(Microsecond)` to `Time64(Nanosecond)`.
///
/// Values whose product with 1000 overflows `i64` become null.
pub fn time64us_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
    const FACTOR: i64 = 1000;
    let scale = |micros: i64| micros.wrapping_mul(FACTOR);
    let overflows = |micros: i64| micros.checked_mul(FACTOR).is_none();
    let dtype = ArrowDataType::Time64(TimeUnit::Nanosecond);
    fallible_unary(from, scale, overflows, dtype)
}
/// Casts `Time64(Nanosecond)` to `Time64(Microsecond)` by integer division by
/// 1000.
pub fn time64ns_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
    let dtype = ArrowDataType::Time64(TimeUnit::Microsecond);
    unary(from, |nanos: i64| nanos / 1000, dtype)
}
/// Casts a timestamp array in `from_unit` to `Date64`.
///
/// The values are rescaled by the ratio between `time_unit_multiple(from_unit)`
/// and `MILLISECONDS`. A finer source unit is divided down, an equal unit is
/// only re-tagged, and a coarser source unit is multiplied up, with
/// overflowing values becoming null.
pub fn timestamp_to_date64(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i64> {
    let source_size = time_unit_multiple(from_unit);
    let target_size = MILLISECONDS;
    let target_type = ArrowDataType::Date64;

    if source_size == target_size {
        return primitive_to_same_primitive(from, &target_type);
    }

    if source_size > target_size {
        // Divide by the whole ratio in one step.
        let divisor = source_size / target_size;
        return unary(from, |ts| ts / divisor, target_type);
    }

    let factor = target_size / source_size;
    fallible_unary(
        from,
        |ts| ts.wrapping_mul(factor),
        |ts| ts.checked_mul(factor).is_none(),
        target_type,
    )
}
/// Casts a timestamp array in `from_unit` to `Date32`.
///
/// Each value is divided by `time_unit_multiple(from_unit) * SECONDS_IN_DAY`
/// (truncating toward zero) and the quotient is narrowed with `as i32`.
pub fn timestamp_to_date32(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i32> {
    let ticks_per_day = time_unit_multiple(from_unit) * SECONDS_IN_DAY;
    let to_days = |ts: i64| {
        let days = ts / ticks_per_day;
        days as i32
    };
    unary(from, to_days, ArrowDataType::Date32)
}
/// Casts a `Time32` array in `from_unit` to `Time64` in `to_unit`.
///
/// Each value is widened to `i64` and multiplied by
/// `time_unit_multiple(to_unit) / time_unit_multiple(from_unit)`. Values whose
/// product overflows `i64` become null.
pub fn time32_to_time64(
    from: &PrimitiveArray<i32>,
    from_unit: TimeUnit,
    to_unit: TimeUnit,
) -> PrimitiveArray<i64> {
    let source_size = time_unit_multiple(from_unit);
    let target_size = time_unit_multiple(to_unit);
    let factor = target_size / source_size;

    let scale = |value: i32| i64::from(value).wrapping_mul(factor);
    let overflows = |value: i32| i64::from(value).checked_mul(factor).is_none();
    fallible_unary(from, scale, overflows, ArrowDataType::Time64(to_unit))
}
/// Casts a `Time64` array in `from_unit` to `Time32` in `to_unit`.
///
/// Each value is divided by
/// `time_unit_multiple(from_unit) / time_unit_multiple(to_unit)` and the
/// quotient is narrowed with `as i32`.
pub fn time64_to_time32(
    from: &PrimitiveArray<i64>,
    from_unit: TimeUnit,
    to_unit: TimeUnit,
) -> PrimitiveArray<i32> {
    let source_size = time_unit_multiple(from_unit);
    let target_size = time_unit_multiple(to_unit);
    let divisor = source_size / target_size;

    let narrow = |value: i64| {
        let scaled = value / divisor;
        scaled as i32
    };
    unary(from, narrow, ArrowDataType::Time32(to_unit))
}
/// Converts a timestamp array from `from_unit` to `to_unit`, attaching the
/// time zone `tz` to the resulting data type.
///
/// When the source unit is at least as fine as the target, the values are
/// divided by the unit ratio. Otherwise they are multiplied by it, and values
/// whose product overflows `i64` become null.
pub fn timestamp_to_timestamp(
    from: &PrimitiveArray<i64>,
    from_unit: TimeUnit,
    to_unit: TimeUnit,
    tz: &Option<PlSmallStr>,
) -> PrimitiveArray<i64> {
    let source_size = time_unit_multiple(from_unit);
    let target_size = time_unit_multiple(to_unit);
    let target_type = ArrowDataType::Timestamp(to_unit, tz.clone());

    if source_size < target_size {
        let factor = target_size / source_size;
        return fallible_unary(
            from,
            |ts| ts.wrapping_mul(factor),
            |ts| ts.checked_mul(factor).is_none(),
            target_type,
        );
    }

    let divisor = source_size / target_size;
    unary(from, |ts| ts / divisor, target_type)
}
/// Casts a primitive array to a `BinaryViewArray` holding the serialized
/// text of each value, keeping the validity of `from`.
///
/// Every slot of `from.values()` is serialized; validity is attached only
/// after all values have been pushed.
pub(super) fn primitive_to_binview<T: NativeType + SerPrimitive>(
    from: &PrimitiveArray<T>,
) -> BinaryViewArray {
    let mut mutable = MutableBinaryViewArray::with_capacity(from.len());

    // One scratch buffer is reused for every value so serialization does not
    // allocate per element.
    let mut scratch: Vec<u8> = Vec::new();
    for &x in from.values().iter() {
        // `clear` resets the length and keeps the capacity, like the former
        // `unsafe { set_len(0) }`, but needs no unsafe.
        scratch.clear();
        T::write(&mut scratch, x);
        mutable.push_value_ignore_validity(&scratch);
    }

    mutable.freeze().with_validity(from.validity().cloned())
}
pub(super) fn primitive_to_binview_dyn<T>(from: &dyn Array) -> BinaryViewArray
where
T: NativeType + SerPrimitive,
{
let from = from.as_any().downcast_ref().unwrap();
primitive_to_binview::<T>(from)
}