pub use crate::prelude::ChunkCompare;
use crate::prelude::*;
use arrow::{array::ArrayRef, buffer::Buffer};
use std::mem;
pub(crate) mod aggregate;
pub(crate) mod arithmetic;
mod comparison;
pub(crate) mod iterator;
use arithmetic::{LhsNumOpsDispatch, NumOpsDispatch, NumOpsDispatchSeriesSingleNumber};
use crate::fmt::FmtLargeList;
use arrow::array::ArrayDataRef;
use enum_dispatch::enum_dispatch;
use num::{Num, NumCast};
#[enum_dispatch]
#[derive(Clone)]
/// A named, type-erased chunked array.
///
/// Every variant wraps the chunked array for one concrete data type, which lets
/// callers hold and pass around data without knowing its element type at
/// compile time. The dispatch macros in this file `match` on these variants to
/// forward a call to the wrapped array.
pub enum Series {
// Unsigned integers.
UInt8(ChunkedArray<UInt8Type>),
UInt16(ChunkedArray<UInt16Type>),
UInt32(ChunkedArray<UInt32Type>),
UInt64(ChunkedArray<UInt64Type>),
// Signed integers.
Int8(ChunkedArray<Int8Type>),
Int16(ChunkedArray<Int16Type>),
Int32(ChunkedArray<Int32Type>),
Int64(ChunkedArray<Int64Type>),
// Floating point.
Float32(ChunkedArray<Float32Type>),
Float64(ChunkedArray<Float64Type>),
// Strings and booleans; these two (and `LargeList`) are excluded from the
// "numeric" dispatch macros in this file.
Utf8(ChunkedArray<Utf8Type>),
Bool(ChunkedArray<BooleanType>),
// Dates and times of day.
Date32(ChunkedArray<Date32Type>),
Date64(ChunkedArray<Date64Type>),
Time32Millisecond(Time32MillisecondChunked),
Time32Second(Time32SecondChunked),
Time64Nanosecond(ChunkedArray<Time64NanosecondType>),
Time64Microsecond(ChunkedArray<Time64MicrosecondType>),
// Durations.
DurationNanosecond(ChunkedArray<DurationNanosecondType>),
DurationMicrosecond(DurationMicrosecondChunked),
DurationMillisecond(DurationMillisecondChunked),
DurationSecond(DurationSecondChunked),
// Intervals.
IntervalDayTime(IntervalDayTimeChunked),
IntervalYearMonth(IntervalYearMonthChunked),
// Timestamps.
TimestampNanosecond(TimestampNanosecondChunked),
TimestampMicrosecond(TimestampMicrosecondChunked),
TimestampMillisecond(TimestampMillisecondChunked),
TimestampSecond(TimestampSecondChunked),
// Nested lists.
LargeList(LargeListChunked),
}
/// Forwards a method call to the chunked array wrapped by a `Series`.
///
/// `apply_method_all_series!(series, method, arg1, arg2)` expands to a `match`
/// over every `Series` variant that evaluates to
/// `inner.method(arg1, arg2)`. The comma after the method name is required
/// even when there are no arguments: `apply_method_all_series!(s, len,)`.
#[macro_export]
macro_rules! apply_method_all_series {
    ($series:ident, $func:ident, $($arg:expr),*) => {
        match $series {
            // Integers.
            Series::UInt8(ca) => ca.$func($($arg),*),
            Series::UInt16(ca) => ca.$func($($arg),*),
            Series::UInt32(ca) => ca.$func($($arg),*),
            Series::UInt64(ca) => ca.$func($($arg),*),
            Series::Int8(ca) => ca.$func($($arg),*),
            Series::Int16(ca) => ca.$func($($arg),*),
            Series::Int32(ca) => ca.$func($($arg),*),
            Series::Int64(ca) => ca.$func($($arg),*),
            // Floats.
            Series::Float32(ca) => ca.$func($($arg),*),
            Series::Float64(ca) => ca.$func($($arg),*),
            // Strings and booleans.
            Series::Utf8(ca) => ca.$func($($arg),*),
            Series::Bool(ca) => ca.$func($($arg),*),
            // Temporal types.
            Series::Date32(ca) => ca.$func($($arg),*),
            Series::Date64(ca) => ca.$func($($arg),*),
            Series::Time32Millisecond(ca) => ca.$func($($arg),*),
            Series::Time32Second(ca) => ca.$func($($arg),*),
            Series::Time64Nanosecond(ca) => ca.$func($($arg),*),
            Series::Time64Microsecond(ca) => ca.$func($($arg),*),
            Series::DurationNanosecond(ca) => ca.$func($($arg),*),
            Series::DurationMicrosecond(ca) => ca.$func($($arg),*),
            Series::DurationMillisecond(ca) => ca.$func($($arg),*),
            Series::DurationSecond(ca) => ca.$func($($arg),*),
            Series::TimestampNanosecond(ca) => ca.$func($($arg),*),
            Series::TimestampMicrosecond(ca) => ca.$func($($arg),*),
            Series::TimestampMillisecond(ca) => ca.$func($($arg),*),
            Series::TimestampSecond(ca) => ca.$func($($arg),*),
            Series::IntervalDayTime(ca) => ca.$func($($arg),*),
            Series::IntervalYearMonth(ca) => ca.$func($($arg),*),
            // Nested lists.
            Series::LargeList(ca) => ca.$func($($arg),*),
        }
    }
}
/// Forwards a method call to the chunked array of a numeric or temporal `Series`.
///
/// Same calling convention as `apply_method_all_series!`, but only the integer,
/// float, date, time, duration, timestamp and interval variants are handled.
/// Any other variant hits the catch-all arm and panics via `unimplemented!()`.
#[macro_export]
macro_rules! apply_method_numeric_series {
    ($series:ident, $func:ident, $($arg:expr),*) => {
        match $series {
            // Integers.
            Series::UInt8(ca) => ca.$func($($arg),*),
            Series::UInt16(ca) => ca.$func($($arg),*),
            Series::UInt32(ca) => ca.$func($($arg),*),
            Series::UInt64(ca) => ca.$func($($arg),*),
            Series::Int8(ca) => ca.$func($($arg),*),
            Series::Int16(ca) => ca.$func($($arg),*),
            Series::Int32(ca) => ca.$func($($arg),*),
            Series::Int64(ca) => ca.$func($($arg),*),
            // Floats.
            Series::Float32(ca) => ca.$func($($arg),*),
            Series::Float64(ca) => ca.$func($($arg),*),
            // Temporal types.
            Series::Date32(ca) => ca.$func($($arg),*),
            Series::Date64(ca) => ca.$func($($arg),*),
            Series::Time32Millisecond(ca) => ca.$func($($arg),*),
            Series::Time32Second(ca) => ca.$func($($arg),*),
            Series::Time64Nanosecond(ca) => ca.$func($($arg),*),
            Series::Time64Microsecond(ca) => ca.$func($($arg),*),
            Series::DurationNanosecond(ca) => ca.$func($($arg),*),
            Series::DurationMicrosecond(ca) => ca.$func($($arg),*),
            Series::DurationMillisecond(ca) => ca.$func($($arg),*),
            Series::DurationSecond(ca) => ca.$func($($arg),*),
            Series::TimestampNanosecond(ca) => ca.$func($($arg),*),
            Series::TimestampMicrosecond(ca) => ca.$func($($arg),*),
            Series::TimestampMillisecond(ca) => ca.$func($($arg),*),
            Series::TimestampSecond(ca) => ca.$func($($arg),*),
            Series::IntervalDayTime(ca) => ca.$func($($arg),*),
            Series::IntervalYearMonth(ca) => ca.$func($($arg),*),
            // Everything else is not supported by this macro.
            _ => unimplemented!(),
        }
    }
}
/// Calls a method on the chunked array of a numeric or temporal `Series` and
/// wraps the result back into the same `Series` variant.
///
/// Invocation shape: `apply_method_numeric_series_and_return!(s, method, [args], ?)`.
/// The tokens after the bracketed argument list are appended verbatim to the
/// method call, so pass `?` for fallible methods and nothing (keeping the
/// comma) for infallible ones. Variants without an arm (the catch-all) panic
/// via `unimplemented!()`.
#[macro_export]
macro_rules! apply_method_numeric_series_and_return {
    ($series:ident, $func:ident, [$($arg:expr),*], $($suffix:tt)*) => {
        match $series {
            Series::UInt8(ca) => Series::UInt8(ca.$func($($arg),*)$($suffix)*),
            Series::UInt16(ca) => Series::UInt16(ca.$func($($arg),*)$($suffix)*),
            Series::UInt32(ca) => Series::UInt32(ca.$func($($arg),*)$($suffix)*),
            Series::UInt64(ca) => Series::UInt64(ca.$func($($arg),*)$($suffix)*),
            Series::Int8(ca) => Series::Int8(ca.$func($($arg),*)$($suffix)*),
            Series::Int16(ca) => Series::Int16(ca.$func($($arg),*)$($suffix)*),
            Series::Int32(ca) => Series::Int32(ca.$func($($arg),*)$($suffix)*),
            Series::Int64(ca) => Series::Int64(ca.$func($($arg),*)$($suffix)*),
            Series::Float32(ca) => Series::Float32(ca.$func($($arg),*)$($suffix)*),
            Series::Float64(ca) => Series::Float64(ca.$func($($arg),*)$($suffix)*),
            Series::Date32(ca) => Series::Date32(ca.$func($($arg),*)$($suffix)*),
            Series::Date64(ca) => Series::Date64(ca.$func($($arg),*)$($suffix)*),
            Series::Time32Millisecond(ca) => {
                Series::Time32Millisecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::Time32Second(ca) => {
                Series::Time32Second(ca.$func($($arg),*)$($suffix)*)
            }
            Series::Time64Nanosecond(ca) => {
                Series::Time64Nanosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::Time64Microsecond(ca) => {
                Series::Time64Microsecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationNanosecond(ca) => {
                Series::DurationNanosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationMicrosecond(ca) => {
                Series::DurationMicrosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationMillisecond(ca) => {
                Series::DurationMillisecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationSecond(ca) => {
                Series::DurationSecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampNanosecond(ca) => {
                Series::TimestampNanosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampMicrosecond(ca) => {
                Series::TimestampMicrosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampMillisecond(ca) => {
                Series::TimestampMillisecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampSecond(ca) => {
                Series::TimestampSecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::IntervalDayTime(ca) => {
                Series::IntervalDayTime(ca.$func($($arg),*)$($suffix)*)
            }
            Series::IntervalYearMonth(ca) => {
                Series::IntervalYearMonth(ca.$func($($arg),*)$($suffix)*)
            }
            // Non-numeric variants are not supported by this macro.
            _ => unimplemented!()
        }
    }
}
/// Calls a method on the chunked array of any `Series` and wraps the result
/// back into the same `Series` variant.
///
/// Invocation shape: `apply_method_all_series_and_return!(s, method, [args], ?)`.
/// The tokens after the bracketed argument list are appended verbatim to the
/// method call: pass `?` for fallible methods, or nothing (keeping the comma)
/// for infallible ones.
macro_rules! apply_method_all_series_and_return {
    ($series:ident, $func:ident, [$($arg:expr),*], $($suffix:tt)*) => {
        match $series {
            Series::UInt8(ca) => Series::UInt8(ca.$func($($arg),*)$($suffix)*),
            Series::UInt16(ca) => Series::UInt16(ca.$func($($arg),*)$($suffix)*),
            Series::UInt32(ca) => Series::UInt32(ca.$func($($arg),*)$($suffix)*),
            Series::UInt64(ca) => Series::UInt64(ca.$func($($arg),*)$($suffix)*),
            Series::Int8(ca) => Series::Int8(ca.$func($($arg),*)$($suffix)*),
            Series::Int16(ca) => Series::Int16(ca.$func($($arg),*)$($suffix)*),
            Series::Int32(ca) => Series::Int32(ca.$func($($arg),*)$($suffix)*),
            Series::Int64(ca) => Series::Int64(ca.$func($($arg),*)$($suffix)*),
            Series::Float32(ca) => Series::Float32(ca.$func($($arg),*)$($suffix)*),
            Series::Float64(ca) => Series::Float64(ca.$func($($arg),*)$($suffix)*),
            Series::Utf8(ca) => Series::Utf8(ca.$func($($arg),*)$($suffix)*),
            Series::Bool(ca) => Series::Bool(ca.$func($($arg),*)$($suffix)*),
            Series::Date32(ca) => Series::Date32(ca.$func($($arg),*)$($suffix)*),
            Series::Date64(ca) => Series::Date64(ca.$func($($arg),*)$($suffix)*),
            Series::Time32Millisecond(ca) => {
                Series::Time32Millisecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::Time32Second(ca) => {
                Series::Time32Second(ca.$func($($arg),*)$($suffix)*)
            }
            Series::Time64Nanosecond(ca) => {
                Series::Time64Nanosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::Time64Microsecond(ca) => {
                Series::Time64Microsecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationNanosecond(ca) => {
                Series::DurationNanosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationMicrosecond(ca) => {
                Series::DurationMicrosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationMillisecond(ca) => {
                Series::DurationMillisecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::DurationSecond(ca) => {
                Series::DurationSecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampNanosecond(ca) => {
                Series::TimestampNanosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampMicrosecond(ca) => {
                Series::TimestampMicrosecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampMillisecond(ca) => {
                Series::TimestampMillisecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::TimestampSecond(ca) => {
                Series::TimestampSecond(ca.$func($($arg),*)$($suffix)*)
            }
            Series::IntervalDayTime(ca) => {
                Series::IntervalDayTime(ca.$func($($arg),*)$($suffix)*)
            }
            Series::IntervalYearMonth(ca) => {
                Series::IntervalYearMonth(ca.$func($($arg),*)$($suffix)*)
            }
            Series::LargeList(ca) => Series::LargeList(ca.$func($($arg),*)$($suffix)*),
        }
    }
}
/// Evaluates to `Ok(inner)` when the series is the requested variant and to
/// `Err(PolarsError::DataTypeMisMatch)` for every other variant.
macro_rules! unpack_series {
    ($series:ident, $wanted:ident) => {
        match $series {
            Series::$wanted(inner) => Ok(inner),
            _ => Err(PolarsError::DataTypeMisMatch),
        }
    };
}
impl Series {
    /// Returns the Arrow array data reported by the underlying chunked array.
    pub fn array_data(&self) -> Vec<ArrayDataRef> {
        apply_method_all_series!(self, array_data,)
    }

    /// Wraps a typed `ChunkedArray` in the `Series` variant matching its data type.
    ///
    /// # Panics
    ///
    /// Panics when `T`'s data type has no corresponding `Series` variant.
    pub fn from_chunked_array<T: PolarsDataType>(ca: ChunkedArray<T>) -> Self {
        pack_ca_to_series(ca)
    }

    /// Returns the `chunk_id` vector of the underlying chunked array.
    ///
    /// NOTE(review): presumably one length per chunk, as the method name
    /// suggests — confirm against `ChunkedArray::chunk_id`.
    pub fn chunk_lengths(&self) -> &Vec<usize> {
        apply_method_all_series!(self, chunk_id,)
    }

    /// Name of the series.
    pub fn name(&self) -> &str {
        apply_method_all_series!(self, name,)
    }

    /// Renames the series in place and returns `self` for chaining.
    pub fn rename(&mut self, name: &str) -> &mut Self {
        apply_method_all_series!(self, rename, name);
        self
    }

    /// Returns a reference to the field of the underlying chunked array.
    pub fn field(&self) -> &Field {
        apply_method_all_series!(self, ref_field,)
    }

    /// Arrow data type of the series, read from its field.
    pub fn dtype(&self) -> &ArrowDataType {
        self.field().data_type()
    }

    /// Returns the Arrow arrays (chunks) backing the series.
    pub fn chunks(&self) -> &Vec<ArrayRef> {
        apply_method_all_series!(self, chunks,)
    }

    /// Number of chunks backing the series.
    pub fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    // Typed accessors: each returns the wrapped chunked array when the series
    // is the matching variant and `PolarsError::DataTypeMisMatch` otherwise.

    /// Unpacks to `&Int8Chunked`, or fails with `DataTypeMisMatch`.
    pub fn i8(&self) -> Result<&Int8Chunked> {
        unpack_series!(self, Int8)
    }

    /// Unpacks to `&Int16Chunked`, or fails with `DataTypeMisMatch`.
    pub fn i16(&self) -> Result<&Int16Chunked> {
        unpack_series!(self, Int16)
    }

    /// Unpacks to `&Int32Chunked`, or fails with `DataTypeMisMatch`.
    pub fn i32(&self) -> Result<&Int32Chunked> {
        unpack_series!(self, Int32)
    }

    /// Unpacks to `&Int64Chunked`, or fails with `DataTypeMisMatch`.
    pub fn i64(&self) -> Result<&Int64Chunked> {
        unpack_series!(self, Int64)
    }

    /// Unpacks to `&Float32Chunked`, or fails with `DataTypeMisMatch`.
    pub fn f32(&self) -> Result<&Float32Chunked> {
        unpack_series!(self, Float32)
    }

    /// Unpacks to `&Float64Chunked`, or fails with `DataTypeMisMatch`.
    pub fn f64(&self) -> Result<&Float64Chunked> {
        unpack_series!(self, Float64)
    }

    /// Unpacks to `&UInt8Chunked`, or fails with `DataTypeMisMatch`.
    pub fn u8(&self) -> Result<&UInt8Chunked> {
        unpack_series!(self, UInt8)
    }

    /// Unpacks to `&UInt16Chunked`, or fails with `DataTypeMisMatch`.
    pub fn u16(&self) -> Result<&UInt16Chunked> {
        unpack_series!(self, UInt16)
    }

    /// Unpacks to `&UInt32Chunked`, or fails with `DataTypeMisMatch`.
    pub fn u32(&self) -> Result<&UInt32Chunked> {
        unpack_series!(self, UInt32)
    }

    /// Unpacks to `&UInt64Chunked`, or fails with `DataTypeMisMatch`.
    pub fn u64(&self) -> Result<&UInt64Chunked> {
        unpack_series!(self, UInt64)
    }

    /// Unpacks to `&BooleanChunked`, or fails with `DataTypeMisMatch`.
    pub fn bool(&self) -> Result<&BooleanChunked> {
        unpack_series!(self, Bool)
    }

    /// Unpacks to `&Utf8Chunked`, or fails with `DataTypeMisMatch`.
    pub fn utf8(&self) -> Result<&Utf8Chunked> {
        unpack_series!(self, Utf8)
    }

    /// Unpacks to `&Date32Chunked`, or fails with `DataTypeMisMatch`.
    pub fn date32(&self) -> Result<&Date32Chunked> {
        unpack_series!(self, Date32)
    }

    /// Unpacks to `&Date64Chunked`, or fails with `DataTypeMisMatch`.
    pub fn date64(&self) -> Result<&Date64Chunked> {
        unpack_series!(self, Date64)
    }

    /// Unpacks to `&Time32MillisecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn time32_millisecond(&self) -> Result<&Time32MillisecondChunked> {
        unpack_series!(self, Time32Millisecond)
    }

    /// Unpacks to `&Time32SecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn time32_second(&self) -> Result<&Time32SecondChunked> {
        unpack_series!(self, Time32Second)
    }

    /// Unpacks to `&Time64NanosecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn time64_nanosecond(&self) -> Result<&Time64NanosecondChunked> {
        unpack_series!(self, Time64Nanosecond)
    }

    /// Unpacks to `&Time64MicrosecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn time64_microsecond(&self) -> Result<&Time64MicrosecondChunked> {
        unpack_series!(self, Time64Microsecond)
    }

    /// Unpacks to `&DurationNanosecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn duration_nanosecond(&self) -> Result<&DurationNanosecondChunked> {
        unpack_series!(self, DurationNanosecond)
    }

    /// Unpacks to `&DurationMicrosecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn duration_microsecond(&self) -> Result<&DurationMicrosecondChunked> {
        unpack_series!(self, DurationMicrosecond)
    }

    /// Unpacks to `&DurationMillisecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn duration_millisecond(&self) -> Result<&DurationMillisecondChunked> {
        unpack_series!(self, DurationMillisecond)
    }

    /// Unpacks to `&DurationSecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn duration_second(&self) -> Result<&DurationSecondChunked> {
        unpack_series!(self, DurationSecond)
    }

    /// Unpacks to `&TimestampNanosecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn timestamp_nanosecond(&self) -> Result<&TimestampNanosecondChunked> {
        unpack_series!(self, TimestampNanosecond)
    }

    /// Unpacks to `&TimestampMicrosecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn timestamp_microsecond(&self) -> Result<&TimestampMicrosecondChunked> {
        unpack_series!(self, TimestampMicrosecond)
    }

    /// Unpacks to `&TimestampMillisecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn timestamp_millisecond(&self) -> Result<&TimestampMillisecondChunked> {
        unpack_series!(self, TimestampMillisecond)
    }

    /// Unpacks to `&TimestampSecondChunked`, or fails with `DataTypeMisMatch`.
    pub fn timestamp_second(&self) -> Result<&TimestampSecondChunked> {
        unpack_series!(self, TimestampSecond)
    }

    /// Unpacks to `&IntervalDayTimeChunked`, or fails with `DataTypeMisMatch`.
    pub fn interval_daytime(&self) -> Result<&IntervalDayTimeChunked> {
        unpack_series!(self, IntervalDayTime)
    }

    /// Unpacks to `&IntervalYearMonthChunked`, or fails with `DataTypeMisMatch`.
    pub fn interval_year_month(&self) -> Result<&IntervalYearMonthChunked> {
        unpack_series!(self, IntervalYearMonth)
    }

    /// Unpacks to `&LargeListChunked`, or fails with `DataTypeMisMatch`.
    pub fn large_list(&self) -> Result<&LargeListChunked> {
        unpack_series!(self, LargeList)
    }

    /// Appends an Arrow array to the underlying chunked array.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the chunked array's `append_array`.
    pub fn append_array(&mut self, other: ArrayRef) -> Result<&mut Self> {
        apply_method_all_series!(self, append_array, other)?;
        Ok(self)
    }

    /// Calls `limit(num_elements)` on the underlying chunked array and wraps
    /// the result in the same variant.
    pub fn limit(&self, num_elements: usize) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, limit, [num_elements], ?))
    }

    /// Calls `slice(offset, length)` on the underlying chunked array and wraps
    /// the result in the same variant.
    pub fn slice(&self, offset: usize, length: usize) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, slice, [offset, length], ?))
    }

    /// Appends `other` to this series in place.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::DataTypeMisMatch` when the two data types differ.
    pub fn append(&mut self, other: &Self) -> Result<&mut Self> {
        // The dtype check must come first: `other.as_ref()` panics when `other`
        // is not the variant the target chunked array expects.
        if self.dtype() == other.dtype() {
            apply_method_all_series!(self, append, other.as_ref());
            Ok(self)
        } else {
            Err(PolarsError::DataTypeMisMatch)
        }
    }

    /// Filters the series with a boolean mask by calling `filter` on the
    /// underlying chunked array.
    pub fn filter<T: AsRef<BooleanChunked>>(&self, filter: T) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, filter, [filter.as_ref()], ?))
    }

    /// Takes values at the indices yielded by `iter`, forwarding `capacity`
    /// to the chunked array's `take`.
    pub fn take_iter(
        &self,
        iter: impl Iterator<Item = usize>,
        capacity: Option<usize>,
    ) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, take, [iter, capacity], ?))
    }

    /// Unchecked variant of [`Series::take_iter`]; forwards to `take_unchecked`.
    ///
    /// # Safety
    ///
    /// The caller must uphold the contract of the chunked array's
    /// `take_unchecked`. NOTE(review): presumably every index must be in
    /// bounds — confirm against that method.
    pub unsafe fn take_iter_unchecked(
        &self,
        iter: impl Iterator<Item = usize>,
        capacity: Option<usize>,
    ) -> Self {
        apply_method_all_series_and_return!(self, take_unchecked, [iter, capacity],)
    }

    /// Unchecked variant of [`Series::take_opt_iter`]; forwards to
    /// `take_opt_unchecked`.
    ///
    /// # Safety
    ///
    /// The caller must uphold the contract of the chunked array's
    /// `take_opt_unchecked`. NOTE(review): presumably every `Some` index must
    /// be in bounds — confirm against that method.
    pub unsafe fn take_opt_iter_unchecked(
        &self,
        iter: impl Iterator<Item = Option<usize>>,
        capacity: Option<usize>,
    ) -> Self {
        apply_method_all_series_and_return!(self, take_opt_unchecked, [iter, capacity],)
    }

    /// Takes values at the optional indices yielded by `iter`, forwarding
    /// `capacity` to the chunked array's `take_opt`.
    pub fn take_opt_iter(
        &self,
        iter: impl Iterator<Item = Option<usize>>,
        capacity: Option<usize>,
    ) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, take_opt, [iter, capacity], ?))
    }

    /// Takes values at `indices`, using the index count as the capacity hint.
    pub fn take<T: AsTakeIndex>(&self, indices: &T) -> Result<Self> {
        let mut iter = indices.as_take_iter();
        let capacity = indices.take_index_len();
        self.take_iter(&mut iter, Some(capacity))
    }

    /// Length reported by the underlying chunked array.
    pub fn len(&self) -> usize {
        apply_method_all_series!(self, len,)
    }

    /// Calls `rechunk(chunk_lengths)` on the underlying chunked array and
    /// wraps the result in the same variant.
    pub fn rechunk(&self, chunk_lengths: Option<&[usize]>) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, rechunk, [chunk_lengths], ?))
    }

    /// Calls `head(length)` on the underlying chunked array and wraps the
    /// result in the same variant.
    pub fn head(&self, length: Option<usize>) -> Self {
        apply_method_all_series_and_return!(self, head, [length],)
    }

    /// Calls `tail(length)` on the underlying chunked array and wraps the
    /// result in the same variant.
    pub fn tail(&self, length: Option<usize>) -> Self {
        apply_method_all_series_and_return!(self, tail, [length],)
    }

    /// Casts the series to data type `N`, returning the matching variant.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the chunked array's `cast`.
    pub fn cast<N>(&self) -> Result<Self>
    where
        N: PolarsDataType,
    {
        let s = match self {
            Series::Bool(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Utf8(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::UInt8(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::UInt16(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::UInt32(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::UInt64(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Int8(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Int16(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Int32(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Int64(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Float32(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Float64(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Date32(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Date64(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Time32Millisecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Time32Second(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Time64Nanosecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::Time64Microsecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::DurationNanosecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::DurationMicrosecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::DurationMillisecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::DurationSecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::TimestampNanosecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::TimestampMicrosecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::TimestampMillisecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::TimestampSecond(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::IntervalDayTime(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::IntervalYearMonth(arr) => pack_ca_to_series(arr.cast::<N>()?),
            Series::LargeList(arr) => pack_ca_to_series(arr.cast::<N>()?),
        };
        Ok(s)
    }

    /// Borrows the underlying data as `&ChunkedArray<N>`.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::DataTypeMisMatch` when the series' data type is
    /// not equal to `N::get_data_type()`.
    pub fn unpack<N>(&self) -> Result<&ChunkedArray<N>>
    where
        N: PolarsDataType,
    {
        macro_rules! unpack_if_match {
            ($ca:ident) => {{
                if *$ca.dtype() == N::get_data_type() {
                    // SAFETY: the runtime dtype equals `N`'s dtype, so the
                    // source and target are expected to be the same
                    // `ChunkedArray` instantiation. A pointer cast is used
                    // instead of `mem::transmute` so that only the pointee
                    // type is reinterpreted. NOTE(review): this relies on one
                    // dtype mapping to exactly one `PolarsDataType` — confirm.
                    Ok(unsafe { &*($ca as *const _ as *const ChunkedArray<N>) })
                } else {
                    Err(PolarsError::DataTypeMisMatch)
                }
            }};
        }
        match self {
            Series::Bool(arr) => unpack_if_match!(arr),
            Series::Utf8(arr) => unpack_if_match!(arr),
            Series::UInt8(arr) => unpack_if_match!(arr),
            Series::UInt16(arr) => unpack_if_match!(arr),
            Series::UInt32(arr) => unpack_if_match!(arr),
            Series::UInt64(arr) => unpack_if_match!(arr),
            Series::Int8(arr) => unpack_if_match!(arr),
            Series::Int16(arr) => unpack_if_match!(arr),
            Series::Int32(arr) => unpack_if_match!(arr),
            Series::Int64(arr) => unpack_if_match!(arr),
            Series::Float32(arr) => unpack_if_match!(arr),
            Series::Float64(arr) => unpack_if_match!(arr),
            Series::Date32(arr) => unpack_if_match!(arr),
            Series::Date64(arr) => unpack_if_match!(arr),
            Series::Time32Millisecond(arr) => unpack_if_match!(arr),
            Series::Time32Second(arr) => unpack_if_match!(arr),
            Series::Time64Nanosecond(arr) => unpack_if_match!(arr),
            Series::Time64Microsecond(arr) => unpack_if_match!(arr),
            Series::DurationNanosecond(arr) => unpack_if_match!(arr),
            Series::DurationMicrosecond(arr) => unpack_if_match!(arr),
            Series::DurationMillisecond(arr) => unpack_if_match!(arr),
            Series::DurationSecond(arr) => unpack_if_match!(arr),
            Series::TimestampNanosecond(arr) => unpack_if_match!(arr),
            Series::TimestampMicrosecond(arr) => unpack_if_match!(arr),
            Series::TimestampMillisecond(arr) => unpack_if_match!(arr),
            Series::TimestampSecond(arr) => unpack_if_match!(arr),
            Series::IntervalDayTime(arr) => unpack_if_match!(arr),
            Series::IntervalYearMonth(arr) => unpack_if_match!(arr),
            Series::LargeList(arr) => unpack_if_match!(arr),
        }
    }

    /// Returns the value at `index` as an `AnyType` via the chunked array's
    /// `get_any`.
    pub fn get(&self, index: usize) -> AnyType {
        apply_method_all_series!(self, get_any, index)
    }

    /// Sorts the series in place and returns `self` for chaining.
    pub fn sort_in_place(&mut self, reverse: bool) -> &mut Self {
        apply_method_all_series!(self, sort_in_place, reverse);
        self
    }

    /// Calls `sort(reverse)` on the underlying chunked array and wraps the
    /// result in the same variant.
    pub fn sort(&self, reverse: bool) -> Self {
        apply_method_all_series_and_return!(self, sort, [reverse],)
    }

    /// Returns the result of the chunked array's `argsort(reverse)`.
    pub fn argsort(&self, reverse: bool) -> Vec<usize> {
        apply_method_all_series!(self, argsort, reverse)
    }

    /// Null count reported by the underlying chunked array.
    pub fn null_count(&self) -> usize {
        apply_method_all_series!(self, null_count,)
    }

    /// Calls `unique()` on the underlying chunked array and wraps the result
    /// in the same variant.
    pub fn unique(&self) -> Self {
        apply_method_all_series_and_return!(self, unique, [],)
    }

    /// Returns the result of the chunked array's `arg_unique()`.
    pub fn arg_unique(&self) -> Vec<usize> {
        apply_method_all_series!(self, arg_unique,)
    }

    /// Returns the boolean mask produced by the chunked array's `is_null()`.
    pub fn is_null(&self) -> BooleanChunked {
        apply_method_all_series!(self, is_null,)
    }

    /// Returns the result of the chunked array's `null_bits()`.
    pub fn null_bits(&self) -> Vec<(usize, Option<Buffer>)> {
        apply_method_all_series!(self, null_bits,)
    }

    /// Calls `reverse()` on the underlying chunked array and wraps the result
    /// in the same variant.
    pub fn reverse(&self) -> Self {
        apply_method_all_series_and_return!(self, reverse, [],)
    }

    /// Returns the result of the chunked array's `as_single_ptr()`.
    ///
    /// # Panics
    ///
    /// Panics (`unimplemented!`) for variants not covered by
    /// `apply_method_numeric_series!`, i.e. `Utf8`, `Bool` and `LargeList`.
    pub fn as_single_ptr(&mut self) -> usize {
        apply_method_numeric_series!(self, as_single_ptr,)
    }

    /// Calls `shift(periods, &None)` on the underlying chunked array and wraps
    /// the result in the same variant.
    pub fn shift(&self, periods: i32) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, shift, [periods, &None],?))
    }

    /// Calls `fill_none(strategy)` on the underlying chunked array and wraps
    /// the result in the same variant.
    pub fn fill_none(&self, strategy: FillNoneStrategy) -> Result<Self> {
        Ok(apply_method_all_series_and_return!(self, fill_none, [strategy],?))
    }

    /// Formats the series through the chunked array's `fmt_largelist()`.
    pub(crate) fn fmt_largelist(&self) -> String {
        apply_method_all_series!(self, fmt_largelist,)
    }
}
fn pack_ca_to_series<N: PolarsDataType>(ca: ChunkedArray<N>) -> Series {
unsafe {
match N::get_data_type() {
ArrowDataType::Boolean => Series::Bool(mem::transmute(ca)),
ArrowDataType::Utf8 => Series::Utf8(mem::transmute(ca)),
ArrowDataType::UInt8 => Series::UInt8(mem::transmute(ca)),
ArrowDataType::UInt16 => Series::UInt16(mem::transmute(ca)),
ArrowDataType::UInt32 => Series::UInt32(mem::transmute(ca)),
ArrowDataType::UInt64 => Series::UInt64(mem::transmute(ca)),
ArrowDataType::Int8 => Series::Int8(mem::transmute(ca)),
ArrowDataType::Int16 => Series::Int16(mem::transmute(ca)),
ArrowDataType::Int32 => Series::Int32(mem::transmute(ca)),
ArrowDataType::Int64 => Series::Int64(mem::transmute(ca)),
ArrowDataType::Float32 => Series::Float32(mem::transmute(ca)),
ArrowDataType::Float64 => Series::Float64(mem::transmute(ca)),
ArrowDataType::Date32(DateUnit::Day) => Series::Date32(mem::transmute(ca)),
ArrowDataType::Date64(DateUnit::Millisecond) => Series::Date64(mem::transmute(ca)),
ArrowDataType::Time64(datatypes::TimeUnit::Microsecond) => {
Series::Time64Microsecond(mem::transmute(ca))
}
ArrowDataType::Time64(datatypes::TimeUnit::Nanosecond) => {
Series::Time64Nanosecond(mem::transmute(ca))
}
ArrowDataType::Time32(datatypes::TimeUnit::Millisecond) => {
Series::Time32Millisecond(mem::transmute(ca))
}
ArrowDataType::Time32(datatypes::TimeUnit::Second) => {
Series::Time32Second(mem::transmute(ca))
}
ArrowDataType::Duration(datatypes::TimeUnit::Nanosecond) => {
Series::DurationNanosecond(mem::transmute(ca))
}
ArrowDataType::Duration(datatypes::TimeUnit::Microsecond) => {
Series::DurationMicrosecond(mem::transmute(ca))
}
ArrowDataType::Duration(datatypes::TimeUnit::Millisecond) => {
Series::DurationMillisecond(mem::transmute(ca))
}
ArrowDataType::Duration(datatypes::TimeUnit::Second) => {
Series::DurationSecond(mem::transmute(ca))
}
ArrowDataType::Timestamp(TimeUnit::Nanosecond, _) => {
Series::TimestampNanosecond(mem::transmute(ca))
}
ArrowDataType::Timestamp(TimeUnit::Microsecond, _) => {
Series::TimestampMicrosecond(mem::transmute(ca))
}
ArrowDataType::Timestamp(TimeUnit::Millisecond, _) => {
Series::TimestampMillisecond(mem::transmute(ca))
}
ArrowDataType::Timestamp(TimeUnit::Second, _) => {
Series::TimestampSecond(mem::transmute(ca))
}
ArrowDataType::Interval(IntervalUnit::YearMonth) => {
Series::IntervalYearMonth(mem::transmute(ca))
}
ArrowDataType::Interval(IntervalUnit::DayTime) => {
Series::IntervalDayTime(mem::transmute(ca))
}
ArrowDataType::LargeList(_) => Series::LargeList(mem::transmute(ca)),
_ => panic!("Not implemented: {:?}", N::get_data_type()),
}
}
}
/// Named constructor: builds `Self` from a name plus a value of type `T`.
///
/// `Phantom` is a marker type parameter (it may be unsized, e.g. a slice type)
/// that lets several implementations for different element types coexist on
/// the same `Self`.
pub trait NamedFrom<T, Phantom: ?Sized> {
    /// Creates a new instance called `name` from `values`.
    fn new(name: &str, values: T) -> Self;
}
/// Implements `NamedFrom<S, $slice>` for `Series` for any `S: AsRef<$slice>`.
///
/// The generated `new` builds a `ChunkedArray` with the given constructor
/// (`$ctor`) and wraps it in the given `Series` variant.
macro_rules! impl_named_from {
    ($slice:ty, $variant:ident, $ctor:ident) => {
        impl<S> NamedFrom<S, $slice> for Series
        where
            S: AsRef<$slice>,
        {
            fn new(name: &str, values: S) -> Self {
                Series::$variant(ChunkedArray::$ctor(name, values.as_ref()))
            }
        }
    };
}
/// Builds a `Utf8` series from anything that can be viewed as a slice of
/// string slices.
impl<'s, S> NamedFrom<S, [&'s str]> for Series
where
    S: AsRef<[&'s str]>,
{
    fn new(name: &str, values: S) -> Self {
        let strs = values.as_ref();
        Series::Utf8(ChunkedArray::new_from_slice(name, strs))
    }
}
/// Builds a `Utf8` series from anything that can be viewed as a slice of
/// optional string slices.
impl<'s, S> NamedFrom<S, [Option<&'s str>]> for Series
where
    S: AsRef<[Option<&'s str>]>,
{
    fn new(name: &str, values: S) -> Self {
        let opt_strs = values.as_ref();
        Series::Utf8(ChunkedArray::new_from_opt_slice(name, opt_strs))
    }
}
// `Series::new(name, values)` for slices of plain values: every element is
// present, so the array is built with `new_from_slice`.
impl_named_from!([String], Utf8, new_from_slice);
impl_named_from!([bool], Bool, new_from_slice);
impl_named_from!([u8], UInt8, new_from_slice);
impl_named_from!([u16], UInt16, new_from_slice);
impl_named_from!([u32], UInt32, new_from_slice);
impl_named_from!([u64], UInt64, new_from_slice);
impl_named_from!([i8], Int8, new_from_slice);
impl_named_from!([i16], Int16, new_from_slice);
impl_named_from!([i32], Int32, new_from_slice);
impl_named_from!([i64], Int64, new_from_slice);
impl_named_from!([f32], Float32, new_from_slice);
impl_named_from!([f64], Float64, new_from_slice);
// `Series::new(name, values)` for slices of `Option` values, built with
// `new_from_opt_slice`.
impl_named_from!([Option<String>], Utf8, new_from_opt_slice);
impl_named_from!([Option<bool>], Bool, new_from_opt_slice);
impl_named_from!([Option<u8>], UInt8, new_from_opt_slice);
impl_named_from!([Option<u16>], UInt16, new_from_opt_slice);
impl_named_from!([Option<u32>], UInt32, new_from_opt_slice);
impl_named_from!([Option<u64>], UInt64, new_from_opt_slice);
impl_named_from!([Option<i8>], Int8, new_from_opt_slice);
impl_named_from!([Option<i16>], Int16, new_from_opt_slice);
impl_named_from!([Option<i32>], Int32, new_from_opt_slice);
impl_named_from!([Option<i64>], Int64, new_from_opt_slice);
impl_named_from!([Option<f32>], Float32, new_from_opt_slice);
impl_named_from!([Option<f64>], Float64, new_from_opt_slice);
/// Implements `AsRef<ChunkedArray<datatypes::$dtype>>` for `Series`.
///
/// The generated `as_ref` returns the wrapped array when the series is the
/// `$variant` variant and panics via `unimplemented!()` for any other variant.
macro_rules! impl_as_ref_ca {
    ($dtype:ident, $variant:ident) => {
        impl AsRef<ChunkedArray<datatypes::$dtype>> for Series {
            fn as_ref(&self) -> &ChunkedArray<datatypes::$dtype> {
                if let Series::$variant(ca) = self {
                    ca
                } else {
                    unimplemented!()
                }
            }
        }
    };
}
// `AsRef<ChunkedArray<_>>` for every `Series` variant; each impl panics when
// the series holds a different variant than the one requested.
impl_as_ref_ca!(UInt8Type, UInt8);
impl_as_ref_ca!(UInt16Type, UInt16);
impl_as_ref_ca!(UInt32Type, UInt32);
impl_as_ref_ca!(UInt64Type, UInt64);
impl_as_ref_ca!(Int8Type, Int8);
impl_as_ref_ca!(Int16Type, Int16);
impl_as_ref_ca!(Int32Type, Int32);
impl_as_ref_ca!(Int64Type, Int64);
impl_as_ref_ca!(Float32Type, Float32);
impl_as_ref_ca!(Float64Type, Float64);
impl_as_ref_ca!(BooleanType, Bool);
impl_as_ref_ca!(Utf8Type, Utf8);
impl_as_ref_ca!(Date32Type, Date32);
impl_as_ref_ca!(Date64Type, Date64);
impl_as_ref_ca!(Time64NanosecondType, Time64Nanosecond);
impl_as_ref_ca!(Time64MicrosecondType, Time64Microsecond);
impl_as_ref_ca!(Time32MillisecondType, Time32Millisecond);
impl_as_ref_ca!(Time32SecondType, Time32Second);
impl_as_ref_ca!(DurationNanosecondType, DurationNanosecond);
impl_as_ref_ca!(DurationMicrosecondType, DurationMicrosecond);
impl_as_ref_ca!(DurationMillisecondType, DurationMillisecond);
impl_as_ref_ca!(DurationSecondType, DurationSecond);
impl_as_ref_ca!(TimestampNanosecondType, TimestampNanosecond);
impl_as_ref_ca!(TimestampMicrosecondType, TimestampMicrosecond);
impl_as_ref_ca!(TimestampMillisecondType, TimestampMillisecond);
impl_as_ref_ca!(TimestampSecondType, TimestampSecond);
impl_as_ref_ca!(IntervalDayTimeType, IntervalDayTime);
impl_as_ref_ca!(IntervalYearMonthType, IntervalYearMonth);
impl_as_ref_ca!(LargeListType, LargeList);
/// Implements `AsMut<ChunkedArray<datatypes::$dtype>>` for `Series`.
///
/// The generated `as_mut` returns the wrapped array mutably when the series is
/// the `$variant` variant and panics via `unimplemented!()` otherwise.
macro_rules! impl_as_mut_ca {
    ($dtype:ident, $variant:ident) => {
        impl AsMut<ChunkedArray<datatypes::$dtype>> for Series {
            fn as_mut(&mut self) -> &mut ChunkedArray<datatypes::$dtype> {
                if let Series::$variant(ca) = self {
                    ca
                } else {
                    unimplemented!()
                }
            }
        }
    };
}
// `AsMut<ChunkedArray<_>>` for every `Series` variant; each impl panics when
// the series holds a different variant than the one requested.
impl_as_mut_ca!(UInt8Type, UInt8);
impl_as_mut_ca!(UInt16Type, UInt16);
impl_as_mut_ca!(UInt32Type, UInt32);
impl_as_mut_ca!(UInt64Type, UInt64);
impl_as_mut_ca!(Int8Type, Int8);
impl_as_mut_ca!(Int16Type, Int16);
impl_as_mut_ca!(Int32Type, Int32);
impl_as_mut_ca!(Int64Type, Int64);
impl_as_mut_ca!(Float32Type, Float32);
impl_as_mut_ca!(Float64Type, Float64);
impl_as_mut_ca!(BooleanType, Bool);
impl_as_mut_ca!(Utf8Type, Utf8);
impl_as_mut_ca!(Date32Type, Date32);
impl_as_mut_ca!(Date64Type, Date64);
impl_as_mut_ca!(Time64NanosecondType, Time64Nanosecond);
impl_as_mut_ca!(Time64MicrosecondType, Time64Microsecond);
impl_as_mut_ca!(Time32MillisecondType, Time32Millisecond);
impl_as_mut_ca!(Time32SecondType, Time32Second);
impl_as_mut_ca!(DurationNanosecondType, DurationNanosecond);
impl_as_mut_ca!(DurationMicrosecondType, DurationMicrosecond);
impl_as_mut_ca!(DurationMillisecondType, DurationMillisecond);
impl_as_mut_ca!(DurationSecondType, DurationSecond);
impl_as_mut_ca!(TimestampNanosecondType, TimestampNanosecond);
impl_as_mut_ca!(TimestampMicrosecondType, TimestampMicrosecond);
impl_as_mut_ca!(TimestampMillisecondType, TimestampMillisecond);
impl_as_mut_ca!(TimestampSecondType, TimestampSecond);
impl_as_mut_ca!(IntervalDayTimeType, IntervalDayTime);
impl_as_mut_ca!(IntervalYearMonthType, IntervalYearMonth);
impl_as_mut_ca!(LargeListType, LargeList);
/// Implements `From<&Series>` for a reference to the given chunked array type.
///
/// The conversion returns the wrapped array when the series is the `$variant`
/// variant and panics via `unimplemented!()` for any other variant.
macro_rules! from_series_to_ca {
    ($variant:ident, $chunked:ident) => {
        impl<'s> From<&'s Series> for &'s $chunked {
            fn from(series: &'s Series) -> Self {
                if let Series::$variant(inner) = series {
                    inner
                } else {
                    unimplemented!()
                }
            }
        }
    };
}
// `From<&Series>` for a reference to each typed chunked array; each conversion
// panics when the series holds a different variant.
from_series_to_ca!(UInt8, UInt8Chunked);
from_series_to_ca!(UInt16, UInt16Chunked);
from_series_to_ca!(UInt32, UInt32Chunked);
from_series_to_ca!(UInt64, UInt64Chunked);
from_series_to_ca!(Int8, Int8Chunked);
from_series_to_ca!(Int16, Int16Chunked);
from_series_to_ca!(Int32, Int32Chunked);
from_series_to_ca!(Int64, Int64Chunked);
from_series_to_ca!(Float32, Float32Chunked);
from_series_to_ca!(Float64, Float64Chunked);
from_series_to_ca!(Bool, BooleanChunked);
from_series_to_ca!(Utf8, Utf8Chunked);
from_series_to_ca!(Date32, Date32Chunked);
from_series_to_ca!(Date64, Date64Chunked);
from_series_to_ca!(Time32Millisecond, Time32MillisecondChunked);
from_series_to_ca!(Time32Second, Time32SecondChunked);
from_series_to_ca!(Time64Microsecond, Time64MicrosecondChunked);
from_series_to_ca!(Time64Nanosecond, Time64NanosecondChunked);
from_series_to_ca!(DurationMillisecond, DurationMillisecondChunked);
from_series_to_ca!(DurationSecond, DurationSecondChunked);
from_series_to_ca!(DurationMicrosecond, DurationMicrosecondChunked);
from_series_to_ca!(DurationNanosecond, DurationNanosecondChunked);
from_series_to_ca!(TimestampMillisecond, TimestampMillisecondChunked);
from_series_to_ca!(TimestampSecond, TimestampSecondChunked);
from_series_to_ca!(TimestampMicrosecond, TimestampMicrosecondChunked);
from_series_to_ca!(TimestampNanosecond, TimestampNanosecondChunked);
from_series_to_ca!(IntervalDayTime, IntervalDayTimeChunked);
from_series_to_ca!(IntervalYearMonth, IntervalYearMonthChunked);
from_series_to_ca!(LargeList, LargeListChunked);
/// Builds a single-chunk `Series` from a name and an Arrow array, choosing the
/// variant from the array's data type.
///
/// # Panics
///
/// Panics via `unimplemented!()` for data types without a matching arm.
impl From<(&str, ArrayRef)> for Series {
    fn from((name, arr): (&str, ArrayRef)) -> Self {
        let chunks = vec![arr];

        // Builds the given chunked array type from `chunks` and converts it
        // into a series.
        macro_rules! build {
            ($chunked:ident) => {
                $chunked::new_from_chunks(name, chunks).into_series()
            };
        }

        match chunks[0].data_type() {
            ArrowDataType::Utf8 => build!(Utf8Chunked),
            ArrowDataType::Boolean => build!(BooleanChunked),
            ArrowDataType::UInt8 => build!(UInt8Chunked),
            ArrowDataType::UInt16 => build!(UInt16Chunked),
            ArrowDataType::UInt32 => build!(UInt32Chunked),
            ArrowDataType::UInt64 => build!(UInt64Chunked),
            ArrowDataType::Int8 => build!(Int8Chunked),
            ArrowDataType::Int16 => build!(Int16Chunked),
            ArrowDataType::Int32 => build!(Int32Chunked),
            ArrowDataType::Int64 => build!(Int64Chunked),
            ArrowDataType::Float32 => build!(Float32Chunked),
            ArrowDataType::Float64 => build!(Float64Chunked),
            ArrowDataType::Date32(DateUnit::Day) => build!(Date32Chunked),
            ArrowDataType::Date64(DateUnit::Millisecond) => build!(Date64Chunked),
            ArrowDataType::Time32(TimeUnit::Millisecond) => build!(Time32MillisecondChunked),
            ArrowDataType::Time32(TimeUnit::Second) => build!(Time32SecondChunked),
            ArrowDataType::Time64(TimeUnit::Nanosecond) => build!(Time64NanosecondChunked),
            ArrowDataType::Time64(TimeUnit::Microsecond) => build!(Time64MicrosecondChunked),
            ArrowDataType::Interval(IntervalUnit::DayTime) => build!(IntervalDayTimeChunked),
            ArrowDataType::Interval(IntervalUnit::YearMonth) => build!(IntervalYearMonthChunked),
            ArrowDataType::Duration(TimeUnit::Nanosecond) => build!(DurationNanosecondChunked),
            ArrowDataType::Duration(TimeUnit::Microsecond) => build!(DurationMicrosecondChunked),
            ArrowDataType::Duration(TimeUnit::Millisecond) => build!(DurationMillisecondChunked),
            ArrowDataType::Duration(TimeUnit::Second) => build!(DurationSecondChunked),
            ArrowDataType::Timestamp(TimeUnit::Nanosecond, _) => {
                build!(TimestampNanosecondChunked)
            }
            ArrowDataType::Timestamp(TimeUnit::Microsecond, _) => {
                build!(TimestampMicrosecondChunked)
            }
            ArrowDataType::Timestamp(TimeUnit::Millisecond, _) => {
                build!(TimestampMillisecondChunked)
            }
            ArrowDataType::Timestamp(TimeUnit::Second, _) => build!(TimestampSecondChunked),
            ArrowDataType::LargeList(_) => build!(LargeListChunked),
            _ => unimplemented!(),
        }
    }
}
#[cfg(test)]
mod test {
    use crate::prelude::*;

    /// Casting an `Int32` series must yield the variant of the target type.
    #[test]
    fn cast() {
        let ar = ChunkedArray::<Int32Type>::new_from_slice("a", &[1, 2]);
        let s = Series::Int32(ar);

        match s.cast::<Int64Type>().unwrap() {
            Series::Int64(_) => {}
            _ => panic!("casting to Int64Type should yield Series::Int64"),
        }

        match s.cast::<Float32Type>().unwrap() {
            Series::Float32(_) => {}
            _ => panic!("casting to Float32Type should yield Series::Float32"),
        }
    }

    /// Smoke test: the different ways of constructing a series compile and run.
    #[test]
    fn new_series() {
        // `&Vec<_>` is used on purpose to exercise the `AsRef` based constructor.
        Series::new("boolean series", &vec![true, false, true]);
        Series::new("int series", &[1, 2, 3]);
        let ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
        Series::from(ca);
    }

    /// Appending works for equal data types and errors on a mismatch.
    #[test]
    fn series_append() {
        let mut s1 = Series::new("a", &[1, 2]);
        let s2 = Series::new("b", &[3]);
        s1.append(&s2).unwrap();
        assert_eq!(s1.len(), 3);

        // A float series cannot be appended to an integer series.
        let s2 = Series::new("b", &[3.0]);
        assert!(s1.append(&s2).is_err())
    }
}