use std::hash::Hash;
use arrow::array::BooleanArray;
use arrow::bitmap::MutableBitmap;
use arrow::legacy::utils::CustomIterTools;
use polars_core::prelude::*;
use polars_core::utils::NoNull;
use polars_core::with_match_physical_numeric_polars_type;
use polars_utils::total_ord::{ToTotalOrd, TotalEq, TotalHash};
pub fn is_last_distinct(s: &Series) -> PolarsResult<BooleanChunked> {
if s.is_empty() {
return Ok(BooleanChunked::full_null(s.name().clone(), 0));
} else if s.len() == 1 {
return Ok(BooleanChunked::new(s.name().clone(), &[true]));
}
let s = s.to_physical_repr();
use DataType::*;
let out = match s.dtype() {
Boolean => {
let ca = s.bool().unwrap();
is_last_distinct_boolean(ca)
},
Binary => {
let ca = s.binary().unwrap();
is_last_distinct_bin(ca)
},
String => {
let s = s.cast(&Binary).unwrap();
return is_last_distinct(&s);
},
dt if dt.is_primitive_numeric() => {
with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
is_last_distinct_numeric(ca)
})
},
#[cfg(feature = "dtype-struct")]
Struct(_) => return is_last_distinct_struct(&s),
List(inner) => {
polars_ensure!(
!inner.is_nested(),
InvalidOperation: "`is_last_distinct` on list type is only allowed if the inner type is not nested."
);
let ca = s.list().unwrap();
return is_last_distinct_list(ca);
},
dt => polars_bail!(opq = is_last_distinct, dt),
};
Ok(out)
}
fn is_last_distinct_boolean(ca: &BooleanChunked) -> BooleanChunked {
let mut out = MutableBitmap::with_capacity(ca.len());
out.extend_constant(ca.len(), false);
if ca.null_count() == ca.len() {
out.set(ca.len() - 1, true);
}
else {
let mut first_true_found = false;
let mut first_false_found = false;
let mut first_null_found = false;
let mut all_found = false;
let ca = ca.rechunk();
ca.downcast_as_array()
.iter()
.enumerate()
.rev()
.find_map(|(idx, val)| match val {
Some(true) if !first_true_found => {
first_true_found = true;
all_found &= first_true_found;
out.set(idx, true);
if all_found { Some(()) } else { None }
},
Some(false) if !first_false_found => {
first_false_found = true;
all_found &= first_false_found;
out.set(idx, true);
if all_found { Some(()) } else { None }
},
None if !first_null_found => {
first_null_found = true;
all_found &= first_null_found;
out.set(idx, true);
if all_found { Some(()) } else { None }
},
_ => None,
});
}
let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);
BooleanChunked::with_chunk(ca.name().clone(), arr)
}
fn is_last_distinct_bin(ca: &BinaryChunked) -> BooleanChunked {
let tmp = ca.rechunk();
let arr = tmp.downcast_as_array();
let mut unique = PlHashSet::new();
arr.iter()
.rev()
.map(|opt_v| unique.insert(opt_v))
.collect_reversed::<NoNull<BooleanChunked>>()
.into_inner()
.with_name(ca.name().clone())
}
fn is_last_distinct_numeric<T>(ca: &ChunkedArray<T>) -> BooleanChunked
where
T: PolarsNumericType,
T::Native: TotalHash + TotalEq + ToTotalOrd,
<T::Native as ToTotalOrd>::TotalOrdItem: Hash + Eq,
{
let tmp = ca.rechunk();
let arr = tmp.downcast_as_array();
let mut unique = PlHashSet::new();
arr.iter()
.rev()
.map(|opt_v| unique.insert(opt_v.to_total_ord()))
.collect_reversed::<NoNull<BooleanChunked>>()
.into_inner()
.with_name(ca.name().clone())
}
#[cfg(feature = "dtype-struct")]
fn is_last_distinct_struct(s: &Series) -> PolarsResult<BooleanChunked> {
let groups = s.group_tuples(true, false)?;
let last = unsafe { groups.take_group_lasts() };
let mut out = MutableBitmap::with_capacity(s.len());
out.extend_constant(s.len(), false);
for idx in last {
unsafe { out.set_unchecked(idx as usize, true) }
}
let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);
Ok(BooleanChunked::with_chunk(s.name().clone(), arr))
}
fn is_last_distinct_list(ca: &ListChunked) -> PolarsResult<BooleanChunked> {
let groups = ca.group_tuples(true, false)?;
let last = unsafe { groups.take_group_lasts() };
let mut out = MutableBitmap::with_capacity(ca.len());
out.extend_constant(ca.len(), false);
for idx in last {
unsafe { out.set_unchecked(idx as usize, true) }
}
let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);
Ok(BooleanChunked::with_chunk(ca.name().clone(), arr))
}