use num_traits::Bounded;
#[cfg(feature = "dtype-struct")]
use polars_core::chunked_array::ops::row_encode::_get_rows_encoded_ca;
use polars_core::prelude::arity::unary_elementwise_values;
use polars_core::prelude::*;
use polars_core::series::IsSorted;
use polars_core::with_match_physical_numeric_polars_type;
#[cfg(feature = "hash")]
use polars_utils::aliases::PlSeedableRandomStateQuality;
use polars_utils::total_ord::TotalOrd;
use crate::series::ops::SeriesSealed;
pub trait SeriesMethods: SeriesSealed {
fn value_counts(
&self,
sort: bool,
parallel: bool,
name: PlSmallStr,
normalize: bool,
) -> PolarsResult<DataFrame> {
let s = self.as_series();
polars_ensure!(
s.name() != &name,
Duplicate: "using `value_counts` on a column/series named '{}' would lead to duplicate \
column names; change `name` to fix", name,
);
let groups = s.group_tuples(parallel, sort)?;
let values = unsafe { s.agg_first(&groups) }
.with_name(s.name().clone())
.into();
let counts = groups.group_count().with_name(name.clone());
let counts = if normalize {
let len = s.len() as f64;
let counts: Float64Chunked =
unary_elementwise_values(&counts, |count| count as f64 / len);
counts.into_column()
} else {
counts.into_column()
};
let height = counts.len();
let cols = vec![values, counts];
let df = unsafe { DataFrame::new_unchecked(height, cols) };
if sort {
df.sort(
[name],
SortMultipleOptions::default()
.with_order_descending(true)
.with_multithreaded(parallel),
)
} else {
Ok(df)
}
}
#[cfg(feature = "hash")]
fn hash(&self, build_hasher: PlSeedableRandomStateQuality) -> UInt64Chunked {
let s = self.as_series();
let mut h = vec![];
s.0.vec_hash(build_hasher, &mut h).unwrap();
UInt64Chunked::from_vec(s.name().clone(), h)
}
fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> {
polars_ensure!(self.is_sorted(Default::default())?, InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first", operation);
Ok(())
}
fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
let s = self.as_series();
let null_count = s.null_count();
if (options.descending
&& (options.nulls_last || null_count == 0)
&& matches!(s.is_sorted_flag(), IsSorted::Descending))
|| (!options.descending
&& (!options.nulls_last || null_count == 0)
&& matches!(s.is_sorted_flag(), IsSorted::Ascending))
{
return Ok(true);
}
#[cfg(feature = "dtype-struct")]
if matches!(s.dtype(), DataType::Struct(_)) {
let encoded = _get_rows_encoded_ca(
PlSmallStr::EMPTY,
&[s.clone().into()],
&[options.descending],
&[options.nulls_last],
false,
)?;
let options = SortOptions {
descending: false,
nulls_last: false,
..options
};
return encoded.into_series().is_sorted(options);
}
let s_len = s.len();
if null_count == s_len {
return Ok(true);
}
if null_count > 0 {
if options.nulls_last {
if s.slice((s_len - null_count) as i64, null_count)
.null_count()
!= null_count
{
return Ok(false);
}
} else if s.slice(0, null_count).null_count() != null_count {
return Ok(false);
}
}
if s.dtype().is_primitive_numeric() {
with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
return Ok(is_sorted_ca_num::<$T>(ca, options))
})
}
let non_null_len = s_len - null_count;
if non_null_len <= 1 {
return Ok(true);
}
let offset = (!options.nulls_last as i64) * (null_count as i64);
let non_null = s.slice(offset, non_null_len);
debug_assert_eq!(
non_null.null_count(),
0,
"internal error: `is_sorted` non-null slice contains nulls"
);
#[cfg(feature = "dtype-categorical")]
if matches!(non_null.dtype(), DataType::Categorical(_, _)) {
return is_sorted_categorical_lexical_adjacent(&non_null, options);
}
let phys = non_null.to_physical_repr();
let s_phys = phys.as_ref();
if s_phys.dtype().is_primitive_numeric() {
with_match_physical_numeric_polars_type!(s_phys.dtype(), |$T| {
let ca: &ChunkedArray<$T> = s_phys.as_ref().as_ref().as_ref();
return Ok(is_sorted_ca_num::<$T>(ca, options))
})
}
match s_phys.dtype() {
DataType::Boolean => {
let ca = s_phys.bool()?;
Ok(is_sorted_ca_bool(ca, options.descending))
},
DataType::String => {
let ca = s_phys.str()?;
Ok(is_sorted_adjacent_total_ord(
ca.no_null_iter(),
options.descending,
))
},
DataType::Binary => {
let ca = s_phys.binary()?;
Ok(is_sorted_adjacent_total_ord(
ca.no_null_iter(),
options.descending,
))
},
DataType::BinaryOffset => {
let ca = s_phys.binary_offset()?;
Ok(is_sorted_adjacent_total_ord(
ca.no_null_iter(),
options.descending,
))
},
_ => {
let cmp_len = non_null_len - 1;
let s1 = non_null.slice(0, cmp_len);
let s2 = non_null.slice(1, cmp_len);
let cmp_op = if options.descending {
Series::gt_eq
} else {
Series::lt_eq
};
Ok(cmp_op(&s1, &s2)?.all())
},
}
}
}
fn is_sorted_adjacent_total_ord<T: TotalOrd>(
it: impl Iterator<Item = T>,
descending: bool,
) -> bool {
let mut it = it;
let Some(mut prev) = it.next() else {
return true;
};
if descending {
for v in it {
if !prev.tot_ge(&v) {
return false;
}
prev = v;
}
} else {
for v in it {
if !prev.tot_le(&v) {
return false;
}
prev = v;
}
}
true
}
#[cfg(feature = "dtype-categorical")]
fn is_sorted_categorical_lexical_adjacent(s: &Series, options: SortOptions) -> PolarsResult<bool> {
polars_ensure!(
matches!(s.dtype(), DataType::Categorical(_, _)),
ComputeError: "internal error: expected Categorical in lexical `is_sorted` path",
);
with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
let ca = s.cat::<$C>()?;
Ok(is_sorted_adjacent_total_ord(
ca.iter_str().map(|opt| {
opt.expect(
"`iter_str` produced None while categorical null_count reported 0 (`is_sorted`)"
)
}),
options.descending,
))
})
}
fn is_sorted_ca_bool(ca: &BooleanChunked, descending: bool) -> bool {
let len = ca.len();
if len <= 1 {
return true;
}
debug_assert_eq!(
ca.null_count(),
0,
"internal error: `is_sorted_ca_bool` expects a non-null boolean slice"
);
if descending {
let Some(idx) = ca.first_false_idx() else {
return true;
};
!ca.slice(idx as i64, ca.len() - idx).any()
} else {
let Some(idx) = ca.first_true_idx() else {
return true;
};
ca.slice(idx as i64, ca.len() - idx).all()
}
}
fn check_cmp<T: NumericNative, Cmp: Fn(&T, &T) -> bool>(
vals: &[T],
f: Cmp,
previous: &mut T,
) -> bool {
let mut sorted = true;
for c in vals.chunks(1024) {
for v in c {
sorted &= f(previous, v);
*previous = *v;
}
if !sorted {
return false;
}
}
sorted
}
fn is_sorted_ca_num<T: PolarsNumericType>(ca: &ChunkedArray<T>, options: SortOptions) -> bool {
if let Ok(vals) = ca.cont_slice() {
let mut previous = vals[0];
return if options.descending {
check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
} else {
check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
};
};
if ca.null_count() == 0 {
let mut previous = if options.descending {
T::Native::max_value()
} else {
T::Native::min_value()
};
for arr in ca.downcast_iter() {
let vals = arr.values();
let sorted = if options.descending {
check_cmp(vals, |prev, c| prev.tot_ge(c), &mut previous)
} else {
check_cmp(vals, |prev, c| prev.tot_le(c), &mut previous)
};
if !sorted {
return false;
}
}
return true;
};
let null_count = ca.null_count();
if options.nulls_last {
let ca = ca.slice(0, ca.len() - null_count);
is_sorted_ca_num(&ca, options)
} else {
let ca = ca.slice(null_count as i64, ca.len() - null_count);
is_sorted_ca_num(&ca, options)
}
}
impl SeriesMethods for Series {}