use num_traits::ToPrimitive;
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use rayon::prelude::ParallelSlice;
use rayon::slice::ParallelSliceMut;
use serde::{Deserialize, Serialize};
use {crate::Commute, crate::Partial};
const PARALLEL_THRESHOLD: usize = 10_000;
#[inline]
pub fn median<I>(it: I) -> Option<f64>
where
I: Iterator,
<I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
{
it.collect::<Unsorted<_>>().median()
}
#[inline]
pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
where
I: Iterator,
<I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
{
it.collect::<Unsorted<_>>().mad(precalc_median)
}
#[inline]
pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
where
I: Iterator,
<I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
{
it.collect::<Unsorted<_>>().quartiles()
}
#[inline]
pub fn mode<T, I>(it: I) -> Option<T>
where
T: PartialOrd + Clone + Send,
I: Iterator<Item = T>,
{
it.collect::<Unsorted<T>>().mode()
}
#[inline]
pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
where
T: PartialOrd + Clone + Send,
I: Iterator<Item = T>,
{
it.collect::<Unsorted<T>>().modes()
}
#[inline]
pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
where
T: PartialOrd + Clone + Send,
I: Iterator<Item = T>,
{
let (antimodes_result, antimodes_count, antimodes_occurrences) =
it.collect::<Unsorted<T>>().antimodes();
(antimodes_result, antimodes_count, antimodes_occurrences)
}
#[inline]
pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
where
I: Iterator,
<I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
{
it.collect::<Unsorted<_>>().gini(precalc_sum)
}
#[inline]
pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
where
I: Iterator,
<I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
{
it.collect::<Unsorted<_>>()
.kurtosis(precalc_mean, precalc_variance)
}
#[inline]
pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
where
I: Iterator,
<I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
V: PartialOrd + ToPrimitive,
{
it.collect::<Unsorted<_>>().percentile_rank(value)
}
#[inline]
pub fn atkinson<I>(
it: I,
epsilon: f64,
precalc_mean: Option<f64>,
precalc_geometric_sum: Option<f64>,
) -> Option<f64>
where
I: Iterator,
<I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
{
it.collect::<Unsorted<_>>()
.atkinson(epsilon, precalc_mean, precalc_geometric_sum)
}
fn median_on_sorted<T>(data: &[T]) -> Option<f64>
where
T: PartialOrd + ToPrimitive,
{
Some(match data.len() {
0 => {
core::hint::cold_path();
return None;
}
1 => data.first()?.to_f64()?,
len if len.is_multiple_of(2) => {
let idx = len / 2;
let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
f64::midpoint(v1, v2)
}
len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
})
}
fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
where
T: Sync + PartialOrd + ToPrimitive,
{
if data.is_empty() {
core::hint::cold_path();
return None;
}
let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
let mut abs_diff_vec: Vec<f64> = if data.len() < PARALLEL_THRESHOLD {
data.iter()
.map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
.collect()
} else {
data.par_iter()
.map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
.collect()
};
let len = abs_diff_vec.len();
let mid = len / 2;
let cmp = |a: &f64, b: &f64| a.total_cmp(b);
abs_diff_vec.select_nth_unstable_by(mid, cmp);
if len.is_multiple_of(2) {
let right = abs_diff_vec[mid];
let left = abs_diff_vec[..mid]
.iter()
.max_by(|a, b| cmp(a, b))
.copied()?;
Some(f64::midpoint(left, right))
} else {
Some(abs_diff_vec[mid])
}
}
fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
where
T: Sync + PartialOrd + ToPrimitive,
{
let len = data.len();
if len == 0 {
core::hint::cold_path();
return None;
}
if len == 1 {
core::hint::cold_path();
return Some(0.0);
}
let first_val = unsafe { data.get_unchecked(0).0.to_f64().unwrap_unchecked() };
if first_val < 0.0 {
core::hint::cold_path();
return None;
}
let (sum, weighted_sum) = if let Some(precalc) = precalc_sum {
if precalc < 0.0 {
core::hint::cold_path();
return None;
}
let weighted_sum = if len < PARALLEL_THRESHOLD {
let mut weighted_sum = 0.0;
for (i, x) in data.iter().enumerate() {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
}
weighted_sum
} else {
data.par_iter()
.enumerate()
.map(|(i, x)| {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
(i + 1) as f64 * val
})
.sum()
};
(precalc, weighted_sum)
} else if len < PARALLEL_THRESHOLD {
let mut sum = 0.0;
let mut weighted_sum = 0.0;
for (i, x) in data.iter().enumerate() {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
sum += val;
weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
}
(sum, weighted_sum)
} else {
data.par_iter()
.enumerate()
.fold(
|| (0.0_f64, 0.0_f64),
|acc, (i, x)| {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
(acc.0 + val, ((i + 1) as f64).mul_add(val, acc.1))
},
)
.reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
};
if sum == 0.0 {
core::hint::cold_path();
return None;
}
let n = len as f64;
let gini = 2.0f64.mul_add(weighted_sum / (n * sum), -(n + 1.0) / n);
Some(gini)
}
fn kurtosis_on_sorted<T>(
data: &[Partial<T>],
precalc_mean: Option<f64>,
precalc_variance: Option<f64>,
) -> Option<f64>
where
T: Sync + PartialOrd + ToPrimitive,
{
let len = data.len();
if len < 4 {
core::hint::cold_path();
return None;
}
let mean = precalc_mean.unwrap_or_else(|| {
let sum: f64 = if len < PARALLEL_THRESHOLD {
data.iter()
.map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
.sum()
} else {
data.par_iter()
.map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
.sum()
};
sum / len as f64
});
let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
if variance < 0.0 {
core::hint::cold_path();
return None;
}
let variance_sq = variance * variance;
let fourth_power_sum = if len < PARALLEL_THRESHOLD {
let mut sum = 0.0;
for x in data {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
let diff = val - mean;
let diff_sq = diff * diff;
sum = diff_sq.mul_add(diff_sq, sum);
}
sum
} else {
data.par_iter()
.map(|x| {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
let diff = val - mean;
let diff_sq = diff * diff;
diff_sq * diff_sq
})
.sum()
};
(variance_sq, fourth_power_sum)
} else {
let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
let mut variance_sum = 0.0;
let mut fourth_power_sum = 0.0;
for x in data {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
let diff = val - mean;
let diff_sq = diff * diff;
variance_sum += diff_sq;
fourth_power_sum = diff_sq.mul_add(diff_sq, fourth_power_sum);
}
(variance_sum, fourth_power_sum)
} else {
data.par_iter()
.fold(
|| (0.0_f64, 0.0_f64),
|acc, x| {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
let diff = val - mean;
let diff_sq = diff * diff;
(acc.0 + diff_sq, diff_sq.mul_add(diff_sq, acc.1))
},
)
.reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
};
let variance = variance_sum / len as f64;
if variance == 0.0 {
core::hint::cold_path();
return None;
}
let variance_sq = variance * variance;
(variance_sq, fourth_power_sum)
};
if variance_sq == 0.0 {
core::hint::cold_path();
return None;
}
let n = len as f64;
let denominator = (n - 1.0) * (n - 2.0) * (n - 3.0);
let adjustment = 3.0 * (n - 1.0) * (n - 1.0) / denominator;
let kurtosis =
(n * (n + 1.0) * fourth_power_sum).mul_add(1.0 / (denominator * variance_sq), -adjustment);
Some(kurtosis)
}
fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
where
T: PartialOrd + ToPrimitive,
V: PartialOrd + ToPrimitive,
{
let len = data.len();
if len == 0 {
core::hint::cold_path();
return None;
}
let value_f64 = value.to_f64()?;
let count_leq = data.binary_search_by(|x| {
x.0.to_f64()
.unwrap_or(f64::NAN)
.partial_cmp(&value_f64)
.unwrap_or(std::cmp::Ordering::Less)
});
let count = match count_leq {
Ok(idx) => {
let upper = data[idx + 1..].partition_point(|x| {
x.0.to_f64()
.is_some_and(|v| v.total_cmp(&value_f64).is_le())
});
idx + 1 + upper
}
Err(idx) => idx, };
Some((count as f64 / len as f64) * 100.0)
}
fn atkinson_on_sorted<T>(
data: &[Partial<T>],
epsilon: f64,
precalc_mean: Option<f64>,
precalc_geometric_sum: Option<f64>,
) -> Option<f64>
where
T: Sync + PartialOrd + ToPrimitive,
{
let len = data.len();
if len == 0 {
core::hint::cold_path();
return None;
}
if len == 1 {
core::hint::cold_path();
return Some(0.0);
}
if epsilon < 0.0 {
core::hint::cold_path();
return None;
}
let mean = precalc_mean.unwrap_or_else(|| {
let sum: f64 = if len < PARALLEL_THRESHOLD {
data.iter()
.map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
.sum()
} else {
data.par_iter()
.map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
.sum()
};
sum / len as f64
});
if mean == 0.0 {
core::hint::cold_path();
return None;
}
if (epsilon - 1.0).abs() < 1e-10 {
let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
precalc
} else if len < PARALLEL_THRESHOLD {
let mut sum = 0.0;
for x in data {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
if val <= 0.0 {
return None;
}
sum += val.ln();
}
sum
} else {
data.par_iter()
.map(|x| {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
if val <= 0.0 {
return f64::NAN;
}
val.ln()
})
.sum()
};
if geometric_sum.is_nan() {
core::hint::cold_path();
return None;
}
let geometric_mean = (geometric_sum / len as f64).exp();
return Some(1.0 - geometric_mean / mean);
}
let exponent = 1.0 - epsilon;
let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
let mut sum = 0.0;
for x in data {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
if val < 0.0 {
return None;
}
let ratio = val / mean;
sum += ratio.powf(exponent);
}
sum
} else {
data.par_iter()
.map(|x| {
let val = unsafe { x.0.to_f64().unwrap_unchecked() };
if val < 0.0 {
return f64::NAN;
}
let ratio = val / mean;
ratio.powf(exponent)
})
.sum()
};
if sum_powered.is_nan() || sum_powered <= 0.0 {
core::hint::cold_path();
return None;
}
let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
Some(atkinson)
}
#[cfg(test)]
fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
where
T: PartialOrd,
{
if data.is_empty() || k >= data.len() {
core::hint::cold_path();
return None;
}
let mut left = 0;
let mut right = data.len() - 1;
loop {
if left == right {
return Some(&data[left].0);
}
let pivot_idx = median_of_three_pivot(data, left, right);
let pivot_idx = partition(data, left, right, pivot_idx);
match k.cmp(&pivot_idx) {
std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
std::cmp::Ordering::Less => right = pivot_idx - 1,
std::cmp::Ordering::Greater => left = pivot_idx + 1,
}
}
}
#[cfg(test)]
fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
where
T: PartialOrd,
{
let mid = left + (right - left) / 2;
if data[left] <= data[mid] {
if data[mid] <= data[right] {
mid
} else if data[left] <= data[right] {
right
} else {
left
}
} else if data[left] <= data[right] {
left
} else if data[mid] <= data[right] {
right
} else {
mid
}
}
#[cfg(test)]
fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
where
T: PartialOrd,
{
data.swap(pivot_idx, right);
let mut store_idx = left;
for i in left..right {
if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
data.swap(i, store_idx);
store_idx += 1;
}
}
data.swap(store_idx, right);
store_idx
}
fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
where
T: PartialOrd + ToPrimitive,
{
let len = data.len();
match len {
0..=2 => {
core::hint::cold_path();
return None;
}
3 => {
return Some(
unsafe {
(
data.get_unchecked(0).0.to_f64()?,
data.get_unchecked(1).0.to_f64()?,
data.get_unchecked(2).0.to_f64()?,
)
},
);
}
_ => {}
}
let k = len / 4;
let remainder = len % 4;
unsafe {
Some(match remainder {
0 => {
let q1 = f64::midpoint(
data.get_unchecked(k - 1).0.to_f64()?,
data.get_unchecked(k).0.to_f64()?,
);
let q2 = f64::midpoint(
data.get_unchecked(2 * k - 1).0.to_f64()?,
data.get_unchecked(2 * k).0.to_f64()?,
);
let q3 = f64::midpoint(
data.get_unchecked(3 * k - 1).0.to_f64()?,
data.get_unchecked(3 * k).0.to_f64()?,
);
(q1, q2, q3)
}
1 => {
let q1 = f64::midpoint(
data.get_unchecked(k - 1).0.to_f64()?,
data.get_unchecked(k).0.to_f64()?,
);
let q2 = data.get_unchecked(2 * k).0.to_f64()?;
let q3 = f64::midpoint(
data.get_unchecked(3 * k).0.to_f64()?,
data.get_unchecked(3 * k + 1).0.to_f64()?,
);
(q1, q2, q3)
}
2 => {
let q1 = data.get_unchecked(k).0.to_f64()?;
let q2 = f64::midpoint(
data.get_unchecked(2 * k).0.to_f64()?,
data.get_unchecked(2 * k + 1).0.to_f64()?,
);
let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
(q1, q2, q3)
}
_ => {
let q1 = data.get_unchecked(k).0.to_f64()?;
let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
(q1, q2, q3)
}
})
}
}
fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
where
T: PartialOrd + ToPrimitive,
{
let len = data.len();
match len {
0..=2 => {
core::hint::cold_path();
return None;
}
3 => {
let mut indices: Vec<usize> = (0..3).collect();
let cmp = |a: &usize, b: &usize| {
data[*a]
.partial_cmp(&data[*b])
.unwrap_or(std::cmp::Ordering::Less)
};
indices.sort_unstable_by(cmp);
let min_val = data[indices[0]].0.to_f64()?;
let med_val = data[indices[1]].0.to_f64()?;
let max_val = data[indices[2]].0.to_f64()?;
return Some((min_val, med_val, max_val));
}
_ => {}
}
let k = len / 4;
let remainder = len % 4;
let mut indices: Vec<usize> = (0..len).collect();
let cmp = |a: &usize, b: &usize| {
data[*a]
.partial_cmp(&data[*b])
.unwrap_or(std::cmp::Ordering::Less)
};
let raw_positions: Vec<usize> = match remainder {
0 => vec![k - 1, k, 2 * k - 1, 2 * k, 3 * k - 1, 3 * k],
1 => vec![k - 1, k, 2 * k, 3 * k, 3 * k + 1],
2 => vec![k, 2 * k, 2 * k + 1, 3 * k + 1],
_ => vec![k, 2 * k + 1, 3 * k + 2],
};
let mut unique_positions = raw_positions.clone();
unique_positions.dedup();
let mut start = 0;
for &pos in &unique_positions {
indices[start..].select_nth_unstable_by(pos - start, &cmp);
start = pos + 1;
}
let values: Vec<f64> = raw_positions
.iter()
.map(|&pos| data[indices[pos]].0.to_f64())
.collect::<Option<Vec<_>>>()?;
match remainder {
0 => {
let q1 = f64::midpoint(values[0], values[1]);
let q2 = f64::midpoint(values[2], values[3]);
let q3 = f64::midpoint(values[4], values[5]);
Some((q1, q2, q3))
}
1 => {
let q1 = f64::midpoint(values[0], values[1]);
let q2 = values[2];
let q3 = f64::midpoint(values[3], values[4]);
Some((q1, q2, q3))
}
2 => {
let q1 = values[0];
let q2 = f64::midpoint(values[1], values[2]);
let q3 = values[3];
Some((q1, q2, q3))
}
_ => Some((values[0], values[1], values[2])),
}
}
fn mode_on_sorted<T, I>(it: I) -> Option<T>
where
T: PartialOrd,
I: Iterator<Item = T>,
{
use std::cmp::Ordering;
let (mut mode, mut next) = (None, None);
let (mut mode_count, mut next_count) = (0usize, 0usize);
for x in it {
if mode.as_ref() == Some(&x) {
mode_count += 1;
} else if next.as_ref() == Some(&x) {
next_count += 1;
} else {
next = Some(x);
next_count = 0;
}
match next_count.cmp(&mode_count) {
Ordering::Greater => {
mode = next;
mode_count = next_count;
next = None;
next_count = 0;
}
Ordering::Equal => {
mode = None;
mode_count = 0;
}
Ordering::Less => {}
}
}
mode
}
#[allow(clippy::type_complexity)]
#[inline]
fn modes_and_antimodes_on_sorted_slice<T>(
data: &[Partial<T>],
) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
where
T: PartialOrd + Clone,
{
let size = data.len();
if size == 0 {
core::hint::cold_path();
return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
}
let sqrt_size = size.isqrt();
let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
let mut current_value = &data[0].0;
let mut current_count = 1;
let mut highest_count = 1;
let mut lowest_count = u32::MAX;
for x in data.iter().skip(1) {
if x.0 == *current_value {
current_count += 1;
highest_count = highest_count.max(current_count);
} else {
runs.push((current_value, current_count));
lowest_count = lowest_count.min(current_count);
current_value = &x.0;
current_count = 1;
}
}
runs.push((current_value, current_count));
lowest_count = lowest_count.min(current_count);
if runs.len() == 1 {
let (val, count) = runs.pop().unwrap();
return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
}
if highest_count == 1 {
let antimodes_count = runs.len().min(10);
let total_count = runs.len();
let mut antimodes = Vec::with_capacity(antimodes_count);
for (val, _) in runs.into_iter().take(antimodes_count) {
antimodes.push(val.clone());
}
return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
}
let estimated_modes = (runs.len() / 10).clamp(1, 10);
let estimated_antimodes = 10.min(runs.len());
let mut modes_result = Vec::with_capacity(estimated_modes);
let mut antimodes_result = Vec::with_capacity(estimated_antimodes);
let mut mode_count = 0;
let mut antimodes_count = 0;
let mut antimodes_collected = 0_u32;
for (val, count) in &runs {
if *count == highest_count {
modes_result.push((*val).clone());
mode_count += 1;
}
if *count == lowest_count {
antimodes_count += 1;
if antimodes_collected < 10 {
antimodes_result.push((*val).clone());
antimodes_collected += 1;
}
}
}
(
(modes_result, mode_count, highest_count),
(antimodes_result, antimodes_count, lowest_count),
)
}
#[allow(clippy::unsafe_derive_deserialize)]
#[derive(Clone, Serialize, Deserialize)]
pub struct Unsorted<T> {
#[serde(skip)]
sorted: bool,
data: Vec<Partial<T>>,
}
impl<T: PartialEq> PartialEq for Unsorted<T> {
fn eq(&self, other: &Self) -> bool {
self.data == other.data
}
}
impl<T: PartialEq> Eq for Unsorted<T> where Partial<T>: Eq {}
impl<T: PartialOrd + Send> Unsorted<T> {
#[inline]
#[must_use]
pub fn new() -> Unsorted<T> {
Default::default()
}
#[allow(clippy::inline_always)]
#[inline(always)]
pub fn add(&mut self, v: T) {
self.sorted = false;
self.data.push(Partial(v));
}
#[inline]
#[must_use]
pub const fn len(&self) -> usize {
self.data.len()
}
#[inline]
#[must_use]
pub const fn is_empty(&self) -> bool {
self.data.is_empty()
}
#[inline]
fn sort(&mut self) {
if !self.sorted {
if self.data.len() < PARALLEL_THRESHOLD {
self.data.sort_unstable();
} else {
self.data.par_sort_unstable();
}
self.sorted = true;
}
}
#[inline]
const fn already_sorted(&mut self) {
self.sorted = true;
}
#[inline]
pub fn add_bulk(&mut self, values: Vec<T>) {
self.sorted = false;
self.data.reserve(values.len());
self.data.extend(values.into_iter().map(Partial));
}
#[inline]
pub fn shrink_to_fit(&mut self) {
self.data.shrink_to_fit();
}
#[inline]
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Unsorted {
sorted: true,
data: Vec::with_capacity(capacity),
}
}
#[inline]
pub fn push_ascending(&mut self, value: T) {
if let Some(last) = self.data.last() {
debug_assert!(last.0 <= value, "Value must be >= than last element");
}
self.data.push(Partial(value));
}
}
impl<T: PartialOrd + PartialEq + Clone + Send + Sync> Unsorted<T> {
#[inline]
pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
const CHUNK_SIZE: usize = 2048; const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240;
let len = self.data.len();
match len {
0 => return 0,
1 => return 1,
_ => {}
}
if sorted {
self.already_sorted();
} else {
self.sort();
}
let use_parallel = parallel_threshold != 0
&& (parallel_threshold == 1
|| len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
if use_parallel {
type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
let chunk_info: ChunkInfo<'_, T> = self
.data
.par_chunks(CHUNK_SIZE)
.map(|chunk| {
let mut count = u64::from(!chunk.is_empty());
for [a, b] in chunk.array_windows::<2>() {
if a != b {
count += 1;
}
}
(count, chunk.first(), chunk.last())
})
.collect();
let mut total = 0;
for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
total += count;
if i > 0
&& let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
&& prev_last == curr_first
{
total -= 1; }
}
total
} else {
let mut count = u64::from(!self.data.is_empty());
for [a, b] in self.data.array_windows::<2>() {
if a != b {
count += 1;
}
}
count
}
}
}
impl<T: PartialOrd + Clone + Send> Unsorted<T> {
#[inline]
pub fn mode(&mut self) -> Option<T> {
if self.data.is_empty() {
return None;
}
self.sort();
mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
}
#[inline]
fn modes(&mut self) -> (Vec<T>, usize, u32) {
if self.data.is_empty() {
return (Vec::new(), 0, 0);
}
self.sort();
modes_and_antimodes_on_sorted_slice(&self.data).0
}
#[inline]
fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
if self.data.is_empty() {
return (Vec::new(), 0, 0);
}
self.sort();
modes_and_antimodes_on_sorted_slice(&self.data).1
}
#[allow(clippy::type_complexity)]
#[inline]
pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
if self.data.is_empty() {
return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
}
self.sort();
modes_and_antimodes_on_sorted_slice(&self.data)
}
}
impl Unsorted<Vec<u8>> {
#[allow(clippy::inline_always)]
#[inline(always)]
pub fn add_bytes(&mut self, v: &[u8]) {
self.sorted = false;
self.data.push(Partial(v.to_vec()));
}
}
impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
#[inline]
pub fn median(&mut self) -> Option<f64> {
if self.data.is_empty() {
return None;
}
self.sort();
median_on_sorted(&self.data)
}
}
impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
#[inline]
pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
if self.data.is_empty() {
return None;
}
if existing_median.is_none() {
self.sort();
}
mad_on_sorted(&self.data, existing_median)
}
}
impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
#[inline]
pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
if self.data.is_empty() {
return None;
}
self.sort();
quartiles_on_sorted(&self.data)
}
}
impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
#[inline]
pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
if self.data.is_empty() {
return None;
}
self.sort();
gini_on_sorted(&self.data, precalc_sum)
}
#[inline]
pub fn kurtosis(
&mut self,
precalc_mean: Option<f64>,
precalc_variance: Option<f64>,
) -> Option<f64> {
if self.data.is_empty() {
return None;
}
self.sort();
kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
}
#[inline]
#[allow(clippy::needless_pass_by_value)]
pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
where
V: PartialOrd + ToPrimitive,
{
if self.data.is_empty() {
return None;
}
self.sort();
percentile_rank_on_sorted(&self.data, &value)
}
#[inline]
pub fn atkinson(
&mut self,
epsilon: f64,
precalc_mean: Option<f64>,
precalc_geometric_sum: Option<f64>,
) -> Option<f64> {
if self.data.is_empty() {
return None;
}
self.sort();
atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
}
}
impl<T: PartialOrd + ToPrimitive + Clone + Send> Unsorted<T> {
#[inline]
pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
if self.data.is_empty() {
return None;
}
quartiles_with_zero_copy_selection(&self.data)
}
}
impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
#[inline]
#[must_use]
pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
if self.data.is_empty() {
return None;
}
quartiles_with_zero_copy_selection(&self.data)
}
}
impl<T: PartialOrd + Send> Commute for Unsorted<T> {
#[inline]
fn merge(&mut self, mut v: Unsorted<T>) {
if v.is_empty() {
return;
}
self.sorted = false;
self.data.extend(std::mem::take(&mut v.data));
}
}
impl<T: PartialOrd> Default for Unsorted<T> {
#[inline]
fn default() -> Unsorted<T> {
Unsorted {
data: Vec::with_capacity(16),
sorted: true, }
}
}
impl<T: PartialOrd + Send> FromIterator<T> for Unsorted<T> {
#[inline]
fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
let mut v = Unsorted::new();
v.extend(it);
v
}
}
impl<T: PartialOrd> Extend<T> for Unsorted<T> {
#[inline]
fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
self.sorted = false;
self.data.extend(it.into_iter().map(Partial));
}
}
fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
where
T: PartialOrd + Clone,
{
let len = data.len();
if len == 0 || percentiles.iter().any(|&p| p > 100) {
return None;
}
let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
percentiles.to_vec()
} else {
let is_sorted_unique = percentiles.array_windows::<2>().all(|[a, b]| a < b);
if is_sorted_unique {
percentiles.to_vec()
} else {
let mut seen = [false; 101];
let mut sorted_unique = Vec::with_capacity(percentiles.len().min(101));
for &p in percentiles {
if !seen[p as usize] {
seen[p as usize] = true;
sorted_unique.push(p);
}
}
sorted_unique.sort_unstable();
sorted_unique
}
};
let mut results = Vec::with_capacity(unique_percentiles.len());
unsafe {
for &p in &unique_percentiles {
#[allow(clippy::cast_sign_loss)]
let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
let idx = rank.saturating_sub(1);
results.push(data.get_unchecked(idx).0.clone());
}
}
Some(results)
}
impl<T: PartialOrd + Clone + Send> Unsorted<T> {
#[inline]
pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
if self.data.is_empty() {
return None;
}
self.sort();
custom_percentiles_on_sorted(&self.data, percentiles)
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_cardinality_empty() {
let mut unsorted: Unsorted<i32> = Unsorted::new();
assert_eq!(unsorted.cardinality(false, 1), 0);
}
#[test]
fn test_cardinality_single_element() {
let mut unsorted = Unsorted::new();
unsorted.add(5);
assert_eq!(unsorted.cardinality(false, 1), 1);
}
#[test]
fn test_cardinality_unique_elements() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
assert_eq!(unsorted.cardinality(false, 1), 5);
}
#[test]
fn test_cardinality_duplicate_elements() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
assert_eq!(unsorted.cardinality(false, 1), 4);
}
#[test]
fn test_cardinality_all_same() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1; 100]);
assert_eq!(unsorted.cardinality(false, 1), 1);
}
#[test]
fn test_cardinality_large_range() {
let mut unsorted = Unsorted::new();
unsorted.extend(0..1_000_000);
assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
}
#[test]
fn test_cardinality_large_range_sequential() {
let mut unsorted = Unsorted::new();
unsorted.extend(0..1_000_000);
assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
}
#[test]
fn test_cardinality_presorted() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
unsorted.sort();
assert_eq!(unsorted.cardinality(true, 1), 5);
}
#[test]
fn test_cardinality_float() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
assert_eq!(unsorted.cardinality(false, 1), 4);
}
#[test]
fn test_cardinality_string() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
assert_eq!(unsorted.cardinality(false, 1), 3);
}
#[test]
fn test_quartiles_selection_vs_sorted() {
let test_cases = vec![
vec![3, 5, 7, 9],
vec![3, 5, 7],
vec![1, 2, 7, 11],
vec![3, 5, 7, 9, 12],
vec![2, 2, 3, 8, 10],
vec![3, 5, 7, 9, 12, 20],
vec![0, 2, 4, 8, 10, 11],
vec![3, 5, 7, 9, 12, 20, 21],
vec![1, 5, 6, 6, 7, 10, 19],
];
for test_case in test_cases {
let mut unsorted1 = Unsorted::new();
let mut unsorted2 = Unsorted::new();
let mut unsorted3 = Unsorted::new();
unsorted1.extend(test_case.clone());
unsorted2.extend(test_case.clone());
unsorted3.extend(test_case.clone());
let result_sorted = unsorted1.quartiles();
let result_selection = unsorted2.quartiles_with_selection();
let result_zero_copy = unsorted3.quartiles_zero_copy();
assert_eq!(
result_sorted, result_selection,
"Selection mismatch for test case: {:?}",
test_case
);
assert_eq!(
result_sorted, result_zero_copy,
"Zero-copy mismatch for test case: {:?}",
test_case
);
}
}
#[test]
fn test_quartiles_with_selection_small() {
let mut unsorted: Unsorted<i32> = Unsorted::new();
assert_eq!(unsorted.quartiles_with_selection(), None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2]);
assert_eq!(unsorted.quartiles_with_selection(), None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3]);
assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
}
#[test]
fn test_quickselect() {
let data = vec![
Partial(3),
Partial(1),
Partial(4),
Partial(1),
Partial(5),
Partial(9),
Partial(2),
Partial(6),
];
assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
let mut empty: Vec<Partial<i32>> = vec![];
assert_eq!(quickselect(&mut empty, 0), None);
let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
assert_eq!(quickselect(&mut data, 10), None); }
#[test]
fn median_stream() {
assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
}
#[test]
fn mad_stream() {
assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
assert_eq!(
mad(
vec![
86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
50, 61, 39, 88, 5, 13, 64
]
.into_iter(),
None
),
Some(16.0)
);
}
#[test]
fn mad_stream_precalc_median() {
let data = vec![3usize, 5, 7, 9].into_iter();
let median1 = median(data.clone());
assert_eq!(mad(data, median1), Some(2.0));
let data2 = vec![
86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
39, 88, 5, 13, 64,
]
.into_iter();
let median2 = median(data2.clone());
assert_eq!(mad(data2, median2), Some(16.0));
}
#[test]
fn mode_stream() {
assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
}
#[test]
fn median_floats() {
assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
}
#[test]
fn mode_floats() {
assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
}
#[test]
fn modes_stream() {
assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
let vec: Vec<u32> = vec![];
assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
}
#[test]
fn modes_floats() {
assert_eq!(
modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
(vec![], 0, 0)
);
assert_eq!(
modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
(vec![3.0], 1, 4)
);
assert_eq!(
modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
(vec![3.0, 4.0], 2, 2)
);
assert_eq!(
modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
(vec![1.0, 3.0], 2, 2)
);
}
#[test]
fn antimodes_stream() {
assert_eq!(
antimodes(vec![3usize, 5, 7, 9].into_iter()),
(vec![3, 5, 7, 9], 4, 1)
);
assert_eq!(
antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
);
assert_eq!(
antimodes(vec![1usize, 3, 3, 3].into_iter()),
(vec![1], 1, 1)
);
assert_eq!(
antimodes(vec![3usize, 3, 4, 4].into_iter()),
(vec![3, 4], 2, 2)
);
assert_eq!(
antimodes(
vec![
3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
14, 14, 15, 15
]
.into_iter()
),
(vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
);
assert_eq!(
antimodes(
vec![
3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
14, 14, 15, 15
]
.into_iter()
),
(vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
);
assert_eq!(
antimodes(vec![3usize, 3, 3, 4].into_iter()),
(vec![4], 1, 1)
);
assert_eq!(
antimodes(vec![4usize, 3, 3, 3].into_iter()),
(vec![4], 1, 1)
);
assert_eq!(
antimodes(vec![1usize, 1, 2, 2].into_iter()),
(vec![1, 2], 2, 2)
);
let vec: Vec<u32> = vec![];
assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
}
#[test]
fn antimodes_floats() {
assert_eq!(
antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
(vec![3.0, 5.0, 7.0, 9.0], 4, 1)
);
assert_eq!(
antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
(vec![], 0, 0)
);
assert_eq!(
antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
(vec![3.0, 4.0], 2, 2)
);
assert_eq!(
antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
(vec![2.0], 1, 1)
);
}
#[test]
fn test_custom_percentiles() {
let mut unsorted: Unsorted<i32> = Unsorted::new();
unsorted.extend(1..=11);
let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
assert_eq!(result, vec![3, 6, 9]);
let mut str_data = Unsorted::new();
str_data.extend(vec!["a", "b", "c", "d", "e"]);
let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
assert_eq!(result, vec!["a", "b", "c", "d"]);
let mut char_data = Unsorted::new();
char_data.extend('a'..='e');
let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
assert_eq!(result, vec!['b', 'c', 'd']);
let mut float_data = Unsorted::new();
float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
let result = float_data
.custom_percentiles(&[10, 30, 50, 70, 90])
.unwrap();
assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
let result = float_data.custom_percentiles(&[]).unwrap();
assert_eq!(result, Vec::<f64>::new());
let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
assert_eq!(result, vec![5.5]);
let result = float_data.custom_percentiles(&[0, 100]).unwrap();
assert_eq!(result, vec![1.1, 9.9]);
let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
assert_eq!(result, vec![3.3, 5.5, 7.7]);
let mut single = Unsorted::new();
single.add(42);
let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
assert_eq!(result, vec![42, 42, 42]);
}
#[test]
fn quartiles_stream() {
assert_eq!(
quartiles(vec![3usize, 5, 7].into_iter()),
Some((3., 5., 7.))
);
assert_eq!(
quartiles(vec![3usize, 5, 7, 9].into_iter()),
Some((4., 6., 8.))
);
assert_eq!(
quartiles(vec![1usize, 2, 7, 11].into_iter()),
Some((1.5, 4.5, 9.))
);
assert_eq!(
quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
Some((4., 7., 10.5))
);
assert_eq!(
quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
Some((2., 3., 9.))
);
assert_eq!(
quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
Some((5., 8., 12.))
);
assert_eq!(
quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
Some((2., 6., 10.))
);
assert_eq!(
quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
Some((5., 9., 20.))
);
assert_eq!(
quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
Some((5., 6., 10.))
);
}
#[test]
fn quartiles_floats() {
assert_eq!(
quartiles(vec![3_f64, 5., 7.].into_iter()),
Some((3., 5., 7.))
);
assert_eq!(
quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
Some((4., 6., 8.))
);
assert_eq!(
quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
Some((4., 7., 10.5))
);
assert_eq!(
quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
Some((5., 8., 12.))
);
assert_eq!(
quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
Some((5., 9., 20.))
);
}
#[test]
fn test_quartiles_zero_copy_small() {
let unsorted: Unsorted<i32> = Unsorted::new();
assert_eq!(unsorted.quartiles_zero_copy(), None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2]);
assert_eq!(unsorted.quartiles_zero_copy(), None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3]);
assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
let mut unsorted = Unsorted::new();
unsorted.extend(vec![3, 5, 7, 9]);
assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
}
#[test]
fn gini_empty() {
let mut unsorted: Unsorted<i32> = Unsorted::new();
assert_eq!(unsorted.gini(None), None);
let empty_vec: Vec<i32> = vec![];
assert_eq!(gini(empty_vec.into_iter(), None), None);
}
#[test]
fn gini_single_element() {
let mut unsorted = Unsorted::new();
unsorted.add(5);
assert_eq!(unsorted.gini(None), Some(0.0));
assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
}
#[test]
fn gini_perfect_equality() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![10, 10, 10, 10, 10]);
let result = unsorted.gini(None).unwrap();
assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
}
#[test]
fn gini_perfect_inequality() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![0, 0, 0, 0, 100]);
let result = unsorted.gini(None).unwrap();
assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
}
#[test]
fn gini_stream() {
let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
assert!(
(result - expected).abs() < 1e-10,
"Expected {}, got {}",
expected,
result
);
}
#[test]
fn gini_floats() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let result = unsorted.gini(None).unwrap();
let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
assert!((result - expected).abs() < 1e-10);
assert!(
(gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
< 1e-10
);
}
#[test]
fn gini_all_zeros() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![0, 0, 0, 0]);
assert_eq!(unsorted.gini(None), None);
assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
}
#[test]
fn gini_negative_values() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
let result = unsorted.gini(None);
assert_eq!(result, None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![-2, -1, 0, 1, 2]);
let result = unsorted.gini(None);
assert_eq!(result, None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![-1, 0, 1, 2, 3]);
let result = unsorted.gini(None);
assert_eq!(result, None);
}
#[test]
fn gini_known_cases() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 1, 1, 1, 1]);
let result = unsorted.gini(None).unwrap();
assert!((result - 0.0).abs() < 1e-10);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![0, 0, 0, 0, 1]);
let result = unsorted.gini(None).unwrap();
assert!((result - 0.8).abs() < 1e-10);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3]);
let result = unsorted.gini(None).unwrap();
let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
assert!((result - expected).abs() < 1e-10);
}
#[test]
fn gini_precalc_sum() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let precalc_sum = Some(15.0);
let result = unsorted.gini(precalc_sum).unwrap();
let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
assert!((result - expected).abs() < 1e-10);
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let result2 = unsorted2.gini(None).unwrap();
assert!((result - result2).abs() < 1e-10);
}
#[test]
fn gini_large_dataset() {
let data: Vec<i32> = (1..=1000).collect();
let result = gini(data.iter().copied(), None);
assert!(result.is_some());
let gini_val = result.unwrap();
assert!(gini_val > 0.0 && gini_val < 0.5);
}
#[test]
fn gini_unsorted_vs_sorted() {
let mut unsorted1 = Unsorted::new();
unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
let result1 = unsorted1.gini(None).unwrap();
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
let result2 = unsorted2.gini(None).unwrap();
assert!((result1 - result2).abs() < 1e-10);
}
#[test]
fn gini_small_values() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
let result = unsorted.gini(None);
assert!(result.is_some());
let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
assert!((result.unwrap() - expected).abs() < 1e-10);
}
#[test]
fn gini_large_values() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
let result = unsorted.gini(None);
assert!(result.is_some());
let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
assert!((result.unwrap() - expected).abs() < 1e-10);
}
#[test]
fn gini_two_elements() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2]);
let result = unsorted.gini(None).unwrap();
let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
assert!((result - expected).abs() < 1e-10);
}
#[test]
fn gini_precalc_sum_zero() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let result = unsorted.gini(Some(0.0));
assert_eq!(result, None);
}
#[test]
fn gini_precalc_sum_negative() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![-5, -3, -1, 1, 3]);
let result = unsorted.gini(None);
assert_eq!(result, None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3]);
let result = unsorted.gini(Some(-5.0));
assert_eq!(result, None);
}
#[test]
fn gini_different_types() {
let mut unsorted_u32 = Unsorted::new();
unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
let result_u32 = unsorted_u32.gini(None).unwrap();
let mut unsorted_i64 = Unsorted::new();
unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
let result_i64 = unsorted_i64.gini(None).unwrap();
let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
assert!((result_u32 - expected).abs() < 1e-10);
assert!((result_i64 - expected).abs() < 1e-10);
}
#[test]
fn gini_extreme_inequality() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
let result = unsorted.gini(None).unwrap();
assert!((result - 0.9).abs() < 1e-10);
}
#[test]
fn gini_duplicate_values() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
let result = unsorted.gini(None);
assert!(result.is_some());
let gini_val = result.unwrap();
assert!((0.0..=1.0).contains(&gini_val));
}
#[test]
fn kurtosis_empty() {
let mut unsorted: Unsorted<i32> = Unsorted::new();
assert_eq!(unsorted.kurtosis(None, None), None);
let empty_vec: Vec<i32> = vec![];
assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
}
#[test]
fn kurtosis_small() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2]);
assert_eq!(unsorted.kurtosis(None, None), None);
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3]);
assert_eq!(unsorted.kurtosis(None, None), None);
}
#[test]
fn kurtosis_normal_distribution() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let result = unsorted.kurtosis(None, None);
assert!(result.is_some());
}
#[test]
fn kurtosis_all_same() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![5, 5, 5, 5]);
assert_eq!(unsorted.kurtosis(None, None), None);
}
#[test]
fn kurtosis_stream() {
let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
assert!(result.is_some());
}
#[test]
fn kurtosis_precalc_mean_variance() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let mean = 3.0f64;
let variance = ((1.0f64 - 3.0).powi(2)
+ (2.0f64 - 3.0).powi(2)
+ (3.0f64 - 3.0).powi(2)
+ (4.0f64 - 3.0).powi(2)
+ (5.0f64 - 3.0).powi(2))
/ 5.0;
let result = unsorted.kurtosis(Some(mean), Some(variance));
assert!(result.is_some());
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let result2 = unsorted2.kurtosis(None, None);
assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
}
#[test]
fn kurtosis_precalc_mean_only() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let mean = 3.0f64;
let result = unsorted.kurtosis(Some(mean), None);
assert!(result.is_some());
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let result2 = unsorted2.kurtosis(None, None);
assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
}
#[test]
fn kurtosis_precalc_variance_only() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let variance = ((1.0f64 - 3.0).powi(2)
+ (2.0f64 - 3.0).powi(2)
+ (3.0f64 - 3.0).powi(2)
+ (4.0f64 - 3.0).powi(2)
+ (5.0f64 - 3.0).powi(2))
/ 5.0;
let result = unsorted.kurtosis(None, Some(variance));
assert!(result.is_some());
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let result2 = unsorted2.kurtosis(None, None);
assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
}
#[test]
fn kurtosis_exact_calculation() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4]);
let result = unsorted.kurtosis(None, None).unwrap();
assert!(result.is_finite());
}
#[test]
fn kurtosis_uniform_distribution() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
let result = unsorted.kurtosis(None, None).unwrap();
assert!(result.is_finite());
}
#[test]
fn kurtosis_large_dataset() {
let data: Vec<i32> = (1..=1000).collect();
let result = kurtosis(data.iter().copied(), None, None);
assert!(result.is_some());
let kurt_val = result.unwrap();
assert!(kurt_val.is_finite());
}
#[test]
fn kurtosis_unsorted_vs_sorted() {
let mut unsorted1 = Unsorted::new();
unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
let result1 = unsorted1.kurtosis(None, None).unwrap();
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
let result2 = unsorted2.kurtosis(None, None).unwrap();
assert!((result1 - result2).abs() < 1e-10);
}
#[test]
fn kurtosis_minimum_size() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4]);
let result = unsorted.kurtosis(None, None);
assert!(result.is_some());
assert!(result.unwrap().is_finite());
}
#[test]
fn kurtosis_heavy_tailed() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
let result = unsorted.kurtosis(None, None).unwrap();
assert!(result.is_finite());
assert!(result > -10.0); }
#[test]
fn kurtosis_light_tailed() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
let result = unsorted.kurtosis(None, None).unwrap();
assert!(result.is_finite());
}
#[test]
fn kurtosis_small_variance() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
let result = unsorted.kurtosis(None, None);
assert!(result.is_some());
assert!(result.unwrap().is_finite());
}
#[test]
fn kurtosis_precalc_zero_variance() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let result = unsorted.kurtosis(None, Some(0.0));
assert_eq!(result, None);
}
#[test]
fn kurtosis_precalc_negative_variance() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let result = unsorted.kurtosis(None, Some(-1.0));
let _ = result;
}
#[test]
fn kurtosis_different_types() {
let mut unsorted_u32 = Unsorted::new();
unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
let mut unsorted_i64 = Unsorted::new();
unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
assert!((result_u32 - result_i64).abs() < 1e-10);
}
#[test]
fn kurtosis_floating_point_precision() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
let result = unsorted.kurtosis(None, None);
assert!(result.is_some());
assert!(result.unwrap().is_finite());
}
#[test]
fn kurtosis_negative_values() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
let result = unsorted.kurtosis(None, None);
assert!(result.is_some());
assert!(result.unwrap().is_finite());
}
#[test]
fn kurtosis_mixed_positive_negative() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![-10, -5, 0, 5, 10]);
let result = unsorted.kurtosis(None, None);
assert!(result.is_some());
assert!(result.unwrap().is_finite());
}
#[test]
fn kurtosis_duplicate_values() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
let result = unsorted.kurtosis(None, None);
assert!(result.is_some());
assert!(result.unwrap().is_finite());
}
#[test]
fn kurtosis_precalc_mean_wrong() {
let mut unsorted1 = Unsorted::new();
unsorted1.extend(vec![1, 2, 3, 4, 5]);
let correct_result = unsorted1.kurtosis(None, None).unwrap();
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let wrong_mean = 10.0; let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
assert!((correct_result - wrong_result).abs() > 1e-5);
}
#[test]
fn percentile_rank_empty() {
let mut unsorted: Unsorted<i32> = Unsorted::new();
assert_eq!(unsorted.percentile_rank(5), None);
let empty_vec: Vec<i32> = vec![];
assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
}
#[test]
fn percentile_rank_basic() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
assert_eq!(unsorted.percentile_rank(0), Some(0.0));
assert_eq!(unsorted.percentile_rank(11), Some(100.0));
let rank = unsorted.percentile_rank(5).unwrap();
assert!((rank - 50.0).abs() < 1.0);
let rank = unsorted.percentile_rank(1).unwrap();
assert!((rank - 10.0).abs() < 1.0);
}
#[test]
fn percentile_rank_duplicates() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
let rank = unsorted.percentile_rank(2).unwrap();
assert!((rank - 40.0).abs() < 1.0);
}
#[test]
fn percentile_rank_stream() {
let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
assert_eq!(result, Some(60.0)); }
#[test]
fn percentile_rank_many_ties() {
let mut unsorted = Unsorted::new();
for _ in 0..100 {
unsorted.add(5u32);
}
for _ in 0..100 {
unsorted.add(10u32);
}
let rank = unsorted.percentile_rank(5).unwrap();
assert!((rank - 50.0).abs() < f64::EPSILON);
let mut unsorted2 = Unsorted::new();
for _ in 0..100 {
unsorted2.add(5u32);
}
for _ in 0..100 {
unsorted2.add(10u32);
}
let rank = unsorted2.percentile_rank(10).unwrap();
assert!((rank - 100.0).abs() < f64::EPSILON);
}
#[test]
fn atkinson_empty() {
let mut unsorted: Unsorted<i32> = Unsorted::new();
assert_eq!(unsorted.atkinson(1.0, None, None), None);
let empty_vec: Vec<i32> = vec![];
assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
}
#[test]
fn atkinson_single_element() {
let mut unsorted = Unsorted::new();
unsorted.add(5);
assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
}
#[test]
fn atkinson_perfect_equality() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![10, 10, 10, 10, 10]);
let result = unsorted.atkinson(1.0, None, None).unwrap();
assert!((result - 0.0).abs() < 1e-10);
}
#[test]
fn atkinson_epsilon_zero() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let result = unsorted.atkinson(0.0, None, None).unwrap();
assert!((result - 0.0).abs() < 1e-10);
}
#[test]
fn atkinson_epsilon_one() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let result = unsorted.atkinson(1.0, None, None);
assert!(result.is_some());
}
#[test]
fn atkinson_negative_epsilon() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
assert_eq!(unsorted.atkinson(-1.0, None, None), None);
}
#[test]
fn atkinson_zero_mean() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![0, 0, 0, 0]);
assert_eq!(unsorted.atkinson(1.0, None, None), None);
}
#[test]
fn atkinson_stream() {
let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
assert!(result.is_some());
}
#[test]
fn atkinson_precalc_mean_geometric_sum() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let mean = 3.0f64;
let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
assert!(result.is_some());
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let result2 = unsorted2.atkinson(1.0, None, None);
assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
}
#[test]
fn atkinson_precalc_mean_only() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let mean = 3.0f64;
let result = unsorted.atkinson(1.0, Some(mean), None);
assert!(result.is_some());
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let result2 = unsorted2.atkinson(1.0, None, None);
assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
}
#[test]
fn atkinson_precalc_geometric_sum_only() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1, 2, 3, 4, 5]);
let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
assert!(result.is_some());
let mut unsorted2 = Unsorted::new();
unsorted2.extend(vec![1, 2, 3, 4, 5]);
let result2 = unsorted2.atkinson(1.0, None, None);
assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
}
#[test]
fn test_median_with_infinity() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
assert_eq!(unsorted.median(), Some(2.0));
}
#[test]
fn test_median_with_neg_infinity() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![f64::NEG_INFINITY, 1.0f64, 2.0]);
assert_eq!(unsorted.median(), Some(1.0));
}
#[test]
fn test_quartiles_with_infinity() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![f64::NEG_INFINITY, 1.0, 2.0, 3.0, f64::INFINITY]);
let q = unsorted.quartiles();
assert!(q.is_some());
let (_, q2, _) = q.unwrap();
assert_eq!(q2, 2.0);
}
#[test]
fn test_mode_with_nan() {
let mut unsorted: Unsorted<f64> = Unsorted::new();
unsorted.extend(vec![1.0, f64::NAN, 2.0, 2.0, 3.0]);
let _result = unsorted.mode(); }
#[test]
fn test_gini_with_infinity() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
let g = unsorted.gini(None);
assert!(g.unwrap().is_nan());
}
#[test]
fn test_cardinality_with_infinity() {
let mut unsorted = Unsorted::new();
unsorted.extend(vec![1.0f64, f64::INFINITY, f64::NEG_INFINITY, 1.0]);
assert_eq!(unsorted.cardinality(false, 10_000), 3);
}
}
#[cfg(test)]
mod bench {
use super::*;
use std::time::Instant;
#[test]
#[ignore] fn comprehensive_quartiles_benchmark() {
let data_sizes = vec![
1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
];
println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
for size in data_sizes {
println!("--- Testing with {} elements ---", size);
let test_patterns = vec![
("Random", generate_random_data(size)),
("Reverse Sorted", {
let mut v = Vec::with_capacity(size);
for x in (0..size).rev() {
v.push(x as i32);
}
v
}),
("Already Sorted", {
let mut v = Vec::with_capacity(size);
for x in 0..size {
v.push(x as i32);
}
v
}),
("Many Duplicates", {
let mut v = Vec::with_capacity(size);
let chunk_size = size / 100;
for i in 0..100 {
v.extend(std::iter::repeat_n(i, chunk_size));
}
v.extend(std::iter::repeat_n(0, size - v.len()));
v
}),
];
for (pattern_name, test_data) in test_patterns {
println!("\n Pattern: {}", pattern_name);
let mut unsorted1 = Unsorted::new();
unsorted1.extend(test_data.clone());
let start = Instant::now();
let result_sorted = unsorted1.quartiles();
let sorted_time = start.elapsed();
let mut unsorted2 = Unsorted::new();
unsorted2.extend(test_data.clone());
let start = Instant::now();
let result_selection = unsorted2.quartiles_with_selection();
let selection_time = start.elapsed();
let mut unsorted3 = Unsorted::new();
unsorted3.extend(test_data);
let start = Instant::now();
let result_zero_copy = unsorted3.quartiles_zero_copy();
let zero_copy_time = start.elapsed();
assert_eq!(result_sorted, result_selection);
assert_eq!(result_sorted, result_zero_copy);
let selection_speedup =
sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
let zero_copy_speedup =
sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
println!(" Sorting: {:>12?}", sorted_time);
println!(
" Selection: {:>12?} (speedup: {:.2}x)",
selection_time, selection_speedup
);
println!(
" Zero-copy: {:>12?} (speedup: {:.2}x)",
zero_copy_time, zero_copy_speedup
);
let best_algorithm =
if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
"ZERO-COPY"
} else if selection_speedup > 1.0 {
"SELECTION"
} else {
"SORTING"
};
println!(" Best: {}", best_algorithm);
}
println!(); }
}
fn generate_random_data(size: usize) -> Vec<i32> {
let mut rng = 1234567u64;
let mut vec = Vec::with_capacity(size);
for _ in 0..size {
rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
vec.push((rng >> 16) as i32);
}
vec
}
#[test]
#[ignore] fn find_selection_threshold() {
println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
let mut found_threshold = None;
let test_sizes = vec![
1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
15_000_000, 20_000_000, 25_000_000, 30_000_000,
];
for size in test_sizes {
println!("Testing size: {}", size);
let test_data = generate_random_data(size);
let iterations = 3;
let mut sorting_total = 0u128;
let mut selection_total = 0u128;
let mut zero_copy_total = 0u128;
for i in 0..iterations {
println!(" Iteration {}/{}", i + 1, iterations);
let mut unsorted1 = Unsorted::new();
unsorted1.extend(test_data.clone());
let start = Instant::now();
let _result_sorted = unsorted1.quartiles();
sorting_total += start.elapsed().as_nanos();
let mut unsorted2 = Unsorted::new();
unsorted2.extend(test_data.clone());
let start = Instant::now();
let _result_selection = unsorted2.quartiles_with_selection();
selection_total += start.elapsed().as_nanos();
let mut unsorted3 = Unsorted::new();
unsorted3.extend(test_data.clone());
let start = Instant::now();
let _result_zero_copy = unsorted3.quartiles_zero_copy();
zero_copy_total += start.elapsed().as_nanos();
}
let avg_sorting = sorting_total / iterations as u128;
let avg_selection = selection_total / iterations as u128;
let avg_zero_copy = zero_copy_total / iterations as u128;
let selection_speedup = avg_sorting as f64 / avg_selection as f64;
let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
println!(
" Average sorting: {:>12.2}ms",
avg_sorting as f64 / 1_000_000.0
);
println!(
" Average selection: {:>12.2}ms (speedup: {:.2}x)",
avg_selection as f64 / 1_000_000.0,
selection_speedup
);
println!(
" Average zero-copy: {:>12.2}ms (speedup: {:.2}x)",
avg_zero_copy as f64 / 1_000_000.0,
zero_copy_speedup
);
if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
found_threshold = Some(size);
let best_method = if zero_copy_speedup > selection_speedup {
"Zero-copy"
} else {
"Selection"
};
println!(
" *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
best_method, size
);
}
println!();
}
match found_threshold {
Some(threshold) => println!(
"🎯 Selection algorithm becomes faster at approximately {} elements",
threshold
),
None => println!("❌ Selection algorithm did not become faster in the tested range"),
}
}
#[test]
#[ignore] fn benchmark_different_data_types() {
println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
let size = 5_000_000;
println!("Testing with f64 data:");
let float_data: Vec<f64> = generate_random_data(size)
.into_iter()
.map(|x| x as f64 / 1000.0)
.collect();
let mut unsorted1 = Unsorted::new();
unsorted1.extend(float_data.clone());
let start = Instant::now();
let _result = unsorted1.quartiles();
let sorting_time = start.elapsed();
let mut unsorted2 = Unsorted::new();
unsorted2.extend(float_data.clone());
let start = Instant::now();
let _result = unsorted2.quartiles_with_selection();
let selection_time = start.elapsed();
let mut unsorted3 = Unsorted::new();
unsorted3.extend(float_data);
let start = Instant::now();
let _result = unsorted3.quartiles_zero_copy();
let zero_copy_time = start.elapsed();
println!(" Sorting: {:?}", sorting_time);
println!(" Selection: {:?}", selection_time);
println!(" Zero-copy: {:?}", zero_copy_time);
println!(
" Selection Speedup: {:.2}x",
sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
);
println!(
" Zero-copy Speedup: {:.2}x\n",
sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
);
println!("Testing with i64 data:");
let int64_data: Vec<i64> = generate_random_data(size)
.into_iter()
.map(|x| x as i64 * 1000)
.collect();
let mut unsorted1 = Unsorted::new();
unsorted1.extend(int64_data.clone());
let start = Instant::now();
let _result = unsorted1.quartiles();
let sorting_time = start.elapsed();
let mut unsorted2 = Unsorted::new();
unsorted2.extend(int64_data.clone());
let start = Instant::now();
let _result = unsorted2.quartiles_with_selection();
let selection_time = start.elapsed();
let mut unsorted3 = Unsorted::new();
unsorted3.extend(int64_data);
let start = Instant::now();
let _result = unsorted3.quartiles_zero_copy();
let zero_copy_time = start.elapsed();
println!(" Sorting: {:?}", sorting_time);
println!(" Selection: {:?}", selection_time);
println!(" Zero-copy: {:?}", zero_copy_time);
println!(
" Selection Speedup: {:.2}x",
sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
);
println!(
" Zero-copy Speedup: {:.2}x",
sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
);
}
}