use super::convert::{f64_from_usize, usize_from_f64};
use float_cmp::approx_eq;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use std::f64;
use std::ops::Range;
use thiserror::Error;
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
enum Limits {
Equal { left: f64, right: f64, bins: usize },
Unequal { limits: Vec<f64> },
}
#[derive(Debug, Error)]
pub enum MergeBinError {
#[error("can not merge bins which end at {lhs} with bins that start at {rhs}")]
NonConsecutiveBins {
lhs: f64,
rhs: f64,
},
#[error("can not merge bins with indices {0:?}")]
NonConsecutiveRange(Range<usize>),
#[error("tried to merge bins with indices {range:?}, but there are only {bins} bins")]
InvalidRange {
range: Range<usize>,
bins: usize,
},
#[error("tried to merge bins with different dimensions {lhs} and {rhs}")]
IncompatibleDimensions {
lhs: usize,
rhs: usize,
},
}
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct BinLimits(Limits);
#[derive(Debug, Error)]
pub enum BinRemapperNewError {
#[error("could not determine the dimensions from a normalization vector with length {normalizations_len} and limits vector with length {limits_len}")]
DimensionUnknown {
normalizations_len: usize,
limits_len: usize,
},
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct BinRemapper {
normalizations: Vec<f64>,
limits: Vec<(f64, f64)>,
}
#[derive(Debug)]
pub struct BinInfo<'a> {
limits: &'a BinLimits,
remapper: Option<&'a BinRemapper>,
}
impl<'a> BinInfo<'a> {
#[must_use]
pub const fn new(limits: &'a BinLimits, remapper: Option<&'a BinRemapper>) -> Self {
Self { limits, remapper }
}
#[must_use]
pub fn bins(&self) -> usize {
self.limits.bins()
}
#[must_use]
pub fn dimensions(&self) -> usize {
self.remapper.map_or(1, BinRemapper::dimensions)
}
#[must_use]
pub fn left(&self, dimension: usize) -> Vec<f64> {
if dimension >= self.dimensions() {
vec![]
} else {
self.remapper.map_or_else(
|| {
self.limits
.limits()
.iter()
.skip(0)
.take(self.bins())
.copied()
.collect()
},
|remapper| {
remapper
.limits()
.iter()
.skip(dimension)
.step_by(self.dimensions())
.take(self.bins())
.map(|tuple| tuple.0)
.collect()
},
)
}
}
#[must_use]
pub fn right(&self, dimension: usize) -> Vec<f64> {
if dimension >= self.dimensions() {
vec![]
} else {
self.remapper.map_or_else(
|| {
self.limits
.limits()
.iter()
.skip(1)
.take(self.bins())
.copied()
.collect()
},
|remapper| {
remapper
.limits()
.iter()
.skip(dimension)
.step_by(self.dimensions())
.take(self.bins())
.map(|tuple| tuple.1)
.collect()
},
)
}
}
#[must_use]
pub fn limits(&self) -> Vec<Vec<(f64, f64)>> {
self.remapper.map_or_else(
|| {
self.limits
.limits()
.windows(2)
.map(|window| vec![(window[0], window[1])])
.collect()
},
|remapper| {
remapper
.limits()
.to_vec()
.chunks_exact(self.dimensions())
.map(<[(f64, f64)]>::to_vec)
.collect()
},
)
}
#[must_use]
pub fn normalizations(&self) -> Vec<f64> {
self.remapper.map_or_else(
|| self.limits.bin_sizes(),
|remapper| remapper.normalizations().to_vec(),
)
}
#[must_use]
pub fn slices(&self) -> Vec<(usize, usize)> {
self.remapper
.map_or_else(|| vec![(0, self.limits.bins())], BinRemapper::slices)
}
}
impl PartialEq<BinInfo<'_>> for BinInfo<'_> {
fn eq(&self, other: &BinInfo) -> bool {
(self.limits == other.limits) && (self.remapper == other.remapper)
}
}
impl BinRemapper {
pub fn new(
normalizations: Vec<f64>,
limits: Vec<(f64, f64)>,
) -> Result<Self, BinRemapperNewError> {
if limits.len() % normalizations.len() == 0 {
Ok(Self {
normalizations,
limits,
})
} else {
Err(BinRemapperNewError::DimensionUnknown {
normalizations_len: normalizations.len(),
limits_len: limits.len(),
})
}
}
#[must_use]
pub fn bins(&self) -> usize {
self.normalizations.len()
}
#[must_use]
pub fn dimensions(&self) -> usize {
self.limits.len() / self.normalizations.len()
}
#[must_use]
pub fn limits(&self) -> &[(f64, f64)] {
&self.limits
}
pub fn merge_bins(&mut self, range: Range<usize>) -> Result<(), MergeBinError> {
if self
.slices()
.iter()
.any(|&(start, end)| (start <= range.start) && (range.end <= end))
{
for bin in range.start + 1..range.end {
self.normalizations[range.start] += self.normalizations[bin];
}
let dim = self.dimensions();
self.normalizations.drain(range.start + 1..range.end);
self.limits[dim * (range.start + 1) - 1].1 = self.limits[dim * range.end - 1].1;
self.limits.drain(dim * (range.start + 1)..dim * range.end);
Ok(())
} else {
Err(MergeBinError::NonConsecutiveRange(range))
}
}
pub fn merge(&mut self, other: &Self) -> Result<(), MergeBinError> {
let lhs_dim = self.dimensions();
let rhs_dim = other.dimensions();
if lhs_dim != rhs_dim {
return Err(MergeBinError::IncompatibleDimensions {
lhs: lhs_dim,
rhs: rhs_dim,
});
}
self.normalizations.extend_from_slice(&other.normalizations);
self.limits.extend_from_slice(&other.limits);
Ok(())
}
#[must_use]
pub fn normalizations(&self) -> &[f64] {
&self.normalizations
}
#[must_use]
pub fn slices(&self) -> Vec<(usize, usize)> {
if self.dimensions() == 1 {
vec![(0, self.bins())]
} else {
self.limits()
.iter()
.enumerate()
.filter_map(|(index, x)| {
((index % self.dimensions()) != (self.dimensions() - 1)).then(|| x)
})
.collect::<Vec<_>>()
.chunks_exact(self.dimensions() - 1)
.enumerate()
.dedup_by_with_count(|(_, x), (_, y)| x == y)
.map(|(count, (index, _))| (index, index + count))
.collect()
}
}
pub fn delete_bins(&mut self, bins: &[Range<usize>]) {
let dim = self.dimensions();
for range in bins.iter().cloned().rev() {
self.normalizations.drain(range);
}
for range in bins.iter().rev() {
self.limits.drain((range.start * dim)..(range.end * dim));
}
}
}
impl PartialEq<Self> for BinRemapper {
fn eq(&self, other: &Self) -> bool {
(self.limits == other.limits) && (self.normalizations == other.normalizations)
}
}
impl BinLimits {
#[must_use]
pub fn new(mut limits: Vec<f64>) -> Self {
limits.sort_by(|left, right| left.partial_cmp(right).unwrap());
if limits
.iter()
.zip(limits.iter().skip(1))
.map(|(current, next)| next - current)
.collect::<Vec<f64>>()
.windows(2)
.all(|val| approx_eq!(f64, val[0], val[1], ulps = 8))
{
Self(Limits::Equal {
left: *limits.first().unwrap(),
right: *limits.last().unwrap(),
bins: limits.len() - 1,
})
} else {
Self(Limits::Unequal { limits })
}
}
#[must_use]
pub fn bins(&self) -> usize {
match &self.0 {
Limits::Equal { bins, .. } => *bins,
Limits::Unequal { limits } => limits.len() - 1,
}
}
#[must_use]
pub fn index(&self, value: f64) -> Option<usize> {
match &self.0 {
Limits::Equal { left, right, bins } => {
if value < *left || value >= *right {
None
} else {
Some(usize_from_f64(
(value - left) / (right - left) * f64_from_usize(*bins),
))
}
}
Limits::Unequal { limits } => {
match limits.binary_search_by(|left| left.partial_cmp(&value).unwrap()) {
Err(0) => None,
Err(index) if index == limits.len() => None,
Ok(index) if index == (limits.len() - 1) => None,
Ok(index) => Some(index),
Err(index) => Some(index - 1),
}
}
}
}
#[must_use]
pub fn left(&self) -> f64 {
match &self.0 {
Limits::Unequal { limits } => *limits.first().unwrap(),
Limits::Equal { left, .. } => *left,
}
}
#[must_use]
pub fn limits(&self) -> Vec<f64> {
match &self.0 {
Limits::Equal { left, right, bins } => (0..=*bins)
.map(|b| (*right - *left).mul_add(f64_from_usize(b) / f64_from_usize(*bins), *left))
.collect(),
Limits::Unequal { limits } => limits.clone(),
}
}
pub fn merge_bins(&mut self, range: Range<usize>) -> Result<(), MergeBinError> {
if range.end > self.bins() {
return Err(MergeBinError::InvalidRange {
range,
bins: self.bins(),
});
}
let mut new_limits = self.limits();
new_limits.drain(range.start + 1..range.end);
*self = Self::new(new_limits);
Ok(())
}
#[must_use]
pub fn bin_sizes(&self) -> Vec<f64> {
match &self.0 {
Limits::Equal { left, right, bins } => {
vec![(*right - *left) / f64_from_usize(*bins); *bins]
}
Limits::Unequal { limits } => limits.windows(2).map(|x| x[1] - x[0]).collect(),
}
}
pub fn merge(&mut self, other: &Self) -> Result<(), MergeBinError> {
if !approx_eq!(f64, self.right(), other.left(), ulps = 8) {
return Err(MergeBinError::NonConsecutiveBins {
lhs: self.right(),
rhs: other.left(),
});
}
let mut limits = self.limits();
let add_limits = other.limits();
*limits.last_mut().unwrap() =
0.5 * (*limits.last().unwrap() + *add_limits.first().unwrap());
limits.extend_from_slice(&add_limits[1..]);
*self = Self::new(limits);
Ok(())
}
#[must_use]
pub fn right(&self) -> f64 {
match &self.0 {
Limits::Unequal { limits } => *limits.last().unwrap(),
Limits::Equal { right, .. } => *right,
}
}
pub fn delete_bins_left(&mut self, bins: usize) {
let mut limits = self.limits();
limits.drain(..bins);
*self = Self::new(limits);
}
pub fn delete_bins_right(&mut self, bins: usize) {
let mut limits = self.limits();
limits.drain((limits.len() - bins)..);
*self = Self::new(limits);
}
}
#[cfg(test)]
mod test {
use super::*;
use std::iter;
#[test]
fn bin_limits_merge() {
let mut limits = BinLimits::new(vec![0.0, 1.0 / 3.0, 2.0 / 3.0, 1.0]);
assert!(limits
.merge(&BinLimits::new(vec![
1.0,
1.0 + 1.0 / 3.0,
1.0 + 2.0 / 3.0,
2.0
]))
.is_ok());
assert_eq!(limits.left(), 0.0);
assert_eq!(limits.right(), 2.0);
assert_eq!(limits.bins(), 6);
let non_consecutive_bins = BinLimits::new(vec![3.0, 4.0]);
assert!(limits.merge(&non_consecutive_bins).is_err());
assert_eq!(limits.left(), 0.0);
assert_eq!(limits.right(), 2.0);
assert_eq!(limits.bins(), 6);
assert!(limits
.merge(&BinLimits::new(vec![
-1.0,
-1.0 + 1.0 / 3.0,
-1.0 + 2.0 / 3.0,
0.0
]))
.is_err());
assert_eq!(limits.left(), 0.0);
assert_eq!(limits.right(), 2.0);
assert_eq!(limits.bins(), 6);
}
#[test]
fn bin_info_without_remapper() {
let limits = BinLimits::new(vec![0.0, 0.125, 0.25, 0.375, 0.5]);
let info = BinInfo::new(&limits, None);
assert_eq!(info.bins(), 4);
assert_eq!(info.dimensions(), 1);
assert_eq!(info.left(0), vec![0.0, 0.125, 0.25, 0.375]);
assert_eq!(info.right(0), vec![0.125, 0.25, 0.375, 0.5]);
assert_eq!(info.normalizations(), vec![0.125; 4]);
assert_eq!(info.left(1), vec![]);
assert_eq!(info.right(1), vec![]);
assert_eq!(info.slices(), [(0, 4)]);
}
#[test]
fn bin_info_with_remapper() {
let limits = BinLimits::new(vec![0.0, 0.125, 0.25, 0.375, 0.5]);
let remapper = BinRemapper::new(
vec![1.0; 4],
vec![
(0.0, 0.5),
(0.25, 0.75),
(1.0, 2.0),
(0.5, 1.0),
(0.75, 1.0),
(2.0, 5.0),
(1.0, 2.0),
(1.75, 2.0),
(5.0, 5.5),
(2.5, 3.0),
(2.0, 2.5),
(6.0, 8.0),
],
)
.unwrap();
let info = BinInfo::new(&limits, Some(&remapper));
assert_ne!(info, BinInfo::new(&limits, None));
assert_eq!(info, BinInfo::new(&limits, Some(&remapper)));
assert_eq!(info.bins(), 4);
assert_eq!(info.dimensions(), 3);
assert_eq!(info.left(0), vec![0.0, 0.5, 1.0, 2.5]);
assert_eq!(info.left(1), vec![0.25, 0.75, 1.75, 2.0]);
assert_eq!(info.left(2), vec![1.0, 2.0, 5.0, 6.0]);
assert_eq!(info.right(0), vec![0.5, 1.0, 2.0, 3.0]);
assert_eq!(info.right(1), vec![0.75, 1.0, 2.0, 2.5]);
assert_eq!(info.right(2), vec![2.0, 5.0, 5.5, 8.0]);
assert_eq!(info.normalizations(), vec![1.0; 4]);
assert_eq!(info.left(3), vec![]);
assert_eq!(info.right(3), vec![]);
assert_eq!(info.slices(), [(0, 1), (1, 2), (2, 3), (3, 4)]);
}
#[test]
fn bin_info_slices() {
let limits = BinLimits::new(
iter::successors(Some(0.0), |n| Some(n + 1.0))
.take(11)
.collect(),
);
let remapper = BinRemapper::new(
vec![1.0; 10],
vec![
(0.0, 1.0),
(0.0, 1.0),
(0.0, 1.0),
(0.0, 1.0),
(0.0, 1.0),
(1.0, 2.0),
(0.0, 1.0),
(0.0, 1.0),
(2.0, 3.0),
(0.0, 1.0),
(1.0, 2.0),
(0.0, 1.0),
(0.0, 1.0),
(1.0, 2.0),
(1.0, 2.0),
(0.0, 1.0),
(1.0, 2.0),
(2.0, 3.0),
(1.0, 2.0),
(1.0, 2.0),
(0.0, 1.0),
(1.0, 2.0),
(1.0, 2.0),
(1.0, 2.0),
(1.0, 2.0),
(1.0, 2.0),
(2.0, 3.0),
(1.0, 2.0),
(1.0, 2.0),
(3.0, 4.0),
],
)
.unwrap();
let info = BinInfo::new(&limits, Some(&remapper));
assert_eq!(info.slices(), [(0, 3), (3, 6), (6, 10)]);
}
#[test]
fn bin_info_trivial_slices() {
let limits = BinLimits::new(
iter::successors(Some(0.0), |x| Some(x + 1.0))
.take(11)
.collect(),
);
let remapper = BinRemapper::new(
vec![1.0; 10],
iter::successors(Some((0.0, 1.0)), |x| Some((x.0 + 1.0, x.1 + 1.0)))
.take(10)
.collect(),
)
.unwrap();
let info = BinInfo::new(&limits, Some(&remapper));
assert_eq!(info.slices(), [(0, 10)]);
}
#[test]
fn bin_limits() {
let limits = BinLimits::new(vec![0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0]);
assert_eq!(limits.bins(), 8);
assert_eq!(limits.index(-0.1), None);
assert_eq!(limits.index(0.1), Some(0));
assert_eq!(limits.index(0.2), Some(1));
assert_eq!(limits.index(0.3), Some(2));
assert_eq!(limits.index(0.4), Some(3));
assert_eq!(limits.index(0.55), Some(4));
assert_eq!(limits.index(0.65), Some(5));
assert_eq!(limits.index(0.8), Some(6));
assert_eq!(limits.index(0.9), Some(7));
assert_eq!(limits.index(1.1), None);
let limits = BinLimits::new(vec![0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0]);
assert_eq!(limits.index(0.0), Some(0));
assert_eq!(limits.index(0.125), Some(1));
assert_eq!(limits.index(0.25), Some(2));
assert_eq!(limits.index(0.375), Some(3));
assert_eq!(limits.index(0.5), Some(4));
assert_eq!(limits.index(0.625), Some(5));
assert_eq!(limits.index(0.75), Some(6));
assert_eq!(limits.index(0.875), Some(7));
assert_eq!(limits.index(1.0), None);
let limits = BinLimits::new(vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.5]);
assert_eq!(limits.bins(), 5);
assert_eq!(limits.index(-1.0), None);
assert_eq!(limits.index(0.05), Some(0));
assert_eq!(limits.index(0.15), Some(1));
assert_eq!(limits.index(0.25), Some(2));
assert_eq!(limits.index(0.35), Some(3));
assert_eq!(limits.index(0.45), Some(4));
assert_eq!(limits.index(1.1), None);
let limits = BinLimits::new(vec![0.0, 1.0]);
assert_eq!(limits.bins(), 1);
assert_eq!(limits.index(-0.1), None);
assert_eq!(limits.index(0.5), Some(0));
assert_eq!(limits.index(1.1), None);
let limits = BinLimits::new(vec![0.0, 0.1, 0.3, 0.6, 1.0]);
assert_eq!(limits.bins(), 4);
assert_eq!(limits.index(-1.0), None);
assert_eq!(limits.index(0.05), Some(0));
assert_eq!(limits.index(0.2), Some(1));
assert_eq!(limits.index(0.4), Some(2));
assert_eq!(limits.index(0.9), Some(3));
assert_eq!(limits.index(1.3), None);
let limits = BinLimits::new(vec![0.0, 0.25, 0.75, 0.875, 1.0]);
assert_eq!(limits.index(0.0), Some(0));
assert_eq!(limits.index(0.25), Some(1));
assert_eq!(limits.index(0.75), Some(2));
assert_eq!(limits.index(0.875), Some(3));
assert_eq!(limits.index(1.0), None);
let limits = BinLimits::new(vec![0.0, 0.4, 0.7, 0.9, 1.0]);
assert_eq!(limits.bins(), 4);
assert_eq!(limits.index(-1.0), None);
assert_eq!(limits.index(0.2), Some(0));
assert_eq!(limits.index(0.5), Some(1));
assert_eq!(limits.index(0.8), Some(2));
assert_eq!(limits.index(0.95), Some(3));
assert_eq!(limits.index(1.3), None);
}
#[test]
fn merge_bins() {
let mut limits = BinLimits::new(vec![0.0, 0.4, 0.7, 0.9, 1.0]);
assert!(limits.merge_bins(0..4).is_ok());
assert_eq!(limits.bins(), 1);
assert_eq!(limits.index(-1.0), None);
assert_eq!(limits.index(0.2), Some(0));
assert_eq!(limits.index(0.5), Some(0));
assert_eq!(limits.index(0.8), Some(0));
assert_eq!(limits.index(0.95), Some(0));
assert_eq!(limits.index(1.3), None);
}
#[test]
fn merge_bins_error() {
let mut limits = BinLimits::new(vec![0.0, 0.4, 0.7, 0.9, 1.0]);
assert!(limits.merge_bins(0..5).is_err());
}
#[test]
fn bin_remapper() {
let remapper = BinRemapper::new(
vec![1.0; 4],
vec![
(0.0, 0.5),
(0.25, 0.75),
(0.5, 1.0),
(0.75, 1.0),
(1.0, 2.0),
(1.75, 2.0),
(2.5, 3.0),
(2.0, 2.5),
],
)
.unwrap();
assert_ne!(
remapper,
BinRemapper::new(
vec![1.0; 4],
vec![(0.0, 1.0), (1.0, 2.0), (2.0, 3.0), (4.0, 5.0)]
)
.unwrap()
);
assert!(matches!(
BinRemapper::new(vec![1.0; 8], vec![(0.0, 1.0); 2]),
Err(BinRemapperNewError::DimensionUnknown{normalizations_len, limits_len})
if (normalizations_len == 8) && (limits_len == 2)
));
assert_eq!(remapper.bins(), 4);
assert_eq!(remapper.dimensions(), 2);
assert_eq!(
remapper.limits(),
&[
(0.0, 0.5),
(0.25, 0.75),
(0.5, 1.0),
(0.75, 1.0),
(1.0, 2.0),
(1.75, 2.0),
(2.5, 3.0),
(2.0, 2.5)
]
);
assert_eq!(remapper.normalizations(), vec![1.0; 4]);
}
#[test]
fn bin_remapper_merge_bins() {
let mut remapper = BinRemapper::new(
vec![1.0; 4],
vec![(0.0, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1.0)],
)
.unwrap();
remapper.merge_bins(0..4).unwrap();
assert_eq!(remapper.bins(), 1);
assert_eq!(remapper.dimensions(), 1);
assert_eq!(remapper.limits(), [(0.0, 1.0)]);
assert_eq!(remapper.normalizations(), [4.0]);
assert_eq!(remapper.slices(), [(0, 1)]);
}
}