use std::{cmp::Ordering, collections::BinaryHeap};
use crate::{Location, Report, WordExtremes, WordList};
#[derive(Debug, Clone)]
pub struct Exemplars<'w> {
lowest: Vec<WordExtremes<'w>>,
highest: Vec<WordExtremes<'w>>,
}
impl<'a> Exemplars<'a> {
#[inline]
#[must_use]
pub const fn lowest(&self) -> &[WordExtremes<'_>] {
self.lowest.as_slice()
}
#[inline]
#[must_use]
pub const fn highest(&self) -> &[WordExtremes<'_>] {
self.highest.as_slice()
}
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
#[must_use]
pub fn len(&self) -> usize {
debug_assert_eq!(
self.lowest.len(),
self.highest.len(),
"Exemplars.lowest & Exemplars.highest should have the same number \
of members",
);
self.lowest.len()
}
#[inline]
#[must_use]
pub const fn to_report(
self,
location: &'a Location,
word_list: &'a WordList,
) -> Report<'a> {
Report::new(location, word_list, self)
}
}
#[derive(Debug, Clone)]
pub(crate) struct ExemplarCollector<'w> {
lowest: BinaryHeap<ByLowest<'w>>,
highest: BinaryHeap<ByHighest<'w>>,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct ByLowest<'w>(WordExtremes<'w>);
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct ByHighest<'w>(WordExtremes<'w>);
impl Ord for ByLowest<'_> {
fn cmp(&self, other: &Self) -> Ordering {
self.0
.extremes
.lowest
.cmp(&other.0.extremes.lowest)
.then_with(|| self.0.word.cmp(other.0.word))
}
}
impl Ord for ByHighest<'_> {
fn cmp(&self, other: &Self) -> Ordering {
self.0
.extremes
.highest
.cmp(&other.0.extremes.highest)
.reverse()
.then_with(|| self.0.word.cmp(other.0.word))
}
}
impl PartialOrd for ByLowest<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl PartialOrd for ByHighest<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<'w> ExemplarCollector<'w> {
pub(crate) fn new(top_n: usize) -> Self {
assert_ne!(top_n, 0, "ExemplarCollector must have non-zero capacity");
Self {
lowest: BinaryHeap::with_capacity(top_n),
highest: BinaryHeap::with_capacity(top_n),
}
}
pub(crate) fn push(&mut self, elem: WordExtremes<'w>) {
let by_lowest = ByLowest(elem);
if self.lowest.len() < self.lowest.capacity() {
self.lowest.push(by_lowest);
} else if let Some(mut weakest_low) = self.lowest.peek_mut()
&& by_lowest < *weakest_low
{
*weakest_low = by_lowest;
}
let by_highest = ByHighest(elem);
if self.highest.len() < self.highest.capacity() {
self.highest.push(by_highest);
} else if let Some(mut weakest_high) = self.highest.peek_mut()
&& by_highest < *weakest_high
{
*weakest_high = by_highest;
}
}
#[cfg(feature = "rayon")]
pub(crate) fn merge_with(&mut self, other: Self) {
use itertools::Itertools;
let ExemplarCollector { lowest, highest } = other;
highest
.into_iter()
.map(|ByHighest(extremes)| extremes)
.chain(lowest.into_iter().map(|ByLowest(extremes)| extremes))
.unique()
.for_each(|extremes| self.push(extremes));
}
pub(crate) fn build(self) -> Exemplars<'w> {
Exemplars {
lowest: self
.lowest
.into_sorted_vec()
.into_iter()
.map(|by| by.0)
.collect(),
highest: self
.highest
.into_sorted_vec()
.into_iter()
.map(|by| by.0)
.collect(),
}
}
}
pub trait CollectToExemplars<'a>: private::Sealed {
fn collect_min_max_extremes(self, n: usize) -> Exemplars<'a>;
}
impl<'a, I> CollectToExemplars<'a> for I
where
I: IntoIterator<Item = WordExtremes<'a>>,
{
fn collect_min_max_extremes(self, n: usize) -> Exemplars<'a> {
self.into_iter()
.fold(ExemplarCollector::new(n), |mut acc, report| {
acc.push(report);
acc
})
.build()
}
}
mod private {
use super::*;
pub trait Sealed {}
impl<'a, I> Sealed for I where I: CollectToExemplars<'a> {}
}
#[cfg(test)]
mod unit_tests {
use ordered_float::NotNan;
use super::*;
use crate::VerticalExtremes;
fn word_extremes(
word: &str,
lowest: f64,
highest: f64,
) -> WordExtremes<'_> {
WordExtremes {
word,
extremes: VerticalExtremes {
highest: NotNan::new(highest).expect("highest was NaN"),
lowest: NotNan::new(lowest).expect("lowest was NaN"),
},
}
}
#[test]
fn by_lowest_sorting() {
let a = word_extremes("a", 10., 10.);
let y = word_extremes("y", 1., 10.);
assert_eq!(ByLowest(a).cmp(&ByLowest(y)), Ordering::Greater);
let b = word_extremes("b", 10., 20.);
assert_eq!(ByLowest(a).cmp(&ByLowest(b)), Ordering::Less);
}
#[test]
fn by_highest_sorting() {
let a = word_extremes("a", 10., 10.);
let y = word_extremes("y", 1., 10.);
assert_eq!(ByHighest(a).cmp(&ByHighest(y)), Ordering::Less);
let c = word_extremes("c", 10., 10.);
assert_eq!(ByHighest(a).cmp(&ByHighest(c)), Ordering::Less);
}
}