use core::{
cmp::{min, Ordering},
marker::PhantomData,
ops::DerefMut,
};
fn dedup_by<T, F: Fn(&T, &T) -> bool>(d: &mut [T], same_bucket: F, keep: Keep) -> usize {
if d.is_empty() {
return 0;
}
let mut j = 0;
for i in 1..d.len() {
if !same_bucket(&d[i], &d[j]) {
j += 1;
if i != j {
d.swap(i, j);
}
} else if keep == Keep::Last {
d.swap(i, j);
}
}
j + 1
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Keep {
First,
Last,
}
pub trait Seq<T>: DerefMut<Target = [T]> {
fn with_capacity(capacity: usize) -> Self;
fn push(&mut self, value: T);
fn truncate(&mut self, size: usize);
}
impl<T> Seq<T> for Vec<T> {
fn with_capacity(capacity: usize) -> Self {
Self::with_capacity(capacity)
}
fn push(&mut self, value: T) {
self.push(value)
}
fn truncate(&mut self, len: usize) {
self.truncate(len)
}
}
impl<A: smallvec::Array> Seq<A::Item> for smallvec::SmallVec<A> {
fn with_capacity(capacity: usize) -> Self {
Self::with_capacity(capacity)
}
fn push(&mut self, value: A::Item) {
self.push(value)
}
fn truncate(&mut self, len: usize) {
self.truncate(len)
}
}
struct SortAndDedup<I, T, F> {
data: I,
sorted: usize,
cmp: F,
keep: Keep,
_t: PhantomData<T>,
}
pub fn sort_dedup<I: Iterator, R: Seq<I::Item>>(iter: I, keep: Keep) -> R
where
I::Item: Ord,
{
sort_dedup_by(iter, keep, |a: &I::Item, b: &I::Item| a.cmp(b))
}
pub fn sort_dedup_by<I: Iterator, R: Seq<I::Item>, F>(iter: I, keep: Keep, cmp: F) -> R
where
F: Fn(&I::Item, &I::Item) -> std::cmp::Ordering,
{
let mut agg: SortAndDedup<R, I::Item, _> = SortAndDedup {
data: R::with_capacity(min(iter.size_hint().0, 16)),
sorted: 0,
cmp,
keep,
_t: PhantomData,
};
for x in iter {
agg.push(x);
}
agg.into_inner()
}
pub fn sort_dedup_by_key<I: Iterator, R: Seq<I::Item>, K: Ord, F: Fn(&I::Item) -> &K>(
iter: I,
keep: Keep,
key: F,
) -> R {
sort_dedup_by(iter, keep, |a: &I::Item, b: &I::Item| key(a).cmp(key(b)))
}
impl<I, T, F> SortAndDedup<I, T, F>
where
F: Fn(&T, &T) -> Ordering,
I: Seq<T>,
{
fn sort_and_dedup(&mut self) {
if self.sorted < self.data.len() {
let cmp = &self.cmp;
let slice = self.data.deref_mut();
slice.sort_by(cmp);
let unique = dedup_by(slice, |a, b| cmp(a, b) == Ordering::Equal, self.keep);
self.data.truncate(unique);
self.sorted = self.data.len();
}
}
fn into_inner(self) -> I {
let mut res = self;
res.sort_and_dedup();
res.data
}
fn push(&mut self, elem: T) {
if self.sorted == self.data.len() {
if let Some(last) = self.data.last_mut() {
match (self.cmp)(last, &elem) {
Ordering::Less => {
self.sorted += 1;
self.data.push(elem);
}
Ordering::Equal => {
if self.keep == Keep::Last {
*last = elem;
}
}
Ordering::Greater => {
self.data.push(elem);
}
}
} else {
self.sorted += 1;
self.data.push(elem);
}
} else {
self.data.push(elem);
}
if self.data.len() >= 16 {
let sorted = self.sorted;
let unsorted = self.data.len() - sorted;
if unsorted > sorted {
self.sort_and_dedup();
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::fmt::Debug;
use quickcheck_macros::quickcheck;
use std::collections::*;
fn unary_op<E: Debug, R: Eq + Debug>(x: E, expected: R, actual: R) -> bool {
let res = expected == actual;
if !res {
println!("x:{:?} expected:{:?} actual:{:?}", x, expected, actual);
}
res
}
#[quickcheck]
fn sort_and_dedup_check(x: Vec<i32>) -> bool {
let expected: Vec<i32> = x
.iter()
.cloned()
.collect::<BTreeSet<i32>>()
.into_iter()
.collect();
let actual: Vec<i32> = sort_dedup(x.into_iter(), Keep::First);
expected == actual
}
#[quickcheck]
fn sort_and_dedup_by_check(x: Vec<(i32, i32)>) -> bool {
let expected: Vec<(i32, i32)> = x
.iter()
.cloned()
.collect::<std::collections::BTreeMap<i32, i32>>()
.into_iter()
.collect();
let actual = sort_dedup_by_key(x.iter().cloned(), Keep::Last, |x| &x.0);
unary_op(x, expected, actual)
}
#[test]
fn dedup_by() {
let mut v: Vec<(i32, i32)> = vec![(0, 1), (0, 2), (0, 3)];
let r = super::dedup_by(&mut v, |(a, _), (b, _)| a == b, Keep::Last);
assert_eq!(r, 1);
v.truncate(r);
assert_eq!(v, vec![(0, 3)]);
}
#[test]
fn sort_and_dedup_by_test() {
let v: Vec<(i32, i32)> = vec![(0, 1), (0, 2), (0, 3), (1, 1), (1, 2)];
let keep_first: Vec<_> = sort_dedup_by_key(v.clone().into_iter(), Keep::First, |x| &x.0);
let keep_last: Vec<_> = sort_dedup_by_key(v.clone().into_iter(), Keep::Last, |x| &x.0);
assert_eq!(keep_first, vec![(0, 1), (1, 1)]);
assert_eq!(keep_last, vec![(0, 3), (1, 2)]);
let expected: Vec<(i32, i32)> = v
.iter()
.cloned()
.collect::<std::collections::BTreeMap<i32, i32>>()
.into_iter()
.collect();
println!("{:?} {:?} {:?}", keep_first, keep_last, expected)
}
}