1use core::{
2 cmp::{min, Ordering},
3 marker::PhantomData,
4 ops::DerefMut,
5};
6
7fn dedup_by<T, F: Fn(&T, &T) -> bool>(d: &mut [T], same_bucket: F, keep: Keep) -> usize {
15 if d.is_empty() {
16 return 0;
17 }
18 let mut j = 0;
19 for i in 1..d.len() {
20 if !same_bucket(&d[i], &d[j]) {
21 j += 1;
22 if i != j {
23 d.swap(i, j);
24 }
25 } else if keep == Keep::Last {
26 d.swap(i, j);
27 }
28 }
29 j + 1
30}
31
32#[derive(Debug, PartialEq, Eq, Clone, Copy)]
34pub enum Keep {
35 First,
37 Last,
39}
40
41pub trait Seq<T>: DerefMut<Target = [T]> {
43 fn with_capacity(capacity: usize) -> Self;
45 fn push(&mut self, value: T);
47 fn truncate(&mut self, size: usize);
49}
50
51impl<T> Seq<T> for Vec<T> {
52 fn with_capacity(capacity: usize) -> Self {
53 Self::with_capacity(capacity)
54 }
55 fn push(&mut self, value: T) {
56 self.push(value)
57 }
58 fn truncate(&mut self, len: usize) {
59 self.truncate(len)
60 }
61}
62
63impl<A: smallvec::Array> Seq<A::Item> for smallvec::SmallVec<A> {
64 fn with_capacity(capacity: usize) -> Self {
65 Self::with_capacity(capacity)
66 }
67 fn push(&mut self, value: A::Item) {
68 self.push(value)
69 }
70 fn truncate(&mut self, len: usize) {
71 self.truncate(len)
72 }
73}
74
75struct SortAndDedup<I, T, F> {
82 data: I,
84 sorted: usize,
86 cmp: F,
88 keep: Keep,
90
91 _t: PhantomData<T>,
92}
93
94pub fn sort_dedup<I: Iterator, R: Seq<I::Item>>(iter: I, keep: Keep) -> R
98where
99 I::Item: Ord,
100{
101 sort_dedup_by(iter, keep, |a: &I::Item, b: &I::Item| a.cmp(b))
102}
103
104pub fn sort_dedup_by<I: Iterator, R: Seq<I::Item>, F>(iter: I, keep: Keep, cmp: F) -> R
109where
110 F: Fn(&I::Item, &I::Item) -> std::cmp::Ordering,
111{
112 let mut agg: SortAndDedup<R, I::Item, _> = SortAndDedup {
113 data: R::with_capacity(min(iter.size_hint().0, 16)),
114 sorted: 0,
115 cmp,
116 keep,
117 _t: PhantomData,
118 };
119 for x in iter {
120 agg.push(x);
121 }
122 agg.into_inner()
123}
124
125pub fn sort_dedup_by_key<I: Iterator, R: Seq<I::Item>, K: Ord, F: Fn(&I::Item) -> &K>(
130 iter: I,
131 keep: Keep,
132 key: F,
133) -> R {
134 sort_dedup_by(iter, keep, |a: &I::Item, b: &I::Item| key(a).cmp(key(b)))
135}
136
137impl<I, T, F> SortAndDedup<I, T, F>
138where
139 F: Fn(&T, &T) -> Ordering,
140 I: Seq<T>,
141{
142 fn sort_and_dedup(&mut self) {
143 if self.sorted < self.data.len() {
144 let cmp = &self.cmp;
145 let slice = self.data.deref_mut();
146 slice.sort_by(cmp);
149 let unique = dedup_by(slice, |a, b| cmp(a, b) == Ordering::Equal, self.keep);
150 self.data.truncate(unique);
151 self.sorted = self.data.len();
152 }
153 }
154
155 fn into_inner(self) -> I {
156 let mut res = self;
157 res.sort_and_dedup();
158 res.data
159 }
160
161 fn push(&mut self, elem: T) {
162 if self.sorted == self.data.len() {
163 if let Some(last) = self.data.last_mut() {
164 match (self.cmp)(last, &elem) {
165 Ordering::Less => {
166 self.sorted += 1;
168 self.data.push(elem);
169 }
170 Ordering::Equal => {
171 if self.keep == Keep::Last {
173 *last = elem;
174 }
175 }
176 Ordering::Greater => {
177 self.data.push(elem);
179 }
180 }
181 } else {
182 self.sorted += 1;
184 self.data.push(elem);
185 }
186 } else {
187 self.data.push(elem);
189 }
190 if self.data.len() >= 16 {
192 let sorted = self.sorted;
193 let unsorted = self.data.len() - sorted;
194 if unsorted > sorted {
195 self.sort_and_dedup();
198 }
199 }
200 }
201}
202
203#[cfg(test)]
204mod tests {
205 use super::*;
206 use core::fmt::Debug;
207 use quickcheck_macros::quickcheck;
208 use std::collections::*;
209
210 fn unary_op<E: Debug, R: Eq + Debug>(x: E, expected: R, actual: R) -> bool {
212 let res = expected == actual;
213 if !res {
214 println!("x:{:?} expected:{:?} actual:{:?}", x, expected, actual);
215 }
216 res
217 }
218
219 #[quickcheck]
220 fn sort_and_dedup_check(x: Vec<i32>) -> bool {
221 let expected: Vec<i32> = x
222 .iter()
223 .cloned()
224 .collect::<BTreeSet<i32>>()
225 .into_iter()
226 .collect();
227 let actual: Vec<i32> = sort_dedup(x.into_iter(), Keep::First);
228 expected == actual
229 }
230
231 #[quickcheck]
232 fn sort_and_dedup_by_check(x: Vec<(i32, i32)>) -> bool {
233 let expected: Vec<(i32, i32)> = x
235 .iter()
236 .cloned()
237 .collect::<std::collections::BTreeMap<i32, i32>>()
238 .into_iter()
239 .collect();
240 let actual = sort_dedup_by_key(x.iter().cloned(), Keep::Last, |x| &x.0);
241 unary_op(x, expected, actual)
242 }
243
244 #[test]
245 fn dedup_by() {
246 let mut v: Vec<(i32, i32)> = vec![(0, 1), (0, 2), (0, 3)];
247 let r = super::dedup_by(&mut v, |(a, _), (b, _)| a == b, Keep::Last);
248 assert_eq!(r, 1);
249 v.truncate(r);
250 assert_eq!(v, vec![(0, 3)]);
251 }
252
253 #[test]
254 fn sort_and_dedup_by_test() {
255 let v: Vec<(i32, i32)> = vec![(0, 1), (0, 2), (0, 3), (1, 1), (1, 2)];
256 let keep_first: Vec<_> = sort_dedup_by_key(v.clone().into_iter(), Keep::First, |x| &x.0);
257 let keep_last: Vec<_> = sort_dedup_by_key(v.clone().into_iter(), Keep::Last, |x| &x.0);
258 assert_eq!(keep_first, vec![(0, 1), (1, 1)]);
259 assert_eq!(keep_last, vec![(0, 3), (1, 2)]);
260 let expected: Vec<(i32, i32)> = v
261 .iter()
262 .cloned()
263 .collect::<std::collections::BTreeMap<i32, i32>>()
264 .into_iter()
265 .collect();
266 println!("{:?} {:?} {:?}", keep_first, keep_last, expected)
267 }
268}