iter_set_ops/
merge.rs

1use binary_heap_plus::{BinaryHeap, PeekMut};
2use compare::Compare;
3use core::cmp::Ordering;
4use core::mem::swap;
5use core::ops::DerefMut;
6
7/// Iterates over the union of many sorted deduplicated iterators.
8///
9/// # Examples
10///
11/// ```
12/// use iter_set_ops::merge_iters;
13///
14/// let it1 = 1u8..=5;
15/// let it2 = 3u8..=7;
16/// let it3 = 2u8..=4;
17/// let mut iters = [it1, it2, it3];
18/// let res: Vec<_> = merge_iters(&mut iters).collect();
19///
20/// assert_eq!(res, vec![1, 2, 3, 4, 5, 6, 7]);
21/// ```
22#[inline]
23pub fn merge_iters<'a, T: Ord + 'a, I: Iterator<Item = T>>(
24    iters: &mut [I],
25) -> MergeIterator<
26    T,
27    I,
28    impl Fn(&T, &T) -> Ordering + 'a,
29    impl Fn(&(usize, T), &(usize, T)) -> Ordering + 'a,
30> {
31    merge_iters_by(iters, T::cmp)
32}
33
34/// Iterates over the union of many sorted deduplicated iterators, using `cmp` as the comparison operator.
35///
36/// # Examples
37///
38/// ```
39/// use iter_set_ops::merge_iters_by;
40///
41/// let it1 = (1u8..=5).rev();
42/// let it2 = (3u8..=7).rev();
43/// let it3 = (2u8..=4).rev();
44/// let mut iters = [it1, it2, it3];
45/// let res: Vec<_> = merge_iters_by(&mut iters, |x, y| y.cmp(x)).collect();
46///
47/// assert_eq!(res, vec![7, 6, 5, 4, 3, 2, 1]);
48/// ```
49pub fn merge_iters_by<'a, T, I: Iterator<Item = T>, F: Fn(&T, &T) -> Ordering + Copy + 'a>(
50    iters: &mut [I],
51    cmp: F,
52) -> MergeIterator<T, I, F, impl Fn(&(usize, T), &(usize, T)) -> Ordering + 'a> {
53    let mut vec = Vec::with_capacity(iters.len());
54    for (i, iter) in iters.iter_mut().enumerate() {
55        if let Some(x) = iter.next() {
56            vec.push((i, x));
57        }
58    }
59    let heap = BinaryHeap::from_vec_cmp(vec, move |(_, x): &(usize, T), (_, y): &(usize, T)| {
60        cmp(y, x)
61    });
62    MergeIterator { iters, cmp, heap }
63}
64
65pub struct MergeIterator<
66    'a,
67    T,
68    I: Iterator<Item = T>,
69    F: Fn(&T, &T) -> Ordering,
70    C: Compare<(usize, T)>,
71> {
72    iters: &'a mut [I],
73    cmp: F,
74    heap: BinaryHeap<(usize, T), C>,
75}
76
77impl<T, I: Iterator<Item = T>, F: Fn(&T, &T) -> Ordering, C: Compare<(usize, T)>> Iterator
78    for MergeIterator<'_, T, I, F, C>
79{
80    type Item = T;
81
82    fn next(&mut self) -> Option<Self::Item> {
83        if !self.heap.is_empty() {
84            let res = {
85                let mut peek = self.heap.peek_mut().unwrap();
86                let entry = peek.deref_mut();
87                if let Some(mut x) = self.iters[entry.0].next() {
88                    swap(&mut entry.1, &mut x);
89                    x
90                } else {
91                    PeekMut::pop(peek).1
92                }
93            };
94            while let Some(mut peek) = self.heap.peek_mut() {
95                if (self.cmp)(&res, &peek.1) == Ordering::Equal {
96                    let entry = peek.deref_mut();
97                    if let Some(mut x) = self.iters[entry.0].next() {
98                        swap(&mut entry.1, &mut x);
99                    } else {
100                        PeekMut::pop(peek);
101                    }
102                } else {
103                    break;
104                }
105            }
106            Some(res)
107        } else {
108            None
109        }
110    }
111}
112
113/// Iterates over the union of many sorted deduplicated iterators and groups equal items with their indices into a [`Vec`].
114///
115/// # Examples
116///
117/// ```
118/// use iter_set_ops::merge_iters_detailed;
119///
120/// let it1 = 1u8..=2;
121/// let it2 = 2u8..=3;
122/// let mut iters = [it1, it2];
123/// let res: Vec<_> = merge_iters_detailed(&mut iters).collect();
124///
125/// assert_eq!(res, vec![vec![(0, 1)], vec![(1, 2), (0, 2)], vec![(1, 3)]]);
126/// ```
127#[inline]
128pub fn merge_iters_detailed<'a, T: Ord + 'a, I: Iterator<Item = T>>(
129    iters: &mut [I],
130) -> DetailedMergeIterator<
131    T,
132    I,
133    impl Fn(&T, &T) -> Ordering + 'a,
134    impl Fn(&(usize, T), &(usize, T)) -> Ordering + 'a,
135> {
136    merge_iters_detailed_by(iters, T::cmp)
137}
138
139/// Iterates over the union of many sorted deduplicated iterators and groups equal items with their indices into a [`Vec`], using `cmp` as the comparison operator.
140///
141/// # Examples
142///
143/// ```
144/// use iter_set_ops::merge_iters_detailed_by;
145///
146/// let it1 = (1u8..=2).rev();
147/// let it2 = (2u8..=3).rev();
148/// let mut iters = [it1, it2];
149/// let res: Vec<_> = merge_iters_detailed_by(&mut iters, |x, y| y.cmp(x)).collect();
150///
151/// assert_eq!(res, vec![vec![(1, 3)], vec![(0, 2), (1, 2)], vec![(0, 1)]]);
152/// ```
153pub fn merge_iters_detailed_by<
154    'a,
155    T,
156    I: Iterator<Item = T>,
157    F: Fn(&T, &T) -> Ordering + Copy + 'a,
158>(
159    iters: &mut [I],
160    cmp: F,
161) -> DetailedMergeIterator<T, I, F, impl Fn(&(usize, T), &(usize, T)) -> Ordering + 'a> {
162    let mut vec = Vec::with_capacity(iters.len());
163    for (i, iter) in iters.iter_mut().enumerate() {
164        if let Some(x) = iter.next() {
165            vec.push((i, x));
166        }
167    }
168    let heap = BinaryHeap::from_vec_cmp(vec, move |(_, x): &(usize, T), (_, y): &(usize, T)| {
169        cmp(y, x)
170    });
171    DetailedMergeIterator { iters, cmp, heap }
172}
173
174pub struct DetailedMergeIterator<
175    'a,
176    T,
177    I: Iterator<Item = T>,
178    F: Fn(&T, &T) -> Ordering,
179    C: Compare<(usize, T)>,
180> {
181    iters: &'a mut [I],
182    cmp: F,
183    heap: BinaryHeap<(usize, T), C>,
184}
185
186impl<T, I: Iterator<Item = T>, F: Fn(&T, &T) -> Ordering, C: Compare<(usize, T)>> Iterator
187    for DetailedMergeIterator<'_, T, I, F, C>
188{
189    type Item = Vec<(usize, T)>;
190
191    fn next(&mut self) -> Option<Self::Item> {
192        if !self.heap.is_empty() {
193            let mut details = Vec::new();
194            let (i, res) = {
195                let mut peek = self.heap.peek_mut().unwrap();
196                let entry = peek.deref_mut();
197                if let Some(mut x) = self.iters[entry.0].next() {
198                    swap(&mut entry.1, &mut x);
199                    (entry.0, x)
200                } else {
201                    PeekMut::pop(peek)
202                }
203            };
204            while let Some(mut peek) = self.heap.peek_mut() {
205                if (self.cmp)(&res, &peek.1) == Ordering::Equal {
206                    let entry = peek.deref_mut();
207                    if let Some(mut x) = self.iters[entry.0].next() {
208                        swap(&mut entry.1, &mut x);
209                        details.push((entry.0, x));
210                    } else {
211                        let (j, x) = PeekMut::pop(peek);
212                        details.push((j, x));
213                    }
214                } else {
215                    break;
216                }
217            }
218            details.push((i, res));
219            Some(details)
220        } else {
221            None
222        }
223    }
224}
225
226#[cfg(test)]
227mod tests {
228    use super::*;
229    use rand::{rngs::StdRng, Rng, SeedableRng};
230    use std::collections::HashSet;
231
232    #[test]
233    fn test_merge() {
234        let it1 = 1u8..=5;
235        let it2 = 3u8..=7;
236        let it3 = 2u8..=4;
237        let mut iters = [it1, it2, it3];
238        let res: Vec<_> = merge_iters(&mut iters).collect();
239
240        assert_eq!(res, vec![1, 2, 3, 4, 5, 6, 7]);
241        assert!(iters[1].next().is_none());
242    }
243
244    #[test]
245    fn test_merge_by() {
246        let it1 = (1u8..=5).rev();
247        let it2 = (3u8..=7).rev();
248        let it3 = (2u8..=4).rev();
249        let mut iters = [it1, it2, it3];
250        let res: Vec<_> = merge_iters_by(&mut iters, |x, y| y.cmp(x)).collect();
251
252        assert_eq!(res, vec![7, 6, 5, 4, 3, 2, 1]);
253        assert!(iters[1].next().is_none());
254    }
255
256    #[test]
257    fn test_merge_detailed() {
258        let it1 = 1u8..=2;
259        let it2 = 2u8..=3;
260        let mut iters = [it1, it2];
261        let res: Vec<_> = merge_iters_detailed(&mut iters).collect();
262
263        assert_eq!(res, vec![vec![(0, 1)], vec![(1, 2), (0, 2)], vec![(1, 3)]]);
264        assert!(iters[1].next().is_none());
265    }
266
267    #[test]
268    fn test_merge_detailed_by() {
269        let it1 = (1u8..=2).rev();
270        let it2 = (2u8..=3).rev();
271        let mut iters = [it1, it2];
272        let res: Vec<_> = merge_iters_detailed_by(&mut iters, |x, y| y.cmp(x)).collect();
273
274        assert_eq!(res, vec![vec![(1, 3)], vec![(0, 2), (1, 2)], vec![(0, 1)]]);
275        assert!(iters[1].next().is_none());
276    }
277
278    #[test]
279    fn test_large_merge() {
280        const C: usize = 10;
281        const N: usize = 100_000;
282
283        let mut iters: Vec<_> = (0..C).map(|i| (0..(C * N)).skip(i).step_by(C)).collect();
284        let res: Vec<_> = merge_iters(&mut iters).collect();
285
286        let expected: Vec<_> = (0..(C * N)).collect();
287        assert_eq!(res, expected);
288    }
289
290    #[test]
291    fn test_random_merge() {
292        const C: usize = 10;
293        const N: usize = 10_000;
294
295        let mut rng = StdRng::seed_from_u64(42);
296        let mut vecs = Vec::with_capacity(C);
297        for _ in 0..C {
298            let mut vec = Vec::with_capacity(N);
299            for _ in 0..N {
300                vec.push(rng.gen::<u16>());
301            }
302            vec.sort_unstable();
303            vec.dedup();
304            vecs.push(vec);
305        }
306        let mut iters: Vec<_> = vecs.iter().map(|v| v.iter()).collect();
307        let res: HashSet<_> = merge_iters(&mut iters).collect();
308
309        for vec in vecs.iter() {
310            for x in vec {
311                assert!(res.contains(x));
312            }
313        }
314    }
315
316    #[test]
317    fn test_associative_merge() {
318        const C: usize = 10;
319        const N: usize = 10_000;
320
321        let mut rng = StdRng::seed_from_u64(42);
322        let mut vecs = Vec::with_capacity(C);
323        for _ in 0..C {
324            let mut vec = Vec::with_capacity(N);
325            for _ in 0..N {
326                vec.push(rng.gen::<u16>());
327            }
328            vec.sort_unstable();
329            vec.dedup();
330            vecs.push(vec);
331        }
332
333        let mut iters: Vec<_> = vecs.iter().map(|v| v.iter()).collect();
334        let res10: HashSet<_> = merge_iters(&mut iters).collect();
335
336        let mut nested_iters: Vec<Vec<_>> = (0..C)
337            .step_by(5)
338            .map(|i| vecs.iter().skip(i).take(5).map(|v| v.iter()).collect())
339            .collect();
340        let res5: HashSet<_> = merge_iters(
341            &mut nested_iters
342                .iter_mut()
343                .map(|inner_iters| merge_iters(inner_iters))
344                .collect::<Vec<_>>(),
345        )
346        .collect();
347
348        let mut nested_iters: Vec<Vec<_>> = (0..C)
349            .step_by(2)
350            .map(|i| vecs.iter().skip(i).take(2).map(|v| v.iter()).collect())
351            .collect();
352        let res2: HashSet<_> = merge_iters(
353            &mut nested_iters
354                .iter_mut()
355                .map(|inner_iters| merge_iters(inner_iters))
356                .collect::<Vec<_>>(),
357        )
358        .collect();
359
360        assert_eq!(res10, res5);
361        assert_eq!(res10, res2);
362    }
363}