binary_merge/
lib.rs

1#![doc = include_str!("../README.md")]
2#[cfg(test)]
3mod test;
4
5use core::cmp::Ordering;
6
7/// The read part of the merge state that is needed for the binary merge algorithm
8/// it just needs random access for the remainder of a and b
9///
10/// Very often A and B are the same type, but this is not strictly necessary
11pub trait MergeState {
12    /// Element type for a
13    type A;
14    /// Element type for b
15    type B;
16    /// The remaining data in a
17    fn a_slice(&self) -> &[Self::A];
18    /// The remaining data in b
19    fn b_slice(&self) -> &[Self::B];
20}
21
22/// A binary merge operation
23///
24/// It is often useful to keep the merge operation and the merge state separate. E.g. computing the
25/// intersection and checking if the intersection exists can be done with the same operation, but
26/// a different merge state. Likewise in-place operations and operations that produce a new entity
27/// can use the same merge operation. THerefore, the merge state is an additional parameter.SortedPairIter
28///
29/// The operation itself will often be a zero size struct
30pub trait MergeOperation<M: MergeState> {
31    /// Take n elements from a, return true to continue operation
32    fn from_a(&self, m: &mut M, n: usize) -> bool;
33    /// Take n elements from b, return true to continue operation
34    fn from_b(&self, m: &mut M, n: usize) -> bool;
35    /// Take 1 element from both a and b, return true to continue operation
36    fn collision(&self, m: &mut M) -> bool;
37    /// The comparison operation
38    fn cmp(&self, a: &M::A, b: &M::B) -> Ordering;
39    /// merge `an` elements from a and `bn` elements from b into the result
40    ///
41    /// This is a minimum comparison merge that has some overhead, so it is only worth
42    /// it for larger collections and if the comparison operation is expensive.
43    ///
44    /// It does make a big difference e.g. when merging a very large and a very small sequence,
45    /// or two disjoint sequences.
46    ///
47    /// returns false if the operation was prematurely aborted
48    fn binary_merge(&self, m: &mut M, an: usize, bn: usize) -> bool {
49        if an == 0 {
50            bn == 0 || self.from_b(m, bn)
51        } else if bn == 0 {
52            an == 0 || self.from_a(m, an)
53        } else {
54            // neither a nor b are 0
55            let am: usize = an / 2;
56            // pick the center element of a and find the corresponding one in b using binary search
57            let a = &m.a_slice()[am];
58            match m.b_slice()[..bn].binary_search_by(|b| self.cmp(a, b).reverse()) {
59                Ok(bm) => {
60                    // same elements. bm is the index corresponding to am
61                    // merge everything below am with everything below the found element bm
62                    self.binary_merge(m, am, bm) &&
63                    // add the elements a(am) and b(bm)
64                    self.collision(m) &&
65                    // merge everything above a(am) with everything above the found element
66                    self.binary_merge(m, an - am - 1, bn - bm - 1)
67                }
68                Err(bi) => {
69                    // not found. bi is the insertion point
70                    // merge everything below a(am) with everything below the found insertion point bi
71                    self.binary_merge(m, am, bi) &&
72                    // add a(am)
73                    self.from_a(m, 1) &&
74                    // everything above a(am) with everything above the found insertion point
75                    self.binary_merge(m, an - am - 1, bn - bi)
76                }
77            }
78        }
79    }
80    /// This is the classical tape merge algorithm, useful for when either
81    /// the number of elements is small or the comparison operation is very cheap.
82    fn tape_merge(&self, m: &mut M) -> bool {
83        loop {
84            if let Some(a) = m.a_slice().first() {
85                if let Some(b) = m.b_slice().first() {
86                    // something left in both a and b
87                    if !match self.cmp(a, b) {
88                        Ordering::Less => self.from_a(m, 1),
89                        Ordering::Equal => self.collision(m),
90                        Ordering::Greater => self.from_b(m, 1),
91                    } {
92                        return false;
93                    }
94                } else {
95                    // b is empty, add the rest of a
96                    break m.a_slice().is_empty() || self.from_a(m, m.a_slice().len());
97                }
98            } else {
99                // a is empty, add the rest of b
100                break m.b_slice().is_empty() || self.from_b(m, m.b_slice().len());
101            }
102        }
103    }
104    fn merge(&self, m: &mut M) -> bool {
105        let an = m.a_slice().len();
106        let bn = m.b_slice().len();
107        // only use the minimum comparison merge when it is worth it
108        if an > Self::MCM_THRESHOLD || bn > Self::MCM_THRESHOLD {
109            self.binary_merge(m, an, bn)
110        } else {
111            self.tape_merge(m)
112        }
113    }
114    /// Threshold above which we use the minimum comparison merge
115    ///
116    /// For very small collections, the tape merge has a similar number of comparisons
117    /// and requires less state.
118    ///
119    /// Also, if the comparison operation is very cheap, the minimum comparison merge
120    /// does not have big advantages.
121    ///
122    /// Set this to 0 to always do minimum comparison merge.
123    /// Set it to usize::max to always do tape merge.
124    const MCM_THRESHOLD: usize = 8;
125}