binary_merge/lib.rs
1#![doc = include_str!("../README.md")]
2#[cfg(test)]
3mod test;
4
5use core::cmp::Ordering;
6
7/// The read part of the merge state that is needed for the binary merge algorithm
8/// it just needs random access for the remainder of a and b
9///
10/// Very often A and B are the same type, but this is not strictly necessary
11pub trait MergeState {
12 /// Element type for a
13 type A;
14 /// Element type for b
15 type B;
16 /// The remaining data in a
17 fn a_slice(&self) -> &[Self::A];
18 /// The remaining data in b
19 fn b_slice(&self) -> &[Self::B];
20}
21
22/// A binary merge operation
23///
24/// It is often useful to keep the merge operation and the merge state separate. E.g. computing the
25/// intersection and checking if the intersection exists can be done with the same operation, but
26/// a different merge state. Likewise in-place operations and operations that produce a new entity
27/// can use the same merge operation. THerefore, the merge state is an additional parameter.SortedPairIter
28///
29/// The operation itself will often be a zero size struct
30pub trait MergeOperation<M: MergeState> {
31 /// Take n elements from a, return true to continue operation
32 fn from_a(&self, m: &mut M, n: usize) -> bool;
33 /// Take n elements from b, return true to continue operation
34 fn from_b(&self, m: &mut M, n: usize) -> bool;
35 /// Take 1 element from both a and b, return true to continue operation
36 fn collision(&self, m: &mut M) -> bool;
37 /// The comparison operation
38 fn cmp(&self, a: &M::A, b: &M::B) -> Ordering;
39 /// merge `an` elements from a and `bn` elements from b into the result
40 ///
41 /// This is a minimum comparison merge that has some overhead, so it is only worth
42 /// it for larger collections and if the comparison operation is expensive.
43 ///
44 /// It does make a big difference e.g. when merging a very large and a very small sequence,
45 /// or two disjoint sequences.
46 ///
47 /// returns false if the operation was prematurely aborted
48 fn binary_merge(&self, m: &mut M, an: usize, bn: usize) -> bool {
49 if an == 0 {
50 bn == 0 || self.from_b(m, bn)
51 } else if bn == 0 {
52 an == 0 || self.from_a(m, an)
53 } else {
54 // neither a nor b are 0
55 let am: usize = an / 2;
56 // pick the center element of a and find the corresponding one in b using binary search
57 let a = &m.a_slice()[am];
58 match m.b_slice()[..bn].binary_search_by(|b| self.cmp(a, b).reverse()) {
59 Ok(bm) => {
60 // same elements. bm is the index corresponding to am
61 // merge everything below am with everything below the found element bm
62 self.binary_merge(m, am, bm) &&
63 // add the elements a(am) and b(bm)
64 self.collision(m) &&
65 // merge everything above a(am) with everything above the found element
66 self.binary_merge(m, an - am - 1, bn - bm - 1)
67 }
68 Err(bi) => {
69 // not found. bi is the insertion point
70 // merge everything below a(am) with everything below the found insertion point bi
71 self.binary_merge(m, am, bi) &&
72 // add a(am)
73 self.from_a(m, 1) &&
74 // everything above a(am) with everything above the found insertion point
75 self.binary_merge(m, an - am - 1, bn - bi)
76 }
77 }
78 }
79 }
80 /// This is the classical tape merge algorithm, useful for when either
81 /// the number of elements is small or the comparison operation is very cheap.
82 fn tape_merge(&self, m: &mut M) -> bool {
83 loop {
84 if let Some(a) = m.a_slice().first() {
85 if let Some(b) = m.b_slice().first() {
86 // something left in both a and b
87 if !match self.cmp(a, b) {
88 Ordering::Less => self.from_a(m, 1),
89 Ordering::Equal => self.collision(m),
90 Ordering::Greater => self.from_b(m, 1),
91 } {
92 return false;
93 }
94 } else {
95 // b is empty, add the rest of a
96 break m.a_slice().is_empty() || self.from_a(m, m.a_slice().len());
97 }
98 } else {
99 // a is empty, add the rest of b
100 break m.b_slice().is_empty() || self.from_b(m, m.b_slice().len());
101 }
102 }
103 }
104 fn merge(&self, m: &mut M) -> bool {
105 let an = m.a_slice().len();
106 let bn = m.b_slice().len();
107 // only use the minimum comparison merge when it is worth it
108 if an > Self::MCM_THRESHOLD || bn > Self::MCM_THRESHOLD {
109 self.binary_merge(m, an, bn)
110 } else {
111 self.tape_merge(m)
112 }
113 }
114 /// Threshold above which we use the minimum comparison merge
115 ///
116 /// For very small collections, the tape merge has a similar number of comparisons
117 /// and requires less state.
118 ///
119 /// Also, if the comparison operation is very cheap, the minimum comparison merge
120 /// does not have big advantages.
121 ///
122 /// Set this to 0 to always do minimum comparison merge.
123 /// Set it to usize::max to always do tape merge.
124 const MCM_THRESHOLD: usize = 8;
125}