1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
#![doc = include_str!("../README.md")]
#[cfg(test)]
mod test;
use core::cmp::Ordering;
/// The read part of the merge state that is needed for the binary merge algorithm
/// it just needs random access for the remainder of a and b
///
/// Very often A and B are the same type, but this is not strictly necessary
pub trait MergeState {
/// Element type for a
type A;
/// Element type for b
type B;
/// The remaining data in a
fn a_slice(&self) -> &[Self::A];
/// The remaining data in b
fn b_slice(&self) -> &[Self::B];
}
/// A binary merge operation
///
/// It is often useful to keep the merge operation and the merge state separate. E.g. computing the
/// intersection and checking if the intersection exists can be done with the same operation, but
/// a different merge state. Likewise in-place operations and operations that produce a new entity
/// can use the same merge operation. THerefore, the merge state is an additional parameter.SortedPairIter
///
/// The operation itself will often be a zero size struct
pub trait MergeOperation<M: MergeState> {
/// Take n elements from a, return true to continue operation
fn from_a(&self, m: &mut M, n: usize) -> bool;
/// Take n elements from b, return true to continue operation
fn from_b(&self, m: &mut M, n: usize) -> bool;
/// Take 1 element from both a and b, return true to continue operation
fn collision(&self, m: &mut M) -> bool;
/// The comparison operation
fn cmp(&self, a: &M::A, b: &M::B) -> Ordering;
/// merge `an` elements from a and `bn` elements from b into the result
///
/// This is a minimum comparison merge that has some overhead, so it is only worth
/// it for larger collections and if the comparison operation is expensive.
///
/// It does make a big difference e.g. when merging a very large and a very small sequence,
/// or two disjoint sequences.
///
/// returns false if the operation was prematurely aborted
fn binary_merge(&self, m: &mut M, an: usize, bn: usize) -> bool {
if an == 0 {
bn == 0 || self.from_b(m, bn)
} else if bn == 0 {
an == 0 || self.from_a(m, an)
} else {
// neither a nor b are 0
let am: usize = an / 2;
// pick the center element of a and find the corresponding one in b using binary search
let a = &m.a_slice()[am];
match m.b_slice()[..bn].binary_search_by(|b| self.cmp(a, b).reverse()) {
Ok(bm) => {
// same elements. bm is the index corresponding to am
// merge everything below am with everything below the found element bm
self.binary_merge(m, am, bm) &&
// add the elements a(am) and b(bm)
self.collision(m) &&
// merge everything above a(am) with everything above the found element
self.binary_merge(m, an - am - 1, bn - bm - 1)
}
Err(bi) => {
// not found. bi is the insertion point
// merge everything below a(am) with everything below the found insertion point bi
self.binary_merge(m, am, bi) &&
// add a(am)
self.from_a(m, 1) &&
// everything above a(am) with everything above the found insertion point
self.binary_merge(m, an - am - 1, bn - bi)
}
}
}
}
/// This is the classical tape merge algorithm, useful for when either
/// the number of elements is small or the comparison operation is very cheap.
fn tape_merge(&self, m: &mut M) -> bool {
while !m.a_slice().is_empty() && !m.b_slice().is_empty() {
// very convoluted way to access the first element.
let a = &m.a_slice()[0];
let b = &m.b_slice()[0];
// calling the various ops advances the pointers
let r = match self.cmp(a, b) {
Ordering::Equal => self.collision(m),
Ordering::Less => self.from_a(m, 1),
Ordering::Greater => self.from_b(m, 1),
};
if !r {
return false;
}
}
if !m.a_slice().is_empty() && !self.from_a(m, m.a_slice().len()) {
return false;
}
if !m.b_slice().is_empty() && !self.from_b(m, m.b_slice().len()) {
return false;
}
true
}
fn merge(&self, m: &mut M) -> bool {
let an = m.a_slice().len();
let bn = m.b_slice().len();
// only use the minimum comparison merge when it is worth it
if an > Self::MCM_THRESHOLD || bn > Self::MCM_THRESHOLD {
self.binary_merge(m, an, bn)
} else {
self.tape_merge(m)
}
}
/// Threshold above which we use the minimum comparison merge
///
/// For very small collections, the tape merge has a similar number of comparisons
/// and requires less state.
///
/// Also, if the comparison operation is very cheap, the minimum comparison merge
/// does not have big advantages.
///
/// Set this to 0 to always do minimum comparison merge.
/// Set it to usize::max to always do tape merge.
const MCM_THRESHOLD: usize = 8;
}