segmap 0.1.0

Map and set data structures whose keys are stored as ranges. Contiguous and overlapping ranges that map to the same value are coalesced into a single range. Originated as a fork of Jeff Parsons' "rangemap"
Documentation
use core::{cmp::Ordering::*, fmt::Debug};

use crate::{map::Key, set::iterators::Iter, Segment, SegmentMap, SegmentSet};

impl<T> SegmentSet<T> {
    // TODO: into_difference_iter

    pub fn iter_difference<'a>(&'a self, other: &'a Self) -> Difference<'a, T>
    where
        T: Ord,
    {
        if let Some((self_range, other_range)) = self
            .map
            .bounds()
            .map(|s| other.map.bounds().map(|o| (s, o)))
            .flatten()
        {
            // If both ranges are bounded and overlap, perform a difference
            if self_range.overlaps(&other_range) {
                // // TODO: Use the tipping point, but only for the overlapping region?
                // if self.len() <= (other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF) {
                //     return Difference(DifferenceInner::Search {
                //         iter: self.iter(),
                //         prev: None,
                //         other,
                //     });
                // } else {
                return Difference(DifferenceInner::Stitch {
                    self_iter: self.iter(),
                    prev_self: None,
                    other_iter: other.iter(),
                    prev_other: None,
                });
                // }
            }
        }

        // If either range is empty or they don't overlap, the difference is
        // strictly `self`.
        Difference(DifferenceInner::Iterate(self.iter()))
    }

    // TODO: into_difference

    /// Return a set representing the difference of two sets
    ///
    /// I.e. All elements in `self` that are not in `other`
    ///
    /// If you need an iterator over the different items, use
    /// [`SegmentSet::difference_iter`], which is called here internally. However,
    /// this may be slightly faster if you need a set.
    pub fn difference<'a>(&'a self, other: &'a Self) -> SegmentSet<&'a T>
    where
        T: Ord,
    {
        // Don't need to insert, since we know ranges produced by the iterator
        // aren't overlapping
        SegmentSet {
            map: SegmentMap {
                map: self.iter_difference(other).map(|r| (Key(r), ())).collect(),
                store: alloc::vec::Vec::new(),
            },
        }
    }
}

/// Set Difference
impl<'a, T: Ord + Clone> core::ops::Sub<&'a SegmentSet<T>> for &'a SegmentSet<T> {
    type Output = SegmentSet<&'a T>;

    fn sub(self, rhs: &'a SegmentSet<T>) -> SegmentSet<&'a T> {
        self.difference(rhs)
    }
}

/// Set Removal // TODO: self.into_difference() may be quicker for these?
impl<T: Ord + Clone> core::ops::SubAssign<&SegmentSet<T>> for SegmentSet<T> {
    fn sub_assign(&mut self, rhs: &SegmentSet<T>) {
        for range in rhs.iter() {
            self.remove(range);
        }
    }
}

/// Set Removal
impl<T: Ord + Clone> core::ops::SubAssign<SegmentSet<T>> for SegmentSet<T> {
    fn sub_assign(&mut self, rhs: SegmentSet<T>) {
        for range in rhs.iter() {
            self.remove(range);
        }
    }
}

// TODO: Implement Difference Search method

/// An iterator representing the difference between two [`SegmentSet`]s
///
/// This struct is generated by [`SegmentSet::difference_iter`]
pub struct Difference<'a, T: Ord>(DifferenceInner<'a, T>);
#[derive(Debug)]
enum DifferenceInner<'a, T: 'a + Ord> {
    /// Iterate all of `self` and some of `other`, spotting matches along the
    /// way. The std lib uses Peekable here, which doesn't quite work for us,
    /// since we need to store a possibly split range (not the original range)
    Stitch {
        self_iter: Iter<'a, T>,
        prev_self: Option<Segment<&'a T>>,

        other_iter: Iter<'a, T>,
        prev_other: Option<Segment<&'a T>>,
    },

    // /// If other is large, we search through it instead of iterating
    // /// Trusting the stdlib knows what it's doing here...
    // ///
    // /// TODO: bench if Search mode is useful at all. Defaulting to include it
    // /// because stdlib does.
    // Search {
    //     iter: Iter<'a, T>,
    //     other: &'a SegmentSet<T>,

    //     /// Stored item, along with references to any overlapping ranges already
    //     /// queried (since they'll be immediately needed in the next iteration)
    //     prev: Option<(Range<&'a T>, alloc::vec::Vec<&'a Range<T>>)>,
    // },
    /// For non-overlapping sets, just produce everything in self
    ///
    /// This is also the case if `self` or `other` is empty
    Iterate(Iter<'a, T>),
}

// impl<T: Ord + fmt::Debug> fmt::Debug for Difference<'_, T> {
//     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
//         f.debug_tuple("Difference").field(&self.0.clone()).finish()
//     }
// }

// TODO: document why Range<&'a T> instead of &'a Range<T>
// TODO: combine Stitch and Search inner loop logic?
impl<'a, T: Ord> Iterator for Difference<'a, T> {
    type Item = Segment<&'a T>;

    fn next(&mut self) -> Option<Segment<&'a T>> {
        match &mut self.0 {
            DifferenceInner::Iterate(iter) => iter.next().map(|x| x.as_ref()),
            DifferenceInner::Stitch {
                self_iter,
                prev_self,

                other_iter,
                prev_other,
            } => {
                // If we stored a previous range, it's a split (so take it)
                // If there aren't any left, we're finished
                let mut range = prev_self
                    .take()
                    .or_else(|| self_iter.next().map(|x| x.as_ref()))?;

                // Look for items in `other` until we run out
                loop {
                    if let Some(other) = prev_other
                        .take()
                        .or_else(|| other_iter.next().map(|x| x.as_ref()))
                    {
                        // If `range` is still fully before `other`, use it (and
                        // hold on to `other`)
                        if range.end.cmp_start(&other.start).is_gt() {
                            prev_other.insert(other);
                            return Some(range);
                        }

                        // If `range` is fully after `other`, grab the next
                        // `other` (and loop again)
                        if range.start.cmp_end(&other.end).is_gt() {
                            continue;
                        }

                        // Otherwise, we have some overlap
                        //
                        // The `borrow_bound_x().unwrap()` calls below should be
                        // fine, since they only occur where the match precludes
                        // an unbounded start/end.
                        match (range.start.cmp(&other.start), range.end.cmp(&other.end)) {
                            // Partial overlap, but `left` doesn't extend beyond `right`
                            // We can use part of `left` and forget the rest
                            (Less, Less) => {
                                range.end = other.borrow_bound_before().unwrap();
                                prev_other.insert(other);
                                return Some(range);
                            }

                            // Partial overlap where `left` extends just to the
                            // end of `right` (don't save `right`)
                            (Less, Equal) => {
                                range.end = other.borrow_bound_before().unwrap();
                                return Some(range);
                            }

                            // Fully overlapped, but with some excess `right`
                            // Keep it and loop again with a new `left`.
                            (Greater | Equal, Less) => {
                                range = self_iter.next()?.as_ref();
                                prev_other.insert(other);
                                continue;
                            }

                            // Fully overlapped but with no more `right`, loop
                            // again with a new one of each
                            (Greater | Equal, Equal) => {
                                range = self_iter.next()?.as_ref();
                                continue;
                            }

                            // Partial overlap, but some `left` past `right`
                            // Keep part of `left` and look for a new `right`
                            (Greater | Equal, Greater) => {
                                range.start = other.borrow_bound_after().unwrap();
                                continue;
                            }

                            // `left` extends beyond `right` in both directions.
                            // Use the part of `left` before `right` and store
                            // the part after.
                            (Less, Greater) => {
                                prev_self.insert(Segment {
                                    start: other.borrow_bound_after().unwrap(),
                                    end: core::mem::replace(
                                        &mut range.end,
                                        other.borrow_bound_before().unwrap(),
                                    ),
                                });
                                return Some(range);
                            }
                        }
                    } else {
                        return Some(range);
                    }
                }
            } // DifferenceInner::Search { iter, other, prev } => {

              //     // Get either the next item in `iter`, or the previously stored
              //     // state. Short circuit out if there are no more items in iter.
              //     let (next, mut overlaps) = prev.take().or_else(|| {
              //         let next = iter.next()?.as_ref();
              //         let overlaps = if let Some(after) = next.end.borrow_after() {
              //             other.map.map.range(..=after.cloned())
              //         } else {
              //             other.map.map.iter()
              //         }
              //         .rev().filter_map(|(k, _)| if &k.0.overlaps(other) )
              //         .collect();

              //         Some((next, overlaps))
              //     })?;

              //     if overlaps.is_empty() {
              //         return Some(next);
              //     }

              //     // Get next segment from overlaps
              //     loop {

              //     }
              // }
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let (self_len, other_len) = match &self.0 {
            DifferenceInner::Stitch {
                self_iter,
                other_iter,
                ..
            } => (self_iter.len(), other_iter.len()),
            // DifferenceInner::Search { iter, other, .. } => (iter.len(), other.len()),
            DifferenceInner::Iterate(iter) => (iter.len(), 0),
        };
        (self_len.saturating_sub(other_len), Some(self_len))
    }

    fn min(mut self) -> Option<Segment<&'a T>> {
        self.next()
    }
}