integer_set 0.0.2

A library providing a high-performance integer set.
//! Provides an IntegerSet type which is an optimized Set<T: Int>.

extern crate interval;

use self::interval::Collection;
use self::interval::Interval;
use self::interval::Set;

use std::fmt::Show;
use std::num::Int;

/// Represents a set containing any number of integers.
///
/// This is not yet optimized for sets with a large number of sparsely
/// distributed intervals, but should some day be.
#[deriving(PartialEq, Show)]
pub struct IntegerSet<T: Int> {
    // All intervals must be non-empty. This vector will be empty to
    // denote the empty set.
    intervals: Vec<Interval<T>>,
}

impl<T: Int> IntegerSet<T> {
    /// Creates a new set with the given intervals.
    pub fn new(intervals: Vec<Interval<T>>) -> IntegerSet<T> {
        assert!(intervals.iter().all(|i| !i.is_empty()));
        assert!(intervals.as_slice().windows(2).all(|is| {
            assert_eq!(is.len(), 2);
            let ref l = is[0];
            let ref r = is[1];
            l < r
        }));
        IntegerSet{intervals: intervals}
    }
}

/// IntegerSet is a collection of integers.
impl<T: Int> Collection for IntegerSet<T> {
    // We do not allow inclusion of empty intervals, so the absence of
    // intervals indicates an empty set.
    fn is_empty(&self) -> bool { self.intervals.is_empty() }
    fn len(&self) -> uint {
        // The intervals are all disjoint, so summing their length
        // provides the total length of the set.
        use std::iter::AdditiveIterator;
        self.intervals.iter().map(|i| i.len()).sum()
    }
}

// IntegerSet is by design Set<Int>.
impl<T: Int> Set<T> for IntegerSet<T> {
    fn contains(&self, value: &T) -> bool {
        // TODO If intervals is greater than a certain length,
        // binary-searching ought to be used to find the potentially
        // containing interval.
        self.intervals.iter().any(|interval| interval.contains(value))
    }
    fn is_disjoint(&self, _other: &IntegerSet<T>) -> bool {
        panic!("not implemented")
    }
    fn is_subset(&self, _other: &IntegerSet<T>) -> bool {
        panic!("not implemented")
    }
}

// Returns true if two intervals intersect.
//
// This is useful to determine if they can be merged.
fn touching<T: Int>(
    mut a: Interval<T>, mut b: Interval<T>) -> bool {
    if !a.is_disjoint(&b) { return true; }
    if b < a { std::mem::swap(&mut a, &mut b); }
    a.end + Int::one() == b.begin
}

// Merges two intervals into a new interval.
//
// Requires `a` and `b` to be intersecting.
fn merge<T: Int + Show>(a: Interval<T>, b: Interval<T>) -> Interval<T> {
    assert!(touching(a, b));
    let begin = std::cmp::min(a.begin, b.begin);
    let end = std::cmp::max(a.end, b.end);
    Interval::new(begin, end)
}

#[test]
fn test_merge() {
    let a = Interval::new(1i, 2);
    let b = Interval::new(2i, 3);
    let merged = merge(a, b);
    assert_eq!(merged, Interval::new(1, 3));
}

#[test]
fn test_merge_adjacent() {
    let a = Interval::new(1i, 1);
    let b = Interval::new(2i, 2);
    let merged = merge(a, b);
    assert_eq!(merged, Interval::new(1, 2));
}

/// Adding two intervals will produce an IntegerSet.
///
/// We cannot return an interval in case the addends are disjoint.
impl<T: Int + Show> Add<Interval<T>, IntegerSet<T>> for Interval<T> {
    fn add(self, rhs: Interval<T>) -> IntegerSet<T> {
        if self.is_disjoint(&rhs) {
            let intervals =
                if self < rhs { vec![self, rhs] } else { vec![rhs, self] };
            IntegerSet::new(intervals)
        } else {
            IntegerSet::new(vec![merge(self, rhs)])
        }
    }
}

/// Subtracting two intervals can produce multiple intervals.
impl<T: Int + Show> Sub<Interval<T>, IntegerSet<T>> for Interval<T> {
    fn sub(self, rhs: Interval<T>) -> IntegerSet<T> {
        if self.is_disjoint(&rhs) { return IntegerSet::new(vec![self]); }
        // If "self" is a subset of "rhs", then our result is the
        // empty set.
        if self.is_subset(&rhs) { return IntegerSet::new(Vec::new()); }
        // Otherwise, we will return one or two intervals.
        if rhs.is_subset(&self) {
            // rhs is inside self, one or two intervals will be
            // returned.
            let mut intervals = Vec::new();
            let one = Int::one();
            if self.begin + one < rhs.begin {
                intervals.push(Interval::new(self.begin, rhs.begin - one));
            }
            if rhs.end < self.end - one {
                intervals.push(Interval::new(rhs.end + one, self.end));
            }
            // We must have at least one interval, since we already
            // filtered out the case where "self" is a subset of
            // "rhs".
            assert!(intervals.len() == 1 || intervals.len() == 2);
            return IntegerSet::new(intervals);
        }
        // There will be at most one interval since "rhs" overlaps one
        // of the ends of "self".
        let interval = if rhs < self {
            Interval::new(rhs.end, self.end)
        } else {
            assert!(self < rhs);
            Interval::new(self.begin, rhs.begin)
        };
        IntegerSet::new(vec![interval])
    }
}

/// Adding two IntegerSets will produce a new IntegerSet.
impl<T: Int + Show> Add<IntegerSet<T>, IntegerSet<T>> for IntegerSet<T> {
    fn add(self, rhs: IntegerSet<T>) -> IntegerSet<T> {
        let mut intervals = Vec::new();
        let mut ls = self.intervals.iter().peekable();
        let mut rs = rhs.intervals.iter().peekable();
        while !ls.is_empty() && !rs.is_empty() {
            let l = *ls.peek().unwrap();
            let r = *rs.peek().unwrap();
            let curr = if l.is_disjoint(r) {
                let smaller = if l < r { &mut ls } else { &mut rs };
                *smaller.next().unwrap()
            } else {
                merge(*ls.next().unwrap(), *rs.next().unwrap())
            };
            append(&mut intervals, curr);
        }
        for range in ls.chain(rs) {
            append(&mut intervals, *range);
        }
        IntegerSet::new(intervals)
    }
}

impl<T: Int + Show> Add<T, IntegerSet<T>> for IntegerSet<T> {
    fn add(self, rhs: T) -> IntegerSet<T> {
        let set = IntegerSet::new(vec![Interval::new(rhs, rhs)]);
        self + set
    }
}

// TODO Implement Sub trait.

// Appends an interval into a vector of intervals, merging where
// appropriate.
fn append<T: Int + Show>(intervals: &mut Vec<Interval<T>>,
                         interval: Interval<T>) {
    match intervals.pop() {
        Some(last) => {
            if last.is_disjoint(&interval) {
                intervals.push(last);
                intervals.push(interval);
            } else {
                intervals.push(merge(last, interval));
            }
        },
        None => intervals.push(interval),
    }
}

#[test]
fn test_empty() {
    let empty: Interval<int> = Interval::empty();
    assert!(empty.is_empty());
    assert_eq!(empty.len(), 0);
    assert!(!empty.contains(&0));
    assert!(!empty.contains(&1));
}

#[test]
fn test_singleton() {
    let i = Interval::new(1i, 1);
    assert!(!i.is_empty());
    assert_eq!(i.len(), 1);
    assert!(!i.contains(&0));
    assert!(i.contains(&1));
    assert!(!i.contains(&2));
}