char_set 0.0.1

A library providing a high-performance char set.
//! Provides a CharSet type which is an optimized Set<char>.
//!
//! This type is analogous to IntegerSet<char>, which is not viable
//! since char does not implement the Num trait and, being Unicode,
//! is not contiguous.

extern crate integer_set;
extern crate interval;

use self::interval::Collection;
use self::interval::Interval;
use self::interval::Set;

use integer_set::IntegerSet;

/// A set of `char` values.
///
/// `CharSet` is just a wrapper over `IntegerSet<u32>`; the constructors
/// and `contains` convert characters with `as u32` before touching the
/// wrapped set.
#[deriving(PartialEq, Show)]
pub struct CharSet(IntegerSet<u32>);

impl CharSet {
    /// Builds a set from one interval of `u32` character values.
    fn new(span: Interval<u32>) -> CharSet {
        let intervals = vec![span];
        CharSet(IntegerSet::new(intervals))
    }

    /// Creates a new instance holding exactly the character `c`.
    pub fn for_char(c: char) -> CharSet {
        // A single character is the range whose two bounds coincide.
        let code_point = c as u32;
        CharSet::new(Interval::new(code_point, code_point))
    }

    /// Creates a new instance spanning [begin, end].
    ///
    /// Both bounds are converted with `as u32` and handed to
    /// `Interval::new`.
    // NOTE(review): behaviour for `begin > end` is decided by
    // `Interval::new`, which is not visible here -- confirm.
    pub fn for_range(begin: char, end: char) -> CharSet {
        let lower = begin as u32;
        let upper = end as u32;
        CharSet::new(Interval::new(lower, upper))
    }

    // TODO It would be useful to represent a non-closed interval
    // since we can't perform arithmetic on chars due to their
    // non-contiguous nature.

    /// Consumes the set and returns the wrapped `IntegerSet<u32>`.
    fn to_integer_set(self) -> IntegerSet<u32> {
        let CharSet(wrapped) = self;
        wrapped
    }

    /// Borrows the wrapped `IntegerSet<u32>` without consuming the set.
    fn as_integer_set(&self) -> &IntegerSet<u32> {
        let CharSet(ref wrapped) = *self;
        wrapped
    }
}

impl Collection for CharSet {
    /// Reports whether the wrapped integer set is empty.
    fn is_empty(&self) -> bool {
        let wrapped = self.as_integer_set();
        wrapped.is_empty()
    }

    /// Returns the length reported by the wrapped integer set.
    // NOTE(review): presumably the number of code points rather than the
    // number of intervals -- confirm against `IntegerSet::len`.
    fn len(&self) -> uint {
        let wrapped = self.as_integer_set();
        wrapped.len()
    }
}

impl Set<char> for CharSet {
    /// Tests membership of `value` by looking up its `u32` value in the
    /// wrapped integer set.
    fn contains(&self, value: &char) -> bool {
        let code_point = *value as u32;
        self.as_integer_set().contains(&code_point)
    }

    /// Delegates the disjointness check to the two wrapped integer sets.
    fn is_disjoint(&self, other: &CharSet) -> bool {
        let mine = self.as_integer_set();
        let theirs = other.as_integer_set();
        mine.is_disjoint(theirs)
    }

    /// Delegates the subset check (`self` within `other`) to the two
    /// wrapped integer sets.
    fn is_subset(&self, other: &CharSet) -> bool {
        let mine = self.as_integer_set();
        let theirs = other.as_integer_set();
        mine.is_subset(theirs)
    }
}

impl Add<CharSet, CharSet> for CharSet {
    /// Combines two sets by adding their wrapped integer sets; both
    /// operands are consumed.
    fn add(self, other: CharSet) -> CharSet {
        let lhs = self.to_integer_set();
        let rhs = other.to_integer_set();
        CharSet(lhs + rhs)
    }
}

impl Add<char, CharSet> for CharSet {
    /// Adds a single character by wrapping it in a one-element set and
    /// reusing the set-plus-set addition.
    fn add(self, other: char) -> CharSet {
        let singleton = CharSet::for_char(other);
        self + singleton
    }
}


// TODO Implement Sub trait.

// A single-character set contains that character and neither neighbour.
#[test]
fn test_singleton_contains() {
    let only_b = CharSet::for_char('b');
    assert!(only_b.contains(&'b'));
    for absent in ['a', 'c'].iter() {
        assert!(!only_b.contains(absent));
    }
}

// A range set contains both bounds and the interior, but not the
// characters just outside either bound.
#[test]
fn test_range_contains() {
    let d_to_f = CharSet::for_range('d', 'f');
    for member in ['d', 'e', 'f'].iter() {
        assert!(d_to_f.contains(member));
    }
    for outsider in ['c', 'g'].iter() {
        assert!(!d_to_f.contains(outsider));
    }
}

// Adding two sets yields the members of both and nothing in between.
#[test]
fn test_union() {
    let lone_c = CharSet::for_char('c');
    let e_to_f = CharSet::for_range('e', 'f');
    let merged = lone_c + e_to_f;
    for member in ['c', 'e', 'f'].iter() {
        assert!(merged.contains(member));
    }
    for outsider in ['b', 'd', 'g'].iter() {
        assert!(!merged.contains(outsider));
    }
}

// TODO Test is good, but doesn't work due to lack of Sub trait on
// CharSet.
#[test]
#[ignore]
#[cfg(do_not_compile)]
fn test_difference() {
    // Removing an interior range leaves both sides intact.
    let punched = CharSet::for_range('a', 'z') - CharSet::for_range('c', 'd');
    for kept in ['a', 'z', 'b', 'e'].iter() {
        assert!(punched.contains(kept));
    }
    for removed in ['c', 'd'].iter() {
        assert!(!punched.contains(removed));
    }

    // Removing the lower bound leaves the rest of the range.
    let trimmed = CharSet::for_range('a', 'c') - CharSet::for_char('a');
    for kept in ['b', 'c'].iter() {
        assert!(trimmed.contains(kept));
    }
    assert!(!trimmed.contains(&'a'));
}