restricted-tuple 0.1.0

Tuple generator that ensures generated tuple contains at least one element from each given set
Documentation
//! A restricted n-tuple from m sets here refers to a sequence of n
//! elements from the given m sets, where, unlike a normal tuple, there
//! is required to be at least one element from each set.
//!
//! A restricted n-tuple counting function is a function which takes
//! n and returns the number of possible restricted n-tuples for the
//! given m sets.

extern crate integral_exponential_polynomial;
extern crate num_bigint;
extern crate num_integer;
#[cfg(test)]
extern crate num_iter;
extern crate num_traits;

use integral_exponential_polynomial::Polynomial;
use num_bigint::{BigUint, BigInt};
use num_integer::Integer;
use num_traits::{One, ToPrimitive};
use std::collections::HashMap;
use std::fmt;

type CountingFunctionCacheInternal = HashMap<Vec<usize>, Polynomial>;

/// Cache holding the counting functions for sets calculated
#[derive(Clone)]
pub struct CountingFunctionCache {
    internal: CountingFunctionCacheInternal
}

impl CountingFunctionCache {
    pub fn new() -> CountingFunctionCache {
        CountingFunctionCache {
            internal: CountingFunctionCacheInternal::new()
        }
    }
}

impl fmt::Debug for CountingFunctionCache {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for (k, v) in self.internal.iter() {
            try!(writeln!(f, "{:?}: {}", k, v));
        }
        Ok(())
    }
}

fn get_normal_tuple_counting_function(sets: &Vec<usize>) -> Polynomial {
    let sum = sets.iter().fold(0, |sum, s| sum + s);
    Polynomial::one_term(BigInt::one(), BigUint::from(sum))
}

fn generate_counting_function(cache: &mut CountingFunctionCacheInternal,
                              sets: &Vec<usize>, mask: usize) -> Polynomial {
    let sets = sets.iter().enumerate().filter_map(|(index, &item)| {
        if mask & 1 << index != 0 {
            Some(item.clone())
        } else {
            None
        }
    }).collect::<Vec<usize>>();

    if let Some(result) = cache.get(&sets) {
        return result.clone();
    }
    let mask_all = (1 << sets.len()) - 1;
    let mut result = get_normal_tuple_counting_function(&sets);
    // Note that zero and mask_all are excluded intentionally. Zero is
    // excluded because it would always yield zero, which is useless,
    // while mask_all is excluded because that mask presents the cases
    // we actually want for this function.
    for mask in 1..mask_all {
        result -= generate_counting_function(cache, &sets, mask);
    }
    cache.insert(sets, result.clone());
    result
}

/// Counter for calculating number of restricted tuples
#[derive(Clone)]
pub struct Counter {
    polynomials: Vec<Polynomial>,
}

impl Counter {
    /// Creates a counter.
    pub fn new(cache: &mut CountingFunctionCache,
               sets: &Vec<usize>) -> Counter {
        let mask_num = 1 << sets.len();
        let mask_all = mask_num - 1;
        let result_base = get_normal_tuple_counting_function(&sets);
        let polynomials = (0..mask_num).map(|mask| {
            let mut result = result_base.clone();
            // Note that zero and mask_all are excluded intentionally
            // since mask_all would always be dropped by the condition
            // inside, while zero would always yield zero as a result.
            for submask in 1..mask_all {
                if submask | mask != mask_all {
                    result -= generate_counting_function(&mut cache.internal,
                                                         &sets, submask);
                }
            }
            result
        }).collect::<Vec<_>>();
        Counter { polynomials: polynomials }
    }

    fn count(&self, present_mask: usize, length: usize) -> BigUint {
        let result = self.polynomials[present_mask].apply(length);
        let result = result.to_biguint().unwrap();
        result
    }

    /// Returns the total number of restrict tuples given the sets and
    /// the length.
    pub fn count_total(&self, length: usize) -> BigUint {
        self.count(0, length)
    }
}

/// Generator for yielding restricted tuple
#[derive(Clone)]
pub struct Generator<'a, T: 'a + Clone> {
    sets: &'a Vec<Vec<T>>,
    counter: Counter,
}

impl<'a, T: 'a + Clone> Generator<'a, T> {
    /// Creates a generator.
    ///
    /// # Example
    ///
    /// ```
    /// # use restricted_tuple::*;
    /// let sets: Vec<Vec<u32>> = vec![vec![10, 20], vec![30, 40]];
    /// let mut cache = CountingFunctionCache::new();
    /// let generator = Generator::new(&mut cache, &sets);
    /// ```
    pub fn new(cache: &mut CountingFunctionCache,
               sets: &'a Vec<Vec<T>>) -> Generator<'a, T> {
        let num_sets = sets.iter().map(|s| s.len()).collect();
        let counter = Counter::new(cache, &num_sets);
        Generator {
            sets: sets,
            counter: counter,
        }
    }

    fn generate_internal<CountFn>(&self, length: usize, value: BigUint,
                                  count_fn: &mut CountFn) -> Vec<T>
            where CountFn: FnMut(usize, usize) -> BigUint {
        let mut result = Vec::with_capacity(length);
        let mut present_mask = 0;
        let mut value = value % count_fn(0, length);
        for i in 0..length {
            let left_length = length - i - 1;
            for (j, set) in self.sets.iter().enumerate() {
                let next_mask = present_mask | 1 << j;
                let item_span = count_fn(next_mask, left_length);
                let total_span = item_span.clone() * BigUint::from(set.len());
                if value >= total_span {
                    value = value - total_span;
                    continue;
                }
                let (quo, rem) = value.div_rem(&item_span);
                value = rem;
                present_mask = next_mask;
                result.push(set[quo.to_usize().unwrap()].clone());
                break;
            }
        }
        result
    }

    /// Generates a restricted tuple with given length and value.
    ///
    /// # Panics
    ///
    /// Panics when no restricted tuple can be generated. This could
    /// happen if the length is smaller than the number of sets, or if
    /// there is any empty set in the provided sets, or the provided
    /// sets itself is empty.
    pub fn generate(&self, length: usize, value: BigUint) -> Vec<T> {
        self.generate_internal(length, value, &mut |mask, len| {
            self.counter.count(mask, len)
        })
    }
}

/// Experimental generator with cache
///
/// This should have the identical behavior as Generator, but could be
/// faster, as it caches counting result inside. The drawback is that
/// it requires a mutable reference to call generate method.
#[derive(Clone)]
pub struct GeneratorWithCache<'a, T: 'a + Clone> {
    generator: &'a Generator<'a, T>,
    cache: Vec<Vec<Option<BigUint>>>,
}

impl<'a, T: 'a + Clone> GeneratorWithCache<'a, T> {
    pub fn from_generator(generator: &'a Generator<'a, T>)
        -> GeneratorWithCache<'a, T> {
        let mask_num = 1 << generator.sets.len();
        GeneratorWithCache {
            generator: generator,
            cache: vec![vec![]; mask_num],
        }
    }

    pub fn generate(&mut self, length: usize, value: BigUint) -> Vec<T> {
        self.generator.generate_internal(length, value, &mut |mask, len| {
            let mut cache_line = &mut self.cache[mask];
            if cache_line.len() <= len {
                let fillup_len = len - cache_line.len() + 1;
                cache_line.append(&mut vec![None; fillup_len]);
            } else if let Some(result) = cache_line[length].as_ref() {
                return result.clone();
            }
            let result = self.generator.counter.count(mask, len);
            cache_line[length] = Some(result.clone());
            result
        })
    }
}

#[derive(Clone, Copy, Debug)]
struct ItemState {
    set: usize,
    index: usize,
    present_mask: usize,
}

/// Iterator yielding all possible restricted tuples for given sets and
/// a given length.
#[derive(Clone)]
pub struct Enumerator<'a, T: 'a + Clone> {
    length: usize,
    sets: &'a Vec<Vec<T>>,
    next_state: Vec<ItemState>,
}

impl<'a, T: 'a + Clone> Enumerator<'a, T> {
    pub fn new(sets: &'a Vec<Vec<T>>, length: usize) -> Enumerator<'a, T> {
        let mut initial_state = vec![ItemState {
            set: 0, index: 0, present_mask: 1
        }; length];
        let mut mask = 1;
        for i in 1..sets.len() {
            mask |= 1 << i;
            initial_state[length - sets.len() + i] = ItemState {
                set: i, index: 0, present_mask: mask
            };
        }
        if !sets.iter().all(|set| !set.is_empty()) {
            // If any set is empty, or there is no set at all, no
            // restricted tuple would exist. The remaining algorithm
            // relies on the assertion that there is at least one set,
            // and each set has at least one element. Stop everything
            // here if that is not true, otherwise it would panic.
            initial_state[0].set = sets.len();
        }
        Enumerator {
            length: length,
            sets: sets,
            next_state: initial_state
        }
    }

    fn find_next(&mut self) {
        let mask_all = (1 << self.sets.len()) - 1;
        loop {
            let mut cur = self.length - 1;
            // Climb from the last to find the next normal tuple
            loop {
                let mut item = &mut self.next_state[cur];
                item.index += 1;
                if item.index == self.sets[item.set].len() {
                    // No more element in this set, try next set
                    item.index = 0;
                    item.set += 1;
                    if item.set == self.sets.len() {
                        if cur > 0 {
                            // No more set, try previous slot
                            item.set = 0;
                            cur -= 1;
                            continue;
                        } else {
                            // All done
                            return;
                        }
                    }
                }
                break;
            }
            // Update present mask of all affected items
            if cur == 0 {
                let mut item = &mut self.next_state[0];
                item.present_mask = 1 << item.set;
                cur += 1;
            }
            for i in cur..self.length {
                let prev_mask = self.next_state[i - 1].present_mask;
                let mut item = &mut self.next_state[i];
                item.present_mask = prev_mask | 1 << item.set;
            }
            // Check if the new tuple satisfy the restriction
            if self.next_state.last().unwrap().present_mask == mask_all {
                break;
            }
        }
    }
}

impl<'a, T: 'a + Clone> Iterator for Enumerator<'a, T> {
    type Item = Vec<T>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.next_state[0].set >= self.sets.len() {
            None
        } else {
            let result = self.next_state.iter().map(|item| {
                self.sets[item.set][item.index].clone()
            }).collect();
            self.find_next();
            Some(result)
        }
    }
}

#[cfg(test)]
mod test {
    use num_bigint::BigUint;
    use num_iter::range;
    use num_traits::{Zero, One};
    use super::{CountingFunctionCache, Counter, Generator, Enumerator};

    fn test_sets1() -> Vec<Vec<u8>> {
        vec![
            vec!['A' as u8, 'B' as u8, 'C' as u8, 'D' as u8],
            vec!['a' as u8, 'b' as u8, 'c' as u8],
            vec!['0' as u8, '1' as u8],
        ]
    }

    struct ResultCycleTest<'a> {
        generator: &'a Generator<'a, u8>,
    }
    impl<'a> ResultCycleTest<'a> {
        fn run(&self, length: usize) {
            let count = self.generator.counter.count(0, length);
            for value in range(BigUint::zero(), count.clone()) {
                let value1 = count.clone() + value.clone();
                assert_eq!(self.generator.generate(length, value),
                           self.generator.generate(length, value1));
            }
        }
    }

    #[test]
    fn result_cycle_test() {
        let sets = test_sets1();
        let mut cache = CountingFunctionCache::new();
        let test = ResultCycleTest {
            generator: &Generator::new(&mut cache, &sets),
        };
        test.run(3);
        test.run(4);
    }

    struct ResultMatchTest<'a> {
        generator: &'a Generator<'a, u8>,
    }
    impl<'a> ResultMatchTest<'a> {
        fn run(&self, length: usize) {
            let mut value = BigUint::zero();
            for item in Enumerator::new(self.generator.sets, length) {
                let result = self.generator.generate(length, value.clone());
                assert_eq!(result, item);
                value = value + BigUint::one();
            }
            assert_eq!(value, self.generator.counter.count(0, length));
        }
    }

    #[test]
    fn result_match_test() {
        let sets = test_sets1();
        let mut cache = CountingFunctionCache::new();
        let test = ResultMatchTest {
            generator: &Generator::new(&mut cache, &sets),
        };
        test.run(3);
        test.run(4);
    }

    #[test]
    fn zero_count_test() {
        let mut cache = CountingFunctionCache::new();
        let set_lens = vec![];
        assert!(Counter::new(&mut cache, &set_lens).count(0, 5).is_zero());
        let set_lens = vec![5, 0, 8];
        assert!(Counter::new(&mut cache, &set_lens).count(0, 5).is_zero());
        let set_lens = vec![5, 2, 8];
        assert!(Counter::new(&mut cache, &set_lens).count(0, 2).is_zero());
        let set_lens = vec![5, 1, 8];
        assert!(!Counter::new(&mut cache, &set_lens).count(0, 5).is_zero());
    }

    fn panic_generating_test(length: usize, sets: Vec<Vec<u8>>) {
        let mut cache = CountingFunctionCache::new();
        let generator = Generator::new(&mut cache, &sets);
        generator.generate(length, BigUint::zero());
    }

    #[test]
    #[should_panic]
    fn empty_sets_generating_test() {
        panic_generating_test(5, vec![]);
    }

    #[test]
    #[should_panic]
    fn empty_set_in_sets_generating_test() {
        panic_generating_test(5, vec![
            vec!['0' as u8, '1' as u8],
            vec![],
            vec!['A' as u8, 'B' as u8],
        ]);
    }

    #[test]
    #[should_panic]
    fn length_not_enough_generating_test() {
        panic_generating_test(2, vec![
            vec!['0' as u8, '1' as u8],
            vec!['A' as u8, 'B' as u8],
            vec!['a' as u8, 'b' as u8],
        ]);
    }
}