i-dunno 0.6.0

RFC 8771 Internationalized Deliberately Unreadable Network Notation
Documentation
use crate::bits::Bits;

/// The `(min, max)` bit-width pairs tried, smallest first, when carving
/// candidate code points out of a bit sequence.
///
/// `max` is the number of bits read for one candidate; `min` is used to
/// reject candidates whose value is too small for that size.
/// NOTE(review): the four entries presumably mirror the 1- to 4-octet rows
/// of the UTF-8 length table in RFC 8771 - confirm against the RFC.
const UTF8_LENGTHS: [(usize, usize); 4] = [(0, 7), (8, 11), (12, 16), (17, 21)];

/// Iterator that consumes the bits handed to it and yields lists of u32
/// values that may be code points.
pub struct Combinations<I: Bits> {
    /// Source of the bits still to be turned into u32 values
    bits: I,
    /// Cursor over the code point sizes (in bits) that UTF-8 can carry
    it: std::slice::Iter<'static, (usize, usize)>,
    /// Potential code points found before reaching `bits`
    partial: Vec<u32>,
    /// Nested iterator over the bits that follow the most recently found
    /// potential code point; `None` when no such exploration is in
    /// progress.
    inner: Option<Box<Combinations<I>>>,
}

impl<I: Bits> Combinations<I> {
    /// Creates an iterator over `bits` that starts with no potential code
    /// points found yet.
    pub fn new(bits: I) -> Self {
        Self::partial(bits, Vec::new())
    }

    /// Creates an iterator over `bits` that starts from the potential
    /// code points in `partial`, which were found in earlier bits.
    fn partial(bits: I, partial: Vec<u32>) -> Self {
        let it = UTF8_LENGTHS.iter();
        Self {
            bits,
            it,
            partial,
            inner: None,
        }
    }
}

impl<I> Iterator for Combinations<I>
where
    I: Bits,
{
    type Item = Vec<u32>;

    /// Returns the next way of splitting all the remaining bits into
    /// potential code points, or `None` when there are no more.
    ///
    /// Each returned `Vec` starts with the code points already held in
    /// `partial`, followed by the ones carved out of `bits`. Only splits
    /// that use up every bit exactly are returned; no padding is added.
    fn next(&mut self) -> Option<Vec<u32>> {
        // If we're part-way through an inner iterator, return any
        // result it provides
        if let Some(inner) = &mut self.inner {
            if let Some(ret) = inner.next() {
                return Some(ret);
            }
            // The inner iterator is exhausted: drop it and carry on with
            // the next character size below.
            self.inner = None;
        }

        // Otherwise, move on to the next potential character size
        while let Some(&(min, ln)) = self.it.next() {
            // Sizes are tried in increasing order, so if we don't have
            // enough bits for this one, no later size can fit either.
            if self.bits.len() < ln {
                return None;
            }

            // Make a potential character from a copy of the bits, so that
            // the remaining sizes can still be tried from the same start.
            let mut new_bits = self.bits.clone();
            let code_point = new_bits.take_as_u32(ln);

            // `min` is the smallest number of significant bits a code
            // point of this size may have, so bit `min - 1` or a higher
            // one must be set. Anything smaller could have fit into a
            // smaller number of code units, so it is not an allowed code
            // point for this encoding: skip to the next size.
            if min > 0 && code_point < (1 << (min - 1)) {
                continue;
            }

            // Add our newly-found code point to the list
            let mut new_partial = self.partial.clone();
            new_partial.push(code_point);

            if new_bits.len() == 0 {
                // If we've run out of bits, we found an encoding
                return Some(new_partial);
            }

            // If we have more bits, we must recurse: the inner iterator
            // explores them, and the call below asks it for its first
            // result (or moves on to the next size if it has none).
            self.inner = Some(Box::new(Combinations::partial(new_bits, new_partial)));
            return self.next();
        }

        // We didn't find any character size that fits exactly the bits we
        // have left.  No more combinations.
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::vecboolbits::VecBoolBits;

    /// Collects every combination found in `bits` into a Vec.
    fn combs(bits: VecBoolBits) -> Vec<Vec<u32>> {
        Combinations::new(bits).collect()
    }

    /// Builds a VecBoolBits from a string: each '1' becomes a true bit
    /// and every other character becomes a false bit.
    pub fn bits(s: &str) -> VecBoolBits {
        VecBoolBits::new(s.chars().map(|c| c == '1').collect())
    }

    #[test]
    fn given_enough_bits_for_a_single_char_then_combs_returns_it() {
        assert_eq!(combs(bits("1100110")), vec![vec![0b01100110]]);
        assert_eq!(combs(bits("1100111")), vec![vec![0b01100111]]);
    }

    #[test]
    fn combs_returns_multiple_numbers() {
        assert_eq!(
            combs(bits("11001101100111")), // 14
            vec![vec![0b01100110, 0b01100111]]
        );
    }

    #[test]
    fn combs_explores_multiple_alternatives() {
        assert_eq!(
            combs(bits("110011011001111101111")), // 21
            vec![
                vec![0b01100110, 0b01100111, 0b01101111],
                vec![0b110011011001111101111]
            ]
        );
        assert_eq!(
            combs(bits("111111111111111111")), // 18
            vec![
                vec![0b1111111, 0b11111111111],
                vec![0b11111111111, 0b1111111]
            ]
        );
        assert_eq!(
            combs(bits("11111111111111111111111111111111")), // 32
            vec![
                vec![0b1111111, 0b1111111, 0b1111111, 0b11111111111],
                vec![0b1111111, 0b1111111, 0b11111111111, 0b1111111],
                vec![0b1111111, 0b11111111111, 0b1111111, 0b1111111],
                vec![0b11111111111, 0b1111111, 0b1111111, 0b1111111],
                vec![0b11111111111, 0b111111111111111111111],
                vec![0b1111111111111111, 0b1111111111111111],
                vec![0b111111111111111111111, 0b11111111111],
            ]
        );
        assert_eq!(
            combs(bits("10101010101010101010101010101010")), // 32
            vec![
                vec![0b1010101, 0b0101010, 0b1010101, 0b01010101010],
                vec![0b1010101, 0b0101010, 0b10101010101, 0b0101010],
                vec![0b1010101, 0b01010101010, 0b1010101, 0b0101010],
                vec![0b10101010101, 0b0101010, 0b1010101, 0b0101010],
                vec![0b10101010101, 0b010101010101010101010],
                vec![0b1010101010101010, 0b1010101010101010],
                vec![0b101010101010101010101, 0b01010101010],
            ]
        );
        assert_eq!(
            combs(bits("00000000000000000000000100000001")), // 32
            vec![vec![0b0000000, 0b0000000, 0b0000000, 0b00100000001],]
        );
    }

    #[test]
    fn if_number_of_bytes_does_not_match_character_sizes_we_return_nothing() {
        // See Issue#1 - we do not provide encodings including padding
        assert_eq!(combs(bits("111")), Vec::<Vec<u32>>::new());
        assert_eq!(combs(bits("11111111")), Vec::<Vec<u32>>::new());
    }
}