use crate::bits::Bits;
/// Bit widths a single code point may occupy, as `(min, len)` pairs listed in
/// increasing order of `len`.
///
/// * `len` is how many bits are consumed to form one code point; 7, 11, 16 and
///   21 are the payload sizes of 1-, 2-, 3- and 4-byte UTF-8 sequences.
/// * `min` is the lower-bound parameter the iterator uses to reject values
///   that are too small for that width; `0` means "no lower bound".
const UTF8_LENGTHS: [(usize, usize); 4] = [
    (0, 7),
    (8, 11),
    (12, 16),
    (17, 21),
];
/// Iterator over every way of splitting a bit sequence into consecutive code
/// points whose widths come from `UTF8_LENGTHS`.
///
/// Each value is one node of a depth-first search: it tries every width on its
/// own `bits` and delegates whatever is left over to a boxed child node.
pub struct Combinations<I: Bits> {
    /// Bits that still have to be split into code points at this node.
    bits: I,
    /// Cursor over the `(min, len)` entries not yet tried at this node.
    it: std::slice::Iter<'static, (usize, usize)>,
    /// Code points already chosen on the path leading to this node.
    partial: Vec<u32>,
    /// Child search over the bits left after the width currently being
    /// explored; `None` when no child is active.
    inner: Option<Box<Self>>,
}
impl<I> Combinations<I>
where
I: Bits,
{
pub fn new(bits: I) -> Self {
Self {
bits,
it: UTF8_LENGTHS.iter(),
partial: Vec::new(),
inner: None,
}
}
fn partial(bits: I, partial: Vec<u32>) -> Self {
Self {
bits,
it: UTF8_LENGTHS.iter(),
partial,
inner: None,
}
}
}
impl<I> Iterator for Combinations<I>
where
    I: Bits,
{
    type Item = Vec<u32>;

    /// Returns the next complete split of the bits into code points, or
    /// `None` once every split has been produced.
    ///
    /// Splits are explored depth-first, trying the widths of `UTF8_LENGTHS`
    /// in table order at every position. A split is only reported when it
    /// consumes all bits exactly. A code point read with a width wider than
    /// 7 bits is skipped when its value would have fit in a shorter width
    /// (an overlong form). No other validity check is applied to the values.
    fn next(&mut self) -> Option<Vec<u32>> {
        loop {
            // Finish the sub-search that is already in progress before
            // trying another width at this level.
            if let Some(inner) = self.inner.as_mut() {
                if let Some(found) = inner.next() {
                    return Some(found);
                }
                self.inner = None;
            }

            let &(min_bits, width) = self.it.next()?;
            if self.bits.len() < width {
                // Widths are listed in increasing order, so no later entry
                // can fit either.
                return None;
            }

            let mut rest = self.bits.clone();
            let code_point = rest.take_as_u32(width);

            // `min_bits` is the smallest number of significant bits that
            // justifies this width, so the smallest acceptable value is
            // `1 << (min_bits - 1)` (0x80, 0x800, 0x10000). `min_bits == 0`
            // marks the narrowest width, which accepts every value.
            if min_bits != 0 && code_point < (1u32 << (min_bits - 1)) {
                continue;
            }

            let mut chosen = self.partial.clone();
            chosen.push(code_point);
            if rest.len() == 0 {
                return Some(chosen);
            }

            // Bits remain: search them in a child node, which is polled at
            // the top of the next loop iteration.
            self.inner = Some(Box::new(Combinations::partial(rest, chosen)));
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vecboolbits::VecBoolBits;

    /// Runs the iterator to completion and returns every split, in the order
    /// it was produced.
    fn combs(bits: VecBoolBits) -> Vec<Vec<u32>> {
        Combinations::new(bits).collect()
    }

    /// Builds a `VecBoolBits` from text: `'1'` becomes `true`, every other
    /// character becomes `false`.
    pub fn bits(s: &'static str) -> VecBoolBits {
        VecBoolBits::new(s.chars().map(|c| c == '1').collect())
    }

    #[test]
    fn given_enough_bits_for_a_single_char_then_combs_returns_it() {
        let first: Vec<Vec<u32>> = vec![vec![0b01100110]];
        assert_eq!(combs(bits("1100110")), first);

        let second: Vec<Vec<u32>> = vec![vec![0b01100111]];
        assert_eq!(combs(bits("1100111")), second);
    }

    #[test]
    fn combs_returns_multiple_numbers() {
        let expected: Vec<Vec<u32>> = vec![vec![0b01100110, 0b01100111]];
        assert_eq!(combs(bits("11001101100111")), expected);
    }

    #[test]
    fn combs_explores_multiple_alternatives() {
        // 21 bits: either three 7-bit values or a single 21-bit value.
        let expected: Vec<Vec<u32>> = vec![
            vec![0b01100110, 0b01100111, 0b01101111],
            vec![0b110011011001111101111],
        ];
        assert_eq!(combs(bits("110011011001111101111")), expected);

        // 18 bits: a 7-bit and an 11-bit value, in either order.
        let expected: Vec<Vec<u32>> = vec![
            vec![0b1111111, 0b11111111111],
            vec![0b11111111111, 0b1111111],
        ];
        assert_eq!(combs(bits("111111111111111111")), expected);

        // 32 bits, all ones.
        let expected: Vec<Vec<u32>> = vec![
            vec![0b1111111, 0b1111111, 0b1111111, 0b11111111111],
            vec![0b1111111, 0b1111111, 0b11111111111, 0b1111111],
            vec![0b1111111, 0b11111111111, 0b1111111, 0b1111111],
            vec![0b11111111111, 0b1111111, 0b1111111, 0b1111111],
            vec![0b11111111111, 0b111111111111111111111],
            vec![0b1111111111111111, 0b1111111111111111],
            vec![0b111111111111111111111, 0b11111111111],
        ];
        assert_eq!(combs(bits("11111111111111111111111111111111")), expected);

        // 32 bits, alternating pattern.
        let expected: Vec<Vec<u32>> = vec![
            vec![0b1010101, 0b0101010, 0b1010101, 0b01010101010],
            vec![0b1010101, 0b0101010, 0b10101010101, 0b0101010],
            vec![0b1010101, 0b01010101010, 0b1010101, 0b0101010],
            vec![0b10101010101, 0b0101010, 0b1010101, 0b0101010],
            vec![0b10101010101, 0b010101010101010101010],
            vec![0b1010101010101010, 0b1010101010101010],
            vec![0b101010101010101010101, 0b01010101010],
        ];
        assert_eq!(combs(bits("10101010101010101010101010101010")), expected);

        // 32 bits that are mostly zero: only one split is expected.
        let expected: Vec<Vec<u32>> =
            vec![vec![0b0000000, 0b0000000, 0b0000000, 0b00100000001]];
        assert_eq!(combs(bits("00000000000000000000000100000001")), expected);
    }

    #[test]
    fn if_number_of_bytes_does_not_match_character_sizes_we_return_nothing() {
        // 3 bits and 8 bits cannot be written as a sum of 7, 11, 16 and 21.
        assert!(combs(bits("111")).is_empty());
        assert!(combs(bits("11111111")).is_empty());
    }
}