use std::iter::Iterator;
use std::clone::Clone;
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedLength {
Length(u8),
CopyPrevious(u8),
RepeatZero3Bits(u8),
RepeatZero7Bits(u8),
}
impl EncodedLength {
fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
match prev {
0 => {
if repeat <= 10 {
EncodedLength::RepeatZero3Bits(repeat)
} else {
EncodedLength::RepeatZero7Bits(repeat)
}
}
1...15 => EncodedLength::CopyPrevious(repeat),
_ => panic!(),
}
}
}
pub const COPY_PREVIOUS: usize = 16;
pub const REPEAT_ZERO_3_BITS: usize = 17;
pub const REPEAT_ZERO_7_BITS: usize = 18;
const MIN_REPEAT: u8 = 3;
fn update_out_and_freq(encoded: EncodedLength,
output: &mut Vec<EncodedLength>,
frequencies: &mut [u16; 19]) {
let index = match encoded {
EncodedLength::Length(l) => usize::from(l),
EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
};
frequencies[index] += 1;
output.push(encoded);
}
fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
(length_value == 0 && repeats < 138) || repeats < 6
}
pub fn encode_lengths<I>(lengths: I) -> Option<(Vec<EncodedLength>, [u16; 19])>
where I: Iterator<Item = u8> + Clone
{
let lengths = lengths;
let mut out = Vec::with_capacity(lengths.size_hint().0 / 2);
let mut frequencies = [0u16; 19];
let mut repeat = 0;
let mut iter = lengths.clone().enumerate().peekable();
let mut prev = if let Some(&(_, b)) = iter.peek() {
!b
} else {
return None;
};
while let Some((n, l)) = iter.next() {
if l == prev && not_max_repetitions(l, repeat) {
repeat += 1;
}
if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
if repeat >= MIN_REPEAT {
let val = EncodedLength::from_prev_and_repeat(prev, repeat);
update_out_and_freq(val, &mut out, &mut frequencies);
repeat = 0;
if l != prev {
if l != 0 {
update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
repeat = 0;
} else {
repeat = 1;
}
}
} else {
let extra_skip = if iter.peek().is_none() && l == prev {
1
} else {
0
};
let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);
let extra = if l != 0 || iter.peek().is_none() {
1
} else {
0
};
for i in b_iter.take(repeat as usize + extra) {
update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
}
repeat = 1 - extra as u8;
}
}
prev = l;
}
Some((out, frequencies))
}
type NodeIndex = u16;
type WeightType = u32;
#[derive(Copy, Clone, Debug)]
struct ChainNode {
weight: WeightType,
count: u16,
tail: Option<NodeIndex>,
}
struct Leaf {
weight: WeightType,
count: u16,
}
fn advance_lookahead(indexes: &mut [(usize, usize)], index: usize, next: usize) {
indexes[index].0 = indexes[index].1;
indexes[index].1 = next;
}
fn boundary_package_merge(lookahead_indexes: &mut [(usize, usize)],
nodes: &mut Vec<ChainNode>,
leaves: &[Leaf],
index: usize,
last: bool) {
let count = nodes[lookahead_indexes[index].1].count;
let next_count = count + 1;
if index == 0 && count >= leaves.len() as u16 {
return;
};
if index == 0 {
advance_lookahead(lookahead_indexes, index, nodes.len());
let new_weight = leaves[count as usize].weight;
nodes.push(ChainNode {
weight: new_weight,
count: next_count,
tail: None,
});
return;
}
let sum = {
let la = lookahead_indexes[index - 1];
nodes[la.0].weight + nodes[la.1].weight
};
if count < leaves.len() as u16 && sum > leaves[count as usize].weight {
let y = nodes[lookahead_indexes[index].1].tail;
let next_weight = leaves[count as usize].weight;
advance_lookahead(lookahead_indexes, index, nodes.len());
nodes.push(ChainNode {
weight: next_weight,
count: next_count,
tail: y,
});
} else {
{
advance_lookahead(lookahead_indexes, index, nodes.len());
nodes.push(ChainNode {
weight: sum,
count: count,
tail: Some(lookahead_indexes[index - 1].1 as NodeIndex),
});
}
if !last {
boundary_package_merge(lookahead_indexes, nodes, leaves, index - 1, false);
boundary_package_merge(lookahead_indexes, nodes, leaves, index - 1, false);
}
}
}
pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
assert!(max_len > 1 && max_len < 16);
let mut lengths = vec![0; frequencies.len()];
let mut leaves: Vec<_> = frequencies.iter()
.enumerate()
.filter_map(|(n, f)| {
if *f > 0 {
Some(Leaf {
weight: *f as WeightType,
count: n as u16,
})
} else {
None
}
})
.collect();
if leaves.len() == 1 {
lengths[leaves[0].count as usize] += 1;
return lengths;
} else if leaves.is_empty() {
return lengths;
}
leaves.sort_by(|a, b| a.weight.cmp(&b.weight));
let mut nodes = Vec::with_capacity(8 * leaves.len());
nodes.extend(leaves.iter()
.take(2)
.enumerate()
.map(|(n, f)| {
ChainNode {
weight: f.weight,
count: n as u16 + 1,
tail: None,
}
}));
let mut lookahead_ptrs = vec![(0, 1); max_len];
let num_runs = (2 * leaves.len()) - 2 - 2;
for i in 0..num_runs {
let last = i == num_runs - 1;
boundary_package_merge(&mut lookahead_ptrs, &mut nodes, &leaves, max_len - 1, last);
}
let head = lookahead_ptrs.last().unwrap().1;
let mut next_node = head;
loop {
let node = nodes[next_node];
for item in leaves.iter().take(node.count as usize) {
lengths[item.count as usize] += 1;
}
if let Some(n) = node.tail {
next_node = n as usize;
} else {
break;
}
}
lengths
}
#[cfg(test)]
mod test {
use super::*;
use std::u16;
use huffman_table::NUM_LITERALS_AND_LENGTHS;
fn lit(value: u8) -> EncodedLength {
EncodedLength::Length(value)
}
fn zero(repeats: u8) -> EncodedLength {
match repeats {
0...10 => EncodedLength::RepeatZero3Bits(repeats),
_ => EncodedLength::RepeatZero7Bits(repeats),
}
}
fn copy(copies: u8) -> EncodedLength {
EncodedLength::CopyPrevious(copies)
}
#[test]
fn test_encode_lengths() {
use huffman_table::FIXED_CODE_LENGTHS;
let enc = encode_lengths(FIXED_CODE_LENGTHS.iter().cloned()).unwrap();
assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
assert_eq!(enc.1[17..19], [0, 0]);
let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
let enc = encode_lengths(test_lengths.iter().cloned()).unwrap().0;
assert_eq!(enc,
vec![lit(0), lit(0), lit(5), lit(0), lit(15), lit(1), zero(3), lit(2), lit(4),
copy(3), lit(3), lit(5), copy(3)]);
let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
let enc = encode_lengths(test_lengths.iter().cloned()).unwrap().0;
assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);
let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
let enc = encode_lengths(test_lengths.iter().cloned()).unwrap().0;
assert_eq!(enc,
vec![zero(3), lit(3), lit(3), lit(3), lit(5), lit(4), copy(3), lit(0), lit(0)]);
let lens = [0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1];
let _ = encode_lengths(lens.iter().cloned()).unwrap().0;
let lens = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 7, 8, 7, 8, 6, 6, 8, 0,
7, 6, 7, 8, 7, 7, 8, 0, 0, 0, 0, 0, 8, 8, 0, 8, 7, 0, 10, 8, 0, 8, 0, 10, 10,
8, 8, 10, 8, 0, 8, 7, 0, 10, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 7, 7, 6,
7, 8, 8, 6, 0, 0, 8, 8, 7, 8, 8, 0, 7, 6, 6, 8, 8, 8, 10, 10, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
4, 3, 3, 4, 4, 5, 5, 5, 5, 5, 8, 8, 6, 7, 8, 10, 10, 0, 9 ,
0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 6, 5, 5, 5, 5, 6, 5, 5, 4, 4, 4, 4, 4, 4,
3, 4, 3, 4];
let enc = encode_lengths(lens.iter().cloned()).unwrap().0;
assert_eq!(&enc[..10],
&[zero(10), lit(9), lit(0), lit(0), lit(9), zero(18), lit(6), zero(3), lit(8),
zero(4)]);
assert_eq!(&enc[10..20],
&[lit(8), lit(0), lit(0), lit(7), lit(8), lit(7), lit(8), lit(6), lit(6),
lit(8)]);
let enc = encode_lengths([1, 1, 1, 2].iter().cloned()).unwrap().0;
assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
let enc = encode_lengths([0, 0, 3].iter().cloned()).unwrap().0;
assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
let enc = encode_lengths([0, 0, 0, 5, 2].iter().cloned()).unwrap().0;
assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);
let enc = encode_lengths([0, 0, 0, 5, 0].iter().cloned()).unwrap().0;
assert!(*enc.last().unwrap() != lit(5));
}
#[test]
fn test_lengths_from_frequencies() {
let frequencies = [1, 1, 5, 7, 10, 14];
let expected = [4, 4, 3, 2, 2, 2];
let res = huffman_lengths_from_frequency(&frequencies, 4);
assert_eq!(expected, res.as_slice());
let frequencies = [1, 5, 1, 7, 10, 14];
let expected = [4, 3, 4, 2, 2, 2];
let res = huffman_lengths_from_frequency(&frequencies, 4);
assert_eq!(expected, res.as_slice());
let frequencies = [0, 25, 0, 10, 2, 4];
let res = huffman_lengths_from_frequency(&frequencies, 4);
assert_eq!(res[0], 0);
assert_eq!(res[2], 0);
assert!(res[1] < 4);
let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
let res = huffman_lengths_from_frequency(&frequencies, 5);
let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
assert_eq!(expected, res.as_slice());
let frequencies = [0; 30];
let res = huffman_lengths_from_frequency(&frequencies, 5);
for (a, b) in frequencies.iter().zip(res.iter()) {
assert_eq!(*a, (*b).into());
}
let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
frequencies[55] = u16::MAX / 3;
frequencies[125] = u16::MAX / 3;
let res = huffman_lengths_from_frequency(&frequencies, 15);
assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
assert!(res[55] < 3);
assert!(res[125] < 3);
}
}