use std::io;
use std::cmp;
use std::iter;
use std::ops::Range;
use bit;
use huffman;
use huffman::Builder;
const FIXED_LITERAL_OR_LENGTH_CODE_TABLE: [(u8, Range<u16>, u16); 4] =
[(8, 000..144, 0b0_0011_0000),
(9, 144..256, 0b1_1001_0000),
(7, 256..280, 0b0_0000_0000),
(8, 280..288, 0b0_1100_0000)];
const BITWIDTH_CODE_ORDER: [usize; 19] = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2,
14, 1, 15];
const END_OF_BLOCK: u16 = 256;
const LENGTH_TABLE: [(u16, u8); 29] =
[(3, 0), (4, 0), (5, 0), (6, 0), (7, 0), (8, 0), (9, 0), (10, 0), (11, 1), (13, 1), (15, 1),
(17, 1), (19, 2), (23, 2), (27, 2), (31, 2), (35, 3), (43, 3), (51, 3), (59, 3), (67, 4),
(83, 4), (99, 4), (115, 4), (131, 5), (163, 5), (195, 5), (227, 5), (258, 0)];
const DISTANCE_TABLE: [(u16, u8); 30] = [(1, 0),
(2, 0),
(3, 0),
(4, 0),
(5, 1),
(7, 1),
(9, 2),
(13, 2),
(17, 3),
(25, 3),
(33, 4),
(49, 4),
(65, 5),
(97, 5),
(129, 6),
(193, 6),
(257, 7),
(385, 7),
(513, 8),
(769, 8),
(1025, 9),
(1537, 9),
(2049, 10),
(3073, 10),
(4097, 11),
(6145, 11),
(8193, 12),
(12289, 12),
(16385, 13),
(24577, 13)];
#[derive(Debug)]
pub enum Symbol {
EndOfBlock,
Literal(u8),
Share { length: u16, distance: u16 },
}
impl Symbol {
pub fn code(&self) -> u16 {
match *self {
Symbol::Literal(b) => b as u16,
Symbol::EndOfBlock => 256,
Symbol::Share { length, .. } => {
match length {
3...10 => 257 + length - 3,
11...18 => 265 + (length - 11) / 2,
19...34 => 269 + (length - 19) / 4,
35...66 => 273 + (length - 35) / 8,
67...130 => 277 + (length - 67) / 16,
131...257 => 281 + (length - 131) / 32,
258 => 285,
_ => unreachable!(),
}
}
}
}
pub fn extra_lengh(&self) -> Option<(u8, u16)> {
if let Symbol::Share { length, .. } = *self {
match length {
3...10 => None,
11...18 => Some((1, (length - 11) % 2)),
19...34 => Some((2, (length - 19) % 4)),
35...66 => Some((3, (length - 35) % 8)),
67...130 => Some((4, (length - 67) % 16)),
131...257 => Some((5, (length - 131) % 32)),
258 => None,
_ => unreachable!(),
}
} else {
None
}
}
pub fn distance(&self) -> Option<(u8, u8, u16)> {
if let Symbol::Share { distance, .. } = *self {
if distance <= 4 {
Some((distance as u8 - 1, 0, 0))
} else {
let mut extra_bits = 1;
let mut code = 4;
let mut base = 4;
while base * 2 < distance {
extra_bits += 1;
code += 2;
base *= 2;
}
let half = base / 2;
let delta = distance - base - 1;
if distance <= base + half {
Some((code, extra_bits, delta % half))
} else {
Some((code + 1, extra_bits, delta % half))
}
}
} else {
None
}
}
}
#[derive(Debug)]
pub struct Encoder {
literal: huffman::Encoder,
distance: huffman::Encoder,
}
impl Encoder {
pub fn encode<W>(&self, writer: &mut bit::BitWriter<W>, symbol: Symbol) -> io::Result<()>
where W: io::Write
{
try!(self.literal.encode(writer, symbol.code()));
if let Some((bits, extra)) = symbol.extra_lengh() {
try!(writer.write_bits(bits, extra));
}
if let Some((code, bits, extra)) = symbol.distance() {
try!(self.distance.encode(writer, code as u16));
if bits > 0 {
try!(writer.write_bits(bits, extra));
}
}
Ok(())
}
}
#[derive(Debug)]
pub struct Decoder {
literal: huffman::Decoder,
distance: huffman::Decoder,
}
impl Decoder {
#[inline(always)]
pub fn decode<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Symbol>
where R: io::Read
{
self.decode_literal_or_length(reader).and_then(|mut s| {
if let Symbol::Share { ref mut distance, .. } = s {
*distance = try!(self.decode_distance(reader));
}
Ok(s)
})
}
#[inline(always)]
fn decode_literal_or_length<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Symbol>
where R: io::Read
{
let decoded = try!(self.literal.decode(reader));
match decoded {
0...255 => Ok(Symbol::Literal(decoded as u8)),
256 => Ok(Symbol::EndOfBlock),
length_code => {
let (base, extra_bits) =
unsafe { *LENGTH_TABLE.get_unchecked(length_code as usize - 257) };
let extra = try!(reader.read_bits(extra_bits));
Ok(Symbol::Share {
length: base + extra,
distance: 0,
})
}
}
}
#[inline(always)]
fn decode_distance<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<u16>
where R: io::Read
{
let decoded = try!(self.distance.decode(reader)) as usize;
let (base, extra_bits) = unsafe { *DISTANCE_TABLE.get_unchecked(decoded) };
let extra = try!(reader.read_bits(extra_bits));
let distance = base + extra;
Ok(distance)
}
}
pub trait HuffmanCodec {
fn build(&self, symbols: &[Symbol]) -> Encoder;
fn save<W>(&self, writer: &mut bit::BitWriter<W>, codec: &Encoder) -> io::Result<()>
where W: io::Write;
fn load<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Decoder> where R: io::Read;
}
#[derive(Debug)]
pub struct FixedHuffmanCodec;
impl HuffmanCodec for FixedHuffmanCodec {
#[allow(unused_variables)]
fn build(&self, symbols: &[Symbol]) -> Encoder {
let mut literal_builder = huffman::EncoderBuilder::new(288);
for &(bitwidth, ref symbols, code_base) in &FIXED_LITERAL_OR_LENGTH_CODE_TABLE {
for (code, symbol) in symbols.clone()
.enumerate()
.map(|(i, s)| (code_base + i as u16, s)) {
literal_builder.set_mapping(symbol, huffman::Code::new(bitwidth, code));
}
}
let mut distance_builder = huffman::EncoderBuilder::new(30);
for i in 0..30 {
distance_builder.set_mapping(i, huffman::Code::new(5, i));
}
Encoder {
literal: literal_builder.finish(),
distance: distance_builder.finish(),
}
}
#[allow(unused_variables)]
fn save<W>(&self, writer: &mut bit::BitWriter<W>, codec: &Encoder) -> io::Result<()>
where W: io::Write
{
Ok(())
}
#[allow(unused_variables)]
fn load<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Decoder>
where R: io::Read
{
let mut literal_builder = huffman::DecoderBuilder::new(9, Some(END_OF_BLOCK));
for &(bitwidth, ref symbols, code_base) in &FIXED_LITERAL_OR_LENGTH_CODE_TABLE {
for (code, symbol) in symbols.clone()
.enumerate()
.map(|(i, s)| (code_base + i as u16, s)) {
literal_builder.set_mapping(symbol, huffman::Code::new(bitwidth, code));
}
}
let mut distance_builder = huffman::DecoderBuilder::new(5, None);
for i in 0..30 {
distance_builder.set_mapping(i, huffman::Code::new(5, i));
}
Ok(Decoder {
literal: literal_builder.finish(),
distance: distance_builder.finish(),
})
}
}
#[derive(Debug)]
pub struct DynamicHuffmanCodec;
impl HuffmanCodec for DynamicHuffmanCodec {
fn build(&self, symbols: &[Symbol]) -> Encoder {
let mut literal_counts = [0; 286];
let mut distance_counts = [0; 30];
for s in symbols {
literal_counts[s.code() as usize] += 1;
if let Some((d, _, _)) = s.distance() {
distance_counts[d as usize] += 1;
}
}
Encoder {
literal: huffman::EncoderBuilder::from_frequencies(&literal_counts, 15),
distance: huffman::EncoderBuilder::from_frequencies(&distance_counts, 15),
}
}
fn save<W>(&self, writer: &mut bit::BitWriter<W>, codec: &Encoder) -> io::Result<()>
where W: io::Write
{
let literal_code_count = cmp::max(257, codec.literal.used_max_symbol().unwrap_or(0) + 1);
let distance_code_count = cmp::max(1, codec.distance.used_max_symbol().unwrap_or(0) + 1);
let codes = build_bitwidth_codes(codec, literal_code_count, distance_code_count);
let mut code_counts = [0; 19];
for x in &codes {
code_counts[x.0 as usize] += 1;
}
let bitwidth_encoder = huffman::EncoderBuilder::from_frequencies(&code_counts, 7);
let bitwidth_code_count =
cmp::max(4,
BITWIDTH_CODE_ORDER.iter()
.rev()
.position(|&i| bitwidth_encoder.lookup(i as u16).width > 0)
.map_or(0, |trailing_zeros| 19 - trailing_zeros)) as u16;
try!(writer.write_bits(5, literal_code_count - 257));
try!(writer.write_bits(5, distance_code_count - 1));
try!(writer.write_bits(4, bitwidth_code_count - 4));
for &i in BITWIDTH_CODE_ORDER.iter().take(bitwidth_code_count as usize) {
let width = if code_counts[i] == 0 {
0
} else {
bitwidth_encoder.lookup(i as u16).width as u16
};
try!(writer.write_bits(3, width));
}
for &(code, bits, extra) in &codes {
try!(bitwidth_encoder.encode(writer, code as u16));
if bits > 0 {
try!(writer.write_bits(bits, extra as u16));
}
}
Ok(())
}
fn load<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Decoder>
where R: io::Read
{
let literal_code_count = try!(reader.read_bits(5)) + 257;
let distance_code_count = try!(reader.read_bits(5)) + 1;
let bitwidth_code_count = try!(reader.read_bits(4)) + 4;
let mut bitwidth_code_bitwidthes = [0; 19];
for &i in BITWIDTH_CODE_ORDER.iter().take(bitwidth_code_count as usize) {
bitwidth_code_bitwidthes[i] = try!(reader.read_bits(3)) as u8;
}
let bitwidth_decoder = huffman::DecoderBuilder::from_bitwidthes(&bitwidth_code_bitwidthes,
None);
let mut literal_code_bitwidthes = Vec::with_capacity(literal_code_count as usize);
while literal_code_bitwidthes.len() < literal_code_count as usize {
let c = try!(bitwidth_decoder.decode(reader));
let last = literal_code_bitwidthes.last().cloned();
literal_code_bitwidthes.extend(try!(load_bitwidthes(reader, c, last)));
}
let mut distance_code_bitwidthes = Vec::with_capacity(distance_code_count as usize);
while distance_code_bitwidthes.len() < distance_code_count as usize {
let c = try!(bitwidth_decoder.decode(reader));
let last = distance_code_bitwidthes.last().cloned();
distance_code_bitwidthes.extend(try!(load_bitwidthes(reader, c, last)));
}
Ok(Decoder {
literal: huffman::DecoderBuilder::from_bitwidthes(&literal_code_bitwidthes,
Some(END_OF_BLOCK)),
distance: huffman::DecoderBuilder::from_bitwidthes(&distance_code_bitwidthes, None),
})
}
}
fn load_bitwidthes<R>(reader: &mut bit::BitReader<R>,
code: u16,
last: Option<u8>)
-> io::Result<Box<Iterator<Item = u8>>>
where R: io::Read
{
Ok(match code {
0...15 => Box::new(iter::once(code as u8)),
16 => {
let count = try!(reader.read_bits(2)) + 3;
let last = try!(last.ok_or_else(|| invalid_data_error!("No preceeding value")));
Box::new(iter::repeat(last).take(count as usize))
}
17 => {
let zeros = try!(reader.read_bits(3)) + 3;
Box::new(iter::repeat(0).take(zeros as usize))
}
18 => {
let zeros = try!(reader.read_bits(7)) + 11;
Box::new(iter::repeat(0).take(zeros as usize))
}
_ => unreachable!(),
})
}
fn build_bitwidth_codes(codec: &Encoder,
literal_code_count: u16,
distance_code_count: u16)
-> Vec<(u8, u8, u8)> {
struct RunLength {
value: u8,
count: usize,
}
let mut run_lens: Vec<RunLength> = Vec::new();
for &(e, size) in &[(&codec.literal, literal_code_count),
(&codec.distance, distance_code_count)] {
for (i, c) in (0..size).map(|x| e.lookup(x as u16).width).enumerate() {
if i > 0 && run_lens.last().map_or(false, |s| s.value == c) {
run_lens.last_mut().unwrap().count += 1;
} else {
run_lens.push(RunLength {
value: c,
count: 1,
})
}
}
}
let mut codes: Vec<(u8, u8, u8)> = Vec::new();
for r in run_lens {
if r.value == 0 {
let mut c = r.count;
while c >= 11 {
let n = cmp::min(138, c) as u8;
codes.push((18, 7, n - 11));
c -= n as usize;
}
if c >= 3 {
codes.push((17, 3, c as u8 - 3));
c = 0;
}
for _ in 0..c {
codes.push((0, 0, 0));
}
} else {
codes.push((r.value, 0, 0));
let mut c = r.count - 1;
while c >= 3 {
let n = cmp::min(6, c) as u8;
codes.push((16, 2, n - 3));
c -= n as usize;
}
for _ in 0..c {
codes.push((r.value, 0, 0));
}
}
}
codes
}