use std::fmt;
use bit_reverse::reverse_bits;
#[derive(Debug)]
pub enum HuffmanError {
EmptyLengthTable,
CodeTooLong,
_TooManyOfLength,
}
pub const NUM_LENGTH_CODES: usize = 29;
pub const NUM_DISTANCE_CODES: usize = 30;
pub const NUM_LITERALS_AND_LENGTHS: usize = 286;
pub const MAX_CODE_LENGTH: usize = 15;
pub const MIN_MATCH: u16 = 3;
pub const MAX_MATCH: u16 = 258;
pub const MIN_DISTANCE: u16 = 1;
pub const MAX_DISTANCE: u16 = 32768;
pub const END_OF_BLOCK_POSITION: usize = 256;
#[allow(unused)]
pub static FIXED_CODE_LENGTHS: [u8; NUM_LITERALS_AND_LENGTHS + 2] =
[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 8, 8, 8, 8, 8, 8, 8, 8];
static LENGTH_EXTRA_BITS_LENGTH: [u8; NUM_LENGTH_CODES] =
[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0];
static LENGTH_CODE: [u8; 256] =
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14,
14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18,
18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28];
static BASE_LENGTH: [u8; NUM_LENGTH_CODES] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24,
28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192,
224, 255];
const LENGTH_BITS_START: u16 = 257;
pub static FIXED_CODE_LENGTHS_DISTANCE: [u8; NUM_DISTANCE_CODES + 2] = [5; NUM_DISTANCE_CODES + 2];
static DISTANCE_CODES: [u8; 512] =
[0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9,
9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, 18, 18,
19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23,
23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29];
static DISTANCE_EXTRA_BITS: [u8; NUM_DISTANCE_CODES] = [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13];
static DISTANCE_BASE: [u16; NUM_DISTANCE_CODES] =
[0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576];
#[derive(Copy, Clone)]
struct ExtraBits {
pub code_number: u16,
pub num_bits: u8,
pub value: u16,
}
pub fn get_length_code(length: u16) -> Option<usize> {
if let Some(c) = Some(usize::from(LENGTH_CODE[(length - MIN_MATCH) as usize])) {
Some(c + LENGTH_BITS_START as usize)
} else {
None
}
}
fn get_length_code_and_extra_bits(length: u16) -> Option<ExtraBits> {
if length < MIN_MATCH || length > MAX_MATCH {
return None;
}
let n = LENGTH_CODE[(length - MIN_MATCH) as usize];
let base = u16::from(BASE_LENGTH[n as usize]);
let num_bits = LENGTH_EXTRA_BITS_LENGTH[n as usize];
Some(ExtraBits {
code_number: u16::from(n) + LENGTH_BITS_START,
num_bits: num_bits,
value: length - base - MIN_MATCH,
})
}
pub fn get_distance_code(distance: u16) -> Option<u8> {
let distance = distance as usize;
match distance {
1...256 => Some(DISTANCE_CODES[distance - 1]),
257...32768 => Some(DISTANCE_CODES[256 + ((distance - 1) >> 7)]),
_ => None,
}
}
fn get_distance_code_and_extra_bits(distance: u16) -> Option<ExtraBits> {
if let Some(distance_code) = get_distance_code(distance) {
let extra = DISTANCE_EXTRA_BITS[distance_code as usize];
let base = DISTANCE_BASE[distance_code as usize] + 1;
Some(ExtraBits {
code_number: distance_code.into(),
num_bits: extra,
value: distance - base,
})
} else {
None
}
}
#[derive(Copy, Clone, Default)]
pub struct HuffmanCode {
pub code: u16,
pub length: u8,
}
impl fmt::Debug for HuffmanCode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f,
"HuffmanCode {{ code: {:b}, length: {}}}",
self.code,
self.length)
}
}
impl HuffmanCode {
fn from_reversed_bits(code: u16, length: u8) -> Option<HuffmanCode> {
if length <= 15 {
Some(HuffmanCode {
code: reverse_bits(code, length),
length: length,
})
} else {
None
}
}
}
#[cfg(test)]
pub struct LengthAndDistanceBits {
pub length_code: HuffmanCode,
pub length_extra_bits: HuffmanCode,
pub distance_code: HuffmanCode,
pub distance_extra_bits: HuffmanCode,
}
fn build_length_count_table(table: &[u8]) -> Result<(usize, usize, Vec<u16>), HuffmanError> {
let max_length = match table.iter().max() {
Some(l) => (*l).into(),
None => return Err(HuffmanError::EmptyLengthTable),
};
if max_length > MAX_CODE_LENGTH {
return Err(HuffmanError::CodeTooLong);
}
let mut max_length_pos = 0;
let mut len_counts = vec![0u16; max_length + 1];
for (n, &length) in table.iter().enumerate() {
len_counts[usize::from(length)] += 1;
if length > 0 {
max_length_pos = n;
}
}
Ok((max_length, max_length_pos, len_counts))
}
pub fn create_codes(length_table: &[u8]) -> Result<Vec<HuffmanCode>, HuffmanError> {
let mut codes = vec![HuffmanCode::default(); length_table.len()];
create_codes_in_place(codes.as_mut_slice(), length_table)?;
Ok(codes)
}
pub fn create_codes_in_place(code_table: &mut [HuffmanCode],
length_table: &[u8])
-> Result<(), HuffmanError> {
let (max_length, max_length_pos, lengths) = build_length_count_table(length_table)?;
let mut code = 0u16;
let mut next_code = vec![0u16];
for bits in 1..max_length + 1 {
code = (code + lengths[bits - 1]) << 1;
next_code.push(code.into());
}
for n in 0..max_length_pos + 1 {
let length = usize::from(length_table[n]);
if length != 0 {
code_table[n] = HuffmanCode::from_reversed_bits(next_code[length], length as u8)
.ok_or(HuffmanError::CodeTooLong)?;
next_code[length] = next_code[length].wrapping_add(1);
}
}
Ok(())
}
pub struct HuffmanTable {
codes: [HuffmanCode; 288],
distance_codes: [HuffmanCode; 32],
}
impl HuffmanTable {
pub fn empty() -> HuffmanTable {
HuffmanTable {
codes: [HuffmanCode {
code: 0,
length: 0,
}; 288],
distance_codes: [HuffmanCode {
code: 0,
length: 0,
}; 32],
}
}
#[cfg(test)]
pub fn from_length_tables(literals_and_lengths: &[u8],
distances: &[u8])
-> Result<HuffmanTable, HuffmanError> {
let mut table = HuffmanTable {
codes: [HuffmanCode::default(); 288],
distance_codes: [HuffmanCode::default(); 32],
};
create_codes_in_place(table.codes.as_mut(), literals_and_lengths)?;
create_codes_in_place(table.distance_codes.as_mut(), distances)?;
Ok(table)
}
pub fn update_from_length_tables(&mut self,
literals_and_lengths: &[u8],
distances: &[u8])
-> Result<(), HuffmanError> {
create_codes_in_place(self.codes.as_mut(), literals_and_lengths)?;
create_codes_in_place(self.distance_codes.as_mut(), distances)
}
#[cfg(test)]
pub fn fixed_table() -> HuffmanTable {
HuffmanTable::from_length_tables(&FIXED_CODE_LENGTHS, &FIXED_CODE_LENGTHS_DISTANCE)
.expect("Error: Failed to build table for fixed huffman codes, this indicates an \
error somewhere in the code.")
}
pub fn get_literal(&self, value: u8) -> HuffmanCode {
self.codes[usize::from(value)]
}
pub fn get_end_of_block(&self) -> HuffmanCode {
self.codes[END_OF_BLOCK_POSITION]
}
pub fn get_length_huffman(&self, length: u16) -> Option<((HuffmanCode, HuffmanCode))> {
if length < MIN_MATCH || length > MAX_MATCH {
return None;
}
let length_data = match get_length_code_and_extra_bits(length) {
Some(t) => t,
None => return None,
};
let length_huffman_code = self.codes[length_data.code_number as usize];
Some((length_huffman_code,
HuffmanCode {
code: length_data.value,
length: length_data.num_bits,
}))
}
pub fn get_distance_huffman(&self, distance: u16) -> Option<((HuffmanCode, HuffmanCode))> {
if distance < MIN_DISTANCE || distance > MAX_DISTANCE {
return None;
}
let distance_data = match get_distance_code_and_extra_bits(distance) {
Some(t) => t,
None => return None,
};
let distance_huffman_code = self.distance_codes[distance_data.code_number as usize];
Some((distance_huffman_code,
HuffmanCode {
code: distance_data.value,
length: distance_data.num_bits,
}))
}
#[cfg(test)]
pub fn get_length_distance_code(&self,
length: u16,
distance: u16)
-> Option<(LengthAndDistanceBits)> {
let l_codes = self.get_length_huffman(length).unwrap();
let d_codes = self.get_distance_huffman(distance).unwrap();
Some(LengthAndDistanceBits {
length_code: l_codes.0,
length_extra_bits: l_codes.1,
distance_code: d_codes.0,
distance_extra_bits: d_codes.1,
})
}
}
mod test {
#[allow(unused_imports)]
use super::*;
#[allow(unused_imports)]
use super::{get_length_code_and_extra_bits, get_distance_code_and_extra_bits,
build_length_count_table};
#[test]
fn test_get_length_code() {
let extra_bits = get_length_code_and_extra_bits(4).unwrap();
assert_eq!(extra_bits.code_number, 258);
assert_eq!(extra_bits.num_bits, 0);
assert_eq!(extra_bits.value, 0);
let extra_bits = get_length_code_and_extra_bits(165).unwrap();
assert_eq!(extra_bits.code_number, 282);
assert_eq!(extra_bits.num_bits, 5);
assert_eq!(extra_bits.value, 2);
let extra_bits = get_length_code_and_extra_bits(257).unwrap();
assert_eq!(extra_bits.code_number, 284);
assert_eq!(extra_bits.num_bits, 5);
assert_eq!(extra_bits.value, 30);
let extra_bits = get_length_code_and_extra_bits(258).unwrap();
assert_eq!(extra_bits.code_number, 285);
assert_eq!(extra_bits.num_bits, 0);
}
#[test]
fn test_distance_code() {
assert_eq!(get_distance_code(1).unwrap(), 0);
assert_eq!(get_distance_code(0), None);
assert_eq!(get_distance_code(50000), None);
assert_eq!(get_distance_code(6146).unwrap(), 25);
assert_eq!(get_distance_code(256).unwrap(), 15);
}
#[test]
fn test_distance_extra_bits() {
let extra = get_distance_code_and_extra_bits(527).unwrap();
assert_eq!(extra.value, 0b1110);
assert_eq!(extra.code_number, 18);
assert_eq!(extra.num_bits, 8);
let extra = get_distance_code_and_extra_bits(256).unwrap();
assert_eq!(extra.code_number, 15);
assert_eq!(extra.num_bits, 6);
}
#[test]
fn test_length_table_fixed() {
let _ = build_length_count_table(&FIXED_CODE_LENGTHS).unwrap();
}
#[test]
fn test_length_table_max_length() {
let table = [16u8; 288];
let test = build_length_count_table(&table);
match test {
Err(HuffmanError::CodeTooLong) => (),
_ => panic!("Didn't fail with invalid length!"),
};
}
#[test]
fn test_empty_table() {
let table = [];
let test = build_length_count_table(&table);
match test {
Err(HuffmanError::EmptyLengthTable) => (),
_ => panic!("Empty length table didn't fail!'"),
}
}
#[test]
fn make_table_fixed() {
let table = HuffmanTable::fixed_table();
assert_eq!(table.codes[0].code, 0b00001100);
assert_eq!(table.codes[143].code, 0b11111101);
assert_eq!(table.codes[144].code, 0b000010011);
assert_eq!(table.codes[255].code, 0b111111111);
assert_eq!(table.codes[256].code, 0b0000000);
assert_eq!(table.codes[279].code, 0b1110100);
assert_eq!(table.codes[280].code, 0b00000011);
assert_eq!(table.codes[287].code, 0b11100011);
assert_eq!(table.distance_codes[0].code, 0);
assert_eq!(table.distance_codes[5].code, 20);
let ld = table.get_length_distance_code(4, 5).unwrap();
assert_eq!(ld.length_code.code, 0b00100000);
assert_eq!(ld.distance_code.code, 0b00100);
assert_eq!(ld.distance_extra_bits.length, 1);
assert_eq!(ld.distance_extra_bits.code, 0);
}
}