bitstream-io 0.4.0

Library for reading/writing un-aligned values from/to streams in big-endian and little-endian formats.
Documentation
// Copyright 2017 Brian Langenberger
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Traits and implementations for reading or writing Huffman codes
//! from or to a stream.

use std::fmt;
use std::collections::BTreeMap;

pub enum ReadHuffmanTree<T: Clone> {
    Leaf(T),
    Tree(Box<ReadHuffmanTree<T>>, Box<ReadHuffmanTree<T>>)
}

impl<T: Clone> ReadHuffmanTree<T> {
    /// Given a vector of symbol/code pairs, compiles a Huffman tree
    /// for reading.
    /// Code must be 0 or 1 bits and are always consumed from the stream
    /// from least-significant in the list to most signficant
    /// (which makes them easier to read for humans).
    ///
    /// Each code in the tree must be unique, but symbols may occur
    /// multiple times.  All possible codes must be assigned some symbol.
    ///
    /// ## Example 1
    /// ```
    /// use bitstream_io::huffman::ReadHuffmanTree;
    /// assert!(ReadHuffmanTree::new(vec![(1i32, vec![0]),
    ///                                   (2i32, vec![1, 0]),
    ///                                   (3i32, vec![1, 1])]).is_ok());
    /// ```
    ///
    /// ## Example 2
    /// Note how the `1 0` code has no symbol, so this tree cannot be
    /// built for reading.
    ///
    /// ```
    /// use bitstream_io::huffman::ReadHuffmanTree;
    /// assert!(ReadHuffmanTree::new(vec![(1i32, vec![0]),
    ///                                   (3i32, vec![1, 1])]).is_err());
    /// ```
    pub fn new(values: Vec<(T, Vec<u8>)>) ->
        Result<ReadHuffmanTree<T>,HuffmanTreeError> {
        let mut tree = WipHuffmanTree::new_empty();

        for (symbol, code) in values.into_iter() {
            tree.add(code.as_slice(), symbol)?;
        }

        tree.into_read_tree()
    }
}

// Work-in-progress trees may have empty nodes during construction
// but those are not allowed in a finalized tree.
// If the user wants some codes to be None or an error symbol of some sort,
// those will need to be specified explicitly.
enum WipHuffmanTree<T: Clone> {
    Empty,
    Leaf(T),
    Tree(Box<WipHuffmanTree<T>>, Box<WipHuffmanTree<T>>)
}

impl<T: Clone> WipHuffmanTree<T> {
    fn new_empty() -> WipHuffmanTree<T> {
        WipHuffmanTree::Empty
    }

    fn new_leaf(value: T) -> WipHuffmanTree<T> {
        WipHuffmanTree::Leaf(value)
    }

    fn new_tree() -> WipHuffmanTree<T> {
        WipHuffmanTree::Tree(Box::new(Self::new_empty()),
                             Box::new(Self::new_empty()))
    }

    fn into_read_tree(self) -> Result<ReadHuffmanTree<T>,HuffmanTreeError> {
        match self {
            WipHuffmanTree::Empty => {
                Err(HuffmanTreeError::MissingLeaf)
            }
            WipHuffmanTree::Leaf(v) => {
                Ok(ReadHuffmanTree::Leaf(v))
            }
            WipHuffmanTree::Tree(zero, one) => {
                let zero = zero.into_read_tree()?;
                let one = one.into_read_tree()?;
                Ok(ReadHuffmanTree::Tree(Box::new(zero), Box::new(one)))
            }
        }
    }

    fn add(&mut self, code: &[u8], symbol: T) -> Result<(),HuffmanTreeError> {
        match self {
            &mut WipHuffmanTree::Empty => {
                if code.len() == 0 {
                    *self = WipHuffmanTree::new_leaf(symbol);
                    Ok(())
                } else {
                    *self = WipHuffmanTree::new_tree();
                    self.add(code, symbol)
                }
            }
            &mut WipHuffmanTree::Leaf(_) => {
                Err(if code.len() == 0 {
                    HuffmanTreeError::DuplicateLeaf
                } else {
                    HuffmanTreeError::OrphanedLeaf
                })
            }
            &mut WipHuffmanTree::Tree(ref mut zero, ref mut one) => {
                if code.len() == 0 {
                    Err(HuffmanTreeError::DuplicateLeaf)
                } else {
                    match code[0] {
                        0 => {zero.add(&code[1..], symbol)}
                        1 => {one.add(&code[1..], symbol)}
                        _ => {Err(HuffmanTreeError::InvalidBit)}
                    }
                }
            }
        }
    }
}

#[derive(PartialEq, Copy, Clone, Debug)]
pub enum HuffmanTreeError {
    InvalidBit,
    MissingLeaf,
    DuplicateLeaf,
    OrphanedLeaf
}

impl fmt::Display for HuffmanTreeError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            HuffmanTreeError::InvalidBit => {
                write!(f, "invalid bit in specification")
            }
            HuffmanTreeError::MissingLeaf => {
                write!(f, "missing leaf node in specification")
            }
            HuffmanTreeError::DuplicateLeaf => {
                write!(f, "duplicate leaf node in specification")
            }
            HuffmanTreeError::OrphanedLeaf => {
                write!(f, "orphaned leaf node in specification")
            }
        }
    }
}

pub struct WriteHuffmanTree<T: Ord> {
    big_endian: BTreeMap<T,(u32,u64)>,
    little_endian: BTreeMap<T,(u32,u64)>
}

impl<T: Ord + Clone> WriteHuffmanTree<T> {
    /// Given a vector of symbol/code pairs, compiles a Huffman tree
    /// for writing.
    /// Code must be 0 or 1 bits and are always written to the stream
    /// from least-significant in the list to most signficant
    /// (which makes them easier to read for humans).
    ///
    /// If the same symbol occurs multiple times, the first code is used.
    /// Unlike in read trees, not all possible codes need to be
    /// assigned a symbol.
    ///
    /// ## Example
    /// ```
    /// use bitstream_io::huffman::WriteHuffmanTree;
    /// assert!(WriteHuffmanTree::new(vec![(1i32, vec![0]),
    ///                                    (2i32, vec![1, 0]),
    ///                                    (3i32, vec![1, 1])]).is_ok());
    /// ```
    pub fn new(values: Vec<(T, Vec<u8>)>) ->
        Result<WriteHuffmanTree<T>,HuffmanTreeError> {
        use super::{BitQueueBE, BitQueueLE, BitQueue};

        // This current implementation is limited to Huffman codes
        // that generate up to 64 bits.  It may need to be updated
        // if I can find anything larger.

        let mut big_endian = BTreeMap::new();
        let mut little_endian = BTreeMap::new();

        for (symbol, code) in values.into_iter() {
            let mut be_encoded = BitQueueBE::new();
            let mut le_encoded = BitQueueLE::new();
            let code_len = code.len() as u32;
            for bit in code {
                if (bit != 0) && (bit != 1) {
                    return Err(HuffmanTreeError::InvalidBit);
                }
                be_encoded.push(1, bit as u64);
                le_encoded.push(1, bit as u64);
            }
            big_endian.entry(symbol.clone())
                      .or_insert((code_len, be_encoded.value()));
            little_endian.entry(symbol)
                         .or_insert((code_len, le_encoded.value()));
        }

        Ok(WriteHuffmanTree{big_endian: big_endian,
                            little_endian: little_endian})
    }

    /// Returns true if symbol is in tree.
    pub fn has_symbol(&self, symbol: T) -> bool {
        self.big_endian.contains_key(&symbol)
    }

    /// Given symbol, returns big-endian (bits, value) pair
    /// for writing code.  Panics if symbol is not found.
    pub fn get_be(&self, symbol: T) -> (u32, u64) {
        self.big_endian[&symbol]
    }

    /// Given symbol, returns little-endian (bits, value) pair
    /// for writing code.  Panics if symbol is not found.
    pub fn get_le(&self, symbol: T) -> (u32, u64) {
        self.little_endian[&symbol]
    }
}