use crate::{
io::{reader::BinaryReader, writer::BinaryWriter},
priority_queue::PQ,
};
const R: usize = 256;
pub struct HuffmanEncode {
writer: BinaryWriter,
}
pub struct HuffmanDecode {
reader: BinaryReader,
}
fn char_at(c: u8) -> usize {
c as usize
}
type Link = Option<Box<Node>>;
type C = u8;
struct Node {
ch: C,
freq: usize,
left: Link,
right: Link,
}
impl Node {
pub fn new(ch: C, freq: usize) -> Self {
Node {
ch,
freq,
left: None,
right: None,
}
}
pub fn is_leaf(&self) -> bool {
self.left.is_none() && self.right.is_none()
}
}
impl PartialOrd for Node {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.freq.partial_cmp(&other.freq)
}
}
impl PartialEq for Node {
fn eq(&self, other: &Self) -> bool {
self.ch == other.ch
&& self.freq == other.freq
&& self.left == other.left
&& self.right == other.right
}
}
impl HuffmanEncode {
pub fn new() -> Self {
HuffmanEncode {
writer: BinaryWriter::new(64),
}
}
pub fn compress(mut self, txt: &str) -> Vec<u8> {
let chars = txt.as_bytes();
let mut freq = [0usize; R];
for &c in chars {
freq[char_at(c)] += 1;
}
let root = self.build_trie(freq);
let mut st = [""; R].map(|f| f.to_string());
Self::build_code(&mut st, &root, String::new());
self.write_trie(&root);
self.write_usize(chars.len() as u32);
for &p in chars.iter() {
for &c in st[char_at(p)].as_bytes() {
self.write_bool(c == b'1');
}
}
self.writer.close()
}
fn build_trie(&self, freq: [usize; R]) -> Node {
let mut pq = PQ::new_min_pq();
(0..R).for_each(|c| {
if freq[c] > 0 {
pq.push(Node::new(c as C, freq[c]));
}
});
while pq.size() > 1 {
let x = pq.pop().unwrap();
let y = pq.pop().unwrap();
let mut parent = Node::new(b'\0', x.freq + y.freq);
parent.left = Some(Box::new(x));
parent.right = Some(Box::new(y));
pq.push(parent);
}
pq.pop().unwrap()
}
fn build_code(st: &mut [String; R], x: &Node, s: String) {
if x.is_leaf() {
st[x.ch as usize] = s;
} else {
Self::build_code(st, x.left.as_deref().unwrap(), format!("{}0", s));
Self::build_code(st, x.right.as_deref().unwrap(), format!("{}1", s));
}
}
fn write_trie(&mut self, x: &Node) {
if x.is_leaf() {
self.write_bool(true);
self.write_char(x.ch);
} else {
self.write_bool(false);
self.write_trie(x.left.as_deref().unwrap());
self.write_trie(x.right.as_deref().unwrap());
}
}
fn write_usize(&mut self, len: u32) {
self.writer.write_u32(len)
}
fn write_bool(&mut self, j: bool) {
self.writer.writer_bool(j)
}
fn write_char(&mut self, ch: C) {
self.writer.writer_u8(ch);
}
}
impl HuffmanDecode {
pub fn new(data: Vec<u8>) -> Self {
HuffmanDecode {
reader: BinaryReader::new(data),
}
}
pub fn expand(&mut self) -> String {
let mut ans = String::new();
let mut root = self.read_trie();
let n = self.reader.read_u32();
for _ in 0..n {
let mut x = &mut root;
while !x.is_leaf() {
let a = if self.reader.read_bool() {
x.right.as_deref_mut()
} else {
x.left.as_deref_mut()
};
x = a.unwrap();
}
ans.push(char::from(x.ch));
}
ans
}
fn read_trie(&mut self) -> Node {
if self.reader.read_bool() {
Node::new(self.reader.read_u8(), 0)
} else {
let mut ans = Node::new(b'\0', 0);
ans.left = Some(Box::new(self.read_trie()));
ans.right = Some(Box::new(self.read_trie()));
ans
}
}
}
#[cfg(test)]
mod test {
use crate::string::huff_man::HuffmanDecode;
use super::HuffmanEncode;
#[test]
fn test() {
let txt = r#"abcdefgabcdefg1287abcdefgabcdefg128799abcdefgabcdefg1287abcdefgabcdefg1287abcdefgabcdefg128
cdefgabcdefg128799abcdefgabcdefg1287abcdefgabcdefg1287abcdefgabcdefg128
cdefgabcdefg128799abcdefgabcdefg1287abcdefgabcdefg1287abcdefgabcdefg128
aaaaaa799abcdefgabcdefg128799abcdefgabcdefg1287abcdefgabcdefg128799abcdefgabcdefg1287aaaaaaaaaaaaaaaaaaaaaa9"#.to_string();
let encode = HuffmanEncode::new();
let compress = encode.compress(&txt);
let origin_len = txt.as_bytes().len();
let compress_len = compress.len();
let rate = compress_len * 100 / origin_len;
println!(
"len txt :{},compress:{},rate:{}",
origin_len, compress_len, rate
);
let mut decode = HuffmanDecode::new(compress);
assert_eq!(decode.expand(), txt);
}
}