1#![doc(html_root_url = "https://docs.rs/huffman-compress/0.7.0")]
52#![forbid(unsafe_code)]
53#![deny(missing_docs)]
54#![deny(missing_debug_implementations)]
55
56extern crate bit_vec;
57extern crate num_traits;
58
59#[cfg(test)]
60#[macro_use]
61extern crate quickcheck;
62
63use std::{
64 borrow::Borrow,
65 cmp,
66 cmp::Reverse,
67 collections::{btree_map, BTreeMap, BinaryHeap},
68 error::Error,
69 fmt,
70 iter::{FromIterator, Take},
71};
72
73use bit_vec::BitVec;
74use num_traits::ops::saturating::Saturating;
75
76#[derive(Debug, Clone)]
78pub struct Tree<K> {
79 root: usize,
80 arena: Vec<Node<K>>,
81}
82
83#[derive(Debug, Clone)]
84struct Node<K> {
85 parent: Option<usize>,
86 data: NodeData<K>,
87}
88
89#[derive(Debug, Clone)]
90enum NodeData<K> {
91 Leaf { symbol: K },
92 Branch { left: usize, right: usize },
93}
94
95impl<K: Clone> Tree<K> {
96 pub fn unbounded_decoder<I>(&self, iterable: I) -> UnboundedDecoder<'_, K, I>
108 where
109 I: IntoIterator<Item = bool>,
110 {
111 UnboundedDecoder {
112 tree: self,
113 iter: iterable.into_iter(),
114 }
115 }
116
117 pub fn decoder<I>(&self, iterable: I, num_symbols: usize) -> Decoder<'_, K, I>
127 where
128 I: IntoIterator<Item = bool>,
129 {
130 self.unbounded_decoder(iterable).take(num_symbols)
131 }
132}
133
134pub type Decoder<'a, K, I> = Take<UnboundedDecoder<'a, K, I>>;
137
138#[derive(Debug)]
140pub struct UnboundedDecoder<'a, K: 'a, I: IntoIterator<Item = bool>> {
141 tree: &'a Tree<K>,
142 iter: I::IntoIter,
143}
144
145impl<'a, K: Clone, I: IntoIterator<Item = bool>> Iterator for UnboundedDecoder<'a, K, I> {
146 type Item = K;
147
148 fn next(&mut self) -> Option<K> {
149 let mut node = self.tree.arena.get(self.tree.root)?;
150
151 loop {
152 match node.data {
153 NodeData::Leaf { ref symbol } => return Some(symbol.clone()),
154 NodeData::Branch { left, right } => {
155 node = match self.iter.next() {
156 Some(true) => &self.tree.arena[left],
157 Some(false) => &self.tree.arena[right],
158 None => return None,
159 };
160 }
161 }
162 }
163 }
164}
165
166#[derive(Clone, Debug)]
168pub struct Book<K> {
169 book: BTreeMap<K, BitVec>,
170}
171
172impl<K: Ord + Clone> Book<K> {
173 pub fn into_inner(self) -> BTreeMap<K, BitVec> {
175 self.book
176 }
177
178 pub fn symbols(&self) -> btree_map::Keys<'_, K, BitVec> {
180 self.book.keys()
181 }
182
183 pub fn iter(&self) -> btree_map::Iter<'_, K, BitVec> {
185 self.book.iter()
186 }
187
188 pub fn len(&self) -> usize {
190 self.book.len()
191 }
192
193 pub fn is_empty(&self) -> bool {
195 self.book.is_empty()
196 }
197
198 pub fn get<Q>(&self, k: &Q) -> Option<&BitVec>
200 where
201 K: Borrow<Q>,
202 Q: ?Sized + Ord,
203 {
204 self.book.get(k)
205 }
206
207 pub fn contains_symbol<Q>(&self, k: &Q) -> bool
209 where
210 K: Borrow<Q>,
211 Q: ?Sized + Ord,
212 {
213 self.book.contains_key(k)
214 }
215
216 pub fn encode<Q>(&self, buffer: &mut BitVec, k: &Q) -> Result<(), EncodeError>
224 where
225 K: Borrow<Q>,
226 Q: ?Sized + Ord,
227 {
228 match self.book.get(k) {
229 Some(code) => buffer.extend(code),
230 None => return Err(EncodeError {}),
231 }
232
233 Ok(())
234 }
235
236 fn new() -> Book<K> {
237 Book {
238 book: BTreeMap::new(),
239 }
240 }
241
242 fn build(&mut self, arena: &[Node<K>], node: &Node<K>, word: BitVec) {
243 match node.data {
244 NodeData::Leaf { ref symbol } => {
245 self.book.insert(symbol.clone(), word);
246 }
247 NodeData::Branch { left, right } => {
248 let mut left_word = word.clone();
249 left_word.push(true);
250 self.build(arena, &arena[left], left_word);
251
252 let mut right_word = word;
253 right_word.push(false);
254 self.build(arena, &arena[right], right_word);
255 }
256 }
257 }
258}
259
260#[derive(Debug, Clone)]
262pub struct EncodeError;
263
264impl fmt::Display for EncodeError {
265 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
266 "encode error: tried to encode an unknown symbol".fmt(f)
267 }
268}
269
270impl Error for EncodeError {
271 fn description(&self) -> &str {
272 "encode error: tried to encode an unknown symbol"
273 }
274}
275
276#[derive(Debug, Clone)]
289pub struct CodeBuilder<K: Ord + Clone, W: Saturating + Ord> {
290 heap: BinaryHeap<HeapData<K, W>>,
291 arena: Vec<Node<K>>,
292}
293
294impl<K: Ord + Clone, W: Saturating + Ord> CodeBuilder<K, W> {
295 pub fn new() -> CodeBuilder<K, W> {
297 CodeBuilder {
298 heap: BinaryHeap::new(),
299 arena: Vec::new(),
300 }
301 }
302
303 pub fn with_capacity(capacity: usize) -> CodeBuilder<K, W> {
306 CodeBuilder {
307 heap: BinaryHeap::with_capacity(capacity),
308 arena: Vec::with_capacity(2 * capacity),
309 }
310 }
311
312 pub fn push(&mut self, symbol: K, weight: W) {
314 self.heap.push(HeapData {
315 weight: Reverse(weight),
316 symbol: symbol.clone(),
317 id: self.arena.len(),
318 });
319
320 self.arena.push(Node {
321 parent: None,
322 data: NodeData::Leaf { symbol },
323 });
324 }
325
326 pub fn finish(mut self) -> (Book<K>, Tree<K>) {
329 let mut book = Book::new();
330
331 let root = loop {
332 let left = match self.heap.pop() {
333 Some(left) => left,
334 None => {
335 return (
336 book,
337 Tree {
338 root: 0,
339 arena: self.arena,
340 },
341 )
342 }
343 };
344
345 let right = match self.heap.pop() {
346 Some(right) => right,
347 None => break left,
348 };
349
350 let id = self.arena.len();
351
352 self.arena[left.id].parent = Some(id);
353 self.arena[right.id].parent = Some(id);
354
355 self.heap.push(HeapData {
356 weight: Reverse(left.weight.0.saturating_add(right.weight.0)),
357 symbol: cmp::min(left.symbol, right.symbol),
358 id,
359 });
360
361 self.arena.push(Node {
362 parent: None,
363 data: NodeData::Branch {
364 left: left.id,
365 right: right.id,
366 },
367 });
368 };
369
370 book.build(&self.arena, &self.arena[root.id], BitVec::new());
371
372 (
373 book,
374 Tree {
375 root: root.id,
376 arena: self.arena,
377 },
378 )
379 }
380}
381
382impl<K: Ord + Clone, W: Saturating + Ord> Default for CodeBuilder<K, W> {
383 fn default() -> CodeBuilder<K, W> {
384 CodeBuilder::new()
385 }
386}
387
388impl<K: Ord + Clone, W: Saturating + Ord> FromIterator<(K, W)> for CodeBuilder<K, W> {
389 fn from_iter<T>(weights: T) -> CodeBuilder<K, W>
390 where
391 T: IntoIterator<Item = (K, W)>,
392 {
393 let iter = weights.into_iter();
394 let (size_hint, _) = iter.size_hint();
395 let mut code = CodeBuilder::with_capacity(size_hint);
396 code.extend(iter);
397 code
398 }
399}
400
401impl<K: Ord + Clone, W: Saturating + Ord> Extend<(K, W)> for CodeBuilder<K, W> {
402 fn extend<T>(&mut self, weights: T)
403 where
404 T: IntoIterator<Item = (K, W)>,
405 {
406 for (symbol, weight) in weights {
407 self.push(symbol, weight);
408 }
409 }
410}
411
412impl<'a, K: Ord + Clone, W: Saturating + Ord + Clone> FromIterator<(&'a K, &'a W)>
413 for CodeBuilder<K, W>
414{
415 fn from_iter<T>(weights: T) -> CodeBuilder<K, W>
416 where
417 T: IntoIterator<Item = (&'a K, &'a W)>,
418 {
419 CodeBuilder::from_iter(weights.into_iter().map(|(k, v)| (k.clone(), v.clone())))
420 }
421}
422
423impl<'a, K: Ord + Clone, W: Saturating + Ord + Clone> Extend<(&'a K, &'a W)> for CodeBuilder<K, W> {
424 fn extend<T>(&mut self, weights: T)
425 where
426 T: IntoIterator<Item = (&'a K, &'a W)>,
427 {
428 self.extend(weights.into_iter().map(|(k, v)| (k.clone(), v.clone())));
429 }
430}
431
432#[derive(Eq, PartialEq, Ord, PartialOrd, Debug)]
433struct HeapData<K, W> {
434 weight: Reverse<W>,
435 symbol: K, id: usize,
437}
438
439impl<K: Clone, W: Clone> Clone for HeapData<K, W> {
440 fn clone(&self) -> HeapData<K, W> {
441 HeapData {
442 weight: Reverse(self.weight.0.clone()),
443 symbol: self.symbol.clone(),
444 id: self.id,
445 }
446 }
447}
448
449pub fn codebook<'a, I, K, W>(weights: I) -> (Book<K>, Tree<K>)
452where
453 I: IntoIterator<Item = (&'a K, &'a W)>,
454 K: 'a + Ord + Clone,
455 W: 'a + Saturating + Ord + Clone,
456{
457 CodeBuilder::from_iter(weights).finish()
458}
459
460#[cfg(test)]
461mod tests {
462 use std::collections::HashMap;
463
464 use super::*;
465
466 #[test]
467 fn test_uniform() {
468 let mut sample = HashMap::new();
469 sample.insert(1, 1);
470 sample.insert(2, 1);
471 sample.insert(3, 1);
472 sample.insert(4, 1);
473 sample.insert(5, 1);
474 let (book, tree) = CodeBuilder::from_iter(sample).finish();
475
476 let mut buffer = BitVec::new();
477 book.encode(&mut buffer, &1).unwrap();
478 book.encode(&mut buffer, &2).unwrap();
479 book.encode(&mut buffer, &3).unwrap();
480 book.encode(&mut buffer, &4).unwrap();
481 book.encode(&mut buffer, &5).unwrap();
482
483 let mut decoder = tree.unbounded_decoder(buffer);
484 assert_eq!(decoder.next(), Some(1));
485 assert_eq!(decoder.next(), Some(2));
486 assert_eq!(decoder.next(), Some(3));
487 assert_eq!(decoder.next(), Some(4));
488 assert_eq!(decoder.next(), Some(5));
489 assert_eq!(decoder.next(), None);
490 }
491
492 #[test]
493 fn test_uniform_from_static() {
494 const WEIGHTS: &[(&char, &usize)] = &[(&'a', &1), (&'b', &1), (&'c', &1), (&'d', &1)];
495 let (book, tree) = codebook(WEIGHTS.iter().cloned());
496
497 let mut buffer = BitVec::new();
498 book.encode(&mut buffer, &'a').unwrap();
499 book.encode(&mut buffer, &'b').unwrap();
500 book.encode(&mut buffer, &'c').unwrap();
501 book.encode(&mut buffer, &'d').unwrap();
502
503 let mut decoder = tree.unbounded_decoder(buffer);
504 assert_eq!(decoder.next(), Some('a'));
505 assert_eq!(decoder.next(), Some('b'));
506 assert_eq!(decoder.next(), Some('c'));
507 assert_eq!(decoder.next(), Some('d'));
508 assert_eq!(decoder.next(), None);
509 }
510
511 #[test]
512 fn test_empty() {
513 let (book, tree) = CodeBuilder::<&str, i32>::new().finish();
514
515 let mut buffer = BitVec::new();
516 assert!(book.encode(&mut buffer, "hello").is_err());
517
518 let mut decoder = tree.unbounded_decoder(buffer);
519 assert_eq!(decoder.next(), None);
520 }
521
522 #[test]
523 fn test_single() {
524 let mut builder = CodeBuilder::new();
525 builder.push("hello", 1);
526 let (book, tree) = builder.finish();
527
528 let mut buffer = BitVec::new();
529 book.encode(&mut buffer, "hello").unwrap();
530
531 let mut decoder = tree.unbounded_decoder(buffer);
532 assert_eq!(decoder.next(), Some("hello"));
533 assert_eq!(decoder.next(), Some("hello")); }
535
536 quickcheck! {
537 fn efficient_order(ag: u32, at: u32, cg: u32, ct: u32, tg: u32) -> bool {
538 let mut builder = CodeBuilder::new();
539 builder.push("CG", cg);
540 builder.push("AG", ag);
541 builder.push("AT", at);
542 builder.push("CT", ct);
543 builder.push("TG", tg);
544 let (book, _) = builder.finish();
545
546 let len = |symbol| {
547 book.get(symbol).map_or(0, |code| code.len())
548 };
549
550 at >= ct || len("CT") <= len("AT") ||
551 ag.saturating_add(at).saturating_add(cg).saturating_add(ct).saturating_add(tg) == u32::MAX
552 }
553
554 fn encode_decode_bytes(symbols: Vec<u8>) -> bool {
555 let mut counts = [0; 256];
556 for symbol in &symbols {
557 counts[usize::from(*symbol)] += 1;
558 }
559
560 let (book, tree) = counts.iter()
561 .enumerate()
562 .map(|(k, v)| (k as u8, *v))
563 .collect::<CodeBuilder<_, _>>()
564 .finish();
565
566 let mut buffer = BitVec::new();
567 for symbol in &symbols {
568 book.encode(&mut buffer, symbol).unwrap();
569 }
570
571 tree.unbounded_decoder(&buffer).eq(symbols)
572 }
573 }
574}