1#![doc(html_root_url = "https://docs.rs/huffman-compress2/0.7.0")]
58#![forbid(unsafe_code)]
59#![deny(missing_docs)]
60#![deny(missing_debug_implementations)]
61#![warn(clippy::pedantic)]
62#![allow(clippy::manual_let_else)]
63
64use std::borrow::Borrow;
65use std::cmp;
66use std::cmp::Reverse;
67use std::collections::{btree_map, BTreeMap, BinaryHeap};
68use std::error::Error;
69use std::fmt;
70use std::iter::{FromIterator, Take};
71
72use bit_vec::BitVec;
73
74#[cfg(feature = "serde")]
75use serde::{Deserialize, Serialize};
76
77use num_traits::ops::saturating::Saturating;
78
79#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
81#[derive(Debug, Clone)]
82pub struct Tree<K> {
83 root: usize,
84 arena: Vec<Node<K>>,
85}
86
87#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
88#[derive(Debug, Clone)]
89struct Node<K> {
90 parent: Option<usize>,
91 data: NodeData<K>,
92}
93
94#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
95#[derive(Debug, Clone)]
96enum NodeData<K> {
97 Leaf { symbol: K },
98 Branch { left: usize, right: usize },
99}
100
101impl<K: Clone> Tree<K> {
102 pub fn unbounded_decoder<I>(&self, iterable: I) -> UnboundedDecoder<K, I>
114 where
115 I: IntoIterator<Item = bool>,
116 {
117 UnboundedDecoder {
118 tree: self,
119 iter: iterable.into_iter(),
120 }
121 }
122
123 pub fn decoder<I>(&self, iterable: I, num_symbols: usize) -> Decoder<K, I>
133 where
134 I: IntoIterator<Item = bool>,
135 {
136 self.unbounded_decoder(iterable).take(num_symbols)
137 }
138}
139
140pub type Decoder<'a, K, I> = Take<UnboundedDecoder<'a, K, I>>;
143
144#[derive(Debug)]
146pub struct UnboundedDecoder<'a, K: 'a, I: IntoIterator<Item = bool>> {
147 tree: &'a Tree<K>,
148 iter: I::IntoIter,
149}
150
151impl<'a, K: Clone, I: IntoIterator<Item = bool>> Iterator for UnboundedDecoder<'a, K, I> {
152 type Item = K;
153
154 fn next(&mut self) -> Option<K> {
155 let mut node = self.tree.arena.get(self.tree.root)?;
156
157 loop {
158 match node.data {
159 NodeData::Leaf { ref symbol } => return Some(symbol.clone()),
160 NodeData::Branch { left, right } => {
161 node = if self.iter.next()? {
162 &self.tree.arena[left]
163 } else {
164 &self.tree.arena[right]
165 };
166 }
167 }
168 }
169 }
170}
171
172#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
174#[derive(Clone, Debug)]
175pub struct Book<K: Ord + Clone> {
176 book: BTreeMap<K, BitVec>,
177}
178
179impl<K: Ord + Clone> Book<K> {
180 #[must_use]
182 pub fn into_inner(self) -> BTreeMap<K, BitVec> {
183 self.book
184 }
185
186 pub fn symbols(&self) -> btree_map::Keys<K, BitVec> {
188 self.book.keys()
189 }
190
191 pub fn iter(&self) -> btree_map::Iter<K, BitVec> {
193 self.book.iter()
194 }
195
196 #[must_use]
198 pub fn len(&self) -> usize {
199 self.book.len()
200 }
201
202 #[must_use]
204 pub fn is_empty(&self) -> bool {
205 self.book.is_empty()
206 }
207
208 pub fn get<Q: ?Sized>(&self, k: &Q) -> Option<&BitVec>
210 where
211 K: Borrow<Q>,
212 Q: Ord,
213 {
214 self.book.get(k)
215 }
216
217 pub fn contains_symbol<Q: ?Sized>(&self, k: &Q) -> bool
219 where
220 K: Borrow<Q>,
221 Q: Ord,
222 {
223 self.book.contains_key(k)
224 }
225
226 pub fn encode<Q: ?Sized>(&self, buffer: &mut BitVec, k: &Q) -> Result<(), EncodeError>
234 where
235 K: Borrow<Q>,
236 Q: Ord,
237 {
238 self.book
239 .get(k)
240 .map(|code| buffer.extend(code))
241 .ok_or(EncodeError {})
242 }
243
244 fn new() -> Book<K> {
245 Book {
246 book: BTreeMap::new(),
247 }
248 }
249
250 fn build(&mut self, arena: &[Node<K>], node: &Node<K>, word: BitVec) {
251 match node.data {
252 NodeData::Leaf { ref symbol } => {
253 self.book.insert(symbol.clone(), word);
254 }
255 NodeData::Branch { left, right } => {
256 let mut left_word = word.clone();
257 left_word.push(true);
258 self.build(arena, &arena[left], left_word);
259
260 let mut right_word = word;
261 right_word.push(false);
262 self.build(arena, &arena[right], right_word);
263 }
264 }
265 }
266}
267
268#[derive(Debug, Clone)]
270pub struct EncodeError;
271
272impl fmt::Display for EncodeError {
273 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
274 "encode error: tried to encode an unknown symbol".fmt(f)
275 }
276}
277
278impl Error for EncodeError {
279 fn description(&self) -> &str {
280 "encode error: tried to encode an unknown symbol"
281 }
282}
283
284#[derive(Debug, Clone)]
297pub struct CodeBuilder<K: Ord + Clone, W: Saturating + Ord> {
298 heap: BinaryHeap<HeapData<K, W>>,
299 arena: Vec<Node<K>>,
300}
301
302impl<K: Ord + Clone, W: Saturating + Ord> CodeBuilder<K, W> {
303 #[must_use]
305 pub fn new() -> CodeBuilder<K, W> {
306 CodeBuilder {
307 heap: BinaryHeap::new(),
308 arena: Vec::new(),
309 }
310 }
311
312 #[must_use]
315 pub fn with_capacity(capacity: usize) -> CodeBuilder<K, W> {
316 CodeBuilder {
317 heap: BinaryHeap::with_capacity(capacity),
318 arena: Vec::with_capacity(2 * capacity),
319 }
320 }
321
322 pub fn push(&mut self, symbol: K, weight: W) {
324 self.heap.push(HeapData {
325 weight: Reverse(weight),
326 symbol: symbol.clone(),
327 id: self.arena.len(),
328 });
329
330 self.arena.push(Node {
331 parent: None,
332 data: NodeData::Leaf { symbol },
333 });
334 }
335
336 #[must_use]
339 pub fn finish(mut self) -> (Book<K>, Tree<K>) {
340 let mut book = Book::new();
341
342 let root = loop {
343 let left = match self.heap.pop() {
344 Some(left) => left,
345 None => {
346 return (
347 book,
348 Tree {
349 root: 0,
350 arena: self.arena,
351 },
352 )
353 }
354 };
355
356 let right = match self.heap.pop() {
357 Some(right) => right,
358 None => break left,
359 };
360
361 let id = self.arena.len();
362
363 self.arena[left.id].parent = Some(id);
364 self.arena[right.id].parent = Some(id);
365
366 self.heap.push(HeapData {
367 weight: Reverse(left.weight.0.saturating_add(right.weight.0)),
368 symbol: cmp::min(left.symbol, right.symbol),
369 id,
370 });
371
372 self.arena.push(Node {
373 parent: None,
374 data: NodeData::Branch {
375 left: left.id,
376 right: right.id,
377 },
378 });
379 };
380
381 book.build(&self.arena, &self.arena[root.id], BitVec::new());
382 (
383 book,
384 Tree {
385 root: root.id,
386 arena: self.arena,
387 },
388 )
389 }
390}
391
392impl<K: Ord + Clone, W: Saturating + Ord> Default for CodeBuilder<K, W> {
393 fn default() -> CodeBuilder<K, W> {
394 CodeBuilder::new()
395 }
396}
397
398impl<K: Ord + Clone, W: Saturating + Ord> FromIterator<(K, W)> for CodeBuilder<K, W> {
399 fn from_iter<T>(weights: T) -> CodeBuilder<K, W>
400 where
401 T: IntoIterator<Item = (K, W)>,
402 {
403 let iter = weights.into_iter();
404 let (size_hint, _) = iter.size_hint();
405 let mut code = CodeBuilder::with_capacity(size_hint);
406 code.extend(iter);
407 code
408 }
409}
410
411impl<K: Ord + Clone, W: Saturating + Ord> Extend<(K, W)> for CodeBuilder<K, W> {
412 fn extend<T>(&mut self, weights: T)
413 where
414 T: IntoIterator<Item = (K, W)>,
415 {
416 for (symbol, weight) in weights {
417 self.push(symbol, weight);
418 }
419 }
420}
421
422impl<'a, K: Ord + Clone, W: Saturating + Ord + Clone> FromIterator<(&'a K, &'a W)>
423 for CodeBuilder<K, W>
424{
425 fn from_iter<T>(weights: T) -> CodeBuilder<K, W>
426 where
427 T: IntoIterator<Item = (&'a K, &'a W)>,
428 {
429 weights
430 .into_iter()
431 .map(|(k, v)| (k.clone(), v.clone()))
432 .collect()
433 }
434}
435
436impl<'a, K: Ord + Clone, W: Saturating + Ord + Clone> Extend<(&'a K, &'a W)> for CodeBuilder<K, W> {
437 fn extend<T>(&mut self, weights: T)
438 where
439 T: IntoIterator<Item = (&'a K, &'a W)>,
440 {
441 self.extend(weights.into_iter().map(|(k, v)| (k.clone(), v.clone())));
442 }
443}
444
445#[derive(Eq, PartialEq, Ord, PartialOrd, Debug)]
446struct HeapData<K, W> {
447 weight: Reverse<W>,
448 symbol: K, id: usize,
450}
451
452impl<K: Clone, W: Clone> Clone for HeapData<K, W> {
453 fn clone(&self) -> HeapData<K, W> {
454 HeapData {
455 weight: Reverse(self.weight.0.clone()),
456 symbol: self.symbol.clone(),
457 id: self.id,
458 }
459 }
460}
461
462pub fn codebook<'a, I, K, W>(weights: I) -> (Book<K>, Tree<K>)
465where
466 I: IntoIterator<Item = (&'a K, &'a W)>,
467 K: 'a + Ord + Clone,
468 W: 'a + Saturating + Ord + Clone,
469{
470 CodeBuilder::from_iter(weights).finish()
471}
472
473#[cfg(test)]
474mod tests {
475 use super::*;
476 use quickcheck::quickcheck;
477 use std::collections::HashMap;
478
479 #[test]
480 fn test_uniform() {
481 let mut sample = HashMap::new();
482 sample.insert(1, 1);
483 sample.insert(2, 1);
484 sample.insert(3, 1);
485 sample.insert(4, 1);
486 sample.insert(5, 1);
487 let (book, tree) = CodeBuilder::from_iter(sample).finish();
488
489 let mut buffer = BitVec::new();
490 book.encode(&mut buffer, &1).unwrap();
491 book.encode(&mut buffer, &2).unwrap();
492 book.encode(&mut buffer, &3).unwrap();
493 book.encode(&mut buffer, &4).unwrap();
494 book.encode(&mut buffer, &5).unwrap();
495
496 let mut decoder = tree.unbounded_decoder(buffer);
497 assert_eq!(decoder.next(), Some(1));
498 assert_eq!(decoder.next(), Some(2));
499 assert_eq!(decoder.next(), Some(3));
500 assert_eq!(decoder.next(), Some(4));
501 assert_eq!(decoder.next(), Some(5));
502 assert_eq!(decoder.next(), None);
503 }
504
505 #[test]
506 fn test_uniform_from_static() {
507 const WEIGHTS: &[(&char, &usize)] = &[(&'a', &1), (&'b', &1), (&'c', &1), (&'d', &1)];
508 let (book, tree) = codebook(WEIGHTS.iter().copied());
509
510 let mut buffer = BitVec::new();
511 book.encode(&mut buffer, &'a').unwrap();
512 book.encode(&mut buffer, &'b').unwrap();
513 book.encode(&mut buffer, &'c').unwrap();
514 book.encode(&mut buffer, &'d').unwrap();
515
516 let mut decoder = tree.unbounded_decoder(buffer);
517 assert_eq!(decoder.next(), Some('a'));
518 assert_eq!(decoder.next(), Some('b'));
519 assert_eq!(decoder.next(), Some('c'));
520 assert_eq!(decoder.next(), Some('d'));
521 assert_eq!(decoder.next(), None);
522 }
523
524 #[test]
525 fn test_empty() {
526 let (book, tree) = CodeBuilder::<&str, i32>::new().finish();
527
528 let mut buffer = BitVec::new();
529 assert!(book.encode(&mut buffer, "hello").is_err());
530
531 let mut decoder = tree.unbounded_decoder(buffer);
532 assert_eq!(decoder.next(), None);
533 }
534
535 #[test]
536 fn test_single() {
537 let mut builder = CodeBuilder::new();
538 builder.push("hello", 1);
539 let (book, tree) = builder.finish();
540
541 let mut buffer = BitVec::new();
542 book.encode(&mut buffer, "hello").unwrap();
543
544 let mut decoder = tree.unbounded_decoder(buffer);
545 assert_eq!(decoder.next(), Some("hello"));
546 assert_eq!(decoder.next(), Some("hello")); }
548
549 quickcheck! {
550 fn efficient_order(ag: u32, at: u32, cg: u32, ct: u32, tg: u32) -> bool {
551 let mut builder = CodeBuilder::new();
552 builder.push("CG", cg);
553 builder.push("AG", ag);
554 builder.push("AT", at);
555 builder.push("CT", ct);
556 builder.push("TG", tg);
557 let (book, _) = builder.finish();
558
559 let len = |symbol| {
560 book.get(symbol).map_or(0, bit_vec::BitVec::len)
561 };
562
563 at >= ct || len("CT") <= len("AT") ||
564 ag.saturating_add(at).saturating_add(cg).saturating_add(ct).saturating_add(tg) == u32::MAX
565 }
566
567 fn encode_decode_bytes(symbols: Vec<u8>) -> bool {
568 let mut counts = [0; 256];
569 for symbol in &symbols {
570 counts[usize::from(*symbol)] += 1;
571 }
572
573 let (book, tree) = counts.iter()
574 .enumerate()
575 .map(|(k, v)| (k as u8, *v))
576 .collect::<CodeBuilder<_, _>>()
577 .finish();
578
579 let mut buffer = BitVec::new();
580 for symbol in &symbols {
581 book.encode(&mut buffer, symbol).unwrap();
582 }
583
584 tree.unbounded_decoder(&buffer).eq(symbols)
585 }
586 }
587}