cshannon/encoding/
balanced_tree.rs1use super::Encoding;
24use crate::model::Model;
25use crate::{code::Letter, tokens::Token};
26use anyhow::{anyhow, Result};
27use std::collections::HashMap;
28
29pub fn new<T: Token>(m: Model<T>) -> Result<Encoding<T>> {
35 let mut map = HashMap::new();
36 let mut letter_generator = LetterGenerator::new(log2(m.len() as u64))?;
37 for t in m.tokens_sorted() {
38 match letter_generator.next() {
39 None => panic!("Ran out of letters"),
42 Some(l) => {
43 map.insert(t, l);
44 }
45 }
46 }
47 Ok(Encoding::new(map)?)
48}
49
50struct LetterGenerator {
51 bit_count: u64,
52 current: u64,
53 max: u64,
54}
55
56impl LetterGenerator {
57 pub fn new(bit_count: u64) -> Result<Self> {
58 if bit_count > 64 {
59 return Err(anyhow!("model has too many keys"));
60 }
61 Ok(Self {
62 bit_count,
63 current: 1,
66 max: 1 << bit_count,
67 })
68 }
69}
70
71impl Iterator for LetterGenerator {
72 type Item = Letter;
73
74 fn next(&mut self) -> Option<Self::Item> {
75 if self.current >= self.max {
76 return None;
77 }
78 let l = Letter::new(
79 &(self.current << (64 - self.bit_count))
80 .to_be_bytes()
81 .to_vec(),
82 self.bit_count as u64,
83 );
84 self.current += 1;
85 Some(l)
86 }
87}
88
89fn log2(n: u64) -> u64 {
90 let max = std::mem::size_of::<u64>() as u64 * 8;
91 let zeroes = n.leading_zeros() as u64;
92 max - zeroes
93}
94
95#[cfg(test)]
96mod tests {
97 use super::*;
98 use crate::code::Letter;
99 use crate::model;
100 use crate::tokens::test_utils::I32Token;
101 use std::collections::HashMap;
102
103 #[test]
104 fn empty() {
105 let m: Model<I32Token> = model::with_frequencies(&[]);
106 let t = new(m).unwrap();
107 assert!(t.alphabet().is_empty());
108 assert!(t.map().is_empty());
109 }
110
111 #[test]
112 fn one_token() {
113 let m = model::with_frequencies(&[(I32Token(1), 2)]);
114 let t = new(m).unwrap();
115 assert_eq!(t.alphabet().len(), 1);
116 let want: HashMap<I32Token, Letter> = [(I32Token(1), Letter::new(&[0b1000_0000], 1))]
117 .iter()
118 .cloned()
119 .collect();
120 assert_eq!(t.map(), &want);
121 }
122
123 #[test]
124 fn max_tokens_for_some_tree_height() {
125 let m = model::with_frequencies(&[(I32Token(1), 1), (I32Token(2), 2), (I32Token(3), 3)]);
126 let t = new(m).unwrap();
127 assert_eq!(t.alphabet().len(), 3);
128 let want: HashMap<I32Token, Letter> = [
129 (I32Token(3), Letter::new(&[0b0100_0000], 2)),
130 (I32Token(2), Letter::new(&[0b1000_0000], 2)),
131 (I32Token(1), Letter::new(&[0b1100_0000], 2)),
132 ]
133 .iter()
134 .cloned()
135 .collect();
136 assert_eq!(t.map(), &want);
137 }
138
139 #[test]
140 fn max_tokens_for_some_tree_height_then_one() {
141 let m = model::with_frequencies(&[
142 (I32Token(1), 1),
143 (I32Token(2), 2),
144 (I32Token(3), 3),
145 (I32Token(4), 4),
146 ]);
147 let t = new(m).unwrap();
148 assert_eq!(t.alphabet().len(), 4);
149 let want: HashMap<I32Token, Letter> = [
150 (I32Token(4), Letter::new(&[0b0010_0000], 3)),
151 (I32Token(3), Letter::new(&[0b0100_0000], 3)),
152 (I32Token(2), Letter::new(&[0b0110_0000], 3)),
153 (I32Token(1), Letter::new(&[0b1000_0000], 3)),
154 ]
155 .iter()
156 .cloned()
157 .collect();
158 assert_eq!(t.map(), &want);
159 }
160}