cshannon/encoding/
balanced_tree.rs1use super::Encoding;
24use crate::code::Letter;
25use crate::model::Model;
26use crate::tokens::Token;
27use anyhow::{anyhow, Result};
28use std::collections::HashMap;
29
30pub fn new<T>(m: Model<T>) -> Result<Encoding<T>>
36where
37 T: Token,
38{
39 let mut map = HashMap::new();
40 let mut letter_generator = LetterGenerator::new(log2(m.len() as u64))?;
41 for t in m.tokens_sorted() {
42 match letter_generator.next() {
43 None => panic!("Ran out of letters".to_owned()),
46 Some(l) => {
47 map.insert(t, l);
48 }
49 }
50 }
51 Ok(Encoding::new(map)?)
52}
53
54struct LetterGenerator {
55 bit_count: u64,
56 current: u64,
57 max: u64,
58}
59
60impl LetterGenerator {
61 pub fn new(bit_count: u64) -> Result<Self> {
62 if bit_count > 64 {
63 return Err(anyhow!("model has too many keys"));
64 }
65 Ok(Self {
66 bit_count,
67 current: 1,
70 max: 1 << bit_count,
71 })
72 }
73}
74
75impl Iterator for LetterGenerator {
76 type Item = Letter;
77
78 fn next(&mut self) -> Option<Self::Item> {
79 if self.current >= self.max {
80 return None;
81 }
82 let l = Letter::new(
83 &(self.current << (64 - self.bit_count))
84 .to_be_bytes()
85 .to_vec(),
86 self.bit_count as u64,
87 );
88 self.current += 1;
89 Some(l)
90 }
91}
92
93fn log2(n: u64) -> u64 {
95 let max = std::mem::size_of::<u64>() as u64 * 8;
96 let zeroes = n.leading_zeros() as u64;
97 max - zeroes
98}
99
100#[cfg(test)]
101mod tests {
102 use super::*;
103 use crate::code::Letter;
104 use crate::model;
105 use crate::tokens::test_utils::I32Token;
106 use std::collections::HashMap;
107
108 #[test]
109 fn empty() {
110 let m: Model<I32Token> = model::with_frequencies(&[]);
111 let t = new(m).unwrap();
112 assert!(t.alphabet().is_empty());
113 assert!(t.map().is_empty());
114 }
115
116 #[test]
117 fn one_token() {
118 let m = model::with_frequencies(&[(I32Token(1), 2)]);
119 let t = new(m).unwrap();
120 assert_eq!(t.alphabet().len(), 1);
121 let want: HashMap<I32Token, Letter> = [(I32Token(1), Letter::new(&[0b1000_0000], 1))]
122 .iter()
123 .cloned()
124 .collect();
125 assert_eq!(t.map(), &want);
126 }
127
128 #[test]
129 fn max_tokens_for_some_tree_height() {
130 let m = model::with_frequencies(&[(I32Token(1), 1), (I32Token(2), 2), (I32Token(3), 3)]);
131 let t = new(m).unwrap();
132 assert_eq!(t.alphabet().len(), 3);
133 let want: HashMap<I32Token, Letter> = [
134 (I32Token(3), Letter::new(&[0b0100_0000], 2)),
135 (I32Token(2), Letter::new(&[0b1000_0000], 2)),
136 (I32Token(1), Letter::new(&[0b1100_0000], 2)),
137 ]
138 .iter()
139 .cloned()
140 .collect();
141 assert_eq!(t.map(), &want);
142 }
143
144 #[test]
145 fn max_tokens_for_some_tree_height_then_one() {
146 let m = model::with_frequencies(&[
147 (I32Token(1), 1),
148 (I32Token(2), 2),
149 (I32Token(3), 3),
150 (I32Token(4), 4),
151 ]);
152 let t = new(m).unwrap();
153 assert_eq!(t.alphabet().len(), 4);
154 let want: HashMap<I32Token, Letter> = [
155 (I32Token(4), Letter::new(&[0b0010_0000], 3)),
156 (I32Token(3), Letter::new(&[0b0100_0000], 3)),
157 (I32Token(2), Letter::new(&[0b0110_0000], 3)),
158 (I32Token(1), Letter::new(&[0b1000_0000], 3)),
159 ]
160 .iter()
161 .cloned()
162 .collect();
163 assert_eq!(t.map(), &want);
164 }
165}