toolbox_rs/
huffman_code.rs

1use std::cmp::Reverse;
2use std::collections::{BinaryHeap, VecDeque};
3use std::fmt::Debug;
4use std::{cell::RefCell, rc::Rc};
5
6#[derive(Debug, Clone)]
7pub struct HuffmanNode<T> {
8    character: Option<T>,
9    frequency: i32,
10    left: Option<HuffmanNodeRef<T>>,
11    right: Option<HuffmanNodeRef<T>>,
12}
13
14impl<T> PartialEq for HuffmanNode<T> {
15    fn eq(&self, other: &Self) -> bool {
16        self.frequency == other.frequency
17    }
18}
19
20impl<T> Eq for HuffmanNode<T> {}
21
22impl<T> PartialOrd for HuffmanNode<T> {
23    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
24        Some(self.frequency.cmp(&other.frequency))
25    }
26}
27
28impl<T> Ord for HuffmanNode<T> {
29    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
30        self.frequency.cmp(&other.frequency)
31    }
32}
33
34type HuffmanNodeRef<T> = Rc<RefCell<HuffmanNode<T>>>;
35
36fn min_node<T>(
37    q1: &mut VecDeque<HuffmanNode<T>>,
38    q2: &mut VecDeque<HuffmanNode<T>>,
39) -> HuffmanNode<T> {
40    if q1.is_empty() {
41        return q2.pop_front().unwrap();
42    }
43
44    if q2.is_empty() {
45        return q1.pop_front().unwrap();
46    }
47
48    if q1.front().unwrap() < q2.front().unwrap() {
49        return q1.pop_front().unwrap();
50    }
51    q2.pop_front().unwrap()
52}
53
54pub fn generate_huffman_code_from_sorted<T: Copy + Debug>(
55    v: &[(T, i32)],
56) -> Vec<(T, std::string::String)> {
57    if v.is_empty() {
58        return Vec::new();
59    }
60
61    let mut q1 = VecDeque::new();
62    let mut q2 = VecDeque::new();
63
64    for (t, f) in v {
65        q1.push_back(HuffmanNode::<T> {
66            character: Some(*t),
67            frequency: *f,
68            left: None,
69            right: None,
70        })
71    }
72
73    while !q1.is_empty() || q2.len() > 1 {
74        let left = min_node(&mut q1, &mut q2);
75        let right = min_node(&mut q1, &mut q2);
76        let node = HuffmanNode::<T> {
77            character: None, // None signifies that this is an interior node
78            frequency: left.frequency + right.frequency,
79            left: Some(Rc::new(RefCell::new(left))),
80            right: Some(Rc::new(RefCell::new(right))),
81        };
82        q2.push_back(node);
83    }
84
85    let root = Rc::new(RefCell::new(q2.pop_front().unwrap()));
86
87    retrieve_codebook(root)
88}
89
90fn retrieve_codebook<T: Copy + Debug>(root: Rc<RefCell<HuffmanNode<T>>>) -> Vec<(T, String)> {
91    // generate code book
92    let mut code_book = Vec::new();
93    let mut stack = Vec::new();
94    stack.push((root.clone(), String::new()));
95
96    while let Some((current, prefix)) = stack.pop() {
97        if let Some(character) = current.borrow().character {
98            code_book.push((character, prefix));
99        } else {
100            if let Some(left) = current.borrow().left.clone() {
101                stack.push((left, prefix.clone() + "0"));
102            }
103            if let Some(right) = current.borrow().right.clone() {
104                stack.push((right, prefix.clone() + "1"));
105            }
106        };
107    }
108    code_book
109}
110
111pub fn generate_huffman_code_from_unsorted<T: Copy + Debug>(
112    v: &[(T, i32)],
113) -> Vec<(T, std::string::String)> {
114    // TODO: add yet another implementation of using sort() + sorted construction. Include criterion numbers
115    if v.is_empty() {
116        return Vec::new();
117    }
118
119    let mut q1 = BinaryHeap::new();
120
121    for (t, f) in v {
122        q1.push(Reverse(Rc::new(RefCell::new(HuffmanNode::<T> {
123            character: Some(*t),
124            frequency: *f,
125            left: None,
126            right: None,
127        }))));
128    }
129
130    while q1.len() > 1 {
131        let Reverse(x) = q1.pop().unwrap();
132        let Reverse(y) = q1.pop().unwrap();
133
134        let f1 = x.borrow().frequency;
135        let f2 = y.borrow().frequency;
136
137        let node = Rc::new(RefCell::new(HuffmanNode::<T> {
138            character: None,
139            frequency: f1 + f2,
140            left: Some(x),
141            right: Some(y),
142        }));
143        q1.push(Reverse(node));
144    }
145
146    let Reverse(root) = q1.pop().unwrap();
147    retrieve_codebook(root)
148}
149
150#[cfg(test)]
151mod tests {
152    use crate::huffman_code::HuffmanNode;
153
154    use super::generate_huffman_code_from_sorted;
155    use super::generate_huffman_code_from_unsorted;
156
157    #[test]
158    fn run_with_empty_input_unsorted() {
159        let input = Vec::<(char, i32)>::new();
160        let code_book = generate_huffman_code_from_unsorted(&input);
161        assert_eq!(0, code_book.len());
162    }
163
164    #[test]
165    fn run_with_empty_input_sorted() {
166        let input = Vec::<(char, i32)>::new();
167        let code_book = generate_huffman_code_from_sorted(&input);
168        assert_eq!(0, code_book.len());
169    }
170
171    #[test]
172    fn construction_unsorted() {
173        let v = [
174            ('a', 5),
175            ('b', 9),
176            ('c', 12),
177            ('d', 13),
178            ('e', 16),
179            ('f', 45),
180        ];
181        let code_book = generate_huffman_code_from_unsorted(&v);
182        let expected = [
183            ('f', "0"),
184            ('c', "100"),
185            ('d', "101"),
186            ('a', "1100"),
187            ('b', "1101"),
188            ('e', "111"),
189        ];
190
191        let matching = code_book
192            .iter()
193            .rev()
194            .zip(&expected)
195            .filter(|&(a, b)| a.0 == b.0 && a.1 == b.1)
196            .count();
197
198        assert!(matching == code_book.len());
199        assert!(matching == expected.len());
200    }
201
202    #[test]
203    fn construction_sorted() {
204        let v = [
205            ('a', 5),
206            ('b', 9),
207            ('c', 12),
208            ('d', 13),
209            ('e', 16),
210            ('f', 45),
211        ];
212        let code_book = generate_huffman_code_from_sorted(&v);
213        let expected = [
214            ('f', "0"),
215            ('c', "100"),
216            ('d', "101"),
217            ('a', "1100"),
218            ('b', "1101"),
219            ('e', "111"),
220        ];
221
222        let matching = code_book
223            .iter()
224            .rev()
225            .zip(&expected)
226            .filter(|&(a, b)| a.0 == b.0 && a.1 == b.1)
227            .count();
228
229        assert!(matching == code_book.len());
230        assert!(matching == expected.len());
231    }
232
233    #[test]
234    fn ord() {
235        let node1 = HuffmanNode {
236            character: Some('a'),
237            frequency: 5,
238            left: None,
239            right: None,
240        };
241        let node2 = HuffmanNode {
242            character: Some('b'),
243            frequency: 9,
244            left: None,
245            right: None,
246        };
247        let node3 = HuffmanNode {
248            character: Some('c'),
249            frequency: 5,
250            left: None,
251            right: None,
252        };
253        assert!(node1 < node2);
254        assert!(node2 > node1);
255        assert_eq!(node1, node3);
256    }
257
258    #[test]
259    fn partial_ord() {
260        let node1 = HuffmanNode {
261            character: Some('a'),
262            frequency: 5,
263            left: None,
264            right: None,
265        };
266        let node2 = HuffmanNode {
267            character: Some('b'),
268            frequency: 9,
269            left: None,
270            right: None,
271        };
272        assert!(node1 < node2);
273        assert!(node2 > node1);
274    }
275}