1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
extern crate itertools;
use std::cmp;
use std::error;
use std::fmt;
use std::mem;
use itertools::Itertools;
use Error::*;
/// Orders two floats, reporting `Equal` whenever neither `a < b` nor `a > b`
/// holds (which includes every comparison involving NaN).
fn order_non_nan(a: f64, b: f64) -> cmp::Ordering {
    // `partial_cmp` yields `None` only for incomparable operands; collapse
    // that case to `Equal` so callers always get an ordering.
    a.partial_cmp(&b).unwrap_or(cmp::Ordering::Equal)
}
/// Iterates over `slice` in consecutive chunks of exactly `csize` elements,
/// leaving out any trailing elements that would not fill a whole chunk.
///
/// Panics if `csize` is zero, because the remainder is computed modulo `csize`.
fn complete_chunks<T>(slice: &[T], csize: usize) -> std::slice::Chunks<T> {
    // Length of the longest prefix that is a multiple of `csize`.
    let whole = slice.len() - slice.len() % csize;
    slice[..whole].chunks(csize)
}
/// Reasons for which `package_merge` rejects its arguments.
#[derive(Copy,Clone,PartialEq,Eq,Debug)]
pub enum Error {
/// The `frequencies` slice passed to `package_merge` was empty.
NoSymbols,
/// There are more symbols than codes of at most `max_len` bits can
/// distinguish, i.e. more than `2^max_len` frequencies were supplied.
MaxLenTooSmall,
/// `max_len` is greater than 32, the largest limit `package_merge` accepts.
MaxLenTooLarge,
}
impl Error {
fn descr(&self) -> &str {
match *self {
NoSymbols =>
"package-merge error: frequencies slice was empty",
MaxLenTooSmall =>
"package-merge error: max_len parameter was chosen too small",
MaxLenTooLarge =>
"package-merge error: max_len parameter was chosen too large",
}
}
}
impl fmt::Display for Error {
    /// Writes the variant's message (see `descr`) to the formatter verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.descr())
    }
}
impl error::Error for Error {
fn description(&self) -> &str {
self.descr()
}
}
/// Computes per-symbol code lengths from symbol frequencies using the
/// package-merge scheme, with no code length exceeding `max_len`.
///
/// The returned vector has one entry per element of `frequencies`, in the
/// same order: entry `i` is the code length assigned to symbol `i`.
/// A single symbol receives length 0.
///
/// Frequencies are compared with `order_non_nan`; as that name suggests, the
/// input is assumed to contain no NaN values (behaviour with NaN is not
/// specified here).
///
/// # Errors
///
/// * `Error::NoSymbols` if `frequencies` is empty.
/// * `Error::MaxLenTooLarge` if `max_len` is greater than 32.
/// * `Error::MaxLenTooSmall` if there are more than `2^max_len` symbols.
pub fn package_merge(frequencies: &[f64], max_len: u32) -> Result<Vec<u32>, Error> {
    if frequencies.is_empty() {
        return Err(Error::NoSymbols);
    }
    // Validate the range of `max_len` *before* shifting by it: shifting by
    // the full bit width or more panics in debug builds and wraps in release
    // builds, which would report the wrong error (or none) for huge limits.
    if max_len > 32 {
        return Err(Error::MaxLenTooLarge);
    }
    // Compare in 64 bits so that `max_len == 32` is also safe on targets
    // where `usize` is only 32 bits wide.
    if (frequencies.len() as u64) > (1u64 << max_len) {
        return Err(Error::MaxLenTooSmall);
    }

    // Symbol indices ordered by ascending frequency (stable for ties).
    let sorted = {
        let mut tmp: Vec<usize> = (0..frequencies.len()).collect();
        tmp.sort_by(|&a, &b| order_non_nan(frequencies[a], frequencies[b]));
        tmp
    };

    // `list` holds the item weights of the current level, `merged` is the
    // scratch buffer the next level is built into. Bit `d` of `flags[i]`
    // records whether item `i` of level `d` is a package (a sum of two items
    // of the previous level) rather than an original symbol.
    let capa = frequencies.len() * 2 - 1;
    let mut list: Vec<f64> = Vec::with_capacity(capa);
    let mut flags: Vec<u32> = vec![0; capa];
    let mut merged: Vec<f64> = Vec::with_capacity(capa);

    // Forward pass: build each level by merging the packages formed from the
    // previous level with the sorted symbol frequencies.
    for depth in 0..max_len {
        {
            merged.clear();
            let mask = 1u32 << depth;
            let pairs = complete_chunks(&list, 2).map(|s| (s[0] + s[1], true));
            let srted = sorted.iter().map(|&i| (frequencies[i], false));
            // A package is taken first only when it is strictly lighter than
            // the next symbol; on ties the symbol comes first.
            for (p, m) in pairs.merge_by(srted, |a, b| a.0 < b.0) {
                if m {
                    flags[merged.len()] |= mask;
                }
                merged.push(p);
            }
        }
        mem::swap(&mut merged, &mut list);
    }

    // Backward pass: start with the first `2 * symbols - 2` items of the last
    // level and walk back towards level 0.
    let mut n = frequencies.len() * 2 - 2;
    debug_assert!(list.len() >= n);
    let mut code_lens = vec![0u32; frequencies.len()];
    let mut depth = max_len;
    while depth > 0 && n > 0 {
        depth -= 1;
        let mask = 1u32 << depth;
        let mut merged = 0;
        for i in 0..n {
            if (flags[i] & mask) == 0 {
                // Symbols appear within a level in sorted order, so the
                // number of non-package items seen so far is the position of
                // this symbol in `sorted`.
                code_lens[sorted[i - merged]] += 1;
            } else {
                merged += 1;
            }
        }
        // Every selected package expands to two items of the previous level.
        n = merged * 2;
    }
    Ok(code_lens)
}
#[cfg(test)]
mod tests {
    use super::package_merge;

    /// Frequency table shared by the tests below.
    const FREQS: [f64; 7] = [1.0, 32.0, 16.0, 4.0, 8.0, 2.0, 1.0];

    #[test]
    fn it_works() {
        // With a limit of 8 the longest expected code is 6 bits.
        let loose = package_merge(&FREQS, 8).unwrap();
        assert_eq!(loose, vec![6, 1, 2, 4, 3, 5, 6]);

        // With a limit of 5 no expected code is longer than 5 bits.
        let tight = package_merge(&FREQS, 5).unwrap();
        assert_eq!(tight, vec![5, 1, 2, 5, 3, 5, 5]);
    }

    #[test]
    #[should_panic]
    fn it_fails() {
        // Seven symbols exceed the 2^2 allowed by a limit of 2, so the call
        // returns an error and `unwrap` panics.
        let _ = package_merge(&FREQS, 2).unwrap();
    }
}