rdir_encoding/
lib.rs

1extern crate itertools;
2extern crate num_integer;
3extern crate num_traits;
4
5use std::fmt;
6use std::iter;
7use std::i16;
8
9use itertools::Itertools;
10use num_traits::{Float, NumCast, PrimInt};
11
12#[derive(Debug)]
13pub enum RdirError {
14    Decode(String),
15    Encode(String),
16}
17
18impl fmt::Display for RdirError {
19    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
20        match *self {
21            RdirError::Decode(ref err) => write!(f, "Decode error: {}", err),
22            RdirError::Encode(ref err) => write!(f, "Encode error: {}", err),
23        }
24    }
25}
26
27/// Run-length encoding.
28///
29/// Run-length decoding can generally be used to compress arrays that contain
30/// stretches of equal values. Instead of storing each value itself, stretches
31/// of equal values are represented by the value itself and the occurrence count,
32/// that is a value/count pair.
33///
34/// # Examples
35///
36/// ```
37/// use rdir_encoding::RunLength;
38///
39/// let data = [1, 1, 1, 1, 2, 1, 1, 1, 1];
40/// let encoded = RunLength::encode(&data).unwrap();
41/// assert_eq!(vec![1, 4, 2, 1, 1, 4], encoded);
42///
43/// let decoded = RunLength::decode(&encoded).unwrap();
44/// assert_eq!(vec![1, 1, 1, 1, 2, 1, 1, 1, 1], decoded);
45/// ```
46#[derive(Debug)]
47pub struct RunLength;
48
49impl RunLength {
50    pub fn decode<T>(values: &[T]) -> Result<Vec<i32>, RdirError>
51    where
52        T: num_integer::Integer + NumCast + PrimInt,
53    {
54        let mut res: Vec<i32> = Vec::new();
55
56        if !values.len() % 2 == 0 {
57            return Err(RdirError::Decode("Run Length error".to_string()));
58        }
59
60        for v in values.chunks(2) {
61            let value = &v[0];
62            let repeat = &v[1];
63            let chunks: usize = NumCast::from(*repeat).unwrap();
64            for i in iter::repeat(value).take(chunks) {
65                let value: i32 = NumCast::from(*i).unwrap();
66                res.push(value);
67            }
68        }
69        Ok(res)
70    }
71
72    /// Encode any array of 'T' where `T ` can be any Integer.
73    pub fn encode<T>(values: &[T]) -> Result<Vec<i32>, RdirError>
74    where
75        T: num_integer::Integer + NumCast + PrimInt,
76    {
77        let mut result: Vec<i32> = Vec::new();
78
79        for (key, group) in &values.into_iter().group_by(|v| *v) {
80            let key: i32 = NumCast::from(*key).unwrap();
81            result.push(key);
82            result.push(group.count() as i32);
83        }
84        Ok(result)
85    }
86}
87
88/// Delta encoding.
89///
90/// Delta encoding is used to store an array of numbers. Instead of storing the
91/// numbers themselves, the differences (deltas) between the numbers are stored.
92/// When the values of the deltas are smaller than the numbers themselves they
93/// can be more efficiently packed to require less space.
94///
95/// Note that arrays in which the values change by an identical amount for a range
96/// of consecutive values lend themselves to subsequent run-length encoding.
97///
98/// # Examples
99///
100/// ```
101/// use rdir_encoding::Delta;
102///
103/// let encoded =      [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 5];
104/// let expected = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 20];
105///
106/// let decoded = Delta::decode(&encoded).unwrap();
107/// assert_eq!(expected, decoded);
108/// ```
109#[derive(Debug)]
110pub struct Delta;
111
112impl Delta {
113    /// Decode given values
114    pub fn decode(values: &[i32]) -> Result<Vec<i32>, RdirError> {
115        let mut buffer = Vec::with_capacity(values.len() as usize);
116
117        // The first entry in the array is left as is
118        buffer.push(values[0]);
119
120        for (index, value) in values.iter().skip(1).enumerate() {
121            let position = buffer[index];
122            buffer.push(position + value)
123        }
124        Ok(buffer)
125    }
126
127    /// Encode any array of 'T' where `T ` can be any Integer.
128    pub fn encode<T>(values: &[T]) -> Result<Vec<i32>, RdirError>
129    where
130        T: num_integer::Integer + NumCast + PrimInt,
131    {
132        if values.len() <= 1 {
133            let err = format!(
134                "Delta::encode() error: values length should be greater than {}",
135                values.len()
136            );
137            return Err(RdirError::Encode(err));
138        }
139
140        let mut buffer: Vec<i32> = Vec::with_capacity(values.len() as usize);
141
142        let mut position = NumCast::from(values[0]).unwrap();
143        buffer.push(position);
144        for (_, value) in values.iter().skip(1).enumerate() {
145            let value: i32 = NumCast::from(*value).unwrap();
146            buffer.push(value - position);
147            position = value;
148        }
149        Ok(buffer)
150    }
151}
152
153/// Integer encoding.
154///
155/// In integer encoding, floating point numbers are converted to integer values
156/// by multiplying with a factor and discard everything after the decimal point.
157/// Depending on the multiplication factor this can change the precision but with
158/// a sufficiently large factor it is lossless. The integer values can then often
159/// be compressed with delta encoding which is the main motivation for it.
160///
161/// # Examples
162///
163/// ```
164/// use rdir_encoding::IntegerEncoding;
165///
166/// let data = [1.00, 1.00, 0.50];
167/// let encoded = IntegerEncoding::encode(&data, 100).unwrap();
168/// assert_eq!(encoded, vec![100, 100, 50]);
169///
170/// let decoded = IntegerEncoding::decode(&encoded, 100).unwrap();
171/// assert_eq!(decoded, data);
172/// ```
173#[derive(Debug)]
174pub struct IntegerEncoding;
175
176impl IntegerEncoding {
177    /// Decode and return the decoded data
178    pub fn decode<T>(values: &[T], factor: i32) -> Result<Vec<f32>, RdirError>
179    where
180        T: num_integer::Integer + NumCast + PrimInt,
181    {
182        let result: Vec<f32> = values
183            .iter()
184            .map(|x| {
185                let value: f32 = NumCast::from(*x).unwrap();
186                value / factor as f32
187            })
188            .collect();
189        Ok(result)
190    }
191
192    /// Encode any array of 'T' where `T ` can be any Integer with an desired `factor`
193    pub fn encode<T>(values: &[T], factor: i32) -> Result<Vec<i32>, RdirError>
194    where
195        T: Float,
196    {
197        let result: Vec<i32> = values
198            .iter()
199            .map(|x| {
200                let x: T = NumCast::from(*x).unwrap();
201                let factor: T = NumCast::from(factor).unwrap();
202                let result: i32 = NumCast::from(x * factor).unwrap();
203                result
204            })
205            .collect();
206        Ok(result)
207    }
208}
209
210/// Recursive indexing encoding
211
212/// Recursive indexing encodes values such that the encoded values lie within the
213/// open interval (MIN, MAX). This allows to create a more compact representation
214/// of a 32-bit signed integer array when the majority of values in the array fit
215/// into 16-bit (or 8-bit). To encode each value in the input array the method
216/// stores the value itself if it lies within the open interval (MIN, MAX),
217/// otherwise the MAX (or MIN if the number is negative) interval endpoint is stored
218/// and subtracted from the input value. This process of storing and subtracting is
219/// repeated recursively until the remainder lies within the interval.
220///
221/// Note that `MAX` and `MIN` are the largest and smallest value that can be
222/// represented by the `i16` integer type
223///
224/// # Examples
225///
226/// ```
227/// use rdir_encoding::RecursiveIndexing;
228///
229/// let data = [1, 420, 32767, 120, -32768, 32769];
230///
231/// let encoded = RecursiveIndexing::encode(&data).unwrap();
232/// assert_eq!(encoded, vec![1, 420, 32767, 0, 120, -32768, 0, 32767, 2]);
233///
234/// let decoded = RecursiveIndexing::decode(&encoded).unwrap();
235/// assert_eq!(decoded, data);
236/// ```
237#[derive(Debug)]
238pub struct RecursiveIndexing;
239
240impl RecursiveIndexing {
241    /// Decode and return the decoded data
242    pub fn decode<T>(values: &[T]) -> Result<Vec<i32>, RdirError>
243    where
244        T: num_integer::Integer + NumCast + PrimInt,
245    {
246        let mut output = Vec::new();
247        let mut out_len: i32 = 0;
248
249        let max: i32 = NumCast::from(i16::MAX).unwrap();
250        let min: i32 = NumCast::from(i16::MIN).unwrap();
251
252        for item in values {
253            let item: i32 = NumCast::from(*item).unwrap();
254
255            if item == max || item == min {
256                out_len += item;
257            } else {
258                out_len += item;
259                output.push(out_len);
260                out_len = 0;
261            }
262        }
263        Ok(output)
264    }
265
266    /// Encode values
267    pub fn encode(values: &[i32]) -> Result<Vec<i16>, RdirError> {
268        let mut output: Vec<i16> = Vec::new();
269
270        let max: i32 = NumCast::from(i16::MAX).unwrap();
271        let min: i32 = NumCast::from(i16::MIN).unwrap();
272
273        for num in values {
274            let mut num = *num;
275            if num >= 0 {
276                while num >= max {
277                    output.push(NumCast::from(max).unwrap());
278                    num -= max;
279                }
280            } else {
281                while num <= min {
282                    output.push(NumCast::from(min).unwrap());
283                    num += min.abs();
284                }
285            }
286            output.push(NumCast::from(num).unwrap());
287        }
288        Ok(output)
289    }
290}
291
292#[cfg(test)]
293mod tests {
294    use super::*;
295
296    #[test]
297    fn it_decode_run_length_encoding() {
298        let encoded = [1, 4, 2, 1, 1, 4];
299        let decoded = RunLength::decode(&encoded).unwrap();
300        assert_eq!(vec![1, 1, 1, 1, 2, 1, 1, 1, 1], decoded);
301
302        let decoded = RunLength::decode(&encoded).unwrap();
303        assert_eq!(vec![1, 1, 1, 1, 2, 1, 1, 1, 1], decoded);
304    }
305
306    #[test]
307    fn it_encode_run_length_encoding() {
308        let encoded = [1, 1, 1, 1, 2, 1, 1, 1, 1];
309        let decoded = RunLength::encode(&encoded).unwrap();
310        assert_eq!(vec![1, 4, 2, 1, 1, 4], decoded);
311    }
312
313    #[test]
314    fn it_decode_delta_encoding() {
315        let data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 5];
316        let expected = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 20];
317        let actual = Delta::decode(&data).unwrap();
318        assert_eq!(expected, actual);
319    }
320
321    #[test]
322    fn it_encode_delta_encoding() {
323        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 20];
324        let expected = vec![1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 5];
325        let actual = Delta::encode(&data).unwrap();
326        assert_eq!(expected, actual);
327    }
328
329    #[test]
330    fn it_decode_integer_decoding() {
331        let data = [100, 100, 100, 100, 50, 50];
332        let expected = vec![1.00, 1.00, 1.00, 1.00, 0.50, 0.50];
333        let actual = IntegerEncoding::decode(&data, 100).unwrap();
334        assert_eq!(expected, actual);
335
336        let data = [100_i16, 100, 100, 100, 50, 50];
337        let expected = vec![1.00, 1.00, 1.00, 1.00, 0.50, 0.50];
338        let actual = IntegerEncoding::decode(&data, 100).unwrap();
339        assert_eq!(expected, actual);
340    }
341
342    #[test]
343    fn it_encode_integer_encoding() {
344        let data = [1.00, 1.00, 1.00, 1.00, 0.50, 0.50];
345        let expected = vec![100, 100, 100, 100, 50, 50];
346        let actual = IntegerEncoding::encode(&data, 100).unwrap();
347        assert_eq!(expected, actual);
348    }
349
350    #[test]
351    fn it_decode_recursive_index_encoding() {
352        let data = [1, 420, 32767, 0, 120, -32768, 0, 32767, 2];
353        let expected = vec![1, 420, 32767, 120, -32768, 32769];
354        let actual = RecursiveIndexing::decode(&data).unwrap();
355        assert_eq!(expected, actual);
356    }
357
358    #[test]
359    fn it_encode_recursive_index_encoding() {
360        let data = [1, 420, 32767, 120, -32768, 32769];
361        let expected = vec![1, 420, 32767, 0, 120, -32768, 0, 32767, 2];
362        let actual = RecursiveIndexing::encode(&data).unwrap();
363        assert_eq!(expected, actual);
364    }
365}