avx_arrow/compression/
rle.rs1use crate::error::{ArrowError, Result};
6
7pub fn encode(data: &[u8]) -> Result<Vec<u8>> {
9 if data.is_empty() {
10 return Ok(Vec::new());
11 }
12
13 let mut output = Vec::with_capacity(data.len() / 2);
14 let mut i = 0;
15
16 while i < data.len() {
17 let value = data[i];
18 let mut count = 1u8;
19
20 while i + (count as usize) < data.len()
22 && data[i + count as usize] == value
23 && count < 255 {
24 count += 1;
25 }
26
27 output.push(count);
29 output.push(value);
30
31 i += count as usize;
32 }
33
34 Ok(output)
35}
36
37pub fn decode(data: &[u8]) -> Result<Vec<u8>> {
39 if data.len() % 2 != 0 {
40 return Err(ArrowError::InvalidData(
41 "RLE data must have even length".to_string()
42 ));
43 }
44
45 let mut output = Vec::with_capacity(data.len() * 2);
46 let mut i = 0;
47
48 while i < data.len() {
49 let count = data[i] as usize;
50 let value = data[i + 1];
51
52 output.extend(std::iter::repeat(value).take(count));
53 i += 2;
54 }
55
56 Ok(output)
57}
58
59pub struct RleEncoder {
61 buffer: Vec<u8>,
62}
63
64impl RleEncoder {
65 pub fn new() -> Self {
67 Self { buffer: Vec::new() }
68 }
69
70 pub fn encode(&mut self, data: &[u8]) -> Result<()> {
72 let encoded = encode(data)?;
73 self.buffer.extend_from_slice(&encoded);
74 Ok(())
75 }
76
77 pub fn finish(self) -> Vec<u8> {
79 self.buffer
80 }
81}
82
83impl Default for RleEncoder {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
89pub struct RleDecoder {
91 buffer: Vec<u8>,
92}
93
94impl RleDecoder {
95 pub fn new() -> Self {
97 Self { buffer: Vec::new() }
98 }
99
100 pub fn decode(&mut self, data: &[u8]) -> Result<()> {
102 let decoded = decode(data)?;
103 self.buffer.extend_from_slice(&decoded);
104 Ok(())
105 }
106
107 pub fn finish(self) -> Vec<u8> {
109 self.buffer
110 }
111}
112
113impl Default for RleDecoder {
114 fn default() -> Self {
115 Self::new()
116 }
117}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// A long single-value run compresses and round-trips.
    #[test]
    fn test_rle_repeated() {
        let data = vec![1u8; 100];
        let encoded = encode(&data).unwrap();
        assert!(encoded.len() < data.len());

        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    /// Runs of differing lengths and values round-trip.
    #[test]
    fn test_rle_mixed() {
        let data = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 4];
        let encoded = encode(&data).unwrap();
        assert_eq!(encoded, vec![3, 1, 2, 2, 4, 3, 1, 4]);

        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    /// Empty input maps to empty output in both directions.
    #[test]
    fn test_rle_empty() {
        assert!(encode(&[]).unwrap().is_empty());
        assert!(decode(&[]).unwrap().is_empty());
    }

    /// A run longer than one count byte can express is split into several pairs.
    #[test]
    fn test_rle_run_longer_than_255() {
        let data = vec![7u8; 300];
        let encoded = encode(&data).unwrap();
        assert_eq!(encoded, vec![255, 7, 45, 7]);

        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    /// Input with a dangling count byte is rejected.
    #[test]
    fn test_rle_decode_odd_length() {
        assert!(decode(&[3, 7, 1]).is_err());
    }

    /// Output from several encoder calls concatenates into decodable data.
    #[test]
    fn test_rle_encoder() {
        let mut encoder = RleEncoder::new();
        encoder.encode(&[1, 1, 1]).unwrap();
        encoder.encode(&[2, 2, 2, 2]).unwrap();

        let encoded = encoder.finish();
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, vec![1, 1, 1, 2, 2, 2, 2]);
    }

    /// Output from several decoder calls is accumulated in order.
    #[test]
    fn test_rle_decoder() {
        let mut decoder = RleDecoder::new();
        decoder.decode(&[3, 1]).unwrap();
        decoder.decode(&[4, 2]).unwrap();

        assert_eq!(decoder.finish(), vec![1, 1, 1, 2, 2, 2, 2]);
    }
}
152
153
154
155
156