avx_arrow/compression/
rle.rs

1//! Run-Length Encoding (RLE) implementation
2//!
3//! Optimized for data with repeated values
4
5use crate::error::{ArrowError, Result};
6
7/// Encode data using RLE
8pub fn encode(data: &[u8]) -> Result<Vec<u8>> {
9    if data.is_empty() {
10        return Ok(Vec::new());
11    }
12
13    let mut output = Vec::with_capacity(data.len() / 2);
14    let mut i = 0;
15
16    while i < data.len() {
17        let value = data[i];
18        let mut count = 1u8;
19
20        // Count consecutive identical values
21        while i + (count as usize) < data.len()
22            && data[i + count as usize] == value
23            && count < 255 {
24            count += 1;
25        }
26
27        // Write count and value
28        output.push(count);
29        output.push(value);
30
31        i += count as usize;
32    }
33
34    Ok(output)
35}
36
37/// Decode RLE data
38pub fn decode(data: &[u8]) -> Result<Vec<u8>> {
39    if data.len() % 2 != 0 {
40        return Err(ArrowError::InvalidData(
41            "RLE data must have even length".to_string()
42        ));
43    }
44
45    let mut output = Vec::with_capacity(data.len() * 2);
46    let mut i = 0;
47
48    while i < data.len() {
49        let count = data[i] as usize;
50        let value = data[i + 1];
51
52        output.extend(std::iter::repeat(value).take(count));
53        i += 2;
54    }
55
56    Ok(output)
57}
58
59/// RLE encoder with state
60pub struct RleEncoder {
61    buffer: Vec<u8>,
62}
63
64impl RleEncoder {
65    /// Create new encoder
66    pub fn new() -> Self {
67        Self { buffer: Vec::new() }
68    }
69
70    /// Encode data
71    pub fn encode(&mut self, data: &[u8]) -> Result<()> {
72        let encoded = encode(data)?;
73        self.buffer.extend_from_slice(&encoded);
74        Ok(())
75    }
76
77    /// Get encoded data
78    pub fn finish(self) -> Vec<u8> {
79        self.buffer
80    }
81}
82
83impl Default for RleEncoder {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
89/// RLE decoder with state
90pub struct RleDecoder {
91    buffer: Vec<u8>,
92}
93
94impl RleDecoder {
95    /// Create new decoder
96    pub fn new() -> Self {
97        Self { buffer: Vec::new() }
98    }
99
100    /// Decode data
101    pub fn decode(&mut self, data: &[u8]) -> Result<()> {
102        let decoded = decode(data)?;
103        self.buffer.extend_from_slice(&decoded);
104        Ok(())
105    }
106
107    /// Get decoded data
108    pub fn finish(self) -> Vec<u8> {
109        self.buffer
110    }
111}
112
113impl Default for RleDecoder {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// A long single-value run must shrink and round-trip.
    #[test]
    fn test_rle_repeated() {
        let data = vec![1u8; 100];
        let encoded = encode(&data).unwrap();
        assert!(encoded.len() < data.len());

        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    /// Runs of differing lengths and values must round-trip.
    #[test]
    fn test_rle_mixed() {
        let data = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 4];
        let encoded = encode(&data).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    /// Empty input maps to empty output in both directions.
    #[test]
    fn test_rle_empty() {
        assert!(encode(&[]).unwrap().is_empty());
        assert!(decode(&[]).unwrap().is_empty());
    }

    /// A run longer than 255 bytes is split because the count is one byte.
    #[test]
    fn test_rle_long_run_is_split() {
        let data = vec![7u8; 300];
        let encoded = encode(&data).unwrap();
        assert_eq!(encoded, vec![255, 7, 45, 7]);
        assert_eq!(decode(&encoded).unwrap(), data);
    }

    /// Input that is not a whole number of (count, value) pairs is rejected.
    #[test]
    fn test_rle_decode_rejects_odd_length() {
        assert!(decode(&[3, 1, 2]).is_err());
    }

    /// Chunks fed to the encoder decode back to their concatenation.
    #[test]
    fn test_rle_encoder() {
        let mut encoder = RleEncoder::new();
        encoder.encode(&[1, 1, 1]).unwrap();
        encoder.encode(&[2, 2, 2, 2]).unwrap();

        let encoded = encoder.finish();
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, vec![1, 1, 1, 2, 2, 2, 2]);
    }

    /// Chunks fed to the decoder are expanded and concatenated in order.
    #[test]
    fn test_rle_decoder() {
        let mut decoder = RleDecoder::new();
        decoder.decode(&[3, 1]).unwrap();
        decoder.decode(&[4, 2]).unwrap();
        assert_eq!(decoder.finish(), vec![1, 1, 1, 2, 2, 2, 2]);
    }
}
152
153
154
155
156