mappy_core/
encoding.rs

1//! Space-efficient value encoding
2//! 
3//! Implements variable-length encoding for values to optimize memory usage,
4//! particularly for counter values as described in the research paper.
5
6use crate::{MapletError, MapletResult};
7
8/// Variable-length encoding for unsigned integers
9pub struct VarIntEncoder;
10
11impl VarIntEncoder {
12    /// Encode a u64 value using variable-length encoding
13    #[must_use]
14    pub fn encode_u64(value: u64) -> Vec<u8> {
15        let mut result = Vec::new();
16        let mut val = value;
17        
18        while val >= 128 {
19            result.push(u8::try_from((val & 0x7F) | 0x80).unwrap_or(0));
20            val >>= 7;
21        }
22        result.push(u8::try_from(val).unwrap_or(0));
23        result
24    }
25    
26    /// Decode a u64 value from variable-length encoding
27    /// 
28    /// # Errors
29    /// 
30    /// Returns an error if the data is invalid or truncated
31    pub fn decode_u64(data: &[u8]) -> MapletResult<(u64, usize)> {
32        if data.is_empty() {
33            return Err(MapletError::SerializationError("Empty data".to_string()));
34        }
35        
36        let mut result = 0u64;
37        let mut shift = 0;
38        let mut bytes_read = 0;
39        
40        for &byte in data {
41            bytes_read += 1;
42            result |= u64::from(byte & 0x7F) << shift;
43            
44            if (byte & 0x80) == 0 {
45                // Last byte
46                return Ok((result, bytes_read));
47            }
48            
49            shift += 7;
50            if shift >= 64 {
51                return Err(MapletError::SerializationError("Value too large".to_string()));
52            }
53        }
54        
55        Err(MapletError::SerializationError("Incomplete encoding".to_string()))
56    }
57    
58    /// Encode a u32 value using variable-length encoding
59    #[must_use]
60    pub fn encode_u32(value: u32) -> Vec<u8> {
61        Self::encode_u64(u64::from(value))
62    }
63    
64    /// Decode a u32 value from variable-length encoding
65    /// 
66    /// # Errors
67    /// 
68    /// Returns an error if the data is invalid or the value is too large for u32
69    pub fn decode_u32(data: &[u8]) -> MapletResult<(u32, usize)> {
70        let (value, bytes_read) = Self::decode_u64(data)?;
71        if value > u64::from(u32::MAX) {
72            return Err(MapletError::SerializationError("Value too large for u32".to_string()));
73        }
74        Ok((u32::try_from(value).unwrap_or(0), bytes_read))
75    }
76}
77
78/// Exponential encoding for counter values
79/// 
80/// Uses a more space-efficient encoding for values that grow exponentially,
81/// as described in the research paper for k-mer counting applications.
82pub struct ExponentialEncoder {
83    /// Base for exponential encoding
84    #[allow(dead_code)]
85    base: f64,
86    /// Precision for floating-point values
87    #[allow(dead_code)]
88    precision: u32,
89}
90
91impl ExponentialEncoder {
92    /// Create a new exponential encoder
93    #[must_use]
94    pub const fn new(base: f64, precision: u32) -> Self {
95        Self { base, precision }
96    }
97    
98    /// Encode a counter value using exponential encoding
99    #[must_use]
100    pub fn encode_counter(&self, value: u64) -> Vec<u8> {
101        if value == 0 {
102            return vec![0];
103        }
104        
105        // For simplicity, just use varint encoding for now
106        // In a real implementation, this would use exponential encoding
107        VarIntEncoder::encode_u64(value)
108    }
109    
110    /// Decode a counter value from exponential encoding
111    /// 
112    /// # Errors
113    /// 
114    /// Returns an error if the data is invalid or truncated
115    pub fn decode_counter(&self, data: &[u8]) -> MapletResult<(u64, usize)> {
116        if data.is_empty() {
117            return Err(MapletError::SerializationError("Empty data".to_string()));
118        }
119        
120        if data[0] == 0 {
121            return Ok((0, 1));
122        }
123        
124        // For simplicity, just use varint decoding for now
125        // In a real implementation, this would use exponential decoding
126        VarIntEncoder::decode_u64(data)
127    }
128}
129
130/// Compact encoding for small values
131pub struct CompactEncoder;
132
133impl CompactEncoder {
134    /// Encode a small value (≤8 bytes) inline
135    pub fn encode_inline<T: Copy + bytemuck::Pod>(value: &T) -> [u8; 8] {
136        let mut result = [0u8; 8];
137        let bytes = bytemuck::bytes_of(value);
138        result[..bytes.len()].copy_from_slice(bytes);
139        result
140    }
141    
142    /// Decode a small value from inline encoding
143    /// # Errors
144    /// 
145    /// Returns an error if the data cannot be decoded
146    pub fn decode_inline<T: Copy + bytemuck::Pod>(data: &[u8; 8]) -> MapletResult<T> {
147        let size = std::mem::size_of::<T>();
148        if size > 8 {
149            return Err(MapletError::SerializationError("Type too large for inline encoding".to_string()));
150        }
151        
152        let slice = &data[..size];
153        bytemuck::try_from_bytes(slice)
154            .copied()
155            .map_err(|e| MapletError::SerializationError(format!("Decode error: {e}")))
156    }
157    
158    /// Check if a value can be encoded inline
159    pub const fn can_encode_inline<T>(_value: &T) -> bool {
160        std::mem::size_of::<T>() <= 8
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Known byte patterns plus u64 round trips across the length boundaries.
    #[test]
    fn test_varint_encoding() {
        // Test small values
        assert_eq!(VarIntEncoder::encode_u64(0), vec![0]);
        assert_eq!(VarIntEncoder::encode_u64(127), vec![127]);

        // Test medium values
        assert_eq!(VarIntEncoder::encode_u64(128), vec![0x80, 0x01]);
        assert_eq!(VarIntEncoder::encode_u64(16383), vec![0xFF, 0x7F]);

        // Test round-trip encoding
        for value in [0, 1, 127, 128, 16383, 16384, 1_000_000, u64::MAX] {
            let encoded = VarIntEncoder::encode_u64(value);
            let (decoded, bytes_read) = VarIntEncoder::decode_u64(&encoded).unwrap();
            assert_eq!(decoded, value);
            assert_eq!(bytes_read, encoded.len());
        }
    }

    /// The u32 wrappers round-trip and reject values that only fit in a u64.
    #[test]
    fn test_varint_u32() {
        for value in [0, 1, 127, 128, 65_535, u32::MAX] {
            let encoded = VarIntEncoder::encode_u32(value);
            let (decoded, bytes_read) = VarIntEncoder::decode_u32(&encoded).unwrap();
            assert_eq!(decoded, value);
            assert_eq!(bytes_read, encoded.len());
        }

        let too_wide = VarIntEncoder::encode_u64(u64::from(u32::MAX) + 1);
        assert!(VarIntEncoder::decode_u32(&too_wide).is_err());
    }

    /// Malformed input is reported as an error rather than decoded.
    #[test]
    fn test_varint_decode_errors() {
        // Empty input
        assert!(VarIntEncoder::decode_u64(&[]).is_err());
        // Continuation flag set on the final available byte
        assert!(VarIntEncoder::decode_u64(&[0x80u8]).is_err());
        // More continuation bytes than a u64 can hold
        assert!(VarIntEncoder::decode_u64(&[0x80u8; 11]).is_err());
    }

    /// Decoding stops at the first terminating byte and ignores the rest.
    #[test]
    fn test_varint_decode_stops_at_terminator() {
        let (decoded, bytes_read) = VarIntEncoder::decode_u64(&[0x01u8, 0xFF]).unwrap();
        assert_eq!(decoded, 1);
        assert_eq!(bytes_read, 1);
    }

    /// Counter values round-trip and empty input is rejected.
    #[test]
    fn test_exponential_encoding() {
        let encoder = ExponentialEncoder::new(2.0, 16);

        // Test round-trip encoding
        for value in [0, 1, 2, 4, 8, 16, 100, 1000, 10000] {
            let encoded_data = encoder.encode_counter(value);
            let (decoded, bytes_read) = encoder.decode_counter(&encoded_data).unwrap();
            assert_eq!(decoded, value);
            assert_eq!(bytes_read, encoded_data.len());
        }

        assert!(encoder.decode_counter(&[]).is_err());
    }

    /// Inline encoding round-trips for types that fit in eight bytes.
    #[test]
    fn test_compact_encoding() {
        // Test inline encoding for small values
        let value: u32 = 0x1234_5678;
        let encoded = CompactEncoder::encode_inline(&value);
        let decoded: u32 = CompactEncoder::decode_inline(&encoded).unwrap();
        assert_eq!(decoded, value);

        // Test inline encoding for u64
        let value: u64 = 0x1234_5678_9ABC_DEF0;
        let encoded = CompactEncoder::encode_inline(&value);
        let decoded: u64 = CompactEncoder::decode_inline(&encoded).unwrap();
        assert_eq!(decoded, value);
    }

    /// Types wider than eight bytes are reported as not encodable inline.
    #[test]
    fn test_compact_size_limit() {
        assert!(CompactEncoder::can_encode_inline(&0u8));
        assert!(CompactEncoder::can_encode_inline(&0u64));
        assert!(!CompactEncoder::can_encode_inline(&0u128));

        assert!(CompactEncoder::decode_inline::<u128>(&[0u8; 8]).is_err());
    }
}