mappy_core/
encoding.rs

1//! Space-efficient value encoding
2//! 
3//! Implements variable-length encoding for values to optimize memory usage,
4//! particularly for counter values as described in the research paper.
5
6use crate::{MapletError, MapletResult};
7
8/// Variable-length encoding for unsigned integers
9pub struct VarIntEncoder;
10
11impl VarIntEncoder {
12    /// Encode a u64 value using variable-length encoding
13    pub fn encode_u64(value: u64) -> Vec<u8> {
14        let mut result = Vec::new();
15        let mut val = value;
16        
17        while val >= 128 {
18            result.push(((val & 0x7F) | 0x80) as u8);
19            val >>= 7;
20        }
21        result.push(val as u8);
22        result
23    }
24    
25    /// Decode a u64 value from variable-length encoding
26    pub fn decode_u64(data: &[u8]) -> MapletResult<(u64, usize)> {
27        if data.is_empty() {
28            return Err(MapletError::SerializationError("Empty data".to_string()));
29        }
30        
31        let mut result = 0u64;
32        let mut shift = 0;
33        let mut bytes_read = 0;
34        
35        for &byte in data {
36            bytes_read += 1;
37            result |= ((byte & 0x7F) as u64) << shift;
38            
39            if (byte & 0x80) == 0 {
40                // Last byte
41                return Ok((result, bytes_read));
42            }
43            
44            shift += 7;
45            if shift >= 64 {
46                return Err(MapletError::SerializationError("Value too large".to_string()));
47            }
48        }
49        
50        Err(MapletError::SerializationError("Incomplete encoding".to_string()))
51    }
52    
53    /// Encode a u32 value using variable-length encoding
54    pub fn encode_u32(value: u32) -> Vec<u8> {
55        Self::encode_u64(value as u64)
56    }
57    
58    /// Decode a u32 value from variable-length encoding
59    pub fn decode_u32(data: &[u8]) -> MapletResult<(u32, usize)> {
60        let (value, bytes_read) = Self::decode_u64(data)?;
61        if value > u32::MAX as u64 {
62            return Err(MapletError::SerializationError("Value too large for u32".to_string()));
63        }
64        Ok((value as u32, bytes_read))
65    }
66}
67
68/// Exponential encoding for counter values
69/// 
70/// Uses a more space-efficient encoding for values that grow exponentially,
71/// as described in the research paper for k-mer counting applications.
72pub struct ExponentialEncoder {
73    /// Base for exponential encoding
74    base: f64,
75    /// Precision for floating-point values
76    precision: u32,
77}
78
79impl ExponentialEncoder {
80    /// Create a new exponential encoder
81    pub fn new(base: f64, precision: u32) -> Self {
82        Self { base, precision }
83    }
84    
85    /// Encode a counter value using exponential encoding
86    pub fn encode_counter(&self, value: u64) -> Vec<u8> {
87        if value == 0 {
88            return vec![0];
89        }
90        
91        // For simplicity, just use varint encoding for now
92        // In a real implementation, this would use exponential encoding
93        VarIntEncoder::encode_u64(value)
94    }
95    
96    /// Decode a counter value from exponential encoding
97    pub fn decode_counter(&self, data: &[u8]) -> MapletResult<(u64, usize)> {
98        if data.is_empty() {
99            return Err(MapletError::SerializationError("Empty data".to_string()));
100        }
101        
102        if data[0] == 0 {
103            return Ok((0, 1));
104        }
105        
106        // For simplicity, just use varint decoding for now
107        // In a real implementation, this would use exponential decoding
108        VarIntEncoder::decode_u64(data)
109    }
110}
111
112/// Compact encoding for small values
113pub struct CompactEncoder;
114
115impl CompactEncoder {
116    /// Encode a small value (≤8 bytes) inline
117    pub fn encode_inline<T: Copy + bytemuck::Pod>(value: &T) -> [u8; 8] {
118        let mut result = [0u8; 8];
119        let bytes = bytemuck::bytes_of(value);
120        result[..bytes.len()].copy_from_slice(bytes);
121        result
122    }
123    
124    /// Decode a small value from inline encoding
125    pub fn decode_inline<T: Copy + bytemuck::Pod>(data: &[u8; 8]) -> MapletResult<T> {
126        let size = std::mem::size_of::<T>();
127        if size > 8 {
128            return Err(MapletError::SerializationError("Type too large for inline encoding".to_string()));
129        }
130        
131        let slice = &data[..size];
132        bytemuck::try_from_bytes(slice)
133            .map(|v| *v)
134            .map_err(|e| MapletError::SerializationError(format!("Decode error: {}", e)))
135    }
136    
137    /// Check if a value can be encoded inline
138    pub fn can_encode_inline<T>(_value: &T) -> bool {
139        std::mem::size_of::<T>() <= 8
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks known varint byte patterns and encode/decode round trips.
    #[test]
    fn test_varint_encoding() {
        // Values up to 127 fit in a single byte.
        assert_eq!(VarIntEncoder::encode_u64(0), vec![0]);
        assert_eq!(VarIntEncoder::encode_u64(127), vec![127]);

        // From 128 upwards a continuation byte is required.
        assert_eq!(VarIntEncoder::encode_u64(128), vec![0x80, 0x01]);
        assert_eq!(VarIntEncoder::encode_u64(16383), vec![0xFF, 0x7F]);

        // Decoding must give back the input and consume the whole encoding.
        let samples = [0, 1, 127, 128, 16383, 16384, 1000000, u64::MAX];
        for original in samples {
            let bytes = VarIntEncoder::encode_u64(original);
            let (restored, consumed) = VarIntEncoder::decode_u64(&bytes).unwrap();
            assert_eq!(restored, original);
            assert_eq!(consumed, bytes.len());
        }
    }

    /// Checks that counters survive an encode/decode round trip.
    #[test]
    fn test_exponential_encoding() {
        let codec = ExponentialEncoder::new(2.0, 16);

        let counters = [0, 1, 2, 4, 8, 16, 100, 1000, 10000];
        for counter in counters {
            let bytes = codec.encode_counter(counter);
            let (restored, consumed) = codec.decode_counter(&bytes).unwrap();
            assert_eq!(restored, counter);
            assert_eq!(consumed, bytes.len());
        }
    }

    /// Checks that 4-byte and 8-byte values survive inline encoding.
    #[test]
    fn test_compact_encoding() {
        // Encode `input` inline, decode it again, and compare.
        fn assert_inline_round_trip<T>(input: T)
        where
            T: Copy + bytemuck::Pod + PartialEq + std::fmt::Debug,
        {
            let buffer = CompactEncoder::encode_inline(&input);
            let output: T = CompactEncoder::decode_inline(&buffer).unwrap();
            assert_eq!(output, input);
        }

        // A 4-byte value occupies only the front half of the buffer.
        assert_inline_round_trip::<u32>(0x12345678);

        // An 8-byte value fills the buffer completely.
        assert_inline_round_trip::<u64>(0x123456789ABCDEF0);
    }
}