mappy_core/
encoding.rs

1//! Space-efficient value encoding
2//!
3//! Implements variable-length encoding for values to optimize memory usage,
4//! particularly for counter values as described in the research paper.
5
6use crate::{MapletError, MapletResult};
7
8/// Variable-length encoding for unsigned integers
9pub struct VarIntEncoder;
10
11impl VarIntEncoder {
12    /// Encode a u64 value using variable-length encoding
13    #[must_use]
14    pub fn encode_u64(value: u64) -> Vec<u8> {
15        let mut result = Vec::new();
16        let mut val = value;
17
18        while val >= 128 {
19            result.push(u8::try_from((val & 0x7F) | 0x80).unwrap_or(0));
20            val >>= 7;
21        }
22        result.push(u8::try_from(val).unwrap_or(0));
23        result
24    }
25
26    /// Decode a u64 value from variable-length encoding
27    ///
28    /// # Errors
29    ///
30    /// Returns an error if the data is invalid or truncated
31    pub fn decode_u64(data: &[u8]) -> MapletResult<(u64, usize)> {
32        if data.is_empty() {
33            return Err(MapletError::SerializationError("Empty data".to_string()));
34        }
35
36        let mut result = 0u64;
37        let mut shift = 0;
38        let mut bytes_read = 0;
39
40        for &byte in data {
41            bytes_read += 1;
42            result |= u64::from(byte & 0x7F) << shift;
43
44            if (byte & 0x80) == 0 {
45                // Last byte
46                return Ok((result, bytes_read));
47            }
48
49            shift += 7;
50            if shift >= 64 {
51                return Err(MapletError::SerializationError(
52                    "Value too large".to_string(),
53                ));
54            }
55        }
56
57        Err(MapletError::SerializationError(
58            "Incomplete encoding".to_string(),
59        ))
60    }
61
62    /// Encode a u32 value using variable-length encoding
63    #[must_use]
64    pub fn encode_u32(value: u32) -> Vec<u8> {
65        Self::encode_u64(u64::from(value))
66    }
67
68    /// Decode a u32 value from variable-length encoding
69    ///
70    /// # Errors
71    ///
72    /// Returns an error if the data is invalid or the value is too large for u32
73    pub fn decode_u32(data: &[u8]) -> MapletResult<(u32, usize)> {
74        let (value, bytes_read) = Self::decode_u64(data)?;
75        if value > u64::from(u32::MAX) {
76            return Err(MapletError::SerializationError(
77                "Value too large for u32".to_string(),
78            ));
79        }
80        Ok((u32::try_from(value).unwrap_or(0), bytes_read))
81    }
82}
83
84/// Exponential encoding for counter values
85///
86/// Uses a more space-efficient encoding for values that grow exponentially,
87/// as described in the research paper for k-mer counting applications.
88pub struct ExponentialEncoder {
89    /// Base for exponential encoding
90    #[allow(dead_code)]
91    base: f64,
92    /// Precision for floating-point values
93    #[allow(dead_code)]
94    precision: u32,
95}
96
97impl ExponentialEncoder {
98    /// Create a new exponential encoder
99    #[must_use]
100    pub const fn new(base: f64, precision: u32) -> Self {
101        Self { base, precision }
102    }
103
104    /// Encode a counter value using exponential encoding
105    #[must_use]
106    pub fn encode_counter(&self, value: u64) -> Vec<u8> {
107        if value == 0 {
108            return vec![0];
109        }
110
111        // For simplicity, just use varint encoding for now
112        // In a real implementation, this would use exponential encoding
113        VarIntEncoder::encode_u64(value)
114    }
115
116    /// Decode a counter value from exponential encoding
117    ///
118    /// # Errors
119    ///
120    /// Returns an error if the data is invalid or truncated
121    pub fn decode_counter(&self, data: &[u8]) -> MapletResult<(u64, usize)> {
122        if data.is_empty() {
123            return Err(MapletError::SerializationError("Empty data".to_string()));
124        }
125
126        if data[0] == 0 {
127            return Ok((0, 1));
128        }
129
130        // For simplicity, just use varint decoding for now
131        // In a real implementation, this would use exponential decoding
132        VarIntEncoder::decode_u64(data)
133    }
134}
135
136/// Compact encoding for small values
137pub struct CompactEncoder;
138
139impl CompactEncoder {
140    /// Encode a small value (≤8 bytes) inline
141    pub fn encode_inline<T: Copy + bytemuck::Pod>(value: &T) -> [u8; 8] {
142        let mut result = [0u8; 8];
143        let bytes = bytemuck::bytes_of(value);
144        result[..bytes.len()].copy_from_slice(bytes);
145        result
146    }
147
148    /// Decode a small value from inline encoding
149    /// # Errors
150    ///
151    /// Returns an error if the data cannot be decoded
152    pub fn decode_inline<T: Copy + bytemuck::Pod>(data: &[u8; 8]) -> MapletResult<T> {
153        let size = std::mem::size_of::<T>();
154        if size > 8 {
155            return Err(MapletError::SerializationError(
156                "Type too large for inline encoding".to_string(),
157            ));
158        }
159
160        let slice = &data[..size];
161        bytemuck::try_from_bytes(slice)
162            .copied()
163            .map_err(|e| MapletError::SerializationError(format!("Decode error: {e}")))
164    }
165
166    /// Check if a value can be encoded inline
167    pub const fn can_encode_inline<T>(_value: &T) -> bool {
168        std::mem::size_of::<T>() <= 8
169    }
170}
171
172#[cfg(test)]
173mod tests {
174    use super::*;
175
176    #[test]
177    fn test_varint_encoding() {
178        // Test small values
179        assert_eq!(VarIntEncoder::encode_u64(0), vec![0]);
180        assert_eq!(VarIntEncoder::encode_u64(127), vec![127]);
181
182        // Test medium values
183        assert_eq!(VarIntEncoder::encode_u64(128), vec![0x80, 0x01]);
184        assert_eq!(VarIntEncoder::encode_u64(16383), vec![0xFF, 0x7F]);
185
186        // Test round-trip encoding
187        for value in [0, 1, 127, 128, 16383, 16384, 1_000_000, u64::MAX] {
188            let encoded = VarIntEncoder::encode_u64(value);
189            let (decoded, bytes_read) = VarIntEncoder::decode_u64(&encoded).unwrap();
190            assert_eq!(decoded, value);
191            assert_eq!(bytes_read, encoded.len());
192        }
193    }
194
195    #[test]
196    fn test_exponential_encoding() {
197        let encoder = ExponentialEncoder::new(2.0, 16);
198
199        // Test round-trip encoding
200        for value in [0, 1, 2, 4, 8, 16, 100, 1000, 10000] {
201            let encoded_data = encoder.encode_counter(value);
202            let (decoded, bytes_read) = encoder.decode_counter(&encoded_data).unwrap();
203            assert_eq!(decoded, value);
204            assert_eq!(bytes_read, encoded_data.len());
205        }
206    }
207
208    #[test]
209    fn test_compact_encoding() {
210        // Test inline encoding for small values
211        let value: u32 = 0x1234_5678;
212        let encoded = CompactEncoder::encode_inline(&value);
213        let decoded: u32 = CompactEncoder::decode_inline(&encoded).unwrap();
214        assert_eq!(decoded, value);
215
216        // Test inline encoding for u64
217        let value: u64 = 0x1234_5678_9ABC_DEF0;
218        let encoded = CompactEncoder::encode_inline(&value);
219        let decoded: u64 = CompactEncoder::decode_inline(&encoded).unwrap();
220        assert_eq!(decoded, value);
221    }
222}