Skip to main content

common/serde/
encoding.rs

1//! Shared encoding utilities for key-value serialization.
2//!
3//! This module provides common encoding/decoding primitives used by OpenData
4//! storage systems.
5//!
6//! ## Why Encode/Decode Traits Are Not Defined Here
7//!
8//! Rust's orphan rules prevent implementing a trait for a type when both the
9//! trait AND the type are defined in external crates. This prevents conflicting
10//! implementations across the ecosystem.
11//!
12//! For example, if we defined `Encode` here in `common`, then `timeseries`
13//! couldn't implement `common::Encode` for `u32` or `SeriesId` (which is a type
14//! alias for `u32`) because:
15//! - `Encode` would be foreign to `timeseries` (defined in `common`)
16//! - `u32` is foreign to both (defined in `std`)
17//!
18//! The rule is: at least one of {trait, type} must be local to the crate doing
19//! the implementation.
20//!
21//! Therefore, each storage crate (timeseries, vector) defines its own
22//! `Encode`/`Decode` traits locally, allowing them to implement these traits
23//! for primitives and type aliases. The generic functions like `encode_array`
24//! must also be defined locally since they're bounded by the local traits.
25
26use bytes::BytesMut;
27
/// Error produced when a value cannot be encoded or decoded.
///
/// Carries nothing but a human-readable description of the failure; the
/// `Display` output is exactly that description.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EncodingError {
    /// Human-readable description of what went wrong.
    pub message: String,
}

// `Debug` is derived above and `Display` is implemented below, which is all
// the `Error` trait requires; its default methods are kept as-is.
impl std::error::Error for EncodingError {}

impl std::fmt::Display for EncodingError {
    /// Writes `message` verbatim, without applying any width or fill flags.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}
41
42impl From<super::DeserializeError> for EncodingError {
43    fn from(err: super::DeserializeError) -> Self {
44        EncodingError {
45            message: err.message,
46        }
47    }
48}
49
50/// Encode a UTF-8 string.
51///
52/// Format: `len: u16` (little-endian) + `len` bytes of UTF-8
53pub fn encode_utf8(s: &str, buf: &mut BytesMut) {
54    let bytes = s.as_bytes();
55    let len = bytes.len();
56    if len > u16::MAX as usize {
57        panic!("String too long for UTF-8 encoding: {} bytes", len);
58    }
59    buf.extend_from_slice(&(len as u16).to_le_bytes());
60    buf.extend_from_slice(bytes);
61}
62
63/// Decode a UTF-8 string.
64///
65/// Format: `len: u16` (little-endian) + `len` bytes of UTF-8
66pub fn decode_utf8(buf: &mut &[u8]) -> Result<String, EncodingError> {
67    if buf.len() < 2 {
68        return Err(EncodingError {
69            message: "Buffer too short for UTF-8 length".to_string(),
70        });
71    }
72    let len = u16::from_le_bytes([buf[0], buf[1]]) as usize;
73    *buf = &buf[2..];
74
75    if buf.len() < len {
76        return Err(EncodingError {
77            message: format!(
78                "Buffer too short for UTF-8 payload: need {} bytes, have {}",
79                len,
80                buf.len()
81            ),
82        });
83    }
84
85    let bytes = &buf[..len];
86    *buf = &buf[len..];
87
88    String::from_utf8(bytes.to_vec()).map_err(|e| EncodingError {
89        message: format!("Invalid UTF-8: {}", e),
90    })
91}
92
93/// Encode an optional non-empty UTF-8 string.
94///
95/// Format: Same as Utf8, but `len = 0` means `None`
96pub fn encode_optional_utf8(opt: Option<&str>, buf: &mut BytesMut) {
97    match opt {
98        Some(s) => encode_utf8(s, buf),
99        None => {
100            buf.extend_from_slice(&0u16.to_le_bytes());
101        }
102    }
103}
104
105/// Decode an optional non-empty UTF-8 string.
106///
107/// Format: Same as Utf8, but `len = 0` means `None`
108pub fn decode_optional_utf8(buf: &mut &[u8]) -> Result<Option<String>, EncodingError> {
109    if buf.len() < 2 {
110        return Err(EncodingError {
111            message: "Buffer too short for optional UTF-8 length".to_string(),
112        });
113    }
114    let len = u16::from_le_bytes([buf[0], buf[1]]);
115    if len == 0 {
116        *buf = &buf[2..];
117        return Ok(None);
118    }
119    decode_utf8(buf).map(Some)
120}
121
122/// Decode the count prefix of an array.
123///
124/// Returns the count as a usize and advances the buffer past the count bytes.
125pub fn decode_array_count(buf: &mut &[u8]) -> Result<usize, EncodingError> {
126    if buf.len() < 2 {
127        return Err(EncodingError {
128            message: "Buffer too short for array count".to_string(),
129        });
130    }
131    let count = u16::from_le_bytes([buf[0], buf[1]]) as usize;
132    *buf = &buf[2..];
133    Ok(count)
134}
135
136/// Encode the count prefix of an array.
137///
138/// Panics if the count exceeds u16::MAX.
139pub fn encode_array_count(count: usize, buf: &mut BytesMut) {
140    if count > u16::MAX as usize {
141        panic!("Array too long: {} items", count);
142    }
143    buf.extend_from_slice(&(count as u16).to_le_bytes());
144}
145
146/// Validate that a buffer length is divisible by the element size for fixed-element arrays.
147pub fn validate_fixed_element_array_len(
148    buf_len: usize,
149    element_size: usize,
150) -> Result<usize, EncodingError> {
151    if !buf_len.is_multiple_of(element_size) {
152        return Err(EncodingError {
153            message: format!(
154                "Buffer length {} is not divisible by element size {}",
155                buf_len, element_size
156            ),
157        });
158    }
159    Ok(buf_len / element_size)
160}
161
162/// Encode a u64 value as 8-byte little-endian.
163///
164/// Format: 8 bytes in little-endian byte order
165pub fn encode_u64(value: u64, buf: &mut BytesMut) {
166    buf.extend_from_slice(&value.to_le_bytes());
167}
168
169/// Decode a u64 value from 8-byte little-endian.
170///
171/// Format: 8 bytes in little-endian byte order
172pub fn decode_u64(buf: &mut &[u8]) -> Result<u64, EncodingError> {
173    if buf.len() < 8 {
174        return Err(EncodingError {
175            message: format!("Buffer too short for u64: need 8 bytes, have {}", buf.len()),
176        });
177    }
178    let bytes: [u8; 8] = buf[..8].try_into().map_err(|_| EncodingError {
179        message: "Failed to extract 8 bytes for u64".to_string(),
180    })?;
181    *buf = &buf[8..];
182    Ok(u64::from_le_bytes(bytes))
183}
184
// Unit tests for the encoding primitives in this module: round trips for each
// encode/decode pair plus the error and panic paths of the decoders and
// length-limited encoders.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn should_encode_and_decode_utf8() {
        // given
        let s = "Hello, World!";
        let mut buf = BytesMut::new();

        // when
        encode_utf8(s, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_utf8(&mut slice).unwrap();

        // then
        assert_eq!(decoded, s);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_encode_and_decode_utf8_with_unicode() {
        // given
        let s = "Hello, 世界!";
        let mut buf = BytesMut::new();

        // when
        encode_utf8(s, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_utf8(&mut slice).unwrap();

        // then
        assert_eq!(decoded, s);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_encode_and_decode_optional_utf8_some() {
        // given
        let s = Some("test");
        let mut buf = BytesMut::new();

        // when
        encode_optional_utf8(s, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_optional_utf8(&mut slice).unwrap();

        // then
        assert_eq!(decoded, s.map(|s| s.to_string()));
        assert!(slice.is_empty());
    }

    #[test]
    fn should_encode_and_decode_optional_utf8_none() {
        // given
        let s: Option<&str> = None;
        let mut buf = BytesMut::new();

        // when
        encode_optional_utf8(s, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_optional_utf8(&mut slice).unwrap();

        // then
        assert_eq!(decoded, None);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_decode_empty_optional_utf8_as_none() {
        // given
        let s = Some("");
        let mut buf = BytesMut::new();

        // when
        encode_optional_utf8(s, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_optional_utf8(&mut slice).unwrap();

        // then: a zero length is the `None` marker, so the empty string is lost
        assert_eq!(decoded, None);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_return_error_for_truncated_utf8() {
        // given
        let mut buf = BytesMut::new();
        buf.extend_from_slice(&10u16.to_le_bytes()); // claim 10 bytes
        buf.extend_from_slice(b"short"); // only 5 bytes

        // when
        let mut slice = buf.as_ref();
        let result = decode_utf8(&mut slice);

        // then
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("Buffer too short"));
    }

    #[test]
    fn should_return_error_for_truncated_utf8_length() {
        // given
        let mut buf = BytesMut::new();
        buf.extend_from_slice(&[1]); // only half of the length prefix

        // when
        let mut slice = buf.as_ref();
        let result = decode_utf8(&mut slice);

        // then
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("Buffer too short"));
    }

    #[test]
    fn should_return_error_for_invalid_utf8_payload() {
        // given
        let mut buf = BytesMut::new();
        buf.extend_from_slice(&2u16.to_le_bytes());
        buf.extend_from_slice(&[0xC3, 0x28]); // not a valid UTF-8 sequence

        // when
        let mut slice = buf.as_ref();
        let result = decode_utf8(&mut slice);

        // then
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("Invalid UTF-8"));
    }

    #[test]
    fn should_return_error_for_truncated_optional_utf8_length() {
        // given
        let mut buf = BytesMut::new();
        buf.extend_from_slice(&[0]); // only half of the length prefix

        // when
        let mut slice = buf.as_ref();
        let result = decode_optional_utf8(&mut slice);

        // then
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("Buffer too short"));
    }

    #[test]
    fn should_encode_and_decode_array_count() {
        // given
        let count = 513usize;
        let mut buf = BytesMut::new();

        // when
        encode_array_count(count, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_array_count(&mut slice).unwrap();

        // then
        assert_eq!(decoded, count);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_return_error_for_truncated_array_count() {
        // given
        let mut buf = BytesMut::new();
        buf.extend_from_slice(&[7]); // only 1 byte

        // when
        let mut slice = buf.as_ref();
        let result = decode_array_count(&mut slice);

        // then
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("Buffer too short"));
    }

    #[test]
    #[should_panic(expected = "Array too long")]
    fn should_panic_when_array_count_exceeds_u16() {
        // given
        let mut buf = BytesMut::new();

        // when
        encode_array_count(u16::MAX as usize + 1, &mut buf);

        // then: panics
    }

    #[test]
    fn should_validate_fixed_element_array_len() {
        // given
        let buf_len = 12;
        let element_size = 4;

        // when
        let count = validate_fixed_element_array_len(buf_len, element_size).unwrap();

        // then
        assert_eq!(count, 3);
    }

    #[test]
    fn should_return_error_for_misaligned_fixed_element_array_len() {
        // given
        let buf_len = 10;
        let element_size = 4;

        // when
        let result = validate_fixed_element_array_len(buf_len, element_size);

        // then
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("not divisible"));
    }

    #[test]
    fn should_encode_and_decode_u64() {
        // given
        let value = 0x0123456789ABCDEFu64;
        let mut buf = BytesMut::new();

        // when
        encode_u64(value, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_u64(&mut slice).unwrap();

        // then
        assert_eq!(decoded, value);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_encode_and_decode_u64_zero() {
        // given
        let value = 0u64;
        let mut buf = BytesMut::new();

        // when
        encode_u64(value, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_u64(&mut slice).unwrap();

        // then
        assert_eq!(decoded, value);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_encode_and_decode_u64_max() {
        // given
        let value = u64::MAX;
        let mut buf = BytesMut::new();

        // when
        encode_u64(value, &mut buf);
        let mut slice = buf.as_ref();
        let decoded = decode_u64(&mut slice).unwrap();

        // then
        assert_eq!(decoded, value);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_return_error_for_truncated_u64() {
        // given
        let mut buf = BytesMut::new();
        buf.extend_from_slice(&[1, 2, 3]); // only 3 bytes

        // when
        let mut slice = buf.as_ref();
        let result = decode_u64(&mut slice);

        // then
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("Buffer too short"));
    }
}