Skip to main content

nodedb_codec/
raw.rs

//! Raw (identity) codec — no compression.
//!
//! Passes data through unchanged. Used for symbol columns (already
//! dictionary-encoded as 4-byte u32 IDs) or pre-compressed data.
//!
//! Wire format:
//! ```text
//! [4 bytes] data length (LE u32)
//! [N bytes] raw data
//! ```
//!
//! The length header is included for consistency with other codecs
//! (it lets the decoder detect truncated input).
14
15use crate::error::CodecError;
16
/// Encode raw bytes (identity codec with length header).
///
/// The output is a 4-byte little-endian `u32` holding `data.len()`,
/// followed by `data` unchanged.
///
/// # Panics
///
/// Panics if `data` is longer than `u32::MAX` bytes. Such a length cannot be
/// represented in the header; casting it would silently truncate and produce
/// a frame whose header disagrees with its payload.
pub fn encode(data: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 4;

    let len = data.len();
    assert!(
        len <= u32::MAX as usize,
        "raw codec payload of {} bytes does not fit the u32 length header",
        len
    );

    let mut out = Vec::with_capacity(HEADER_LEN + len);
    // The cast is lossless: the assertion above bounds `len` by `u32::MAX`.
    out.extend_from_slice(&(len as u32).to_le_bytes());
    out.extend_from_slice(data);
    out
}
24
25/// Decode raw bytes (validates length header).
26pub fn decode(data: &[u8]) -> Result<Vec<u8>, CodecError> {
27    if data.len() < 4 {
28        return Err(CodecError::Truncated {
29            expected: 4,
30            actual: data.len(),
31        });
32    }
33
34    let expected_len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
35    let payload = &data[4..];
36
37    if payload.len() < expected_len {
38        return Err(CodecError::Truncated {
39            expected: 4 + expected_len,
40            actual: data.len(),
41        });
42    }
43
44    Ok(payload[..expected_len].to_vec())
45}
46
47/// Return a reference to the raw data without copying (zero-copy decode).
48///
49/// Useful when the caller can work with a borrowed slice.
50pub fn decode_ref(data: &[u8]) -> Result<&[u8], CodecError> {
51    if data.len() < 4 {
52        return Err(CodecError::Truncated {
53            expected: 4,
54            actual: data.len(),
55        });
56    }
57
58    let expected_len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
59    let payload = &data[4..];
60
61    if payload.len() < expected_len {
62        return Err(CodecError::Truncated {
63            expected: 4 + expected_len,
64            actual: data.len(),
65        });
66    }
67
68    Ok(&payload[..expected_len])
69}
70
71// ---------------------------------------------------------------------------
72// Streaming encoder / decoder types (trivial wrappers)
73// ---------------------------------------------------------------------------
74
75/// Streaming Raw encoder.
76pub struct RawEncoder {
77    buf: Vec<u8>,
78}
79
80impl RawEncoder {
81    pub fn new() -> Self {
82        Self {
83            buf: Vec::with_capacity(4096),
84        }
85    }
86
87    pub fn push(&mut self, data: &[u8]) {
88        self.buf.extend_from_slice(data);
89    }
90
91    pub fn len(&self) -> usize {
92        self.buf.len()
93    }
94
95    pub fn is_empty(&self) -> bool {
96        self.buf.is_empty()
97    }
98
99    pub fn finish(self) -> Vec<u8> {
100        encode(&self.buf)
101    }
102}
103
104impl Default for RawEncoder {
105    fn default() -> Self {
106        Self::new()
107    }
108}
109
110/// Raw decoder wrapper.
111pub struct RawDecoder;
112
113impl RawDecoder {
114    pub fn decode_all(data: &[u8]) -> Result<Vec<u8>, CodecError> {
115        decode(data)
116    }
117
118    pub fn decode_ref(data: &[u8]) -> Result<&[u8], CodecError> {
119        decode_ref(data)
120    }
121}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty payload survives an encode/decode round trip.
    #[test]
    fn empty_roundtrip() {
        let frame = encode(&[]);
        assert!(decode(&frame).unwrap().is_empty());
    }

    /// A text payload round-trips and the frame is header + payload long.
    #[test]
    fn data_roundtrip() {
        let payload: &[u8] = b"hello world";
        let frame = encode(payload);
        assert_eq!(decode(&frame).unwrap(), payload);
        // 4 header + 11 data = 15
        assert_eq!(frame.len(), 15);
    }

    /// The borrowing decoder yields the original payload bytes.
    #[test]
    fn zero_copy_decode() {
        let payload: &[u8] = b"test data";
        let frame = encode(payload);
        assert_eq!(decode_ref(&frame).unwrap(), payload);
    }

    /// Every possible byte value passes through unchanged.
    #[test]
    fn binary_data() {
        let payload: Vec<u8> = (0..=u8::MAX).collect();
        let frame = encode(&payload);
        assert_eq!(decode(&frame).unwrap(), payload);
    }

    /// Typical symbol column: array of u32 IDs as LE bytes.
    #[test]
    fn u32_symbol_ids() {
        let raw: Vec<u8> = (0u32..1000).flat_map(u32::to_le_bytes).collect();
        let frame = encode(&raw);
        assert_eq!(decode(&frame).unwrap(), raw);
    }

    /// Chunks pushed into the streaming encoder are concatenated in order.
    #[test]
    fn streaming_encoder() {
        let chunks: [&[u8]; 2] = [b"hello ", b"world"];
        let mut enc = RawEncoder::new();
        for chunk in chunks.iter() {
            enc.push(chunk);
        }
        let frame = enc.finish();
        assert_eq!(decode(&frame).unwrap(), b"hello world");
    }

    /// Missing header and short payload are both rejected.
    #[test]
    fn truncated_errors() {
        let no_header: &[u8] = &[];
        assert!(decode(no_header).is_err());

        // claims 10 bytes, only 2
        let short_payload: [u8; 6] = [10, 0, 0, 0, 1, 2];
        assert!(decode(&short_payload).is_err());
    }
}