Skip to main content

nodedb_codec/
raw.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Raw (identity) codec — no compression.
4//!
5//! Passes data through unchanged. Used for symbol columns (already
6//! dictionary-encoded as 4-byte u32 IDs) or pre-compressed data.
7//!
8//! Wire format:
9//! ```text
10//! [4 bytes] data length (LE u32)
11//! [N bytes] raw data
12//! ```
13//!
//! The length header is included for consistency with other codecs; it
//! lets the decoder detect truncated input (it is not a checksum).
16
17use crate::error::CodecError;
18
/// Encode raw bytes (identity codec with length header).
///
/// The output is a 4-byte little-endian `u32` holding `data.len()`,
/// followed by `data` copied verbatim.
///
/// # Panics
///
/// Panics if `data.len()` does not fit in a `u32`. A plain `as u32` cast
/// would silently wrap the header in that case, yielding output that
/// decodes to a truncated prefix of the input.
pub fn encode(data: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 4;

    // Checked conversion: the wire format cannot represent larger payloads.
    let declared_len = u32::try_from(data.len())
        .expect("raw codec payload length does not fit in the u32 length header");

    let mut out = Vec::with_capacity(HEADER_LEN + data.len());
    out.extend_from_slice(&declared_len.to_le_bytes());
    out.extend_from_slice(data);
    out
}
26
27/// Decode raw bytes (validates length header).
28pub fn decode(data: &[u8]) -> Result<Vec<u8>, CodecError> {
29    if data.len() < 4 {
30        return Err(CodecError::Truncated {
31            expected: 4,
32            actual: data.len(),
33        });
34    }
35
36    let expected_len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
37    let payload = &data[4..];
38
39    if payload.len() < expected_len {
40        return Err(CodecError::Truncated {
41            expected: 4 + expected_len,
42            actual: data.len(),
43        });
44    }
45
46    Ok(payload[..expected_len].to_vec())
47}
48
49/// Return a reference to the raw data without copying (zero-copy decode).
50///
51/// Useful when the caller can work with a borrowed slice.
52pub fn decode_ref(data: &[u8]) -> Result<&[u8], CodecError> {
53    if data.len() < 4 {
54        return Err(CodecError::Truncated {
55            expected: 4,
56            actual: data.len(),
57        });
58    }
59
60    let expected_len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
61    let payload = &data[4..];
62
63    if payload.len() < expected_len {
64        return Err(CodecError::Truncated {
65            expected: 4 + expected_len,
66            actual: data.len(),
67        });
68    }
69
70    Ok(&payload[..expected_len])
71}
72
73// ---------------------------------------------------------------------------
74// Streaming encoder / decoder types (trivial wrappers)
75// ---------------------------------------------------------------------------
76
77/// Streaming Raw encoder.
78pub struct RawEncoder {
79    buf: Vec<u8>,
80}
81
82impl RawEncoder {
83    pub fn new() -> Self {
84        Self {
85            buf: Vec::with_capacity(4096),
86        }
87    }
88
89    pub fn push(&mut self, data: &[u8]) {
90        self.buf.extend_from_slice(data);
91    }
92
93    pub fn len(&self) -> usize {
94        self.buf.len()
95    }
96
97    pub fn is_empty(&self) -> bool {
98        self.buf.is_empty()
99    }
100
101    pub fn finish(self) -> Vec<u8> {
102        encode(&self.buf)
103    }
104}
105
106impl Default for RawEncoder {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
112/// Raw decoder wrapper.
113pub struct RawDecoder;
114
115impl RawDecoder {
116    pub fn decode_all(data: &[u8]) -> Result<Vec<u8>, CodecError> {
117        decode(data)
118    }
119
120    pub fn decode_ref(data: &[u8]) -> Result<&[u8], CodecError> {
121        decode_ref(data)
122    }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_roundtrip() {
        let encoded = encode(&[]);
        let decoded = decode(&encoded).unwrap();
        assert!(decoded.is_empty());
    }

    #[test]
    fn data_roundtrip() {
        let data = b"hello world";
        let encoded = encode(data);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, data);
        // 4 header + 11 data = 15
        assert_eq!(encoded.len(), 15);
    }

    #[test]
    fn zero_copy_decode() {
        let data = b"test data";
        let encoded = encode(data);
        let slice = decode_ref(&encoded).unwrap();
        assert_eq!(slice, data.as_ref());
    }

    #[test]
    fn binary_data() {
        let data: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let encoded = encode(&data);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    #[test]
    fn u32_symbol_ids() {
        // Typical symbol column: array of u32 IDs as LE bytes.
        let ids: Vec<u32> = (0..1000).collect();
        let raw: Vec<u8> = ids.iter().flat_map(|id| id.to_le_bytes()).collect();
        let encoded = encode(&raw);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, raw);
    }

    #[test]
    fn streaming_encoder() {
        let mut enc = RawEncoder::new();
        enc.push(b"hello ");
        enc.push(b"world");
        let encoded = enc.finish();
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, b"hello world");
    }

    #[test]
    fn streaming_encoder_len_tracks_payload_only() {
        // `len` / `is_empty` report pushed bytes, not the framed size.
        let mut enc = RawEncoder::default();
        assert!(enc.is_empty());
        assert_eq!(enc.len(), 0);
        enc.push(b"abc");
        assert!(!enc.is_empty());
        assert_eq!(enc.len(), 3);
        assert_eq!(enc.finish().len(), 4 + 3);
    }

    #[test]
    fn truncated_errors() {
        assert!(decode(&[]).is_err());
        assert!(decode(&[10, 0, 0, 0, 1, 2]).is_err()); // claims 10 bytes, only 2
    }

    #[test]
    fn short_header_errors() {
        // 1-3 bytes cannot hold the 4-byte length header.
        for n in 1..4 {
            let partial = vec![0u8; n];
            assert!(decode(&partial).is_err());
            assert!(decode_ref(&partial).is_err());
        }
    }

    #[test]
    fn truncated_error_details() {
        // The error reports total bytes required vs. bytes supplied.
        assert!(matches!(
            decode(&[]),
            Err(CodecError::Truncated {
                expected: 4,
                actual: 0
            })
        ));
        assert!(matches!(
            decode(&[10, 0, 0, 0, 1, 2]),
            Err(CodecError::Truncated {
                expected: 14,
                actual: 6
            })
        ));
    }

    #[test]
    fn zero_copy_truncated_errors() {
        // The borrowed decoder must reject the same inputs as `decode`.
        assert!(decode_ref(&[]).is_err());
        assert!(decode_ref(&[10, 0, 0, 0, 1, 2]).is_err());
        assert!(matches!(
            decode_ref(&[10, 0, 0, 0, 1, 2]),
            Err(CodecError::Truncated {
                expected: 14,
                actual: 6
            })
        ));
    }

    #[test]
    fn decoder_wrapper_forwards() {
        let encoded = encode(b"wrapped");
        assert_eq!(RawDecoder::decode_all(&encoded).unwrap(), b"wrapped");
        assert_eq!(
            RawDecoder::decode_ref(&encoded).unwrap(),
            b"wrapped".as_ref()
        );
        assert!(RawDecoder::decode_all(&[]).is_err());
        assert!(RawDecoder::decode_ref(&[]).is_err());
    }
}