splinter_rs/
codec.rs

1use bytes::{BufMut, Bytes, BytesMut};
2use culprit::Culprit;
3use thiserror::Error;
4use zerocopy::{ConvertError, SizeError};
5
6use crate::codec::encoder::Encoder;
7
8pub mod encoder;
9
10pub(crate) mod footer;
11pub(crate) mod partition_ref;
12pub(crate) mod runs_ref;
13pub(crate) mod tree_ref;
14
15/// Trait for types that can be encoded into a binary format.
16pub trait Encodable {
17    /// Returns the number of bytes required to encode this value.
18    ///
19    /// This should return the exact number of bytes that [`encode`](Self::encode)
20    /// will write, allowing for efficient buffer pre-allocation.
21    ///
22    /// Note: This function traverses the entire datastructure which scales with cardinality.
23    fn encoded_size(&self) -> usize;
24
25    /// Encodes this value into the provided encoder.
26    fn encode<B: BufMut>(&self, encoder: &mut Encoder<B>);
27
28    /// Convenience method that encodes this value to a [`Bytes`] buffer.
29    ///
30    /// This is the easiest way to serialize splinter data. It allocates
31    /// a buffer of the exact required size and encodes the value into it.
32    ///
33    /// # Examples
34    ///
35    /// ```
36    /// use splinter_rs::{Splinter, Encodable, PartitionWrite};
37    ///
38    /// let splinter = Splinter::from_iter([8, 42, 16]);
39    /// let bytes = splinter.encode_to_bytes();
40    /// assert!(!bytes.is_empty());
41    /// assert_eq!(bytes.len(), splinter.encoded_size());
42    /// ```
43    fn encode_to_bytes(&self) -> Bytes {
44        let size = self.encoded_size();
45        let mut encoder = Encoder::new(BytesMut::with_capacity(size));
46        self.encode(&mut encoder);
47        encoder.into_inner().freeze()
48    }
49}
50
51/// Errors that can occur when deserializing splinter data from bytes.
52///
53/// These errors indicate various types of corruption or invalid data that can
54/// be encountered when attempting to decode serialized splinter data.
55#[derive(Debug, Error)]
56pub enum DecodeErr {
57    /// The buffer does not contain enough bytes to decode the expected data.
58    ///
59    /// This error occurs when the buffer is truncated or smaller than the
60    /// minimum required size for a valid splinter.
61    #[error("not enough bytes")]
62    Length,
63
64    /// The data contains invalid or corrupted encoding structures.
65    ///
66    /// This error indicates that while the buffer has sufficient length and
67    /// correct magic bytes, the internal data structures are malformed or
68    /// contain invalid values.
69    #[error("invalid encoding")]
70    Validity,
71
72    /// The buffer does not end with the expected magic bytes.
73    ///
74    /// Splinter data ends with specific magic bytes to identify the format.
75    /// This error indicates the buffer does not contain valid splinter data
76    /// or has been corrupted at the end.
77    #[error("unknown magic value")]
78    Magic,
79
80    /// The calculated checksum does not match the stored checksum.
81    ///
82    /// This error indicates data corruption has occurred somewhere in the
83    /// buffer, as the integrity check has failed.
84    #[error("invalid checksum")]
85    Checksum,
86
87    /// The buffer contains data from the incompatible Splinter V1 format.
88    ///
89    /// This version of splinter-rs can only decode V2 format data. To decode
90    /// V1 data, use splinter-rs version 0.3.3 or earlier.
91    #[error("buffer contains serialized Splinter V1, decode using splinter-rs:v0.3.3")]
92    SplinterV1,
93}
94
95impl DecodeErr {
96    #[inline]
97    fn ensure_bytes_available(data: &[u8], len: usize) -> culprit::Result<(), DecodeErr> {
98        if data.len() < len {
99            Err(Culprit::new(Self::Length))
100        } else {
101            Ok(())
102        }
103    }
104}
105
106impl<S, D> From<SizeError<S, D>> for DecodeErr {
107    #[track_caller]
108    fn from(_: SizeError<S, D>) -> Self {
109        DecodeErr::Length
110    }
111}
112
113impl<A, S, V> From<ConvertError<A, S, V>> for DecodeErr {
114    #[track_caller]
115    fn from(err: ConvertError<A, S, V>) -> Self {
116        match err {
117            ConvertError::Alignment(_) => panic!("All zerocopy transmutations must be unaligned"),
118            ConvertError::Size(_) => DecodeErr::Length,
119            ConvertError::Validity(_) => DecodeErr::Validity,
120        }
121    }
122}
123
124#[cfg(test)]
125mod tests {
126    use itertools::Itertools;
127    use proptest::proptest;
128
129    use crate::{
130        Encodable, Splinter, SplinterRef, assert_error,
131        codec::{
132            DecodeErr,
133            footer::{Footer, SPLINTER_V2_MAGIC},
134            partition_ref::PartitionRef,
135        },
136        level::{Block, Level, Low},
137        partition_kind::PartitionKind,
138        testutil::{
139            LevelSetGen, mkpartition, mkpartition_buf, mksplinter_buf, mksplinter_manual,
140            test_partition_read,
141        },
142        traits::{Optimizable, TruncateFrom},
143    };
144
145    #[test]
146    fn test_encode_decode_direct() {
147        let mut setgen = LevelSetGen::<Low>::new(0xDEADBEEF);
148        let kinds = [
149            PartitionKind::Bitmap,
150            PartitionKind::Vec,
151            PartitionKind::Run,
152            PartitionKind::Tree,
153        ];
154        let sets = &[
155            vec![0],
156            vec![0, 1],
157            vec![0, u16::MAX],
158            vec![u16::MAX],
159            setgen.random(8),
160            setgen.random(4096),
161            setgen.runs(4096, 0.01),
162            setgen.runs(4096, 0.2),
163            setgen.runs(4096, 0.5),
164            setgen.runs(4096, 0.9),
165            (0..Low::MAX_LEN)
166                .map(|v| <Low as Level>::Value::truncate_from(v))
167                .collect_vec(),
168        ];
169
170        for kind in kinds {
171            for (i, set) in sets.iter().enumerate() {
172                println!("Testing partition kind: {kind:?} with set {i}");
173
174                let partition = mkpartition::<Low>(kind, &set);
175                let buf = partition.encode_to_bytes();
176                assert_eq!(
177                    partition.encoded_size(),
178                    buf.len(),
179                    "encoded_size doesn't match actual size"
180                );
181
182                let partition_ref = PartitionRef::<'_, Low>::from_suffix(&buf).unwrap();
183
184                assert_eq!(partition_ref.kind(), kind);
185                test_partition_read(&partition_ref, &set);
186            }
187        }
188    }
189
190    proptest! {
191        #[test]
192        fn test_encode_decode_proptest(
193            values in proptest::collection::vec(0u32..16384, 0..1024),
194        ) {
195            let expected = values.iter().copied().sorted().dedup().collect_vec();
196            let mut splinter = Splinter::from_iter(values);
197            splinter.optimize();
198            let buf = splinter.encode_to_bytes();
199            assert_eq!(
200                buf.len(),
201                splinter.encoded_size(),
202                "encoded_size doesn't match actual size"
203            );
204            let splinter_ref = SplinterRef::from_bytes(buf).unwrap();
205
206            test_partition_read(&splinter_ref, &expected);
207        }
208    }
209
210    #[test]
211    fn test_length_corruption() {
212        for i in 0..Footer::SIZE {
213            let truncated = [0].repeat(i);
214            assert_error!(
215                SplinterRef::from_bytes(truncated),
216                DecodeErr::Length,
217                "Failed for truncated buffer of size {}",
218                i
219            );
220        }
221    }
222
223    #[test]
224    fn test_corrupted_root_partition_kind() {
225        let mut buf = mksplinter_buf(&[1, 2, 3]);
226
227        // Buffer with just footer size but corrupted partition kind
228        let footer_offset = buf.len() - Footer::SIZE;
229        let partitions = &mut buf[0..footer_offset];
230        partitions[partitions.len() - 1] = 10;
231        let corrupted = mksplinter_manual(partitions);
232
233        assert_error!(SplinterRef::from_bytes(corrupted), DecodeErr::Validity);
234    }
235
236    #[test]
237    fn test_corrupted_magic() {
238        let mut buf = mksplinter_buf(&[1, 2, 3]);
239
240        let magic_offset = buf.len() - SPLINTER_V2_MAGIC.len();
241        buf[magic_offset..].copy_from_slice(&[0].repeat(4));
242
243        assert_error!(SplinterRef::from_bytes(buf), DecodeErr::Magic);
244    }
245
246    #[test]
247    fn test_corrupted_data() {
248        let mut buf = mksplinter_buf(&[1, 2, 3]);
249        buf[0] = 123;
250        assert_error!(SplinterRef::from_bytes(buf), DecodeErr::Checksum);
251    }
252
253    #[test]
254    fn test_corrupted_checksum() {
255        let mut buf = mksplinter_buf(&[1, 2, 3]);
256        let checksum_offset = buf.len() - Footer::SIZE;
257        buf[checksum_offset] = 123;
258        assert_error!(SplinterRef::from_bytes(buf), DecodeErr::Checksum);
259    }
260
261    #[test]
262    fn test_corrupted_vec_partition() {
263        let mut buf = mkpartition_buf::<Block>(PartitionKind::Vec, &[1, 2, 3]);
264
265        //                            1     2     3   len  kind
266        assert_eq!(buf.as_ref(), &[0x01, 0x02, 0x03, 0x02, 0x03]);
267
268        // corrupt the length
269        buf[3] = 5;
270
271        assert_error!(PartitionRef::<Block>::from_suffix(&buf), DecodeErr::Length);
272    }
273
274    #[test]
275    fn test_corrupted_run_partition() {
276        let mut buf = mkpartition_buf::<Block>(PartitionKind::Run, &[1, 2, 3]);
277
278        //                            1     3   len  kind
279        assert_eq!(buf.as_ref(), &[0x01, 0x03, 0x00, 0x04]);
280
281        // corrupt the length
282        buf[2] = 5;
283
284        assert_error!(PartitionRef::<Block>::from_suffix(&buf), DecodeErr::Length);
285    }
286
287    #[test]
288    fn test_corrupted_tree_partition() {
289        let mut buf = mkpartition_buf::<Low>(PartitionKind::Tree, &[1, 2]);
290
291        assert_eq!(
292            buf.as_ref(),
293            &[
294                // Vec partition
295                // 1     2   len  kind
296                0x01, 0x02, 0x01, 0x03,
297                // Tree partition
298                // offsets (u16), segments, len, kind
299                0x00, 0x00, 0x00, 0x00, 0x05
300            ]
301        );
302
303        // corrupt the tree len
304        buf[7] = 5;
305
306        assert_error!(PartitionRef::<Block>::from_suffix(&buf), DecodeErr::Length);
307    }
308
309    #[test]
310    fn test_vec_byteorder() {
311        let buf = mkpartition_buf::<Low>(PartitionKind::Vec, &[0x01_00, 0x02_00]);
312        assert_eq!(
313            buf.as_ref(),
314            &[
315                0x01, 0x00, // first value
316                0x02, 0x00, // second value
317                0x00, 0x01, // length
318                0x03, // kind
319            ]
320        );
321    }
322
323    #[test]
324    fn test_run_byteorder() {
325        let buf = mkpartition_buf::<Low>(PartitionKind::Run, &[0x01_00, 0x02_00]);
326        assert_eq!(
327            buf.as_ref(),
328            &[
329                0x01, 0x00, 0x01, 0x00, // first run
330                0x02, 0x00, 0x02, 0x00, // second run
331                0x00, 0x01, // length
332                0x04, // kind
333            ]
334        );
335    }
336
337    #[test]
338    fn test_detect_splinter_v1() {
339        let empty_splinter_v1 = b"\xda\xae\x12\xdf\0\0\0\0";
340        assert_error!(
341            SplinterRef::from_bytes(empty_splinter_v1.as_slice()),
342            DecodeErr::SplinterV1
343        );
344    }
345}