thor_devkit/
rlp.rs

1//! This module enables RLP encoding of high-level objects.
2//!
3//! RLP (recursive length prefix) is a common algorithm for encoding
4//! of variable length binary data. RLP encodes data before storing on disk
5//! or transmitting via network.
6//!
7//! Theory
8//! ------
9//!
10//! Encoding
11//! ********
12//!
13//! Primary RLP can only deal with "item" type, which is defined as:
14//!
15//! - Byte string ([`Bytes`]) or
16//! - Sequence of items ([`Vec`], fixed array or slice).
17//!
18//! Some examples are:
19//!
20//! * ``b'\x00\xff'``
21//! * empty list ``vec![]``
22//! * list of bytes ``vec![vec![0u8], vec![1u8, 3u8]]``
23//! * list of combinations ``vec![vec![], vec![0u8], vec![vec![0]]]``
24//!
25//! The encoded result is always a byte string (sequence of [`u8`]).
26//!
27//! Encoding algorithm
28//! ******************
29//!
30//! Given `x` item as input, we define `rlp_encode` as the following algorithm:
31//!
32//! Let `concat` be a function that joins given bytes into single byte sequence.
33//! 1. If `x` is a single byte and `0x00 <= x <= 0x7F`, `rlp_encode(x) = x`.
34//! 1. Otherwise, if `x` is a byte string, let `len(x)` be length of `x` in bytes
35//!    and define encoding as follows:
//!    * If `0 <= len(x) < 0x38` (note that empty byte string fulfills this requirement), then
37//!      ```txt
38//!      rlp_encode(x) = concat(0x80 + len(x), x)
39//!      ```
40//!      In this case first byte is in range `[0x80; 0xB7]`.
//!    * If `0x38 <= len(x) <= 0xFFFFFFFFFFFFFFFF`, then
42//!      ```txt
43//!      rlp_encode(x) = concat(0xB7 + len(len(x)), len(x), x)
44//!      ```
45//!      In this case first byte is in range `[0xB8; 0xBF]`.
46//!    * For longer strings encoding is undefined.
47//! 1. Otherwise, if `x` is a list, let `s = concat(map(rlp_encode, x))`
48//!    be concatenation of RLP encodings of all its items.
//!    * If `0 <= len(s) < 0x38` (note that empty list matches), then
50//!      ```txt
51//!      rlp_encode(x) = concat(0xC0 + len(s), s)
52//!      ```
53//!      In this case first byte is in range `[0xC0; 0xF7]`.
//!    * If `0x38 <= len(s) <= 0xFFFFFFFFFFFFFFFF`, then
55//!      ```txt
//!      rlp_encode(x) = concat(0xF7 + len(len(s)), len(s), s)
57//!      ```
58//!      In this case first byte is in range `[0xF8; 0xFF]`.
59//!    * For longer lists encoding is undefined.
60//!
61//! See more in [Ethereum wiki](https://eth.wiki/fundamentals/rlp).
62//!
63//! Encoding examples
64//! *****************
65//!
66//! | ``x``             |       ``rlp_encode(x)``        |
67//! |-------------------|--------------------------------|
68//! | ``b''``           | ``0x80``                       |
69//! | ``b'\x00'``       | ``0x00``                       |
70//! | ``b'\x0F'``       | ``0x0F``                       |
71//! | ``b'\x79'``       | ``0x79``                       |
72//! | ``b'\x80'``       | ``0x81 0x80``                  |
73//! | ``b'\xFF'``       | ``0x81 0xFF``                  |
74//! | ``b'foo'``        | ``0x83 0x66 0x6F 0x6F``        |
75//! | ``[]``            | ``0xC0``                       |
76//! | ``[b'\x0F']``     | ``0xC1 0x0F``                  |
77//! | ``[b'\xEF']``     | ``0xC1 0x81 0xEF``             |
78//! | ``[[], [[]]]``    | ``0xC3 0xC0 0xC1 0xC0``        |
79//!
80//!
81//! Serialization
82//! *************
83//!
84//! However, in the real world, the inputs are not pure bytes nor lists.
85//! We need a way to encode numbers (like [`u64`]), custom structs, enums and other
86//! more complex machinery that exists in the surrounding code.
87//!
88//! This library wraps [`fastrlp`](https://docs.rs/fastrlp/0.4.0/fastrlp/)
89//! crate, so everything mentioned there about [`Encodable`] and [`Decodable`] traits still
90//! applies. You can implement those for any object to make it RLP-serializable.
91//!
92//! However, following this approach directly results in cluttered code: your `struct`s
93//! now have to use field types that match serialization, which may be very inconvenient.
94//!
95//! To avoid this pitfall, this RLP implementation allows "extended" struct definition
96//! via a macro. Let's have a look at `Transaction` definition:
97//!
98//! ```rust
99//! use thor_devkit::rlp::{AsBytes, AsVec, Maybe, Bytes};
100//! use thor_devkit::{rlp_encodable, U256};
101//! use thor_devkit::transactions::{Clause, Reserved};
102//!
103//! rlp_encodable! {
104//!     /// Represents a single VeChain transaction.
105//!     #[derive(Clone, Debug, Eq, PartialEq)]
106//!     pub struct Transaction {
107//!         /// Chain tag
108//!         pub chain_tag: u8,
109//!         pub block_ref: u64,
110//!         pub expiration: u32,
111//!         pub clauses: Vec<Clause>,
112//!         pub gas_price_coef: u8,
113//!         pub gas: u64,
114//!         pub depends_on: Option<U256> => AsBytes<U256>,
115//!         pub nonce: u64,
116//!         pub reserved: Option<Reserved> => AsVec<Reserved>,
117//!         pub signature: Option<Bytes> => Maybe<Bytes>,
118//!     }
119//! }
120//! ```
121//!
122//! What's going on here? First, some fields are encoded "as usual": unsigned integers
123//! are encoded just fine and you likely won't need any different encoding. However,
124//! some fields work in a different way. `depends_on` is a number that may be present
//! or absent, and it should be encoded as a byte string. `U256` is already encoded this
//! way, but `None` is not ([`Option`] is not RLP-serializable by itself). So we wrap it
127//! in a special wrapper: [`AsBytes`]. [`AsBytes<T>`] will serialize `Some(T)` as `T` and
128//! [`None`] as an empty byte string.
129//!
130//! `reserved` is a truly special struct that has custom encoding implemented for it.
131//! That implementation serializes `Reserved` into a [`Vec<Bytes>`], and then serializes
132//! this [`Vec<Bytes>`] to the output stream. If it is empty, an empty vector should be
133//! written instead. This is achieved via [`AsVec`] annotation.
134//!
135//! [`Maybe`] is a third special wrapper. Fields annotated with [`Maybe`] may only be placed
136//! last (otherwise encoding is ambiguous), and with [`Maybe<T>`] `Some(T)` is serialized
137//! as `T` and [`None`] --- as nothing (zero bytes added).
138//!
//! Field comments are omitted here for brevity, but the macro preserves them as well.
140//!
141//! This macro adds both decoding and encoding capabilities. See examples folder
142//! for more examples of usage, including custom types and machinery.
143//!
144//! Note that this syntax is not restricted to these three wrappers, you can use
145//! any types with proper [`From`] implementation:
146//!
147//! ```rust
148//! use thor_devkit::rlp_encodable;
149//!
150//! #[derive(Clone)]
151//! struct MySeries {
152//!     left: [u8; 2],
153//!     right: [u8; 2],
154//! }
155//!
156//! impl From<MySeries> for u32 {
157//!     fn from(value: MySeries) -> Self {
158//!         Self::from_be_bytes(value.left.into_iter().chain(value.right).collect::<Vec<_>>().try_into().unwrap())
159//!     }
160//! }
161//! impl From<u32> for MySeries {
162//!     fn from(value: u32) -> Self {
163//!         let [a, b, c, d] = value.to_be_bytes();
164//!         Self{ left: [a, b], right: [c, d] }
165//!     }
166//! }
167//!
168//! rlp_encodable! {
169//!     pub struct Foo {
170//!         pub foo: MySeries => u32,
171//!     }
172//! }
173//! ```
174//!
175
176pub use bytes::{Buf, BufMut, Bytes, BytesMut};
177pub use fastrlp::{Decodable, DecodeError as RLPError, Encodable, Header};
178
/// Convenience alias for a result of fallible RLP decoding.
///
/// The error type is [`RLPError`], which this module re-exports from
/// `fastrlp::DecodeError`.
pub type RLPResult<T> = Result<T, RLPError>;
181
// Internal helper of `rlp_encodable!`: writes one or more values into a sink.
//
// Accepted forms (`sink` is any expression accepted by `.encode(..)`):
//   * `sink, value`            - calls `value.encode(sink)`;
//   * `sink, value => Wire`    - clones `value`, converts the clone with
//                                `Wire::from` and encodes the result;
//   * a comma-separated mix of the two - handled one value at a time, in order.
#[doc(hidden)]
#[macro_export]
macro_rules! __encode_as {
    // Plain value: encode it as is.
    ($sink:expr, $value:expr) => {
        $value.encode($sink);
    };
    // Value with a wire type: convert first, then encode.
    ($sink:expr, $value:expr => $wire:ty) => {
        // TODO: the clone is unfortunate; `From` takes the value by move,
        // while only a borrow of it is available here.
        <$wire>::from($value.clone()).encode($sink);
    };

    // Several values: peel off the first one and recurse on the remainder.
    ($sink:expr, $head:expr $(=> $head_wire:ty)?, $($rest:expr $(=> $rest_wire:ty)?),+) => {
        $crate::__encode_as!($sink, $head $(=> $head_wire)?);
        $crate::__encode_as!($sink, $($rest $(=> $rest_wire)?),+);
    };
}
198
// Internal helper of `rlp_encodable!`: reads a value from a buffer.
//
// Accepted forms:
//   * `buf, Type`          - expands to `Type::decode(buf)?`;
//   * `buf, Type => Wire`  - decodes a `Wire` and converts it with `Type::from`.
//
// Both forms use `?`, so they are only usable inside a function returning a
// compatible `Result`.
#[doc(hidden)]
#[macro_export]
macro_rules! __decode_as {
    // Plain type: decode it directly.
    ($source:expr, $target:ty) => {
        <$target>::decode($source)?
    };
    // Type with a wire representation: decode the wire type, then convert.
    ($source:expr, $target:ty => $wire:ty) => {
        <$target>::from(<$wire>::decode($source)?)
    };

    // Several types: expands to consecutive single-type invocations.
    // NOTE(review): `rlp_encodable!` in this file invokes the macro once per
    // field and never reaches this arm.
    ($source:expr, $head:ty $(=> $head_wire:ty)?, $($rest:ty $(=> $rest_wire:ty)?),+) => {
        $crate::__decode_as! { $source, $head $(=> $head_wire)? }
        $crate::__decode_as! { $source, $($rest $(=> $rest_wire)?),+ }
    };
}
214
/// Create an RLP-encodable struct by specifying types to cast to.
///
/// The macro takes exactly one struct definition with named fields. Each field is
/// written either as `name: Type,` or as `name: Type => WireType,` (the trailing
/// comma is required). The struct itself is emitted without the `=> WireType`
/// parts; struct and field attributes, doc comments included, are kept.
///
/// Generated items:
///
/// * `Encodable`: fields are encoded in declaration order and wrapped into an RLP
///   list header. A field with `=> WireType` is cloned and converted with
///   `WireType::from` before encoding, so the field type must be `Clone`.
/// * `Decodable`: reads a header, then decodes fields in declaration order. A field
///   with `=> WireType` is decoded as `WireType` and converted with `Type::from`.
/// * A private inherent method `encode_internal` that writes the fields without
///   the list header.
///
/// # Errors
///
/// Generated `decode` returns `RLPError::UnexpectedString` when the header does
/// not announce a list, and forwards any error produced while decoding the header
/// or a field.
///
/// Note that the payload length announced by the header is not enforced: fields
/// are decoded from whatever follows the header.
#[macro_export]
macro_rules! rlp_encodable {
    (
        $(#[$attr:meta])*
        $vis:vis struct $name:ident {
            $(
                $(#[$field_attr:meta])*
                $field_vis:vis $field_name:ident: $field_type:ty $(=> $cast:ty)?,
            )*
        }
    ) => {
        $(#[$attr])*
        $vis struct $name {
            $(
                $(#[$field_attr])*
                $field_vis $field_name: $field_type,
            )*
        }

        impl $name {
            // Writes all fields, in declaration order, without the list header.
            // The import and `out` are unused when the struct has no fields.
            #[allow(unused_imports, unused_variables)]
            fn encode_internal(&self, out: &mut dyn $crate::rlp::BufMut) {
                use $crate::rlp::Encodable;
                // One invocation per field, so that a struct without fields
                // expands to nothing instead of an unmatched macro call.
                $(
                    $crate::__encode_as!(out, self.$field_name $(=> $cast)?);
                )*
            }
        }

        impl $crate::rlp::Encodable for $name {
            fn encode(&self, out: &mut dyn $crate::rlp::BufMut) {
                // The payload is buffered first: the header needs its length.
                let mut buf = $crate::rlp::BytesMut::new();
                self.encode_internal(&mut buf);
                $crate::rlp::Header {
                    list: true,
                    payload_length: buf.len()
                }.encode(out);
                out.put_slice(&buf)
            }
        }

        impl $crate::rlp::Decodable for $name {
            fn decode(buf: &mut &[u8]) -> $crate::rlp::RLPResult<Self> {
                #[allow(unused_imports)]
                use $crate::rlp::Decodable;
                let header = $crate::rlp::Header::decode(buf)?;
                // `encode` always emits a list header, so anything else cannot
                // be an encoding of this struct.
                if !header.list {
                    return Err($crate::rlp::RLPError::UnexpectedString);
                }
                Ok(Self {
                    $($field_name: $crate::__decode_as!(buf, $field_type $(=> $cast)? )),*
                })
            }
        }
    }
}
266
// Generates the conversions between `Option` and a wrapper enum that has a
// `Just(T)` and a `Nothing` variant:
//   * `Option<S>` -> wrapper, converting the payload with `Into<T>`;
//   * wrapper -> `Option<T>`.
macro_rules! map_to_option {
    ($wrapper:ident) => {
        impl<T: Encodable + Decodable, S: Into<T>> From<Option<S>> for $wrapper<T> {
            fn from(value: Option<S>) -> Self {
                value.map_or(Self::Nothing, |inner| Self::Just(inner.into()))
            }
        }
        impl<T: Encodable + Decodable> From<$wrapper<T>> for Option<T> {
            fn from(value: $wrapper<T>) -> Self {
                if let $wrapper::Just(inner) = value {
                    Some(inner)
                } else {
                    None
                }
            }
        }
    };
}
287
/// Serialization wrapper for `Option` to serialize `None` as empty `Bytes`.
///
/// A present value is written with its own encoding; an absent value is written
/// as an empty byte string. When decoding, input that starts with the empty
/// string marker is read back as an absent value.
///
/// <div class="warning">
///  Do not use it directly: it is only intended for use with `rlp_encodable!` macro.
/// </div>
// NOTE(review): the lint is presumably triggered by the `#[doc(hidden)]` unit
// variant below - confirm before removing the `allow`.
#[allow(clippy::manual_non_exhaustive)]
pub enum AsBytes<T: Encodable + Decodable> {
    /// A present value (converted from `Some`).
    #[doc(hidden)]
    Just(T),
    /// An absent value (converted from `None`).
    #[doc(hidden)]
    Nothing,
}
// Conversions from `Option<S>` (with `S: Into<T>`) and into `Option<T>`.
map_to_option!(AsBytes);
301
302impl<T: Encodable + Decodable> Encodable for AsBytes<T> {
303    fn encode(&self, out: &mut dyn BufMut) {
304        match self {
305            Self::Just(value) => value.encode(out),
306            Self::Nothing => Bytes::new().encode(out),
307        }
308    }
309}
310impl<T: Encodable + Decodable> Decodable for AsBytes<T> {
311    fn decode(buf: &mut &[u8]) -> RLPResult<Self> {
312        if buf[0] == fastrlp::EMPTY_STRING_CODE {
313            Bytes::decode(buf)?;
314            Ok(Self::Nothing)
315        } else {
316            Ok(Self::Just(T::decode(buf)?))
317        }
318    }
319}
320
/// Serialization wrapper for `Option` to serialize `None` as empty `Vec`.
///
/// A present value is written with its own encoding; an absent value is written
/// as a list header with zero payload length.
///
/// Note that it will not be able to distinguish `None` and `Some(vec![])`
/// as they map to the same encoded value.
///
/// <div class="warning">
///  Do not use it directly: it is only intended for use with `rlp_encodable!` macro.
/// </div>
// NOTE(review): the lint is presumably triggered by the `#[doc(hidden)]` unit
// variant below - confirm before removing the `allow`.
#[allow(clippy::manual_non_exhaustive)]
pub enum AsVec<T: Encodable + Decodable> {
    /// A present value (converted from `Some`).
    #[doc(hidden)]
    Just(T),
    /// An absent value (converted from `None`).
    #[doc(hidden)]
    Nothing,
}
// Conversions from `Option<S>` (with `S: Into<T>`) and into `Option<T>`.
map_to_option!(AsVec);
337
338impl<T: Encodable + Decodable> Encodable for AsVec<T> {
339    fn encode(&self, out: &mut dyn BufMut) {
340        match self {
341            Self::Just(value) => value.encode(out),
342            Self::Nothing => fastrlp::Header {
343                list: true,
344                payload_length: 0,
345            }
346            .encode(out),
347        }
348    }
349}
350impl<T: Encodable + Decodable> Decodable for AsVec<T> {
351    fn decode(buf: &mut &[u8]) -> RLPResult<Self> {
352        if buf[0] == fastrlp::EMPTY_LIST_CODE {
353            let header = fastrlp::Header::decode(buf)?;
354            debug_assert!(header.list);
355            debug_assert!(header.payload_length == 0);
356            Ok(Self::Nothing)
357        } else {
358            Ok(Self::Just(T::decode(buf)?))
359        }
360    }
361}
362
/// Serialization wrapper for `Option` to serialize `None` as nothing (do not modify
/// output stream). This must be the last field in the struct.
///
/// When decoding, an exhausted buffer is read back as an absent value; otherwise
/// the wrapped type is decoded from the remaining bytes.
///
/// <div class="warning">
///  Do not use it directly: it is only intended for use with `rlp_encodable!` macro.
/// </div>
// NOTE(review): the lint is presumably triggered by the `#[doc(hidden)]` unit
// variant below - confirm before removing the `allow`.
#[allow(clippy::manual_non_exhaustive)]
pub enum Maybe<T: Encodable + Decodable> {
    /// A present value (converted from `Some`).
    #[doc(hidden)]
    Just(T),
    /// An absent value (converted from `None`).
    #[doc(hidden)]
    Nothing,
}
// Conversions from `Option<S>` (with `S: Into<T>`) and into `Option<T>`.
map_to_option!(Maybe);
377
378impl<T: Encodable + Decodable> Encodable for Maybe<T> {
379    fn encode(&self, out: &mut dyn BufMut) {
380        match self {
381            Self::Just(value) => value.encode(out),
382            Self::Nothing => (),
383        }
384    }
385}
386impl<T: Encodable + Decodable> Decodable for Maybe<T> {
387    fn decode(buf: &mut &[u8]) -> RLPResult<Self> {
388        if buf.remaining() == 0 {
389            Ok(Self::Nothing)
390        } else {
391            Ok(Self::Just(T::decode(buf)?))
392        }
393    }
394}
395
/// Returns a copy of `bytes` without its leading zero bytes.
///
/// Input that is empty or consists only of zeros yields an empty vector.
#[inline]
pub(crate) fn lstrip<S: AsRef<[u8]>>(bytes: S) -> Vec<u8> {
    let raw = bytes.as_ref();
    // Index of the first non-zero byte; everything is stripped if there is none.
    let start = raw.iter().position(|&byte| byte != 0).unwrap_or(raw.len());
    raw[start..].to_vec()
}
405
406#[inline]
407pub(crate) fn static_left_pad<const N: usize>(data: &[u8]) -> RLPResult<[u8; N]> {
408    if data.len() > N {
409        return Err(RLPError::Overflow);
410    }
411
412    let mut v = [0; N];
413
414    if data.is_empty() {
415        return Ok(v);
416    }
417
418    if data[0] == 0 {
419        return Err(RLPError::LeadingZero);
420    }
421
422    // SAFETY: length checked above
423    unsafe { v.get_unchecked_mut(N - data.len()..) }.copy_from_slice(data);
424    Ok(v)
425}
426
427#[cfg(test)]
428mod tests {
429    use super::*;
430
431    #[test]
432    fn test_left_pad() {
433        assert_eq!(
434            static_left_pad::<80>(&[1u8; 100]).unwrap_err(),
435            RLPError::Overflow
436        );
437        assert_eq!(static_left_pad::<10>(&[]).unwrap(), [0u8; 10]);
438        assert_eq!(
439            static_left_pad::<10>(&[0u8]).unwrap_err(),
440            RLPError::LeadingZero
441        );
442        assert_eq!(static_left_pad::<5>(&[1, 2, 3]).unwrap(), [0, 0, 1, 2, 3]);
443    }
444
445    #[test]
446    fn test_asbytes() {
447        rlp_encodable! {
448            #[derive(Eq, PartialEq, Debug)]
449            struct Test {
450                foo: Option<u8> => AsBytes<u8>,
451            }
452        }
453        // Struct header prefix
454        let header = fastrlp::EMPTY_LIST_CODE + 1;
455
456        let empty = Test { foo: None };
457        let mut buf = vec![];
458        empty.encode(&mut buf);
459        assert_eq!(buf, [header, fastrlp::EMPTY_STRING_CODE]);
460        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), empty);
461
462        let full = Test { foo: Some(7) };
463        let mut buf = vec![];
464        full.encode(&mut buf);
465        assert_eq!(buf, [header, 7]);
466        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), full);
467
468        let buf = [header, fastrlp::EMPTY_LIST_CODE];
469        assert_eq!(
470            Test::decode(&mut &buf[..]).unwrap_err(),
471            RLPError::UnexpectedList
472        );
473    }
474
475    #[test]
476    fn test_asvec() {
477        rlp_encodable! {
478            #[derive(Eq, PartialEq, Debug)]
479            struct Test {
480                foo: Option<u8> => AsVec<u8>,
481            }
482        }
483        // Struct header prefix
484        let header = fastrlp::EMPTY_LIST_CODE + 1;
485
486        let empty = Test { foo: None };
487        let mut buf = vec![];
488        empty.encode(&mut buf);
489        assert_eq!(buf, [header, fastrlp::EMPTY_LIST_CODE]);
490        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), empty);
491
492        let full = Test { foo: Some(7) };
493        let mut buf = vec![];
494        full.encode(&mut buf);
495        assert_eq!(buf, [header, 7]);
496        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), full);
497
498        let buf = [header, fastrlp::EMPTY_LIST_CODE + 1, 0x01];
499        assert_eq!(
500            Test::decode(&mut &buf[..]).unwrap_err(),
501            RLPError::UnexpectedList
502        );
503        let buf = [header, fastrlp::EMPTY_STRING_CODE + 1, 0x01];
504        assert_eq!(
505            Test::decode(&mut &buf[..]).unwrap_err(),
506            RLPError::NonCanonicalSingleByte
507        );
508
509        // Quirk: [EMPTY_STRING] parses into the same zero as [0x00]
510        let buf = [fastrlp::EMPTY_STRING_CODE];
511        assert_eq!(u8::decode(&mut &buf[..]).unwrap(), 0);
512        let buf = [header, fastrlp::EMPTY_STRING_CODE];
513        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), Test { foo: Some(0) });
514    }
515
516    #[test]
517    fn test_vec_asvec() {
518        rlp_encodable! {
519            #[derive(Eq, PartialEq, Debug)]
520            struct Test {
521                foo: Option<Vec<u32>> => AsVec<Vec<u32>>,
522            }
523        }
524        // Struct header prefix
525        let header = fastrlp::EMPTY_LIST_CODE + 1;
526
527        let empty = Test { foo: None };
528        let mut buf = vec![];
529        empty.encode(&mut buf);
530        assert_eq!(buf, [header, fastrlp::EMPTY_LIST_CODE]);
531        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), empty);
532
533        let blank = Test { foo: Some(vec![]) };
534        let mut buf = vec![];
535        blank.encode(&mut buf);
536        assert_eq!(buf, [header, fastrlp::EMPTY_LIST_CODE]);
537        // But it doesn't round-trip - we get None in return.
538        // Both None and empty vec map to the same encoded value.
539        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), empty);
540
541        let full = Test {
542            foo: Some(vec![0x01, 0x02]),
543        };
544        let mut buf = vec![];
545        full.encode(&mut buf);
546        assert_eq!(buf, [header + 2, fastrlp::EMPTY_LIST_CODE + 2, 0x01, 0x02]);
547        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), full);
548
549        let buf = [header, fastrlp::EMPTY_STRING_CODE + 1, 0x01];
550        assert_eq!(
551            Test::decode(&mut &buf[..]).unwrap_err(),
552            RLPError::NonCanonicalSingleByte
553        );
554    }
555
556    #[test]
557    fn test_maybe() {
558        rlp_encodable! {
559            #[derive(Eq, PartialEq, Debug)]
560            struct Test {
561                foo: Option<u8> => Maybe<u8>,
562            }
563        }
564
565        let empty = Test { foo: None };
566        let mut buf = vec![];
567        empty.encode(&mut buf);
568        assert_eq!(buf, [fastrlp::EMPTY_LIST_CODE]);
569        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), empty);
570
571        let full = Test { foo: Some(7) };
572        let mut buf = vec![];
573        full.encode(&mut buf);
574        assert_eq!(buf, [fastrlp::EMPTY_LIST_CODE + 1, 7]);
575        assert_eq!(Test::decode(&mut &buf[..]).unwrap(), full);
576    }
577}