thor_devkit/rlp.rs
1//! This module enables RLP encoding of high-level objects.
2//!
3//! RLP (recursive length prefix) is a common algorithm for encoding
4//! of variable length binary data. RLP encodes data before storing on disk
5//! or transmitting via network.
6//!
7//! Theory
8//! ------
9//!
10//! Encoding
11//! ********
12//!
13//! Primary RLP can only deal with "item" type, which is defined as:
14//!
15//! - Byte string ([`Bytes`]) or
16//! - Sequence of items ([`Vec`], fixed array or slice).
17//!
18//! Some examples are:
19//!
20//! * ``b'\x00\xff'``
21//! * empty list ``vec![]``
22//! * list of bytes ``vec![vec![0u8], vec![1u8, 3u8]]``
23//! * list of combinations ``vec![vec![], vec![0u8], vec![vec![0]]]``
24//!
25//! The encoded result is always a byte string (sequence of [`u8`]).
26//!
27//! Encoding algorithm
28//! ******************
29//!
30//! Given `x` item as input, we define `rlp_encode` as the following algorithm:
31//!
32//! Let `concat` be a function that joins given bytes into single byte sequence.
33//! 1. If `x` is a single byte and `0x00 <= x <= 0x7F`, `rlp_encode(x) = x`.
34//! 1. Otherwise, if `x` is a byte string, let `len(x)` be length of `x` in bytes
35//! and define encoding as follows:
//! * If `0 <= len(x) < 0x38` (note that empty byte string fulfills this requirement), then
37//! ```txt
38//! rlp_encode(x) = concat(0x80 + len(x), x)
39//! ```
40//! In this case first byte is in range `[0x80; 0xB7]`.
41//! * If `0x38 <= len(x) <= 0xFFFFFFFF`, then
42//! ```txt
43//! rlp_encode(x) = concat(0xB7 + len(len(x)), len(x), x)
44//! ```
45//! In this case first byte is in range `[0xB8; 0xBF]`.
46//! * For longer strings encoding is undefined.
47//! 1. Otherwise, if `x` is a list, let `s = concat(map(rlp_encode, x))`
48//! be concatenation of RLP encodings of all its items.
//! * If `0 <= len(s) < 0x38` (note that empty list matches), then
50//! ```txt
51//! rlp_encode(x) = concat(0xC0 + len(s), s)
52//! ```
53//! In this case first byte is in range `[0xC0; 0xF7]`.
54//! * If `0x38 <= len(s) <= 0xFFFFFFFF`, then
55//! ```txt
//! rlp_encode(x) = concat(0xF7 + len(len(s)), len(s), s)
57//! ```
58//! In this case first byte is in range `[0xF8; 0xFF]`.
59//! * For longer lists encoding is undefined.
60//!
61//! See more in [Ethereum wiki](https://eth.wiki/fundamentals/rlp).
62//!
63//! Encoding examples
64//! *****************
65//!
66//! | ``x`` | ``rlp_encode(x)`` |
67//! |-------------------|--------------------------------|
68//! | ``b''`` | ``0x80`` |
69//! | ``b'\x00'`` | ``0x00`` |
70//! | ``b'\x0F'`` | ``0x0F`` |
71//! | ``b'\x79'`` | ``0x79`` |
72//! | ``b'\x80'`` | ``0x81 0x80`` |
73//! | ``b'\xFF'`` | ``0x81 0xFF`` |
74//! | ``b'foo'`` | ``0x83 0x66 0x6F 0x6F`` |
75//! | ``[]`` | ``0xC0`` |
76//! | ``[b'\x0F']`` | ``0xC1 0x0F`` |
77//! | ``[b'\xEF']`` | ``0xC1 0x81 0xEF`` |
78//! | ``[[], [[]]]`` | ``0xC3 0xC0 0xC1 0xC0`` |
79//!
80//!
81//! Serialization
82//! *************
83//!
84//! However, in the real world, the inputs are not pure bytes nor lists.
85//! We need a way to encode numbers (like [`u64`]), custom structs, enums and other
86//! more complex machinery that exists in the surrounding code.
87//!
88//! This library wraps [`fastrlp`](https://docs.rs/fastrlp/0.4.0/fastrlp/)
89//! crate, so everything mentioned there about [`Encodable`] and [`Decodable`] traits still
90//! applies. You can implement those for any object to make it RLP-serializable.
91//!
92//! However, following this approach directly results in cluttered code: your `struct`s
93//! now have to use field types that match serialization, which may be very inconvenient.
94//!
95//! To avoid this pitfall, this RLP implementation allows "extended" struct definition
96//! via a macro. Let's have a look at `Transaction` definition:
97//!
98//! ```rust
99//! use thor_devkit::rlp::{AsBytes, AsVec, Maybe, Bytes};
100//! use thor_devkit::{rlp_encodable, U256};
101//! use thor_devkit::transactions::{Clause, Reserved};
102//!
103//! rlp_encodable! {
104//! /// Represents a single VeChain transaction.
105//! #[derive(Clone, Debug, Eq, PartialEq)]
106//! pub struct Transaction {
107//! /// Chain tag
108//! pub chain_tag: u8,
109//! pub block_ref: u64,
110//! pub expiration: u32,
111//! pub clauses: Vec<Clause>,
112//! pub gas_price_coef: u8,
113//! pub gas: u64,
114//! pub depends_on: Option<U256> => AsBytes<U256>,
115//! pub nonce: u64,
116//! pub reserved: Option<Reserved> => AsVec<Reserved>,
117//! pub signature: Option<Bytes> => Maybe<Bytes>,
118//! }
119//! }
120//! ```
121//!
122//! What's going on here? First, some fields are encoded "as usual": unsigned integers
123//! are encoded just fine and you likely won't need any different encoding. However,
124//! some fields work in a different way. `depends_on` is a number that may be present
//! or absent, and it should be encoded as a byte string. `U256` is already encoded this
//! way, but `None` is not ([`Option`] is not RLP-serializable by itself). So we wrap it
127//! in a special wrapper: [`AsBytes`]. [`AsBytes<T>`] will serialize `Some(T)` as `T` and
128//! [`None`] as an empty byte string.
129//!
130//! `reserved` is a truly special struct that has custom encoding implemented for it.
131//! That implementation serializes `Reserved` into a [`Vec<Bytes>`], and then serializes
132//! this [`Vec<Bytes>`] to the output stream. If it is empty, an empty vector should be
133//! written instead. This is achieved via [`AsVec`] annotation.
134//!
135//! [`Maybe`] is a third special wrapper. Fields annotated with [`Maybe`] may only be placed
136//! last (otherwise encoding is ambiguous), and with [`Maybe<T>`] `Some(T)` is serialized
137//! as `T` and [`None`] --- as nothing (zero bytes added).
138//!
//! Field doc comments are omitted here for brevity; the macro keeps them on the generated struct.
140//!
141//! This macro adds both decoding and encoding capabilities. See examples folder
142//! for more examples of usage, including custom types and machinery.
143//!
144//! Note that this syntax is not restricted to these three wrappers, you can use
145//! any types with proper [`From`] implementation:
146//!
147//! ```rust
148//! use thor_devkit::rlp_encodable;
149//!
150//! #[derive(Clone)]
151//! struct MySeries {
152//! left: [u8; 2],
153//! right: [u8; 2],
154//! }
155//!
156//! impl From<MySeries> for u32 {
157//! fn from(value: MySeries) -> Self {
158//! Self::from_be_bytes(value.left.into_iter().chain(value.right).collect::<Vec<_>>().try_into().unwrap())
159//! }
160//! }
161//! impl From<u32> for MySeries {
162//! fn from(value: u32) -> Self {
163//! let [a, b, c, d] = value.to_be_bytes();
164//! Self{ left: [a, b], right: [c, d] }
165//! }
166//! }
167//!
168//! rlp_encodable! {
169//! pub struct Foo {
170//! pub foo: MySeries => u32,
171//! }
172//! }
173//! ```
174//!
175
176pub use bytes::{Buf, BufMut, Bytes, BytesMut};
177pub use fastrlp::{Decodable, DecodeError as RLPError, Encodable, Header};
178
/// Convenience alias for a result of fallible RLP decoding.
///
/// The error type is [`RLPError`], this module's re-export of `fastrlp::DecodeError`.
pub type RLPResult<T> = Result<T, RLPError>;
181
/// Implementation detail of `rlp_encodable!`: writes one or more values into a sink.
///
/// Invocation shape is `__encode_as!(sink, value [=> WireType], ...)`. Each value is
/// encoded by calling its `encode` method with the sink, so a trait providing `encode`
/// must be in scope at the call site. Values are written in the order they are listed.
#[doc(hidden)]
#[macro_export]
macro_rules! __encode_as {
    // One value that is encoded exactly as it is stored.
    ($sink:expr, $value:expr) => {
        $value.encode($sink);
    };
    // One value that is first converted into its wire representation via `From`.
    ($sink:expr, $value:expr => $wire:ty) => {
        // TODO: this clone bugs me, we should be able to do better
        <$wire>::from($value.clone()).encode($sink);
    };

    // Two or more values: emit the first one, then recurse on the remainder.
    ($sink:expr, $head:expr $(=> $head_wire:ty)?, $($tail:expr $(=> $tail_wire:ty)?),+) => {
        $crate::__encode_as! { $sink, $head $(=> $head_wire)? }
        $crate::__encode_as! { $sink, $($tail $(=> $tail_wire)?),+ }
    };
}
198
/// Implementation detail of `rlp_encodable!`: reads values from a decoding buffer.
///
/// Invocation shape is `__decode_as!(buf, FieldType [=> WireType], ...)`. The single-value
/// arms expand to an expression that uses `?`, so they must be used inside a function
/// returning a compatible `Result`, with a trait providing `decode` in scope.
#[doc(hidden)]
#[macro_export]
macro_rules! __decode_as {
    // One value decoded directly as its own type.
    ($source:expr, $target:ty) => {
        <$target>::decode($source)?
    };
    // One value decoded as its wire type and then converted into the field type via `From`.
    ($source:expr, $target:ty => $wire:ty) => {
        <$target>::from(<$wire>::decode($source)?)
    };

    // Two or more values: expands to the first decode followed by a recursive invocation
    // for the rest, placed back to back. `rlp_encodable!` in this file only ever invokes
    // the single-value arms (once per field).
    ($source:expr, $head:ty $(=> $head_wire:ty)?, $($tail:ty $(=> $tail_wire:ty)?),+) => {
        $crate::__decode_as! { $source, $head $(=> $head_wire)? }
        $crate::__decode_as! { $source, $($tail $(=> $tail_wire)?),+ }
    };
}
214
/// Create an RLP-encodable struct by specifying types to cast to.
///
/// The macro accepts one ordinary struct definition in which any field may carry a
/// `=> WireType` suffix after its type. It emits:
///
/// * the struct itself, with all outer and per-field attributes (doc comments included)
///   re-attached and the `=> WireType` suffixes removed;
/// * a private `encode_internal` method that writes every field, in declaration order,
///   through `__encode_as!`;
/// * an `Encodable` impl that writes a list header carrying the payload length followed
///   by that payload;
/// * a `Decodable` impl that reads a header and then every field, in declaration order,
///   through `__decode_as!`.
///
/// For a field written as `name: FieldType => WireType`, encoding clones the field and
/// converts it with `WireType::from`, and decoding converts the decoded `WireType` back
/// with `FieldType::from`.
///
/// Every field, including the last one, must be followed by a comma, and the struct must
/// declare at least one field (the encoding step expands to nothing otherwise, and
/// `__encode_as!` has no arm for that).
///
/// # Errors
///
/// The generated `decode` returns `RLPError::UnexpectedString` when the input does not
/// start with a list header, and otherwise forwards any error produced while decoding
/// the header or a field.
#[macro_export]
macro_rules! rlp_encodable {
    (
        $(#[$attr:meta])*
        $vis:vis struct $name:ident {
            $(
                $(#[$field_attr:meta])*
                $field_vis:vis $field_name:ident: $field_type:ty $(=> $cast:ty)?,
            )*
        }
    ) => {
        $(#[$attr])*
        $vis struct $name {
            $(
                $(#[$field_attr])*
                $field_vis $field_name: $field_type,
            )*
        }

        impl $name {
            /// Writes the encoded fields back to back, without the enclosing list header.
            fn encode_internal(&self, out: &mut dyn $crate::rlp::BufMut) {
                use $crate::rlp::Encodable;
                $crate::__encode_as!(out, $(self.$field_name $(=> $cast)?),+);
            }
        }

        impl $crate::rlp::Encodable for $name {
            fn encode(&self, out: &mut dyn $crate::rlp::BufMut) {
                // The payload is staged in a scratch buffer first because the list
                // header has to announce its length.
                let mut buf = $crate::rlp::BytesMut::new();
                self.encode_internal(&mut buf);
                $crate::rlp::Header {
                    list: true,
                    payload_length: buf.len()
                }.encode(out);
                out.put_slice(&buf)
            }
        }

        impl $crate::rlp::Decodable for $name {
            fn decode(buf: &mut &[u8]) -> $crate::rlp::RLPResult<Self> {
                #[allow(unused_imports)]
                use $crate::rlp::Decodable;
                let header = $crate::rlp::Header::decode(buf)?;
                // A struct is always written as a list (see `encode`), so a byte-string
                // header here means the input is not an encoding of this type.
                if !header.list {
                    return Err($crate::rlp::RLPError::UnexpectedString);
                }
                // NOTE: `header.payload_length` is not used to bound the fields; they
                // read from whatever follows the header in `buf`.
                Ok(Self {
                    $($field_name: $crate::__decode_as!(buf, $field_type $(=> $cast)? )),*
                })
            }
        }
    }
}
266
/// Generates the `Option` conversions for a two-variant wrapper enum.
///
/// The named enum must be generic over one type `T` and expose the variants `Just(T)`
/// and `Nothing`. Two impls are produced:
///
/// * `From<Option<S>>` for the wrapper, for any `S: Into<T>`: `Some(s)` becomes
///   `Just(s.into())` and `None` becomes `Nothing`;
/// * `From<Wrapper<T>>` for `Option<T>`: `Just(v)` becomes `Some(v)` and `Nothing`
///   becomes `None`.
macro_rules! map_to_option {
    ($wrapper:ident) => {
        impl<T: Encodable + Decodable, S: Into<T>> From<Option<S>> for $wrapper<T> {
            fn from(source: Option<S>) -> Self {
                source.map_or(Self::Nothing, |inner| Self::Just(inner.into()))
            }
        }
        impl<T: Encodable + Decodable> From<$wrapper<T>> for Option<T> {
            fn from(wrapped: $wrapper<T>) -> Self {
                if let $wrapper::Just(inner) = wrapped {
                    Some(inner)
                } else {
                    None
                }
            }
        }
    };
}
287
288/// Serialization wrapper for `Option` to serialize `None` as empty `Bytes`.
289///
290/// <div class="warning">
291/// Do not use it directly: it is only intended for use with `rlp_encodable!` macro.
292/// </div>
293#[allow(clippy::manual_non_exhaustive)]
294pub enum AsBytes<T: Encodable + Decodable> {
295 #[doc(hidden)]
296 Just(T),
297 #[doc(hidden)]
298 Nothing,
299}
300map_to_option!(AsBytes);
301
302impl<T: Encodable + Decodable> Encodable for AsBytes<T> {
303 fn encode(&self, out: &mut dyn BufMut) {
304 match self {
305 Self::Just(value) => value.encode(out),
306 Self::Nothing => Bytes::new().encode(out),
307 }
308 }
309}
310impl<T: Encodable + Decodable> Decodable for AsBytes<T> {
311 fn decode(buf: &mut &[u8]) -> RLPResult<Self> {
312 if buf[0] == fastrlp::EMPTY_STRING_CODE {
313 Bytes::decode(buf)?;
314 Ok(Self::Nothing)
315 } else {
316 Ok(Self::Just(T::decode(buf)?))
317 }
318 }
319}
320
321/// Serialization wrapper for `Option` to serialize `None` as empty `Vec`.
322///
323/// Note that it will not be able to distinguish `None` and `Some(vec![])`
324/// as they map to the same encoded value.
325///
326/// <div class="warning">
327/// Do not use it directly: it is only intended for use with `rlp_encodable!` macro.
328/// </div>
329#[allow(clippy::manual_non_exhaustive)]
330pub enum AsVec<T: Encodable + Decodable> {
331 #[doc(hidden)]
332 Just(T),
333 #[doc(hidden)]
334 Nothing,
335}
336map_to_option!(AsVec);
337
338impl<T: Encodable + Decodable> Encodable for AsVec<T> {
339 fn encode(&self, out: &mut dyn BufMut) {
340 match self {
341 Self::Just(value) => value.encode(out),
342 Self::Nothing => fastrlp::Header {
343 list: true,
344 payload_length: 0,
345 }
346 .encode(out),
347 }
348 }
349}
350impl<T: Encodable + Decodable> Decodable for AsVec<T> {
351 fn decode(buf: &mut &[u8]) -> RLPResult<Self> {
352 if buf[0] == fastrlp::EMPTY_LIST_CODE {
353 let header = fastrlp::Header::decode(buf)?;
354 debug_assert!(header.list);
355 debug_assert!(header.payload_length == 0);
356 Ok(Self::Nothing)
357 } else {
358 Ok(Self::Just(T::decode(buf)?))
359 }
360 }
361}
362
363/// Serialization wrapper for `Option` to serialize `None` as nothing (do not modify
364/// output stream). This must be the last field in the struct.
365///
366/// <div class="warning">
367/// Do not use it directly: it is only intended for use with `rlp_encodable!` macro.
368/// </div>
369#[allow(clippy::manual_non_exhaustive)]
370pub enum Maybe<T: Encodable + Decodable> {
371 #[doc(hidden)]
372 Just(T),
373 #[doc(hidden)]
374 Nothing,
375}
376map_to_option!(Maybe);
377
378impl<T: Encodable + Decodable> Encodable for Maybe<T> {
379 fn encode(&self, out: &mut dyn BufMut) {
380 match self {
381 Self::Just(value) => value.encode(out),
382 Self::Nothing => (),
383 }
384 }
385}
386impl<T: Encodable + Decodable> Decodable for Maybe<T> {
387 fn decode(buf: &mut &[u8]) -> RLPResult<Self> {
388 if buf.remaining() == 0 {
389 Ok(Self::Nothing)
390 } else {
391 Ok(Self::Just(T::decode(buf)?))
392 }
393 }
394}
395
/// Returns a copy of `bytes` with all leading zero bytes removed.
///
/// Zero bytes after the first non-zero byte are kept. Input that is empty or consists
/// only of zeros yields an empty vector.
#[inline]
pub(crate) fn lstrip<S: AsRef<[u8]>>(bytes: S) -> Vec<u8> {
    let raw = bytes.as_ref();
    match raw.iter().position(|&byte| byte != 0) {
        Some(start) => raw[start..].to_vec(),
        None => Vec::new(),
    }
}
405
406#[inline]
407pub(crate) fn static_left_pad<const N: usize>(data: &[u8]) -> RLPResult<[u8; N]> {
408 if data.len() > N {
409 return Err(RLPError::Overflow);
410 }
411
412 let mut v = [0; N];
413
414 if data.is_empty() {
415 return Ok(v);
416 }
417
418 if data[0] == 0 {
419 return Err(RLPError::LeadingZero);
420 }
421
422 // SAFETY: length checked above
423 unsafe { v.get_unchecked_mut(N - data.len()..) }.copy_from_slice(data);
424 Ok(v)
425}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// List header byte announcing a one-byte payload; it prefixes every single-field
    /// test struct below whose field encodes to exactly one byte.
    const ONE_BYTE_LIST: u8 = fastrlp::EMPTY_LIST_CODE + 1;

    /// Encodes `value` into a fresh byte vector.
    fn encoded<T: Encodable>(value: &T) -> Vec<u8> {
        let mut out: Vec<u8> = Vec::new();
        value.encode(&mut out);
        out
    }

    /// Decodes a `T` from the start of `raw`.
    fn decoded<T: Decodable>(mut raw: &[u8]) -> RLPResult<T> {
        T::decode(&mut raw)
    }

    #[test]
    fn test_left_pad() {
        // Input longer than the target array.
        assert_eq!(
            static_left_pad::<80>(&[1u8; 100]),
            Err(RLPError::Overflow)
        );
        // Empty input pads to all zeros.
        assert_eq!(static_left_pad::<10>(&[]), Ok([0u8; 10]));
        // A leading zero byte is rejected.
        assert_eq!(static_left_pad::<10>(&[0u8]), Err(RLPError::LeadingZero));
        // Regular input lands at the right-hand end.
        assert_eq!(static_left_pad::<5>(&[1, 2, 3]), Ok([0, 0, 1, 2, 3]));
    }

    #[test]
    fn test_asbytes() {
        rlp_encodable! {
            #[derive(Eq, PartialEq, Debug)]
            struct Test {
                foo: Option<u8> => AsBytes<u8>,
            }
        }

        // `None` is written as an empty byte string and round-trips.
        let empty = Test { foo: None };
        let wire = encoded(&empty);
        assert_eq!(wire, [ONE_BYTE_LIST, fastrlp::EMPTY_STRING_CODE]);
        assert_eq!(decoded::<Test>(&wire).unwrap(), empty);

        // `Some` is written as the bare value and round-trips.
        let full = Test { foo: Some(7) };
        let wire = encoded(&full);
        assert_eq!(wire, [ONE_BYTE_LIST, 7]);
        assert_eq!(decoded::<Test>(&wire).unwrap(), full);

        // An empty list is not an acceptable stand-in for the field.
        let wire = [ONE_BYTE_LIST, fastrlp::EMPTY_LIST_CODE];
        assert_eq!(
            decoded::<Test>(&wire).unwrap_err(),
            RLPError::UnexpectedList
        );
    }

    #[test]
    fn test_asvec() {
        rlp_encodable! {
            #[derive(Eq, PartialEq, Debug)]
            struct Test {
                foo: Option<u8> => AsVec<u8>,
            }
        }

        // `None` is written as an empty list and round-trips.
        let empty = Test { foo: None };
        let wire = encoded(&empty);
        assert_eq!(wire, [ONE_BYTE_LIST, fastrlp::EMPTY_LIST_CODE]);
        assert_eq!(decoded::<Test>(&wire).unwrap(), empty);

        // `Some` is written as the bare value and round-trips.
        let full = Test { foo: Some(7) };
        let wire = encoded(&full);
        assert_eq!(wire, [ONE_BYTE_LIST, 7]);
        assert_eq!(decoded::<Test>(&wire).unwrap(), full);

        // Malformed field encodings surface the error of the inner decoder.
        let wire = [ONE_BYTE_LIST, fastrlp::EMPTY_LIST_CODE + 1, 0x01];
        assert_eq!(
            decoded::<Test>(&wire).unwrap_err(),
            RLPError::UnexpectedList
        );
        let wire = [ONE_BYTE_LIST, fastrlp::EMPTY_STRING_CODE + 1, 0x01];
        assert_eq!(
            decoded::<Test>(&wire).unwrap_err(),
            RLPError::NonCanonicalSingleByte
        );

        // Quirk: [EMPTY_STRING] parses into the same zero as [0x00]
        assert_eq!(decoded::<u8>(&[fastrlp::EMPTY_STRING_CODE]).unwrap(), 0);
        let wire = [ONE_BYTE_LIST, fastrlp::EMPTY_STRING_CODE];
        assert_eq!(decoded::<Test>(&wire).unwrap(), Test { foo: Some(0) });
    }

    #[test]
    fn test_vec_asvec() {
        rlp_encodable! {
            #[derive(Eq, PartialEq, Debug)]
            struct Test {
                foo: Option<Vec<u32>> => AsVec<Vec<u32>>,
            }
        }

        // `None` is written as an empty list and round-trips.
        let empty = Test { foo: None };
        let wire = encoded(&empty);
        assert_eq!(wire, [ONE_BYTE_LIST, fastrlp::EMPTY_LIST_CODE]);
        assert_eq!(decoded::<Test>(&wire).unwrap(), empty);

        let blank = Test { foo: Some(vec![]) };
        let wire = encoded(&blank);
        assert_eq!(wire, [ONE_BYTE_LIST, fastrlp::EMPTY_LIST_CODE]);
        // But it doesn't round-trip - we get None in return.
        // Both None and empty vec map to the same encoded value.
        assert_eq!(decoded::<Test>(&wire).unwrap(), empty);

        // A populated vector is written as a nested list and round-trips.
        let full = Test {
            foo: Some(vec![0x01, 0x02]),
        };
        let wire = encoded(&full);
        assert_eq!(
            wire,
            [
                ONE_BYTE_LIST + 2,
                fastrlp::EMPTY_LIST_CODE + 2,
                0x01,
                0x02
            ]
        );
        assert_eq!(decoded::<Test>(&wire).unwrap(), full);

        // A non-canonical byte string in place of the list is rejected.
        let wire = [ONE_BYTE_LIST, fastrlp::EMPTY_STRING_CODE + 1, 0x01];
        assert_eq!(
            decoded::<Test>(&wire).unwrap_err(),
            RLPError::NonCanonicalSingleByte
        );
    }

    #[test]
    fn test_maybe() {
        rlp_encodable! {
            #[derive(Eq, PartialEq, Debug)]
            struct Test {
                foo: Option<u8> => Maybe<u8>,
            }
        }

        // `None` adds no bytes: the struct is just an empty list.
        let empty = Test { foo: None };
        let wire = encoded(&empty);
        assert_eq!(wire, [fastrlp::EMPTY_LIST_CODE]);
        assert_eq!(decoded::<Test>(&wire).unwrap(), empty);

        // `Some` is written as the bare value and round-trips.
        let full = Test { foo: Some(7) };
        let wire = encoded(&full);
        assert_eq!(wire, [ONE_BYTE_LIST, 7]);
        assert_eq!(decoded::<Test>(&wire).unwrap(), full);
    }
}