Skip to main content

ethrex_rlp/
decode.rs

1use super::{
2    constants::{RLP_EMPTY_LIST, RLP_NULL},
3    error::RLPDecodeError,
4};
5use alloc::string::String;
6use alloc::vec::Vec;
7use bytes::{Bytes, BytesMut};
8use core::net::{IpAddr, Ipv4Addr, Ipv6Addr};
9use ethereum_types::{
10    Address, Bloom, H32, H64, H128, H160, H256, H264, H512, H520, Signature, U256,
11};
12
/// Max payload size accepted when decoding (1 GiB).
/// While technically any size is RLP spec-compliant, there are no well-formed messages
/// in our protocols that could carry such big payloads, so they are either bugs or malicious.
/// Checked by `decode_rlp_item` and `get_item_with_prefix` before any payload is sliced.
const MAX_RLP_BYTES: usize = 1024 * 1024 * 1024;
/// Strings and lists of fewer than this many bytes must use the short form
/// (`0x80..=0xB7` / `0xC0..=0xF7`); using the long form for a shorter payload is
/// non-canonical RLP and is rejected.
/// Enforced by `decode_rlp_item` for both long strings and long lists.
const RLP_SHORT_ITEM_LIMIT: usize = 56;
21
22/// Trait for decoding RLP encoded slices of data.
23/// See <https://ethereum.org/en/developers/docs/data-structures-and-encoding/rlp/#rlp-decoding> for more information.
24/// The [`decode_unfinished`](RLPDecode::decode_unfinished) method is used to decode an RLP encoded slice of data and return the decoded value along with the remaining bytes.
25/// The [`decode`](RLPDecode::decode) method is used to decode an RLP encoded slice of data and return the decoded value.
26/// Implementors need to implement the [`decode_unfinished`](RLPDecode::decode_unfinished) method.
27/// While consumers can use the [`decode`](RLPDecode::decode) method to decode the RLP encoded data.
28pub trait RLPDecode: Sized {
29    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError>;
30
31    fn decode(rlp: &[u8]) -> Result<Self, RLPDecodeError> {
32        let (decoded, remaining) = Self::decode_unfinished(rlp)?;
33        if !remaining.is_empty() {
34            return Err(RLPDecodeError::InvalidLength);
35        }
36
37        Ok(decoded)
38    }
39}
40
41impl RLPDecode for bool {
42    #[inline(always)]
43    fn decode_unfinished(buf: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
44        if buf.is_empty() {
45            return Err(RLPDecodeError::InvalidLength);
46        }
47        let value = match buf[0] {
48            RLP_NULL => false,
49            0x01 => true,
50            _ => return Err(RLPDecodeError::MalformedBoolean),
51        };
52
53        Ok((value, &buf[1..]))
54    }
55}
56
57impl RLPDecode for u8 {
58    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
59        let first_byte = rlp.first().ok_or(RLPDecodeError::InvalidLength)?;
60        match first_byte {
61            // Single byte in the range [0x00, 0x7f]
62            0..=0x7f => {
63                let rest = rlp.get(1..).ok_or(RLPDecodeError::MalformedData)?;
64                Ok((*first_byte, rest))
65            }
66
67            // RLP_NULL represents zero
68            &RLP_NULL => {
69                let rest = rlp.get(1..).ok_or(RLPDecodeError::MalformedData)?;
70                Ok((0, rest))
71            }
72
73            // Two bytes, where the first byte is RLP_NULL + 1
74            x if rlp.len() >= 2 && *x == RLP_NULL + 1 => {
75                let rest = rlp.get(2..).ok_or(RLPDecodeError::MalformedData)?;
76                Ok((rlp[1], rest))
77            }
78
79            // Any other case is invalid for u8
80            _ => Err(RLPDecodeError::MalformedData),
81        }
82    }
83}
84
85impl RLPDecode for u16 {
86    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
87        let (bytes, rest) = decode_bytes(rlp)?;
88        let padded_bytes = static_left_pad(bytes)?;
89        Ok((u16::from_be_bytes(padded_bytes), rest))
90    }
91}
92
93impl RLPDecode for u32 {
94    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
95        let (bytes, rest) = decode_bytes(rlp)?;
96        let padded_bytes = static_left_pad(bytes)?;
97        Ok((u32::from_be_bytes(padded_bytes), rest))
98    }
99}
100
101impl RLPDecode for u64 {
102    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
103        let (bytes, rest) = decode_bytes(rlp)?;
104        let padded_bytes = static_left_pad(bytes)?;
105        Ok((u64::from_be_bytes(padded_bytes), rest))
106    }
107}
108
109impl RLPDecode for usize {
110    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
111        let (bytes, rest) = decode_bytes(rlp)?;
112        let padded_bytes = static_left_pad(bytes)?;
113        Ok((usize::from_be_bytes(padded_bytes), rest))
114    }
115}
116
117impl RLPDecode for u128 {
118    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
119        let (bytes, rest) = decode_bytes(rlp)?;
120        let padded_bytes = static_left_pad(bytes)?;
121        Ok((u128::from_be_bytes(padded_bytes), rest))
122    }
123}
124
125// Decodes a slice of bytes of a fixed size. If you want to decode a list of elements,
126// you should use the Vec<T> implementation (for elements of the same type),
127// or use the decode implementation for tuples (for elements of different types)
128impl<const N: usize> RLPDecode for [u8; N] {
129    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
130        let (decoded_bytes, rest) = decode_bytes(rlp)?;
131        let value = decoded_bytes
132            .try_into()
133            .map_err(|_| RLPDecodeError::InvalidLength);
134
135        Ok((value?, rest))
136    }
137}
138
139impl RLPDecode for Bytes {
140    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
141        let (decoded, rest) = decode_bytes(rlp)?;
142        Ok((Bytes::copy_from_slice(decoded), rest))
143    }
144}
145
146impl RLPDecode for BytesMut {
147    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
148        let (decoded, rest) = decode_bytes(rlp)?;
149        Ok((BytesMut::from(decoded), rest))
150    }
151}
152
153impl RLPDecode for H32 {
154    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
155        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
156        Ok((H32(value), rest))
157    }
158}
159
160impl RLPDecode for H64 {
161    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
162        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
163        Ok((H64(value), rest))
164    }
165}
166
167impl RLPDecode for H128 {
168    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
169        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
170        Ok((H128(value), rest))
171    }
172}
173
174impl RLPDecode for H256 {
175    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
176        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
177        Ok((H256(value), rest))
178    }
179}
180
181impl RLPDecode for H264 {
182    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
183        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
184        Ok((H264(value), rest))
185    }
186}
187
188impl RLPDecode for Address {
189    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
190        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
191        Ok((H160(value), rest))
192    }
193}
194
195impl RLPDecode for H512 {
196    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
197        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
198        Ok((H512(value), rest))
199    }
200}
201
202impl RLPDecode for Signature {
203    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
204        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
205        Ok((H520(value), rest))
206    }
207}
208
209impl RLPDecode for U256 {
210    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
211        let (bytes, rest) = decode_bytes(rlp)?;
212        let padded_bytes: [u8; 32] = static_left_pad(bytes)?;
213        Ok((U256::from_big_endian(&padded_bytes), rest))
214    }
215}
216
217impl RLPDecode for Bloom {
218    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
219        let (value, rest) = RLPDecode::decode_unfinished(rlp)?;
220        Ok((Bloom(value), rest))
221    }
222}
223
224impl RLPDecode for String {
225    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
226        let (str_bytes, rest) = decode_bytes(rlp)?;
227        let value =
228            String::from_utf8(str_bytes.to_vec()).map_err(|_| RLPDecodeError::MalformedData)?;
229        Ok((value, rest))
230    }
231}
232
233impl RLPDecode for Ipv4Addr {
234    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
235        let (ip_bytes, rest) = decode_bytes(rlp)?;
236        let octets: [u8; 4] = ip_bytes
237            .try_into()
238            .map_err(|_| RLPDecodeError::InvalidLength)?;
239        Ok((Ipv4Addr::from(octets), rest))
240    }
241}
242
243impl RLPDecode for Ipv6Addr {
244    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
245        let (ip_bytes, rest) = decode_bytes(rlp)?;
246        let octets: [u8; 16] = ip_bytes
247            .try_into()
248            .map_err(|_| RLPDecodeError::InvalidLength)?;
249        Ok((Ipv6Addr::from(octets), rest))
250    }
251}
252
253impl RLPDecode for IpAddr {
254    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
255        let (ip_bytes, rest) = decode_bytes(rlp)?;
256
257        match ip_bytes.len() {
258            4 => {
259                let octets: [u8; 4] = ip_bytes
260                    .try_into()
261                    .map_err(|_| RLPDecodeError::InvalidLength)?;
262                Ok((IpAddr::V4(Ipv4Addr::from(octets)), rest))
263            }
264            16 => {
265                let octets: [u8; 16] = ip_bytes
266                    .try_into()
267                    .map_err(|_| RLPDecodeError::InvalidLength)?;
268                // Using to_canonical just in case it's an Ipv6-encoded Ipv4 address
269                Ok((IpAddr::V6(Ipv6Addr::from(octets)).to_canonical(), rest))
270            }
271            _ => Err(RLPDecodeError::InvalidLength),
272        }
273    }
274}
275
276// Here we interpret a Vec<T> as a list of elements of the same type.
277// If you need to decode a slice of bytes, you should decode it via the
278// [u8; N] implementation or similar (Bytes, BytesMut, etc).
279impl<T: RLPDecode> RLPDecode for Vec<T> {
280    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
281        if rlp.is_empty() {
282            return Err(RLPDecodeError::InvalidLength);
283        }
284
285        if rlp[0] == RLP_EMPTY_LIST {
286            return Ok((Vec::new(), &rlp[1..]));
287        }
288
289        let (is_list, payload, input_rest) = decode_rlp_item(rlp)?;
290        if !is_list {
291            return Err(RLPDecodeError::MalformedData);
292        }
293
294        let mut result = Vec::new();
295        let mut current_slice = payload;
296
297        while !current_slice.is_empty() {
298            let (item, rest_current_list) = T::decode_unfinished(current_slice)?;
299            result.push(item);
300            current_slice = rest_current_list;
301        }
302
303        Ok((result, input_rest))
304    }
305}
306
307impl<T1: RLPDecode, T2: RLPDecode> RLPDecode for (T1, T2) {
308    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
309        if rlp.is_empty() {
310            return Err(RLPDecodeError::InvalidLength);
311        }
312
313        let (is_list, payload, input_rest) = decode_rlp_item(rlp)?;
314        if !is_list {
315            return Err(RLPDecodeError::MalformedData);
316        }
317
318        let (first, first_rest) = T1::decode_unfinished(payload)?;
319        let (second, second_rest) = T2::decode_unfinished(first_rest)?;
320
321        // check that there is no more data to parse after the second element.
322        if !second_rest.is_empty() {
323            return Err(RLPDecodeError::MalformedData);
324        }
325
326        Ok(((first, second), input_rest))
327    }
328}
329
330impl<T1: RLPDecode, T2: RLPDecode, T3: RLPDecode> RLPDecode for (T1, T2, T3) {
331    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
332        if rlp.is_empty() {
333            return Err(RLPDecodeError::InvalidLength);
334        }
335        let (is_list, payload, input_rest) = decode_rlp_item(rlp)?;
336        if !is_list {
337            return Err(RLPDecodeError::MalformedData);
338        }
339        let (first, first_rest) = T1::decode_unfinished(payload)?;
340        let (second, second_rest) = T2::decode_unfinished(first_rest)?;
341        let (third, third_rest) = T3::decode_unfinished(second_rest)?;
342        // check that there is no more data to decode after the third element.
343        if !third_rest.is_empty() {
344            return Err(RLPDecodeError::MalformedData);
345        }
346
347        Ok(((first, second, third), input_rest))
348    }
349}
350
351// This implementation is useful when the message is a list with elements of mixed types
352// for example, the P2P message 'GetBlockHeaders', mixes hashes and numbers.
353impl<T1: RLPDecode, T2: RLPDecode, T3: RLPDecode, T4: RLPDecode> RLPDecode for (T1, T2, T3, T4) {
354    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
355        if rlp.is_empty() {
356            return Err(RLPDecodeError::InvalidLength);
357        }
358        let (is_list, payload, input_rest) = decode_rlp_item(rlp)?;
359        if !is_list {
360            return Err(RLPDecodeError::MalformedData);
361        }
362        let (first, first_rest) = T1::decode_unfinished(payload)?;
363        let (second, second_rest) = T2::decode_unfinished(first_rest)?;
364        let (third, third_rest) = T3::decode_unfinished(second_rest)?;
365        let (fourth, fourth_rest) = T4::decode_unfinished(third_rest)?;
366        // check that there is no more data to decode after the fourth element.
367        if !fourth_rest.is_empty() {
368            return Err(RLPDecodeError::MalformedData);
369        }
370
371        Ok(((first, second, third, fourth), input_rest))
372    }
373}
374
375/// Decodes an RLP item from a slice of bytes.
376/// It returns a 3-element tuple with the following elements:
377/// - A boolean indicating if the item is a list or not.
378/// - The payload of the item, without its prefix.
379/// - The remaining bytes after the item.
380pub fn decode_rlp_item(data: &[u8]) -> Result<(bool, &[u8], &[u8]), RLPDecodeError> {
381    if data.is_empty() {
382        return Err(RLPDecodeError::InvalidLength);
383    }
384
385    let first_byte = data[0];
386
387    match first_byte {
388        0..=0x7F => Ok((false, &data[..1], &data[1..])),
389        0x80..=0xB7 => {
390            let length = (first_byte - 0x80) as usize;
391            if length > MAX_RLP_BYTES || data.len() < length + 1 {
392                return Err(RLPDecodeError::InvalidLength);
393            }
394            let payload = &data[1..length + 1];
395            // Canonical RLP: a single byte in 0x00..=0x7f is its own encoding, so it must
396            // never be wrapped in a 1-byte string (e.g. `0x81 0x01` instead of `0x01`).
397            if length == 1 && payload[0] < 0x80 {
398                return Err(RLPDecodeError::MalformedData);
399            }
400            Ok((false, payload, &data[length + 1..]))
401        }
402        0xB8..=0xBF => {
403            let length_of_length = (first_byte - 0xB7) as usize;
404            if data.len() < length_of_length + 1 {
405                return Err(RLPDecodeError::InvalidLength);
406            }
407            let length_bytes = &data[1..length_of_length + 1];
408            // `static_left_pad` rejects leading-zero length bytes (non-minimal length).
409            let length = usize::from_be_bytes(static_left_pad(length_bytes)?);
410            // Canonical RLP: lengths < 56 must use the short-string form (0x80..=0xB7).
411            if length < RLP_SHORT_ITEM_LIMIT {
412                return Err(RLPDecodeError::MalformedData);
413            }
414            if length > MAX_RLP_BYTES || data.len() < length_of_length + length + 1 {
415                return Err(RLPDecodeError::InvalidLength);
416            }
417            Ok((
418                false,
419                &data[length_of_length + 1..length_of_length + length + 1],
420                &data[length_of_length + length + 1..],
421            ))
422        }
423        RLP_EMPTY_LIST..=0xF7 => {
424            let length = (first_byte - RLP_EMPTY_LIST) as usize;
425            if length > MAX_RLP_BYTES || data.len() < length + 1 {
426                return Err(RLPDecodeError::InvalidLength);
427            }
428            Ok((true, &data[1..length + 1], &data[length + 1..]))
429        }
430        0xF8..=0xFF => {
431            let list_length = (first_byte - 0xF7) as usize;
432            if data.len() < list_length + 1 {
433                return Err(RLPDecodeError::InvalidLength);
434            }
435            let length_bytes = &data[1..list_length + 1];
436            // `static_left_pad` rejects leading-zero length bytes (non-minimal length).
437            let payload_length = usize::from_be_bytes(static_left_pad(length_bytes)?);
438            // Canonical RLP: payloads < 56 must use the short-list form (0xC0..=0xF7).
439            if payload_length < RLP_SHORT_ITEM_LIMIT {
440                return Err(RLPDecodeError::MalformedData);
441            }
442            if payload_length > MAX_RLP_BYTES || data.len() < list_length + payload_length + 1 {
443                return Err(RLPDecodeError::InvalidLength);
444            }
445            Ok((
446                true,
447                &data[list_length + 1..list_length + payload_length + 1],
448                &data[list_length + payload_length + 1..],
449            ))
450        }
451    }
452}
453
454/// Splits an RLP item in two:
455/// - The first item including its prefix
456/// - The remaining bytes after the item
457///
458/// It returns a 2-element tuple with the following elements:
459/// - The payload of the item, including its prefix.
460/// - The remaining bytes after the item.
461pub fn get_item_with_prefix(data: &[u8]) -> Result<(&[u8], &[u8]), RLPDecodeError> {
462    if data.is_empty() {
463        return Err(RLPDecodeError::InvalidLength);
464    }
465
466    let first_byte = data[0];
467
468    match first_byte {
469        0..=0x7F => Ok((&data[..1], &data[1..])),
470        0x80..=0xB7 => {
471            let length = (first_byte - 0x80) as usize;
472            if length > MAX_RLP_BYTES || data.len() < length + 1 {
473                return Err(RLPDecodeError::InvalidLength);
474            }
475            Ok((&data[..length + 1], &data[length + 1..]))
476        }
477        0xB8..=0xBF => {
478            let length_of_length = (first_byte - 0xB7) as usize;
479            if data.len() < length_of_length + 1 {
480                return Err(RLPDecodeError::InvalidLength);
481            }
482            let length_bytes = &data[1..length_of_length + 1];
483            let length = usize::from_be_bytes(static_left_pad(length_bytes)?);
484            if length > MAX_RLP_BYTES || data.len() < length_of_length + length + 1 {
485                return Err(RLPDecodeError::InvalidLength);
486            }
487            Ok((
488                &data[..length_of_length + length + 1],
489                &data[length_of_length + length + 1..],
490            ))
491        }
492        RLP_EMPTY_LIST..=0xF7 => {
493            let length = (first_byte - RLP_EMPTY_LIST) as usize;
494            if length > MAX_RLP_BYTES || data.len() < length + 1 {
495                return Err(RLPDecodeError::InvalidLength);
496            }
497            Ok((&data[..length + 1], &data[length + 1..]))
498        }
499        0xF8..=0xFF => {
500            let list_length = (first_byte - 0xF7) as usize;
501            if data.len() < list_length + 1 {
502                return Err(RLPDecodeError::InvalidLength);
503            }
504            let length_bytes = &data[1..list_length + 1];
505            let payload_length = usize::from_be_bytes(static_left_pad(length_bytes)?);
506            if payload_length > MAX_RLP_BYTES || data.len() < list_length + payload_length + 1 {
507                return Err(RLPDecodeError::InvalidLength);
508            }
509            Ok((
510                &data[..list_length + payload_length + 1],
511                &data[list_length + payload_length + 1..],
512            ))
513        }
514    }
515}
516
517pub fn is_encoded_as_bytes(rlp: &[u8]) -> Result<bool, RLPDecodeError> {
518    let prefix = rlp.first().ok_or(RLPDecodeError::MalformedData)?;
519    Ok((0xb8..=0xbf).contains(prefix))
520}
521
522/// Receives an RLP bytes item (prefix between 0xb8 and 0xbf) and returns its payload
523pub fn get_rlp_bytes_item_payload(rlp: &[u8]) -> Result<&[u8], RLPDecodeError> {
524    let prefix = rlp.first().ok_or(RLPDecodeError::InvalidLength)?;
525    let offset: usize = (prefix - 0xb8 + 1).into();
526    rlp.get(offset + 1..).ok_or(RLPDecodeError::InvalidLength)
527}
528
529/// Decodes the payload of an RLP item from a slice of bytes.
530/// It returns a 2-element tuple with the following elements:
531/// - The payload of the item.
532/// - The remaining bytes after the item.
533pub fn decode_bytes(data: &[u8]) -> Result<(&[u8], &[u8]), RLPDecodeError> {
534    let (is_list, payload, rest) = decode_rlp_item(data)?;
535    if is_list {
536        return Err(RLPDecodeError::UnexpectedList);
537    }
538    Ok((payload, rest))
539}
540
541/// Pads a slice of bytes with zeros on the left to make it a fixed size slice.
542/// The size of the data must be less than or equal to the size of the output array.
543#[inline]
544pub fn static_left_pad<const N: usize>(data: &[u8]) -> Result<[u8; N], RLPDecodeError> {
545    let mut result = [0; N];
546
547    if data.is_empty() {
548        return Ok(result);
549    }
550    if data[0] == 0 {
551        return Err(RLPDecodeError::MalformedData);
552    }
553    if data.len() > N {
554        return Err(RLPDecodeError::InvalidLength);
555    }
556    let data_start_index = N.saturating_sub(data.len());
557    result
558        .get_mut(data_start_index..)
559        .ok_or(RLPDecodeError::InvalidLength)?
560        .copy_from_slice(data);
561    Ok(result)
562}