1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
use crate::{Address, Error};
/// RLP encoder and decoder. Transactions are encoded via RLP, whereas contract calls are encoded
/// with the Ethereum ABI. Transactions include contract calls, so RLP is the outer wrapper for
/// any ABI encoded value.
use num256::Uint256;

/// Intermediate representation for RLP serialization and deserialization
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RlpToken {
    /// An ordered sequence of nested tokens, which may themselves be lists
    List(Vec<RlpToken>),
    /// conceptually a string is just an arbitrary set of data, many strings
    /// are up to 32 bytes long and represent a 256 bit integer or up to 8 bytes long for a
    /// 64 bit integer
    String(Vec<u8>),
    /// A single byte value, often just a length or offset, sometimes small numbers like a nonce may
    /// get folded into this
    SingleByte(u8),
}

impl RlpToken {
    /// Returns the byte content of String and SingleByte types
    /// returns an Error if the enum is the list variant
    pub fn get_byte_content(&self) -> Result<Vec<u8>, Error> {
        match self {
            RlpToken::List(_) => Err(Error::DeserializeRlp),
            RlpToken::String(b) => Ok(b.clone()),
            RlpToken::SingleByte(b) => Ok(vec![*b]),
        }
    }

    /// Returns the list content of a List type RLP token, returns an Error
    /// for the String and SingleByte variants
    pub fn get_list_content(&self) -> Result<Vec<RlpToken>, Error> {
        match self {
            RlpToken::List(v) => Ok(v.clone()),
            RlpToken::String(_) | RlpToken::SingleByte(_) => Err(Error::DeserializeRlp),
        }
    }
}

impl From<u8> for RlpToken {
    fn from(value: u8) -> Self {
        RlpToken::SingleByte(value)
    }
}

// Drops every zero byte that precedes the first non-zero byte of the provided array.
// An input that is empty or made up entirely of zeros yields an empty vector.
fn trim_leading_zero_bytes(bytes: Vec<u8>) -> Vec<u8> {
    match bytes.iter().position(|byte| *byte != 0) {
        Some(first_nonzero) => bytes[first_nonzero..].to_vec(),
        None => Vec::new(),
    }
}

impl From<Uint256> for RlpToken {
    /// Converts an integer into its token form: values that fit in one byte (0 through 255)
    /// become `SingleByte`, anything larger becomes a `String` holding the big endian bytes
    /// with the leading zero bytes removed.
    fn from(value: Uint256) -> Self {
        // The bound is inclusive: 255 still fits in a single byte. Both token forms pack to
        // the same bytes for 255, but the decoder in this file reports a one byte payload as
        // `SingleByte`, so the inclusive bound keeps the two directions in agreement.
        if value <= u8::MAX.into() {
            RlpToken::SingleByte(value.to_le_bytes()[0])
        } else {
            RlpToken::String(trim_leading_zero_bytes(value.to_be_bytes().to_vec()))
        }
    }
}

impl From<&Uint256> for RlpToken {
    /// Copies the referenced integer and defers to the by-value conversion
    fn from(value: &Uint256) -> Self {
        Self::from(*value)
    }
}

impl From<Address> for RlpToken {
    fn from(value: Address) -> Self {
        RlpToken::String(value.as_bytes().to_vec())
    }
}

impl From<&Address> for RlpToken {
    fn from(value: &Address) -> Self {
        RlpToken::String(value.as_bytes().to_vec())
    }
}

/// Unpacks RLP encoded bytes into a series of arrays
/// https://ethereum.org/en/developers/docs/data-structures-and-encoding/rlp/
/// From there further decoding can occur
pub fn unpack_rlp(input: &[u8]) -> Result<Vec<RlpToken>, Error> {
    // too small or too large
    if input.is_empty() || input.len() as u64 > u64::MAX {
        return Err(Error::DeserializeRlp);
    }

    match input[0] {
        d if d <= 0x7f => {
            // unit value
            let mut out = vec![RlpToken::SingleByte(d)];
            // base case, no other elements
            if input.len() == 1 {
                Ok(out)
            } else {
                // recurse for other elements
                out.extend(unpack_rlp(&input[1..])?);
                Ok(out)
            }
        }
        d if d <= 0xb7 => {
            // case for a small string
            let len_of_string = (d - 0x80) as usize;
            let end_index = 1 + len_of_string;

            if end_index > input.len() {
                return Err(Error::DeserializeRlp);
            }

            let mut out = if len_of_string == 0 {
                // special case for encoding an empty string
                // this can also be interpreted as the single byte zero
                // but it seems encoders expect 0x80 rather than 0x00
                vec![RlpToken::String(vec![])]
            } else if len_of_string == 1 {
                // speical case for a single byte value
                vec![RlpToken::SingleByte(input[1])]
            } else {
                vec![RlpToken::String(input[1..end_index].to_vec())]
            };
            // base case, no other elements
            if input.len() == end_index {
                Ok(out)
            } else {
                // recurse for other elements
                out.extend(unpack_rlp(&input[end_index..])?);
                Ok(out)
            }
        }
        d if d < 0xc0 => {
            // case for long string, decode both the length of the length and then the actual data
            let len_of_len_of_string = (d - 0xb7) as usize;
            if len_of_len_of_string >= input.len() - 1 {
                // impossibly long
                return Err(Error::DeserializeRlp);
            }
            let len_of_string =
                downcast(Uint256::from_be_bytes(&input[1..1 + len_of_len_of_string]))?;
            let start_index = 1 + len_of_len_of_string;
            let end_index = start_index + len_of_string;
            if start_index + len_of_string >= input.len() {
                // impossibly long
                return Err(Error::DeserializeRlp);
            }
            let mut out = vec![RlpToken::String(
                input[start_index..start_index + len_of_string].to_vec(),
            )];
            // base case, no other elements
            if input.len() == end_index {
                Ok(out)
            } else {
                // recurse for other elements
                out.extend(unpack_rlp(&input[end_index..])?);
                Ok(out)
            }
        }
        d if d <= 0xf7 => {
            // case for a short list, recurse
            let len_of_list = (d - 0xc0) as usize;
            let start_index = 1;
            let end_index = start_index + len_of_list;

            if end_index > input.len() {
                return Err(Error::DeserializeRlp);
            }

            let mut out = if len_of_list == 0 {
                // special case for encoding an empty list
                vec![RlpToken::List(vec![])]
            } else {
                vec![RlpToken::List(unpack_rlp(&input[start_index..end_index])?)]
            };
            // base case, no other elements
            if input.len() == end_index {
                Ok(out)
            } else {
                // recurse for other elements
                out.extend(unpack_rlp(&input[end_index..])?);
                Ok(out)
            }
        }
        d => {
            // case for long list, decode both the length of the length and then recurse
            let len_of_len_of_list = (d - 0xf7) as usize;
            if len_of_len_of_list >= input.len() - 1 {
                // impossibly long
                return Err(Error::DeserializeRlp);
            }
            let len_of_list = downcast(Uint256::from_be_bytes(&input[1..1 + len_of_len_of_list]))?;
            let start_index = 1 + len_of_len_of_list;
            let end_index = start_index + len_of_list;

            if end_index > input.len() {
                return Err(Error::DeserializeRlp);
            }

            let mut out = vec![RlpToken::List(unpack_rlp(&input[start_index..end_index])?)];
            // base case, no other elements
            if input.len() == end_index {
                Ok(out)
            } else {
                // recurse for other elements
                out.extend(unpack_rlp(&input[end_index..])?);
                Ok(out)
            }
        }
    }
}

/// Takes RLP token structs and packs the values into a single rlp
/// encoded byte array
///
/// Tokens are encoded in order and concatenated. Lists are encoded recursively and wrapped
/// in a list header, strings are wrapped in a string header, and single bytes in the range
/// 0x01 to 0x7f are written as themselves.
pub fn pack_rlp(input: Vec<RlpToken>) -> Vec<u8> {
    let mut out: Vec<u8> = Vec::new();
    for token in input {
        match token {
            RlpToken::List(list) => {
                let payload = pack_rlp(list);
                push_rlp_header(&mut out, 0xc0, payload.len());
                out.extend(payload);
            }
            RlpToken::String(string) => {
                // this is a series of observed hacky conditions, I believe because we compress
                // addresses that are less than 20 bytes to zeros
                let payload = if string.len() <= 20 && all_bytes_are_zero(&string) {
                    Vec::new()
                } else {
                    string
                };
                if payload.len() == 1 && payload[0] <= 0x7f {
                    // a lone byte in this range is its own encoding, wrapping it in a
                    // 0x81 header would produce non-canonical rlp
                    out.push(payload[0]);
                } else {
                    push_rlp_header(&mut out, 0x80, payload.len());
                    out.extend(payload);
                }
            }
            // a single byte can be encoded as itself or as a single byte string
            RlpToken::SingleByte(b) => match b {
                // the value 0 is encoded as a zero length string rather than 0x00
                0 => out.push(0x80),
                // small values are their own encoding
                0x01..=0x7f => out.push(b),
                // larger value encoded as a single byte string
                _ => out.extend_from_slice(&[0x81, b]),
            },
        }
    }
    out
}

/// Appends the header for a payload of `payload_len` bytes to `out`. `short_offset` selects
/// the item kind: 0x80 for strings and 0xc0 for lists. Payloads of up to 55 bytes store their
/// length in the prefix byte itself, longer payloads store the number of length bytes in the
/// prefix followed by the big endian length.
fn push_rlp_header(out: &mut Vec<u8>, short_offset: u8, payload_len: usize) {
    if payload_len <= 55 {
        out.push(short_offset + payload_len as u8);
    } else {
        let len_bytes = trim_leading_zero_bytes(payload_len.to_be_bytes().to_vec());
        // the long form prefix starts 55 above the short one (0xb7 / 0xf7); a usize of at
        // most 64 bits yields at most 8 length bytes, so the sum stays within a u8
        out.push(short_offset + 55 + len_bytes.len() as u8);
        out.extend(len_bytes);
    }
}

/// Returns true when every byte of `input` is zero, an empty slice counts as all zero
fn all_bytes_are_zero(input: &[u8]) -> bool {
    input.iter().all(|byte| *byte == 0)
}

/// Safely downcasts a Uint256 to system integer size, note that on systems with 32 bit integer size
/// this may return invalid for some otherwise valid RLP, but only in the case that the system doesn't have
/// enough memory to decode the value anyways. I guess swap might allow this error case to actually be encountered
pub fn downcast(input: Uint256) -> Result<usize, Error> {
    if input > usize::MAX.into() {
        Err(Error::DeserializeRlp)
    } else {
        const USIZE_BYTES: usize = (usize::BITS / 8) as usize;
        let bytes = input.to_le_bytes();
        let mut slice = [0; USIZE_BYTES];
        slice.copy_from_slice(&bytes[0..USIZE_BYTES]);
        Ok(usize::from_le_bytes(slice))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::get_fuzz_bytes;
    use rand::thread_rng;
    use std::time::{Duration, Instant};

    /// Wall clock budget for the fuzzing loop
    const FUZZ_TIME: Duration = Duration::from_secs(30);

    /// Small values pass through unchanged, a value one past u32::MAX only fits on
    /// targets with a 64 bit pointer width
    #[test]
    fn test_downcast() {
        let small = downcast(50u8.into());
        assert_eq!(small.unwrap(), 50);
        let max = Uint256::from(u32::MAX);
        // note this will not work on systems with a 16 bit integer size
        #[cfg(all(unix, target_pointer_width = "32"))]
        assert!(downcast(max + 1u8.into()).is_err());
        #[cfg(all(unix, target_pointer_width = "64"))]
        assert_eq!(downcast(max + 1u8.into()).unwrap(), (u32::MAX as usize + 1));
    }

    /// Feeds fuzz bytes to the decoder until the time budget runs out, the decoder must
    /// never panic regardless of the input
    #[test]
    fn fuzz_rlp_decode() {
        let start = Instant::now();
        let mut rng = thread_rng();
        while start.elapsed() < FUZZ_TIME {
            let transaction_bytes = get_fuzz_bytes(&mut rng);

            if unpack_rlp(&transaction_bytes).is_ok() {
                println!("Got valid output, this should happen very rarely!");
            }
        }
    }
}