Skip to main content

ethrex_common/types/
account.rs

1use std::collections::BTreeMap;
2use std::sync::{Arc, LazyLock};
3
4use bytes::{BufMut, Bytes};
5use ethereum_types::{H256, U256};
6use ethrex_crypto::{Crypto, NativeCrypto};
7use ethrex_trie::Trie;
8use rustc_hash::FxHashMap;
9use serde::{Deserialize, Serialize};
10
11use ethrex_rlp::{
12    decode::RLPDecode,
13    encode::RLPEncode,
14    error::RLPDecodeError,
15    structs::{Decoder, Encoder},
16};
17
18use super::GenesisAccount;
19use crate::constants::{EMPTY_KECCAK_HASH, EMPTY_TRIE_HASH};
20
/// Process-wide empty jump-target table.
///
/// `Code::default()` and every bytecode that contains no `JUMPDEST` hand out a
/// clone of this `Arc` (a refcount bump) rather than allocating a fresh empty
/// `Arc` header. That is worthwhile because the per-tx `Code::default()`
/// placeholder and every EOA / empty-code load would otherwise each allocate.
static EMPTY_JUMP_TARGETS: LazyLock<Arc<[u32]>> = LazyLock::new(|| {
    let no_targets: [u32; 0] = [];
    Arc::from(no_targets)
});
26
/// Number of trailing STOP bytes appended to every bytecode so the dispatch loop
/// can fetch the next opcode without a bounds check.
///
/// The widest single-opcode advance is PUSH32 (1 opcode byte + 32 immediate
/// bytes), so with this much padding `pc` can never step past the buffer no
/// matter which opcode sits at the last real byte.
pub const BYTECODE_PADDING: usize = 1 + 32;
32
33#[derive(Clone, Debug, PartialEq, Eq, Hash)]
34pub struct Code {
35    // hash is only used for bytecodes stored in the DB, either for reading it from the DB
36    // or with the CODEHASH opcode, which needs an account address as argument and
37    // thus only accessed persisted bytecodes.
38    // We use a bogus H256::zero() value for initcodes as there is no way for the VM or
39    // endpoints to access that hash, saving one expensive Keccak hash.
40    pub hash: H256,
41    /// bytecode padded with 33 zeroes (STOP opcodes, due to PUSH32) to avoid checks on the hot path.
42    bytecode: Bytes,
43    /// The real bytecode length, needed for some opcodes, `bytecode` is padded with 33 STOPs to avoid checked adds on hot loop.
44    bytecode_len: usize,
45    // `Arc<[u32]>` so cloning `Code` (hot: every message-call resolves and clones
46    // the callee's code) is a refcount bump instead of deep-copying the table.
47    // Serializes via serde's `rc` feature (enabled workspace-wide).
48    // The valid addresses are 32-bit because, despite EIP-3860 restricting initcode size,
49    // this does not apply to previous forks. This is tested in the EEST tests, which would
50    // panic in debug mode.
51    pub jump_targets: Arc<[u32]>,
52}
53
54impl Code {
55    // SAFETY: hash will be stored as-is, so it either needs to match
56    // the real code hash (i.e. it was precomputed and we're reusing)
57    // or never be read (e.g. for initcode).
58    //
59    // `code` is the logical, unpadded bytecode; `BYTECODE_PADDING` STOP bytes are
60    // appended internally by `from_parts_unchecked`.
61    pub fn from_bytecode_unchecked(code: Bytes, hash: H256) -> Self {
62        let jump_targets = Self::compute_jump_targets(&code);
63        Self::from_parts_unchecked(hash, &code, jump_targets)
64    }
65
66    /// `code` is the logical, unpadded bytecode; `BYTECODE_PADDING` STOP bytes are
67    /// appended internally by `from_parts_unchecked`.
68    pub fn from_bytecode(code: Bytes, crypto: &dyn Crypto) -> Self {
69        let jump_targets = Self::compute_jump_targets(&code);
70        let hash = H256(crypto.keccak256(code.as_ref()));
71        Self::from_parts_unchecked(hash, &code, jump_targets)
72    }
73
74    /// Builds a `Code` from precomputed parts. The caller must guarantee `hash`
75    /// and `jump_targets` correspond to `code`; neither is recomputed or validated.
76    ///
77    /// `code` is the logical, unpadded bytecode: this function appends
78    /// `BYTECODE_PADDING` STOP bytes and records the original length in
79    /// `bytecode_len`. Never pass a pre-padded buffer, or the logical length and
80    /// every `JUMPDEST`/`PUSH` offset derived from it would be wrong.
81    pub fn from_parts_unchecked(hash: H256, code: &[u8], jump_targets: Arc<[u32]>) -> Self {
82        let bytecode_len = code.len();
83        let mut padded_code = Vec::with_capacity(bytecode_len + BYTECODE_PADDING);
84        padded_code.extend_from_slice(code);
85        padded_code.extend_from_slice(&[0u8; BYTECODE_PADDING]);
86        Self {
87            hash,
88            bytecode: Bytes::from_owner(padded_code),
89            bytecode_len,
90            jump_targets,
91        }
92    }
93
94    fn compute_jump_targets(code: &[u8]) -> Arc<[u32]> {
95        debug_assert!(code.len() <= u32::MAX as usize);
96        let mut targets = Vec::new();
97        let mut i = 0;
98        while i < code.len() {
99            // TODO: we don't use the constants from the vm module to avoid a circular dependency
100            match code[i] {
101                // OP_JUMPDEST
102                0x5B => {
103                    targets.push(i as u32);
104                }
105                // OP_PUSH1..32
106                c @ 0x60..0x80 => {
107                    // OP_PUSH0
108                    i += (c - 0x5F) as usize;
109                }
110                _ => (),
111            }
112            i += 1;
113        }
114        // Share the single empty table for jumpless bytecode (very common: EOAs,
115        // tiny contracts) so we don't allocate an `Arc` header for an empty slice.
116        if targets.is_empty() {
117            EMPTY_JUMP_TARGETS.clone()
118        } else {
119            Arc::from(targets)
120        }
121    }
122
123    #[inline]
124    pub fn code(&self) -> &[u8] {
125        self.bytecode.get(..self.bytecode_len).unwrap_or_default()
126    }
127
128    #[inline]
129    pub fn code_bytes(&self) -> Bytes {
130        self.bytecode.slice(..self.bytecode_len)
131    }
132
133    #[inline]
134    pub fn len(&self) -> usize {
135        self.bytecode_len
136    }
137
138    #[inline]
139    pub fn is_empty(&self) -> bool {
140        self.bytecode_len == 0
141    }
142
143    /// Returns the padded bytecode buffer (real code + [`BYTECODE_PADDING`] trailing
144    /// STOPs) used by the opcode dispatch loop to read opcodes without bounds checks.
145    /// Use [`Code::code`] for the real, unpadded bytecode.
146    #[inline]
147    pub fn dispatch_buf(&self) -> &[u8] {
148        &self.bytecode
149    }
150
151    /// Estimates the size of the Code struct in bytes
152    /// (including stack size and heap allocation).
153    ///
154    /// Note: This is an estimation and may not be exact.
155    ///
156    /// # Returns
157    ///
158    /// usize - Estimated size in bytes
159    pub fn size(&self) -> usize {
160        let hash_size = size_of::<H256>();
161        let bytes_size = size_of::<Bytes>();
162        let vec_size = size_of::<Arc<[u32]>>() + self.jump_targets.len() * size_of::<u32>();
163        hash_size + bytes_size + vec_size
164    }
165}
166
167/// Serde shadow for [`Code`]. Stores the *logical* (unpadded) bytecode so the
168/// padding is never part of the serialized form. Deserialization re-pads through
169/// [`Code::from_parts_unchecked`], which keeps the dispatch-loop invariant (every
170/// `Code` is padded with [`BYTECODE_PADDING`] trailing STOPs) sound regardless of
171/// where the bytes came from. Deserializing the padded buffer directly would
172/// otherwise let unpadded input through and cause OOB reads during execution.
173#[derive(Serialize, Deserialize)]
174struct CodeSerde {
175    hash: H256,
176    code: Bytes,
177    jump_targets: Arc<[u32]>,
178}
179
180impl Serialize for Code {
181    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
182        CodeSerde {
183            hash: self.hash,
184            code: self.code_bytes(),
185            jump_targets: self.jump_targets.clone(),
186        }
187        .serialize(serializer)
188    }
189}
190
191impl<'de> Deserialize<'de> for Code {
192    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
193        let CodeSerde {
194            hash,
195            code,
196            jump_targets,
197        } = CodeSerde::deserialize(deserializer)?;
198        Ok(Self::from_parts_unchecked(hash, &code, jump_targets))
199    }
200}
201
202#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
203pub struct CodeMetadata {
204    pub length: u64,
205}
206
207#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
208pub struct Account {
209    pub info: AccountInfo,
210    pub code: Code,
211    pub storage: FxHashMap<H256, U256>,
212}
213
214#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Eq, Hash)]
215pub struct AccountInfo {
216    pub code_hash: H256,
217    pub balance: U256,
218    pub nonce: u64,
219}
220
221#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
222pub struct AccountState {
223    pub nonce: u64,
224    pub balance: U256,
225    pub storage_root: H256,
226    pub code_hash: H256,
227}
228
229/// A slim codec for an [`AccountState`].
230///
231/// The slim codec will optimize both the [storage root](AccountState::storage_root) and the
232/// [code hash](AccountState::code_hash)'s encoding so that it does not take space when empty.
233///
234/// The correct way to use it is to wrap the [`AccountState`] and encode it using this codec, and
235/// not to store the codec as a field in a struct.
236#[derive(Clone, Copy, Debug, Default, PartialEq)]
237pub struct AccountStateSlimCodec(pub AccountState);
238
239impl Default for AccountInfo {
240    fn default() -> Self {
241        Self {
242            code_hash: *EMPTY_KECCAK_HASH,
243            balance: Default::default(),
244            nonce: Default::default(),
245        }
246    }
247}
248
249impl Default for AccountState {
250    fn default() -> Self {
251        Self {
252            nonce: Default::default(),
253            balance: Default::default(),
254            storage_root: *EMPTY_TRIE_HASH,
255            code_hash: *EMPTY_KECCAK_HASH,
256        }
257    }
258}
259
260impl Default for Code {
261    fn default() -> Self {
262        Self {
263            bytecode: Bytes::from_static(&[0u8; BYTECODE_PADDING]),
264            bytecode_len: 0,
265            hash: *EMPTY_KECCAK_HASH,
266            jump_targets: EMPTY_JUMP_TARGETS.clone(),
267        }
268    }
269}
270
271impl From<GenesisAccount> for Account {
272    fn from(genesis: GenesisAccount) -> Self {
273        let code = Code::from_bytecode(genesis.code, &NativeCrypto);
274        Self {
275            info: AccountInfo {
276                code_hash: code.hash,
277                balance: genesis.balance,
278                nonce: genesis.nonce,
279            },
280            code,
281            storage: genesis
282                .storage
283                .iter()
284                .map(|(k, v)| (H256(k.to_big_endian()), *v))
285                .collect(),
286        }
287    }
288}
289
290pub fn code_hash(code: &Bytes, crypto: &dyn Crypto) -> H256 {
291    H256(crypto.keccak256(code.as_ref()))
292}
293
294impl RLPEncode for AccountInfo {
295    fn encode(&self, buf: &mut dyn bytes::BufMut) {
296        Encoder::new(buf)
297            .encode_field(&self.code_hash)
298            .encode_field(&self.balance)
299            .encode_field(&self.nonce)
300            .finish();
301    }
302}
303
304impl RLPDecode for AccountInfo {
305    fn decode_unfinished(rlp: &[u8]) -> Result<(AccountInfo, &[u8]), RLPDecodeError> {
306        let decoder = Decoder::new(rlp)?;
307        let (code_hash, decoder) = decoder.decode_field("code_hash")?;
308        let (balance, decoder) = decoder.decode_field("balance")?;
309        let (nonce, decoder) = decoder.decode_field("nonce")?;
310        let account_info = AccountInfo {
311            code_hash,
312            balance,
313            nonce,
314        };
315        Ok((account_info, decoder.finish()?))
316    }
317}
318
319impl RLPEncode for AccountState {
320    fn encode(&self, buf: &mut dyn bytes::BufMut) {
321        Encoder::new(buf)
322            .encode_field(&self.nonce)
323            .encode_field(&self.balance)
324            .encode_field(&self.storage_root)
325            .encode_field(&self.code_hash)
326            .finish();
327    }
328}
329
330impl RLPDecode for AccountState {
331    fn decode_unfinished(rlp: &[u8]) -> Result<(AccountState, &[u8]), RLPDecodeError> {
332        let decoder = Decoder::new(rlp)?;
333        let (nonce, decoder) = decoder.decode_field("nonce")?;
334        let (balance, decoder) = decoder.decode_field("balance")?;
335        let (storage_root, decoder) = decoder.decode_field("storage_root")?;
336        let (code_hash, decoder) = decoder.decode_field("code_hash")?;
337        let state = AccountState {
338            nonce,
339            balance,
340            storage_root,
341            code_hash,
342        };
343        Ok((state, decoder.finish()?))
344    }
345}
346
347impl RLPEncode for AccountStateSlimCodec {
348    fn encode(&self, buf: &mut dyn BufMut) {
349        struct StorageRootCodec<'a>(&'a H256);
350        impl RLPEncode for StorageRootCodec<'_> {
351            fn encode(&self, buf: &mut dyn BufMut) {
352                let data = if *self.0 != *EMPTY_TRIE_HASH {
353                    self.0.as_bytes()
354                } else {
355                    &[]
356                };
357
358                data.encode(buf);
359            }
360        }
361
362        struct CodeHashCodec<'a>(&'a H256);
363        impl RLPEncode for CodeHashCodec<'_> {
364            fn encode(&self, buf: &mut dyn BufMut) {
365                let data = if *self.0 != *EMPTY_KECCAK_HASH {
366                    self.0.as_bytes()
367                } else {
368                    &[]
369                };
370
371                data.encode(buf);
372            }
373        }
374
375        Encoder::new(buf)
376            .encode_field(&self.0.nonce)
377            .encode_field(&self.0.balance)
378            .encode_field(&StorageRootCodec(&self.0.storage_root))
379            .encode_field(&CodeHashCodec(&self.0.code_hash))
380            .finish();
381    }
382}
383
384impl RLPDecode for AccountStateSlimCodec {
385    fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
386        struct StorageRootCodec(H256);
387        impl RLPDecode for StorageRootCodec {
388            fn decode_unfinished(mut rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
389                let value = match rlp.split_off_first() {
390                    Some(0x80) => *EMPTY_TRIE_HASH,
391                    Some(0xA0) => {
392                        let data;
393                        (data, rlp) = rlp
394                            .split_first_chunk::<32>()
395                            .ok_or(RLPDecodeError::InvalidLength)?;
396                        H256(*data)
397                    }
398                    _ => return Err(RLPDecodeError::InvalidLength),
399                };
400
401                Ok((Self(value), rlp))
402            }
403        }
404
405        struct CodeHashCodec(H256);
406        impl RLPDecode for CodeHashCodec {
407            fn decode_unfinished(mut rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError> {
408                let value = match rlp.split_off_first() {
409                    Some(0x80) => *EMPTY_KECCAK_HASH,
410                    Some(0xA0) => {
411                        let data;
412                        (data, rlp) = rlp
413                            .split_first_chunk::<32>()
414                            .ok_or(RLPDecodeError::InvalidLength)?;
415                        H256(*data)
416                    }
417                    _ => return Err(RLPDecodeError::InvalidLength),
418                };
419
420                Ok((Self(value), rlp))
421            }
422        }
423
424        let decoder = Decoder::new(rlp)?;
425        let (nonce, decoder) = decoder.decode_field("nonce")?;
426        let (balance, decoder) = decoder.decode_field("balance")?;
427        let (StorageRootCodec(storage_root), decoder) = decoder.decode_field("storage_root")?;
428        let (CodeHashCodec(code_hash), decoder) = decoder.decode_field("code_hash")?;
429
430        Ok((
431            Self(AccountState {
432                nonce,
433                balance,
434                storage_root,
435                code_hash,
436            }),
437            decoder.finish()?,
438        ))
439    }
440}
441
442pub fn compute_storage_root(storage: &BTreeMap<U256, U256>, crypto: &dyn Crypto) -> H256 {
443    let iter = storage.iter().filter_map(|(k, v)| {
444        (!v.is_zero()).then_some((
445            crypto.keccak256(&k.to_big_endian()).to_vec(),
446            v.encode_to_vec(),
447        ))
448    });
449    Trie::compute_hash_from_unsorted_iter(iter, crypto)
450}
451
452impl From<&GenesisAccount> for AccountState {
453    fn from(value: &GenesisAccount) -> Self {
454        AccountState {
455            nonce: value.nonce,
456            balance: value.balance,
457            storage_root: compute_storage_root(&value.storage, &NativeCrypto),
458            code_hash: code_hash(&value.code, &NativeCrypto),
459        }
460    }
461}
462
463impl Account {
464    pub fn new(balance: U256, code: Code, nonce: u64, storage: FxHashMap<H256, U256>) -> Self {
465        Self {
466            info: AccountInfo {
467                balance,
468                code_hash: code.hash,
469                nonce,
470            },
471            code,
472            storage,
473        }
474    }
475}
476
477impl AccountInfo {
478    pub fn is_empty(&self) -> bool {
479        self.balance.is_zero() && self.nonce == 0 && self.code_hash == *EMPTY_KECCAK_HASH
480    }
481}
482
#[cfg(test)]
mod test {
    use std::str::FromStr;

    use super::*;

    /// The hash of empty code must be the well-known Keccak-256 of the empty input.
    #[test]
    fn test_code_hash() {
        let expected =
            H256::from_str("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470")
                .unwrap();
        let actual = code_hash(&Bytes::new(), &NativeCrypto);
        assert_eq!(actual, expected);
    }
}