edb_engine/inspector/
tweak_inspector.rs

1// EDB - Ethereum Debugger
2// Copyright (C) 2024 Zhuo Zhang and Wuqi Zhang
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! Contract deployment inspector for replacing creation bytecode
18//!
19//! This inspector intercepts contract creation calls and can replace the init code
20//! with custom bytecode when the deployment would create a specific target address.
21
22use alloy_dyn_abi::JsonAbiExt;
23use alloy_primitives::{Address, Bytes, U256};
24use edb_common::EdbContext;
25use eyre::Result;
26use foundry_compilers::{artifacts::Contract, Artifact as _};
27use itertools::Itertools;
28use revm::{
29    bytecode::OpCode,
30    context::{CreateScheme, JournalTr},
31    database::CacheDB,
32    interpreter::{CreateInputs, CreateOutcome},
33    Database, DatabaseCommit, DatabaseRef, Inspector,
34};
35use tracing::{debug, error, info, warn};
36
37use crate::utils::disasm::{disassemble, extract_push_value};
38
/// Search window shared by the constructor-argument heuristics in this file.
///
/// It bounds (a) how many byte offsets on either side of the original creation
/// code length are probed for ABI-decodable constructor arguments, and (b) how
/// many instructions after a `PUSHn <K> CODESIZE SUB` sequence are scanned for
/// the matching `CODECOPY`.
const CONSTRUCTOR_ARG_SEARCH_RANGE: usize = 1024;
40/// Inspector that intercepts and modifies contract deployments
41#[derive(Debug)]
42pub struct TweakInspector<'a> {
43    /// Target address we're looking for
44    target_address: Address,
45
46    /// Original init code (contract creation bytecode)
47    contract: &'a Contract,
48
49    /// Custom init code to use (contract creation bytecode)
50    recompiled_contract: &'a Contract,
51
52    /// Constructor arguments to append to init code
53    constructor_args: &'a Bytes,
54
55    /// The deployed bytecode we captured (filled after successful deployment)
56    deployed_code: Option<Bytes>,
57
58    /// Whether we found and replaced the target deployment
59    found_target: bool,
60}
61
62impl<'a> TweakInspector<'a> {
63    /// Create a new deployment inspector
64    pub fn new(
65        target_address: Address,
66        contract: &'a Contract,
67        recompiled_contract: &'a Contract,
68        constructor_args: &'a Bytes,
69    ) -> Self {
70        Self {
71            target_address,
72            contract,
73            recompiled_contract,
74            constructor_args,
75            deployed_code: None,
76            found_target: false,
77        }
78    }
79
80    /// Get the deployed bytecode if the target was found and deployed
81    pub fn deployed_code(&self) -> Option<&Bytes> {
82        self.deployed_code.as_ref()
83    }
84
85    /// Check if the target deployment was found
86    pub fn found_target(&self) -> bool {
87        self.found_target
88    }
89
90    /// Generate the deployed code
91    pub fn into_deployed_code(self) -> Result<Bytes> {
92        self.deployed_code.ok_or(eyre::eyre!("No deployed code found"))
93    }
94
95    /// Extract constructor arguments from the actual init code
96    /// This tries multiple strategies to extract the constructor arguments
97    fn extract_constructor_args(&self, init_code: &Bytes) -> Option<Bytes> {
98        // Early check: whether the constructor has arguments
99        if self
100            .contract
101            .abi
102            .as_ref()
103            .and_then(|abi| abi.constructor.as_ref())
104            .map(|c| c.inputs.is_empty())
105            .unwrap_or(true)
106        {
107            // If there is no constructor information, we assume Etherscan is correct.
108            debug!("No constructor args needed, using bytes from Etherscan");
109            return Some(self.constructor_args.clone());
110        }
111
112        // Strategy 1: If constructor_args from Etherscan is not empty, use it
113        if !self.constructor_args.is_empty() {
114            debug!("Using constructor args from Etherscan: {} bytes", self.constructor_args.len());
115            return Some(self.constructor_args.clone());
116        }
117
118        // Strategy 2: Check if original_init_code is an exact prefix of init_code
119        let original_creation_code = self.contract.get_bytecode_bytes()?;
120        if init_code.len() >= original_creation_code.len() {
121            let prefix = &init_code[..original_creation_code.len()];
122            if prefix == original_creation_code.as_ref() {
123                // Extract constructor args from the suffix
124                let constructor_args = &init_code[original_creation_code.len()..];
125                debug!(
126                    "Original init code is exact prefix, extracted constructor args: {} bytes",
127                    constructor_args.len()
128                );
129                return Some(Bytes::from(constructor_args.to_vec()));
130            }
131        }
132
133        // Strategy 3: Fallback - heuristically extract constructor args
134        // Assume constructor args start around original_init_code length and try to
135        // use constroctor abi to decode them
136        if init_code.len() > original_creation_code.len() {
137            let constructor_args = &init_code[original_creation_code.len()..];
138            debug!(
139                "Using heuristic extraction for constructor args: {} bytes",
140                constructor_args.len()
141            );
142
143            // At this point, we must have constructor args
144            let k = original_creation_code.len();
145            for i in (0..=CONSTRUCTOR_ARG_SEARCH_RANGE)
146                .flat_map(move |d| {
147                    [k.saturating_add(d).min(init_code.len() - 1), k.saturating_sub(d)]
148                })
149                .unique()
150            {
151                if self.can_use_as_constructor_args(&init_code[i..]) {
152                    debug!(
153                        "Successfully extracted constructor args: {} bytes",
154                        init_code[i..].len()
155                    );
156                    return Some(Bytes::from(init_code[i..].to_vec()));
157                }
158            }
159        }
160
161        // Strategy 4: Try to extract constructor args using the K pattern algorithm
162        if let Some(constructor_args) = self.extract_constructor_args_with_k_pattern(init_code) {
163            debug!("Extracted constructor args using K pattern: {} bytes", constructor_args.len());
164            return Some(constructor_args);
165        }
166
167        // No constructor args found
168        warn!("Could not extract constructor args");
169        None
170    }
171
172    /// Check whether the given bytes can be used as the constructor arguments
173    fn can_use_as_constructor_args(&self, data: &[u8]) -> bool {
174        let Some(constructor) = self.contract.abi.as_ref().and_then(|abi| abi.constructor.as_ref())
175        else {
176            return false;
177        };
178
179        if let Ok(decoded) = constructor.abi_decode_input(data) {
180            constructor
181                .abi_encode_input(&decoded)
182                .ok()
183                .map(|encoded| encoded == data)
184                .unwrap_or(false)
185        } else {
186            false
187        }
188    }
189
190    /// Extract constructor arguments using the K pattern from Solidity's init code
191    /// The pattern is: PUSHn <K> CODESIZE SUB ... PUSHn <K> ... CODECOPY
192    /// where K is the offset where [runtime_code][constructor_args] starts
193    fn extract_constructor_args_with_k_pattern(&self, init_code: &Bytes) -> Option<Bytes> {
194        // Disassemble the init code
195        let disasm = disassemble(init_code);
196
197        // Find all potential K values by looking for the complete pattern
198        let mut k_candidates = Vec::new();
199
200        for (i, inst) in disasm.instructions.iter().enumerate() {
201            // Look for the pattern: PUSHn <K> CODESIZE SUB
202            if !inst.is_push() {
203                continue;
204            }
205
206            // Check whether K value is valid
207            let Some(k_value) = extract_push_value(inst) else { continue };
208            if k_value >= U256::from(init_code.len()) {
209                continue;
210            }
211
212            // Check if this is followed by CODESIZE SUB
213            //
214            // CODESIZE check
215            let Some(codesize_inst) = disasm.instructions.get(i + 1) else { continue };
216            if codesize_inst.opcode != OpCode::CODESIZE {
217                continue;
218            }
219
220            // SUB check
221            let Some(sub_inst) = disasm.instructions.get(i + 2) else { continue };
222            if sub_inst.opcode != OpCode::SUB {
223                continue;
224            }
225
226            // Now verify this K appears again before CODECOPY
227            // Look ahead for CODECOPY (0x39)
228            for j in (i + 3)..(i + CONSTRUCTOR_ARG_SEARCH_RANGE) {
229                // CODECOPY
230                if disasm.instructions[j].opcode != OpCode::CODECOPY {
231                    continue;
232                }
233
234                // Check if the same K value appears before CODECOPY
235                // CODECOPY typically has pattern: PUSHn <K> ... CODECOPY
236                if let Some(push_before_codecopy) = disasm.instructions.get(j - 2) {
237                    if push_before_codecopy.is_push() {
238                        let Some(k2) = extract_push_value(push_before_codecopy) else { continue };
239                        if k2 == k_value {
240                            k_candidates.push(k_value);
241                            debug!("Found confirmed K value with full pattern: {}", k_value);
242                            break;
243                        }
244                    }
245                }
246
247                // Also check j-1 position for direct PUSH K CODECOPY
248                if let Some(push_before_codecopy) = disasm.instructions.get(j - 1) {
249                    if push_before_codecopy.is_push() {
250                        let Some(k2) = extract_push_value(push_before_codecopy) else { continue };
251                        if k2 == k_value {
252                            k_candidates.push(k_value);
253                            debug!("Found confirmed K value with full pattern: {}", k_value);
254                            break;
255                        }
256                    }
257                }
258            }
259        }
260
261        if k_candidates.is_empty() {
262            debug!("No K candidates found with complete pattern in init code");
263            return None;
264        }
265
266        for k_value in k_candidates {
267            // Use the first confirmed K value (they should all be the same if valid)
268            let Ok(k) = TryInto::<usize>::try_into(k_value) else { continue };
269            debug!("Using confirmed K value: {}", k);
270
271            // Extract the tail from position K
272            if k >= init_code.len() {
273                continue;
274            }
275
276            let tail = &init_code[k..];
277
278            // The tail contains [runtime_code][constructor_args]
279            // We need to determine where runtime ends and constructor args begin
280
281            // If we have the deployed code, we can use its length
282            let Some(deployed) = self
283                .contract
284                .evm
285                .as_ref()
286                .and_then(|e| e.deployed_bytecode.as_ref())
287                .and_then(|d| d.bytes())
288            else {
289                continue;
290            };
291
292            let runtime_len = deployed.len();
293            if tail.len() > runtime_len {
294                let constructor_args = &tail[runtime_len..];
295                if self.can_use_as_constructor_args(constructor_args) {
296                    return Some(Bytes::from(constructor_args.to_vec()));
297                }
298            }
299        }
300
301        // Final fallback: assume no constructor args if we can't determine the split
302        warn!("Could not determine runtime/constructor args split in tail");
303        None
304    }
305
306    /// Combine init code with constructor arguments
307    fn get_full_init_code(&self, init_code: &Bytes) -> Option<Bytes> {
308        // Extract constructor arguments using various strategies.
309        // If we cannot extract a valid one, we trust Etherscan.
310        let constructor_args =
311            self.extract_constructor_args(init_code).unwrap_or(self.constructor_args.clone());
312
313        // Simply concatenate recompiled init code with constructor args
314        let Some(recompiled_creation_code) = self.recompiled_contract.get_bytecode_bytes() else {
315            error!("Failed to get recompiled creation code for {}", self.target_address);
316            return None;
317        };
318
319        let mut full_code = recompiled_creation_code.to_vec();
320        full_code.extend_from_slice(&constructor_args);
321
322        debug!(
323            "Created full init code: {} bytes (init: {}, args: {})",
324            full_code.len(),
325            recompiled_creation_code.len(),
326            constructor_args.len()
327        );
328
329        Some(Bytes::from(full_code))
330    }
331}
332
333impl<DB> Inspector<EdbContext<DB>> for TweakInspector<'_>
334where
335    DB: Database + DatabaseCommit + DatabaseRef + Clone,
336    <CacheDB<DB> as Database>::Error: Clone,
337    <DB as Database>::Error: Clone,
338{
339    fn create(
340        &mut self,
341        context: &mut EdbContext<DB>,
342        inputs: &mut CreateInputs,
343    ) -> Option<CreateOutcome> {
344        // Get the nonce from the caller account
345        let account = context.journaled_state.load_account(inputs.caller).ok()?;
346        let nonce = account.info.nonce;
347
348        // Calculate what address would be created using the built-in method
349        let predicted_address = inputs.created_address(nonce);
350
351        debug!(
352            "CREATE intercepted: deployer={:?}, predicted={:?}, target={:?}",
353            inputs.caller, predicted_address, self.target_address
354        );
355
356        // Check if this is our target deployment
357        if predicted_address == self.target_address {
358            info!(
359                "Found target deployment! Replacing init code for address {:?}",
360                self.target_address
361            );
362
363            self.found_target = true;
364
365            // Replace the init code with our custom code + constructor args
366            inputs.init_code = self.get_full_init_code(&inputs.init_code).unwrap_or_default();
367
368            // Force the address to be our target (in case of any calculation differences)
369            // Convert to Custom scheme to ensure the exact address
370            inputs.scheme = CreateScheme::Custom { address: self.target_address };
371        }
372
373        // Continue with normal execution
374        None
375    }
376
377    fn create_end(
378        &mut self,
379        _context: &mut EdbContext<DB>,
380        inputs: &CreateInputs,
381        outcome: &mut CreateOutcome,
382    ) {
383        // Check if this was our target deployment and it succeeded
384        if self.found_target
385            && matches!(inputs.scheme, CreateScheme::Custom { address } if address == self.target_address)
386        {
387            if outcome.result.is_ok() {
388                // Get the deployed bytecode from the context
389                if let Some(created_address) = outcome.address {
390                    if created_address == self.target_address {
391                        // Get deployed code from outcome's output (runtime bytecode)
392                        // self.deployed_code =
393                        //     context.load_account_code(created_address).map(|c| c.data.clone());
394                        self.deployed_code = Some(outcome.result.output.clone());
395                        info!(
396                            "Successfully captured deployed bytecode for {:?}: {} bytes",
397                            self.target_address,
398                            outcome.result.output.len()
399                        );
400                    }
401                }
402            } else {
403                info!(
404                    "Target deployment failed for {:?}: {:?}",
405                    self.target_address, outcome.result
406                );
407            }
408        }
409    }
410}