edb_engine/inspector/
opcode_snapshot_inspector.rs

1// EDB - Ethereum Debugger
2// Copyright (C) 2024 Zhuo Zhang and Wuqi Zhang
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! Opcode snapshot inspector for recording detailed VM state at each instruction
18//!
19//! This inspector captures instruction-level execution details including:
20//! - Current instruction offset (PC)
21//! - Contract address
22//! - Current opcode  
23//! - Memory state (with Arc sharing for unchanged memory)
24//! - Stack state (always cloned as most opcodes modify it)
25//! - Call data (with Arc sharing across same execution context)
26//!
27//! Memory optimization: Uses Arc to share memory and calldata when unchanged,
28//! reducing memory usage for large execution traces.
29
30use alloy_primitives::{Address, Bytes, U256};
31use edb_common::{
32    types::{ExecutionFrameId, Trace},
33    EdbContext, OpcodeTr,
34};
35use revm::{
36    bytecode::opcode::OpCode,
37    context::{ContextTr, LocalContextTr},
38    database::CacheDB,
39    interpreter::{
40        interpreter_types::{InputsTr, Jumps},
41        CallInputs, CallOutcome, CreateInputs, CreateOutcome, Interpreter,
42    },
43    state::TransientStorage,
44    Database, DatabaseCommit, DatabaseRef, Inspector,
45};
46use serde::{Deserialize, Serialize};
47use std::{
48    borrow::Borrow,
49    collections::{HashMap, HashSet},
50    ops::{Deref, DerefMut},
51    sync::Arc,
52};
53use tracing::error;
54
55/// Single opcode execution snapshot
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct OpcodeSnapshot<DB>
58where
59    DB: Database + DatabaseCommit + DatabaseRef + Clone,
60    <CacheDB<DB> as Database>::Error: Clone,
61    <DB as Database>::Error: Clone,
62{
63    /// Program counter (instruction offset)
64    pub pc: usize,
65    /// Target address that triggered the hook
66    pub target_address: Address,
67    /// Bytecode address that the current snapshot is running
68    pub bytecode_address: Address,
69    /// Current opcode
70    pub opcode: u8,
71    /// Memory state (shared via Arc when unchanged)
72    pub memory: Arc<Vec<u8>>,
73    /// Stack state (always cloned as most opcodes modify it)
74    pub stack: Vec<U256>,
75    /// Call data for this execution context (shared via Arc within same context)
76    pub calldata: Arc<Bytes>,
77    /// Database state (shared via Arc within same context)
78    pub database: Arc<CacheDB<DB>>,
79    /// Transition storage
80    pub transient_storage: Arc<TransientStorage>,
81}
82
83/// Collection of opcode snapshots
84#[derive(Debug, Clone)]
85pub struct OpcodeSnapshots<DB>
86where
87    DB: Database + DatabaseCommit + DatabaseRef + Clone,
88    <CacheDB<DB> as Database>::Error: Clone,
89    <DB as Database>::Error: Clone,
90{
91    inner: HashMap<ExecutionFrameId, Vec<OpcodeSnapshot<DB>>>,
92}
93
94impl<DB> Default for OpcodeSnapshots<DB>
95where
96    DB: Database + DatabaseCommit + DatabaseRef + Clone,
97    <CacheDB<DB> as Database>::Error: Clone,
98    <DB as Database>::Error: Clone,
99{
100    fn default() -> Self {
101        Self { inner: HashMap::new() }
102    }
103}
104
105impl<DB> Deref for OpcodeSnapshots<DB>
106where
107    DB: Database + DatabaseCommit + DatabaseRef + Clone,
108    <CacheDB<DB> as Database>::Error: Clone,
109    <DB as Database>::Error: Clone,
110{
111    type Target = HashMap<ExecutionFrameId, Vec<OpcodeSnapshot<DB>>>;
112
113    fn deref(&self) -> &Self::Target {
114        &self.inner
115    }
116}
117
118impl<DB> DerefMut for OpcodeSnapshots<DB>
119where
120    DB: Database + DatabaseCommit + DatabaseRef + Clone,
121    <CacheDB<DB> as Database>::Error: Clone,
122    <DB as Database>::Error: Clone,
123{
124    fn deref_mut(&mut self) -> &mut Self::Target {
125        &mut self.inner
126    }
127}
128
129/// Frame state tracking for memory optimization
130#[derive(Debug, Clone)]
131struct FrameState {
132    /// Last captured memory state
133    last_memory: Arc<Vec<u8>>,
134    /// Last captured calldata
135    last_calldata: Arc<Bytes>,
136}
137
138/// Inspector that records detailed opcode execution snapshots
139#[derive(Debug)]
140pub struct OpcodeSnapshotInspector<'a, DB>
141where
142    DB: Database + DatabaseCommit + DatabaseRef + Clone,
143    <CacheDB<DB> as Database>::Error: Clone,
144    <DB as Database>::Error: Clone,
145{
146    /// The trace of the current tx
147    trace: &'a Trace,
148
149    /// Map from execution frame ID to list of snapshots
150    pub snapshots: OpcodeSnapshots<DB>,
151
152    /// Set of addresses to exclude from recording (verified source code)
153    pub excluded_addresses: HashSet<Address>,
154
155    /// Stack to track current execution frames
156    frame_stack: Vec<ExecutionFrameId>,
157
158    /// Current trace entry counter (to match with call tracer)
159    current_trace_id: usize,
160
161    /// Frame state for each active frame (for memory optimization)
162    frame_states: HashMap<ExecutionFrameId, FrameState>,
163
164    /// Database context
165    database: Arc<CacheDB<DB>>,
166
167    /// Transition storage
168    transition_storage: Arc<TransientStorage>,
169
170    /// Last opcode
171    last_opcode: Option<OpCode>,
172}
173
174impl<'a, DB> OpcodeSnapshotInspector<'a, DB>
175where
176    DB: Database + DatabaseCommit + DatabaseRef + Clone,
177    <CacheDB<DB> as Database>::Error: Clone,
178    <DB as Database>::Error: Clone,
179{
180    /// Create a new opcode snapshot inspector
181    pub fn new(ctx: &EdbContext<DB>, trace: &'a Trace) -> Self {
182        Self {
183            trace,
184            snapshots: OpcodeSnapshots::<DB>::default(),
185            excluded_addresses: HashSet::new(),
186            frame_stack: Vec::new(),
187            current_trace_id: 0,
188            frame_states: HashMap::new(),
189            database: Arc::new(ctx.db().clone()),
190            transition_storage: Arc::new(TransientStorage::default()),
191            last_opcode: None,
192        }
193    }
194
195    /// Create inspector with excluded addresses
196    pub fn with_excluded_addresses(&mut self, excluded_addresses: HashSet<Address>) {
197        self.excluded_addresses = excluded_addresses;
198    }
199
200    /// Consume the inspector and return the collected snapshots
201    pub fn into_snapshots(self) -> OpcodeSnapshots<DB> {
202        self.snapshots
203    }
204
205    /// Add an address to exclude from recording
206    pub fn exclude_address(&mut self, address: Address) {
207        self.excluded_addresses.insert(address);
208    }
209
210    /// Get the current execution frame ID
211    fn current_frame_id(&self) -> Option<ExecutionFrameId> {
212        self.frame_stack.last().copied()
213    }
214
215    /// Check if we should record steps for the given address
216    fn should_record(&self, address: Address) -> bool {
217        !self.excluded_addresses.contains(&address)
218    }
219
220    /// Update storage
221    fn update_storage(&mut self, _interp: &Interpreter, ctx: &mut EdbContext<DB>) {
222        let Some(last_opcode) = self.last_opcode else { return };
223
224        if last_opcode.modifies_evm_state() {
225            let mut inner = ctx.journal().to_inner();
226            let changes = inner.finalize();
227            let mut snap = ctx.db().clone();
228            snap.commit(changes);
229            self.database = Arc::new(snap);
230        }
231
232        if last_opcode.modifies_transient_storage() {
233            let transient_storage = ctx.journal().transient_storage.clone();
234            self.transition_storage = Arc::new(transient_storage);
235        }
236    }
237
238    /// Record a snapshot at the current step
239    fn record_snapshot(&mut self, interp: &Interpreter, ctx: &mut EdbContext<DB>) {
240        // Get current opcode safely
241        let opcode = unsafe { OpCode::new_unchecked(interp.bytecode.opcode()) };
242
243        // Update last opcode
244        self.last_opcode = Some(opcode);
245
246        // Get current frame
247        let Some(frame_id) = self.current_frame_id() else {
248            return;
249        };
250
251        // Check if we should record for this address
252        let contract_address =
253            interp.input.bytecode_address().cloned().unwrap_or(interp.input.target_address());
254        if !self.should_record(contract_address) {
255            return;
256        }
257
258        let address = interp.input.target_address();
259
260        // Get or create frame state
261        let frame_state = self.frame_states.get(&frame_id);
262
263        // Get memory - reuse Arc if unchanged
264        let memory = if let Some(state) = frame_state {
265            let mem_ref = interp.memory.borrow();
266            let current_memory = mem_ref.context_memory();
267            if current_memory.len() == state.last_memory.len()
268                && &*current_memory == state.last_memory.as_slice()
269            {
270                // Memory unchanged, reuse Arc
271                state.last_memory.clone()
272            } else {
273                // Memory changed, create new Arc
274                Arc::new(current_memory.to_vec())
275            }
276        } else {
277            // First snapshot in frame
278            Arc::new(interp.memory.borrow().context_memory().to_vec())
279        };
280
281        // Get calldata - reuse Arc if in same frame
282        let calldata = if let Some(state) = frame_state {
283            state.last_calldata.clone()
284        } else {
285            // First snapshot in frame, get calldata
286            match interp.input.input() {
287                revm::interpreter::CallInput::SharedBuffer(range) => Arc::new(
288                    ctx.local()
289                        .shared_memory_buffer_slice(range.clone())
290                        .map(|slice| Bytes::from(slice.to_vec()))
291                        .unwrap_or_else(Bytes::new),
292                ),
293                revm::interpreter::CallInput::Bytes(bytes) => Arc::new(bytes.clone()),
294            }
295        };
296
297        // Create snapshot (stack is always cloned as it changes frequently)
298        let entry = self.trace.get(frame_id.trace_entry_id());
299        let snapshot = OpcodeSnapshot {
300            pc: interp.bytecode.pc(),
301            bytecode_address: entry.map(|t| t.code_address).unwrap_or(address),
302            target_address: entry.map(|t| t.target).unwrap_or(address),
303            opcode: opcode.get(),
304            memory: memory.clone(),
305            stack: interp.stack.data().clone(),
306            calldata: calldata.clone(),
307            database: self.database.clone(),
308            transient_storage: self.transition_storage.clone(),
309        };
310
311        // Add to snapshots for this frame
312        self.snapshots.entry(frame_id).or_default().push(snapshot);
313
314        // Update frame state for next snapshot
315        self.frame_states
316            .insert(frame_id, FrameState { last_memory: memory, last_calldata: calldata });
317    }
318
319    /// Start tracking a new execution frame
320    fn push_frame(&mut self, trace_id: usize) {
321        let frame_id = ExecutionFrameId::new(trace_id, 0);
322        self.frame_stack.push(frame_id);
323
324        // Initialize empty snapshot list for this frame if not exists
325        self.snapshots.entry(frame_id).or_default();
326    }
327
328    /// Stop tracking current execution frame and increment re-entry count
329    fn pop_frame(&mut self) -> Option<ExecutionFrameId> {
330        if let Some(frame_id) = self.frame_stack.pop() {
331            // Clean up frame state
332            self.frame_states.remove(&frame_id);
333
334            // Increment re-entry count for parent frame if it exists
335            if let Some(parent_frame_id) = self.frame_stack.last_mut() {
336                parent_frame_id.increment_re_entry();
337            }
338
339            Some(frame_id)
340        } else {
341            None
342        }
343    }
344
345    /// Get all recorded snapshots for a specific frame
346    pub fn get_frame_snapshots(
347        &self,
348        frame_id: ExecutionFrameId,
349    ) -> Option<&Vec<OpcodeSnapshot<DB>>> {
350        self.snapshots.get(&frame_id)
351    }
352
353    /// Get all execution frame IDs that have recorded snapshots
354    pub fn get_recorded_frames(&self) -> Vec<ExecutionFrameId> {
355        self.snapshots.keys().copied().collect()
356    }
357
358    /// Clear all recorded data
359    pub fn clear(&mut self) {
360        self.snapshots.clear();
361        self.frame_stack.clear();
362        self.frame_states.clear();
363        self.current_trace_id = 0;
364    }
365}
366
367impl<'a, DB> Inspector<EdbContext<DB>> for OpcodeSnapshotInspector<'a, DB>
368where
369    DB: Database + DatabaseCommit + DatabaseRef + Clone,
370    <CacheDB<DB> as Database>::Error: Clone,
371    <DB as Database>::Error: Clone,
372{
373    fn step(&mut self, interp: &mut Interpreter, context: &mut EdbContext<DB>) {
374        // Record snapshot BEFORE executing the opcode
375        self.record_snapshot(interp, context);
376    }
377
378    fn step_end(&mut self, interp: &mut Interpreter, context: &mut EdbContext<DB>) {
379        // Record snapshot AFTER executing the opcode
380        self.update_storage(interp, context);
381    }
382
383    fn call(
384        &mut self,
385        _context: &mut EdbContext<DB>,
386        _inputs: &mut CallInputs,
387    ) -> Option<CallOutcome> {
388        // Start tracking new execution frame
389        self.push_frame(self.current_trace_id);
390        self.current_trace_id += 1;
391        None
392    }
393
394    fn call_end(
395        &mut self,
396        _context: &mut EdbContext<DB>,
397        _inputs: &CallInputs,
398        outcome: &mut CallOutcome,
399    ) {
400        // Stop tracking current execution frame
401        let Some(frame_id) = self.pop_frame() else { return };
402
403        let Some(entry) = self.trace.get(frame_id.trace_entry_id()) else { return };
404
405        if entry.result != Some(outcome.into()) {
406            // Mismatch in expected outcome, log error
407            error!(
408                "Call outcome mismatch in frame {:?}: expected {:?}, got {:?}",
409                frame_id, entry.result, outcome
410            );
411        }
412    }
413
414    fn create(
415        &mut self,
416        _context: &mut EdbContext<DB>,
417        _inputs: &mut CreateInputs,
418    ) -> Option<CreateOutcome> {
419        // Start tracking new execution frame for contract creation
420        self.push_frame(self.current_trace_id);
421        self.current_trace_id += 1;
422        None
423    }
424
425    fn create_end(
426        &mut self,
427        _context: &mut EdbContext<DB>,
428        _inputs: &CreateInputs,
429        outcome: &mut CreateOutcome,
430    ) {
431        // Stop tracking current execution frame
432        let Some(frame_id) = self.pop_frame() else { return };
433
434        let Some(entry) = self.trace.get(frame_id.trace_entry_id()) else { return };
435
436        if entry.result != Some(outcome.into()) {
437            // Mismatch in expected outcome, log error
438            error!(
439                "Create outcome mismatch in frame {:?}: expected {:?}, got {:?}",
440                frame_id, entry.result, outcome
441            );
442        }
443    }
444}
445
446/// Pretty printing utilities for debugging
447impl<DB> OpcodeSnapshots<DB>
448where
449    DB: Database + DatabaseCommit + DatabaseRef + Clone,
450    <CacheDB<DB> as Database>::Error: Clone,
451    <DB as Database>::Error: Clone,
452{
453    /// Print comprehensive summary with frame details
454    pub fn print_summary(&self) {
455        println!(
456            "\n\x1b[36m╔══════════════════════════════════════════════════════════════════╗\x1b[0m"
457        );
458        println!(
459            "\x1b[36m║              OPCODE SNAPSHOT INSPECTOR SUMMARY                   ║\x1b[0m"
460        );
461        println!(
462            "\x1b[36m╚══════════════════════════════════════════════════════════════════╝\x1b[0m\n"
463        );
464
465        // Overall statistics
466        let total_frames = self.len();
467        let total_snapshots: usize = self.values().map(|v| v.len()).sum();
468
469        println!("\x1b[33m📊 Overall Statistics:\x1b[0m");
470        println!("  Total frames recorded: \x1b[32m{total_frames}\x1b[0m");
471        println!("  Total snapshots recorded:  \x1b[32m{total_snapshots}\x1b[0m");
472
473        if self.is_empty() {
474            println!("\n\x1b[90m  No opcode snapshots were recorded.\x1b[0m");
475            return;
476        }
477
478        // Calculate memory sharing statistics
479        let mut total_memory_instances = 0;
480        let mut unique_memory_instances = HashSet::new();
481        let mut total_calldata_instances = 0;
482        let mut unique_calldata_instances = HashSet::new();
483
484        for snapshots in self.values() {
485            for snapshot in snapshots {
486                total_memory_instances += 1;
487                unique_memory_instances.insert(Arc::as_ptr(&snapshot.memory) as usize);
488                total_calldata_instances += 1;
489                unique_calldata_instances.insert(Arc::as_ptr(&snapshot.calldata) as usize);
490            }
491        }
492
493        let memory_sharing_ratio = if total_memory_instances > 0 {
494            (total_memory_instances - unique_memory_instances.len()) as f64
495                / total_memory_instances as f64
496                * 100.0
497        } else {
498            0.0
499        };
500
501        let calldata_sharing_ratio = if total_calldata_instances > 0 {
502            (total_calldata_instances - unique_calldata_instances.len()) as f64
503                / total_calldata_instances as f64
504                * 100.0
505        } else {
506            0.0
507        };
508
509        println!("\n\x1b[33m💾 Memory Optimization:\x1b[0m");
510        println!("  Memory - Unique instances: \x1b[32m{}\x1b[0m / Total refs: \x1b[32m{}\x1b[0m (Sharing: \x1b[32m{:.1}%\x1b[0m)", 
511            unique_memory_instances.len(), total_memory_instances, memory_sharing_ratio);
512        println!("  Calldata - Unique instances: \x1b[32m{}\x1b[0m / Total refs: \x1b[32m{}\x1b[0m (Sharing: \x1b[32m{:.1}%\x1b[0m)", 
513            unique_calldata_instances.len(), total_calldata_instances, calldata_sharing_ratio);
514
515        println!("\n\x1b[33m📋 Frame Details:\x1b[0m");
516        println!(
517            "\x1b[90m─────────────────────────────────────────────────────────────────\x1b[0m"
518        );
519
520        // Sort frames for consistent output
521        let mut sorted_frames: Vec<_> = self.iter().collect();
522        sorted_frames
523            .sort_by_key(|(frame_id, _)| (frame_id.trace_entry_id(), frame_id.re_entry_count()));
524
525        for (frame_id, snapshots) in sorted_frames {
526            // Frame header with color coding based on snapshot count
527            let color = if snapshots.is_empty() {
528                "\x1b[90m" // Gray for empty
529            } else if snapshots.len() < 10 {
530                "\x1b[32m" // Green for small
531            } else if snapshots.len() < 100 {
532                "\x1b[33m" // Yellow for medium
533            } else {
534                "\x1b[31m" // Red for large
535            };
536
537            println!(
538                "\n  {}Frame {}\x1b[0m (trace.{}, re-entry {})",
539                color,
540                frame_id,
541                frame_id.trace_entry_id(),
542                frame_id.re_entry_count()
543            );
544            println!("  └─ Snapshots: \x1b[36m{}\x1b[0m", snapshots.len());
545
546            if !snapshots.is_empty() {
547                // Show first few and last few snapshots for context
548                let preview_count = 3.min(snapshots.len());
549
550                // First few snapshots
551                println!("     \x1b[90mFirst {preview_count} snapshots:\x1b[0m");
552                for (i, snapshot) in snapshots.iter().take(preview_count).enumerate() {
553                    self.print_snapshot_line(i, snapshot, "     ");
554                }
555
556                // Last few snapshots if there are more
557                if snapshots.len() > preview_count * 2 {
558                    println!(
559                        "     \x1b[90m... {} more snapshots ...\x1b[0m",
560                        snapshots.len() - preview_count * 2
561                    );
562                    println!("     \x1b[90mLast {preview_count} snapshots:\x1b[0m");
563                    let start_idx = snapshots.len() - preview_count;
564                    for (i, snapshot) in snapshots.iter().skip(start_idx).enumerate() {
565                        self.print_snapshot_line(start_idx + i, snapshot, "     ");
566                    }
567                } else if snapshots.len() > preview_count {
568                    // Show remaining snapshots
569                    for (i, snapshot) in snapshots.iter().skip(preview_count).enumerate() {
570                        self.print_snapshot_line(preview_count + i, snapshot, "     ");
571                    }
572                }
573
574                // Summary stats for this frame
575                let total_memory: usize = snapshots.iter().map(|s| s.memory.len()).sum();
576                let avg_stack_depth: f64 = snapshots.iter().map(|s| s.stack.len()).sum::<usize>()
577                    as f64
578                    / snapshots.len() as f64;
579
580                println!("     \x1b[90m├─ Avg stack depth: {avg_stack_depth:.1}\x1b[0m");
581                println!("     \x1b[90m└─ Total memory used: {total_memory} bytes\x1b[0m");
582            }
583        }
584
585        println!(
586            "\n\x1b[90m─────────────────────────────────────────────────────────────────\x1b[0m"
587        );
588    }
589
590    /// Helper to print a single snapshot line
591    fn print_snapshot_line(&self, index: usize, snapshot: &OpcodeSnapshot<DB>, indent: &str) {
592        let opcode = unsafe { OpCode::new_unchecked(snapshot.opcode) };
593        let opcode_str = opcode.as_str().to_string();
594
595        #[allow(deprecated)]
596        let addr_short = format!("{:?}", snapshot.bytecode_address);
597        let addr_display = if addr_short.len() > 10 {
598            format!("{}...{}", &addr_short[0..6], &addr_short[addr_short.len() - 4..])
599        } else {
600            addr_short
601        };
602
603        println!(
604            "{}  [{:4}] PC={:5} \x1b[94m{:18}\x1b[0m @ \x1b[37m{}\x1b[0m | Stack:{:2} Mem:{:6}B",
605            indent,
606            index,
607            snapshot.pc,
608            opcode_str,
609            addr_display,
610            snapshot.stack.len(),
611            snapshot.memory.len()
612        );
613    }
614}