edb_engine/snapshot/
analysis.rs

1// EDB - Ethereum Debugger
2// Copyright (C) 2024 Zhuo Zhang and Wuqi Zhang
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! Snapshot analysis for navigation and call stack tracking.
18//!
19//! This module provides sophisticated analysis capabilities for snapshots to enable
20//! efficient debugging navigation. It analyzes execution flows, call hierarchies,
21//! and snapshot relationships to support step-over, step-into, and step-out operations.
22//!
23//! # Core Analysis Features
24//!
25//! ## Next/Previous Step Analysis
26//! - **Step Navigation**: Determines next/previous snapshots for debugging navigation
27//! - **Call Stack Tracking**: Maintains function call hierarchy across snapshots
28//! - **Return Analysis**: Identifies function returns and modifier exits
29//! - **Hole Detection**: Handles gaps in snapshot coverage for complex call patterns
30//!
31//! ## Source-Level Analysis
32//! The analysis engine provides specialized handling for source-level (hook-based) snapshots:
33//! - **Function Entry/Exit Tracking**: Identifies function and modifier boundaries
34//! - **Internal Call Detection**: Recognizes internal function calls and library calls
35//! - **Call Stack Management**: Maintains accurate call stack state across snapshots
36//! - **Return Chain Analysis**: Handles complex return patterns through nested calls
37//!
38//! ## Opcode-Level Analysis
39//! For opcode snapshots, the analysis provides:
40//! - **Sequential Linking**: Links consecutive opcode snapshots within execution frames
41//! - **Frame Boundary Detection**: Identifies transitions between execution frames
42//! - **Fallback Coverage**: Ensures complete navigation even without source-level analysis
43//!
44//! # Navigation Support
45//!
46//! The analysis results enable sophisticated debugging operations:
47//! - **Step Over**: Navigate to the next snapshot in the same scope
48//! - **Step Into**: Enter function calls when available
49//! - **Step Out**: Exit current function to parent scope
50//! - **Reverse Navigation**: Support for backwards debugging through previous snapshots
51
52use std::collections::{HashMap, HashSet};
53
54use alloy_primitives::Address;
55use edb_common::types::{ExecutionFrameId, Trace};
56use eyre::Result;
57use itertools::Itertools;
58use revm::{database::CacheDB, Database, DatabaseCommit, DatabaseRef};
59use tracing::{debug, error, warn};
60
61use crate::{
62    analysis::{AnalysisResult, StepRef, UFID},
63    Snapshot, Snapshots,
64};
65
66/// Trait for analyzing snapshots to enable debugging navigation.
67///
68/// This trait provides the interface for analyzing snapshot collections to determine
69/// navigation relationships, call stack hierarchies, and execution flow patterns.
70pub trait SnapshotAnalysis {
71    /// Analyze snapshots using execution trace and source code analysis results.
72    ///
73    /// This method processes the snapshot collection to establish navigation relationships,
74    /// call stack hierarchies, and execution flow patterns based on the execution trace
75    /// and source code analysis results.
76    fn analyze(&mut self, trace: &Trace, analysis: &HashMap<Address, AnalysisResult>)
77        -> Result<()>;
78}
79
80impl<DB> SnapshotAnalysis for Snapshots<DB>
81where
82    DB: Database + DatabaseCommit + DatabaseRef + Clone,
83    <CacheDB<DB> as Database>::Error: Clone,
84    <DB as Database>::Error: Clone,
85{
86    fn analyze(
87        &mut self,
88        trace: &Trace,
89        analysis: &HashMap<Address, AnalysisResult>,
90    ) -> Result<()> {
91        self.analyze_next_steps(trace, analysis)
92    }
93}
94
95// Implementation of snapshot analysis for next/previous step navigation
96impl<DB> Snapshots<DB>
97where
98    DB: Database + DatabaseCommit + DatabaseRef + Clone,
99    <CacheDB<DB> as Database>::Error: Clone,
100    <DB as Database>::Error: Clone,
101{
102    /// Analyze next step relationships for debugging navigation.
103    ///
104    /// This method processes all snapshots to determine their next/previous relationships,
105    /// handling both opcode-level and source-level analysis with call stack tracking.
106    fn analyze_next_steps(
107        &mut self,
108        trace: &Trace,
109        analysis: &HashMap<Address, AnalysisResult>,
110    ) -> Result<()> {
111        let mut holed_snapshots: HashSet<usize> = HashSet::new();
112
113        for (entry_id, snapshots) in &self
114            .iter_mut()
115            .sorted_by_key(|(f_id, _)| f_id.trace_entry_id())
116            .chunk_by(|(f_id, _)| f_id.trace_entry_id())
117        {
118            let snapshots_vec: Vec<_> = snapshots.collect();
119
120            // The last snapshot of a entry will become holed
121            let Some((_, last_snapshot)) = snapshots_vec.last() else {
122                continue;
123            };
124            holed_snapshots.insert(last_snapshot.id());
125
126            if last_snapshot.is_opcode() {
127                Self::analyze_next_steps_for_opcode(snapshots_vec)?;
128            } else {
129                let bytecode_address = trace
130                    .get(entry_id)
131                    .ok_or_else(|| {
132                        eyre::eyre!("Trace entry {} not found for snapshot analysis", entry_id)
133                    })?
134                    .code_address;
135                let analysis_result = analysis.get(&bytecode_address).ok_or_else(|| {
136                    eyre::eyre!(
137                        "Analysis result not found for bytecode address {}",
138                        bytecode_address
139                    )
140                })?;
141                Self::analyze_next_steps_for_source(
142                    snapshots_vec,
143                    &mut holed_snapshots,
144                    analysis_result,
145                )?;
146            }
147        }
148
149        // Handle holed snapshots
150        self.find_next_step_for_holed_snapshots(trace, holed_snapshots)?;
151        self.analyze_prev_steps()?;
152
153        Ok(())
154    }
155
156    /// Analyze previous step relationships for reverse navigation.
157    ///
158    /// This method establishes the previous step links for all snapshots,
159    /// enabling backwards debugging navigation through the execution history.
160    fn analyze_prev_steps(&mut self) -> Result<()> {
161        for i in 0..self.len() {
162            let current_snapshot = &self[i].1;
163            let current_id = current_snapshot.id();
164            let next_id = current_snapshot
165                .next_id()
166                .ok_or_else(|| eyre::eyre!("Snapshot {} does not have next_id set", current_id))?;
167
168            let next_snapshot = &mut self[next_id].1;
169            if next_snapshot.prev_id().is_none() {
170                // The first snapshot whose next_id is the subject one, will be the prev_id
171                // of the current snapshot
172                next_snapshot.set_prev_id(current_id);
173            }
174        }
175
176        self.iter_mut().filter(|(_, snapshot)| snapshot.prev_id().is_none()).for_each(
177            |(_, snapshot)| {
178                snapshot.set_prev_id(snapshot.id().saturating_sub(1));
179            },
180        );
181
182        Ok(())
183    }
184
185    /// Find next steps for holed snapshots that don't have direct successors.
186    ///
187    /// This method handles snapshots that represent the end of execution frames
188    /// by finding their next steps in ancestor frames, ensuring complete navigation
189    /// coverage even for complex call patterns.
190    fn find_next_step_for_holed_snapshots(
191        &mut self,
192        trace: &Trace,
193        holed_snapshots: HashSet<usize>,
194    ) -> Result<()> {
195        let last_snapshot_id = self.len().saturating_sub(1);
196
197        // Handle holed snapshots
198        for current_id in holed_snapshots {
199            let entry_id = self[current_id].0.trace_entry_id();
200
201            // Try to find the first snapshot in the ancestor frames that is after the current snapshot
202            let mut next_id = None;
203            let mut entry = trace
204                .get(entry_id)
205                .ok_or_else(|| eyre::eyre!("Trace entry {} not found", entry_id))?;
206            while let Some(parent_id) = entry.parent_id {
207                // Find the first snapshot in the parent frame that is after the current snapshot
208                if let Some((_, parent_snapshot)) = self
209                    .iter()
210                    .skip(current_id.saturating_add(1))
211                    .find(|(f_id, _)| f_id.trace_entry_id() == parent_id)
212                {
213                    next_id = Some(parent_snapshot.id());
214                    break;
215                }
216
217                // Move up to the next parent
218                entry = trace
219                    .get(parent_id)
220                    .ok_or_else(|| eyre::eyre!("Trace entry {} not found", parent_id))?;
221            }
222
223            self[current_id].1.set_next_id(next_id.unwrap_or(last_snapshot_id));
224        }
225
226        Ok(())
227    }
228
229    /// Analyze next steps for opcode-level snapshots.
230    ///
231    /// For opcode snapshots, the analysis is straightforward: link each snapshot
232    /// to the next one in the same execution frame, providing sequential navigation.
233    fn analyze_next_steps_for_opcode(
234        mut snapshots: Vec<&mut (ExecutionFrameId, Snapshot<DB>)>,
235    ) -> Result<()> {
236        for i in 0..snapshots.len().saturating_sub(1) {
237            let (_, next_snapshot) = &snapshots[i + 1];
238            let next_id = next_snapshot.id();
239
240            // Link to the next snapshot in the same frame
241            let (_, current_snapshot) = &mut snapshots[i];
242            current_snapshot.set_next_id(next_id);
243        }
244
245        Ok(())
246    }
247
248    /// Analyze next steps for source-level (hook-based) snapshots.
249    ///
250    /// This is the most complex part of snapshot analysis, handling function calls,
251    /// returns, modifier executions, and call stack management to provide accurate
252    /// navigation for source-level debugging.
253    fn analyze_next_steps_for_source(
254        mut snapshots: Vec<&mut (ExecutionFrameId, Snapshot<DB>)>,
255        holed_snapshots: &mut HashSet<usize>,
256        analysis: &AnalysisResult,
257    ) -> Result<()> {
258        if snapshots.len() <= 1 {
259            return Ok(());
260        }
261
262        let mut stack: Vec<CallStackEntry> = Vec::new();
263        stack.push(CallStackEntry {
264            func_info: FunctionInfo::Unknown,
265            callsite: None,
266            return_after_callsite: false,
267        });
268
269        let is_entry =
270            |step: &StepRef| step.function_entry().is_some() || step.modifier_entry().is_some();
271
272        for i in 0..snapshots.len().saturating_sub(1) {
273            let usid = snapshots[i]
274                .1
275                .usid()
276                .ok_or_else(|| eyre::eyre!("Snapshot {} does not have usid set", i))?;
277            let step = analysis
278                .usid_to_step
279                .get(&usid)
280                .ok_or_else(|| eyre::eyre!("No step found for USID {}", u64::from(usid)))?;
281            let ufid = step.ufid();
282            let contract = analysis.ufid_to_function.get(&ufid).and_then(|f| f.contract());
283
284            let next_id = snapshots[i + 1].1.id();
285            let next_usid = snapshots[i + 1]
286                .1
287                .usid()
288                .ok_or_else(|| eyre::eyre!("Snapshot {} does not have usid set", i + 1))?;
289            let next_step = analysis
290                .usid_to_step
291                .get(&next_usid)
292                .ok_or_else(|| eyre::eyre!("No step found for USID {}", u64::from(next_usid)))?;
293            let next_ufid = next_step.ufid();
294            let next_contract =
295                analysis.ufid_to_function.get(&next_ufid).and_then(|f| f.contract());
296
297            // Step 0: To avoid annoying crash, we always add a placeholder stack entry
298            if stack.is_empty() {
299                error!("Call stack is empty at Snapshot {} (step 0)", snapshots[i].1.id());
300                stack.push(CallStackEntry {
301                    func_info: FunctionInfo::Unknown,
302                    callsite: None,
303                    return_after_callsite: false,
304                });
305            }
306
307            // Step 1: try to update the current function entry
308            let stack_entry = stack.last_mut().ok_or_else(|| {
309                eyre::eyre!("Call stack is empty at step 1 (which is impossible)")
310            })?;
311            if let Some(ufid) = step.function_entry() {
312                stack_entry.func_info.with_function(ufid);
313            }
314            if let Some(ufid) = step.modifier_entry() {
315                stack_entry.func_info.with_modifier(ufid);
316            }
317            if !stack_entry.func_info.is_valid() {
318                // We output the error but do not stop here.
319                error!("Invalid function info in call stack");
320            }
321
322            // Step 2: check whether this step contains any valid internal call
323            //  a) The step contains internal calls
324            if step.function_calls()
325                > snapshots[i + 1].0.re_entry_count() - snapshots[i].0.re_entry_count()
326                && is_entry(next_step)
327            {
328                // This step contains at least one valid internal call
329                stack.push(CallStackEntry {
330                    func_info: FunctionInfo::Unknown,
331                    callsite: Some(Callsite {
332                        id: i,
333                        callees: step.function_calls()
334                            - (snapshots[i + 1].0.re_entry_count()
335                                - snapshots[i].0.re_entry_count()),
336                    }),
337                    return_after_callsite: step.contains_return(),
338                });
339                continue;
340            }
341            // b) There is overrided operation (e.g., `+`)
342            if is_entry(next_step) && contract.is_some() && next_contract.is_none() {
343                // This is likely an internal call to a library function
344                warn!(
345                    "Assuming an internal call to a library function at Snapshot {}",
346                    snapshots[i].1.id()
347                );
348                stack.push(CallStackEntry {
349                    func_info: FunctionInfo::Unknown,
350                    callsite: Some(Callsite { id: i, callees: usize::MAX }),
351                    return_after_callsite: step.contains_return(),
352                });
353                continue;
354            }
355
356            // Step 3: update next id since we are certain for steps without internal calls
357            snapshots[i].1.set_next_id(next_id);
358
359            // Step 4: check return
360            let Some(stack_entry) = stack.last() else {
361                error!("Call stack is empty at Snapshot {} (step 4)", snapshots[i].1.id());
362                continue;
363            };
364
365            let will_return = match &stack_entry.func_info {
366                FunctionInfo::FunctionOnly(..) | FunctionInfo::ModifiedFunction { .. } => {
367                    !stack_entry.func_info.contains_ufid(next_ufid) || is_entry(next_step)
368                }
369                FunctionInfo::ModifierOnly(..) => {
370                    !(stack_entry.func_info.contains_ufid(next_ufid) || is_entry(next_step))
371                }
372                _ => !is_entry(next_step),
373            } || step.contains_return();
374            if !will_return {
375                // There is nothing we need to do if this step will not return
376                continue;
377            }
378
379            // Step 5: handle returning chain
380            loop {
381                let Some(mut stack_entry) = stack.pop() else {
382                    error!("Call stack is empty at Snapshot {} (step 5)", snapshots[i].1.id());
383                    break;
384                };
385
386                if stack_entry.callsite.is_none() {
387                    // We have returned from the top level, nothing more to do
388                    break;
389                }
390
391                // We have finished one call
392                let callsite = stack_entry.callsite.as_mut().unwrap();
393                callsite.callees = callsite.callees.saturating_sub(1);
394
395                let Some(parent_entry) = stack.last() else {
396                    // We have returned from the top level, nothing more to do
397                    break;
398                };
399
400                // Check whether we are done with this callsite (callsite_certainly_done has higher priority)
401                let callsite_certainly_done =
402                    parent_entry.func_info.contains_ufid(next_ufid) && !is_entry(next_step);
403                let callsite_certainly_not_done = next_contract.is_none(); // We are still in free functions
404                if (callsite.callees > 0 || callsite_certainly_not_done) && !callsite_certainly_done
405                {
406                    stack_entry.func_info = FunctionInfo::Unknown;
407                    stack.push(stack_entry);
408                    break;
409                }
410
411                let continue_to_return = match &parent_entry.func_info {
412                    FunctionInfo::FunctionOnly(..) | FunctionInfo::ModifiedFunction { .. } => {
413                        !parent_entry.func_info.contains_ufid(next_ufid) || is_entry(next_step)
414                    }
415                    FunctionInfo::ModifierOnly(..) => {
416                        !(parent_entry.func_info.contains_ufid(next_ufid) || is_entry(next_step))
417                    }
418                    _ => !is_entry(next_step),
419                } || stack_entry.return_after_callsite;
420
421                // We can confidently update the snapshot
422                snapshots[callsite.id].1.set_next_id(next_id);
423
424                if !continue_to_return {
425                    break;
426                }
427            }
428        }
429
430        while let Some(CallStackEntry { callsite: Some(Callsite { id, .. }), .. }) = stack.pop() {
431            debug!("Add snapshot as a hole: {}", snapshots[id].1.id());
432            holed_snapshots.insert(snapshots[id].1.id());
433        }
434
435        Ok(())
436    }
437}
438
439#[derive(Debug)]
440struct CallStackEntry {
441    func_info: FunctionInfo,
442    callsite: Option<Callsite>,
443    return_after_callsite: bool,
444}
445
446#[derive(Debug)]
447struct Callsite {
448    // The id in the snapshot
449    // NOTE: it is not snapshot id
450    id: usize,
451    // Number of callees that we haven't visited
452    callees: usize,
453}
454
455#[derive(Debug)]
456enum FunctionInfo {
457    Unknown,
458    ModifierOnly(Vec<UFID>),
459    FunctionOnly(UFID),
460    ModifiedFunction { func: UFID, modifiers: Vec<UFID> },
461    Invalid,
462}
463
464impl FunctionInfo {
465    fn contains_ufid(&self, ufid: UFID) -> bool {
466        match self {
467            Self::Unknown => false,
468            Self::ModifierOnly(ids) => ids.contains(&ufid),
469            Self::FunctionOnly(id) => *id == ufid,
470            Self::ModifiedFunction { func, modifiers } => {
471                *func == ufid || modifiers.contains(&ufid)
472            }
473            Self::Invalid => false,
474        }
475    }
476
477    fn is_valid(&self) -> bool {
478        !matches!(self, Self::Invalid)
479    }
480
481    fn _certainly_in_body(&self) -> bool {
482        matches!(self, Self::FunctionOnly(..) | Self::ModifiedFunction { .. })
483    }
484
485    fn with_modifier(&mut self, modifier: UFID) {
486        match self {
487            Self::Unknown => {
488                *self = Self::ModifierOnly(vec![modifier]);
489            }
490            Self::ModifierOnly(ids) => {
491                ids.push(modifier);
492            }
493            Self::FunctionOnly(func) => {
494                *self = Self::ModifiedFunction { func: *func, modifiers: vec![modifier] };
495            }
496            Self::ModifiedFunction { modifiers, .. } => {
497                modifiers.push(modifier);
498            }
499            Self::Invalid => {}
500        }
501    }
502
503    fn with_function(&mut self, function: UFID) {
504        match self {
505            Self::Unknown => *self = Self::FunctionOnly(function),
506            Self::ModifierOnly(ids) => {
507                *self = Self::ModifiedFunction { func: function, modifiers: ids.clone() }
508            }
509            Self::FunctionOnly(..) => *self = Self::Invalid,
510            Self::ModifiedFunction { .. } => *self = Self::Invalid,
511            Self::Invalid => {}
512        }
513    }
514}