edb_engine/analysis/
variable.rs

1// EDB - Ethereum Debugger
2// Copyright (C) 2024 Zhuo Zhang and Wuqi Zhang
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! Variable analysis and representation for Ethereum smart contract analysis.
18//!
19//! This module provides the core data structures and utilities for representing
20//! and tracking variables during smart contract analysis. It includes:
21//!
22//! - **UVID (Universal Variable Identifier)**: A unique identifier system for
23//!   tracking variables across different scopes and contexts
24//! - **Variable**: The main data structure representing a smart contract variable
25//! - **VariableType**: Enumeration of supported Solidity variable types
26//! - **VariableScope**: Structure for managing variable scope information
27//!
28//! The module is designed to work with the broader analysis framework to provide
29//! comprehensive variable tracking and type information during contract analysis.
30
31use delegate::delegate;
32use derive_more::From;
33use foundry_compilers::artifacts::{
34    ast::SourceLocation, Block, ContractDefinition, Expression, ForStatement, FunctionDefinition,
35    ModifierDefinition, SourceUnit, TypeName, UncheckedBlock, VariableDeclaration,
36};
37use once_cell::sync::OnceCell;
38use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
39use serde::{Deserialize, Serialize};
40use std::sync::Arc;
41
42use crate::analysis::{macros::universal_id, ContractRef, FunctionRef};
43
44// use crate::{
45//     // Visitor, Walk
46// };
47
/// Storage slot at which the `edb_runtime_values` mapping lives.
///
/// The value is documented as the first 8 bytes of the keccak256 hash of the
/// string "EDB_RUNTIME_VALUE_OFFSET". UVID numbering starts from this offset,
/// which is intended to keep the identifier space separate across different
/// analysis contexts.
pub const EDB_RUNTIME_VALUE_OFFSET: u64 = 0x234c_6dfc_3bf8_fed1;
54
55universal_id! {
56    /// A Universal Variable Identifier (UVID) is a unique identifier for a variable in a contract.
57    ///
58    /// UVIDs provide a way to uniquely identify variables across different scopes,
59    /// contexts, and analysis passes. They are used internally by the analysis engine
60    /// to track variable relationships and dependencies.
61    ///
62    /// UVID is also the storage slot that a variable should be stored in storage during debugging. UVID starts from `EDB_RUNTIME_VALUE_OFFSET`.
63    ///
64    /// # Examples
65    ///
66    /// ```rust
67    /// use edb::analysis::variable::{UVID, UVID::next};
68    ///
69    /// let uvid1 = UVID::next();
70    /// let uvid2 = UVID::next();
71    /// assert_ne!(uvid1, uvid2);
72    /// ```
73    UVID => EDB_RUNTIME_VALUE_OFFSET
74}
75
76/// A reference-counted pointer to a Variable.
77///
78/// This type alias provides shared ownership of Variable instances, allowing
79/// multiple parts of the analysis system to reference the same variable
80/// without copying the data.
81#[derive(Clone, derive_more::Debug)]
82#[allow(unused)]
83pub struct VariableRef {
84    inner: Arc<RwLock<Variable>>,
85    /* cached readonly fields*/
86    #[debug(ignore)]
87    name: OnceCell<String>,
88    #[debug(ignore)]
89    declaration: OnceCell<VariableDeclaration>,
90}
91
92impl From<Variable> for VariableRef {
93    fn from(variable: Variable) -> Self {
94        Self::new(variable)
95    }
96}
97
98#[allow(unused)]
99impl VariableRef {
100    /// Creates a new VariableRef from a Variable.
101    pub fn new(inner: Variable) -> Self {
102        Self {
103            inner: Arc::new(RwLock::new(inner)),
104            declaration: OnceCell::new(),
105            name: OnceCell::new(),
106        }
107    }
108
109    pub(crate) fn read(&self) -> RwLockReadGuard<'_, Variable> {
110        self.inner.read()
111    }
112
113    pub(crate) fn write(&self) -> RwLockWriteGuard<'_, Variable> {
114        self.inner.write()
115    }
116
117    /// Returns the unique identifier of this variable.
118    pub fn id(&self) -> UVID {
119        self.inner.read().id()
120    }
121
122    /// Returns the declaration of this variable.
123    pub fn declaration(&self) -> &VariableDeclaration {
124        self.declaration.get_or_init(|| self.inner.read().declaration())
125    }
126
127    /// Returns the type name of this variable.
128    pub fn type_name(&self) -> Option<&TypeName> {
129        self.declaration().type_name.as_ref()
130    }
131
132    /// Returns the base variable of this variable.
133    pub fn base(&self) -> Self {
134        let inner = self.inner.read();
135        if let Some(base) = inner.base() {
136            base
137        } else {
138            self.clone()
139        }
140    }
141
142    /// Returns the function of this variable.
143    pub fn function(&self) -> Option<FunctionRef> {
144        self.inner.read().function()
145    }
146
147    /// Returns the contract of this variable.
148    pub fn contract(&self) -> Option<ContractRef> {
149        self.inner.read().contract()
150    }
151}
152
153impl Serialize for VariableRef {
154    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
155    where
156        S: serde::Serializer,
157    {
158        // Serialize the inner Variable directly
159        self.inner.read().serialize(serializer)
160    }
161}
162
163impl<'de> Deserialize<'de> for VariableRef {
164    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
165    where
166        D: serde::Deserializer<'de>,
167    {
168        // Deserialize as Variable and wrap it in VariableRef
169        let variable = Variable::deserialize(deserializer)?;
170        Ok(Self::new(variable))
171    }
172}
173
174/// Represents a variable in a smart contract with its metadata and type information.
175///
176/// Currently, only local variables are supported.
177///
178/// The Variable struct contains all the information needed to track and analyze
179/// a variable during contract analysis, including its unique identifier, name,
180/// declaration details, type, and scope information.
181///
182/// # Examples
183///
184/// ```rust
185/// use edb::analysis::variable::{Variable, UVID, VariableType, VariableScope};
186/// use foundry_compilers::artifacts::VariableDeclaration;
187///
188/// let variable = Variable {
189///     uvid: UVID(1),
190///     name: "balance".to_string(),
191///     declare: VariableDeclaration::default(),
192///     ty: VariableType::Uint(256),
193///     scope: VariableScope {},
194/// };
195/// ```
196#[derive(Debug, Clone, Serialize, Deserialize)]
197#[non_exhaustive]
198#[allow(clippy::large_enum_variant)]
199pub enum Variable {
200    /// A plain variable with a direct declaration.
201    Plain {
202        /// The unique variable identifier.
203        uvid: UVID,
204        /// The variable declaration from the AST.
205        declaration: VariableDeclaration,
206        /// Whether this is a state variable (true) or local variable (false).
207        state_variable: bool,
208        /// Function that this variable is declared in.
209        function: Option<FunctionRef>,
210        /// Contract that this variable is declared in.
211        contract: Option<ContractRef>,
212    },
213    /// A member access variable (e.g., `obj.field`).
214    Member {
215        /// The base variable being accessed.
216        base: VariableRef,
217        /// The name of the member being accessed.
218        member: String,
219    },
220    /// An array or mapping index access variable (e.g., `arr[index]`).
221    Index {
222        /// The base variable being indexed.
223        base: VariableRef,
224        /// The index expression.
225        index: Expression,
226    },
227    /// An array slice access variable (e.g., `arr[start:end]`).
228    IndexRange {
229        /// The base variable being sliced.
230        base: VariableRef,
231        /// The start index expression.
232        start: Option<Expression>,
233        /// The end index expression.
234        end: Option<Expression>,
235    },
236}
237
238impl Variable {
239    /// Returns the unique identifier of this variable.
240    pub fn id(&self) -> UVID {
241        match self {
242            Self::Plain { uvid, .. } => *uvid,
243            Self::Member { base, .. } => base.read().id(),
244            Self::Index { base, .. } => base.read().id(),
245            Self::IndexRange { base, .. } => base.read().id(),
246        }
247    }
248
249    /// Returns the declaration of this variable.
250    pub fn declaration(&self) -> VariableDeclaration {
251        match self {
252            Self::Plain { declaration, .. } => declaration.clone(),
253            Self::Member { base, .. } => base.read().declaration(),
254            Self::Index { base, .. } => base.read().declaration(),
255            Self::IndexRange { base, .. } => base.read().declaration(),
256        }
257    }
258
259    /// Returns the function of this variable.
260    pub fn function(&self) -> Option<FunctionRef> {
261        match self {
262            Self::Plain { function, .. } => function.clone(),
263            Self::Member { base, .. } => base.read().function(),
264            Self::Index { base, .. } => base.read().function(),
265            Self::IndexRange { base, .. } => base.read().function(),
266        }
267    }
268
269    /// Returns the contract of this variable.
270    pub fn contract(&self) -> Option<ContractRef> {
271        match self {
272            Self::Plain { contract, .. } => contract.clone(),
273            Self::Member { base, .. } => base.read().contract(),
274            Self::Index { base, .. } => base.read().contract(),
275            Self::IndexRange { base, .. } => base.read().contract(),
276        }
277    }
278
279    /// Returns the base variable of this variable.
280    pub fn base(&self) -> Option<VariableRef> {
281        match self {
282            Self::Plain { .. } => None,
283            Self::Member { base, .. }
284            | Self::Index { base, .. }
285            | Self::IndexRange { base, .. } => {
286                if let Some(base) = base.read().base() {
287                    Some(base)
288                } else {
289                    Some(base.clone())
290                }
291            }
292        }
293    }
294
295    /// Returns a human-readable string representation of the variable.
296    ///
297    /// This method provides a concise display format for variables:
298    /// - Plain variables show their declaration name
299    /// - Member access shows `base.member`
300    /// - Index access shows `base[.]`
301    /// - Index range shows `base[..]`
302    pub fn pretty_display(&self) -> String {
303        match self {
304            Self::Plain { declaration, .. } => declaration.name.clone(),
305            Self::Member { base, member } => format!("{}.{}", base.read().pretty_display(), member),
306            Self::Index { base, .. } => format!("{}[.]", base.read().pretty_display()),
307            Self::IndexRange { base, .. } => {
308                format!("{}[..]", base.read().pretty_display())
309            }
310        }
311    }
312}
313
314/// A reference-counted pointer to a VariableScope.
315#[derive(Clone, derive_more::Debug)]
316pub struct VariableScopeRef {
317    inner: Arc<RwLock<VariableScope>>,
318
319    #[debug(ignore)]
320    children: OnceCell<Vec<VariableScopeRef>>,
321    #[debug(ignore)]
322    variables: OnceCell<Vec<VariableRef>>,
323    #[debug(ignore)]
324    variables_recursive: OnceCell<Vec<VariableRef>>,
325}
326
327impl From<VariableScope> for VariableScopeRef {
328    fn from(scope: VariableScope) -> Self {
329        Self::new(scope)
330    }
331}
332
333impl VariableScopeRef {
334    /// Creates a new VariableScopeRef from a VariableScope.
335    pub fn new(inner: VariableScope) -> Self {
336        Self {
337            inner: Arc::new(RwLock::new(inner)),
338            variables_recursive: OnceCell::new(),
339            variables: OnceCell::new(),
340            children: OnceCell::new(),
341        }
342    }
343
344    pub(crate) fn read(&self) -> RwLockReadGuard<'_, VariableScope> {
345        self.inner.read()
346    }
347
348    pub(crate) fn write(&self) -> RwLockWriteGuard<'_, VariableScope> {
349        self.inner.write()
350    }
351}
352
353/* Direct read methods */
354impl VariableScopeRef {
355    delegate! {
356        to self.inner.read() {
357            /// Returns the node ID of the AST node that corresponds to this scope.
358            pub fn ast_id(&self) -> usize;
359            /// Returns the source location of this scope's AST node.
360            pub fn src(&self) -> SourceLocation;
361            /// Returns a human-readable string representation of the scope hierarchy.
362            pub fn pretty_display(&self) -> String;
363        }
364    }
365}
366
367/* Cached read methods */
368impl VariableScopeRef {
369    /// Clears the cached variables and children.
370    pub fn clear_cache(&mut self) {
371        self.variables_recursive.take();
372        self.variables.take();
373        self.children.take();
374    }
375
376    /// Returns the children of this scope.
377    pub fn children(&self) -> &Vec<Self> {
378        self.children.get_or_init(|| self.inner.read().children.clone())
379    }
380
381    /// Returns the variables of this scope.
382    pub fn variables(&self) -> &Vec<VariableRef> {
383        self.variables.get_or_init(|| self.inner.read().variables.clone())
384    }
385
386    /// Returns all variables in this scope and its parent scopes recursively. The variables are cached.
387    pub fn variables_recursive(&self) -> &Vec<VariableRef> {
388        self.variables_recursive.get_or_init(|| {
389            let mut variables = self.variables().clone();
390            variables.extend(
391                self.inner
392                    .read()
393                    .parent
394                    .as_ref()
395                    .map_or(vec![], |parent| parent.variables_recursive().clone()),
396            );
397            variables
398        })
399    }
400}
401
402impl Serialize for VariableScopeRef {
403    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
404    where
405        S: serde::Serializer,
406    {
407        // Serialize the inner VariableScope directly
408        self.inner.read().serialize(serializer)
409    }
410}
411
412impl<'de> Deserialize<'de> for VariableScopeRef {
413    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
414    where
415        D: serde::Deserializer<'de>,
416    {
417        // Deserialize as VariableScope and wrap it in VariableScopeRef
418        let scope = VariableScope::deserialize(deserializer)?;
419        Ok(Self::new(scope))
420    }
421}
422/// Represents the scope and visibility information for a variable.
423///
424/// This structure contains information about where a variable is defined
425/// and how it can be accessed. Currently, this is a placeholder structure
426/// that can be extended with additional scope-related information as needed.
427///
428/// # Future Extensions
429///
430/// This structure may be extended to include:
431/// - Function scope information
432/// - Contract scope information
433/// - Visibility modifiers (public, private, internal, external)
434/// - Storage location (storage, memory, calldata)
435#[derive(Clone, Serialize, Deserialize, derive_more::Debug)]
436#[non_exhaustive]
437pub struct VariableScope {
438    /// The AST node that defines this scope
439    pub node: ScopeNode,
440    /// Variables declared in this scope, mapped by their UVID
441    pub variables: Vec<VariableRef>,
442    /// Parent scope
443    pub parent: Option<VariableScopeRef>,
444    /// Child scopes contained within this scope
445    pub children: Vec<VariableScopeRef>,
446}
447
448impl VariableScope {
449    /// Returns the unique identifier of this scope, i.e., the node ID of the AST node that corresponds to this scope.
450    pub fn ast_id(&self) -> usize {
451        self.node.ast_id()
452    }
453
454    /// Returns the source location of this scope's AST node.
455    pub fn src(&self) -> SourceLocation {
456        self.node.src()
457    }
458
459    /// Returns all variables in this scope and its parent scopes recursively. The variables are not cached.
460    pub fn variables_recursive(&self) -> Vec<VariableRef> {
461        let mut variables = self.variables.clone();
462        variables.extend(
463            self.parent.clone().map_or(vec![], |parent| parent.read().variables_recursive()),
464        );
465        variables
466    }
467
468    /// Returns a human-readable string representation of the scope hierarchy.
469    ///
470    /// This method displays the scope and all its child scopes in a tree-like format,
471    /// showing the variables contained in each scope.
472    pub fn pretty_display(&self) -> String {
473        self.pretty_display_with_indent(0)
474    }
475
476    fn pretty_display_with_indent(&self, indent_level: usize) -> String {
477        let mut result = String::new();
478        let indent = "  ".repeat(indent_level);
479
480        // Print current scope's variables
481        if self.variables.is_empty() {
482            result.push_str(&format!("{}Scope({}): {{}}", indent, self.node.variant_name()));
483        } else {
484            let mut variable_names: Vec<String> =
485                self.variables.iter().map(|var| var.read().pretty_display()).collect();
486            variable_names.sort(); // Sort for consistent output
487            result.push_str(&format!(
488                "{}Scope({}): {{{}}}",
489                indent,
490                self.node.variant_name(),
491                variable_names.join(", ")
492            ));
493        }
494
495        // Print children scopes recursively with increased indentation
496        for child in &self.children {
497            result.push('\n');
498            result.push_str(&child.read().pretty_display_with_indent(indent_level + 1));
499        }
500
501        result
502    }
503}
504
/// Represents the type of a smart contract variable.
///
/// This enum covers the basic Solidity types that are commonly used in
/// smart contract analysis. The types are designed to be extensible for
/// future additions.
///
/// # Examples
///
/// ```rust
/// use edb::analysis::variable::VariableType;
///
/// let uint_type = VariableType::Uint(256);
/// let address_type = VariableType::Address;
/// let bool_type = VariableType::Bool;
/// ```
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum VariableType {
    /// A `uint` type variable. The number of bits is specified by the parameter.
    ///
    /// For instance, `Uint(8)` denotes a `uint8` Solidity type, while `Uint(256)`
    /// represents a `uint256` (the default uint type in Solidity).
    ///
    /// The width is stored as `u16` because the largest width, 256, does not
    /// fit into a `u8`.
    Uint(u16),
    /// An `address` type variable representing an Ethereum address.
    ///
    /// This type is used for variables that store 20-byte Ethereum addresses.
    Address,
    /// A `bool` type variable representing a boolean value.
    ///
    /// This type is used for variables that can be either `true` or `false`.
    Bool,
}
537
538/// Represents different types of AST nodes that can define variable scopes.
539///
540/// This enum wraps various Solidity AST node types that create new variable scopes,
541/// allowing the variable analyzer to track scope boundaries and variable visibility.
542#[derive(Debug, Clone, From, Serialize, Deserialize)]
543#[allow(clippy::large_enum_variant)]
544pub enum ScopeNode {
545    /// A source unit scope (file-level).
546    SourceUnit(#[from] SourceUnit),
547    /// A block statement scope.
548    Block(#[from] Block),
549    /// An unchecked block scope.
550    UncheckedBlock(#[from] UncheckedBlock),
551    /// A for loop scope.
552    ForStatement(#[from] ForStatement),
553    /// A contract definition scope.
554    ContractDefinition(#[from] ContractDefinition),
555    /// A function definition scope.
556    FunctionDefinition(#[from] FunctionDefinition),
557    /// A modifier definition scope.
558    ModifierDefinition(#[from] ModifierDefinition),
559}
560
561impl ScopeNode {
562    /// Returns the node ID of the AST node.
563    pub fn ast_id(&self) -> usize {
564        match self {
565            Self::SourceUnit(source_unit) => source_unit.id,
566            Self::Block(block) => block.id,
567            Self::UncheckedBlock(unchecked_block) => unchecked_block.id,
568            Self::ForStatement(for_statement) => for_statement.id,
569            Self::ContractDefinition(contract_definition) => contract_definition.id,
570            Self::FunctionDefinition(function_definition) => function_definition.id,
571            Self::ModifierDefinition(modifier_definition) => modifier_definition.id,
572        }
573    }
574
575    /// Returns the source location of the wrapped AST node.
576    pub fn src(&self) -> SourceLocation {
577        match self {
578            Self::SourceUnit(source_unit) => source_unit.src,
579            Self::Block(block) => block.src,
580            Self::UncheckedBlock(unchecked_block) => unchecked_block.src,
581            Self::ForStatement(for_statement) => for_statement.src,
582            Self::ContractDefinition(contract_definition) => contract_definition.src,
583            Self::FunctionDefinition(function_definition) => function_definition.src,
584            Self::ModifierDefinition(modifier_definition) => modifier_definition.src,
585        }
586    }
587
588    /// Returns a string representation of the scope node variant name.
589    pub fn variant_name(&self) -> &'static str {
590        match self {
591            Self::SourceUnit(_) => "SourceUnit",
592            Self::Block(_) => "Block",
593            Self::UncheckedBlock(_) => "UncheckedBlock",
594            Self::ForStatement(_) => "ForStatement",
595            Self::ContractDefinition(_) => "ContractDefinition",
596            Self::FunctionDefinition(_) => "FunctionDefinition",
597            Self::ModifierDefinition(_) => "ModifierDefinition",
598        }
599    }
600}