edb_engine/analysis/variable.rs
1// EDB - Ethereum Debugger
2// Copyright (C) 2024 Zhuo Zhang and Wuqi Zhang
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! Variable analysis and representation for Ethereum smart contract analysis.
18//!
19//! This module provides the core data structures and utilities for representing
20//! and tracking variables during smart contract analysis. It includes:
21//!
22//! - **UVID (Universal Variable Identifier)**: A unique identifier system for
23//! tracking variables across different scopes and contexts
24//! - **Variable**: The main data structure representing a smart contract variable
25//! - **VariableType**: Enumeration of supported Solidity variable types
26//! - **VariableScope**: Structure for managing variable scope information
27//!
28//! The module is designed to work with the broader analysis framework to provide
29//! comprehensive variable tracking and type information during contract analysis.
30
31use delegate::delegate;
32use derive_more::From;
33use foundry_compilers::artifacts::{
34 ast::SourceLocation, Block, ContractDefinition, Expression, ForStatement, FunctionDefinition,
35 ModifierDefinition, SourceUnit, TypeName, UncheckedBlock, VariableDeclaration,
36};
37use once_cell::sync::OnceCell;
38use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
39use serde::{Deserialize, Serialize};
40use std::sync::Arc;
41
42use crate::analysis::{macros::universal_id, ContractRef, FunctionRef};
43
44// use crate::{
45// // Visitor, Walk
46// };
47
/// The slot where the `edb_runtime_values` mapping is stored.
///
/// This constant represents the first 8 bytes of the keccak256 hash of the string
/// "EDB_RUNTIME_VALUE_OFFSET". It is the starting value given to the `universal_id!`
/// invocation that defines `UVID`, so generated UVIDs begin at this offset and keep
/// their own identifier space.
pub const EDB_RUNTIME_VALUE_OFFSET: u64 = 0x234c6dfc3bf8fed1;
54
universal_id! {
    /// A Universal Variable Identifier (UVID) is a unique identifier for a variable in a
    /// contract.
    ///
    /// UVIDs provide a way to uniquely identify variables across different scopes,
    /// contexts, and analysis passes. They are used internally by the analysis engine
    /// to track variable relationships and dependencies.
    ///
    /// A UVID also serves as the storage slot in which the variable's value is kept
    /// during debugging. UVIDs start from `EDB_RUNTIME_VALUE_OFFSET`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use edb::analysis::variable::UVID;
    ///
    /// let uvid1 = UVID::next();
    /// let uvid2 = UVID::next();
    /// assert_ne!(uvid1, uvid2);
    /// ```
    UVID => EDB_RUNTIME_VALUE_OFFSET
}
75
/// A shared, lock-protected handle to a [`Variable`].
///
/// The variable lives behind `Arc<RwLock<..>>`, so cloning a `VariableRef` yields another
/// handle to the same underlying variable rather than a copy of its data.
///
/// Besides the shared pointer, each handle owns lazily filled caches of read-only
/// data. The caches belong to the handle, not to the shared variable: a clone gets
/// its own copy of whatever was cached at the time of cloning.
#[derive(Clone, derive_more::Debug)]
#[allow(unused)]
pub struct VariableRef {
    /// The shared variable itself.
    inner: Arc<RwLock<Variable>>,
    /* cached readonly fields*/
    /// Cache slot for the variable name; excluded from `Debug` output.
    #[debug(ignore)]
    name: OnceCell<String>,
    /// Cache of the declaration, filled on the first call to `declaration()`;
    /// excluded from `Debug` output.
    #[debug(ignore)]
    declaration: OnceCell<VariableDeclaration>,
}
91
92impl From<Variable> for VariableRef {
93 fn from(variable: Variable) -> Self {
94 Self::new(variable)
95 }
96}
97
98#[allow(unused)]
99impl VariableRef {
100 /// Creates a new VariableRef from a Variable.
101 pub fn new(inner: Variable) -> Self {
102 Self {
103 inner: Arc::new(RwLock::new(inner)),
104 declaration: OnceCell::new(),
105 name: OnceCell::new(),
106 }
107 }
108
109 pub(crate) fn read(&self) -> RwLockReadGuard<'_, Variable> {
110 self.inner.read()
111 }
112
113 pub(crate) fn write(&self) -> RwLockWriteGuard<'_, Variable> {
114 self.inner.write()
115 }
116
117 /// Returns the unique identifier of this variable.
118 pub fn id(&self) -> UVID {
119 self.inner.read().id()
120 }
121
122 /// Returns the declaration of this variable.
123 pub fn declaration(&self) -> &VariableDeclaration {
124 self.declaration.get_or_init(|| self.inner.read().declaration())
125 }
126
127 /// Returns the type name of this variable.
128 pub fn type_name(&self) -> Option<&TypeName> {
129 self.declaration().type_name.as_ref()
130 }
131
132 /// Returns the base variable of this variable.
133 pub fn base(&self) -> Self {
134 let inner = self.inner.read();
135 if let Some(base) = inner.base() {
136 base
137 } else {
138 self.clone()
139 }
140 }
141
142 /// Returns the function of this variable.
143 pub fn function(&self) -> Option<FunctionRef> {
144 self.inner.read().function()
145 }
146
147 /// Returns the contract of this variable.
148 pub fn contract(&self) -> Option<ContractRef> {
149 self.inner.read().contract()
150 }
151}
152
153impl Serialize for VariableRef {
154 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
155 where
156 S: serde::Serializer,
157 {
158 // Serialize the inner Variable directly
159 self.inner.read().serialize(serializer)
160 }
161}
162
163impl<'de> Deserialize<'de> for VariableRef {
164 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
165 where
166 D: serde::Deserializer<'de>,
167 {
168 // Deserialize as Variable and wrap it in VariableRef
169 let variable = Variable::deserialize(deserializer)?;
170 Ok(Self::new(variable))
171 }
172}
173
/// Represents a variable in a smart contract, or an access path derived from one.
///
/// A variable is either:
///
/// - `Plain`: a variable backed directly by an AST declaration, carrying its own UVID
///   and the optional function/contract it was declared in; or
/// - `Member`, `Index`, `IndexRange`: an access expression built on top of another
///   variable (`obj.field`, `arr[index]`, `arr[start:end]`), which holds a handle to
///   that base variable and forwards queries such as `id()` and `declaration()` to it.
///
/// NOTE(review): earlier documentation stated that only local variables are
/// supported, yet `Plain` carries a `state_variable` flag — confirm the current
/// level of support.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
#[allow(clippy::large_enum_variant)]
pub enum Variable {
    /// A plain variable with a direct declaration.
    Plain {
        /// The unique variable identifier.
        uvid: UVID,
        /// The variable declaration from the AST.
        declaration: VariableDeclaration,
        /// Whether this is a state variable (true) or local variable (false).
        state_variable: bool,
        /// Function that this variable is declared in.
        function: Option<FunctionRef>,
        /// Contract that this variable is declared in.
        contract: Option<ContractRef>,
    },
    /// A member access variable (e.g., `obj.field`).
    Member {
        /// The base variable being accessed.
        base: VariableRef,
        /// The name of the member being accessed.
        member: String,
    },
    /// An array or mapping index access variable (e.g., `arr[index]`).
    Index {
        /// The base variable being indexed.
        base: VariableRef,
        /// The index expression.
        index: Expression,
    },
    /// An array slice access variable (e.g., `arr[start:end]`).
    IndexRange {
        /// The base variable being sliced.
        base: VariableRef,
        /// The start index expression; `None` when the slice has no explicit start.
        start: Option<Expression>,
        /// The end index expression; `None` when the slice has no explicit end.
        end: Option<Expression>,
    },
}
237
238impl Variable {
239 /// Returns the unique identifier of this variable.
240 pub fn id(&self) -> UVID {
241 match self {
242 Self::Plain { uvid, .. } => *uvid,
243 Self::Member { base, .. } => base.read().id(),
244 Self::Index { base, .. } => base.read().id(),
245 Self::IndexRange { base, .. } => base.read().id(),
246 }
247 }
248
249 /// Returns the declaration of this variable.
250 pub fn declaration(&self) -> VariableDeclaration {
251 match self {
252 Self::Plain { declaration, .. } => declaration.clone(),
253 Self::Member { base, .. } => base.read().declaration(),
254 Self::Index { base, .. } => base.read().declaration(),
255 Self::IndexRange { base, .. } => base.read().declaration(),
256 }
257 }
258
259 /// Returns the function of this variable.
260 pub fn function(&self) -> Option<FunctionRef> {
261 match self {
262 Self::Plain { function, .. } => function.clone(),
263 Self::Member { base, .. } => base.read().function(),
264 Self::Index { base, .. } => base.read().function(),
265 Self::IndexRange { base, .. } => base.read().function(),
266 }
267 }
268
269 /// Returns the contract of this variable.
270 pub fn contract(&self) -> Option<ContractRef> {
271 match self {
272 Self::Plain { contract, .. } => contract.clone(),
273 Self::Member { base, .. } => base.read().contract(),
274 Self::Index { base, .. } => base.read().contract(),
275 Self::IndexRange { base, .. } => base.read().contract(),
276 }
277 }
278
279 /// Returns the base variable of this variable.
280 pub fn base(&self) -> Option<VariableRef> {
281 match self {
282 Self::Plain { .. } => None,
283 Self::Member { base, .. }
284 | Self::Index { base, .. }
285 | Self::IndexRange { base, .. } => {
286 if let Some(base) = base.read().base() {
287 Some(base)
288 } else {
289 Some(base.clone())
290 }
291 }
292 }
293 }
294
295 /// Returns a human-readable string representation of the variable.
296 ///
297 /// This method provides a concise display format for variables:
298 /// - Plain variables show their declaration name
299 /// - Member access shows `base.member`
300 /// - Index access shows `base[.]`
301 /// - Index range shows `base[..]`
302 pub fn pretty_display(&self) -> String {
303 match self {
304 Self::Plain { declaration, .. } => declaration.name.clone(),
305 Self::Member { base, member } => format!("{}.{}", base.read().pretty_display(), member),
306 Self::Index { base, .. } => format!("{}[.]", base.read().pretty_display()),
307 Self::IndexRange { base, .. } => {
308 format!("{}[..]", base.read().pretty_display())
309 }
310 }
311 }
312}
313
/// A shared, lock-protected handle to a [`VariableScope`].
///
/// Alongside the shared pointer, each handle keeps lazily filled caches that are
/// populated by the accessors of the same name (`children()`, `variables()`,
/// `variables_recursive()`) and reset by `clear_cache()`. The caches belong to the
/// handle: they are snapshots taken when first requested and are not refreshed
/// when the underlying scope is modified.
#[derive(Clone, derive_more::Debug)]
pub struct VariableScopeRef {
    /// The shared scope itself.
    inner: Arc<RwLock<VariableScope>>,

    /// Cached copy of the scope's child list; excluded from `Debug` output.
    #[debug(ignore)]
    children: OnceCell<Vec<VariableScopeRef>>,
    /// Cached copy of the scope's own variables; excluded from `Debug` output.
    #[debug(ignore)]
    variables: OnceCell<Vec<VariableRef>>,
    /// Cached list of this scope's variables followed by those of its ancestors;
    /// excluded from `Debug` output.
    #[debug(ignore)]
    variables_recursive: OnceCell<Vec<VariableRef>>,
}
326
327impl From<VariableScope> for VariableScopeRef {
328 fn from(scope: VariableScope) -> Self {
329 Self::new(scope)
330 }
331}
332
333impl VariableScopeRef {
334 /// Creates a new VariableScopeRef from a VariableScope.
335 pub fn new(inner: VariableScope) -> Self {
336 Self {
337 inner: Arc::new(RwLock::new(inner)),
338 variables_recursive: OnceCell::new(),
339 variables: OnceCell::new(),
340 children: OnceCell::new(),
341 }
342 }
343
344 pub(crate) fn read(&self) -> RwLockReadGuard<'_, VariableScope> {
345 self.inner.read()
346 }
347
348 pub(crate) fn write(&self) -> RwLockWriteGuard<'_, VariableScope> {
349 self.inner.write()
350 }
351}
352
/* Direct read methods */
// Each method below takes a read lock on the wrapped scope and forwards to the
// `VariableScope` method of the same name; nothing is cached here.
impl VariableScopeRef {
    delegate! {
        to self.inner.read() {
            /// Returns the node ID of the AST node that corresponds to this scope.
            pub fn ast_id(&self) -> usize;
            /// Returns the source location of this scope's AST node.
            pub fn src(&self) -> SourceLocation;
            /// Returns a human-readable string representation of this scope and its
            /// child scopes.
            pub fn pretty_display(&self) -> String;
        }
    }
}
366
367/* Cached read methods */
368impl VariableScopeRef {
369 /// Clears the cached variables and children.
370 pub fn clear_cache(&mut self) {
371 self.variables_recursive.take();
372 self.variables.take();
373 self.children.take();
374 }
375
376 /// Returns the children of this scope.
377 pub fn children(&self) -> &Vec<Self> {
378 self.children.get_or_init(|| self.inner.read().children.clone())
379 }
380
381 /// Returns the variables of this scope.
382 pub fn variables(&self) -> &Vec<VariableRef> {
383 self.variables.get_or_init(|| self.inner.read().variables.clone())
384 }
385
386 /// Returns all variables in this scope and its parent scopes recursively. The variables are cached.
387 pub fn variables_recursive(&self) -> &Vec<VariableRef> {
388 self.variables_recursive.get_or_init(|| {
389 let mut variables = self.variables().clone();
390 variables.extend(
391 self.inner
392 .read()
393 .parent
394 .as_ref()
395 .map_or(vec![], |parent| parent.variables_recursive().clone()),
396 );
397 variables
398 })
399 }
400}
401
402impl Serialize for VariableScopeRef {
403 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
404 where
405 S: serde::Serializer,
406 {
407 // Serialize the inner VariableScope directly
408 self.inner.read().serialize(serializer)
409 }
410}
411
412impl<'de> Deserialize<'de> for VariableScopeRef {
413 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
414 where
415 D: serde::Deserializer<'de>,
416 {
417 // Deserialize as VariableScope and wrap it in VariableScopeRef
418 let scope = VariableScope::deserialize(deserializer)?;
419 Ok(Self::new(scope))
420 }
421}
/// A lexical scope in the analyzed source together with the variables it declares.
///
/// Scopes form a tree: each scope records the AST node that introduces it, the
/// variables declared directly inside it, an optional parent scope, and its child
/// scopes.
///
/// NOTE(review): `parent` and `children` are both strong `VariableScopeRef` handles
/// and both take part in the derived `Debug`/`Serialize` output. If a child listed
/// in `children` also points back to this scope through its `parent`, those
/// traversals would not terminate and the handles would keep each other alive —
/// confirm how scopes are wired by the code that builds them.
#[derive(Clone, Serialize, Deserialize, derive_more::Debug)]
#[non_exhaustive]
pub struct VariableScope {
    /// The AST node that defines this scope
    pub node: ScopeNode,
    /// Variables declared directly in this scope
    pub variables: Vec<VariableRef>,
    /// Parent scope, or `None` when this scope has no parent
    pub parent: Option<VariableScopeRef>,
    /// Child scopes contained within this scope
    pub children: Vec<VariableScopeRef>,
}
447
448impl VariableScope {
449 /// Returns the unique identifier of this scope, i.e., the node ID of the AST node that corresponds to this scope.
450 pub fn ast_id(&self) -> usize {
451 self.node.ast_id()
452 }
453
454 /// Returns the source location of this scope's AST node.
455 pub fn src(&self) -> SourceLocation {
456 self.node.src()
457 }
458
459 /// Returns all variables in this scope and its parent scopes recursively. The variables are not cached.
460 pub fn variables_recursive(&self) -> Vec<VariableRef> {
461 let mut variables = self.variables.clone();
462 variables.extend(
463 self.parent.clone().map_or(vec![], |parent| parent.read().variables_recursive()),
464 );
465 variables
466 }
467
468 /// Returns a human-readable string representation of the scope hierarchy.
469 ///
470 /// This method displays the scope and all its child scopes in a tree-like format,
471 /// showing the variables contained in each scope.
472 pub fn pretty_display(&self) -> String {
473 self.pretty_display_with_indent(0)
474 }
475
476 fn pretty_display_with_indent(&self, indent_level: usize) -> String {
477 let mut result = String::new();
478 let indent = " ".repeat(indent_level);
479
480 // Print current scope's variables
481 if self.variables.is_empty() {
482 result.push_str(&format!("{}Scope({}): {{}}", indent, self.node.variant_name()));
483 } else {
484 let mut variable_names: Vec<String> =
485 self.variables.iter().map(|var| var.read().pretty_display()).collect();
486 variable_names.sort(); // Sort for consistent output
487 result.push_str(&format!(
488 "{}Scope({}): {{{}}}",
489 indent,
490 self.node.variant_name(),
491 variable_names.join(", ")
492 ));
493 }
494
495 // Print children scopes recursively with increased indentation
496 for child in &self.children {
497 result.push('\n');
498 result.push_str(&child.read().pretty_display_with_indent(indent_level + 1));
499 }
500
501 result
502 }
503}
504
/// Represents the type of a smart contract variable.
///
/// This enum covers the basic Solidity types that are commonly used in
/// smart contract analysis. The types are designed to be extensible for
/// future additions.
///
/// # Examples
///
/// ```rust
/// use edb::analysis::variable::VariableType;
///
/// let uint_type = VariableType::Uint(256);
/// let address_type = VariableType::Address;
/// let bool_type = VariableType::Bool;
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum VariableType {
    /// A `uint` type variable. The number of bits is specified by the parameter.
    ///
    /// For instance, `Uint(8)` denotes a `uint8` Solidity type, while `Uint(256)`
    /// represents a `uint256` (the default uint type in Solidity).
    ///
    /// The width is stored as `u16` because the largest Solidity width, 256, does not
    /// fit in a `u8`.
    Uint(u16),
    /// An `address` type variable representing an Ethereum address.
    ///
    /// This type is used for variables that store 20-byte Ethereum addresses.
    Address,
    /// A `bool` type variable representing a boolean value.
    ///
    /// This type is used for variables that can be either `true` or `false`.
    Bool,
}
537
/// Represents different types of AST nodes that can define variable scopes.
///
/// This enum wraps various Solidity AST node types that create new variable scopes,
/// allowing the variable analyzer to track scope boundaries and variable visibility.
///
/// Every variant is marked `#[from]`, so a `From` conversion from each wrapped AST
/// type into `ScopeNode` is derived.
#[derive(Debug, Clone, From, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
pub enum ScopeNode {
    /// A source unit scope (file-level).
    SourceUnit(#[from] SourceUnit),
    /// A block statement scope.
    Block(#[from] Block),
    /// An unchecked block scope.
    UncheckedBlock(#[from] UncheckedBlock),
    /// A for loop scope.
    ForStatement(#[from] ForStatement),
    /// A contract definition scope.
    ContractDefinition(#[from] ContractDefinition),
    /// A function definition scope.
    FunctionDefinition(#[from] FunctionDefinition),
    /// A modifier definition scope.
    ModifierDefinition(#[from] ModifierDefinition),
}
560
561impl ScopeNode {
562 /// Returns the node ID of the AST node.
563 pub fn ast_id(&self) -> usize {
564 match self {
565 Self::SourceUnit(source_unit) => source_unit.id,
566 Self::Block(block) => block.id,
567 Self::UncheckedBlock(unchecked_block) => unchecked_block.id,
568 Self::ForStatement(for_statement) => for_statement.id,
569 Self::ContractDefinition(contract_definition) => contract_definition.id,
570 Self::FunctionDefinition(function_definition) => function_definition.id,
571 Self::ModifierDefinition(modifier_definition) => modifier_definition.id,
572 }
573 }
574
575 /// Returns the source location of the wrapped AST node.
576 pub fn src(&self) -> SourceLocation {
577 match self {
578 Self::SourceUnit(source_unit) => source_unit.src,
579 Self::Block(block) => block.src,
580 Self::UncheckedBlock(unchecked_block) => unchecked_block.src,
581 Self::ForStatement(for_statement) => for_statement.src,
582 Self::ContractDefinition(contract_definition) => contract_definition.src,
583 Self::FunctionDefinition(function_definition) => function_definition.src,
584 Self::ModifierDefinition(modifier_definition) => modifier_definition.src,
585 }
586 }
587
588 /// Returns a string representation of the scope node variant name.
589 pub fn variant_name(&self) -> &'static str {
590 match self {
591 Self::SourceUnit(_) => "SourceUnit",
592 Self::Block(_) => "Block",
593 Self::UncheckedBlock(_) => "UncheckedBlock",
594 Self::ForStatement(_) => "ForStatement",
595 Self::ContractDefinition(_) => "ContractDefinition",
596 Self::FunctionDefinition(_) => "FunctionDefinition",
597 Self::ModifierDefinition(_) => "ModifierDefinition",
598 }
599 }
600}