Skip to main content

soroban_decompiler/
lib.rs

1//! Soroban WASM smart contract decompiler.
2//!
3//! This crate reconstructs idiomatic Rust source code from compiled Soroban
4//! WASM binaries. It combines the contract specification metadata embedded in
5//! the WASM custom sections with bytecode-level stack simulation to produce
6//! output that closely resembles the original contract source, including type
7//! definitions, function signatures, storage operations, authentication calls,
8//! and cross-contract invocations.
9//!
10//! # Quick start
11//!
12//! The simplest way to use the crate is through the top-level [`decompile`]
13//! function, which runs the full pipeline and returns formatted Rust source:
14//!
15//! ```no_run
16//! use soroban_decompiler::{decompile, DecompileOptions};
17//!
18//! let wasm = std::fs::read("contract.wasm").unwrap();
19//! let options = DecompileOptions { signatures_only: false };
20//! let source = decompile(&wasm, &options).unwrap();
21//! println!("{source}");
22//! ```
23//!
24//! # Architecture
25//!
26//! The decompilation pipeline runs in four stages. Each stage is also
27//! available as a standalone entry point for tools that need intermediate
28//! results.
29//!
30//! 1. **Spec extraction** ([`spec_extract`]) -- reads `contractspecv0` custom
31//!    sections to recover struct definitions, enum variants, error codes,
32//!    event schemas, and function signatures with fully typed and named
33//!    parameters. Entry point: [`extract_spec`].
34//!
35//! 2. **WASM analysis** ([`wasm_analysis`]) -- parses the binary with
36//!    `walrus`, traces through Soroban dispatcher chains, and simulates the
37//!    stack for each implementation function. The simulator tracks values
38//!    through locals, memory stores, function calls, and control flow
39//!    branches, resolving host function call arguments back to their origins
40//!    (parameters, constants, or earlier call results). Callee memory writes
41//!    are propagated to the caller so that helper functions that store through
42//!    pointer parameters have their results visible in the calling function.
43//!    Entry point: [`analyze`].
44//!
45//! 3. **Pattern recognition** ([`pattern_recognizer`]) -- maps host call
46//!    sequences to high-level Soroban SDK operations. For example, a
47//!    `symbol_new_from_linear_memory` followed by `get_contract_data` becomes
48//!    `env.storage().instance().get(symbol_short!("KEY"))`. This stage also
49//!    resolves struct field accesses through map unpack operations, detects
50//!    i128 round-trips, strips Soroban Val encoding boilerplate, and runs
51//!    dead variable elimination and common subexpression elimination. The
52//!    output is a typed intermediate representation defined in [`ir`].
53//!
54//! 4. **Code generation** ([`codegen`]) -- walks the IR and emits Rust token
55//!    streams using `syn` and `quote`, then formats the result with
56//!    `prettyplease`. Reconstructs `#[contracttype]` definitions,
57//!    `#[contracterror]` error enums, `#[contractimpl]` function bodies, and
58//!    the top-level `#[contract]` struct with appropriate `use` imports.
59
60pub mod codegen;
61pub mod host_functions;
62pub mod ir;
63pub mod pattern_recognizer;
64pub mod spec_extract;
65pub mod wasm_analysis;
66pub mod wasm_imports;
67
68use anyhow::Result;
69use stellar_xdr::curr::ScSpecEntry;
70
/// Options controlling the decompilation process.
///
/// Pass this to [`decompile`] to configure which parts of the pipeline run.
///
/// The [`Default`] value has every flag set to `false`, which runs the
/// full pipeline.
#[derive(Debug, Clone, Default)]
pub struct DecompileOptions {
    /// When `true`, skip bytecode analysis entirely and only emit type
    /// definitions and function signatures recovered from the contract spec.
    ///
    /// This is significantly faster and useful when only the contract
    /// interface is needed (for bindings generation, documentation, or
    /// ABI inspection).
    pub signatures_only: bool,
}
83
84/// Extract contract spec entries from a compiled Soroban WASM binary.
85///
86/// Reads the `contractspecv0` custom section and deserializes it into a list
87/// of [`ScSpecEntry`] values covering struct definitions, enum variants,
88/// error codes, event schemas, and function signatures.
89///
90/// This is the first stage of the decompilation pipeline and can be called
91/// independently when only the contract metadata is needed.
92///
93/// # Errors
94///
95/// Returns an error if the WASM binary does not contain a valid
96/// `contractspecv0` section or if the XDR deserialization fails.
97pub fn extract_spec(wasm: &[u8]) -> Result<Vec<ScSpecEntry>> {
98    spec_extract::extract_spec(wasm)
99}
100
101/// Resolve all host function imports in a WASM binary.
102///
103/// Iterates the WASM import table and matches each function import against
104/// the bundled Soroban host function database ([`host_functions`]). Returns
105/// one [`wasm_imports::ResolvedImport`] per import, with semantic names
106/// filled in for recognized host functions and `None` for unrecognized ones.
107///
108/// # Errors
109///
110/// Returns an error if the WASM binary cannot be parsed.
111pub fn resolve_imports(
112    wasm: &[u8],
113) -> Result<Vec<wasm_imports::ResolvedImport>> {
114    wasm_imports::resolve_imports(wasm)
115}
116
117/// Analyze WASM function bodies, resolving exports and host calls.
118///
119/// Parses the WASM binary, builds the host function import mapping, and
120/// returns an [`wasm_analysis::AnalyzedModule`] that provides per-function
121/// analysis including dispatcher tracing and stack simulation.
122///
123/// # Errors
124///
125/// Returns an error if `walrus` cannot parse the WASM binary.
126pub fn analyze(
127    wasm: &[u8],
128) -> Result<wasm_analysis::AnalyzedModule> {
129    wasm_analysis::AnalyzedModule::from_wasm(wasm)
130}
131
132/// Decompile a Soroban WASM binary into formatted Rust source code.
133///
134/// Runs the full four-stage pipeline (spec extraction, WASM analysis,
135/// pattern recognition, code generation) and returns the result as a
136/// `prettyplease`-formatted Rust source string.
137///
138/// When [`DecompileOptions::signatures_only`] is `true`, skips the analysis
139/// and pattern recognition stages entirely, producing only type definitions
140/// and function stubs with `todo!()` bodies.
141///
142/// # Errors
143///
144/// Returns an error if spec extraction fails, the WASM binary cannot be
145/// parsed, or the generated token stream is not valid Rust syntax.
146pub fn decompile(
147    wasm: &[u8],
148    options: &DecompileOptions,
149) -> Result<String> {
150    let entries = spec_extract::extract_spec(wasm)?;
151    if options.signatures_only {
152        codegen::generate_rust(&entries, None)
153    } else {
154        let analysis = wasm_analysis::AnalyzedModule::from_wasm(wasm)?;
155        codegen::generate_rust(&entries, Some(&analysis))
156    }
157}