Skip to main content

js_deobfuscator/transform/
proxy.rs

//! Proxy function inlining.
//!
//! `function c(r, v) { return Hs(v - -966, r); }`
//! `c(-679, -602)` → `Hs(-602 - -966, -679)`
//!
//! Two-pass: first collect functions whose body is a single `return` of a
//! simple expression, then inline their call sites.
//! Uses proper AST-based parameter substitution to avoid corrupting strings.
8
9use rustc_hash::FxHashMap;
10
11use oxc::allocator::{Allocator, CloneIn};
12use oxc::ast::ast::*;
13use oxc::ast::AstBuilder;
14use oxc::ast_visit::VisitMut;
15use oxc::semantic::{Scoping, SymbolId};
16use oxc::span::SPAN;
17
18use oxc_traverse::{Traverse, TraverseCtx, traverse_mut};
19
20use crate::ast::codegen;
21use crate::engine::error::Result;
22use crate::engine::module::{Module, TransformResult};
23use crate::scope::{query, resolve};
24
/// Proxy function inliner module.
///
/// A stateless unit type: all per-run state lives in the collector and
/// inliner passes created inside `transform`.
#[derive(Debug, Default, Clone, Copy)]
pub struct ProxyInliner;
27
28impl Module for ProxyInliner {
29    fn name(&self) -> &'static str { "ProxyInliner" }
30
31    fn changes_symbols(&self) -> bool {
32        // Creates new identifier references
33        true
34    }
35
36    fn transform<'a>(
37        &mut self,
38        allocator: &'a Allocator,
39        program: &mut Program<'a>,
40        scoping: Scoping,
41    ) -> Result<TransformResult> {
42        let mut collector = Collector::default();
43        let scoping = traverse_mut(&mut collector, allocator, program, scoping, ());
44        if collector.proxies.is_empty() {
45            return Ok(TransformResult { modifications: 0, scoping });
46        }
47        let mut inliner = Inliner { proxies: collector.proxies, modifications: 0 };
48        let scoping = traverse_mut(&mut inliner, allocator, program, scoping, ());
49        Ok(TransformResult { modifications: inliner.modifications, scoping })
50    }
51}
52
/// Template describing one proxy function.
///
/// Everything is kept as owned strings (rather than AST nodes) so the
/// template carries no arena lifetime and can be held across traversals.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ProxyInfo {
    /// Parameter names, in declaration order.
    params: Vec<String>,
    /// Source text of the expression the function returns.
    return_source: String,
}
60
61#[derive(Default)]
62struct Collector {
63    proxies: FxHashMap<SymbolId, ProxyInfo>,
64}
65
66impl<'a> Traverse<'a, ()> for Collector {
67    fn enter_statement(&mut self, stmt: &mut Statement<'a>, ctx: &mut TraverseCtx<'a, ()>) {
68        let Statement::FunctionDeclaration(func) = stmt else { return; };
69        let Some(id) = &func.id else { return; };
70        let Some(sym) = id.symbol_id.get() else { return; };
71        if query::has_writes(ctx.scoping(), sym) { return; }
72
73        // Must have exactly one statement: return <expression>
74        let Some(body) = &func.body else { return; };
75        if body.statements.len() != 1 { return; }
76        let Statement::ReturnStatement(ret) = &body.statements[0] else { return; };
77        let Some(ret_expr) = &ret.argument else { return; };
78
79        // Return value must be a simple expression (call, binary, etc.)
80        if !is_simple_expression(ret_expr) { return; }
81
82        // All params must be simple identifiers
83        let params: Vec<String> = func.params.items.iter()
84            .filter_map(|p| p.pattern.get_binding_identifier())
85            .map(|b| b.name.to_string())
86            .collect();
87        if params.len() != func.params.items.len() { return; }
88
89        // Store source code to avoid lifetime issues
90        let return_source = codegen::expr_to_code(ret_expr);
91
92        self.proxies.insert(sym, ProxyInfo {
93            params,
94            return_source,
95        });
96    }
97}
98
/// Upper bound on the nesting depth of a proxy's return expression.
///
/// [`is_simple_expression`] rejects any expression nested at least this deep,
/// so only shallow templates are ever collected. The inliner's
/// `ParamSubstitutor` only walks templates that passed that check, which in
/// turn keeps its recursion shallow. Typical proxies such as
/// `return f(b - a, r)` are only a few levels deep; very long operator chains
/// (`a + b + c + ... + z`) are deliberately left alone rather than risking a
/// stack overflow in the recursive walkers.
const MAX_TEMPLATE_DEPTH: usize = 32;
108
109/// Check if an expression is simple enough to inline safely.
110///
111/// Bounded by [`MAX_TEMPLATE_DEPTH`] — deeper expressions are conservatively
112/// rejected so the inliner's substitutor never walks an unbounded tree.
113fn is_simple_expression(expr: &Expression) -> bool {
114    is_simple_expression_inner(expr, 0)
115}
116
117fn is_simple_expression_inner(expr: &Expression, depth: usize) -> bool {
118    if depth >= MAX_TEMPLATE_DEPTH {
119        return false;
120    }
121    let next = depth + 1;
122    match expr {
123        Expression::CallExpression(c) => {
124            is_simple_expression_inner(&c.callee, next) &&
125            c.arguments.iter().all(|a| {
126                a.as_expression().is_some_and(|e| is_simple_expression_inner(e, next))
127            })
128        }
129        Expression::BinaryExpression(b) => {
130            is_simple_expression_inner(&b.left, next)
131                && is_simple_expression_inner(&b.right, next)
132        }
133        Expression::UnaryExpression(u) => is_simple_expression_inner(&u.argument, next),
134        Expression::LogicalExpression(l) => {
135            is_simple_expression_inner(&l.left, next)
136                && is_simple_expression_inner(&l.right, next)
137        }
138        Expression::ConditionalExpression(c) => {
139            is_simple_expression_inner(&c.test, next)
140                && is_simple_expression_inner(&c.consequent, next)
141                && is_simple_expression_inner(&c.alternate, next)
142        }
143        Expression::Identifier(_) |
144        Expression::NumericLiteral(_) |
145        Expression::StringLiteral(_) |
146        Expression::BooleanLiteral(_) |
147        Expression::NullLiteral(_) => true,
148        Expression::StaticMemberExpression(m) => is_simple_expression_inner(&m.object, next),
149        Expression::ComputedMemberExpression(m) => {
150            is_simple_expression_inner(&m.object, next)
151                && is_simple_expression_inner(&m.expression, next)
152        }
153        Expression::ParenthesizedExpression(p) => {
154            is_simple_expression_inner(&p.expression, next)
155        }
156        _ => false,
157    }
158}
159
160struct Inliner {
161    proxies: FxHashMap<SymbolId, ProxyInfo>,
162    modifications: usize,
163}
164
165impl<'a> Traverse<'a, ()> for Inliner {
166    fn exit_expression(&mut self, expr: &mut Expression<'a>, ctx: &mut TraverseCtx<'a, ()>) {
167        // Match: proxy_func(args...)
168        let sym = {
169            let Expression::CallExpression(call) = &*expr else { return; };
170            let Expression::Identifier(id) = &call.callee else { return; };
171            resolve::get_reference_symbol(ctx.scoping(), id)
172        };
173        let Some(sym) = sym else { return; };
174        let Some(proxy) = self.proxies.get(&sym) else { return; };
175
176        // Must have exact argument count
177        let Expression::CallExpression(call) = &*expr else { return; };
178        if call.arguments.len() != proxy.params.len() { return; }
179
180        // Collect argument source codes
181        let arg_sources: Vec<String> = call.arguments.iter()
182            .filter_map(|a| a.as_expression())
183            .map(codegen::expr_to_code)
184            .collect();
185        if arg_sources.len() != proxy.params.len() { return; }
186
187        // Parse the return expression template
188        let allocator = ctx.ast.allocator;
189        let parsed = oxc::parser::Parser::new(
190            allocator, &proxy.return_source, oxc::span::SourceType::mjs(),
191        ).parse();
192
193        if !parsed.errors.is_empty() || parsed.program.body.is_empty() { return; }
194        let Statement::ExpressionStatement(es) = &parsed.program.body[0] else { return; };
195
196        // Clone into our arena
197        let mut cloned = es.expression.clone_in(allocator);
198
199        // Build parameter → argument source mapping
200        let mut substitutions: FxHashMap<&str, &str> = FxHashMap::default();
201        for (i, param) in proxy.params.iter().enumerate() {
202            substitutions.insert(param.as_str(), arg_sources[i].as_str());
203        }
204
205        // Substitute all parameter references with argument expressions
206        let mut substitutor = ParamSubstitutor {
207            substitutions: &substitutions,
208            allocator,
209            ast: &ctx.ast,
210        };
211        substitutor.visit_expression(&mut cloned);
212
213        *expr = cloned;
214        self.modifications += 1;
215    }
216}
217
218/// AST visitor that substitutes parameter identifiers with argument expressions.
219struct ParamSubstitutor<'a, 's> {
220    substitutions: &'s FxHashMap<&'s str, &'s str>,
221    allocator: &'a Allocator,
222    ast: &'s AstBuilder<'a>,
223}
224
225impl<'a, 's> VisitMut<'a> for ParamSubstitutor<'a, 's> {
226    fn visit_expression(&mut self, expr: &mut Expression<'a>) {
227        // Check if this is an identifier that should be substituted
228        if let Expression::Identifier(ident) = expr {
229            let name = ident.name.as_str();
230            if let Some(&replacement_src) = self.substitutions.get(name) {
231                // Parse the replacement expression
232                let parsed = oxc::parser::Parser::new(
233                    self.allocator, replacement_src, oxc::span::SourceType::mjs(),
234                ).parse();
235
236                if !parsed.errors.is_empty() || parsed.program.body.is_empty() {
237                    return;
238                }
239                let Statement::ExpressionStatement(es) = &parsed.program.body[0] else {
240                    return;
241                };
242
243                // Clone and wrap in parens if needed
244                let mut cloned = es.expression.clone_in(self.allocator);
245
246                // Wrap complex expressions in parentheses for safety
247                if needs_parens(&cloned) {
248                    cloned = self.ast.expression_parenthesized(SPAN, cloned);
249                }
250
251                *expr = cloned;
252                return; // Don't recurse into the replacement
253            }
254        }
255
256        // Recurse into children
257        match expr {
258            Expression::BinaryExpression(b) => {
259                self.visit_expression(&mut b.left);
260                self.visit_expression(&mut b.right);
261            }
262            Expression::UnaryExpression(u) => {
263                self.visit_expression(&mut u.argument);
264            }
265            Expression::LogicalExpression(l) => {
266                self.visit_expression(&mut l.left);
267                self.visit_expression(&mut l.right);
268            }
269            Expression::ConditionalExpression(c) => {
270                self.visit_expression(&mut c.test);
271                self.visit_expression(&mut c.consequent);
272                self.visit_expression(&mut c.alternate);
273            }
274            Expression::CallExpression(c) => {
275                self.visit_expression(&mut c.callee);
276                for arg in &mut c.arguments {
277                    if let Some(e) = arg.as_expression_mut() {
278                        self.visit_expression(e);
279                    }
280                }
281            }
282            Expression::StaticMemberExpression(m) => {
283                self.visit_expression(&mut m.object);
284            }
285            Expression::ComputedMemberExpression(m) => {
286                self.visit_expression(&mut m.object);
287                self.visit_expression(&mut m.expression);
288            }
289            Expression::ParenthesizedExpression(p) => {
290                self.visit_expression(&mut p.expression);
291            }
292            Expression::SequenceExpression(s) => {
293                for e in &mut s.expressions {
294                    self.visit_expression(e);
295                }
296            }
297            _ => {}
298        }
299    }
300}
301
302/// Check if an expression needs parentheses when used as a replacement.
303fn needs_parens(e: &Expression) -> bool {
304    matches!(e,
305        Expression::BinaryExpression(_)
306        | Expression::ConditionalExpression(_)
307        | Expression::AssignmentExpression(_)
308        | Expression::SequenceExpression(_)
309        | Expression::LogicalExpression(_)
310    )
311}
312
#[cfg(test)]
mod tests {
    use super::*;
    use oxc::codegen::Codegen;
    use oxc::parser::Parser;
    use oxc::semantic::SemanticBuilder;
    use oxc::span::SourceType;

    /// Runs the inliner over `source` and returns the regenerated code
    /// together with the number of reported modifications.
    fn deob(source: &str) -> (String, usize) {
        let alloc = Allocator::default();
        let mut program = Parser::new(&alloc, source, SourceType::mjs()).parse().program;
        let scoping = SemanticBuilder::new().build(&program).semantic.into_scoping();
        let mut module = ProxyInliner;
        let result = module.transform(&alloc, &mut program, scoping).unwrap();
        (Codegen::new().build(&program).code, result.modifications)
    }

    #[test]
    fn test_simple_proxy() {
        let (code, mods) = deob("function f(a, b) { return g(b, a); } f(1, 2);");
        assert!(mods > 0);
        assert!(code.contains("g(2, 1)"), "got: {code}");
    }

    #[test]
    fn test_arithmetic_proxy() {
        let (code, mods) = deob("function c(r, v) { return Hs(v - -966, r); } c(-679, -602);");
        assert!(mods > 0);
        // The declaration is kept and already contains `Hs(`, so check the
        // rewritten call site specifically.
        assert!(code.contains("Hs(-602"), "should contain inlined call: {code}");
        assert!(!code.contains("c(-679"), "proxy call should be gone: {code}");
    }

    #[test]
    fn test_no_inline_multi_statement() {
        let (_, mods) = deob("function f(a) { var x = 1; return g(a); } f(1);");
        assert_eq!(mods, 0, "multi-statement body should not be inlined");
    }

    #[test]
    fn test_binary_expression() {
        let (code, mods) = deob("function f(a) { return a + 1; } f(5);");
        assert!(mods > 0, "should inline simple binary expression");
        // `+ 1` alone also matches the untouched declaration.
        assert!(code.contains("5 + 1"), "got: {code}");
        assert!(!code.contains("f(5)"), "proxy call should be gone: {code}");
    }

    #[test]
    fn test_wrong_arg_count() {
        let (_, mods) = deob("function f(a, b) { return g(a, b); } f(1);");
        assert_eq!(mods, 0, "wrong arg count should not inline");
    }

    #[test]
    fn test_string_literal_not_corrupted() {
        // This was the critical bug - make sure strings aren't modified
        let (code, mods) = deob("function f(a) { return g(\"hello a world\"); } f(1);");
        assert!(mods > 0);
        // One occurrence comes from the declaration, the second from the
        // inlined call site.
        assert!(
            code.matches("\"hello a world\"").count() >= 2,
            "string should not be corrupted: {code}"
        );
        assert!(!code.contains("f(1)"), "proxy call should be gone: {code}");
    }

    #[test]
    fn test_nested_calls() {
        let (code, mods) = deob("function f(a, b) { return outer(inner(a), b); } f(1, 2);");
        assert!(mods > 0);
        assert!(
            code.contains("outer(inner(1), 2)") || code.contains("outer(inner((1)), (2))"),
            "got: {code}"
        );
    }
}