Skip to main content

js_deobfuscator/targets/obfuscator_io/
string_array.rs

1//! String array decoder (obfuscator.io pattern).
2//!
//! Locked module: runs once and decodes all strings; the convergence loop then handles the rest.
4//!
5//! ## Pattern
6//!
7//! ```javascript
8//! function _0xarr() { var a = ["encoded1", ...]; _0xarr = function(){return a;}; return _0xarr(); }
9//! function _0xget(i) { return _0xarr()[i - 0x1a3]; }
10//! (function(arr, check) { /* push/shift rotation */ })(_0xarr, 0x44598);
11//! console.log(_0xget(0x1a5)); // → decoded string
12//! ```
13//!
14//! ## Pipeline
15//!
16//! 1. Detect shuffler IIFE (push/shift + parseInt pattern)
17//! 2. Collect component source (array function + accessor function)
18//! 3. Collect all accessor calls with literal args
19//! 4. Execute in Node.js: setup array + accessor + shuffler, batch eval all calls
20//! 5. Inline decoded strings at call sites
21
22use rustc_hash::{FxHashMap, FxHashSet};
23
24use oxc::allocator::Allocator;
25use oxc::ast::ast::*;
26use oxc::ast_visit::{Visit, walk};
27use oxc::semantic::{Scoping, SymbolId};
28use oxc::span::SPAN;
29
30use oxc_traverse::{Traverse, TraverseCtx, traverse_mut};
31
32use crate::ast::{codegen, create, query};
33use crate::engine::error::Result;
34use crate::engine::module::{Module, TransformResult};
35use crate::eval::node::NodeProcess;
36use crate::scope::resolve;
37
38/// String array decoder module. Intended as a **locked** module.
39#[derive(Default)]
40pub struct StringArrayDecoder {
41    node: Option<NodeProcess>,
42}
43
44impl Module for StringArrayDecoder {
45    fn name(&self) -> &'static str { "StringArrayDecoder" }
46    fn changes_symbols(&self) -> bool { true }
47
48    fn transform<'a>(
49        &mut self,
50        allocator: &'a Allocator,
51        program: &mut Program<'a>,
52        scoping: Scoping,
53    ) -> Result<TransformResult> {
54        // Phase 1: Detect shuffler IIFEs
55        let mut detector = ShufflerDetector::default();
56        let scoping = traverse_mut(&mut detector, allocator, program, scoping, ());
57
58        if detector.systems.is_empty() {
59            return Ok(TransformResult { modifications: 0, scoping });
60        }
61
62        let mut total_mods = 0;
63        let mut scoping = scoping;
64
65        // Phase 2: Collect component source code
66        let mut component_symbols: Vec<SymbolId> = Vec::new();
67        for sys in &detector.systems {
68            component_symbols.push(sys.array_symbol_id);
69            component_symbols.push(sys.accessor_symbol_id);
70        }
71        let mut component_collector = ComponentCollector::new(component_symbols);
72        scoping = traverse_mut(&mut component_collector, allocator, program, scoping, ());
73
74        for sys in &detector.systems {
75            // Phase 3: Collect all accessor calls with literal args
76            let mut call_collector = CallCollector::new(sys.accessor_symbol_id);
77            scoping = traverse_mut(&mut call_collector, allocator, program, scoping, ());
78
79            let calls: Vec<String> = call_collector.calls.into_iter().collect();
80            if calls.is_empty() { continue; }
81
82            // Phase 4: Execute in Node.js
83            let array_code = component_collector.code_map.get(&sys.array_symbol_id)
84                .map(|s| s.as_str()).unwrap_or("");
85            let accessor_code = component_collector.code_map.get(&sys.accessor_symbol_id)
86                .map(|s| s.as_str()).unwrap_or("");
87
88            let decoded = match self.execute(array_code, accessor_code, &sys.shuffler_code, &calls) {
89                Some(d) => d,
90                None => {
91                    tracing::warn!("string array decode failed for system");
92                    continue;
93                }
94            };
95
96            // Phase 5: Inline decoded strings
97            let mut inliner = StringInliner::new(sys.accessor_symbol_id, decoded);
98            scoping = traverse_mut(&mut inliner, allocator, program, scoping, ());
99            total_mods += inliner.modifications;
100        }
101
102        Ok(TransformResult { modifications: total_mods, scoping })
103    }
104}
105
106impl StringArrayDecoder {
107    fn ensure_node(&mut self) -> Option<&mut NodeProcess> {
108        if self.node.is_none() {
109            self.node = NodeProcess::spawn().ok();
110        }
111        self.node.as_mut()
112    }
113
114    fn execute(
115        &mut self,
116        array_code: &str,
117        accessor_code: &str,
118        shuffler_code: &str,
119        calls: &[String],
120    ) -> Option<FxHashMap<String, String>> {
121        if calls.is_empty() { return Some(FxHashMap::default()); }
122
123        let node = self.ensure_node()?;
124
125        // Setup: define array + accessor + run shuffler
126        let setup = format!("{array_code}\n{accessor_code}\n{shuffler_code}");
127        node.eval(&setup)?;
128
129        // Batch eval all calls
130        let mut script = String::from("(function() { var r = {};\n");
131        for (i, call) in calls.iter().enumerate() {
132            script.push_str(&format!("try {{ r[{i}] = {call}; }} catch(e) {{ r[{i}] = null; }}\n"));
133        }
134        script.push_str("return JSON.stringify(r); })()");
135
136        let result = node.eval(&script)?;
137        let result_str = result.as_str()?;
138        let obj: serde_json::Value = serde_json::from_str(result_str).ok()?;
139
140        let mut decoded = FxHashMap::default();
141        for (i, call) in calls.iter().enumerate() {
142            if let Some(s) = obj.get(i.to_string()).and_then(|v| v.as_str()) {
143                decoded.insert(call.clone(), s.to_string());
144            }
145        }
146        Some(decoded)
147    }
148}
149
150// ============================================================================
151// Phase 1: Detect shuffler IIFEs
152// ============================================================================
153
154struct DetectedSystem {
155    array_symbol_id: SymbolId,
156    accessor_symbol_id: SymbolId,
157    shuffler_code: String,
158}
159
160#[derive(Default)]
161struct ShufflerDetector {
162    systems: Vec<DetectedSystem>,
163}
164
165impl<'a> Traverse<'a, ()> for ShufflerDetector {
166    fn exit_statement(&mut self, stmt: &mut Statement<'a>, ctx: &mut TraverseCtx<'a, ()>) {
167        let Statement::ExpressionStatement(expr_stmt) = stmt else { return; };
168
169        if let Some(sys) = detect_shuffler_in_expr(&expr_stmt.expression, ctx.scoping()) {
170            self.systems.push(sys);
171            *stmt = ctx.ast.statement_empty(SPAN);
172        }
173    }
174}
175
176fn detect_shuffler_in_expr(expr: &Expression, scoping: &Scoping) -> Option<DetectedSystem> {
177    // Match: (function(arr, check) { ... })(_0xarr, 0x44598)
178    // Or: !function(arr, check) { ... }(_0xarr, 0x44598)
179    let call = match expr {
180        Expression::CallExpression(c) if is_iife(c) => c,
181        Expression::UnaryExpression(u) => {
182            if let Expression::CallExpression(c) = &u.argument {
183                if is_iife(c) { c } else { return None; }
184            } else { return None; }
185        }
186        Expression::SequenceExpression(seq) => {
187            // Try each sub-expression
188            for sub in &seq.expressions {
189                if let Some(sys) = detect_shuffler_in_expr(sub, scoping) {
190                    return Some(sys);
191                }
192            }
193            return None;
194        }
195        Expression::ParenthesizedExpression(p) => {
196            return detect_shuffler_in_expr(&p.expression, scoping);
197        }
198        _ => return None,
199    };
200
201    // First argument must be an identifier → array function
202    let first_expr = call.arguments.first()?.as_expression()?;
203    let Expression::Identifier(array_id) = first_expr else { return None; };
204    let array_symbol_id = resolve::get_reference_symbol(scoping, array_id)?;
205
206    // Validate body: count push/shift and parseInt
207    let callee = unwrap_parens(&call.callee);
208    let Expression::FunctionExpression(func) = callee else { return None; };
209    let body = func.body.as_ref()?;
210
211    let mut validator = ShufflerValidator::new(scoping, array_symbol_id);
212    // Register local symbols
213    for param in &func.params.items {
214        if let Some(b) = param.pattern.get_binding_identifier() {
215            if let Some(sym) = b.symbol_id.get() { validator.local_symbols.insert(sym); }
216        }
217    }
218    for stmt in &body.statements {
219        if let Statement::FunctionDeclaration(f) = stmt {
220            if let Some(id) = &f.id {
221                if let Some(sym) = id.symbol_id.get() { validator.local_symbols.insert(sym); }
222            }
223        }
224        if let Statement::VariableDeclaration(vd) = stmt {
225            for decl in &vd.declarations {
226                if let Some(b) = decl.id.get_binding_identifier() {
227                    if let Some(sym) = b.symbol_id.get() { validator.local_symbols.insert(sym); }
228                }
229            }
230        }
231    }
232    validator.visit_function_body(body);
233
234    if validator.push_shift_count < 1 || validator.parse_int_count < 2 {
235        return None;
236    }
237
238    // Find accessor: external function that isn't the array function
239    let accessor_symbol_id = validator.external_calls.iter()
240        .find(|s| **s != array_symbol_id)
241        .copied()?;
242
243    // Capture shuffler source for Node.js execution
244    let shuffler_code = codegen::expr_to_code(expr);
245
246    Some(DetectedSystem { array_symbol_id, accessor_symbol_id, shuffler_code })
247}
248
249fn is_iife(call: &CallExpression) -> bool {
250    matches!(unwrap_parens(&call.callee), Expression::FunctionExpression(_))
251}
252
253fn unwrap_parens<'a>(expr: &'a Expression<'a>) -> &'a Expression<'a> {
254    match expr {
255        Expression::ParenthesizedExpression(p) => unwrap_parens(&p.expression),
256        e => e,
257    }
258}
259
260// ============================================================================
261// Shuffler validator (uses Visit, not Traverse)
262// ============================================================================
263
264struct ShufflerValidator<'s> {
265    scoping: &'s Scoping,
266    push_shift_count: usize,
267    parse_int_count: usize,
268    external_calls: FxHashSet<SymbolId>,
269    local_symbols: FxHashSet<SymbolId>,
270    array_symbol_id: SymbolId,
271}
272
273impl<'s> ShufflerValidator<'s> {
274    fn new(scoping: &'s Scoping, array_symbol_id: SymbolId) -> Self {
275        Self {
276            scoping, push_shift_count: 0, parse_int_count: 0,
277            external_calls: FxHashSet::default(),
278            local_symbols: FxHashSet::default(),
279            array_symbol_id,
280        }
281    }
282}
283
284impl<'a> Visit<'a> for ShufflerValidator<'_> {
285    fn visit_call_expression(&mut self, call: &CallExpression<'a>) {
286        // Detect push(shift()) pattern
287        let is_push = match &call.callee {
288            Expression::StaticMemberExpression(m) => m.property.name == "push",
289            _ => false,
290        };
291        if is_push {
292            if let Some(Expression::CallExpression(inner_call)) = call.arguments.first().and_then(|a| a.as_expression()) {
293                if matches!(&inner_call.callee, Expression::StaticMemberExpression(m) if m.property.name == "shift") {
294                    self.push_shift_count += 1;
295                }
296            }
297        }
298
299        // Count parseInt
300        if let Expression::Identifier(id) = &call.callee {
301            if id.name == "parseInt" { self.parse_int_count += 1; }
302        }
303
304        // Track external function calls
305        if let Expression::Identifier(id) = &call.callee {
306            if let Some(sym) = resolve::get_reference_symbol(self.scoping, id) {
307                if sym != self.array_symbol_id && !self.local_symbols.contains(&sym) {
308                    self.external_calls.insert(sym);
309                }
310            }
311        }
312
313        walk::walk_call_expression(self, call);
314    }
315}
316
317// ============================================================================
318// Phase 2: Component source collector
319// ============================================================================
320
321struct ComponentCollector {
322    targets: Vec<SymbolId>,
323    code_map: FxHashMap<SymbolId, String>,
324}
325
326impl ComponentCollector {
327    fn new(targets: Vec<SymbolId>) -> Self {
328        Self { targets, code_map: FxHashMap::default() }
329    }
330}
331
332impl<'a> Traverse<'a, ()> for ComponentCollector {
333    fn enter_statement(&mut self, stmt: &mut Statement<'a>, _ctx: &mut TraverseCtx<'a, ()>) {
334        if let Statement::FunctionDeclaration(func) = stmt {
335            if let Some(id) = &func.id {
336                if let Some(sym) = id.symbol_id.get() {
337                    if self.targets.contains(&sym) {
338                        self.code_map.insert(sym, codegen::stmt_to_code(stmt));
339                    }
340                }
341            }
342        }
343    }
344}
345
346// ============================================================================
347// Phase 3: Call collector
348// ============================================================================
349
350struct CallCollector {
351    accessor_symbol_id: SymbolId,
352    calls: FxHashSet<String>,
353}
354
355impl CallCollector {
356    fn new(accessor_symbol_id: SymbolId) -> Self {
357        Self { accessor_symbol_id, calls: FxHashSet::default() }
358    }
359}
360
361impl<'a> Traverse<'a, ()> for CallCollector {
362    fn exit_expression(&mut self, expr: &mut Expression<'a>, ctx: &mut TraverseCtx<'a, ()>) {
363        let Expression::CallExpression(call) = &*expr else { return; };
364        let Expression::Identifier(id) = &call.callee else { return; };
365        let Some(sym) = resolve::get_reference_symbol(ctx.scoping(), id) else { return; };
366        if sym != self.accessor_symbol_id { return; }
367
368        // Only collect calls with all-literal arguments
369        if !call.arguments.iter().all(|a| a.as_expression().is_some_and(query::is_literal)) {
370            return;
371        }
372        self.calls.insert(codegen::expr_to_code(expr));
373    }
374}
375
376// ============================================================================
377// Phase 5: String inliner
378// ============================================================================
379
380struct StringInliner {
381    accessor_symbol_id: SymbolId,
382    decoded: FxHashMap<String, String>,
383    modifications: usize,
384}
385
386impl StringInliner {
387    fn new(accessor_symbol_id: SymbolId, decoded: FxHashMap<String, String>) -> Self {
388        Self { accessor_symbol_id, decoded, modifications: 0 }
389    }
390}
391
392impl<'a> Traverse<'a, ()> for StringInliner {
393    fn exit_expression(&mut self, expr: &mut Expression<'a>, ctx: &mut TraverseCtx<'a, ()>) {
394        let Expression::CallExpression(call) = &*expr else { return; };
395        let Expression::Identifier(id) = &call.callee else { return; };
396        let Some(sym) = resolve::get_reference_symbol(ctx.scoping(), id) else { return; };
397        if sym != self.accessor_symbol_id { return; }
398
399        let code = codegen::expr_to_code(expr);
400        if let Some(decoded_str) = self.decoded.get(&code) {
401            *expr = create::make_string(decoded_str, &ctx.ast);
402            self.modifications += 1;
403        }
404    }
405}