1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
//! Canonicalization of SSA functions for clean code generation.
//!
//! Strips Nops, removes empty blocks, compacts block indices, and
//! ensures valid terminators after deobfuscation passes.
use std::collections::{BTreeMap, BTreeSet};
use crate::{
analysis::ssa::{PhiOperand, SsaBlock, SsaFunction, SsaInstruction, SsaOp, SsaVarId},
utils::BitSet,
};
/// Finds kept predecessors of a removed block during canonicalization.
///
/// When a block is removed, we need to find the actual predecessor blocks
/// (that are being kept) which would flow into the removed block. This is
/// used to properly update PHI node predecessors.
///
/// The function follows predecessor chains through removed blocks until it
/// finds blocks that are being kept (have entries in `block_remap`).
pub(crate) fn find_kept_predecessors(
removed_block: usize,
predecessors: &BTreeMap<usize, Vec<usize>>,
block_remap: &[Option<usize>],
redirect_map: &BTreeMap<usize, usize>,
) -> Vec<usize> {
let mut result = Vec::new();
let block_count = block_remap.len();
let mut visited = BitSet::new(block_count);
let mut queue = vec![removed_block];
while let Some(current) = queue.pop() {
if current >= block_count || !visited.insert(current) {
continue;
}
if let Some(preds) = predecessors.get(¤t) {
for &pred in preds {
if let Some(Some(new_idx)) = block_remap.get(pred) {
// This predecessor is kept - add its new index
result.push(*new_idx);
} else if redirect_map.contains_key(&pred) {
// This predecessor is also removed - follow the chain
queue.push(pred);
}
}
}
}
result
}
impl SsaFunction {
    /// Canonicalizes the SSA function for clean code generation.
    ///
    /// This method performs final cleanup after deobfuscation passes:
    ///
    /// 1. **Strip Nop instructions**: Removes all `SsaOp::Nop` instructions
    /// 2. **Identify empty blocks**: Marks blocks with no instructions or phi nodes for removal
    /// 3. **Build redirect map**: For removed blocks, finds their ultimate jump targets
    /// 4. **Update branch targets**: Retargets jumps to skip removed empty blocks
    /// 5. **Update PHI predecessors**: Fixes PHI node operands when predecessor blocks are removed
    /// 6. **Compact blocks**: Removes empty blocks and renumbers remaining blocks contiguously
    ///
    /// This should be called after all deobfuscation passes complete, before
    /// code generation. The resulting SSA is cleaner and easier to convert to IL.
    ///
    /// The phases are strictly order-dependent: `block_remap` is built in
    /// Phase 2, possibly amended in Phase 3, and then consumed read-only by
    /// Phases 4-7.
    pub fn canonicalize(&mut self) {
        // Phase 1: Strip Nop instructions from all blocks
        for block in &mut self.blocks {
            block
                .instructions_mut()
                .retain(|instr| !matches!(instr.op(), SsaOp::Nop));
        }
        // Collect blocks that must be preserved even if they end up empty:
        // - Exception handler entry blocks (try/handler/filter starts)
        // - Leave targets (exception handler exit blocks)
        let block_count = self.blocks.len();
        let mut protected_blocks = BitSet::new(block_count);
        // Protect exception handler entry blocks
        for handler in &self.exception_handlers {
            if let Some(try_block) = handler.try_start_block {
                if try_block < block_count {
                    protected_blocks.insert(try_block);
                }
            }
            if let Some(handler_block) = handler.handler_start_block {
                if handler_block < block_count {
                    protected_blocks.insert(handler_block);
                }
            }
            if let Some(filter_block) = handler.filter_start_block {
                if filter_block < block_count {
                    protected_blocks.insert(filter_block);
                }
            }
        }
        // Protect Leave targets (exception handler exit blocks)
        for block in &self.blocks {
            if let Some(SsaOp::Leave { target }) = block.terminator_op() {
                if *target < block_count {
                    protected_blocks.insert(*target);
                }
            }
        }
        // Phase 2: Identify empty blocks and build remapping.
        // An empty block has no instructions AND no phi nodes.
        // Exception: Keep block 0 (entry) and protected exception handler blocks even if empty.
        // `block_remap[old]` is Some(new) for kept blocks, None for removed ones.
        let mut block_remap: Vec<Option<usize>> = Vec::with_capacity(block_count);
        let mut new_index = 0usize;
        for (old_index, block) in self.blocks.iter().enumerate() {
            let is_empty = block.instructions().is_empty() && block.phi_nodes().is_empty();
            let is_entry = old_index == 0;
            let is_protected = protected_blocks.contains(old_index);
            if is_empty && !is_entry && !is_protected {
                block_remap.push(None); // This block will be removed
            } else {
                block_remap.push(Some(new_index));
                new_index += 1;
            }
        }
        // Phase 3: Build redirect map for removed blocks.
        // For each removed block, find its ultimate jump target (following jump chains).
        // If we can't find a redirect for a block, we must keep it instead of removing it.
        let mut redirect_map: BTreeMap<usize, usize> = BTreeMap::new();
        for old_index in 0..self.blocks.len() {
            if block_remap[old_index].is_none() {
                // This block is being removed - find where it would jump to
                if let Some(target) = self.find_ultimate_target(old_index, &block_remap) {
                    // `target` is already a NEW-layout index (see find_ultimate_target).
                    redirect_map.insert(old_index, target);
                } else {
                    // Can't find a redirect target - we must keep this block.
                    // Reassign it a new index.
                    // NOTE(review): this assigns the highest index (`new_index`)
                    // to a block that sits BEFORE later blocks which already
                    // received lower indices in Phase 2, while Phase 6 renumbers
                    // kept blocks by position. In that rare path, targets
                    // remapped through this entry may disagree with the block's
                    // final position — confirm whether this case can actually
                    // occur (it requires an empty, unprotected block with no
                    // resolvable jump chain).
                    block_remap[old_index] = Some(new_index);
                    new_index += 1;
                }
            }
        }
        // Build predecessor map for PHI updates (needed for Phase 5).
        // For each block, collect all blocks that have edges TO it.
        // Indices here are all OLD (pre-compaction) block indices.
        let mut predecessors: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
        for (block_idx, block) in self.blocks.iter().enumerate() {
            for target in block.successors() {
                predecessors.entry(target).or_default().push(block_idx);
            }
        }
        // Phase 4: Update all branch targets in remaining blocks.
        // (Removed blocks are also visited here, but their targets become
        // irrelevant once Phase 6 drops them.)
        for block in &mut self.blocks {
            for instr in block.instructions_mut() {
                Self::remap_branch_targets(instr.op_mut(), &block_remap, &redirect_map);
            }
        }
        // Phase 5: Update PHI node predecessors.
        // When a predecessor block is removed, we find the kept blocks that would have
        // flowed into the removed block and use those as the new predecessors.
        //
        // Special case: Some PHI operands may reference orphaned blocks (blocks with no
        // predecessors). This happens when deobfuscation passes modify the CFG without
        // properly updating PHI predecessors. We try to recover these by assigning
        // orphaned values to unaccounted-for predecessors.
        // Process each block's PHI nodes
        for block_idx in 0..self.blocks.len() {
            // Get the predecessors of THIS block (the one containing the PHI)
            // These are the OLD indices of blocks that jump to this block.
            let phi_block_preds: Vec<usize> =
                predecessors.get(&block_idx).cloned().unwrap_or_default();
            // Also compute the NEW indices of kept predecessors
            let kept_phi_block_preds: Vec<usize> = phi_block_preds
                .iter()
                .filter_map(|&old_pred| block_remap.get(old_pred).and_then(|opt| *opt))
                .collect();
            let block = &mut self.blocks[block_idx];
            for phi in block.phi_nodes_mut() {
                // Collect changes first (to avoid borrow issues).
                // Each entry is (operand index, in-place replacement, additions):
                // - Some(op) + empty vec  => overwrite the operand at that index
                // - None + vec            => remove the operand, then append the vec
                let mut changes: Vec<(usize, Option<PhiOperand>, Vec<PhiOperand>)> = Vec::new();
                // Track orphaned values (removed operands with no replacement)
                let mut orphaned_values: Vec<SsaVarId> = Vec::new();
                for (op_idx, operand) in phi.operands().iter().enumerate() {
                    let old_pred = operand.predecessor();
                    let value = operand.value();
                    if redirect_map.contains_key(&old_pred) {
                        // This predecessor was removed. Find all kept blocks that flow into it.
                        let kept_preds = find_kept_predecessors(
                            old_pred,
                            &predecessors,
                            &block_remap,
                            &redirect_map,
                        );
                        if kept_preds.is_empty() {
                            // Orphaned operand - track the value for potential recovery below
                            orphaned_values.push(value);
                        }
                        let replacements: Vec<PhiOperand> = kept_preds
                            .into_iter()
                            .map(|new_pred| PhiOperand::new(value, new_pred))
                            .collect();
                        // None = remove this operand, replacements = add these instead
                        changes.push((op_idx, None, replacements));
                    } else if let Some(Some(new_pred)) = block_remap.get(old_pred) {
                        // Predecessor was kept but renumbered - update in place
                        changes.push((op_idx, Some(PhiOperand::new(value, *new_pred)), Vec::new()));
                    }
                }
                // Apply changes in reverse order (to preserve indices when removing)
                for (op_idx, replacement, additions) in changes.into_iter().rev() {
                    if let Some(new_op) = replacement {
                        // Update in place
                        if let Some(operand) = phi.operands_mut().get_mut(op_idx) {
                            *operand = new_op;
                        }
                    } else {
                        // Remove the operand
                        phi.operands_mut().remove(op_idx);
                        // Add replacement operands
                        for op in additions {
                            phi.add_operand(op);
                        }
                    }
                }
                // Post-processing: try to recover orphaned values by assigning them
                // to unaccounted-for predecessors. This is a heuristic pairing:
                // orphans and unaccounted predecessors are zipped positionally,
                // so leftovers on either side are simply dropped.
                if !orphaned_values.is_empty() {
                    // Get the predecessors that are currently accounted for in the PHI
                    let accounted_preds: BTreeSet<usize> =
                        phi.operands().iter().map(PhiOperand::predecessor).collect();
                    // Find predecessors that are NOT accounted for
                    let unaccounted_preds: Vec<usize> = kept_phi_block_preds
                        .iter()
                        .copied()
                        .filter(|pred| !accounted_preds.contains(pred))
                        .collect();
                    // Assign orphaned values to unaccounted predecessors
                    for (orphan_val, &unaccounted_pred) in
                        orphaned_values.iter().zip(unaccounted_preds.iter())
                    {
                        phi.add_operand(PhiOperand::new(*orphan_val, unaccounted_pred));
                    }
                }
            }
        }
        // Phase 6: Remove empty blocks and compact block indices.
        let mut kept_blocks: Vec<SsaBlock> = Vec::with_capacity(new_index);
        for (old_index, block) in self.blocks.drain(..).enumerate() {
            if block_remap[old_index].is_some() {
                kept_blocks.push(block);
            }
        }
        // Update block indices in kept blocks (renumbered by position).
        for (new_idx, block) in kept_blocks.iter_mut().enumerate() {
            block.set_id(new_idx);
        }
        self.blocks = kept_blocks;
        // Phase 7: Remap exception handler block indices.
        for handler in &mut self.exception_handlers {
            handler.remap_block_indices(&block_remap);
        }
        // Phase 8: Ensure the method has a valid terminator.
        self.ensure_valid_terminator();
    }
    /// Ensures the function has a valid terminator path from the entry block.
    ///
    /// This handles the case where all meaningful code has been removed (e.g., after
    /// neutralizing 100% protection code in a module .cctor), leaving only Jumps to
    /// empty blocks. In such cases, we replace the entry block's terminator with a
    /// Return instruction to produce valid IL.
    fn ensure_valid_terminator(&mut self) {
        // Check if the method effectively does nothing useful:
        // - Only has Jump instructions (no actual code)
        // - All Jump targets lead to empty blocks or more Jumps
        // Note: PHI nodes are not inspected here; a function consisting solely
        // of phis and jumps is treated as having no useful code.
        let has_useful_code = self.blocks.iter().any(|block| {
            block.instructions().iter().any(|instr| {
                match instr.op() {
                    // Jumps and Nops don't count as useful - they're just control flow
                    SsaOp::Jump { .. } | SsaOp::Nop => false,
                    // Any other instruction (including returns, throws) is useful code
                    _ => true,
                }
            })
        });
        // If there's no useful code, replace entry block with just a Return
        if !has_useful_code {
            if let Some(entry_block) = self.blocks.first_mut() {
                entry_block.instructions_mut().clear();
                entry_block.phi_nodes_mut().clear();
                entry_block
                    .add_instruction(SsaInstruction::synthetic(SsaOp::Return { value: None }));
            }
        }
    }
    /// Finds the ultimate jump target for a block, following jump chains.
    ///
    /// Used during canonicalization to find where an empty block would
    /// ultimately transfer control to.
    ///
    /// `block_idx` is an OLD (pre-compaction) index; the return value, when
    /// `Some`, is a NEW-layout index taken from `block_remap`. Returns `None`
    /// when the chain hits a conditional terminator, runs off the end of the
    /// function, or cycles back on itself.
    fn find_ultimate_target(
        &self,
        block_idx: usize,
        block_remap: &[Option<usize>],
    ) -> Option<usize> {
        // `visited` doubles as the cycle detector: insert() returning false
        // means we have walked this block before.
        let mut visited = BitSet::new(self.blocks.len());
        let mut current = block_idx;
        while visited.insert(current) {
            let block = self.blocks.get(current)?;
            // Get the terminator's target
            let terminator = block.terminator_op();
            let target = terminator.and_then(|op| match op {
                SsaOp::Jump { target } => Some(*target),
                // For branches, we can't simplify - the block isn't truly empty
                _ => None,
            });
            // Handle the target
            match target {
                Some(t) if block_remap.get(t).copied().flatten().is_some() => {
                    // Target exists in new layout
                    return block_remap.get(t).copied().flatten();
                }
                Some(t) => {
                    // Target is also being removed, follow the chain
                    // NOTE(review): assumes jump targets are in range for
                    // `visited` (sized to blocks.len()) — confirm BitSet::insert
                    // tolerates out-of-range indices if targets can be stale.
                    current = t;
                }
                None => {
                    // No explicit jump target. Check if block is truly empty (no terminator).
                    // In CIL semantics, empty blocks fall through to the next block.
                    if terminator.is_none() && block.instructions().is_empty() {
                        // Try to fall through to the next block
                        let next_block = current + 1;
                        if next_block < self.blocks.len() {
                            if let Some(Some(new_idx)) = block_remap.get(next_block) {
                                // Next block exists in new layout
                                return Some(*new_idx);
                            } else if block_remap.get(next_block).is_some() {
                                // Next block is also being removed, follow the chain
                                current = next_block;
                                continue;
                            }
                        }
                    }
                    // No simple jump target and no fall-through, can't redirect
                    return None;
                }
            }
        }
        None // Cycle detected
    }
    /// Remaps branch targets according to the block remapping.
    ///
    /// `redirect_map` takes priority over `block_remap`: a target pointing at
    /// a removed block is sent to that block's precomputed redirect (already a
    /// NEW-layout index); otherwise a kept target is renumbered via
    /// `block_remap`. Targets matching neither map are left untouched.
    fn remap_branch_targets(
        op: &mut SsaOp,
        block_remap: &[Option<usize>],
        redirect_map: &BTreeMap<usize, usize>,
    ) {
        // Helper closure to remap a single target
        let remap_target = |target: &mut usize| {
            // First try redirect_map (for removed blocks with known targets)
            if let Some(&new_target) = redirect_map.get(target) {
                *target = new_target;
                return;
            }
            // Then try block_remap (for kept blocks)
            if let Some(Some(new_target)) = block_remap.get(*target) {
                *target = *new_target;
            }
        };
        match op {
            SsaOp::Jump { target } | SsaOp::Leave { target } => {
                remap_target(target);
            }
            SsaOp::Branch {
                true_target,
                false_target,
                ..
            }
            | SsaOp::BranchCmp {
                true_target,
                false_target,
                ..
            } => {
                remap_target(true_target);
                remap_target(false_target);
            }
            SsaOp::Switch {
                targets, default, ..
            } => {
                for target in targets.iter_mut() {
                    remap_target(target);
                }
                remap_target(default);
            }
            _ => {}
        }
    }
}