dotscope 0.7.0

A high-performance, cross-platform framework for analyzing and reverse engineering .NET PE executables
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
//! Global Value Numbering (GVN) pass.
//!
//! This pass eliminates redundant computations by detecting when the same
//! expression is computed multiple times with the same operands, and replacing
//! later uses with the earlier result.
//!
//! # Example
//!
//! Before:
//! ```text
//! v1 = add v0, 5
//! v2 = add v0, 5    // Redundant - same operation
//! v3 = mul v1, v2
//! ```
//!
//! After:
//! ```text
//! v1 = add v0, 5
//! v2 = add v0, 5    // Now dead (DCE will remove)
//! v3 = mul v1, v1   // v2 replaced with v1
//! ```
//!
//! # Algorithm
//!
//! The pass uses hash-based value numbering:
//!
//! 1. For each pure operation, create a hashable key from its opcode and operands
//! 2. Normalize commutative operations (e.g., `add v1, v0` → `add v0, v1`)
//! 3. If the key was seen before, replace uses of the new result with the old one
//! 4. Otherwise, record the key with this operation's result
//!
//! # Limitations
//!
//! - Only handles pure operations (no side effects)
//! - Does not perform code motion (dominator-based GVN would be more powerful)
//! - Works within a single method

use std::collections::HashMap;

use crate::{
    analysis::{BinaryOpKind, SsaFunction, SsaOp, SsaVarId, UnaryOpKind},
    compiler::{
        pass::{ModificationScope, SsaPass},
        CompilerContext, EventKind, EventLog,
    },
    metadata::token::Token,
    CilObject, Result,
};

/// A hashable key representing an operation for value numbering.
///
/// This captures the "value" of an expression - the operation type and operands,
/// but not the destination. Two operations with the same key compute the same value.
///
/// Uses the centralized `BinaryOpKind` and `UnaryOpKind` types from the SSA module.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum ValueKey {
    /// Binary operation: (kind, unsigned_flag, left, right)
    ///
    /// The unsigned flag is included for operations where signedness affects
    /// semantics (Div, Rem, Shr, Clt, Cgt). For other operations, it's normalized
    /// to `false` for consistent hashing.
    Binary(BinaryOpKind, bool, SsaVarId, SsaVarId),

    /// Unary operation: (kind, operand)
    Unary(UnaryOpKind, SsaVarId),

    /// Argument load: loading the same argument always produces the same value.
    LoadArg(usize),
}

impl ValueKey {
    /// Creates a normalized value key from an SSA operation.
    ///
    /// Returns `None` for operations that shouldn't be value-numbered
    /// (impure operations, constants, control flow, etc.).
    ///
    /// Uses `as_binary_op()` and `as_unary_op()` for extraction, then applies
    /// normalization for commutative operations.
    fn from_op(op: &SsaOp) -> Option<(Self, SsaVarId)> {
        // Try binary operations first
        if let Some(info) = op.as_binary_op() {
            // Skip overflow-checked operations (they may throw)
            if matches!(
                info.kind,
                BinaryOpKind::AddOvf | BinaryOpKind::SubOvf | BinaryOpKind::MulOvf
            ) {
                return None;
            }

            // Normalize for consistent hashing
            let normalized = info.normalized();
            let (kind, unsigned, left, right) = normalized.value_key();
            return Some((Self::Binary(kind, unsigned, left, right), normalized.dest));
        }

        // Try unary operations
        if let Some(info) = op.as_unary_op() {
            // Skip Ckfinite (it may throw)
            if info.kind == UnaryOpKind::Ckfinite {
                return None;
            }
            return Some((Self::Unary(info.kind, info.operand), info.dest));
        }

        // LoadArg: loading the same argument twice produces the same value.
        // This enables downstream passes (e.g., opaque predicate removal) to
        // detect self-comparisons like `ceq(arg0, arg0)`.
        if let SsaOp::LoadArg { dest, arg_index } = op {
            return Some((Self::LoadArg(*arg_index as usize), *dest));
        }

        // Skip everything else (constants, loads, stores, calls, control flow, etc.)
        None
    }
}

/// Global Value Numbering pass.
///
/// Eliminates redundant computations by detecting equivalent expressions
/// and replacing later occurrences with references to earlier results.
pub struct GlobalValueNumberingPass;

impl Default for GlobalValueNumberingPass {
    fn default() -> Self {
        Self::new()
    }
}

impl GlobalValueNumberingPass {
    /// Creates a new GVN pass.
    #[must_use]
    pub fn new() -> Self {
        Self
    }

    /// Runs GVN on a single SSA function.
    ///
    /// Returns the number of redundant expressions eliminated.
    fn run_gvn(ssa: &mut SsaFunction, method_token: Token, changes: &mut EventLog) -> usize {
        // Map from value key to the first variable that computed it
        let mut value_map: HashMap<ValueKey, SsaVarId> = HashMap::new();

        // Collect redundant definitions: (redundant_var, original_var, block_idx, instr_idx)
        let mut redundant: Vec<(SsaVarId, SsaVarId, usize, usize)> = Vec::new();

        // First pass: identify redundant computations
        for block in ssa.blocks() {
            let block_idx = block.id();
            for (instr_idx, instr) in block.instructions().iter().enumerate() {
                if let Some((key, dest)) = ValueKey::from_op(instr.op()) {
                    if let Some(&original) = value_map.get(&key) {
                        // This computation is redundant
                        redundant.push((dest, original, block_idx, instr_idx));
                    } else {
                        // First time seeing this value
                        value_map.insert(key, dest);
                    }
                }
            }
        }

        // Second pass: replace uses of redundant variables with originals,
        // including phi operands, then nop-out the dead instruction.
        // This prevents ping-ponging with DCE: without nop-out, DCE would find
        // the dead instruction as "new work" on the next normalization iteration.
        let mut total_replaced = 0;
        for (redundant_var, original_var, block_idx, instr_idx) in &redundant {
            let result = ssa.replace_uses_including_phis(*redundant_var, *original_var);
            if result.replaced > 0 {
                changes
                    .record(EventKind::ConstantFolded) // Reuse existing kind for expression elimination
                    .method(method_token)
                    .message(format!(
                        "GVN: {redundant_var}{original_var} ({} uses)",
                        result.replaced
                    ));
                total_replaced += result.replaced;
            }
            // Nop-out the redundant instruction so rebuild_ssa's strip_nops
            // removes it. This avoids leaving dead instructions for DCE to find.
            ssa.remove_instruction(*block_idx, *instr_idx);
        }

        total_replaced
    }
}

impl SsaPass for GlobalValueNumberingPass {
    fn name(&self) -> &'static str {
        "global-value-numbering"
    }

    fn description(&self) -> &'static str {
        "Eliminates redundant computations using value numbering"
    }

    fn modification_scope(&self) -> ModificationScope {
        ModificationScope::UsesOnly
    }

    fn run_on_method(
        &self,
        ssa: &mut SsaFunction,
        method_token: Token,
        ctx: &CompilerContext,
        _assembly: &CilObject,
    ) -> Result<bool> {
        let mut changes = EventLog::new();

        // GVN is a single-pass algorithm - no iteration needed
        // (unlike copy propagation which needs to resolve chains)
        Self::run_gvn(ssa, method_token, &mut changes);

        let changed = !changes.is_empty();
        if changed {
            ctx.events.merge(&changes);
        }
        Ok(changed)
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        analysis::{BinaryOpKind, ConstValue, SsaFunctionBuilder, SsaOp, SsaVarId, UnaryOpKind},
        compiler::{
            passes::gvn::{GlobalValueNumberingPass, ValueKey},
            EventLog,
        },
        metadata::token::Token,
    };

    #[test]
    fn test_value_key_binary_commutative() {
        // Test that commutative operations with swapped operands produce the same key
        let v0 = SsaVarId::from_index(0);
        let v1 = SsaVarId::from_index(1);
        let v2 = SsaVarId::from_index(2);
        let v3 = SsaVarId::from_index(3);

        // Add is commutative - should normalize to same key
        let add_op1 = SsaOp::Add {
            dest: v2,
            left: v0,
            right: v1,
        };
        let add_op2 = SsaOp::Add {
            dest: v3,
            left: v1,
            right: v0,
        }; // Swapped operands

        let (key1, _) = ValueKey::from_op(&add_op1).unwrap();
        let (key2, _) = ValueKey::from_op(&add_op2).unwrap();
        assert_eq!(key1, key2, "Add should be commutative");

        // Sub is not commutative - different keys
        let sub_op1 = SsaOp::Sub {
            dest: v2,
            left: v0,
            right: v1,
        };
        let sub_op2 = SsaOp::Sub {
            dest: v3,
            left: v1,
            right: v0,
        };

        let (key3, _) = ValueKey::from_op(&sub_op1).unwrap();
        let (key4, _) = ValueKey::from_op(&sub_op2).unwrap();
        assert_ne!(key3, key4, "Sub should NOT be commutative");
    }

    #[test]
    fn test_value_key_from_op() {
        let v0 = SsaVarId::from_index(0);
        let v1 = SsaVarId::from_index(1);
        let v2 = SsaVarId::from_index(2);

        // Add operation
        let add_op = SsaOp::Add {
            dest: v2,
            left: v0,
            right: v1,
        };
        let (key, dest) = ValueKey::from_op(&add_op).unwrap();
        assert_eq!(dest, v2);
        assert!(matches!(key, ValueKey::Binary(BinaryOpKind::Add, _, _, _)));

        // Neg operation
        let neg_op = SsaOp::Neg {
            dest: v1,
            operand: v0,
        };
        let (key, dest) = ValueKey::from_op(&neg_op).unwrap();
        assert_eq!(dest, v1);
        assert!(matches!(key, ValueKey::Unary(UnaryOpKind::Neg, _)));

        // Const should return None (not value-numbered)
        let const_op = SsaOp::Const {
            dest: v0,
            value: ConstValue::I32(42),
        };
        assert!(ValueKey::from_op(&const_op).is_none());
    }

    #[test]
    fn test_gvn_eliminates_redundant() {
        // Build SSA:
        // v2 = add v0, v1
        // v3 = add v0, v1  <- redundant
        // v4 = mul v2, v3
        let (mut ssa, v2) = {
            let mut v2_out = SsaVarId::from_index(0);
            let ssa = SsaFunctionBuilder::new(0, 0)
                .build_with(|f| {
                    f.block(0, |b| {
                        let v0 = b.const_i32(10);
                        let v1 = b.const_i32(20);
                        let v2 = b.add(v0, v1);
                        v2_out = v2;
                        let v3 = b.add(v0, v1); // Redundant
                        let v4 = b.mul(v2, v3);
                        b.ret_val(v4);
                    });
                })
                .unwrap();
            (ssa, v2_out)
        };

        let mut changes = EventLog::new();
        let replaced =
            GlobalValueNumberingPass::run_gvn(&mut ssa, Token::new(0x06000001), &mut changes);

        // Should have replaced uses of v3 with v2
        assert!(replaced > 0);
        assert!(!changes.is_empty());

        // The mul should now use v2 twice
        let block = ssa.block(0).unwrap();
        let mul_instr = &block.instructions()[4]; // After 2 consts and 2 adds
        if let SsaOp::Mul { left, right, .. } = mul_instr.op() {
            assert_eq!(*left, v2);
            assert_eq!(*right, v2);
        } else {
            panic!("Expected Mul instruction");
        }
    }

    #[test]
    fn test_gvn_commutative_order() {
        // Build SSA:
        // v2 = add v0, v1
        // v3 = add v1, v0  <- same as v2 (commutative)
        let (mut ssa, v2) = {
            let mut v2_out = SsaVarId::from_index(0);
            let ssa = SsaFunctionBuilder::new(0, 0)
                .build_with(|f| {
                    f.block(0, |b| {
                        let v0 = b.const_i32(10);
                        let v1 = b.const_i32(20);
                        let v2 = b.add(v0, v1);
                        v2_out = v2;
                        let v3 = b.add(v1, v0); // Swapped
                        b.ret_val(v3);
                    });
                })
                .unwrap();
            (ssa, v2_out)
        };

        let mut changes = EventLog::new();
        let replaced =
            GlobalValueNumberingPass::run_gvn(&mut ssa, Token::new(0x06000001), &mut changes);

        // Should detect that add v1, v0 == add v0, v1
        assert!(replaced > 0);

        // Return should now use v2
        let block = ssa.block(0).unwrap();
        let ret_instr = &block.instructions()[4]; // After 2 consts and 2 adds
        if let SsaOp::Return {
            value: Some(ret_val),
        } = ret_instr.op()
        {
            assert_eq!(*ret_val, v2);
        } else {
            panic!("Expected Return instruction");
        }
    }

    #[test]
    fn test_gvn_non_commutative_preserved() {
        // Build SSA:
        // v2 = sub v0, v1
        // v3 = sub v1, v0  <- NOT the same (sub is not commutative)
        let mut ssa = SsaFunctionBuilder::new(0, 0)
            .build_with(|f| {
                f.block(0, |b| {
                    let v0 = b.const_i32(10);
                    let v1 = b.const_i32(20);
                    let _v2 = b.sub(v0, v1);
                    let v3 = b.sub(v1, v0);
                    b.ret_val(v3);
                });
            })
            .unwrap();

        let mut changes = EventLog::new();
        let replaced =
            GlobalValueNumberingPass::run_gvn(&mut ssa, Token::new(0x06000001), &mut changes);

        // Should NOT replace - these are different values
        assert_eq!(replaced, 0);
        assert!(changes.is_empty());
    }
}