dotscope 0.7.0

A high-performance, cross-platform framework for analyzing and reverse engineering .NET PE executables
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
//! BitMono string encryption detection and decryption technique.
//!
//! Detects BitMono's StringsEncryption protection, which encrypts string literals
//! using AES-256-CBC (via `RijndaelManaged` or `Aes`) with PBKDF2-HMAC-SHA1 key
//! derivation (`Rfc2898DeriveBytes`). Unlike ConfuserEx (which uses emulation),
//! BitMono's encryption is fully static — the key material is embedded as FieldRVA
//! data and the algorithm is standard AES-256-CBC.
//!
//! # CIL Pattern
//!
//! Each string literal is replaced with a 4-instruction sequence:
//! ```text
//! ldsfld     byte[] <encrypted_data_field>     // per-string FieldRVA data
//! ldsfld     byte[] <salt_field>               // shared 8-byte salt
//! ldsfld     byte[] <crypt_key_field>          // shared 8-byte key
//! call       string Decrypt(byte[], byte[], byte[])
//! ```
//!
//! # Detection
//!
//! Scans all static methods for references to crypto types:
//! - `RijndaelManaged` or `Aes` (symmetric cipher)
//! - `Rfc2898DeriveBytes` (key derivation)
//!
//! Also checks for the characteristic decryptor signature: a static method
//! returning `string` that takes 3+ `byte[]` parameters.
//!
//! # Feature Gate
//!
//! The actual string decryption requires the `legacy-crypto` feature for
//! PBKDF2-HMAC-SHA1 key derivation. Detection works without the feature,
//! but decryption is only available when `legacy-crypto` is enabled.
//!
//! # Two-Phase Design
//!
//! **Phase A — `initialize()`** (byte-level, runs before SSA):
//! - Tags infrastructure fields in the analysis context (encrypted data, salt, key fields)
//! - Registers decryptor method tokens so cleanup knows what to remove
//!
//! **Phase B — `StringDecryptionPass`** (SSA pass, runs AFTER `ReflectionDevirtualizationPass`):
//! - Finds `Call` to decryptor methods in SSA blocks
//! - Traces each site's `LoadStaticField` args to identify its specific encrypted
//!   data, salt, and key fields
//! - Derives AES key material per (salt, key) pair (cached for efficiency)
//! - Decrypts only fields actually referenced as decryptor arguments
//! - Replaces `Call` with `DecryptedString` constant, NOPs intermediate instructions

use std::{
    any::Any,
    collections::{HashMap, HashSet},
    sync::Arc,
};

#[cfg(feature = "legacy-crypto")]
use crate::deobfuscation::passes::bitmono::StringDecryptionPass;
use crate::{
    analysis::{ConstValue, SsaFunction, SsaOp, SsaVarId},
    cilassembly::CleanupRequest,
    compiler::{PassPhase, SsaPass},
    deobfuscation::{
        context::AnalysisContext,
        techniques::{Detection, Evidence, Technique, TechniqueCategory},
    },
    metadata::{
        tables::{MemberRefRaw, TableId, TypeDefRaw, TypeRefRaw},
        token::Token,
    },
    utils::CryptoParameters,
    CilObject,
};

/// Findings from BitMono string encryption detection.
#[derive(Debug)]
pub struct StringFindings {
    /// Token of the type containing the decryptor method, if identified.
    pub decryptor_type: Option<Token>,
    /// Tokens of identified decryptor methods.
    pub decryptor_tokens: Vec<Token>,
    /// Number of call sites that invoke the decryptor method.
    pub call_sites: usize,
    /// Infrastructure field tokens (encrypted data fields, salt, key).
    /// These are `byte[]` static fields loaded by decryptor call sites.
    pub infrastructure_fields: Vec<Token>,
    /// FieldRVA backing field tokens (ExplicitLayout structs initialized via
    /// `RuntimeHelpers.InitializeArray` to hold raw encrypted data).
    pub constant_data_fields: Vec<Token>,
    /// Types owning the FieldRVA backing fields (ExplicitLayout value types).
    pub constant_data_types: Vec<Token>,
    /// Crypto parameters extracted from the decryptor method's SSA.
    pub crypto_params: CryptoParameters,
}

/// Detects BitMono's AES+PBKDF2 string encryption.
///
/// Identifies the decryptor infrastructure by scanning for methods that
/// reference `RijndaelManaged`/`Aes` and `Rfc2898DeriveBytes`, or by
/// matching the characteristic `string(byte[], byte[], byte[])` signature.
/// Requires the `bitmono.calli` technique to run first so that `calli`
/// trampolines have been resolved to direct calls before decryption.
pub struct BitMonoStrings;

impl Technique for BitMonoStrings {
    fn id(&self) -> &'static str {
        "bitmono.strings"
    }

    fn name(&self) -> &'static str {
        "BitMono String Decryption"
    }

    fn category(&self) -> TechniqueCategory {
        TechniqueCategory::Value
    }

    fn requires(&self) -> &[&'static str] {
        &["bitmono.calli"]
    }

    #[cfg(feature = "legacy-crypto")]
    fn enabled(&self, _config: &crate::deobfuscation::config::EngineConfig) -> bool {
        true
    }

    #[cfg(not(feature = "legacy-crypto"))]
    fn enabled(&self, _config: &crate::deobfuscation::config::EngineConfig) -> bool {
        false
    }

    fn detect(&self, _assembly: &CilObject) -> Detection {
        // IL-level detection is not used — all detection happens in detect_ssa()
        // where SSA def-use chains give us precise crypto parameter extraction
        // and junk-immune decryptor identification.
        Detection::new_empty()
    }

    fn detect_ssa(&self, ctx: &AnalysisContext, assembly: &CilObject) -> Detection {
        // Phase 1: Find decryptor methods by scanning SSA for NewObj targeting
        // crypto types (Rfc2898DeriveBytes, RijndaelManaged/Aes).
        let mut decryptor_tokens: Vec<Token> = Vec::new();
        let mut decryptor_type: Option<Token> = None;

        for entry in ctx.ssa_functions.iter() {
            let method_token = *entry.key();
            let ssa = entry.value();

            if has_crypto_ops(ssa, assembly) {
                decryptor_tokens.push(method_token);
                if decryptor_type.is_none() {
                    if let Some(method) = assembly.method(&method_token) {
                        if let Some(decl_type) = method.declaring_type_rc() {
                            decryptor_type = Some(decl_type.token);
                        }
                    }
                }
            }
        }

        if decryptor_tokens.is_empty() {
            return Detection::new_empty();
        }

        // Phase 2: Extract crypto parameters from the first decryptor's SSA.
        let crypto_params = decryptor_tokens
            .first()
            .and_then(|token| ctx.ssa_functions.get(token))
            .map(|ssa| extract_crypto_parameters(&ssa, assembly))
            .unwrap_or_default();

        log::info!(
            "BitMono strings: extracted crypto params — {} iterations, {}/{} key/iv, {}",
            crypto_params.iterations,
            crypto_params.key_size,
            crypto_params.iv_size,
            crypto_params.hash_algorithm,
        );

        // Phase 3: Count call sites and collect infrastructure fields via SSA.
        let decryptor_set: HashSet<Token> = decryptor_tokens.iter().copied().collect();
        let mut call_site_count = 0usize;
        let mut infrastructure_fields: Vec<Token> = Vec::new();

        for entry in ctx.ssa_functions.iter() {
            let ssa = entry.value();
            for block in ssa.blocks() {
                for instr in block.instructions() {
                    let (call_token, args) = match instr.op() {
                        SsaOp::Call { method, args, .. } => (method.token(), args),
                        _ => continue,
                    };
                    if !decryptor_set.contains(&call_token) {
                        continue;
                    }
                    call_site_count += 1;

                    // Trace LoadStaticField args to collect infrastructure field tokens
                    for arg in args {
                        if let Some(field_token) = resolve_ldsfld_field(ssa, *arg) {
                            if !infrastructure_fields.contains(&field_token) {
                                infrastructure_fields.push(field_token);
                            }
                        }
                    }
                }
            }
        }

        // Phase 4: Collect FieldRVA backing fields and their owning types.
        let (constant_data_fields, constant_data_types) =
            collect_constant_data_tokens(assembly, &infrastructure_fields);

        let mut evidence = vec![Evidence::BytecodePattern(
            "BitMono string decryptor (AES + Rfc2898DeriveBytes)".to_string(),
        )];

        if call_site_count > 0 {
            evidence.push(Evidence::BytecodePattern(format!(
                "{call_site_count} call sites reference the string decryptor"
            )));
        }

        let findings = StringFindings {
            decryptor_type,
            decryptor_tokens,
            call_sites: call_site_count,
            infrastructure_fields,
            constant_data_fields,
            constant_data_types,
            crypto_params,
        };

        Detection::new_detected(
            evidence,
            Some(Box::new(findings) as Box<dyn Any + Send + Sync>),
        )
    }

    fn ssa_phase(&self) -> Option<PassPhase> {
        Some(PassPhase::Simplify)
    }

    #[cfg(feature = "legacy-crypto")]
    fn create_pass(
        &self,
        _ctx: &AnalysisContext,
        detection: &Detection,
        _assembly: &Arc<CilObject>,
    ) -> Vec<Box<dyn SsaPass>> {
        let Some(findings) = detection.findings::<StringFindings>() else {
            return Vec::new();
        };
        vec![Box::new(StringDecryptionPass::from_findings(
            findings,
            findings.crypto_params.clone(),
        ))]
    }

    #[cfg(not(feature = "legacy-crypto"))]
    fn create_pass(
        &self,
        _ctx: &AnalysisContext,
        _detection: &Detection,
        _assembly: &Arc<CilObject>,
    ) -> Vec<Box<dyn SsaPass>> {
        Vec::new()
    }

    fn cleanup(&self, detection: &Detection) -> Option<CleanupRequest> {
        let findings = detection.findings::<StringFindings>()?;

        let mut request = CleanupRequest::new();

        // Remove the decryptor type (cascades to its methods and fields)
        if let Some(decryptor_type) = findings.decryptor_type {
            request.add_type(decryptor_type);
        }

        // Remove infrastructure fields (encrypted data byte[] fields, salt, key)
        for token in &findings.infrastructure_fields {
            request.add_field(*token);
        }

        // Remove FieldRVA backing fields (ExplicitLayout data holders)
        for token in &findings.constant_data_fields {
            request.add_field(*token);
        }

        // Remove ExplicitLayout value types that own the backing fields.
        // These extend System.ValueType, so removing them cascades to remove
        // the ValueType TypeRef → System.Private.CoreLib AssemblyRef chain.
        for token in &findings.constant_data_types {
            request.add_type(*token);
        }

        if request.has_deletions() {
            Some(request)
        } else {
            None
        }
    }
}

/// Collects FieldRVA backing field tokens and their owning ExplicitLayout types.
///
/// Scans `.cctor` methods for `RuntimeHelpers.InitializeArray` patterns to build
/// a mapping from byte[] field → backing FieldRVA field. For each infrastructure
/// field that has a backing field, the backing field token is collected. The owning
/// ExplicitLayout value types are also collected — removing these cascades to remove
/// the `ValueType` TypeRef → `System.Private.CoreLib` AssemblyRef chain.
fn collect_constant_data_tokens(
    assembly: &CilObject,
    infrastructure_fields: &[Token],
) -> (Vec<Token>, Vec<Token>) {
    let mut constant_data_fields = Vec::new();
    let mut constant_data_types = Vec::new();

    if infrastructure_fields.is_empty() {
        return (constant_data_fields, constant_data_types);
    }

    // Build mapping: byte_array_field_token → backing_field_token
    let init_map = crate::deobfuscation::utils::build_init_array_map(assembly);
    if init_map.is_empty() {
        return (constant_data_fields, constant_data_types);
    }

    // Collect backing field tokens for infrastructure fields
    for infra_token in infrastructure_fields {
        if let Some(&backing_token) = init_map.get(infra_token) {
            if !constant_data_fields.contains(&backing_token) {
                constant_data_fields.push(backing_token);
            }
        }
    }

    // Find types that ONLY contain infrastructure backing fields — safe to remove.
    // A type like `<PrivateImplementationDetails>` may contain both string encryption
    // backing fields (removable) AND RuntimeHelpers.InitializeArray backing fields
    // (must survive). Only mark the type for deletion if ALL its FieldRVA fields
    // are being removed — otherwise we'd break array initialization.
    let removed_set: HashSet<Token> = constant_data_fields.iter().copied().collect();

    for type_entry in assembly.types().iter() {
        let cil_type = type_entry.value();

        if cil_type.token.row() == 1 && cil_type.token.table() == 0x02 {
            continue;
        }

        // Collect all FieldRVA-backed fields owned by this type
        let rva_fields: Vec<Token> = cil_type
            .fields
            .iter()
            .filter(|(_, field)| field.flags.has_field_rva())
            .map(|(_, field)| field.token)
            .collect();

        if rva_fields.is_empty() {
            continue;
        }

        // Only remove the type if ALL its FieldRVA fields are being removed.
        // If some survive (e.g., InitializeArray backing data), the type must too.
        if rva_fields.iter().all(|t| removed_set.contains(t)) {
            constant_data_types.push(cil_type.token);
        }
    }

    (constant_data_fields, constant_data_types)
}

/// Checks whether an SSA function contains NewObj/Call targeting crypto types.
///
/// Scans for `NewObj` or `Call` instructions whose resolved target name contains
/// both an AES type (`RijndaelManaged` or `Aes`) and `Rfc2898DeriveBytes`.
///
/// # Arguments
///
/// * `ssa` - The SSA function to scan.
/// * `assembly` - The assembly for resolving type names from metadata tokens.
fn has_crypto_ops(ssa: &SsaFunction, assembly: &CilObject) -> bool {
    let mut has_aes = false;
    let mut has_pbkdf2 = false;

    for block in ssa.blocks() {
        for instr in block.instructions() {
            let token = match instr.op() {
                SsaOp::NewObj { ctor, .. } => ctor.token(),
                SsaOp::Call { method, .. } | SsaOp::CallVirt { method, .. } => method.token(),
                _ => continue,
            };

            if let Some(name) = resolve_type_name(assembly, token) {
                if name.contains("RijndaelManaged") || name.contains("Aes") {
                    has_aes = true;
                }
                if name.contains("Rfc2898DeriveBytes") {
                    has_pbkdf2 = true;
                }
            }

            if has_aes && has_pbkdf2 {
                return true;
            }
        }
    }

    false
}

/// Extracts crypto parameters from a decryptor method's SSA.
///
/// Scans the SSA for:
/// 1. `NewObj { ctor: Rfc2898DeriveBytes::.ctor, args: [..., iterations] }` to get
///    the PBKDF2 iteration count
/// 2. `CallVirt { method: GetBytes, args: [_, size] }` to get key and IV sizes
///
/// Falls back to [`CryptoParameters::default()`] for any parameter that cannot
/// be resolved from the SSA.
fn extract_crypto_parameters(ssa: &SsaFunction, assembly: &CilObject) -> CryptoParameters {
    let mut params = CryptoParameters::default();

    // Build a map from SsaVarId → ConstValue for resolving arguments
    let const_map = build_const_map(ssa);

    let mut get_bytes_sizes: Vec<u32> = Vec::new();

    for block in ssa.blocks() {
        for instr in block.instructions() {
            match instr.op() {
                // NewObj Rfc2898DeriveBytes(key, salt, iterations)
                // or NewObj Rfc2898DeriveBytes(key, salt, iterations, hashAlgorithm)
                SsaOp::NewObj { ctor, args, .. } => {
                    if let Some(name) = resolve_type_name(assembly, ctor.token()) {
                        if name.contains("Rfc2898DeriveBytes") && args.len() >= 3 {
                            // 3rd arg (index 2) is the iteration count
                            if let Some(ConstValue::I32(iters)) = const_map.get(&args[2]) {
                                if *iters > 0 {
                                    params.iterations = *iters as u32;
                                }
                            }
                            // 4th arg (index 3), if present, is HashAlgorithmName
                            // For now we keep SHA1 default — .NET's HashAlgorithmName
                            // is a struct loaded via ldsfld, not a simple constant.
                        }
                    }
                }
                // CallVirt GetBytes(int32) — extract key and IV sizes
                SsaOp::CallVirt { method, args, .. } => {
                    if let Some(name) = assembly.resolve_method_name(method.token()) {
                        if name == "GetBytes" && args.len() == 2 {
                            // args[0] = this (Rfc2898DeriveBytes instance), args[1] = size
                            if let Some(ConstValue::I32(size)) = const_map.get(&args[1]) {
                                if *size > 0 {
                                    get_bytes_sizes.push(*size as u32);
                                }
                            }
                        }
                    }
                }
                _ => {}
            }
        }
    }

    // First GetBytes call = key size, second = IV size
    if let Some(&key_size) = get_bytes_sizes.first() {
        params.key_size = key_size as usize;
    }
    if let Some(&iv_size) = get_bytes_sizes.get(1) {
        params.iv_size = iv_size as usize;
    }

    params
}

/// Builds a map from SSA variable IDs to their constant values.
///
/// Only includes variables defined by `Const` instructions. Used for resolving
/// arguments to crypto API calls during parameter extraction.
///
/// # Arguments
///
/// * `ssa` - The SSA function to scan for constant definitions.
fn build_const_map(ssa: &SsaFunction) -> HashMap<SsaVarId, ConstValue> {
    let mut map = HashMap::new();
    for block in ssa.blocks() {
        for instr in block.instructions() {
            if let SsaOp::Const { dest, value } = instr.op() {
                map.insert(*dest, value.clone());
            }
        }
    }
    map
}

/// Resolves a `LoadStaticField` definition for an SSA variable.
///
/// Scans the SSA for the instruction that defines `var_id` and returns the
/// field token if it's a `LoadStaticField`.
///
/// # Arguments
///
/// * `ssa` - The SSA function containing the variable definition.
/// * `var_id` - The SSA variable ID to resolve.
fn resolve_ldsfld_field(ssa: &SsaFunction, var_id: SsaVarId) -> Option<Token> {
    for block in ssa.blocks() {
        for instr in block.instructions() {
            if let SsaOp::LoadStaticField { dest, field } = instr.op() {
                if *dest == var_id {
                    return Some(field.token());
                }
            }
        }
    }
    None
}

/// Resolves a metadata token to a type name string for crypto detection.
///
/// For MemberRef tokens, follows the class reference to get the declaring type name.
/// For TypeRef tokens, returns the type name directly. For TypeDef tokens, returns
/// the type name from the local assembly.
fn resolve_type_name(assembly: &CilObject, token: Token) -> Option<String> {
    let tables = assembly.tables()?;
    let strings = assembly.strings()?;

    match token.table() {
        // MemberRef (0x0A) — follow class to get declaring type
        0x0A => {
            let memberref_table = tables.table::<MemberRefRaw>()?;
            let memberref = memberref_table.get(token.row())?;
            if memberref.class.tag == TableId::TypeRef {
                let typeref_table = tables.table::<TypeRefRaw>()?;
                let typeref = typeref_table.get(memberref.class.row)?;
                let name = strings.get(typeref.type_name as usize).ok()?;
                let ns = strings
                    .get(typeref.type_namespace as usize)
                    .ok()
                    .unwrap_or("");
                Some(format!("{ns}.{name}"))
            } else if memberref.class.tag == TableId::TypeDef {
                let typedef_table = tables.table::<TypeDefRaw>()?;
                let typedef = typedef_table.get(memberref.class.row)?;
                let name = strings.get(typedef.type_name as usize).ok()?;
                let ns = strings
                    .get(typedef.type_namespace as usize)
                    .ok()
                    .unwrap_or("");
                Some(format!("{ns}.{name}"))
            } else {
                None
            }
        }
        // TypeRef (0x01)
        0x01 => {
            let typeref_table = tables.table::<TypeRefRaw>()?;
            let typeref = typeref_table.get(token.row())?;
            let name = strings.get(typeref.type_name as usize).ok()?;
            let ns = strings
                .get(typeref.type_namespace as usize)
                .ok()
                .unwrap_or("");
            Some(format!("{ns}.{name}"))
        }
        // TypeDef (0x02)
        0x02 => {
            let typedef_table = tables.table::<TypeDefRaw>()?;
            let typedef = typedef_table.get(token.row())?;
            let name = strings.get(typedef.type_name as usize).ok()?;
            let ns = strings
                .get(typedef.type_namespace as usize)
                .ok()
                .unwrap_or("");
            Some(format!("{ns}.{name}"))
        }
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        deobfuscation::techniques::{bitmono::BitMonoStrings, Technique},
        test::helpers::load_sample,
    };

    /// Verify that IL-level detect() is a no-op (detection is SSA-based).
    ///
    /// Positive detection is tested through the full pipeline in
    /// `dotscope/tests/bitmono.rs::test_all_bitmono_samples`.
    #[test]
    fn test_detect_is_noop() {
        let assembly = load_sample("tests/samples/packers/bitmono/0.39.0/bitmono_strings.exe");
        let technique = BitMonoStrings;
        let detection = technique.detect(&assembly);
        assert!(
            !detection.is_detected(),
            "IL-level detect() should be a no-op — detection happens in detect_ssa()"
        );
    }

    #[test]
    fn test_detect_negative() {
        let assembly = load_sample("tests/samples/packers/confuserex/1.6.0/original.exe");
        let technique = BitMonoStrings;
        let detection = technique.detect(&assembly);
        assert!(
            !detection.is_detected(),
            "BitMonoStrings should not detect string encryption in a non-BitMono assembly"
        );
    }
}