dotscope 0.7.0

A high-performance, cross-platform framework for analyzing and reverse engineering .NET PE executables
Documentation
//! ConfuserEx-specific emulation hooks.
//!
//! This module provides hooks for intercepting and handling ConfuserEx-specific
//! method patterns during emulation, such as inline LZMA decompression.
//!
//! # LZMA Decompression Hook
//!
//! ConfuserEx embeds an inline LZMA decompressor that doesn't use the BCL.
//! The [`create_lzma_hook`] function creates a hook that:
//!
//! 1. Matches internal methods with signature `byte[] -> byte[]`
//! 2. Checks if the input data has a ConfuserEx LZMA header
//! 3. Decompresses natively instead of emulating the complex LZMA algorithm
//!
//! This dramatically speeds up resource decryption by avoiding thousands of
//! emulated instructions for the LZMA decompression loop.
//!
//! # Anti-Tamper Stub Hook
//!
//! After anti-tamper byte-level deobfuscation, the anti-tamper initialization
//! methods should not be executed again during warmup emulation. The
//! [`create_anti_tamper_stub_hook`] function creates a hook that stubs out
//! these methods to prevent them from corrupting state.

use std::{collections::HashSet, sync::Arc};

use crate::{
    emulation::{EmValue, EmulationThread, Hook, HookContext, HookPriority, PreHookResult},
    metadata::{tables::TableId, token::Token, typesystem::CilFlavor},
    utils::{decompress_confuserex_lzma, is_confuserex_lzma},
};

/// Creates a hook for native ConfuserEx LZMA decompression.
///
/// This hook intercepts internal methods that match the LZMA decompressor pattern:
/// - Internal method (`MethodDef`, not external BCL)
/// - Signature: `byte[] MethodName(byte[])` or `static byte[] MethodName(byte[])`
/// - Input data starts with ConfuserEx LZMA header (0x5D followed by dictionary size)
///
/// When matched, the hook decompresses the data natively using the `lzma-rs` crate
/// instead of emulating the complex LZMA algorithm instruction-by-instruction.
///
/// # Returns
///
/// A [`Hook`] configured with appropriate matchers and a pre-hook handler that
/// performs native decompression.
#[must_use]
pub fn create_lzma_hook() -> Hook {
    Hook::new("confuserex-lzma-decompressor")
        .with_priority(HookPriority::HIGH) // Run before generic handlers
        .match_internal_method() // Only internal MethodDef, not external BCL
        .match_runtime("lzma-candidate-check", is_lzma_decompressor_candidate)
        .pre(lzma_decompression_handler)
}

/// Creates a hook that stubs out protection initialization methods.
///
/// After anti-tamper byte-level deobfuscation has decrypted method bodies,
/// the protection initialization methods (anti-tamper, anti-debug, resource handlers)
/// should not be executed during warmup emulation. If they were to run, they would
/// attempt to access data that has already been modified/consumed by the byte-level
/// deobfuscation, or perform P/Invoke calls that aren't supported in emulation.
///
/// This hook intercepts calls to the specified method tokens and immediately returns
/// an appropriate default value based on the method's return type.
///
/// # Arguments
///
/// * `tokens` - Set of protection method tokens to stub out.
///
/// # Returns
///
/// A [`Hook`] that bypasses execution of the specified methods.
#[must_use]
pub fn create_anti_tamper_stub_hook(tokens: HashSet<Token>) -> Hook {
    let tokens = Arc::new(tokens);

    Hook::new("confuserex-protection-stub")
        .with_priority(HookPriority::HIGHEST) // Highest priority - always match first
        .match_runtime("protection-token-check", {
            let tokens = Arc::clone(&tokens);
            move |ctx: &HookContext<'_>, _thread: &EmulationThread| {
                // Check if this is an internal method (MethodDef table = 0x06)
                // and if the token is in our stub set
                let is_methoddef = ctx.method_token.is_table(TableId::MethodDef);
                is_methoddef && tokens.contains(&ctx.method_token)
            }
        })
        .pre(|ctx, _thread| {
            // Return an appropriate default value based on the method's return type.
            // This prevents stack underflow when the caller expects a return value.
            let return_value = match ctx.return_type {
                None | Some(CilFlavor::Void) => None, // Void method
                Some(
                    CilFlavor::Boolean // false
                    | CilFlavor::I1 | CilFlavor::U1
                    | CilFlavor::I2 | CilFlavor::U2 | CilFlavor::Char
                    | CilFlavor::I4 | CilFlavor::U4
                    | CilFlavor::TypedRef { .. }
                    | CilFlavor::Unknown, // Fallback
                ) => Some(EmValue::I32(0)),
                Some(CilFlavor::I8 | CilFlavor::U8) => Some(EmValue::I64(0)),
                Some(CilFlavor::R4) => Some(EmValue::F32(0.0)),
                Some(CilFlavor::R8) => Some(EmValue::F64(0.0)),
                Some(
                    CilFlavor::I | CilFlavor::U
                    | CilFlavor::Pointer | CilFlavor::ByRef
                    | CilFlavor::FnPtr { .. }
                    | CilFlavor::Pinned,
                ) => Some(EmValue::NativeInt(0)),
                Some(
                    CilFlavor::String
                    | CilFlavor::Object
                    | CilFlavor::Class
                    | CilFlavor::Interface
                    | CilFlavor::Array { .. }
                    | CilFlavor::GenericInstance
                    | CilFlavor::GenericParameter { .. },
                ) => Some(EmValue::Null),
                Some(CilFlavor::ValueType) => {
                    // For value types, return a zeroed struct would be ideal,
                    // but EmValue::I32(0) works for small value types
                    Some(EmValue::I32(0))
                }
            };
            PreHookResult::Bypass(return_value)
        })
}

/// Checks if the method is an LZMA decompressor candidate.
///
/// A candidate must:
/// 1. Have signature `byte[] -> byte[]` (single byte array param, returns byte array)
/// 2. Have input data that looks like ConfuserEx LZMA format
///
/// Also supports streaming decompress methods that take byte[] but may not return byte[].
fn is_lzma_decompressor_candidate(ctx: &HookContext<'_>, thread: &EmulationThread) -> bool {
    // Check if we have a byte[] parameter (first param for static, or second for instance)
    let has_byte_array_param = match ctx.param_types {
        Some(params) if !params.is_empty() => params.iter().any(|p| {
            matches!(
                p,
                CilFlavor::Array { element_type, rank, .. }
                    if *rank == 1 && matches!(element_type.as_ref(), CilFlavor::U1 | CilFlavor::I1)
            )
        }),
        _ => false,
    };

    if !has_byte_array_param {
        return false;
    }

    // Check return type is byte[] (Array of U1/I1 with rank 1)
    // For streaming APIs, also allow void return type if we have LZMA input
    let returns_byte_array = match &ctx.return_type {
        Some(CilFlavor::Array {
            element_type, rank, ..
        }) => *rank == 1 && matches!(element_type.as_ref(), CilFlavor::U1 | CilFlavor::I1),
        _ => false,
    };

    // If it has byte[] param and returns byte[], it's a strong candidate
    // If it only has byte[] param (no byte[] return), still check for LZMA input
    if !returns_byte_array && !has_byte_array_param {
        return false;
    }

    // Check if input data looks like ConfuserEx LZMA
    is_lzma_input(ctx, thread)
}

/// Checks if any of the method's arguments contains ConfuserEx LZMA data.
fn is_lzma_input(ctx: &HookContext<'_>, thread: &EmulationThread) -> bool {
    // Check all arguments for byte[] containing LZMA data
    for (idx, arg) in ctx.args.iter().enumerate() {
        // Skip 'this' for instance methods
        if idx == 0 && ctx.this.is_some() {
            continue;
        }

        let EmValue::ObjectRef(heap_ref) = arg else {
            continue;
        };

        // Try to get array data from heap — both methods return Result<Option<...>>,
        // so propagate errors and fall through on None.
        let byte_data = match thread
            .heap()
            .get_array_as_bytes(*heap_ref, ctx.pointer_size)
        {
            Ok(Some(data)) => data,
            Ok(None) | Err(_) => match thread.heap().get_byte_array(*heap_ref) {
                Ok(Some(data)) => data,
                Ok(None) | Err(_) => continue,
            },
        };

        // Check for ConfuserEx LZMA header
        if is_confuserex_lzma(&byte_data) {
            return true;
        }
    }

    false
}

/// Pre-hook handler that performs native LZMA decompression.
fn lzma_decompression_handler(
    ctx: &HookContext<'_>,
    thread: &mut EmulationThread,
) -> PreHookResult {
    // Find the byte[] argument containing LZMA data
    let mut lzma_data: Option<Vec<u8>> = None;

    for (idx, arg) in ctx.args.iter().enumerate() {
        // Skip 'this' for instance methods
        if idx == 0 && ctx.this.is_some() {
            continue;
        }

        let EmValue::ObjectRef(heap_ref) = arg else {
            continue;
        };

        // Try to get array data from heap — both methods return Result<Option<...>>,
        // so propagate errors and fall through on None.
        let byte_data = match thread
            .heap()
            .get_array_as_bytes(*heap_ref, ctx.pointer_size)
        {
            Ok(Some(data)) => data,
            Ok(None) | Err(_) => match thread.heap().get_byte_array(*heap_ref) {
                Ok(Some(data)) => data,
                Ok(None) | Err(_) => continue,
            },
        };

        // Check for ConfuserEx LZMA header
        if is_confuserex_lzma(&byte_data) {
            lzma_data = Some(byte_data);
            break;
        }
    }

    let Some(byte_data) = lzma_data else {
        // No LZMA data found even though matcher thought there was.
        // This shouldn't happen, but if it does, return null to avoid stack issues.
        return PreHookResult::Bypass(Some(EmValue::Null));
    };

    // Decompress using native LZMA
    let Ok(decompressed) = decompress_confuserex_lzma(&byte_data) else {
        // Decompression failed - data looked like LZMA header but wasn't valid.
        // Return null rather than letting emulation proceed, which could cause
        // stack underflow or other issues when the hook matched but didn't handle.
        return PreHookResult::Bypass(Some(EmValue::Null));
    };

    // Allocate result as byte[] on the heap
    let elements: Vec<EmValue> = decompressed
        .into_iter()
        .map(|b| EmValue::I32(i32::from(b)))
        .collect();

    match thread
        .heap_mut()
        .alloc_array_with_values(CilFlavor::U1, elements)
    {
        Ok(result_ref) => PreHookResult::Bypass(Some(EmValue::ObjectRef(result_ref))),
        Err(_) => {
            // Allocation failed - return null rather than causing stack issues
            PreHookResult::Bypass(Some(EmValue::Null))
        }
    }
}

#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use crate::{
        deobfuscation::techniques::confuserex::hooks::{
            create_anti_tamper_stub_hook, create_lzma_hook,
        },
        emulation::HookPriority,
        metadata::token::Token,
    };

    #[test]
    fn test_create_lzma_hook() {
        let hook = create_lzma_hook();
        assert_eq!(hook.name(), "confuserex-lzma-decompressor");
        assert_eq!(hook.priority(), HookPriority::HIGH);
    }

    #[test]
    fn test_create_anti_tamper_stub_hook() {
        let mut tokens = HashSet::new();
        tokens.insert(Token::new(0x06000001));
        tokens.insert(Token::new(0x06000002));

        let hook = create_anti_tamper_stub_hook(tokens);
        assert_eq!(hook.name(), "confuserex-protection-stub");
        assert_eq!(hook.priority(), HookPriority::HIGHEST);
    }
}