aegis_vm_macro 0.2.51

Proc-macro for VM-protected functions - RustAegis
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
//! Modular AST to Bytecode Compiler
//!
//! Converts Rust expressions to VM bytecode at compile time.
//!
//! Module structure:
//! - mod.rs: Compiler struct and core infrastructure
//! - emit.rs: Bytecode emission helpers
//! - literal.rs: Literal compilation (int, bool, string)
//! - expr.rs: Expression compilation
//! - stmt.rs: Statement compilation
//! - array.rs: Array/vector operations
//! - control.rs: Control flow (if, while, loop, for)
//! - method.rs: Method call compilation (.len(), .push(), etc.)
//! - cast.rs: Type cast compilation (as i32, as u8, etc.)

mod emit;
mod literal;
mod expr;
mod stmt;
mod array;
mod control;
mod method;
mod cast;
pub mod native_call;

pub use native_call::NativeCallCollector;

use syn::{ItemFn, Pat, FnArg};
use std::collections::BTreeMap;
use crate::opcodes::exec;
use crate::crypto::OpcodeTable;
use crate::mba::MbaTransformer;
use crate::substitution::Substitution;
use crate::value_cryptor::ValueCryptor;

/// Compilation error
///
/// Wraps a human-readable message describing why AST -> bytecode
/// compilation failed.
#[derive(Debug)]
pub struct CompileError(pub String);

impl std::fmt::Display for CompileError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

// Implement the standard error trait so `CompileError` can be boxed as
// `dyn Error` and converted via `?` like any other Rust error type.
impl std::error::Error for CompileError {}

/// Variable type for proper method dispatch
#[allow(dead_code)] // IntegerSized reserved for future packed storage optimization
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum VarType {
    /// Integer (u8, u16, u32, u64, i8, i16, i32, i64) with optional size in bytes
    Integer,
    /// Sized integer with explicit byte size (1, 2, 4, or 8)
    IntegerSized(u8),
    /// String (heap allocated)
    String,
    /// Vector/Array (heap allocated)
    Vector,
    /// Boolean (1 byte logically, but stored as 8 bytes for alignment)
    Bool,
    /// Struct (heap allocated) - contains struct type name
    Struct(std::string::String),
    /// Tuple (heap allocated) - contains element types for proper offset calculation
    Tuple(Vec<VarType>),
}

#[allow(dead_code)] // Reserved for future packed storage optimization
impl VarType {
    /// Logical size in bytes (used for tuple element offset calculation).
    ///
    /// Every scalar and every heap pointer occupies one 8-byte slot; only
    /// `IntegerSized` reports its explicit width, and a tuple reports the
    /// sum of its elements' aligned slot sizes.
    pub fn size_bytes(&self) -> usize {
        match self {
            VarType::IntegerSized(size) => usize::from(*size),
            VarType::Tuple(elems) => elems.iter().map(VarType::aligned_size).sum(),
            // Integer, String, Vector, Bool, Struct: a single 8-byte slot
            // (a pointer for heap types, u64 storage for scalars).
            _ => 8,
        }
    }

    /// Storage slot size: every value currently occupies 8 bytes.
    ///
    /// Kept as a method so packed storage can be introduced later without
    /// touching call sites.
    pub fn aligned_size(&self) -> usize {
        8
    }

    /// Whether a value of this type owns heap memory that must be freed
    /// when its scope exits.
    pub fn needs_cleanup(&self) -> bool {
        match self {
            // Strings, vectors and structs live on the VM heap.
            // (Unit structs are filtered out separately by the compiler.)
            VarType::String | VarType::Vector | VarType::Struct(_) => true,
            // A non-empty tuple is heap allocated; an element that itself
            // needs cleanup would also force cleanup.
            VarType::Tuple(elems) => !elems.is_empty() || elems.iter().any(VarType::needs_cleanup),
            VarType::Integer | VarType::IntegerSized(_) | VarType::Bool => false,
        }
    }
}

/// Struct field definition
#[derive(Debug, Clone)]
pub(crate) struct FieldDef {
    /// Field name
    pub name: std::string::String,
    /// Byte offset from struct start
    pub offset: usize,
}

/// Struct definition for compile-time field lookup
#[derive(Debug, Clone)]
pub(crate) struct StructDef {
    /// Struct name (kept for debugging/error messages)
    #[allow(dead_code)]
    pub name: std::string::String,
    /// Fields with their offsets
    pub fields: Vec<FieldDef>,
    /// Total size in bytes
    pub size: usize,
}

impl StructDef {
    /// Look up the byte offset of `field_name`.
    ///
    /// Returns `None` when the struct has no field with that name.
    pub fn get_field_offset(&self, field_name: &str) -> Option<usize> {
        self.fields.iter().find_map(|field| {
            if field.name == field_name {
                Some(field.offset)
            } else {
                None
            }
        })
    }
}

/// Variable information - register and type
///
/// One entry per local variable, held inside the scope that declared it
/// (see `Compiler::scopes`).
#[derive(Debug, Clone)]
pub(crate) struct VarInfo {
    /// Register index
    pub reg: u8,
    /// Variable type
    pub var_type: VarType,
    /// Is signed (for integers)
    pub is_signed: bool,
    /// Needs heap cleanup on scope exit
    pub needs_cleanup: bool,
}

/// Variable location - either in input buffer or in a register
///
/// Also used by the legacy `var_types` map; `Compiler::get_var_location`
/// derives these from `VarInfo` for scoped variables.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub(crate) enum VarLocation {
    /// Input buffer offset (for function arguments)
    InputOffset(usize),
    /// Register index (for local variables)
    Register(u8),
    /// Array stored in register (register holds heap address)
    /// Contains: register index, element size (1, 2, 4, or 8 bytes)
    Array(u8, u8),
    /// String stored in register (register holds heap address)
    String(u8),
}

/// Loop context for break/continue support
///
/// Pushed onto `Compiler::loop_stack` on loop entry and popped on exit,
/// so `break`/`continue` always target the innermost active loop.
#[derive(Debug, Clone)]
pub(crate) struct LoopContext {
    /// Label for continue (jump to condition/increment)
    pub continue_label: String,
    /// Label for break (jump past loop end)
    pub break_label: String,
    /// Scope depth when loop started (for cleanup on break/continue)
    pub scope_depth: usize,
}

/// Compiler state
///
/// Single-pass AST -> bytecode compiler: expression/statement compilation
/// (implemented in the sibling modules) appends directly to `bytecode`;
/// forward jumps are recorded in `fixups` and patched once all labels are
/// known (see `apply_fixups`).
pub struct Compiler {
    /// Generated bytecode
    pub(crate) bytecode: Vec<u8>,
    /// Function argument name -> input buffer offset
    pub(crate) arg_offsets: BTreeMap<String, usize>,
    /// Scoped variable storage - Vec of scopes, each scope maps name -> VarInfo
    /// Innermost scope is at the end of the Vec
    pub(crate) scopes: Vec<BTreeMap<String, VarInfo>>,
    /// Legacy: Variable types for method dispatch (will be deprecated)
    pub(crate) var_types: BTreeMap<String, VarLocation>,
    /// Struct definitions for compile-time field lookup
    pub(crate) struct_defs: BTreeMap<String, StructDef>,
    /// Next available register for locals
    pub(crate) next_local_reg: u8,
    /// Current input buffer offset for next argument
    pub(crate) next_arg_offset: usize,
    /// Label name -> bytecode offset for jumps
    pub(crate) labels: BTreeMap<String, usize>,
    /// Pending jump fixups (bytecode offset, label name)
    pub(crate) fixups: Vec<(usize, String)>,
    /// Unique label counter
    pub(crate) label_counter: usize,
    /// Stack of active loops for break/continue
    pub(crate) loop_stack: Vec<LoopContext>,
    /// Opcode encoding table
    pub(crate) opcode_table: OpcodeTable,
    /// MBA transformer
    pub(crate) mba: MbaTransformer,
    /// Enable MBA transformations
    pub(crate) mba_enabled: bool,
    /// Substitution state
    pub(crate) subst: Substitution,
    /// ValueCryptor for constant obfuscation
    pub(crate) value_cryptor: ValueCryptor,
    /// Enable heavy value encryption
    pub(crate) value_cryptor_enabled: bool,
    /// Enable opaque predicates injection
    pub(crate) opaque_predicates_enabled: bool,
    /// Native call collector for external function calls
    pub(crate) native_collector: NativeCallCollector,
}

impl Compiler {
    /// Create new compiler
    ///
    /// All obfuscation passes (MBA, substitution) disabled.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::with_options(false, false)
    }

    /// Create compiler with MBA transformations enabled
    #[allow(dead_code)]
    pub fn with_mba(mba_enabled: bool) -> Self {
        Self::with_options(mba_enabled, false)
    }

    /// Create compiler with full options
    ///
    /// `mba_enabled` also turns on ValueCryptor constant obfuscation;
    /// `substitution_enabled` also turns on opaque predicate injection.
    pub fn with_options(mba_enabled: bool, substitution_enabled: bool) -> Self {
        // The opcode table's seed keys every randomized transform (MBA,
        // substitution, value encryption) so builds are deterministic for
        // a given table.
        let seed = crate::crypto::get_opcode_table().get_seed();

        // Start with one global scope
        let scopes = vec![BTreeMap::new()];

        Self {
            bytecode: Vec::new(),
            arg_offsets: BTreeMap::new(),
            scopes,
            var_types: BTreeMap::new(),
            struct_defs: BTreeMap::new(),
            next_local_reg: 0,
            next_arg_offset: 0,
            labels: BTreeMap::new(),
            fixups: Vec::new(),
            label_counter: 0,
            loop_stack: Vec::new(),
            opcode_table: crate::crypto::get_opcode_table(),
            mba: MbaTransformer::new(seed),
            mba_enabled,
            subst: Substitution::new(seed, substitution_enabled),
            value_cryptor: ValueCryptor::new(seed),
            value_cryptor_enabled: mba_enabled,
            // Opaque predicates enabled for standard+ protection (when substitution is on)
            opaque_predicates_enabled: substitution_enabled,
            native_collector: NativeCallCollector::new(),
        }
    }

    /// Get reference to native call collector
    #[allow(dead_code)]
    pub fn native_collector(&self) -> &NativeCallCollector {
        &self.native_collector
    }

    /// Get mutable reference to native call collector
    #[allow(dead_code)]
    pub fn native_collector_mut(&mut self) -> &mut NativeCallCollector {
        &mut self.native_collector
    }

    /// Current bytecode position
    pub(crate) fn pos(&self) -> usize {
        self.bytecode.len()
    }

    /// Generate unique label
    ///
    /// Labels are `"{prefix}_{counter}"`; the counter is per-compiler, so
    /// labels are unique within one function's bytecode.
    pub(crate) fn unique_label(&mut self, prefix: &str) -> String {
        let label = format!("{}_{}", prefix, self.label_counter);
        self.label_counter += 1;
        label
    }

    /// Mark current position as label
    pub(crate) fn mark_label(&mut self, name: &str) {
        self.labels.insert(name.to_string(), self.pos());
    }

    /// Register a function argument
    ///
    /// Each argument occupies one fixed 8-byte slot in the input buffer,
    /// assigned in declaration order.
    pub(crate) fn register_arg(&mut self, name: &str) {
        self.arg_offsets.insert(name.to_string(), self.next_arg_offset);
        self.next_arg_offset += 8;
    }

    // =========================================================================
    // Scope Management
    // =========================================================================

    /// Push a new scope (called at block entry)
    pub(crate) fn push_scope(&mut self) {
        self.scopes.push(BTreeMap::new());
    }

    /// Pop current scope and emit cleanup for heap variables
    ///
    /// NOTE: BTreeMap iteration is by variable name, so frees are emitted
    /// in alphabetical order rather than reverse declaration order. Safe
    /// here because each free only touches its own register/allocation.
    pub(crate) fn pop_scope(&mut self) {
        if let Some(scope) = self.scopes.pop() {
            // Emit HEAP_FREE for variables that need cleanup
            for (_name, info) in scope.iter() {
                if info.needs_cleanup {
                    // Push the heap address from register, then free it
                    self.emit_push_reg(info.reg);
                    self.emit_heap_free();
                }
            }
        }
    }

    /// Get current scope depth (number of active scopes)
    pub(crate) fn current_scope_depth(&self) -> usize {
        self.scopes.len()
    }

    /// Emit cleanup code for all scopes from current down to target_depth (exclusive)
    /// Used for early exits (return, break, continue) to properly free heap memory
    ///
    /// Example: If we have scopes [global, func, loop, inner] (depth=4)
    /// and target_depth=2 (loop level), we clean up [inner, loop] -> scopes at index 3, 2
    ///
    /// Unlike `pop_scope`, this does NOT remove the scopes — the jump may
    /// be conditional, so the variables must remain defined on fallthrough.
    pub(crate) fn emit_scope_cleanup(&mut self, target_depth: usize) {
        let current_depth = self.scopes.len();

        // Clean up scopes from innermost to target (exclusive)
        // We iterate backwards from current to target
        if current_depth <= target_depth {
            return; // Nothing to clean up
        }

        // First, collect all registers that need cleanup (to avoid borrow issues)
        let mut regs_to_free = Vec::new();
        for depth in (target_depth..current_depth).rev() {
            if let Some(scope) = self.scopes.get(depth) {
                for (_name, info) in scope.iter() {
                    if info.needs_cleanup {
                        regs_to_free.push(info.reg);
                    }
                }
            }
        }

        // Now emit cleanup code
        for reg in regs_to_free {
            self.emit_push_reg(reg);
            self.emit_heap_free();
        }
    }

    /// Define a variable in the current scope
    pub(crate) fn define_var(&mut self, name: &str, var_type: VarType, is_signed: bool) -> Result<u8, CompileError> {
        self.define_var_internal(name, var_type, is_signed, false)
    }

    /// Define a variable that borrows from another (no cleanup needed)
    /// Used when extracting tuple elements: let inner = t.0
    pub(crate) fn define_var_borrowed(&mut self, name: &str, var_type: VarType, is_signed: bool) -> Result<u8, CompileError> {
        self.define_var_internal(name, var_type, is_signed, true)
    }

    /// Internal variable definition
    ///
    /// Allocates the next free register, records cleanup requirements, and
    /// mirrors the binding into the legacy `var_types` map. Registers are
    /// never reused, even after a scope pops.
    fn define_var_internal(&mut self, name: &str, var_type: VarType, is_signed: bool, is_borrowed: bool) -> Result<u8, CompileError> {
        // Registers 248..=255 are kept free — presumably reserved for
        // VM-internal use; confirm against the VM's register map.
        if self.next_local_reg >= 248 {
            return Err(CompileError("Too many local variables (max 248)".to_string()));
        }

        let reg = self.next_local_reg;
        self.next_local_reg += 1;

        // Determine if cleanup is needed - borrowed values never need cleanup
        // Unit structs/tuples (size 0) also don't need cleanup
        let needs_cleanup = if is_borrowed {
            false
        } else {
            match &var_type {
                VarType::String | VarType::Vector => true,
                VarType::Struct(struct_name) => {
                    // Check struct size - unit structs don't need cleanup
                    self.struct_defs.get(struct_name)
                        .map(|def| def.size > 0)
                        .unwrap_or(false)
                }
                VarType::Tuple(elems) => !elems.is_empty(),  // Empty tuple () doesn't need cleanup
                _ => false,
            }
        };

        // Update legacy var_types for compatibility (using reference before move)
        match &var_type {
            VarType::String => {
                self.var_types.insert(name.to_string(), VarLocation::String(reg));
            }
            VarType::Vector => {
                self.var_types.insert(name.to_string(), VarLocation::Array(reg, 8));
            }
            _ => {
                self.var_types.insert(name.to_string(), VarLocation::Register(reg));
            }
        }

        let info = VarInfo {
            reg,
            var_type,
            is_signed,
            needs_cleanup,
        };

        // Add to current scope
        if let Some(scope) = self.scopes.last_mut() {
            scope.insert(name.to_string(), info);
        }

        Ok(reg)
    }

    /// Lookup a variable in all scopes (innermost first)
    ///
    /// Inner bindings shadow outer ones, matching Rust scoping rules.
    pub(crate) fn lookup_var(&self, name: &str) -> Option<&VarInfo> {
        // Search from innermost to outermost scope
        for scope in self.scopes.iter().rev() {
            if let Some(info) = scope.get(name) {
                return Some(info);
            }
        }
        None
    }

    /// Check if a variable is signed
    ///
    /// Unknown variables default to unsigned.
    pub(crate) fn is_var_signed(&self, name: &str) -> bool {
        if let Some(info) = self.lookup_var(name) {
            return info.is_signed;
        }
        false
    }

    /// Get variable type
    pub(crate) fn get_var_type(&self, name: &str) -> Option<VarType> {
        if let Some(info) = self.lookup_var(name) {
            return Some(info.var_type.clone());
        }
        None
    }

    /// Check if an expression is of boolean type
    ///
    /// Purely syntactic heuristic — no type inference. Anything it cannot
    /// classify (e.g. function calls) is treated as non-bool; callers are
    /// expected to use explicit annotations in those cases.
    pub(crate) fn is_bool_expr(&self, expr: &syn::Expr) -> bool {
        match expr {
            // Boolean literals
            syn::Expr::Lit(lit) => matches!(lit.lit, syn::Lit::Bool(_)),
            // Comparison operators always return bool
            syn::Expr::Binary(bin) => {
                matches!(bin.op,
                    syn::BinOp::Eq(_) | syn::BinOp::Ne(_) |
                    syn::BinOp::Lt(_) | syn::BinOp::Le(_) |
                    syn::BinOp::Gt(_) | syn::BinOp::Ge(_) |
                    syn::BinOp::And(_) | syn::BinOp::Or(_)
                )
            }
            // Variable reference - check type
            syn::Expr::Path(path) => {
                if path.path.segments.len() == 1 {
                    let name = path.path.segments[0].ident.to_string();
                    matches!(self.get_var_type(&name), Some(VarType::Bool))
                } else {
                    false
                }
            }
            // Method calls that return bool
            syn::Expr::MethodCall(mc) => {
                let method = mc.method.to_string();
                matches!(method.as_str(), "is_empty" | "contains" | "starts_with" | "ends_with" | "eq")
            }
            // Function call - we can't easily determine return type at compile time
            // The user should use explicit type annotation: let x: bool = func()
            syn::Expr::Call(_) => false,
            // Nested negation - if inner is bool, outer is bool
            syn::Expr::Unary(unary) => {
                if matches!(unary.op, syn::UnOp::Not(_)) {
                    self.is_bool_expr(&unary.expr)
                } else {
                    false
                }
            }
            // Parenthesized expression
            syn::Expr::Paren(paren) => self.is_bool_expr(&paren.expr),
            _ => false,
        }
    }

    /// Get variable location (argument or local) - uses new scope system
    ///
    /// Resolution order: scoped locals shadow function arguments, which
    /// shadow any legacy `var_types` entry.
    pub(crate) fn get_var_location(&self, name: &str) -> Option<VarLocation> {
        // Check scopes first (innermost to outermost)
        if let Some(info) = self.lookup_var(name) {
            return match info.var_type {
                VarType::String => Some(VarLocation::String(info.reg)),
                VarType::Vector => Some(VarLocation::Array(info.reg, 8)),
                _ => Some(VarLocation::Register(info.reg)),
            };
        }
        // Check arguments
        if let Some(&offset) = self.arg_offsets.get(name) {
            return Some(VarLocation::InputOffset(offset));
        }
        // Legacy fallback
        if let Some(loc) = self.var_types.get(name) {
            return Some(loc.clone());
        }
        None
    }

    /// Apply all jump fixups
    ///
    /// Jump operands are 2-byte little-endian i16 offsets measured from
    /// the byte immediately after the operand (`fixup_pos + 2`), which
    /// limits any single jump to +/-32767 bytes.
    pub(crate) fn apply_fixups(&mut self) -> Result<(), CompileError> {
        for (fixup_pos, label) in &self.fixups {
            let target = self.labels.get(label)
                .ok_or_else(|| CompileError(format!("Unknown label: {}", label)))?;

            let from = fixup_pos + 2;
            let offset = (*target as isize) - (from as isize);

            if offset < i16::MIN as isize || offset > i16::MAX as isize {
                return Err(CompileError(format!("Jump offset out of range: {}", offset)));
            }

            let offset_bytes = (offset as i16).to_le_bytes();
            self.bytecode[*fixup_pos] = offset_bytes[0];
            self.bytecode[*fixup_pos + 1] = offset_bytes[1];
        }
        Ok(())
    }

    /// Extract variable name from pattern
    ///
    /// Handles `x` and `x: Ty` (recursing through the type ascription);
    /// anything else (tuples, refs, wildcards) is rejected.
    pub(crate) fn extract_pat_name(pat: &Pat) -> Result<String, CompileError> {
        match pat {
            Pat::Ident(pat_ident) => Ok(pat_ident.ident.to_string()),
            Pat::Type(pat_type) => Self::extract_pat_name(&pat_type.pat),
            _ => Err(CompileError("Unsupported pattern in let binding".to_string())),
        }
    }

    /// Finalize compilation
    ///
    /// Patches all pending jumps, then guarantees the program terminates by
    /// appending HALT unless the last byte already is one.
    /// NOTE(review): a trailing operand byte that happens to equal HALT
    /// would skip the append — confirm emitters always end on an opcode.
    pub(crate) fn finalize(&mut self) -> Result<Vec<u8>, CompileError> {
        self.apply_fixups()?;

        if self.bytecode.is_empty() || self.bytecode.last().copied() != Some(exec::HALT) {
            self.emit_op(exec::HALT);
        }

        Ok(self.bytecode.clone())
    }
}

// ============================================================================
// Public API - Compile functions
// ============================================================================

/// Compile a function to bytecode (without MBA or substitution)
///
/// Convenience wrapper: all obfuscation passes disabled.
#[allow(dead_code)]
pub fn compile_function(func: &ItemFn) -> Result<Vec<u8>, CompileError> {
    compile_function_full(func, false, false)
}

/// Compile a function to bytecode with MBA transformations
///
/// Convenience wrapper: MBA on, substitution off.
#[allow(dead_code)]
pub fn compile_function_with_mba(func: &ItemFn) -> Result<Vec<u8>, CompileError> {
    compile_function_full(func, true, false)
}

/// Compile a function with substitution obfuscation
///
/// Convenience wrapper: substitution on, MBA off.
#[allow(dead_code)]
pub fn compile_function_with_substitution(func: &ItemFn) -> Result<Vec<u8>, CompileError> {
    compile_function_full(func, false, true)
}

/// Compile a function with full obfuscation (MBA + Substitution)
///
/// Convenience wrapper: every obfuscation pass enabled.
#[allow(dead_code)]
pub fn compile_function_paranoid(func: &ItemFn) -> Result<Vec<u8>, CompileError> {
    compile_function_full(func, true, true)
}

/// Compile a function with configurable options
///
/// Thin wrapper over [`compile_function_with_natives`] that discards the
/// collected native-call metadata and returns only the bytecode.
pub fn compile_function_full(func: &ItemFn, mba_enabled: bool, substitution_enabled: bool) -> Result<Vec<u8>, CompileError> {
    compile_function_with_natives(func, mba_enabled, substitution_enabled)
        .map(|(bytecode, _natives)| bytecode)
}

/// Compilation result including bytecode and native call info
///
/// NOTE(review): not constructed anywhere in this module —
/// `compile_function_with_natives` returns a tuple instead. Presumably
/// reserved for a future API; confirm before removing.
#[allow(dead_code)]
pub struct CompileResult {
    /// Generated bytecode
    pub bytecode: Vec<u8>,
    /// Native call collector with all external function calls
    pub native_collector: NativeCallCollector,
}

/// Compile a function with configurable options, returning native call info
///
/// Returns the generated bytecode together with the collector holding
/// every external (native) function call found in the body.
pub fn compile_function_with_natives(func: &ItemFn, mba_enabled: bool, substitution_enabled: bool) -> Result<(Vec<u8>, NativeCallCollector), CompileError> {
    let mut compiler = Compiler::with_options(mba_enabled, substitution_enabled);

    // Bind every `name: Ty` parameter to an input-buffer slot, in
    // declaration order. Non-identifier patterns and receivers are skipped.
    let arg_names = func.sig.inputs.iter().filter_map(|input| match input {
        FnArg::Typed(pat_type) => match pat_type.pat.as_ref() {
            Pat::Ident(pat_ident) => Some(pat_ident.ident.to_string()),
            _ => None,
        },
        _ => None,
    });
    for name in arg_names {
        compiler.register_arg(&name);
    }

    // Compile the body, then patch jumps and ensure a trailing HALT.
    compiler.compile_block(&func.block)?;
    let bytecode = compiler.finalize()?;

    // Hand the collected native-call metadata back to the caller.
    let native_collector = std::mem::take(&mut compiler.native_collector);

    Ok((bytecode, native_collector))
}