// swamp_vm/lib.rs

1use crate::host::HostFunction;
2use swamp_vm_types::BinaryInstruction;
3use swamp_vm_types::opcode::OpCode;
4
5pub mod host;
6
// Opcode handler signatures, one per operand arity (0..=4 u16 operands).
// The dispatch loop in `execute` picks the matching call shape.
type Handler0 = fn(&mut Vm);
type Handler1 = fn(&mut Vm, u16);
type Handler2 = fn(&mut Vm, u16, u16);
type Handler3 = fn(&mut Vm, u16, u16, u16);
type Handler4 = fn(&mut Vm, u16, u16, u16, u16);

// Tags a handler with its arity so the interpreter knows how many operand
// slots of the instruction to unpack before calling it.
#[derive(Copy, Clone)]
enum HandlerType {
    Args0(Handler0),
    Args1(Handler1),
    Args2(Handler2),
    Args3(Handler3),
    Args4(Handler4),
}
21
/// Bytecode interpreter that owns one raw linear memory block.
///
/// Layout (low to high addresses, per `new`): heap | stack | constants.
/// All offsets below index into that single allocation.
pub struct Vm {
    // Memory
    memory: *mut u8,    // Owned raw allocation; freed in `Drop`
    memory_size: usize, // Total size of the allocation in bytes

    // Memory regions (offsets)
    alloc_offset: usize,      // Current allocation point
    stack_base_offset: usize, // Base of stack
    stack_offset: usize,      // Current stack position
    constants_offset: usize,  // Start of constants region
    frame_offset: usize,      // Current frame position

    // Execution state
    ip: usize,                            // Instruction pointer
    instructions: Vec<BinaryInstruction>, // Bytecode
    execution_complete: bool,             // Flag for completion

    // Function call management
    call_stack: Vec<CallFrame>, // Track function calls

    // Host function integration
    host_functions: Vec<HostFunction>, // Registered host functions

    // One handler per possible opcode byte; unknown opcodes panic.
    handlers: [HandlerType; 256],

    // TODO: Error state
    debug_call_depth: usize,
}
50
51impl Vm {
52    #[must_use]
53    pub fn instructions(&self) -> &[BinaryInstruction] {
54        &self.instructions
55    }
56}
57
58impl Vm {
59    pub fn reset(&mut self) {
60        self.stack_offset = self.stack_base_offset;
61        self.ip = 0;
62        self.frame_offset = self.stack_offset;
63        self.execution_complete = false;
64        self.call_stack.clear();
65    }
66}
67
impl Drop for Vm {
    fn drop(&mut self) {
        unsafe {
            // Free the memory that was allocated in new()
            // SAFETY: `memory` was allocated in `new()` with this exact
            // layout (same `memory_size` and ALIGNMENT) and is released
            // exactly once, here.
            let layout = std::alloc::Layout::from_size_align(self.memory_size, ALIGNMENT).unwrap();
            std::alloc::dealloc(self.memory, layout);
        }
    }
}
77
78impl Vm {
79    pub fn memory(&self) -> &[u8] {
80        unsafe { std::slice::from_raw_parts(self.memory, self.memory_size) }
81    }
82
83    pub fn stack_memory(&self) -> &[u8] {
84        unsafe { std::slice::from_raw_parts(self.stack_ptr(), self.memory_size) }
85    }
86
87    pub fn stack_base_memory(&self) -> &[u8] {
88        unsafe { std::slice::from_raw_parts(self.stack_base_ptr(), self.memory_size) }
89    }
90    pub fn frame_memory(&self) -> &[u8] {
91        unsafe { std::slice::from_raw_parts(self.frame_ptr(), self.memory_size) }
92    }
93}
94
// All regions, frames and allocations are kept 8-byte aligned.
const ALIGNMENT: usize = 8;
// Low bits that must be clear in an aligned value.
const ALIGNMENT_REST: usize = ALIGNMENT - 1;
// ANDing with this rounds a size/offset *down* to a multiple of ALIGNMENT.
const ALIGNMENT_MASK: usize = !ALIGNMENT_REST;
98
99impl Vm {
100    pub fn new(instructions: Vec<BinaryInstruction>, constants: &[u8], memory_size: usize) -> Self {
101        let memory = unsafe {
102            std::alloc::alloc(std::alloc::Layout::from_size_align(memory_size, ALIGNMENT).unwrap())
103        };
104
105        // Reserve 20% for constants at the end
106        let constants_size = (memory_size / 5) & ALIGNMENT_MASK;
107        let constants_offset = memory_size - constants_size;
108
109        // Reserve 30% for stack in the middle
110        let stack_size = (memory_size * 3 / 10) & ALIGNMENT_MASK;
111        let stack_base_offset = (constants_offset - stack_size) & ALIGNMENT_MASK;
112
113        let mut vm = Self {
114            memory,          // Raw memory pointer
115            memory_size,     // Total memory size
116            alloc_offset: 0, // Heap starts at beginning
117            stack_base_offset,
118            stack_offset: stack_base_offset,
119            constants_offset,                // Constants at the end
120            frame_offset: stack_base_offset, // Frame starts at stack base
121            ip: 0,
122            instructions,
123            execution_complete: false,
124            call_stack: vec![],
125            host_functions: vec![],
126            handlers: [const { HandlerType::Args0(Self::execute_unimplemented) }; 256],
127            debug_call_depth: 0,
128        };
129
130        vm.handlers[OpCode::Ld as usize] = HandlerType::Args2(Self::execute_ld_local);
131        vm.handlers[OpCode::Ld8 as usize] = HandlerType::Args2(Self::execute_ld_imm_u8);
132        vm.handlers[OpCode::Ld16 as usize] = HandlerType::Args2(Self::execute_ld_imm_u16);
133        vm.handlers[OpCode::Ld32 as usize] = HandlerType::Args3(Self::execute_ld_imm_u32);
134        vm.handlers[OpCode::AddI32 as usize] = HandlerType::Args3(Self::execute_add_i32);
135        vm.handlers[OpCode::LtI32 as usize] = HandlerType::Args3(Self::execute_lt_i32);
136        vm.handlers[OpCode::Bnz as usize] = HandlerType::Args2(Self::execute_jmp_if);
137        vm.handlers[OpCode::Bz as usize] = HandlerType::Args2(Self::execute_jmp_if_not);
138        vm.handlers[OpCode::Jmp as usize] = HandlerType::Args1(Self::execute_jmp);
139        vm.handlers[OpCode::Call as usize] = HandlerType::Args1(Self::execute_call);
140
141        vm.handlers[OpCode::Enter as usize] = HandlerType::Args1(Self::execute_enter);
142        vm.handlers[OpCode::Ret as usize] = HandlerType::Args0(Self::execute_ret);
143        vm.handlers[OpCode::LdIndirect as usize] = HandlerType::Args4(Self::execute_ld_indirect);
144        vm.handlers[OpCode::Mov as usize] = HandlerType::Args3(Self::execute_mov);
145
146        vm.handlers[OpCode::Hlt as usize] = HandlerType::Args0(Self::execute_hlt);
147
148        // Optional: Zero out the memory for safety?
149        unsafe {
150            std::ptr::write_bytes(memory, 0, memory_size);
151        }
152
153        let start_addr = memory_size - constants.len();
154        unsafe {
155            std::ptr::copy_nonoverlapping(
156                constants.as_ptr(),
157                memory.add(start_addr),
158                constants.len(),
159            );
160        }
161
162        vm
163    }
164
    /// Current frame base as an offset into VM memory.
    #[must_use]
    pub fn frame_offset(&self) -> usize {
        self.frame_offset
    }
169
170    // Read a value at a specific offset from memory
171    #[must_use]
172    pub fn get_i32(&self, offset: usize) -> i32 {
173        unsafe { *(self.ptr_at_i32(offset) as *const i32) }
174    }
175
176    pub fn load_bytecode(&mut self, instructions: Vec<BinaryInstruction>) {
177        self.instructions = instructions;
178        self.ip = 0;
179        self.execution_complete = false;
180    }
181
182    #[inline]
183    fn execute_ld_imm_u32(&mut self, dst_offset: u16, lower_bits: u16, upper_bits: u16) {
184        let value = ((upper_bits as u32) << 16) | (lower_bits as u32);
185
186        let dst_ptr = self.ptr_at_u32(self.frame_offset + dst_offset as usize) as *mut u32;
187        unsafe {
188            *dst_ptr = value;
189        }
190    }
191    #[inline]
192    fn execute_ld_imm_u16(&mut self, dst_offset: u16, data: u16) {
193        let dst_ptr = self.ptr_at_u16(self.frame_offset + dst_offset as usize) as *mut u16;
194        unsafe {
195            *dst_ptr = data;
196        }
197    }
198
199    #[inline]
200    fn execute_ld_imm_u8(&mut self, dst_offset: u16, octet: u16) {
201        let dst_ptr = self.frame_ptr_bool_at(dst_offset);
202        unsafe {
203            *dst_ptr = octet as u8;
204        }
205    }
206
207    #[inline]
208    fn execute_add_i32(&mut self, dst_offset: u16, lhs_offset: u16, rhs_offset: u16) {
209        let lhs_ptr = self.ptr_at_i32(self.frame_offset + lhs_offset as usize) as *const i32;
210        let rhs_ptr = self.ptr_at_i32(self.frame_offset + rhs_offset as usize) as *const i32;
211        let dst_ptr = self.ptr_at_i32(self.frame_offset + dst_offset as usize) as *mut i32;
212
213        unsafe {
214            let lhs = *lhs_ptr;
215            let rhs = *rhs_ptr;
216            *dst_ptr = lhs + rhs;
217        }
218    }
219
220    #[inline]
221    fn execute_lt_i32(&mut self, dst_offset: u16, lhs_offset: u16, rhs_offset: u16) {
222        let lhs_ptr = self.frame_ptr_i32_const_at(lhs_offset);
223        let rhs_ptr = self.frame_ptr_i32_const_at(rhs_offset);
224        let dst_ptr = self.frame_ptr_bool_at(dst_offset);
225
226        unsafe {
227            let lhs = *lhs_ptr;
228            let rhs = *rhs_ptr;
229            *dst_ptr = (lhs < rhs) as u8;
230        }
231    }
232
233    #[inline]
234    fn execute_jmp_if(&mut self, condition_offset: u16, absolute_ip: u16) {
235        let is_true = self.frame_ptr_bool_const_at(condition_offset);
236        if is_true {
237            self.ip = absolute_ip as usize;
238        }
239    }
240
241    #[inline]
242    fn execute_jmp_if_not(&mut self, condition_offset: u16, absolute_ip: u16) {
243        let is_true = self.frame_ptr_bool_const_at(condition_offset);
244        if !is_true {
245            self.ip = absolute_ip as usize;
246        }
247    }
248
249    #[inline]
250    fn execute_jmp(&mut self, absolute_ip: u16) {
251        self.ip = absolute_ip as usize;
252    }
253
254    #[inline]
255    fn execute_hlt(&mut self) {
256        self.execution_complete = true;
257    }
258
    /// Fallback handler installed in every slot of the handler table;
    /// reaching it means the program contains an unregistered opcode.
    fn execute_unimplemented(&mut self) {
        panic!("unknown OPCODE HALT!");
    }
262
263    #[inline]
264    fn execute_mov(&mut self, dst_offset: u16, src_offset: u16, size: u16) {
265        let src_ptr = self.ptr_at_u16(self.frame_offset + src_offset as usize);
266        let dst_ptr = self.ptr_at_u16(self.frame_offset + dst_offset as usize);
267
268        unsafe {
269            std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, size as usize);
270        }
271    }
272
273    #[inline]
274    fn execute_ld_local(&mut self, dst_offset: u16, src_offset: u16) {
275        let src_ptr = self.ptr_at_u32(self.frame_offset + src_offset as usize);
276        let dst_ptr = self.ptr_at_u32(self.frame_offset + dst_offset as usize);
277
278        unsafe {
279            std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, 4);
280        }
281    }
282
283    #[inline]
284    fn execute_ld_indirect(
285        &mut self,
286        dst_offset: u16,
287        base_ptr_offset: u16,
288        offset_offset: u16,
289        size: u16,
290    ) {
291        let base_ptr_ptr =
292            self.ptr_at_u16(self.frame_offset + base_ptr_offset as usize) as *const u16;
293        let offset_ptr = self.ptr_at_u16(self.frame_offset + offset_offset as usize) as *const u16;
294
295        // Read the actual base pointer and offset values
296        let base_ptr_value;
297        let offset_value;
298
299        unsafe {
300            base_ptr_value = *base_ptr_ptr;
301            offset_value = *offset_ptr;
302        }
303
304        let src_addr = base_ptr_value as usize + offset_value as usize;
305
306        let src_ptr = self.ptr_at_u16(src_addr);
307        let dst_ptr = self.ptr_at_u16(self.frame_offset + dst_offset as usize);
308
309        unsafe {
310            std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, size as usize);
311        }
312    }
313
314    // Helper to convert offset to pointer
315    #[inline(always)]
316    fn ptr_at_i32(&self, offset: usize) -> *mut i32 {
317        // Ensure alignment
318        debug_assert_eq!(offset % 4, 0, "Unaligned i32 access at offset {}", offset);
319        // Inline ptr_at functionality
320        unsafe { self.memory.add(offset) as *mut i32 }
321    }
322
323    #[inline(always)]
324    fn ptr_at_u32(&self, offset: usize) -> *mut u32 {
325        // Ensure alignment
326        debug_assert_eq!(offset % 4, 0, "Unaligned i32 access at offset {}", offset);
327        // Inline ptr_at functionality
328        unsafe { self.memory.add(offset) as *mut u32 }
329    }
330
331    #[inline(always)]
332    fn ptr_at_u16(&self, offset: usize) -> *mut u16 {
333        // Ensure alignment
334        debug_assert_eq!(offset % 2, 0, "Unaligned u16 access at offset {}", offset);
335        // Inline ptr_at functionality
336        unsafe { self.memory.add(offset) as *mut u16 }
337    }
338
339    #[inline(always)]
340    fn ptr_at_u8(&self, offset: usize) -> *mut u8 {
341        // Inline ptr_at functionality
342        unsafe { self.memory.add(offset) }
343    }
344
    // Helper to get current frame pointer
    /// Raw pointer to the base of the current call frame.
    fn frame_ptr(&self) -> *mut u8 {
        self.ptr_at_u8(self.frame_offset)
    }

    /// Raw pointer to the current top of the stack region.
    fn stack_ptr(&self) -> *mut u8 {
        self.ptr_at_u8(self.stack_offset)
    }

    /// Raw pointer to the bottom of the stack region.
    fn stack_base_ptr(&self) -> *mut u8 {
        self.ptr_at_u8(self.stack_base_offset)
    }
357
358    #[inline(always)]
359    fn frame_ptr_i32_at(&self, offset: u16) -> *mut i32 {
360        self.ptr_at_i32(self.frame_offset + offset as usize)
361    }
362
363    #[inline(always)]
364    fn frame_ptr_i32_const_at(&self, offset: u16) -> *const i32 {
365        self.ptr_at_i32(self.frame_offset + offset as usize)
366            .cast_const()
367    }
368
369    #[inline(always)]
370    fn frame_ptr_bool_at(&self, offset: u16) -> *mut u8 {
371        self.ptr_at_u8(self.frame_offset + offset as usize)
372    }
373
374    #[inline(always)]
375    fn frame_ptr_bool_const_at(&self, offset: u16) -> bool {
376        unsafe { *self.ptr_at_u8(self.frame_offset + offset as usize) != 0 }
377    }
378
379    fn allocate(&mut self, size: usize) -> usize {
380        let aligned_size = (size + ALIGNMENT_REST) & ALIGNMENT_MASK;
381        let result_offset = self.alloc_offset;
382
383        assert!(
384            result_offset + aligned_size <= self.stack_base_offset,
385            "Out of memory"
386        );
387
388        self.alloc_offset += aligned_size;
389        result_offset
390    }
391
392    pub fn debug_opcode(&self, opcode: u8, operands: &[u16; 4]) {
393        eprintln!(
394            "{:8} [{}]",
395            OpCode::from(opcode),
396            match self.handlers[opcode as usize] {
397                HandlerType::Args0(_) => String::new(),
398                HandlerType::Args1(_) => format!("{:04x}", operands[0]),
399                HandlerType::Args2(_) => format!("{:04x}, {:04x}", operands[0], operands[1]),
400                HandlerType::Args3(_) => format!(
401                    "{:04x}, {:04x}, {:04x}",
402                    operands[0], operands[1], operands[2]
403                ),
404                HandlerType::Args4(_) => format!(
405                    "{:04x}, {:04x}, {:04x}, {:04x}",
406                    operands[0], operands[1], operands[2], operands[3]
407                ),
408            }
409        );
410    }
411
412    fn debug_instructions(&self) {
413        for (ip, instruction) in self.instructions.iter().enumerate() {
414            eprint!("|> {ip:04x}: ");
415            let operands = instruction.operands;
416            self.debug_opcode(instruction.opcode, &operands);
417        }
418    }
419
    /// Runs the loaded program from instruction 0 until a `Hlt` handler
    /// sets `execution_complete`.
    ///
    /// # Panics
    /// Panics via slice indexing if `ip` runs past the end of
    /// `instructions`, and inside `execute_unimplemented` for opcodes with
    /// no registered handler.
    pub fn execute(&mut self) {
        self.ip = 0;
        self.execution_complete = false;
        #[cfg(feature = "debug_vm")]
        {
            eprintln!("program:");
            self.debug_instructions();
            eprintln!("start executing");
        }

        // Synthetic root frame so a top-level `Ret` has something to pop.
        // NOTE(review): return_address 1 combined with `Ret`'s -1 adjustment
        // and the loop's +1 resumes at instruction 1 — confirm intended.
        self.call_stack.push(CallFrame {
            return_address: 1,
            previous_frame_offset: 0,
            frame_size: 0,
        });

        while !self.execution_complete {
            let instruction = &self.instructions[self.ip];
            let opcode = instruction.opcode;

            #[cfg(feature = "debug_vm")]
            {
                let operands = instruction.operands;
                eprint!("> {:04x}: ", self.ip);
                self.debug_opcode(opcode, &operands);

                //    let s = hexify::format_hex(&self.frame_memory()[..16]);
                //  eprintln!("mem: {s}");
            }

            // Dispatch: unpack exactly as many operand slots as the
            // handler's registered arity.
            match self.handlers[opcode as usize] {
                HandlerType::Args0(handler) => handler(self),
                HandlerType::Args1(handler) => handler(self, instruction.operands[0]),
                HandlerType::Args2(handler) => {
                    handler(self, instruction.operands[0], instruction.operands[1]);
                }
                HandlerType::Args3(handler) => handler(
                    self,
                    instruction.operands[0],
                    instruction.operands[1],
                    instruction.operands[2],
                ),
                HandlerType::Args4(handler) => handler(
                    self,
                    instruction.operands[0],
                    instruction.operands[1],
                    instruction.operands[2],
                    instruction.operands[3],
                ),
            };

            // Unconditional post-handler increment: branch/call/ret handlers
            // set `ip` expecting this +1 to follow.
            self.ip += 1;
        }
    }
474
475    fn execute_call(&mut self, target: u16) {
476        let return_info = CallFrame {
477            return_address: self.ip + 1,              // Instruction to return to
478            previous_frame_offset: self.frame_offset, // Previous frame position
479            frame_size: 0,                            // Will be filled by ENTER
480        };
481
482        self.call_stack.push(return_info);
483
484        self.ip = target as usize;
485    }
486
487    #[inline]
488    fn execute_enter(&mut self, aligned_size: u16) {
489        //let aligned_size = (frame_size as usize + ALIGNMENT_REST) & ALIGNMENT_MASK; // 8-byte alignment
490
491        let frame = self.call_stack.last_mut().unwrap();
492        frame.frame_size = aligned_size as usize;
493
494        // the functions frame of reference should be the stack offset
495        self.frame_offset = self.stack_offset;
496
497        // and we push the stack with the space of the local variables
498        self.stack_offset += aligned_size as usize;
499    }
500
501    #[inline]
502    fn execute_ret(&mut self) {
503        let frame = self.call_stack.pop().unwrap();
504
505        // Bring back the frame to the old frame
506        self.frame_offset = frame.previous_frame_offset;
507
508        // "pop" the space for the local variables of the stack
509        self.stack_offset -= frame.frame_size;
510
511        // going back to the old instruction
512        self.ip = frame.return_address;
513        self.ip -= 1; // Adjust for automatic increment
514
515        // NOTE: Any return value is always at frame_offset + 0
516    }
517}
518
/// Bookkeeping record pushed on `Call` (and as a synthetic root frame in
/// `execute`) and popped on `Ret`.
pub struct CallFrame {
    return_address: usize,        // Instruction to return to
    previous_frame_offset: usize, // Previous frame position
    frame_size: usize,            // Size of this frame; filled in by `Enter`
}