Skip to main content

rars_codec/
rarvm.rs

1use crate::{Error, Result};
2
/// Total size in bytes of the VM's memory image.
const MEMORY_SIZE: usize = 0x40000;
/// Mask applied to addresses so that every memory access wraps below `MEMORY_SIZE`.
const MEMORY_MASK: u32 = 0x3ffff;
/// Address at which the global data area (system fields, then caller globals) begins.
const GLOBAL_BASE: usize = 0x3c000;
/// Number of bytes at the start of the global area that are always copied into the result.
const SYSTEM_GLOBAL_SIZE: usize = 64;
/// Upper bound on the user-global byte count a program may report at `GLOBAL_BASE + 0x30`.
const MAX_USER_GLOBAL: usize = 0x2000 - SYSTEM_GLOBAL_SIZE;
/// Largest static data block that `Program::parse` accepts.
const MAX_STATIC_DATA: usize = MEMORY_SIZE - GLOBAL_BASE;
/// Maximum number of instructions a single execution may run before it is rejected.
const MAX_INSTRUCTIONS: usize = 25_000_000;
/// Carry flag bit in the VM flags word.
const FLAG_C: u32 = 1;
/// Zero flag bit in the VM flags word.
const FLAG_Z: u32 = 2;
/// Sign flag bit in the VM flags word.
const FLAG_S: u32 = 0x8000_0000;
13
/// A decoded RARVM program: its static data block plus the instruction list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Program {
    /// Static data bytes read from the program blob; copied into VM memory
    /// directly after the caller-supplied global data when the program runs.
    pub static_data: Vec<u8>,
    /// Decoded instructions in program order; jump targets index into this list.
    pub instructions: Vec<Instruction>,
}
19
/// One decoded VM instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Instruction {
    /// Operation to perform.
    pub opcode: Opcode,
    /// When `true`, operands are read and written as single bytes instead of
    /// 32-bit words. Only set for opcodes that support byte mode.
    pub byte_mode: bool,
    /// Operands in encoding order; the first one is the destination for
    /// instructions that write a result.
    pub operands: Vec<Operand>,
}
26
/// Instruction set of the VM.
///
/// The discriminants are the numeric opcode values accepted by
/// `Opcode::from_u8`, so the variant order must not change.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum Opcode {
    Mov = 0,
    Cmp = 1,
    Add = 2,
    Sub = 3,
    Jz = 4,
    Jnz = 5,
    Inc = 6,
    Dec = 7,
    Jmp = 8,
    Xor = 9,
    And = 10,
    Or = 11,
    Test = 12,
    Js = 13,
    Jns = 14,
    Jb = 15,
    Jbe = 16,
    Ja = 17,
    Jae = 18,
    Push = 19,
    Pop = 20,
    Call = 21,
    Ret = 22,
    Not = 23,
    Shl = 24,
    Shr = 25,
    Sar = 26,
    Neg = 27,
    Pusha = 28,
    Popa = 29,
    Pushf = 30,
    Popf = 31,
    Movzx = 32,
    Movsx = 33,
    Xchg = 34,
    Mul = 35,
    Div = 36,
    Adc = 37,
    Sbb = 38,
    // Decoded but executed as a no-op.
    Print = 39,
}
71
/// Addressing form of a single instruction operand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operand {
    /// One of the eight VM registers, selected by index.
    Register(u8),
    /// A literal value; it can be read but writing to it is an error.
    Immediate(u32),
    /// The memory cell whose address is held in the given register.
    RegisterIndirect(u8),
    /// The memory cell at `base` plus the value of `register` (wrapping).
    Indexed { register: u8, base: u32 },
    /// The memory cell at a fixed address.
    Absolute(u32),
}
80
/// Inputs for a single execution of a [`Program`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Invocation<'a> {
    /// Data to filter; copied to the start of VM memory. Must not be longer
    /// than `GLOBAL_BASE` bytes.
    pub input: &'a [u8],
    /// Initial values for registers 0..=6. Registers 3..=6 are overwritten
    /// with VM-defined values before execution starts.
    pub regs: [u32; 7],
    /// Global data copied to `GLOBAL_BASE`; at most the first 0x2000 bytes are used.
    pub global_data: &'a [u8],
    /// File offset exposed to the program through the global area (both
    /// halves) and register 6 (low 32 bits).
    pub file_offset: u64,
    /// Execution counter exposed to the program through the global area and register 5.
    pub exec_count: u32,
}
89
/// State extracted from the VM after a program has finished.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExecutionResult {
    /// Bytes of VM memory selected by the position and size the program left
    /// at `GLOBAL_BASE + 0x20` and `GLOBAL_BASE + 0x1c`; empty when that range
    /// does not fit inside VM memory.
    pub output: Vec<u8>,
    /// The system global area followed by the user globals the program reported.
    pub globals: Vec<u8>,
    /// Final values of all eight registers.
    pub regs: [u32; 8],
}
96
97impl Program {
98    pub fn parse(blob: &[u8]) -> Result<Self> {
99        if blob.is_empty() {
100            return Err(Error::InvalidData("RARVM program blob is empty"));
101        }
102        if blob.iter().fold(0u8, |acc, &byte| acc ^ byte) != 0 {
103            return Err(Error::InvalidData("RARVM program checksum mismatch"));
104        }
105
106        let mut bits = BitReader::new(&blob[1..]);
107        let mut static_data = Vec::new();
108        if bits.read_bit()? != 0 {
109            let size = bits
110                .read_vm_number()?
111                .checked_add(1)
112                .ok_or(Error::InvalidData("RARVM static data size overflows"))?
113                as usize;
114            if size > MAX_STATIC_DATA {
115                return Err(Error::InvalidData("RARVM static data is too large"));
116            }
117            for _ in 0..size {
118                static_data.push(bits.read_bits(8)? as u8);
119            }
120        }
121
122        let mut instructions = Vec::new();
123        while bits.remaining_bits() >= 8 {
124            match parse_instruction(&mut bits, instructions.len()) {
125                Ok(instruction) => instructions.push(instruction),
126                Err(Error::NeedMoreInput) => break,
127                Err(error) => return Err(error),
128            }
129        }
130
131        if instructions
132            .last()
133            .is_none_or(|instruction| !instruction.opcode.is_unconditional_control_transfer())
134        {
135            instructions.push(Instruction {
136                opcode: Opcode::Ret,
137                byte_mode: false,
138                operands: Vec::new(),
139            });
140        }
141
142        Ok(Self {
143            static_data,
144            instructions,
145        })
146    }
147
148    pub fn execute(&self, invocation: Invocation<'_>) -> Result<ExecutionResult> {
149        let mut vm = Vm::new(self, invocation)?;
150        vm.run(self)
151    }
152}
153
154impl Opcode {
155    fn from_u8(value: u8) -> Result<Self> {
156        match value {
157            0 => Ok(Self::Mov),
158            1 => Ok(Self::Cmp),
159            2 => Ok(Self::Add),
160            3 => Ok(Self::Sub),
161            4 => Ok(Self::Jz),
162            5 => Ok(Self::Jnz),
163            6 => Ok(Self::Inc),
164            7 => Ok(Self::Dec),
165            8 => Ok(Self::Jmp),
166            9 => Ok(Self::Xor),
167            10 => Ok(Self::And),
168            11 => Ok(Self::Or),
169            12 => Ok(Self::Test),
170            13 => Ok(Self::Js),
171            14 => Ok(Self::Jns),
172            15 => Ok(Self::Jb),
173            16 => Ok(Self::Jbe),
174            17 => Ok(Self::Ja),
175            18 => Ok(Self::Jae),
176            19 => Ok(Self::Push),
177            20 => Ok(Self::Pop),
178            21 => Ok(Self::Call),
179            22 => Ok(Self::Ret),
180            23 => Ok(Self::Not),
181            24 => Ok(Self::Shl),
182            25 => Ok(Self::Shr),
183            26 => Ok(Self::Sar),
184            27 => Ok(Self::Neg),
185            28 => Ok(Self::Pusha),
186            29 => Ok(Self::Popa),
187            30 => Ok(Self::Pushf),
188            31 => Ok(Self::Popf),
189            32 => Ok(Self::Movzx),
190            33 => Ok(Self::Movsx),
191            34 => Ok(Self::Xchg),
192            35 => Ok(Self::Mul),
193            36 => Ok(Self::Div),
194            37 => Ok(Self::Adc),
195            38 => Ok(Self::Sbb),
196            39 => Ok(Self::Print),
197            _ => Err(Error::InvalidData("RARVM opcode is invalid")),
198        }
199    }
200
201    fn operand_count(self) -> usize {
202        match self {
203            Self::Ret | Self::Pusha | Self::Popa | Self::Pushf | Self::Popf | Self::Print => 0,
204            Self::Jz
205            | Self::Jnz
206            | Self::Inc
207            | Self::Dec
208            | Self::Jmp
209            | Self::Js
210            | Self::Jns
211            | Self::Jb
212            | Self::Jbe
213            | Self::Ja
214            | Self::Jae
215            | Self::Push
216            | Self::Pop
217            | Self::Call
218            | Self::Not
219            | Self::Neg => 1,
220            Self::Mov
221            | Self::Cmp
222            | Self::Add
223            | Self::Sub
224            | Self::Xor
225            | Self::And
226            | Self::Or
227            | Self::Test
228            | Self::Shl
229            | Self::Shr
230            | Self::Sar
231            | Self::Movzx
232            | Self::Movsx
233            | Self::Xchg
234            | Self::Mul
235            | Self::Div
236            | Self::Adc
237            | Self::Sbb => 2,
238        }
239    }
240
241    fn supports_byte_mode(self) -> bool {
242        matches!(
243            self,
244            Self::Mov
245                | Self::Cmp
246                | Self::Add
247                | Self::Sub
248                | Self::Inc
249                | Self::Dec
250                | Self::Xor
251                | Self::And
252                | Self::Or
253                | Self::Test
254                | Self::Not
255                | Self::Shl
256                | Self::Shr
257                | Self::Sar
258                | Self::Neg
259                | Self::Xchg
260                | Self::Mul
261                | Self::Div
262                | Self::Adc
263                | Self::Sbb
264        )
265    }
266
267    fn is_jump_or_call(self) -> bool {
268        matches!(
269            self,
270            Self::Jz
271                | Self::Jnz
272                | Self::Jmp
273                | Self::Js
274                | Self::Jns
275                | Self::Jb
276                | Self::Jbe
277                | Self::Ja
278                | Self::Jae
279                | Self::Call
280        )
281    }
282
283    fn is_unconditional_control_transfer(self) -> bool {
284        matches!(self, Self::Jmp | Self::Ret)
285    }
286}
287
288fn parse_instruction(bits: &mut BitReader<'_>, instruction_index: usize) -> Result<Instruction> {
289    let opcode = if bits.read_bit()? == 0 {
290        Opcode::from_u8(bits.read_bits(3)? as u8)?
291    } else {
292        Opcode::from_u8(bits.read_bits(5)? as u8 + 8)?
293    };
294    let byte_mode = opcode.supports_byte_mode() && bits.read_bit()? != 0;
295    let mut operands = Vec::with_capacity(opcode.operand_count());
296    for operand_index in 0..opcode.operand_count() {
297        let mut operand = parse_operand(bits, byte_mode)?;
298        if operand_index == 0 && opcode.is_jump_or_call() {
299            if let Operand::Immediate(value) = operand {
300                operand = Operand::Immediate(remap_jump_target(value, instruction_index));
301            }
302        }
303        operands.push(operand);
304    }
305    Ok(Instruction {
306        opcode,
307        byte_mode,
308        operands,
309    })
310}
311
312fn parse_operand(bits: &mut BitReader<'_>, byte_mode: bool) -> Result<Operand> {
313    if bits.read_bit()? != 0 {
314        return Ok(Operand::Register(bits.read_bits(3)? as u8));
315    }
316    if bits.read_bit()? == 0 {
317        return if byte_mode {
318            Ok(Operand::Immediate(bits.read_bits(8)?))
319        } else {
320            Ok(Operand::Immediate(bits.read_vm_number()?))
321        };
322    }
323    if bits.read_bit()? == 0 {
324        return Ok(Operand::RegisterIndirect(bits.read_bits(3)? as u8));
325    }
326    if bits.read_bit()? == 0 {
327        Ok(Operand::Indexed {
328            register: bits.read_bits(3)? as u8,
329            base: bits.read_vm_number()?,
330        })
331    } else {
332        Ok(Operand::Absolute(bits.read_vm_number()?))
333    }
334}
335
/// Converts an encoded immediate jump operand into an instruction index.
///
/// Values of 256 and above are absolute targets stored with a bias of 256.
/// Smaller values are distances relative to `instruction_index`:
/// 0..=7 map to +0..=+7, 8..=15 to -8..=-1, 16..=135 to +8..=+127 and
/// 136..=255 to -128..=-9. A result before instruction 0 wraps around as an
/// unsigned 32-bit value.
fn remap_jump_target(value: u32, instruction_index: usize) -> u32 {
    let offset = match value {
        256.. => return value - 256,
        136..=255 => i64::from(value) - 264,
        16..=135 => i64::from(value) - 8,
        8..=15 => i64::from(value) - 16,
        _ => i64::from(value),
    };
    (instruction_index as i64).wrapping_add(offset) as u32
}
351
/// Interpreter state for one program execution.
struct Vm {
    /// Flat memory image of `MEMORY_SIZE` bytes; all accesses wrap via `MEMORY_MASK`.
    memory: Vec<u8>,
    /// The eight registers; register 7 is used as the stack pointer.
    regs: [u32; 8],
    /// Flags word built from `FLAG_C`, `FLAG_Z` and `FLAG_S`.
    flags: u32,
}
357
358impl Vm {
359    fn new(program: &Program, invocation: Invocation<'_>) -> Result<Self> {
360        if invocation.input.len() > GLOBAL_BASE {
361            return Err(Error::InvalidData("RARVM filter input is too large"));
362        }
363
364        let mut memory = vec![0u8; MEMORY_SIZE];
365        memory[..invocation.input.len()].copy_from_slice(invocation.input);
366        let global_len = invocation.global_data.len().min(0x2000);
367        memory[GLOBAL_BASE..GLOBAL_BASE + global_len]
368            .copy_from_slice(&invocation.global_data[..global_len]);
369        let static_start = GLOBAL_BASE + global_len;
370        let static_len = program
371            .static_data
372            .len()
373            .min(MEMORY_SIZE.saturating_sub(static_start));
374        memory[static_start..static_start + static_len]
375            .copy_from_slice(&program.static_data[..static_len]);
376
377        write_u32(
378            &mut memory,
379            GLOBAL_BASE + 0x1c,
380            invocation.input.len() as u32,
381        );
382        write_u32(&mut memory, GLOBAL_BASE + 0x20, 0);
383        write_u32(
384            &mut memory,
385            GLOBAL_BASE + 0x24,
386            invocation.file_offset as u32,
387        );
388        write_u32(
389            &mut memory,
390            GLOBAL_BASE + 0x28,
391            (invocation.file_offset >> 32) as u32,
392        );
393        write_u32(&mut memory, GLOBAL_BASE + 0x2c, invocation.exec_count);
394
395        let mut regs = [0u32; 8];
396        regs[..7].copy_from_slice(&invocation.regs);
397        regs[3] = GLOBAL_BASE as u32;
398        regs[4] = invocation.input.len() as u32;
399        regs[5] = invocation.exec_count;
400        regs[6] = invocation.file_offset as u32;
401        regs[7] = MEMORY_SIZE as u32;
402
403        Ok(Self {
404            memory,
405            regs,
406            flags: 0,
407        })
408    }
409
410    fn run(&mut self, program: &Program) -> Result<ExecutionResult> {
411        let mut ip = 0usize;
412        let mut terminated = false;
413        for _ in 0..MAX_INSTRUCTIONS {
414            let Some(instruction) = program.instructions.get(ip) else {
415                terminated = true;
416                break;
417            };
418            ip += 1;
419            if let Some(next_ip) = self.execute_instruction(instruction, ip)? {
420                if next_ip >= program.instructions.len() {
421                    terminated = true;
422                    break;
423                }
424                ip = next_ip;
425            }
426            if instruction.opcode == Opcode::Ret && self.regs[7] >= MEMORY_SIZE as u32 {
427                terminated = true;
428                break;
429            }
430        }
431        if !terminated {
432            return Err(Error::InvalidData("RARVM instruction limit exceeded"));
433        }
434
435        let mut output_pos = self.read_u32(GLOBAL_BASE + 0x20) as usize & MEMORY_MASK as usize;
436        let mut output_size = self.read_u32(GLOBAL_BASE + 0x1c) as usize & MEMORY_MASK as usize;
437        if output_pos
438            .checked_add(output_size)
439            .is_none_or(|end| end > MEMORY_SIZE)
440        {
441            output_pos = 0;
442            output_size = 0;
443        }
444        let output = self.memory[output_pos..output_pos + output_size].to_vec();
445
446        let user_global = (self.read_u32(GLOBAL_BASE + 0x30) as usize).min(MAX_USER_GLOBAL);
447        let globals =
448            self.memory[GLOBAL_BASE..GLOBAL_BASE + SYSTEM_GLOBAL_SIZE + user_global].to_vec();
449        Ok(ExecutionResult {
450            output,
451            globals,
452            regs: self.regs,
453        })
454    }
455
456    fn execute_instruction(
457        &mut self,
458        instruction: &Instruction,
459        ip: usize,
460    ) -> Result<Option<usize>> {
461        let byte_mode = instruction.byte_mode;
462        let op = |index| {
463            instruction
464                .operands
465                .get(index)
466                .ok_or(Error::InvalidData("RARVM instruction operand is missing"))
467        };
468        match instruction.opcode {
469            Opcode::Mov => {
470                let value = self.read_operand(op(1)?, byte_mode);
471                self.write_operand(op(0)?, value, byte_mode)?;
472            }
473            Opcode::Cmp => {
474                let a = self.read_operand(op(0)?, byte_mode);
475                let b = self.read_operand(op(1)?, byte_mode);
476                self.set_sub_flags(a, b, 0, byte_mode);
477            }
478            Opcode::Add => {
479                let a = self.read_operand(op(0)?, byte_mode);
480                let b = self.read_operand(op(1)?, byte_mode);
481                let result = self.mask_width(a.wrapping_add(b), byte_mode);
482                self.write_operand(op(0)?, result, byte_mode)?;
483                self.set_add_flags(a, b, 0, result, byte_mode);
484            }
485            Opcode::Sub => {
486                let a = self.read_operand(op(0)?, byte_mode);
487                let b = self.read_operand(op(1)?, byte_mode);
488                let result = self.mask_width(a.wrapping_sub(b), byte_mode);
489                self.write_operand(op(0)?, result, byte_mode)?;
490                self.set_sub_flags(a, b, 0, byte_mode);
491            }
492            Opcode::Jz => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_Z != 0)),
493            Opcode::Jnz => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_Z == 0)),
494            Opcode::Inc => {
495                let value = self.read_operand(op(0)?, byte_mode).wrapping_add(1);
496                let result = self.mask_width(value, byte_mode);
497                self.write_operand(op(0)?, result, byte_mode)?;
498                self.set_zs(result, byte_mode);
499            }
500            Opcode::Dec => {
501                let value = self.read_operand(op(0)?, byte_mode).wrapping_sub(1);
502                let result = self.mask_width(value, byte_mode);
503                self.write_operand(op(0)?, result, byte_mode)?;
504                self.set_zs(result, byte_mode);
505            }
506            Opcode::Jmp => return Ok(Some(self.read_operand(op(0)?, false) as usize)),
507            Opcode::Xor | Opcode::And | Opcode::Or | Opcode::Test => {
508                let a = self.read_operand(op(0)?, byte_mode);
509                let b = self.read_operand(op(1)?, byte_mode);
510                let result = match instruction.opcode {
511                    Opcode::Xor => a ^ b,
512                    Opcode::And | Opcode::Test => a & b,
513                    Opcode::Or => a | b,
514                    _ => unreachable!(),
515                };
516                let result = self.mask_width(result, byte_mode);
517                if instruction.opcode != Opcode::Test {
518                    self.write_operand(op(0)?, result, byte_mode)?;
519                }
520                self.set_zs(result, byte_mode);
521            }
522            Opcode::Js => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_S != 0)),
523            Opcode::Jns => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_S == 0)),
524            Opcode::Jb => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_C != 0)),
525            Opcode::Jbe => {
526                return Ok(self.conditional_jump(op(0)?, self.flags & (FLAG_C | FLAG_Z) != 0));
527            }
528            Opcode::Ja => {
529                return Ok(self.conditional_jump(op(0)?, self.flags & (FLAG_C | FLAG_Z) == 0));
530            }
531            Opcode::Jae => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_C == 0)),
532            Opcode::Push => self.push(self.read_operand(op(0)?, false)),
533            Opcode::Pop => {
534                let value = self.pop();
535                self.write_operand(op(0)?, value, false)?;
536            }
537            Opcode::Call => {
538                self.push(ip as u32);
539                return Ok(Some(self.read_operand(op(0)?, false) as usize));
540            }
541            Opcode::Ret => {
542                if self.regs[7] >= MEMORY_SIZE as u32 {
543                    return Ok(Some(usize::MAX));
544                }
545                return Ok(Some(self.pop() as usize));
546            }
547            Opcode::Not => {
548                let result = self.mask_width(!self.read_operand(op(0)?, byte_mode), byte_mode);
549                self.write_operand(op(0)?, result, byte_mode)?;
550            }
551            Opcode::Shl | Opcode::Shr | Opcode::Sar => {
552                self.shift(
553                    instruction.opcode,
554                    op(0)?,
555                    self.read_operand(op(1)?, byte_mode),
556                    byte_mode,
557                )?;
558            }
559            Opcode::Neg => {
560                let value = self.read_operand(op(0)?, byte_mode);
561                let result = self.mask_width(0u32.wrapping_sub(value), byte_mode);
562                self.write_operand(op(0)?, result, byte_mode)?;
563                if result == 0 {
564                    self.flags = FLAG_Z;
565                } else {
566                    self.flags = FLAG_C | (result & self.sign_bit(byte_mode));
567                }
568            }
569            Opcode::Pusha => {
570                let regs = self.regs;
571                for value in regs {
572                    self.push(value);
573                }
574            }
575            Opcode::Popa => {
576                let mut stack = self.regs[7];
577                for index in (0..8).rev() {
578                    self.regs[index] = self.read_mem(stack, false);
579                    stack = stack.wrapping_add(4);
580                }
581            }
582            Opcode::Pushf => self.push(self.flags),
583            Opcode::Popf => self.flags = self.pop(),
584            Opcode::Movzx => {
585                let value = self.read_operand(op(1)?, true) & 0xff;
586                self.write_operand(op(0)?, value, false)?;
587            }
588            Opcode::Movsx => {
589                let value = self.read_operand(op(1)?, true) as u8 as i8 as i32 as u32;
590                self.write_operand(op(0)?, value, false)?;
591            }
592            Opcode::Xchg => {
593                let a = self.read_operand(op(0)?, byte_mode);
594                let b = self.read_operand(op(1)?, byte_mode);
595                self.write_operand(op(0)?, b, byte_mode)?;
596                self.write_operand(op(1)?, a, byte_mode)?;
597            }
598            Opcode::Mul => {
599                let result = self
600                    .read_operand(op(0)?, byte_mode)
601                    .wrapping_mul(self.read_operand(op(1)?, byte_mode));
602                self.write_operand(op(0)?, self.mask_width(result, byte_mode), byte_mode)?;
603            }
604            Opcode::Div => {
605                let divisor = self.read_operand(op(1)?, byte_mode);
606                if let Some(result) = self.read_operand(op(0)?, byte_mode).checked_div(divisor) {
607                    self.write_operand(op(0)?, result, byte_mode)?;
608                }
609            }
610            Opcode::Adc | Opcode::Sbb => {
611                let a = self.read_operand(op(0)?, byte_mode);
612                let b = self.read_operand(op(1)?, byte_mode);
613                let carry = u32::from(self.flags & FLAG_C != 0);
614                let result = if instruction.opcode == Opcode::Adc {
615                    self.mask_width(a.wrapping_add(b).wrapping_add(carry), byte_mode)
616                } else {
617                    self.mask_width(a.wrapping_sub(b).wrapping_sub(carry), byte_mode)
618                };
619                self.write_operand(op(0)?, result, byte_mode)?;
620                if instruction.opcode == Opcode::Adc {
621                    self.set_add_flags(a, b, carry, result, byte_mode);
622                } else {
623                    self.set_sub_flags(a, b, carry, byte_mode);
624                }
625            }
626            Opcode::Print => {}
627        }
628        Ok(None)
629    }
630
631    fn conditional_jump(&self, operand: &Operand, condition: bool) -> Option<usize> {
632        condition.then_some(self.read_operand(operand, false) as usize)
633    }
634
635    fn read_operand(&self, operand: &Operand, byte_mode: bool) -> u32 {
636        match *operand {
637            Operand::Register(index) => {
638                let value = self.regs[index as usize];
639                if byte_mode {
640                    value & 0xff
641                } else {
642                    value
643                }
644            }
645            Operand::Immediate(value) => self.mask_width(value, byte_mode),
646            Operand::RegisterIndirect(index) => self.read_mem(self.regs[index as usize], byte_mode),
647            Operand::Indexed { register, base } => {
648                self.read_mem(base.wrapping_add(self.regs[register as usize]), byte_mode)
649            }
650            Operand::Absolute(address) => self.read_mem(address, byte_mode),
651        }
652    }
653
654    fn write_operand(&mut self, operand: &Operand, value: u32, byte_mode: bool) -> Result<()> {
655        match *operand {
656            Operand::Register(index) => {
657                let slot = &mut self.regs[index as usize];
658                if byte_mode {
659                    *slot = (*slot & 0xffff_ff00) | (value & 0xff);
660                } else {
661                    *slot = value;
662                }
663            }
664            Operand::RegisterIndirect(index) => {
665                self.write_mem(self.regs[index as usize], value, byte_mode)
666            }
667            Operand::Indexed { register, base } => {
668                self.write_mem(
669                    base.wrapping_add(self.regs[register as usize]),
670                    value,
671                    byte_mode,
672                );
673            }
674            Operand::Absolute(address) => self.write_mem(address, value, byte_mode),
675            Operand::Immediate(_) => {
676                return Err(Error::InvalidData("RARVM write to immediate operand"))
677            }
678        }
679        Ok(())
680    }
681
682    fn read_mem(&self, address: u32, byte_mode: bool) -> u32 {
683        let address = address & MEMORY_MASK;
684        if byte_mode {
685            u32::from(self.memory[address as usize])
686        } else {
687            self.read_u32(address as usize)
688        }
689    }
690
691    fn write_mem(&mut self, address: u32, value: u32, byte_mode: bool) {
692        let address = address & MEMORY_MASK;
693        if byte_mode {
694            self.memory[address as usize] = value as u8;
695        } else {
696            write_u32(&mut self.memory, address as usize, value);
697        }
698    }
699
700    fn read_u32(&self, address: usize) -> u32 {
701        let address = address as u32;
702        u32::from_le_bytes([
703            self.memory[(address & MEMORY_MASK) as usize],
704            self.memory[(address.wrapping_add(1) & MEMORY_MASK) as usize],
705            self.memory[(address.wrapping_add(2) & MEMORY_MASK) as usize],
706            self.memory[(address.wrapping_add(3) & MEMORY_MASK) as usize],
707        ])
708    }
709
710    fn push(&mut self, value: u32) {
711        self.regs[7] = self.regs[7].wrapping_sub(4);
712        self.write_mem(self.regs[7], value, false);
713    }
714
715    fn pop(&mut self) -> u32 {
716        let value = self.read_mem(self.regs[7], false);
717        self.regs[7] = self.regs[7].wrapping_add(4);
718        value
719    }
720
721    fn shift(&mut self, opcode: Opcode, dst: &Operand, count: u32, byte_mode: bool) -> Result<()> {
722        if count == 0 {
723            return Ok(());
724        }
725        let width = if byte_mode { 8 } else { 32 };
726        let count = count.min(width);
727        let value = self.read_operand(dst, byte_mode);
728        let result = match opcode {
729            Opcode::Shl => {
730                if count == width {
731                    0
732                } else {
733                    value.wrapping_shl(count)
734                }
735            }
736            Opcode::Shr => {
737                if count == width {
738                    0
739                } else {
740                    value.wrapping_shr(count)
741                }
742            }
743            Opcode::Sar => {
744                if byte_mode {
745                    if count >= 8 {
746                        if value & 0x80 != 0 {
747                            0xff
748                        } else {
749                            0
750                        }
751                    } else {
752                        ((value as u8 as i8) >> count) as u8 as u32
753                    }
754                } else if count >= 32 {
755                    if value & 0x8000_0000 != 0 {
756                        u32::MAX
757                    } else {
758                        0
759                    }
760                } else {
761                    ((value as i32) >> count) as u32
762                }
763            }
764            _ => unreachable!(),
765        };
766        let carry = match opcode {
767            Opcode::Shl => value & (1 << (width - count)) != 0,
768            Opcode::Shr | Opcode::Sar => value & (1 << (count - 1)) != 0,
769            _ => unreachable!(),
770        };
771        let result = self.mask_width(result, byte_mode);
772        self.write_operand(dst, result, byte_mode)?;
773        self.set_zsc(result, carry, byte_mode);
774        Ok(())
775    }
776
777    fn set_add_flags(&mut self, a: u32, b: u32, carry: u32, result: u32, byte_mode: bool) {
778        let mask = self.value_mask(byte_mode) as u64;
779        let sum = (a as u64 & mask) + (b as u64 & mask) + u64::from(carry);
780        self.set_zsc(result, sum > mask, byte_mode);
781    }
782
783    fn set_sub_flags(&mut self, a: u32, b: u32, borrow: u32, byte_mode: bool) {
784        let mask = self.value_mask(byte_mode) as u64;
785        let a = a as u64 & mask;
786        let subtrahend = (b as u64 & mask) + u64::from(borrow);
787        let result = self.mask_width((a as u32).wrapping_sub(subtrahend as u32), byte_mode);
788        self.set_zsc(result, a < subtrahend, byte_mode);
789    }
790
791    fn set_zs(&mut self, result: u32, byte_mode: bool) {
792        self.flags = if result == 0 {
793            FLAG_Z
794        } else {
795            result & self.sign_bit(byte_mode)
796        };
797    }
798
799    fn set_zsc(&mut self, result: u32, carry: bool, byte_mode: bool) {
800        self.set_zs(result, byte_mode);
801        if carry {
802            self.flags |= FLAG_C;
803        }
804    }
805
806    fn mask_width(&self, value: u32, byte_mode: bool) -> u32 {
807        value & self.value_mask(byte_mode)
808    }
809
810    fn value_mask(&self, byte_mode: bool) -> u32 {
811        if byte_mode {
812            0xff
813        } else {
814            u32::MAX
815        }
816    }
817
818    fn sign_bit(&self, byte_mode: bool) -> u32 {
819        if byte_mode {
820            0x80
821        } else {
822            FLAG_S
823        }
824    }
825}
826
827fn write_u32(memory: &mut [u8], address: usize, value: u32) {
828    let address = address as u32;
829    for (offset, byte) in value.to_le_bytes().into_iter().enumerate() {
830        memory[(address.wrapping_add(offset as u32) & MEMORY_MASK) as usize] = byte;
831    }
832}
833
/// Reads bits from a byte slice, most significant bit of each byte first.
#[derive(Debug, Clone)]
struct BitReader<'a> {
    /// Bytes being read.
    input: &'a [u8],
    /// Number of bits consumed so far, counted from the start of `input`.
    bit_pos: usize,
}
839
840impl<'a> BitReader<'a> {
841    fn new(input: &'a [u8]) -> Self {
842        Self { input, bit_pos: 0 }
843    }
844
845    fn remaining_bits(&self) -> usize {
846        self.input.len() * 8 - self.bit_pos
847    }
848
849    fn read_bit(&mut self) -> Result<u32> {
850        self.read_bits(1)
851    }
852
853    fn read_bits(&mut self, count: usize) -> Result<u32> {
854        if count > 32 {
855            return Err(Error::InvalidData("RARVM bit read is too wide"));
856        }
857        if self.remaining_bits() < count {
858            return Err(Error::NeedMoreInput);
859        }
860        let mut value = 0;
861        for _ in 0..count {
862            let byte = self.input[self.bit_pos / 8];
863            let bit = (byte >> (7 - (self.bit_pos % 8))) & 1;
864            value = (value << 1) | u32::from(bit);
865            self.bit_pos += 1;
866        }
867        Ok(value)
868    }
869
870    fn read_vm_number(&mut self) -> Result<u32> {
871        match self.read_bits(2)? {
872            0 => self.read_bits(4),
873            1 => {
874                let high = self.read_bits(8)?;
875                if high >= 16 {
876                    Ok(high)
877                } else {
878                    Ok(0xffff_ff00 | (high << 4) | self.read_bits(4)?)
879                }
880            }
881            2 => self.read_bits(16),
882            3 => self.read_bits(32),
883            _ => unreachable!(),
884        }
885    }
886}
887
888#[cfg(test)]
889mod tests {
890    use super::*;
891
892    #[test]
893    fn rejects_bad_xor_checksum() {
894        assert_eq!(
895            Program::parse(&[0x12, 0x34]),
896            Err(Error::InvalidData("RARVM program checksum mismatch"))
897        );
898    }
899
900    #[test]
901    fn parses_static_data_and_appends_implicit_ret() {
902        let mut bits = BitWriter::new();
903        bits.write_bits(1, 1);
904        write_vm_number(&mut bits, 2);
905        bits.write_bits(0xaa, 8);
906        bits.write_bits(0xbb, 8);
907        bits.write_bits(0xcc, 8);
908        let program = Program::parse(&with_xor(bits.finish())).unwrap();
909
910        assert_eq!(program.static_data, [0xaa, 0xbb, 0xcc]);
911        assert_eq!(
912            program.instructions,
913            [Instruction {
914                opcode: Opcode::Ret,
915                byte_mode: false,
916                operands: Vec::new(),
917            }]
918        );
919    }
920
921    #[test]
922    fn parses_register_immediate_and_memory_operands() {
923        let mut bits = BitWriter::new();
924        bits.write_bits(0, 1);
925        write_opcode(&mut bits, Opcode::Mov);
926        bits.write_bits(0, 1);
927        write_reg(&mut bits, 2);
928        write_number_immediate(&mut bits, 0x1234);
929        write_opcode(&mut bits, Opcode::Add);
930        bits.write_bits(1, 1);
931        write_reg_indirect(&mut bits, 3);
932        write_byte_immediate(&mut bits, 0x7f);
933        write_opcode(&mut bits, Opcode::Sub);
934        bits.write_bits(0, 1);
935        write_indexed(&mut bits, 1, 0x44);
936        write_absolute(&mut bits, 0x3c000);
937        write_opcode(&mut bits, Opcode::Ret);
938
939        let program = Program::parse(&with_xor(bits.finish())).unwrap();
940        assert_eq!(program.static_data, []);
941        assert_eq!(program.instructions.len(), 4);
942        assert_eq!(program.instructions[0].opcode, Opcode::Mov);
943        assert!(!program.instructions[0].byte_mode);
944        assert_eq!(
945            program.instructions[0].operands,
946            [Operand::Register(2), Operand::Immediate(0x1234)]
947        );
948        assert_eq!(program.instructions[1].opcode, Opcode::Add);
949        assert!(program.instructions[1].byte_mode);
950        assert_eq!(
951            program.instructions[1].operands,
952            [Operand::RegisterIndirect(3), Operand::Immediate(0x7f)]
953        );
954        assert_eq!(
955            program.instructions[2].operands,
956            [
957                Operand::Indexed {
958                    register: 1,
959                    base: 0x44,
960                },
961                Operand::Absolute(0x3c000),
962            ]
963        );
964        assert_eq!(program.instructions[3].opcode, Opcode::Ret);
965    }
966
967    #[test]
968    fn remaps_jump_immediates_to_instruction_indices() {
969        let mut bits = BitWriter::new();
970        bits.write_bits(0, 1);
971        write_opcode(&mut bits, Opcode::Print);
972        write_opcode(&mut bits, Opcode::Jmp);
973        write_number_immediate(&mut bits, 15);
974
975        let program = Program::parse(&with_xor(bits.finish())).unwrap();
976        assert_eq!(program.instructions.len(), 2);
977        assert_eq!(
978            program.instructions[1],
979            Instruction {
980                opcode: Opcode::Jmp,
981                byte_mode: false,
982                operands: vec![Operand::Immediate(0)],
983            }
984        );
985    }
986
987    #[test]
988    fn executes_arithmetic_and_memory_writes() {
989        let program = Program {
990            static_data: Vec::new(),
991            instructions: vec![
992                Instruction {
993                    opcode: Opcode::Mov,
994                    byte_mode: false,
995                    operands: vec![Operand::Register(0), Operand::Immediate(7)],
996                },
997                Instruction {
998                    opcode: Opcode::Add,
999                    byte_mode: false,
1000                    operands: vec![Operand::Register(0), Operand::Immediate(5)],
1001                },
1002                Instruction {
1003                    opcode: Opcode::Mov,
1004                    byte_mode: true,
1005                    operands: vec![Operand::Absolute(0), Operand::Register(0)],
1006                },
1007                Instruction {
1008                    opcode: Opcode::Ret,
1009                    byte_mode: false,
1010                    operands: Vec::new(),
1011                },
1012            ],
1013        };
1014
1015        let result = program
1016            .execute(Invocation {
1017                input: &[0],
1018                regs: [0; 7],
1019                global_data: &[],
1020                file_offset: 0,
1021                exec_count: 0,
1022            })
1023            .unwrap();
1024
1025        assert_eq!(result.output, [12]);
1026        assert_eq!(result.regs[0], 12);
1027    }
1028
1029    #[test]
1030    fn executes_conditional_jump_and_stack_call() {
1031        let program = Program {
1032            static_data: Vec::new(),
1033            instructions: vec![
1034                Instruction {
1035                    opcode: Opcode::Mov,
1036                    byte_mode: false,
1037                    operands: vec![Operand::Register(0), Operand::Immediate(1)],
1038                },
1039                Instruction {
1040                    opcode: Opcode::Cmp,
1041                    byte_mode: false,
1042                    operands: vec![Operand::Register(0), Operand::Immediate(1)],
1043                },
1044                Instruction {
1045                    opcode: Opcode::Jz,
1046                    byte_mode: false,
1047                    operands: vec![Operand::Immediate(4)],
1048                },
1049                Instruction {
1050                    opcode: Opcode::Mov,
1051                    byte_mode: false,
1052                    operands: vec![Operand::Register(0), Operand::Immediate(99)],
1053                },
1054                Instruction {
1055                    opcode: Opcode::Call,
1056                    byte_mode: false,
1057                    operands: vec![Operand::Immediate(6)],
1058                },
1059                Instruction {
1060                    opcode: Opcode::Ret,
1061                    byte_mode: false,
1062                    operands: Vec::new(),
1063                },
1064                Instruction {
1065                    opcode: Opcode::Add,
1066                    byte_mode: false,
1067                    operands: vec![Operand::Register(0), Operand::Immediate(41)],
1068                },
1069                Instruction {
1070                    opcode: Opcode::Ret,
1071                    byte_mode: false,
1072                    operands: Vec::new(),
1073                },
1074            ],
1075        };
1076
1077        let result = program
1078            .execute(Invocation {
1079                input: &[0],
1080                regs: [0; 7],
1081                global_data: &[],
1082                file_offset: 0,
1083                exec_count: 0,
1084            })
1085            .unwrap();
1086
1087        assert_eq!(result.regs[0], 42);
1088    }
1089
1090    #[test]
1091    fn executes_unconditional_jumps_and_mutating_unary_ops() {
1092        let result = execute_instructions(vec![
1093            instr(
1094                Opcode::Mov,
1095                false,
1096                vec![Operand::Register(0), Operand::Immediate(1)],
1097            ),
1098            instr(Opcode::Inc, false, vec![Operand::Register(0)]),
1099            instr(Opcode::Dec, false, vec![Operand::Register(0)]),
1100            instr(Opcode::Not, false, vec![Operand::Register(0)]),
1101            instr(Opcode::Neg, false, vec![Operand::Register(0)]),
1102            instr(Opcode::Jmp, false, vec![Operand::Immediate(7)]),
1103            instr(
1104                Opcode::Mov,
1105                false,
1106                vec![Operand::Register(0), Operand::Immediate(99)],
1107            ),
1108            instr(Opcode::Ret, false, Vec::new()),
1109        ]);
1110
1111        assert_eq!(result.regs[0], 2);
1112    }
1113
1114    #[test]
1115    fn executes_logic_ops_and_test_without_writing_destination() {
1116        let result = execute_instructions(vec![
1117            instr(
1118                Opcode::Mov,
1119                false,
1120                vec![Operand::Register(0), Operand::Immediate(0b1010)],
1121            ),
1122            instr(
1123                Opcode::Xor,
1124                false,
1125                vec![Operand::Register(0), Operand::Immediate(0b1100)],
1126            ),
1127            instr(
1128                Opcode::And,
1129                false,
1130                vec![Operand::Register(0), Operand::Immediate(0b0110)],
1131            ),
1132            instr(
1133                Opcode::Or,
1134                false,
1135                vec![Operand::Register(0), Operand::Immediate(0b0001)],
1136            ),
1137            instr(
1138                Opcode::Test,
1139                false,
1140                vec![Operand::Register(0), Operand::Immediate(0b0100)],
1141            ),
1142            instr(Opcode::Jnz, false, vec![Operand::Immediate(7)]),
1143            instr(
1144                Opcode::Mov,
1145                false,
1146                vec![Operand::Register(0), Operand::Immediate(99)],
1147            ),
1148            instr(Opcode::Ret, false, Vec::new()),
1149        ]);
1150
1151        assert_eq!(result.regs[0], 0b0111);
1152    }
1153
1154    #[test]
1155    fn executes_unsigned_conditional_jumps() {
1156        let result = execute_instructions(vec![
1157            instr(
1158                Opcode::Mov,
1159                false,
1160                vec![Operand::Register(0), Operand::Immediate(0)],
1161            ),
1162            instr(
1163                Opcode::Cmp,
1164                false,
1165                vec![Operand::Immediate(1), Operand::Immediate(2)],
1166            ),
1167            instr(Opcode::Jb, false, vec![Operand::Immediate(5)]),
1168            instr(
1169                Opcode::Mov,
1170                false,
1171                vec![Operand::Register(0), Operand::Immediate(99)],
1172            ),
1173            instr(Opcode::Ret, false, Vec::new()),
1174            instr(Opcode::Jbe, false, vec![Operand::Immediate(7)]),
1175            instr(
1176                Opcode::Mov,
1177                false,
1178                vec![Operand::Register(0), Operand::Immediate(98)],
1179            ),
1180            instr(
1181                Opcode::Cmp,
1182                false,
1183                vec![Operand::Immediate(3), Operand::Immediate(2)],
1184            ),
1185            instr(Opcode::Ja, false, vec![Operand::Immediate(10)]),
1186            instr(
1187                Opcode::Mov,
1188                false,
1189                vec![Operand::Register(0), Operand::Immediate(97)],
1190            ),
1191            instr(
1192                Opcode::Cmp,
1193                false,
1194                vec![Operand::Immediate(3), Operand::Immediate(2)],
1195            ),
1196            instr(Opcode::Jae, false, vec![Operand::Immediate(13)]),
1197            instr(
1198                Opcode::Mov,
1199                false,
1200                vec![Operand::Register(0), Operand::Immediate(96)],
1201            ),
1202            instr(
1203                Opcode::Mov,
1204                false,
1205                vec![Operand::Register(0), Operand::Immediate(42)],
1206            ),
1207            instr(Opcode::Ret, false, Vec::new()),
1208        ]);
1209
1210        assert_eq!(result.regs[0], 42);
1211    }
1212
1213    #[test]
1214    fn executes_signed_conditional_jumps() {
1215        let result = execute_instructions(vec![
1216            instr(
1217                Opcode::Mov,
1218                false,
1219                vec![Operand::Register(0), Operand::Immediate(0)],
1220            ),
1221            instr(
1222                Opcode::Sub,
1223                false,
1224                vec![Operand::Register(0), Operand::Immediate(1)],
1225            ),
1226            instr(Opcode::Js, false, vec![Operand::Immediate(5)]),
1227            instr(
1228                Opcode::Mov,
1229                false,
1230                vec![Operand::Register(1), Operand::Immediate(99)],
1231            ),
1232            instr(Opcode::Ret, false, Vec::new()),
1233            instr(
1234                Opcode::Add,
1235                false,
1236                vec![Operand::Register(0), Operand::Immediate(1)],
1237            ),
1238            instr(Opcode::Jns, false, vec![Operand::Immediate(8)]),
1239            instr(
1240                Opcode::Mov,
1241                false,
1242                vec![Operand::Register(1), Operand::Immediate(98)],
1243            ),
1244            instr(
1245                Opcode::Mov,
1246                false,
1247                vec![Operand::Register(1), Operand::Immediate(42)],
1248            ),
1249            instr(Opcode::Ret, false, Vec::new()),
1250        ]);
1251
1252        assert_eq!(result.regs[0], 0);
1253        assert_eq!(result.regs[1], 42);
1254    }
1255
1256    #[test]
1257    fn executes_stack_register_and_flag_round_trips() {
1258        let result = execute_instructions(vec![
1259            instr(
1260                Opcode::Mov,
1261                false,
1262                vec![Operand::Register(0), Operand::Immediate(10)],
1263            ),
1264            instr(Opcode::Push, false, vec![Operand::Register(0)]),
1265            instr(
1266                Opcode::Mov,
1267                false,
1268                vec![Operand::Register(0), Operand::Immediate(0)],
1269            ),
1270            instr(Opcode::Pop, false, vec![Operand::Register(1)]),
1271            instr(
1272                Opcode::Mov,
1273                false,
1274                vec![Operand::Register(0), Operand::Immediate(10)],
1275            ),
1276            instr(Opcode::Pusha, false, Vec::new()),
1277            instr(
1278                Opcode::Mov,
1279                false,
1280                vec![Operand::Register(0), Operand::Immediate(99)],
1281            ),
1282            instr(Opcode::Popa, false, Vec::new()),
1283            instr(
1284                Opcode::Cmp,
1285                false,
1286                vec![Operand::Immediate(1), Operand::Immediate(2)],
1287            ),
1288            instr(Opcode::Pushf, false, Vec::new()),
1289            instr(
1290                Opcode::Cmp,
1291                false,
1292                vec![Operand::Immediate(2), Operand::Immediate(2)],
1293            ),
1294            instr(Opcode::Popf, false, Vec::new()),
1295            instr(Opcode::Jb, false, vec![Operand::Immediate(14)]),
1296            instr(
1297                Opcode::Mov,
1298                false,
1299                vec![Operand::Register(1), Operand::Immediate(99)],
1300            ),
1301            instr(Opcode::Ret, false, Vec::new()),
1302        ]);
1303
1304        assert_eq!(result.regs[0], 10);
1305        assert_eq!(result.regs[1], 10);
1306    }
1307
1308    #[test]
1309    fn executes_shifts_with_byte_and_word_modes() {
1310        let result = execute_instructions(vec![
1311            instr(
1312                Opcode::Mov,
1313                false,
1314                vec![Operand::Register(0), Operand::Immediate(0x81)],
1315            ),
1316            instr(
1317                Opcode::Shl,
1318                false,
1319                vec![Operand::Register(0), Operand::Immediate(1)],
1320            ),
1321            instr(
1322                Opcode::Shr,
1323                false,
1324                vec![Operand::Register(0), Operand::Immediate(2)],
1325            ),
1326            instr(
1327                Opcode::Mov,
1328                false,
1329                vec![Operand::Register(1), Operand::Immediate(0x80)],
1330            ),
1331            instr(
1332                Opcode::Sar,
1333                true,
1334                vec![Operand::Register(1), Operand::Immediate(1)],
1335            ),
1336            instr(Opcode::Ret, false, Vec::new()),
1337        ]);
1338
1339        assert_eq!(result.regs[0], 0x40);
1340        assert_eq!(result.regs[1], 0xc0);
1341    }
1342
1343    #[test]
1344    fn byte_mode_sar_accepts_shift_count_equal_to_width() {
1345        let result = execute_instructions(vec![
1346            instr(
1347                Opcode::Mov,
1348                false,
1349                vec![Operand::Register(0), Operand::Immediate(0x80)],
1350            ),
1351            instr(
1352                Opcode::Sar,
1353                true,
1354                vec![Operand::Register(0), Operand::Immediate(8)],
1355            ),
1356            instr(
1357                Opcode::Mov,
1358                false,
1359                vec![Operand::Register(1), Operand::Immediate(0x7f)],
1360            ),
1361            instr(
1362                Opcode::Sar,
1363                true,
1364                vec![Operand::Register(1), Operand::Immediate(8)],
1365            ),
1366            instr(Opcode::Ret, false, Vec::new()),
1367        ]);
1368
1369        assert_eq!(result.regs[0], 0xff);
1370        assert_eq!(result.regs[1], 0);
1371    }
1372
1373    #[test]
1374    fn full_width_shl_and_shr_clear_destination() {
1375        let result = execute_instructions(vec![
1376            instr(
1377                Opcode::Mov,
1378                false,
1379                vec![Operand::Register(0), Operand::Immediate(0x1234_5678)],
1380            ),
1381            instr(
1382                Opcode::Shl,
1383                false,
1384                vec![Operand::Register(0), Operand::Immediate(32)],
1385            ),
1386            instr(
1387                Opcode::Mov,
1388                false,
1389                vec![Operand::Register(1), Operand::Immediate(0x8765_4321)],
1390            ),
1391            instr(
1392                Opcode::Shr,
1393                false,
1394                vec![Operand::Register(1), Operand::Immediate(32)],
1395            ),
1396            instr(
1397                Opcode::Mov,
1398                false,
1399                vec![Operand::Register(2), Operand::Immediate(0xff)],
1400            ),
1401            instr(
1402                Opcode::Shl,
1403                true,
1404                vec![Operand::Register(2), Operand::Immediate(8)],
1405            ),
1406            instr(
1407                Opcode::Mov,
1408                false,
1409                vec![Operand::Register(3), Operand::Immediate(0xff)],
1410            ),
1411            instr(
1412                Opcode::Shr,
1413                true,
1414                vec![Operand::Register(3), Operand::Immediate(8)],
1415            ),
1416            instr(Opcode::Ret, false, Vec::new()),
1417        ]);
1418
1419        assert_eq!(result.regs[0], 0);
1420        assert_eq!(result.regs[1], 0);
1421        assert_eq!(result.regs[2] & 0xff, 0);
1422        assert_eq!(result.regs[3] & 0xff, 0);
1423    }
1424
1425    #[test]
1426    fn sbb_sets_borrow_flag_when_subtrahend_plus_carry_wraps_byte_width() {
1427        let result = execute_instructions(vec![
1428            instr(
1429                Opcode::Cmp,
1430                true,
1431                vec![Operand::Immediate(0), Operand::Immediate(1)],
1432            ),
1433            instr(
1434                Opcode::Mov,
1435                false,
1436                vec![Operand::Register(0), Operand::Immediate(0)],
1437            ),
1438            instr(
1439                Opcode::Sbb,
1440                true,
1441                vec![Operand::Register(0), Operand::Immediate(0xff)],
1442            ),
1443            instr(Opcode::Jb, false, vec![Operand::Immediate(6)]),
1444            instr(
1445                Opcode::Mov,
1446                false,
1447                vec![Operand::Register(1), Operand::Immediate(0xdead)],
1448            ),
1449            instr(Opcode::Ret, false, Vec::new()),
1450            instr(
1451                Opcode::Mov,
1452                false,
1453                vec![Operand::Register(1), Operand::Immediate(0xbeef)],
1454            ),
1455            instr(Opcode::Ret, false, Vec::new()),
1456        ]);
1457
1458        assert_eq!(result.regs[0] & 0xff, 0);
1459        assert_eq!(result.regs[1], 0xbeef);
1460    }
1461
1462    #[test]
1463    fn zero_count_shifts_are_noops() {
1464        let result = execute_instructions(vec![
1465            instr(
1466                Opcode::Mov,
1467                false,
1468                vec![Operand::Register(0), Operand::Immediate(0x1234_5678)],
1469            ),
1470            instr(
1471                Opcode::Shl,
1472                false,
1473                vec![Operand::Register(0), Operand::Immediate(0)],
1474            ),
1475            instr(
1476                Opcode::Shr,
1477                false,
1478                vec![Operand::Register(0), Operand::Immediate(0)],
1479            ),
1480            instr(
1481                Opcode::Sar,
1482                false,
1483                vec![Operand::Register(0), Operand::Immediate(0)],
1484            ),
1485            instr(Opcode::Ret, false, Vec::new()),
1486        ]);
1487
1488        assert_eq!(result.regs[0], 0x1234_5678);
1489    }
1490
1491    #[test]
1492    fn output_range_accepts_exclusive_memory_end() {
1493        let program = Program {
1494            static_data: Vec::new(),
1495            instructions: vec![
1496                instr(
1497                    Opcode::Mov,
1498                    false,
1499                    vec![
1500                        Operand::Absolute((GLOBAL_BASE + 0x20) as u32),
1501                        Operand::Immediate((MEMORY_SIZE - 1) as u32),
1502                    ],
1503                ),
1504                instr(
1505                    Opcode::Mov,
1506                    false,
1507                    vec![
1508                        Operand::Absolute((GLOBAL_BASE + 0x1c) as u32),
1509                        Operand::Immediate(1),
1510                    ],
1511                ),
1512                instr(
1513                    Opcode::Mov,
1514                    true,
1515                    vec![
1516                        Operand::Absolute((MEMORY_SIZE - 1) as u32),
1517                        Operand::Immediate(0x5a),
1518                    ],
1519                ),
1520                instr(Opcode::Ret, false, Vec::new()),
1521            ],
1522        };
1523
1524        let result = program
1525            .execute(Invocation {
1526                input: &[0],
1527                regs: [0; 7],
1528                global_data: &[],
1529                file_offset: 0,
1530                exec_count: 0,
1531            })
1532            .unwrap();
1533
1534        assert_eq!(result.output, [0x5a]);
1535    }
1536
1537    #[test]
1538    fn executes_extension_exchange_multiply_divide_and_carry_arithmetic() {
1539        let result = execute_instructions(vec![
1540            instr(
1541                Opcode::Mov,
1542                false,
1543                vec![Operand::Absolute(0), Operand::Immediate(0x80)],
1544            ),
1545            instr(
1546                Opcode::Movzx,
1547                false,
1548                vec![Operand::Register(0), Operand::Absolute(0)],
1549            ),
1550            instr(
1551                Opcode::Movsx,
1552                false,
1553                vec![Operand::Register(1), Operand::Absolute(0)],
1554            ),
1555            instr(
1556                Opcode::Xchg,
1557                false,
1558                vec![Operand::Register(0), Operand::Register(1)],
1559            ),
1560            instr(
1561                Opcode::Mul,
1562                false,
1563                vec![Operand::Register(1), Operand::Immediate(3)],
1564            ),
1565            instr(
1566                Opcode::Div,
1567                false,
1568                vec![Operand::Register(1), Operand::Immediate(2)],
1569            ),
1570            instr(
1571                Opcode::Cmp,
1572                false,
1573                vec![Operand::Immediate(1), Operand::Immediate(2)],
1574            ),
1575            instr(
1576                Opcode::Adc,
1577                false,
1578                vec![Operand::Register(1), Operand::Immediate(1)],
1579            ),
1580            instr(
1581                Opcode::Cmp,
1582                false,
1583                vec![Operand::Immediate(1), Operand::Immediate(2)],
1584            ),
1585            instr(
1586                Opcode::Sbb,
1587                false,
1588                vec![Operand::Register(1), Operand::Immediate(2)],
1589            ),
1590            instr(Opcode::Print, false, Vec::new()),
1591            instr(Opcode::Ret, false, Vec::new()),
1592        ]);
1593
1594        assert_eq!(result.regs[0], 0xffff_ff80);
1595        assert_eq!(result.regs[1], 0xbf);
1596    }
1597
1598    #[test]
1599    fn preserves_requested_user_globals() {
1600        let program = Program {
1601            static_data: b"static".to_vec(),
1602            instructions: vec![
1603                Instruction {
1604                    opcode: Opcode::Mov,
1605                    byte_mode: false,
1606                    operands: vec![Operand::Absolute(0x3c030), Operand::Immediate(4)],
1607                },
1608                Instruction {
1609                    opcode: Opcode::Ret,
1610                    byte_mode: false,
1611                    operands: Vec::new(),
1612                },
1613            ],
1614        };
1615
1616        let result = program
1617            .execute(Invocation {
1618                input: &[1, 2, 3],
1619                regs: [0; 7],
1620                global_data: &[0; 64],
1621                file_offset: 0x1_0000_0002,
1622                exec_count: 9,
1623            })
1624            .unwrap();
1625
1626        assert_eq!(result.output, [1, 2, 3]);
1627        assert_eq!(result.globals.len(), 68);
1628        assert_eq!(&result.globals[64..], b"stat");
1629    }
1630
1631    #[test]
1632    fn parse_rejects_huge_static_data_size_without_preallocating() {
1633        let err = Program::parse(&[0xff, 0xff, 0xff, 0xff, 0, 0]).unwrap_err();
1634        assert_eq!(err, Error::InvalidData("RARVM static data is too large"));
1635    }
1636
1637    #[test]
1638    fn parse_rejects_static_data_larger_than_vm_memory() {
1639        let mut bits = BitWriter::new();
1640        bits.write_bits(1, 1);
1641        write_vm_number(&mut bits, MAX_STATIC_DATA as u32);
1642
1643        let err = Program::parse(&with_xor(bits.finish())).unwrap_err();
1644        assert_eq!(err, Error::InvalidData("RARVM static data is too large"));
1645    }
1646
1647    fn instr(opcode: Opcode, byte_mode: bool, operands: Vec<Operand>) -> Instruction {
1648        Instruction {
1649            opcode,
1650            byte_mode,
1651            operands,
1652        }
1653    }
1654
1655    fn execute_instructions(instructions: Vec<Instruction>) -> ExecutionResult {
1656        Program {
1657            static_data: Vec::new(),
1658            instructions,
1659        }
1660        .execute(Invocation {
1661            input: &[0],
1662            regs: [0; 7],
1663            global_data: &[],
1664            file_offset: 0,
1665            exec_count: 0,
1666        })
1667        .unwrap()
1668    }
1669
1670    struct BitWriter {
1671        output: Vec<u8>,
1672        bit_pos: usize,
1673    }
1674
1675    impl BitWriter {
1676        fn new() -> Self {
1677            Self {
1678                output: Vec::new(),
1679                bit_pos: 0,
1680            }
1681        }
1682
1683        fn write_bits(&mut self, value: u32, count: usize) {
1684            for i in (0..count).rev() {
1685                if self.bit_pos.is_multiple_of(8) {
1686                    self.output.push(0);
1687                }
1688                if (value >> i) & 1 != 0 {
1689                    let idx = self.output.len() - 1;
1690                    self.output[idx] |= 1 << (7 - (self.bit_pos % 8));
1691                }
1692                self.bit_pos += 1;
1693            }
1694        }
1695
1696        fn finish(self) -> Vec<u8> {
1697            self.output
1698        }
1699    }
1700
1701    fn with_xor(mut payload: Vec<u8>) -> Vec<u8> {
1702        let checksum = payload.iter().fold(0u8, |acc, &byte| acc ^ byte);
1703        payload.insert(0, checksum);
1704        payload
1705    }
1706
1707    fn write_opcode(bits: &mut BitWriter, opcode: Opcode) {
1708        let value = opcode as u8;
1709        if value <= 7 {
1710            bits.write_bits(0, 1);
1711            bits.write_bits(u32::from(value), 3);
1712        } else {
1713            bits.write_bits(1, 1);
1714            bits.write_bits(u32::from(value - 8), 5);
1715        }
1716    }
1717
1718    fn write_reg(bits: &mut BitWriter, reg: u8) {
1719        bits.write_bits(1, 1);
1720        bits.write_bits(u32::from(reg), 3);
1721    }
1722
1723    fn write_number_immediate(bits: &mut BitWriter, value: u32) {
1724        bits.write_bits(0, 2);
1725        write_vm_number(bits, value);
1726    }
1727
1728    fn write_byte_immediate(bits: &mut BitWriter, value: u8) {
1729        bits.write_bits(0, 2);
1730        bits.write_bits(u32::from(value), 8);
1731    }
1732
1733    fn write_reg_indirect(bits: &mut BitWriter, reg: u8) {
1734        bits.write_bits(0b010, 3);
1735        bits.write_bits(u32::from(reg), 3);
1736    }
1737
1738    fn write_indexed(bits: &mut BitWriter, reg: u8, base: u32) {
1739        bits.write_bits(0b0110, 4);
1740        bits.write_bits(u32::from(reg), 3);
1741        write_vm_number(bits, base);
1742    }
1743
1744    fn write_absolute(bits: &mut BitWriter, address: u32) {
1745        bits.write_bits(0b0111, 4);
1746        write_vm_number(bits, address);
1747    }
1748
1749    fn write_vm_number(bits: &mut BitWriter, value: u32) {
1750        if value <= 15 {
1751            bits.write_bits(0, 2);
1752            bits.write_bits(value, 4);
1753        } else if value <= 255 {
1754            bits.write_bits(1, 2);
1755            bits.write_bits(value, 8);
1756        } else if value <= 0xffff {
1757            bits.write_bits(2, 2);
1758            bits.write_bits(value, 16);
1759        } else {
1760            bits.write_bits(3, 2);
1761            bits.write_bits(value, 32);
1762        }
1763    }
1764}