1use crate::{Error, Result};
2
/// Size in bytes of the VM memory buffer.
const MEMORY_SIZE: usize = 0x40000;
/// Mask applied to every memory address so that accesses wrap inside the buffer.
const MEMORY_MASK: u32 = (MEMORY_SIZE - 1) as u32;
/// Address at which caller-supplied global data is placed.
const GLOBAL_BASE: usize = 0x3c000;
/// Number of bytes at `GLOBAL_BASE` that are always part of the returned globals.
const SYSTEM_GLOBAL_SIZE: usize = 64;
/// Upper bound on the user-global byte count a program may request after a run.
const MAX_USER_GLOBAL: usize = 0x2000 - SYSTEM_GLOBAL_SIZE;
/// Largest static data section accepted by the program parser.
const MAX_STATIC_DATA: usize = MEMORY_SIZE - GLOBAL_BASE;
/// Maximum number of instructions executed before a run is aborted.
const MAX_INSTRUCTIONS: usize = 25_000_000;
/// Carry / borrow flag.
const FLAG_C: u32 = 1 << 0;
/// Zero flag.
const FLAG_Z: u32 = 1 << 1;
/// Sign flag.
const FLAG_S: u32 = 1 << 31;
13
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct Program {
16 pub static_data: Vec<u8>,
17 pub instructions: Vec<Instruction>,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq)]
21pub struct Instruction {
22 pub opcode: Opcode,
23 pub byte_mode: bool,
24 pub operands: Vec<Operand>,
25}
26
/// Operation codes understood by the VM.
///
/// The discriminants are the numeric values used by the program encoding.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum Opcode {
    /// Copy the second operand into the first.
    Mov = 0,
    /// Compare two operands by subtraction; only the flags are updated.
    Cmp = 1,
    /// Add the second operand to the first.
    Add = 2,
    /// Subtract the second operand from the first.
    Sub = 3,
    /// Jump when the zero flag is set.
    Jz = 4,
    /// Jump when the zero flag is clear.
    Jnz = 5,
    /// Increment the operand by one.
    Inc = 6,
    /// Decrement the operand by one.
    Dec = 7,
    /// Unconditional jump.
    Jmp = 8,
    /// Bitwise exclusive OR into the first operand.
    Xor = 9,
    /// Bitwise AND into the first operand.
    And = 10,
    /// Bitwise OR into the first operand.
    Or = 11,
    /// Bitwise AND that only updates the flags.
    Test = 12,
    /// Jump when the sign flag is set.
    Js = 13,
    /// Jump when the sign flag is clear.
    Jns = 14,
    /// Jump when the carry flag is set.
    Jb = 15,
    /// Jump when the carry or the zero flag is set.
    Jbe = 16,
    /// Jump when neither the carry nor the zero flag is set.
    Ja = 17,
    /// Jump when the carry flag is clear.
    Jae = 18,
    /// Push the operand onto the stack.
    Push = 19,
    /// Pop the top of the stack into the operand.
    Pop = 20,
    /// Push the index of the following instruction and jump.
    Call = 21,
    /// Pop a jump target, or finish execution when the stack is empty.
    Ret = 22,
    /// Bitwise complement of the operand.
    Not = 23,
    /// Shift the first operand left.
    Shl = 24,
    /// Logical shift right of the first operand.
    Shr = 25,
    /// Arithmetic shift right of the first operand.
    Sar = 26,
    /// Two's-complement negation of the operand.
    Neg = 27,
    /// Push all eight registers.
    Pusha = 28,
    /// Restore all eight registers from the stack.
    Popa = 29,
    /// Push the flags word.
    Pushf = 30,
    /// Pop the flags word.
    Popf = 31,
    /// Load a byte from the second operand, zero-extended, into the first.
    Movzx = 32,
    /// Load a byte from the second operand, sign-extended, into the first.
    Movsx = 33,
    /// Exchange the two operands.
    Xchg = 34,
    /// Wrapping multiplication into the first operand.
    Mul = 35,
    /// Unsigned division into the first operand; skipped when the divisor is zero.
    Div = 36,
    /// Addition that also adds the carry flag.
    Adc = 37,
    /// Subtraction that also subtracts the carry flag.
    Sbb = 38,
    /// Accepted by the interpreter but has no effect.
    Print = 39,
}
71
/// Addressing forms an instruction operand can take.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Operand {
    /// The value of the register with this index.
    Register(u8),
    /// A constant encoded in the instruction; cannot be written to.
    Immediate(u32),
    /// Memory at the address held in the register with this index.
    RegisterIndirect(u8),
    /// Memory at `base` plus the value of `register` (wrapping).
    Indexed { register: u8, base: u32 },
    /// Memory at a fixed address.
    Absolute(u32),
}
80
/// Caller-provided inputs for one program execution.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Invocation<'a> {
    /// Data to be filtered; placed at the start of VM memory.
    pub input: &'a [u8],
    /// Initial values for registers 0 through 6.
    pub regs: [u32; 7],
    /// Global data placed at the VM's global area.
    pub global_data: &'a [u8],
    /// 64-bit file offset exposed to the program.
    pub file_offset: u64,
    /// Execution counter exposed to the program.
    pub exec_count: u32,
}
89
/// What a finished program run hands back to the caller.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ExecutionResult {
    /// The output block selected by the program.
    pub output: Vec<u8>,
    /// Snapshot of the global memory area after the run.
    pub globals: Vec<u8>,
    /// Final values of all eight registers.
    pub regs: [u32; 8],
}
96
97impl Program {
98 pub fn parse(blob: &[u8]) -> Result<Self> {
99 if blob.is_empty() {
100 return Err(Error::InvalidData("RARVM program blob is empty"));
101 }
102 if blob.iter().fold(0u8, |acc, &byte| acc ^ byte) != 0 {
103 return Err(Error::InvalidData("RARVM program checksum mismatch"));
104 }
105
106 let mut bits = BitReader::new(&blob[1..]);
107 let mut static_data = Vec::new();
108 if bits.read_bit()? != 0 {
109 let size = bits
110 .read_vm_number()?
111 .checked_add(1)
112 .ok_or(Error::InvalidData("RARVM static data size overflows"))?
113 as usize;
114 if size > MAX_STATIC_DATA {
115 return Err(Error::InvalidData("RARVM static data is too large"));
116 }
117 for _ in 0..size {
118 static_data.push(bits.read_bits(8)? as u8);
119 }
120 }
121
122 let mut instructions = Vec::new();
123 while bits.remaining_bits() >= 8 {
124 match parse_instruction(&mut bits, instructions.len()) {
125 Ok(instruction) => instructions.push(instruction),
126 Err(Error::NeedMoreInput) => break,
127 Err(error) => return Err(error),
128 }
129 }
130
131 if instructions
132 .last()
133 .is_none_or(|instruction| !instruction.opcode.is_unconditional_control_transfer())
134 {
135 instructions.push(Instruction {
136 opcode: Opcode::Ret,
137 byte_mode: false,
138 operands: Vec::new(),
139 });
140 }
141
142 Ok(Self {
143 static_data,
144 instructions,
145 })
146 }
147
148 pub fn execute(&self, invocation: Invocation<'_>) -> Result<ExecutionResult> {
149 let mut vm = Vm::new(self, invocation)?;
150 vm.run(self)
151 }
152}
153
154impl Opcode {
155 fn from_u8(value: u8) -> Result<Self> {
156 match value {
157 0 => Ok(Self::Mov),
158 1 => Ok(Self::Cmp),
159 2 => Ok(Self::Add),
160 3 => Ok(Self::Sub),
161 4 => Ok(Self::Jz),
162 5 => Ok(Self::Jnz),
163 6 => Ok(Self::Inc),
164 7 => Ok(Self::Dec),
165 8 => Ok(Self::Jmp),
166 9 => Ok(Self::Xor),
167 10 => Ok(Self::And),
168 11 => Ok(Self::Or),
169 12 => Ok(Self::Test),
170 13 => Ok(Self::Js),
171 14 => Ok(Self::Jns),
172 15 => Ok(Self::Jb),
173 16 => Ok(Self::Jbe),
174 17 => Ok(Self::Ja),
175 18 => Ok(Self::Jae),
176 19 => Ok(Self::Push),
177 20 => Ok(Self::Pop),
178 21 => Ok(Self::Call),
179 22 => Ok(Self::Ret),
180 23 => Ok(Self::Not),
181 24 => Ok(Self::Shl),
182 25 => Ok(Self::Shr),
183 26 => Ok(Self::Sar),
184 27 => Ok(Self::Neg),
185 28 => Ok(Self::Pusha),
186 29 => Ok(Self::Popa),
187 30 => Ok(Self::Pushf),
188 31 => Ok(Self::Popf),
189 32 => Ok(Self::Movzx),
190 33 => Ok(Self::Movsx),
191 34 => Ok(Self::Xchg),
192 35 => Ok(Self::Mul),
193 36 => Ok(Self::Div),
194 37 => Ok(Self::Adc),
195 38 => Ok(Self::Sbb),
196 39 => Ok(Self::Print),
197 _ => Err(Error::InvalidData("RARVM opcode is invalid")),
198 }
199 }
200
201 fn operand_count(self) -> usize {
202 match self {
203 Self::Ret | Self::Pusha | Self::Popa | Self::Pushf | Self::Popf | Self::Print => 0,
204 Self::Jz
205 | Self::Jnz
206 | Self::Inc
207 | Self::Dec
208 | Self::Jmp
209 | Self::Js
210 | Self::Jns
211 | Self::Jb
212 | Self::Jbe
213 | Self::Ja
214 | Self::Jae
215 | Self::Push
216 | Self::Pop
217 | Self::Call
218 | Self::Not
219 | Self::Neg => 1,
220 Self::Mov
221 | Self::Cmp
222 | Self::Add
223 | Self::Sub
224 | Self::Xor
225 | Self::And
226 | Self::Or
227 | Self::Test
228 | Self::Shl
229 | Self::Shr
230 | Self::Sar
231 | Self::Movzx
232 | Self::Movsx
233 | Self::Xchg
234 | Self::Mul
235 | Self::Div
236 | Self::Adc
237 | Self::Sbb => 2,
238 }
239 }
240
241 fn supports_byte_mode(self) -> bool {
242 matches!(
243 self,
244 Self::Mov
245 | Self::Cmp
246 | Self::Add
247 | Self::Sub
248 | Self::Inc
249 | Self::Dec
250 | Self::Xor
251 | Self::And
252 | Self::Or
253 | Self::Test
254 | Self::Not
255 | Self::Shl
256 | Self::Shr
257 | Self::Sar
258 | Self::Neg
259 | Self::Xchg
260 | Self::Mul
261 | Self::Div
262 | Self::Adc
263 | Self::Sbb
264 )
265 }
266
267 fn is_jump_or_call(self) -> bool {
268 matches!(
269 self,
270 Self::Jz
271 | Self::Jnz
272 | Self::Jmp
273 | Self::Js
274 | Self::Jns
275 | Self::Jb
276 | Self::Jbe
277 | Self::Ja
278 | Self::Jae
279 | Self::Call
280 )
281 }
282
283 fn is_unconditional_control_transfer(self) -> bool {
284 matches!(self, Self::Jmp | Self::Ret)
285 }
286}
287
288fn parse_instruction(bits: &mut BitReader<'_>, instruction_index: usize) -> Result<Instruction> {
289 let opcode = if bits.read_bit()? == 0 {
290 Opcode::from_u8(bits.read_bits(3)? as u8)?
291 } else {
292 Opcode::from_u8(bits.read_bits(5)? as u8 + 8)?
293 };
294 let byte_mode = opcode.supports_byte_mode() && bits.read_bit()? != 0;
295 let mut operands = Vec::with_capacity(opcode.operand_count());
296 for operand_index in 0..opcode.operand_count() {
297 let mut operand = parse_operand(bits, byte_mode)?;
298 if operand_index == 0 && opcode.is_jump_or_call() {
299 if let Operand::Immediate(value) = operand {
300 operand = Operand::Immediate(remap_jump_target(value, instruction_index));
301 }
302 }
303 operands.push(operand);
304 }
305 Ok(Instruction {
306 opcode,
307 byte_mode,
308 operands,
309 })
310}
311
312fn parse_operand(bits: &mut BitReader<'_>, byte_mode: bool) -> Result<Operand> {
313 if bits.read_bit()? != 0 {
314 return Ok(Operand::Register(bits.read_bits(3)? as u8));
315 }
316 if bits.read_bit()? == 0 {
317 return if byte_mode {
318 Ok(Operand::Immediate(bits.read_bits(8)?))
319 } else {
320 Ok(Operand::Immediate(bits.read_vm_number()?))
321 };
322 }
323 if bits.read_bit()? == 0 {
324 return Ok(Operand::RegisterIndirect(bits.read_bits(3)? as u8));
325 }
326 if bits.read_bit()? == 0 {
327 Ok(Operand::Indexed {
328 register: bits.read_bits(3)? as u8,
329 base: bits.read_vm_number()?,
330 })
331 } else {
332 Ok(Operand::Absolute(bits.read_vm_number()?))
333 }
334}
335
/// Converts an encoded jump/call immediate into an instruction index.
///
/// Values of 256 and above are absolute targets stored with a bias of 256.
/// Smaller values are distances relative to `instruction_index`, stored in a
/// compact form that is undone here before the addition.
fn remap_jump_target(value: u32, instruction_index: usize) -> u32 {
    let adjustment: i64 = match value {
        256.. => return value - 256,
        136..=255 => -264,
        16..=135 => -8,
        8..=15 => -16,
        0..=7 => 0,
    };
    let distance = i64::from(value) + adjustment;
    (instruction_index as i64).wrapping_add(distance) as u32
}
351
352struct Vm {
353 memory: Vec<u8>,
354 regs: [u32; 8],
355 flags: u32,
356}
357
358impl Vm {
359 fn new(program: &Program, invocation: Invocation<'_>) -> Result<Self> {
360 if invocation.input.len() > GLOBAL_BASE {
361 return Err(Error::InvalidData("RARVM filter input is too large"));
362 }
363
364 let mut memory = vec![0u8; MEMORY_SIZE];
365 memory[..invocation.input.len()].copy_from_slice(invocation.input);
366 let global_len = invocation.global_data.len().min(0x2000);
367 memory[GLOBAL_BASE..GLOBAL_BASE + global_len]
368 .copy_from_slice(&invocation.global_data[..global_len]);
369 let static_start = GLOBAL_BASE + global_len;
370 let static_len = program
371 .static_data
372 .len()
373 .min(MEMORY_SIZE.saturating_sub(static_start));
374 memory[static_start..static_start + static_len]
375 .copy_from_slice(&program.static_data[..static_len]);
376
377 write_u32(
378 &mut memory,
379 GLOBAL_BASE + 0x1c,
380 invocation.input.len() as u32,
381 );
382 write_u32(&mut memory, GLOBAL_BASE + 0x20, 0);
383 write_u32(
384 &mut memory,
385 GLOBAL_BASE + 0x24,
386 invocation.file_offset as u32,
387 );
388 write_u32(
389 &mut memory,
390 GLOBAL_BASE + 0x28,
391 (invocation.file_offset >> 32) as u32,
392 );
393 write_u32(&mut memory, GLOBAL_BASE + 0x2c, invocation.exec_count);
394
395 let mut regs = [0u32; 8];
396 regs[..7].copy_from_slice(&invocation.regs);
397 regs[3] = GLOBAL_BASE as u32;
398 regs[4] = invocation.input.len() as u32;
399 regs[5] = invocation.exec_count;
400 regs[6] = invocation.file_offset as u32;
401 regs[7] = MEMORY_SIZE as u32;
402
403 Ok(Self {
404 memory,
405 regs,
406 flags: 0,
407 })
408 }
409
410 fn run(&mut self, program: &Program) -> Result<ExecutionResult> {
411 let mut ip = 0usize;
412 let mut terminated = false;
413 for _ in 0..MAX_INSTRUCTIONS {
414 let Some(instruction) = program.instructions.get(ip) else {
415 terminated = true;
416 break;
417 };
418 ip += 1;
419 if let Some(next_ip) = self.execute_instruction(instruction, ip)? {
420 if next_ip >= program.instructions.len() {
421 terminated = true;
422 break;
423 }
424 ip = next_ip;
425 }
426 if instruction.opcode == Opcode::Ret && self.regs[7] >= MEMORY_SIZE as u32 {
427 terminated = true;
428 break;
429 }
430 }
431 if !terminated {
432 return Err(Error::InvalidData("RARVM instruction limit exceeded"));
433 }
434
435 let mut output_pos = self.read_u32(GLOBAL_BASE + 0x20) as usize & MEMORY_MASK as usize;
436 let mut output_size = self.read_u32(GLOBAL_BASE + 0x1c) as usize & MEMORY_MASK as usize;
437 if output_pos
438 .checked_add(output_size)
439 .is_none_or(|end| end > MEMORY_SIZE)
440 {
441 output_pos = 0;
442 output_size = 0;
443 }
444 let output = self.memory[output_pos..output_pos + output_size].to_vec();
445
446 let user_global = (self.read_u32(GLOBAL_BASE + 0x30) as usize).min(MAX_USER_GLOBAL);
447 let globals =
448 self.memory[GLOBAL_BASE..GLOBAL_BASE + SYSTEM_GLOBAL_SIZE + user_global].to_vec();
449 Ok(ExecutionResult {
450 output,
451 globals,
452 regs: self.regs,
453 })
454 }
455
456 fn execute_instruction(
457 &mut self,
458 instruction: &Instruction,
459 ip: usize,
460 ) -> Result<Option<usize>> {
461 let byte_mode = instruction.byte_mode;
462 let op = |index| {
463 instruction
464 .operands
465 .get(index)
466 .ok_or(Error::InvalidData("RARVM instruction operand is missing"))
467 };
468 match instruction.opcode {
469 Opcode::Mov => {
470 let value = self.read_operand(op(1)?, byte_mode);
471 self.write_operand(op(0)?, value, byte_mode)?;
472 }
473 Opcode::Cmp => {
474 let a = self.read_operand(op(0)?, byte_mode);
475 let b = self.read_operand(op(1)?, byte_mode);
476 self.set_sub_flags(a, b, 0, byte_mode);
477 }
478 Opcode::Add => {
479 let a = self.read_operand(op(0)?, byte_mode);
480 let b = self.read_operand(op(1)?, byte_mode);
481 let result = self.mask_width(a.wrapping_add(b), byte_mode);
482 self.write_operand(op(0)?, result, byte_mode)?;
483 self.set_add_flags(a, b, 0, result, byte_mode);
484 }
485 Opcode::Sub => {
486 let a = self.read_operand(op(0)?, byte_mode);
487 let b = self.read_operand(op(1)?, byte_mode);
488 let result = self.mask_width(a.wrapping_sub(b), byte_mode);
489 self.write_operand(op(0)?, result, byte_mode)?;
490 self.set_sub_flags(a, b, 0, byte_mode);
491 }
492 Opcode::Jz => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_Z != 0)),
493 Opcode::Jnz => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_Z == 0)),
494 Opcode::Inc => {
495 let value = self.read_operand(op(0)?, byte_mode).wrapping_add(1);
496 let result = self.mask_width(value, byte_mode);
497 self.write_operand(op(0)?, result, byte_mode)?;
498 self.set_zs(result, byte_mode);
499 }
500 Opcode::Dec => {
501 let value = self.read_operand(op(0)?, byte_mode).wrapping_sub(1);
502 let result = self.mask_width(value, byte_mode);
503 self.write_operand(op(0)?, result, byte_mode)?;
504 self.set_zs(result, byte_mode);
505 }
506 Opcode::Jmp => return Ok(Some(self.read_operand(op(0)?, false) as usize)),
507 Opcode::Xor | Opcode::And | Opcode::Or | Opcode::Test => {
508 let a = self.read_operand(op(0)?, byte_mode);
509 let b = self.read_operand(op(1)?, byte_mode);
510 let result = match instruction.opcode {
511 Opcode::Xor => a ^ b,
512 Opcode::And | Opcode::Test => a & b,
513 Opcode::Or => a | b,
514 _ => unreachable!(),
515 };
516 let result = self.mask_width(result, byte_mode);
517 if instruction.opcode != Opcode::Test {
518 self.write_operand(op(0)?, result, byte_mode)?;
519 }
520 self.set_zs(result, byte_mode);
521 }
522 Opcode::Js => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_S != 0)),
523 Opcode::Jns => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_S == 0)),
524 Opcode::Jb => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_C != 0)),
525 Opcode::Jbe => {
526 return Ok(self.conditional_jump(op(0)?, self.flags & (FLAG_C | FLAG_Z) != 0));
527 }
528 Opcode::Ja => {
529 return Ok(self.conditional_jump(op(0)?, self.flags & (FLAG_C | FLAG_Z) == 0));
530 }
531 Opcode::Jae => return Ok(self.conditional_jump(op(0)?, self.flags & FLAG_C == 0)),
532 Opcode::Push => self.push(self.read_operand(op(0)?, false)),
533 Opcode::Pop => {
534 let value = self.pop();
535 self.write_operand(op(0)?, value, false)?;
536 }
537 Opcode::Call => {
538 self.push(ip as u32);
539 return Ok(Some(self.read_operand(op(0)?, false) as usize));
540 }
541 Opcode::Ret => {
542 if self.regs[7] >= MEMORY_SIZE as u32 {
543 return Ok(Some(usize::MAX));
544 }
545 return Ok(Some(self.pop() as usize));
546 }
547 Opcode::Not => {
548 let result = self.mask_width(!self.read_operand(op(0)?, byte_mode), byte_mode);
549 self.write_operand(op(0)?, result, byte_mode)?;
550 }
551 Opcode::Shl | Opcode::Shr | Opcode::Sar => {
552 self.shift(
553 instruction.opcode,
554 op(0)?,
555 self.read_operand(op(1)?, byte_mode),
556 byte_mode,
557 )?;
558 }
559 Opcode::Neg => {
560 let value = self.read_operand(op(0)?, byte_mode);
561 let result = self.mask_width(0u32.wrapping_sub(value), byte_mode);
562 self.write_operand(op(0)?, result, byte_mode)?;
563 if result == 0 {
564 self.flags = FLAG_Z;
565 } else {
566 self.flags = FLAG_C | (result & self.sign_bit(byte_mode));
567 }
568 }
569 Opcode::Pusha => {
570 let regs = self.regs;
571 for value in regs {
572 self.push(value);
573 }
574 }
575 Opcode::Popa => {
576 let mut stack = self.regs[7];
577 for index in (0..8).rev() {
578 self.regs[index] = self.read_mem(stack, false);
579 stack = stack.wrapping_add(4);
580 }
581 }
582 Opcode::Pushf => self.push(self.flags),
583 Opcode::Popf => self.flags = self.pop(),
584 Opcode::Movzx => {
585 let value = self.read_operand(op(1)?, true) & 0xff;
586 self.write_operand(op(0)?, value, false)?;
587 }
588 Opcode::Movsx => {
589 let value = self.read_operand(op(1)?, true) as u8 as i8 as i32 as u32;
590 self.write_operand(op(0)?, value, false)?;
591 }
592 Opcode::Xchg => {
593 let a = self.read_operand(op(0)?, byte_mode);
594 let b = self.read_operand(op(1)?, byte_mode);
595 self.write_operand(op(0)?, b, byte_mode)?;
596 self.write_operand(op(1)?, a, byte_mode)?;
597 }
598 Opcode::Mul => {
599 let result = self
600 .read_operand(op(0)?, byte_mode)
601 .wrapping_mul(self.read_operand(op(1)?, byte_mode));
602 self.write_operand(op(0)?, self.mask_width(result, byte_mode), byte_mode)?;
603 }
604 Opcode::Div => {
605 let divisor = self.read_operand(op(1)?, byte_mode);
606 if let Some(result) = self.read_operand(op(0)?, byte_mode).checked_div(divisor) {
607 self.write_operand(op(0)?, result, byte_mode)?;
608 }
609 }
610 Opcode::Adc | Opcode::Sbb => {
611 let a = self.read_operand(op(0)?, byte_mode);
612 let b = self.read_operand(op(1)?, byte_mode);
613 let carry = u32::from(self.flags & FLAG_C != 0);
614 let result = if instruction.opcode == Opcode::Adc {
615 self.mask_width(a.wrapping_add(b).wrapping_add(carry), byte_mode)
616 } else {
617 self.mask_width(a.wrapping_sub(b).wrapping_sub(carry), byte_mode)
618 };
619 self.write_operand(op(0)?, result, byte_mode)?;
620 if instruction.opcode == Opcode::Adc {
621 self.set_add_flags(a, b, carry, result, byte_mode);
622 } else {
623 self.set_sub_flags(a, b, carry, byte_mode);
624 }
625 }
626 Opcode::Print => {}
627 }
628 Ok(None)
629 }
630
631 fn conditional_jump(&self, operand: &Operand, condition: bool) -> Option<usize> {
632 condition.then_some(self.read_operand(operand, false) as usize)
633 }
634
635 fn read_operand(&self, operand: &Operand, byte_mode: bool) -> u32 {
636 match *operand {
637 Operand::Register(index) => {
638 let value = self.regs[index as usize];
639 if byte_mode {
640 value & 0xff
641 } else {
642 value
643 }
644 }
645 Operand::Immediate(value) => self.mask_width(value, byte_mode),
646 Operand::RegisterIndirect(index) => self.read_mem(self.regs[index as usize], byte_mode),
647 Operand::Indexed { register, base } => {
648 self.read_mem(base.wrapping_add(self.regs[register as usize]), byte_mode)
649 }
650 Operand::Absolute(address) => self.read_mem(address, byte_mode),
651 }
652 }
653
654 fn write_operand(&mut self, operand: &Operand, value: u32, byte_mode: bool) -> Result<()> {
655 match *operand {
656 Operand::Register(index) => {
657 let slot = &mut self.regs[index as usize];
658 if byte_mode {
659 *slot = (*slot & 0xffff_ff00) | (value & 0xff);
660 } else {
661 *slot = value;
662 }
663 }
664 Operand::RegisterIndirect(index) => {
665 self.write_mem(self.regs[index as usize], value, byte_mode)
666 }
667 Operand::Indexed { register, base } => {
668 self.write_mem(
669 base.wrapping_add(self.regs[register as usize]),
670 value,
671 byte_mode,
672 );
673 }
674 Operand::Absolute(address) => self.write_mem(address, value, byte_mode),
675 Operand::Immediate(_) => {
676 return Err(Error::InvalidData("RARVM write to immediate operand"))
677 }
678 }
679 Ok(())
680 }
681
682 fn read_mem(&self, address: u32, byte_mode: bool) -> u32 {
683 let address = address & MEMORY_MASK;
684 if byte_mode {
685 u32::from(self.memory[address as usize])
686 } else {
687 self.read_u32(address as usize)
688 }
689 }
690
691 fn write_mem(&mut self, address: u32, value: u32, byte_mode: bool) {
692 let address = address & MEMORY_MASK;
693 if byte_mode {
694 self.memory[address as usize] = value as u8;
695 } else {
696 write_u32(&mut self.memory, address as usize, value);
697 }
698 }
699
700 fn read_u32(&self, address: usize) -> u32 {
701 let address = address as u32;
702 u32::from_le_bytes([
703 self.memory[(address & MEMORY_MASK) as usize],
704 self.memory[(address.wrapping_add(1) & MEMORY_MASK) as usize],
705 self.memory[(address.wrapping_add(2) & MEMORY_MASK) as usize],
706 self.memory[(address.wrapping_add(3) & MEMORY_MASK) as usize],
707 ])
708 }
709
710 fn push(&mut self, value: u32) {
711 self.regs[7] = self.regs[7].wrapping_sub(4);
712 self.write_mem(self.regs[7], value, false);
713 }
714
715 fn pop(&mut self) -> u32 {
716 let value = self.read_mem(self.regs[7], false);
717 self.regs[7] = self.regs[7].wrapping_add(4);
718 value
719 }
720
721 fn shift(&mut self, opcode: Opcode, dst: &Operand, count: u32, byte_mode: bool) -> Result<()> {
722 if count == 0 {
723 return Ok(());
724 }
725 let width = if byte_mode { 8 } else { 32 };
726 let count = count.min(width);
727 let value = self.read_operand(dst, byte_mode);
728 let result = match opcode {
729 Opcode::Shl => {
730 if count == width {
731 0
732 } else {
733 value.wrapping_shl(count)
734 }
735 }
736 Opcode::Shr => {
737 if count == width {
738 0
739 } else {
740 value.wrapping_shr(count)
741 }
742 }
743 Opcode::Sar => {
744 if byte_mode {
745 if count >= 8 {
746 if value & 0x80 != 0 {
747 0xff
748 } else {
749 0
750 }
751 } else {
752 ((value as u8 as i8) >> count) as u8 as u32
753 }
754 } else if count >= 32 {
755 if value & 0x8000_0000 != 0 {
756 u32::MAX
757 } else {
758 0
759 }
760 } else {
761 ((value as i32) >> count) as u32
762 }
763 }
764 _ => unreachable!(),
765 };
766 let carry = match opcode {
767 Opcode::Shl => value & (1 << (width - count)) != 0,
768 Opcode::Shr | Opcode::Sar => value & (1 << (count - 1)) != 0,
769 _ => unreachable!(),
770 };
771 let result = self.mask_width(result, byte_mode);
772 self.write_operand(dst, result, byte_mode)?;
773 self.set_zsc(result, carry, byte_mode);
774 Ok(())
775 }
776
777 fn set_add_flags(&mut self, a: u32, b: u32, carry: u32, result: u32, byte_mode: bool) {
778 let mask = self.value_mask(byte_mode) as u64;
779 let sum = (a as u64 & mask) + (b as u64 & mask) + u64::from(carry);
780 self.set_zsc(result, sum > mask, byte_mode);
781 }
782
783 fn set_sub_flags(&mut self, a: u32, b: u32, borrow: u32, byte_mode: bool) {
784 let mask = self.value_mask(byte_mode) as u64;
785 let a = a as u64 & mask;
786 let subtrahend = (b as u64 & mask) + u64::from(borrow);
787 let result = self.mask_width((a as u32).wrapping_sub(subtrahend as u32), byte_mode);
788 self.set_zsc(result, a < subtrahend, byte_mode);
789 }
790
791 fn set_zs(&mut self, result: u32, byte_mode: bool) {
792 self.flags = if result == 0 {
793 FLAG_Z
794 } else {
795 result & self.sign_bit(byte_mode)
796 };
797 }
798
799 fn set_zsc(&mut self, result: u32, carry: bool, byte_mode: bool) {
800 self.set_zs(result, byte_mode);
801 if carry {
802 self.flags |= FLAG_C;
803 }
804 }
805
806 fn mask_width(&self, value: u32, byte_mode: bool) -> u32 {
807 value & self.value_mask(byte_mode)
808 }
809
810 fn value_mask(&self, byte_mode: bool) -> u32 {
811 if byte_mode {
812 0xff
813 } else {
814 u32::MAX
815 }
816 }
817
818 fn sign_bit(&self, byte_mode: bool) -> u32 {
819 if byte_mode {
820 0x80
821 } else {
822 FLAG_S
823 }
824 }
825}
826
827fn write_u32(memory: &mut [u8], address: usize, value: u32) {
828 let address = address as u32;
829 for (offset, byte) in value.to_le_bytes().into_iter().enumerate() {
830 memory[(address.wrapping_add(offset as u32) & MEMORY_MASK) as usize] = byte;
831 }
832}
833
834#[derive(Debug, Clone)]
835struct BitReader<'a> {
836 input: &'a [u8],
837 bit_pos: usize,
838}
839
840impl<'a> BitReader<'a> {
841 fn new(input: &'a [u8]) -> Self {
842 Self { input, bit_pos: 0 }
843 }
844
845 fn remaining_bits(&self) -> usize {
846 self.input.len() * 8 - self.bit_pos
847 }
848
849 fn read_bit(&mut self) -> Result<u32> {
850 self.read_bits(1)
851 }
852
853 fn read_bits(&mut self, count: usize) -> Result<u32> {
854 if count > 32 {
855 return Err(Error::InvalidData("RARVM bit read is too wide"));
856 }
857 if self.remaining_bits() < count {
858 return Err(Error::NeedMoreInput);
859 }
860 let mut value = 0;
861 for _ in 0..count {
862 let byte = self.input[self.bit_pos / 8];
863 let bit = (byte >> (7 - (self.bit_pos % 8))) & 1;
864 value = (value << 1) | u32::from(bit);
865 self.bit_pos += 1;
866 }
867 Ok(value)
868 }
869
870 fn read_vm_number(&mut self) -> Result<u32> {
871 match self.read_bits(2)? {
872 0 => self.read_bits(4),
873 1 => {
874 let high = self.read_bits(8)?;
875 if high >= 16 {
876 Ok(high)
877 } else {
878 Ok(0xffff_ff00 | (high << 4) | self.read_bits(4)?)
879 }
880 }
881 2 => self.read_bits(16),
882 3 => self.read_bits(32),
883 _ => unreachable!(),
884 }
885 }
886}
887
888#[cfg(test)]
889mod tests {
890 use super::*;
891
/// A blob whose bytes do not XOR to zero must be rejected.
#[test]
fn rejects_bad_xor_checksum() {
    // 0x12 ^ 0x34 is non-zero.
    let outcome = Program::parse(&[0x12, 0x34]);
    let expected = Err(Error::InvalidData("RARVM program checksum mismatch"));
    assert_eq!(outcome, expected);
}
899
/// A program consisting only of static data still ends in a `Ret`.
#[test]
fn parses_static_data_and_appends_implicit_ret() {
    let mut bits = BitWriter::new();
    // Static data present; the length is stored minus one.
    bits.write_bits(1, 1);
    write_vm_number(&mut bits, 2);
    bits.write_bits(0xaa, 8);
    bits.write_bits(0xbb, 8);
    bits.write_bits(0xcc, 8);

    let blob = with_xor(bits.finish());
    let Program {
        static_data,
        instructions,
    } = Program::parse(&blob).unwrap();

    let implicit_ret = Instruction {
        opcode: Opcode::Ret,
        byte_mode: false,
        operands: Vec::new(),
    };
    assert_eq!(static_data, [0xaa, 0xbb, 0xcc]);
    assert_eq!(instructions, [implicit_ret]);
}
920
/// Every operand addressing form survives a round trip through the parser.
#[test]
fn parses_register_immediate_and_memory_operands() {
    let mut bits = BitWriter::new();
    // No static data section.
    bits.write_bits(0, 1);

    // Word-mode `Mov` with a register and a numeric immediate.
    write_opcode(&mut bits, Opcode::Mov);
    bits.write_bits(0, 1);
    write_reg(&mut bits, 2);
    write_number_immediate(&mut bits, 0x1234);

    // Byte-mode `Add` with a register-indirect destination and a byte immediate.
    write_opcode(&mut bits, Opcode::Add);
    bits.write_bits(1, 1);
    write_reg_indirect(&mut bits, 3);
    write_byte_immediate(&mut bits, 0x7f);

    // Word-mode `Sub` with an indexed destination and an absolute source.
    write_opcode(&mut bits, Opcode::Sub);
    bits.write_bits(0, 1);
    write_indexed(&mut bits, 1, 0x44);
    write_absolute(&mut bits, 0x3c000);

    write_opcode(&mut bits, Opcode::Ret);

    let blob = with_xor(bits.finish());
    let program = Program::parse(&blob).unwrap();
    assert!(program.static_data.is_empty());

    let [mov, add, sub, ret] = program.instructions.as_slice() else {
        panic!(
            "expected 4 instructions, found {}",
            program.instructions.len()
        );
    };

    assert_eq!(mov.opcode, Opcode::Mov);
    assert!(!mov.byte_mode);
    assert_eq!(
        mov.operands,
        [Operand::Register(2), Operand::Immediate(0x1234)]
    );

    assert_eq!(add.opcode, Opcode::Add);
    assert!(add.byte_mode);
    assert_eq!(
        add.operands,
        [Operand::RegisterIndirect(3), Operand::Immediate(0x7f)]
    );

    let indexed = Operand::Indexed {
        register: 1,
        base: 0x44,
    };
    assert_eq!(sub.operands, [indexed, Operand::Absolute(0x3c000)]);

    assert_eq!(ret.opcode, Opcode::Ret);
}
966
/// A relative jump immediate is rewritten to an absolute instruction index.
#[test]
fn remaps_jump_immediates_to_instruction_indices() {
    let mut bits = BitWriter::new();
    // No static data section.
    bits.write_bits(0, 1);
    write_opcode(&mut bits, Opcode::Print);
    // Encoded distance 15 means "one instruction back" from index 1.
    write_opcode(&mut bits, Opcode::Jmp);
    write_number_immediate(&mut bits, 15);

    let blob = with_xor(bits.finish());
    let program = Program::parse(&blob).unwrap();

    let expected_jump = Instruction {
        opcode: Opcode::Jmp,
        byte_mode: false,
        operands: vec![Operand::Immediate(0)],
    };
    assert_eq!(program.instructions.len(), 2);
    assert_eq!(program.instructions[1], expected_jump);
}
986
/// `Mov`/`Add` on a register followed by a byte store into the output block.
#[test]
fn executes_arithmetic_and_memory_writes() {
    let step = |opcode: Opcode, byte_mode: bool, operands: Vec<Operand>| Instruction {
        opcode,
        byte_mode,
        operands,
    };
    let program = Program {
        static_data: Vec::new(),
        instructions: vec![
            step(
                Opcode::Mov,
                false,
                vec![Operand::Register(0), Operand::Immediate(7)],
            ),
            step(
                Opcode::Add,
                false,
                vec![Operand::Register(0), Operand::Immediate(5)],
            ),
            // Store the low byte of register 0 at address 0, the output block.
            step(
                Opcode::Mov,
                true,
                vec![Operand::Absolute(0), Operand::Register(0)],
            ),
            step(Opcode::Ret, false, Vec::new()),
        ],
    };
    let invocation = Invocation {
        input: &[0],
        regs: [0; 7],
        global_data: &[],
        file_offset: 0,
        exec_count: 0,
    };

    let result = program.execute(invocation).unwrap();

    assert_eq!(result.output, [12]);
    assert_eq!(result.regs[0], 12);
}
1028
/// A taken `Jz` skips an instruction and a `Call` runs a subroutine.
#[test]
fn executes_conditional_jump_and_stack_call() {
    let step = |opcode: Opcode, operands: Vec<Operand>| Instruction {
        opcode,
        byte_mode: false,
        operands,
    };
    let r0 = || Operand::Register(0);
    let imm = Operand::Immediate;

    let program = Program {
        static_data: Vec::new(),
        instructions: vec![
            /* 0 */ step(Opcode::Mov, vec![r0(), imm(1)]),
            /* 1 */ step(Opcode::Cmp, vec![r0(), imm(1)]),
            /* 2 */ step(Opcode::Jz, vec![imm(4)]),
            /* 3 */ step(Opcode::Mov, vec![r0(), imm(99)]),
            /* 4 */ step(Opcode::Call, vec![imm(6)]),
            /* 5 */ step(Opcode::Ret, Vec::new()),
            /* 6 */ step(Opcode::Add, vec![r0(), imm(41)]),
            /* 7 */ step(Opcode::Ret, Vec::new()),
        ],
    };
    let invocation = Invocation {
        input: &[0],
        regs: [0; 7],
        global_data: &[],
        file_offset: 0,
        exec_count: 0,
    };

    let result = program.execute(invocation).unwrap();

    assert_eq!(result.regs[0], 42);
}
1089
/// `Inc`, `Dec`, `Not` and `Neg` modify their operand; `Jmp` skips the poison `Mov`.
#[test]
fn executes_unconditional_jumps_and_mutating_unary_ops() {
    let set_r0 = |value: u32| {
        instr(
            Opcode::Mov,
            false,
            vec![Operand::Register(0), Operand::Immediate(value)],
        )
    };
    let unary = |opcode: Opcode| instr(opcode, false, vec![Operand::Register(0)]);

    let program = vec![
        /* 0 */ set_r0(1),
        /* 1 */ unary(Opcode::Inc),
        /* 2 */ unary(Opcode::Dec),
        /* 3 */ unary(Opcode::Not),
        /* 4 */ unary(Opcode::Neg),
        /* 5 */ instr(Opcode::Jmp, false, vec![Operand::Immediate(7)]),
        /* 6 */ set_r0(99),
        /* 7 */ instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 2);
}
1113
/// `Xor`/`And`/`Or` store their result, while `Test` only affects the flags.
#[test]
fn executes_logic_ops_and_test_without_writing_destination() {
    let on_r0 = |opcode: Opcode, value: u32| {
        instr(
            opcode,
            false,
            vec![Operand::Register(0), Operand::Immediate(value)],
        )
    };

    let program = vec![
        /* 0 */ on_r0(Opcode::Mov, 0b1010),
        /* 1 */ on_r0(Opcode::Xor, 0b1100),
        /* 2 */ on_r0(Opcode::And, 0b0110),
        /* 3 */ on_r0(Opcode::Or, 0b0001),
        /* 4 */ on_r0(Opcode::Test, 0b0100),
        /* 5 */ instr(Opcode::Jnz, false, vec![Operand::Immediate(7)]),
        /* 6 */ on_r0(Opcode::Mov, 99),
        /* 7 */ instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 0b0111);
}
1153
/// `Jb`, `Jbe`, `Ja` and `Jae` are each taken, skipping every poison `Mov`.
#[test]
fn executes_unsigned_conditional_jumps() {
    let set_r0 = |value: u32| {
        instr(
            Opcode::Mov,
            false,
            vec![Operand::Register(0), Operand::Immediate(value)],
        )
    };
    let compare = |left: u32, right: u32| {
        instr(
            Opcode::Cmp,
            false,
            vec![Operand::Immediate(left), Operand::Immediate(right)],
        )
    };
    let jump = |opcode: Opcode, target: u32| {
        instr(opcode, false, vec![Operand::Immediate(target)])
    };
    let ret = || instr(Opcode::Ret, false, Vec::new());

    let program = vec![
        /*  0 */ set_r0(0),
        /*  1 */ compare(1, 2),
        /*  2 */ jump(Opcode::Jb, 5),
        /*  3 */ set_r0(99),
        /*  4 */ ret(),
        /*  5 */ jump(Opcode::Jbe, 7),
        /*  6 */ set_r0(98),
        /*  7 */ compare(3, 2),
        /*  8 */ jump(Opcode::Ja, 10),
        /*  9 */ set_r0(97),
        /* 10 */ compare(3, 2),
        /* 11 */ jump(Opcode::Jae, 13),
        /* 12 */ set_r0(96),
        /* 13 */ set_r0(42),
        /* 14 */ ret(),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 42);
}
1212
/// Checks that `Js` branches after a subtraction that wraps below zero and that
/// `Jns` branches after the value is brought back to zero.
///
/// Jump operands are used as instruction indices here, as in the other tests of
/// this module. Each fall-through path stores a distinct marker in register 1
/// and returns at once, so a jump that fails to branch cannot be hidden by the
/// final success marker 42.
#[test]
fn executes_signed_conditional_jumps() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let result = execute_instructions(vec![
        // 0-1: r0 = 0 - 1.
        instr(Opcode::Mov, false, vec![reg(0), imm(0)]),
        instr(Opcode::Sub, false, vec![reg(0), imm(1)]),
        // 2-4: Js must reach index 5; otherwise report 99.
        instr(Opcode::Js, false, vec![imm(5)]),
        instr(Opcode::Mov, false, vec![reg(1), imm(99)]),
        instr(Opcode::Ret, false, Vec::new()),
        // 5: r0 = r0 + 1, back to zero.
        instr(Opcode::Add, false, vec![reg(0), imm(1)]),
        // 6-8: Jns must reach index 9; otherwise report 98.
        instr(Opcode::Jns, false, vec![imm(9)]),
        instr(Opcode::Mov, false, vec![reg(1), imm(98)]),
        instr(Opcode::Ret, false, Vec::new()),
        // 9-10: both jumps were taken.
        instr(Opcode::Mov, false, vec![reg(1), imm(42)]),
        instr(Opcode::Ret, false, Vec::new()),
    ]);

    assert_eq!(result.regs[0], 0);
    assert_eq!(result.regs[1], 42);
}
1255
/// Round-trips data through the stack: `Push`/`Pop` for one register,
/// `Pusha`/`Popa` around a clobbered register, and `Pushf`/`Popf` around a
/// second compare.
#[test]
fn executes_stack_register_and_flag_round_trips() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let program = vec![
        // 0-3: push 10 from r0, clobber r0, pop the saved value into r1.
        instr(Opcode::Mov, false, vec![reg(0), imm(10)]),
        instr(Opcode::Push, false, vec![reg(0)]),
        instr(Opcode::Mov, false, vec![reg(0), imm(0)]),
        instr(Opcode::Pop, false, vec![reg(1)]),
        // 4-7: r0 is overwritten between Pusha and Popa; the test expects 10 back.
        instr(Opcode::Mov, false, vec![reg(0), imm(10)]),
        instr(Opcode::Pusha, false, Vec::new()),
        instr(Opcode::Mov, false, vec![reg(0), imm(99)]),
        instr(Opcode::Popa, false, Vec::new()),
        // 8-11: flags saved after `Cmp 1, 2`, disturbed by `Cmp 2, 2`, then popped.
        instr(Opcode::Cmp, false, vec![imm(1), imm(2)]),
        instr(Opcode::Pushf, false, Vec::new()),
        instr(Opcode::Cmp, false, vec![imm(2), imm(2)]),
        instr(Opcode::Popf, false, Vec::new()),
        // 12-14: Jb targets the final Ret (index 14); falling through sets r1 = 99.
        instr(Opcode::Jb, false, vec![imm(14)]),
        instr(Opcode::Mov, false, vec![reg(1), imm(99)]),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 10);
    assert_eq!(result.regs[1], 10);
}
1307
/// Runs `Shl`/`Shr` with `byte_mode` off and `Sar` with `byte_mode` on, then
/// checks the resulting register values.
#[test]
fn executes_shifts_with_byte_and_word_modes() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let program = vec![
        // r0: 0x81 shifted left by one, then right by two.
        instr(Opcode::Mov, false, vec![reg(0), imm(0x81)]),
        instr(Opcode::Shl, false, vec![reg(0), imm(1)]),
        instr(Opcode::Shr, false, vec![reg(0), imm(2)]),
        // r1: 0x80 arithmetically shifted right by one in byte mode.
        instr(Opcode::Mov, false, vec![reg(1), imm(0x80)]),
        instr(Opcode::Sar, true, vec![reg(1), imm(1)]),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 0x40);
    assert_eq!(result.regs[1], 0xc0);
}
1342
/// Byte-mode `Sar` by a count of 8 must execute without error and leave 0xff
/// for the input 0x80 and 0 for the input 0x7f.
#[test]
fn byte_mode_sar_accepts_shift_count_equal_to_width() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let program = vec![
        // r0 starts with the top bit of the byte set.
        instr(Opcode::Mov, false, vec![reg(0), imm(0x80)]),
        instr(Opcode::Sar, true, vec![reg(0), imm(8)]),
        // r1 starts with the top bit of the byte clear.
        instr(Opcode::Mov, false, vec![reg(1), imm(0x7f)]),
        instr(Opcode::Sar, true, vec![reg(1), imm(8)]),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 0xff);
    assert_eq!(result.regs[1], 0);
}
1372
/// `Shl` and `Shr` by the full operand width (32 with `byte_mode` off, 8 with
/// `byte_mode` on) are expected to leave zero in the destination.
#[test]
fn full_width_shl_and_shr_clear_destination() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let program = vec![
        // 32-bit shifts by 32.
        instr(Opcode::Mov, false, vec![reg(0), imm(0x1234_5678)]),
        instr(Opcode::Shl, false, vec![reg(0), imm(32)]),
        instr(Opcode::Mov, false, vec![reg(1), imm(0x8765_4321)]),
        instr(Opcode::Shr, false, vec![reg(1), imm(32)]),
        // Byte-mode shifts by 8.
        instr(Opcode::Mov, false, vec![reg(2), imm(0xff)]),
        instr(Opcode::Shl, true, vec![reg(2), imm(8)]),
        instr(Opcode::Mov, false, vec![reg(3), imm(0xff)]),
        instr(Opcode::Shr, true, vec![reg(3), imm(8)]),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 0);
    assert_eq!(result.regs[1], 0);
    // Only the low byte is checked for the byte-mode results.
    assert_eq!(result.regs[2] & 0xff, 0);
    assert_eq!(result.regs[3] & 0xff, 0);
}
1424
/// Byte-mode `Sbb` of 0xff from 0, executed after a byte-mode `Cmp 0, 1`, is
/// expected to leave a zero low byte and to make the following `Jb` branch.
#[test]
fn sbb_sets_borrow_flag_when_subtrahend_plus_carry_wraps_byte_width() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let program = vec![
        // 0-2: compare, load the minuend, then subtract with borrow.
        instr(Opcode::Cmp, true, vec![imm(0), imm(1)]),
        instr(Opcode::Mov, false, vec![reg(0), imm(0)]),
        instr(Opcode::Sbb, true, vec![reg(0), imm(0xff)]),
        // 3-5: Jb targets index 6; falling through records 0xdead.
        instr(Opcode::Jb, false, vec![imm(6)]),
        instr(Opcode::Mov, false, vec![reg(1), imm(0xdead)]),
        instr(Opcode::Ret, false, Vec::new()),
        // 6-7: jump taken.
        instr(Opcode::Mov, false, vec![reg(1), imm(0xbeef)]),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0] & 0xff, 0);
    assert_eq!(result.regs[1], 0xbeef);
}
1461
/// `Shl`, `Shr` and `Sar` with a count of zero must leave the register unchanged.
#[test]
fn zero_count_shifts_are_noops() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let program = vec![
        instr(Opcode::Mov, false, vec![reg(0), imm(0x1234_5678)]),
        instr(Opcode::Shl, false, vec![reg(0), imm(0)]),
        instr(Opcode::Shr, false, vec![reg(0), imm(0)]),
        instr(Opcode::Sar, false, vec![reg(0), imm(0)]),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 0x1234_5678);
}
1490
/// Stores `MEMORY_SIZE - 1` at `GLOBAL_BASE + 0x20` and 1 at `GLOBAL_BASE + 0x1c`,
/// writes the byte 0x5a to the last memory address and expects the output to be
/// exactly that byte.
///
/// NOTE(review): the two global slots are presumably the output start and output
/// length fields; confirm against `Program::execute`.
#[test]
fn output_range_accepts_exclusive_memory_end() {
    let mem = Operand::Absolute;
    let imm = Operand::Immediate;
    let last_byte = (MEMORY_SIZE - 1) as u32;
    let slot_0x20 = (GLOBAL_BASE + 0x20) as u32;
    let slot_0x1c = (GLOBAL_BASE + 0x1c) as u32;

    let instructions = vec![
        instr(Opcode::Mov, false, vec![mem(slot_0x20), imm(last_byte)]),
        instr(Opcode::Mov, false, vec![mem(slot_0x1c), imm(1)]),
        // Byte-mode store to the very last address of VM memory.
        instr(Opcode::Mov, true, vec![mem(last_byte), imm(0x5a)]),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let program = Program {
        static_data: Vec::new(),
        instructions,
    };
    let invocation = Invocation {
        input: &[0],
        regs: [0; 7],
        global_data: &[],
        file_offset: 0,
        exec_count: 0,
    };

    let result = program.execute(invocation).unwrap();

    assert_eq!(result.output, [0x5a]);
}
1536
/// Chains `Movzx`, `Movsx`, `Xchg`, `Mul`, `Div`, `Adc`, `Sbb` and `Print` and
/// checks the final contents of registers 0 and 1.
#[test]
fn executes_extension_exchange_multiply_divide_and_carry_arithmetic() {
    let reg = Operand::Register;
    let imm = Operand::Immediate;
    let mem = Operand::Absolute;
    let program = vec![
        // Seed memory address 0 with 0x80, then load it with both extensions.
        instr(Opcode::Mov, false, vec![mem(0), imm(0x80)]),
        instr(Opcode::Movzx, false, vec![reg(0), mem(0)]),
        instr(Opcode::Movsx, false, vec![reg(1), mem(0)]),
        // Swap the two registers; the arithmetic below works on r1.
        instr(Opcode::Xchg, false, vec![reg(0), reg(1)]),
        instr(Opcode::Mul, false, vec![reg(1), imm(3)]),
        instr(Opcode::Div, false, vec![reg(1), imm(2)]),
        // Each carry-using instruction is preceded by `Cmp 1, 2`.
        instr(Opcode::Cmp, false, vec![imm(1), imm(2)]),
        instr(Opcode::Adc, false, vec![reg(1), imm(1)]),
        instr(Opcode::Cmp, false, vec![imm(1), imm(2)]),
        instr(Opcode::Sbb, false, vec![reg(1), imm(2)]),
        instr(Opcode::Print, false, Vec::new()),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let result = execute_instructions(program);

    assert_eq!(result.regs[0], 0xffff_ff80);
    assert_eq!(result.regs[1], 0xbf);
}
1597
/// Runs a program that stores 4 at address 0x3c030 (`GLOBAL_BASE + 0x30`) and
/// expects the returned globals to be the 64 supplied bytes followed by the
/// first four bytes of the static data, with the input passed through as output.
///
/// NOTE(review): the slot at `GLOBAL_BASE + 0x30` presumably holds the requested
/// user-global length; confirm against `Program::execute`.
#[test]
fn preserves_requested_user_globals() {
    let instructions = vec![
        instr(
            Opcode::Mov,
            false,
            vec![Operand::Absolute(0x3c030), Operand::Immediate(4)],
        ),
        instr(Opcode::Ret, false, Vec::new()),
    ];
    let program = Program {
        static_data: b"static".to_vec(),
        instructions,
    };
    let system_globals = [0u8; 64];
    let invocation = Invocation {
        input: &[1, 2, 3],
        regs: [0; 7],
        global_data: &system_globals,
        file_offset: 0x1_0000_0002,
        exec_count: 9,
    };

    let result = program.execute(invocation).unwrap();

    assert_eq!(result.output, [1, 2, 3]);
    assert_eq!(result.globals.len(), 68);
    assert_eq!(&result.globals[64..], b"stat");
}
1630
/// Parsing the six-byte blob below must fail with the "static data is too large"
/// error rather than succeed.
#[test]
fn parse_rejects_huge_static_data_size_without_preallocating() {
    let blob: [u8; 6] = [0xff, 0xff, 0xff, 0xff, 0, 0];
    let expected = Error::InvalidData("RARVM static data is too large");

    assert_eq!(Program::parse(&blob).unwrap_err(), expected);
}
1636
/// Builds a blob consisting of a single set bit followed by `MAX_STATIC_DATA`
/// encoded as a VM number, prefixes the XOR checksum, and expects parsing to
/// fail with the "static data is too large" error.
#[test]
fn parse_rejects_static_data_larger_than_vm_memory() {
    let mut writer = BitWriter::new();
    writer.write_bits(1, 1);
    write_vm_number(&mut writer, MAX_STATIC_DATA as u32);
    let blob = with_xor(writer.finish());
    let expected = Error::InvalidData("RARVM static data is too large");

    assert_eq!(Program::parse(&blob).unwrap_err(), expected);
}
1646
1647 fn instr(opcode: Opcode, byte_mode: bool, operands: Vec<Operand>) -> Instruction {
1648 Instruction {
1649 opcode,
1650 byte_mode,
1651 operands,
1652 }
1653 }
1654
1655 fn execute_instructions(instructions: Vec<Instruction>) -> ExecutionResult {
1656 Program {
1657 static_data: Vec::new(),
1658 instructions,
1659 }
1660 .execute(Invocation {
1661 input: &[0],
1662 regs: [0; 7],
1663 global_data: &[],
1664 file_offset: 0,
1665 exec_count: 0,
1666 })
1667 .unwrap()
1668 }
1669
/// Packs bits into bytes, filling each byte from its most significant bit down.
struct BitWriter {
    /// Bytes produced so far; the last one may be only partly filled.
    output: Vec<u8>,
    /// Total number of bits written.
    bit_pos: usize,
}

impl BitWriter {
    /// Creates an empty writer.
    fn new() -> Self {
        Self {
            bit_pos: 0,
            output: Vec::new(),
        }
    }

    /// Appends the low `count` bits of `value`, highest of those bits first.
    fn write_bits(&mut self, value: u32, count: usize) {
        for shift in (0..count).rev() {
            let byte_index = self.bit_pos / 8;
            let bit_in_byte = self.bit_pos % 8;
            // A fresh zero byte is started whenever the cursor sits on a byte boundary.
            if bit_in_byte == 0 {
                self.output.push(0);
            }
            if (value >> shift) & 1 == 1 {
                self.output[byte_index] |= 0x80 >> bit_in_byte;
            }
            self.bit_pos += 1;
        }
    }

    /// Consumes the writer and returns the bytes; unused trailing bits are zero.
    fn finish(self) -> Vec<u8> {
        self.output
    }
}
1700
/// Prepends to `payload` one byte holding the XOR of all payload bytes
/// (zero for an empty payload) and returns the result.
fn with_xor(mut payload: Vec<u8>) -> Vec<u8> {
    let mut checksum = 0u8;
    for byte in &payload {
        checksum ^= *byte;
    }
    payload.insert(0, checksum);
    payload
}
1706
1707 fn write_opcode(bits: &mut BitWriter, opcode: Opcode) {
1708 let value = opcode as u8;
1709 if value <= 7 {
1710 bits.write_bits(0, 1);
1711 bits.write_bits(u32::from(value), 3);
1712 } else {
1713 bits.write_bits(1, 1);
1714 bits.write_bits(u32::from(value - 8), 5);
1715 }
1716 }
1717
1718 fn write_reg(bits: &mut BitWriter, reg: u8) {
1719 bits.write_bits(1, 1);
1720 bits.write_bits(u32::from(reg), 3);
1721 }
1722
1723 fn write_number_immediate(bits: &mut BitWriter, value: u32) {
1724 bits.write_bits(0, 2);
1725 write_vm_number(bits, value);
1726 }
1727
1728 fn write_byte_immediate(bits: &mut BitWriter, value: u8) {
1729 bits.write_bits(0, 2);
1730 bits.write_bits(u32::from(value), 8);
1731 }
1732
1733 fn write_reg_indirect(bits: &mut BitWriter, reg: u8) {
1734 bits.write_bits(0b010, 3);
1735 bits.write_bits(u32::from(reg), 3);
1736 }
1737
1738 fn write_indexed(bits: &mut BitWriter, reg: u8, base: u32) {
1739 bits.write_bits(0b0110, 4);
1740 bits.write_bits(u32::from(reg), 3);
1741 write_vm_number(bits, base);
1742 }
1743
1744 fn write_absolute(bits: &mut BitWriter, address: u32) {
1745 bits.write_bits(0b0111, 4);
1746 write_vm_number(bits, address);
1747 }
1748
1749 fn write_vm_number(bits: &mut BitWriter, value: u32) {
1750 if value <= 15 {
1751 bits.write_bits(0, 2);
1752 bits.write_bits(value, 4);
1753 } else if value <= 255 {
1754 bits.write_bits(1, 2);
1755 bits.write_bits(value, 8);
1756 } else if value <= 0xffff {
1757 bits.write_bits(2, 2);
1758 bits.write_bits(value, 16);
1759 } else {
1760 bits.write_bits(3, 2);
1761 bits.write_bits(value, 32);
1762 }
1763 }
1764}