feo3boy 0.1.0

Emulator core for the gameboy
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
//! Provides a handwritten opcode executor that directly evaluates opcodes.

use std::fmt;
use std::ops::{Deref, DerefMut};

use feo3boy_opcodes::gbz80types::Flags;
use feo3boy_opcodes::microcode;
use feo3boy_opcodes::opcode::args::{AluOp, ConditionCode, Operand16, Operand8};
use feo3boy_opcodes::opcode::{CBOpcode, CBOperation, InternalFetch, Opcode};
use log::{debug, trace, warn};

use crate::gbz80core::executor::Executor;
use crate::gbz80core::{externdefs, ExecutorContext};
use crate::interrupts::Interrupts;
use crate::memdev::RootMemDevice;

mod args;
mod tests;

/// Executor which evaluates GB Opcodes by decoding them, then matching on them and
/// calling a function which implements them. This executor is not capable of pausing
/// during instruction execution.
///
/// It is a unit struct carrying no data of its own: its [`Executor::State`] is `()`, and
/// [`Executor::run_single_instruction`] is an ordinary synchronous call that only returns
/// once the whole step has been evaluated.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct DirectExecutor;

impl Executor for DirectExecutor {
    type State = ();

    fn run_single_instruction(ctx: &mut impl ExecutorContext<State = Self::State>) {
        InternalFetch.runner().run(ctx)
    }
}

/// Convenience type for an ExecutorContext with a unit state.
///
/// This lets the helpers in this module take `&mut impl Ctx` instead of spelling out
/// `ExecutorContext<State = ()>` in every signature.
trait Ctx: ExecutorContext<State = ()> {}

// Blanket impl: every `ExecutorContext` whose state is `()` is automatically a `Ctx`.
impl<E: ExecutorContext<State = ()>> Ctx for E {}

/// Trait for wrapping a value in its runner.
///
/// The trait has no required methods; implementing it for a type simply opts that type
/// in to the provided [`Runner::runner`] method.
trait Runner {
    /// Wrap `self` in [`Run`]`(self)`.
    #[inline]
    fn runner(self) -> Run<Self>
    where
        Self: Sized,
    {
        Run(self)
    }
}

// Opt the fetch step and both opcode tables in to `.runner()`. The bodies are empty
// because `Runner` only has a provided method.
// NOTE(review): the operand / condition / ALU types used with `.runner()` further down
// are not given impls here -- presumably those live in the `args` submodule; confirm.
impl Runner for InternalFetch {}
impl Runner for Opcode {}
impl Runner for CBOpcode {}

/// Helper to provide convenient eval/read/write functions for types defined in the
/// feo3boy-opcodes crate.
///
/// This is a thin newtype around the wrapped value: methods for each wrapped type are
/// attached through `impl Run<ThatType>` blocks, and the [`Deref`]/[`DerefMut`] impls give
/// access to the wrapped value itself.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[repr(transparent)]
struct Run<T>(T);

impl<T> Deref for Run<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for Run<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl<T: fmt::Display> fmt::Display for Run<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

impl Run<InternalFetch> {
    /// Runs one step of the CPU: idles for a cycle while halted, dispatches a pending
    /// interrupt, or fetches, decodes and executes a single opcode.
    fn run(self, ctx: &mut impl Ctx) {
        if ctx.cpu().halted {
            // Halted with nothing active: spend one M cycle and end the step.
            if ctx.interrupts().active().is_empty() {
                ctx.yield1m();
                return;
            }
            // Something is active, so leave the halted state. This happens before the
            // interrupt-master-enable check in `service_interrupt`, so the CPU wakes
            // even if the interrupt is not dispatched.
            ctx.cpu_mut().halted = false;
        }

        // Dispatching an interrupt takes up the whole step; no opcode is fetched.
        if self.service_interrupt(ctx) {
            return;
        }

        // Snapshot the interrupt-master-enable state from before the instruction runs;
        // it is handed to `tick` after the instruction completes.
        let previous_ime = ctx.cpu().interrupt_master_enable;
        // Remember where the opcode was fetched from, for logging only.
        let pc = ctx.cpu().regs.pc;
        trace!("Loading opcode at {:#6X}", pc);
        // Fetch the opcode byte as an immediate operand (which also moves PC past it).
        let opcode = Operand8::Immediate.runner().read(ctx);
        if ctx.cpu().halt_bug {
            // Halt bug: take back the PC advance from the fetch above and clear the flag,
            // so the byte following this opcode is the opcode byte itself.
            let state = ctx.cpu_mut();
            state.regs.pc = state.regs.pc.wrapping_sub(1);
            state.halt_bug = false;
        }
        let opcode = Opcode::decode(opcode);
        debug!("Executing @ {:#6X}: {}", pc, opcode);
        opcode.runner().run(ctx);
        ctx.cpu_mut().interrupt_master_enable.tick(previous_ime);
    }

    /// Dispatches an interrupt if the interrupt master enable is on and at least one
    /// interrupt is active.
    ///
    /// Returns `true` if an interrupt was dispatched (PC now points at its handler), or
    /// `false` if nothing was done.
    fn service_interrupt(self, ctx: &mut impl Ctx) -> bool {
        if ctx.cpu().interrupt_master_enable.enabled() {
            // Take the first interrupt the active set yields.
            // NOTE(review): presumably iteration is in priority order -- confirm.
            if let Some(interrupt) = ctx.interrupts().active().iter().next() {
                // Two idle M cycles before anything is written.
                ctx.yield1m();
                ctx.yield1m();
                // Acknowledge the interrupt and turn off the master enable.
                ctx.interrupts_mut().clear(interrupt);
                ctx.cpu_mut().interrupt_master_enable.clear();
                // With the halt bug flag set, the return address is one byte before the
                // current PC; the flag is consumed here.
                let ret_loc = if ctx.cpu().halt_bug {
                    let state = ctx.cpu_mut();
                    state.halt_bug = false;
                    state.regs.pc.wrapping_sub(1)
                } else {
                    ctx.cpu().regs.pc
                };
                // Push the return address (two more M cycles inside the helper), then one
                // final cycle before jumping to the handler.
                push_helper(ctx, ret_loc);
                ctx.yield1m();
                ctx.cpu_mut().regs.pc = interrupt.handler_addr();
                return true;
            }
        }
        false
    }
}

impl Run<Opcode> {
    fn run(self, ctx: &mut impl Ctx) {
        match *self {
            Opcode::Nop => {}
            Opcode::Stop => panic!("STOP is bizarre and complicated and not implemented."),
            Opcode::JumpRelative(cond) => jump_relative(ctx, cond),
            Opcode::Inc8(operand) => inc8(ctx, operand),
            Opcode::Dec8(operand) => dec8(ctx, operand),
            Opcode::Load8 { dest, source } => load8(ctx, dest, source),
            Opcode::Inc16(operand) => inc16(ctx, operand),
            Opcode::Dec16(operand) => dec16(ctx, operand),
            Opcode::Load16 { dest, source } => load16(ctx, dest, source),
            Opcode::Add16(operand) => add16(ctx, operand),
            Opcode::Halt => externdefs::halt(ctx),
            Opcode::AluOp { operand, op } => alu_op(ctx, operand, op),
            Opcode::AluUnary(op) => op.runner().run(ctx),
            Opcode::Call(cond) => call(ctx, cond),
            Opcode::Jump(cond) => jump(ctx, cond),
            Opcode::Ret(cond) => ret(ctx, cond),
            Opcode::Push(operand) => push(ctx, operand),
            Opcode::Pop(operand) => pop(ctx, operand),
            Opcode::PrefixCB => <Run<CBOpcode>>::load_and_run(ctx),
            Opcode::DisableInterrupts => externdefs::disable_interrupts(ctx),
            Opcode::EnableInterrupts => externdefs::enable_interrupts(ctx, false),
            Opcode::RetInterrupt => interrupt_return(ctx),
            Opcode::OffsetSp => offset_sp(ctx),
            Opcode::AddressOfOffsetSp => address_of_offset_sp(ctx),
            Opcode::JumpHL => jump_hl(ctx),
            Opcode::Reset(dest) => reset(ctx, dest),
            // A brief note on this doc page:
            // https://gbdev.io/pandocs/CPU_Comparison_with_Z80.html
            // says that the unused opcodes will lock up the CPU, rather than behave as a
            // no-op.
            Opcode::MissingInstruction(opcode) => {
                warn!(
                    "Missing instruction {:#04X} encountered. Treating as NOP instead of locking.",
                    opcode
                );
            }
        }
    }
}

/// Implements the relative jump instruction (conditional or unconditional).
///
/// The offset byte is always read. The jump target is computed relative to the PC after
/// that read, i.e. relative to the following instruction. No flags are changed.
fn jump_relative(ctx: &mut impl Ctx, cond: ConditionCode) {
    let delta = Operand8::Immediate.runner().read(ctx);
    let origin = ctx.cpu().regs.pc;
    // `offset_addr` also reports flags, but JR never touches them.
    let target = microcode::defs::offset_addr(origin, delta).0;

    if !cond.runner().check(ctx) {
        trace!("Skipping jump by {} from {} to {}", delta, origin, target);
        return;
    }

    trace!("Relative jump by {} from {} to {}", delta, origin, target);
    // Only a taken jump pays the extra cycle for updating PC.
    ctx.yield1m();
    ctx.cpu_mut().regs.pc = target;
}

/// Implements the 8 bit increment instruction: reads `operand`, adds one, updates every
/// flag except carry, and writes the result back to `operand`.
fn inc8(ctx: &mut impl Ctx, operand: Operand8) {
    // Every flag except carry is taken from the addition.
    const AFFECTED: Flags = Flags::all().difference(Flags::CARRY);

    let target = operand.runner();
    let before = target.read(ctx);
    let (after, new_flags) = microcode::defs::add(before, 1);
    trace!("Evaluating INC {} ({} => {})", operand, before, after);
    ctx.cpu_mut().regs.flags.merge(new_flags, AFFECTED);
    target.write(ctx, after);
}

/// Implements the 8 bit decrement instruction: reads `operand`, subtracts one, updates
/// every flag except carry, and writes the result back to `operand`.
fn dec8(ctx: &mut impl Ctx, operand: Operand8) {
    // Every flag except carry is taken from the subtraction.
    const AFFECTED: Flags = Flags::all().difference(Flags::CARRY);

    let target = operand.runner();
    let before = target.read(ctx);
    let (after, new_flags) = microcode::defs::sub(before, 1);
    trace!("Evaluating DEC {} ({} => {})", operand, before, after);
    ctx.cpu_mut().regs.flags.merge(new_flags, AFFECTED);
    target.write(ctx, after);
}

/// Implements the 8 bit load operations: reads one byte from `source` and stores it in
/// `dest`.
fn load8(ctx: &mut impl Ctx, dest: Operand8, source: Operand8) {
    let byte = source.runner().read(ctx);
    trace!("Evaluating LD {},{} (<- {})", dest, source, byte);
    let target = dest.runner();
    target.write(ctx, byte);
}

/// Implements the 16 bit increment instruction.
///
/// The value wraps on overflow and no flags are changed. One extra M cycle is spent
/// between reading the operand and writing it back.
fn inc16(ctx: &mut impl Ctx, operand: Operand16) {
    let target = operand.runner();
    let before = target.read(ctx);
    let after = before.wrapping_add(1);
    trace!("Evaluating INC {} ({} => {})", operand, before, after);
    // The operand accesses do not account for this cycle, so spend it explicitly.
    ctx.yield1m();
    target.write(ctx, after);
}

/// Implements the 16 bit decrement instruction.
///
/// The value wraps on underflow and no flags are changed. One extra M cycle is spent
/// between reading the operand and writing it back.
fn dec16(ctx: &mut impl Ctx, operand: Operand16) {
    let target = operand.runner();
    let before = target.read(ctx);
    let after = before.wrapping_sub(1);
    trace!("Evaluating DEC {} ({} => {})", operand, before, after);
    // The operand accesses do not account for this cycle, so spend it explicitly.
    ctx.yield1m();
    target.write(ctx, after);
}

/// Implements the 16 bit load operations: reads a word from `source` and stores it in
/// `dest`.
///
/// `LD SP,HL` is special-cased: it only touches registers but still costs one extra M
/// cycle, which the operand accesses do not provide, so it is spent here.
fn load16(ctx: &mut impl Ctx, dest: Operand16, source: Operand16) {
    let word = source.runner().read(ctx);

    let is_sp_from_hl = dest == Operand16::Sp && source == Operand16::HL;
    if is_sp_from_hl {
        ctx.yield1m();
    }

    trace!("Evaluating LD {},{} (<- {})", dest, source, word);
    let target = dest.runner();
    target.write(ctx, word);
}

/// Implements 16 bit register add into HL. Never sets the zero flag and clears the subtract flag,
/// but does set carry and half-carry based on the upper byte of the operation (as if it was
/// performed by running the pseudo-instructions `add l,<arg-low>; adc h,<arg-high>`).
///
/// Concretely: half-carry is set when the addition carries out of bit 11 (the half-carry
/// of the upper byte), and carry is set when it carries out of bit 15. The result wraps
/// to 16 bits and is stored in HL.
fn add16(ctx: &mut impl Ctx, arg: Operand16) {
    // 16 bit add never modifies the zero flag.
    const MASK: Flags = Flags::all().difference(Flags::ZERO);
    // Bits 0..=11. A sum of the masked operands exceeding this means a carry out of
    // bit 11, which is the half-carry of the upper-byte `adc`.
    const LOW_12_BITS: u16 = 0x0fff;

    let lhs = ctx.cpu().regs.hl();
    let rhs = arg.runner().read(ctx); // This will always be a register in practice.

    let mut flags = Flags::empty();
    // Cannot overflow u16: the largest possible sum is 0x0fff + 0x0fff = 0x1ffe.
    if (lhs & LOW_12_BITS) + (rhs & LOW_12_BITS) > LOW_12_BITS {
        flags |= Flags::HALFCARRY;
    }
    let (res, carry) = lhs.overflowing_add(rhs);
    flags |= Flags::check_carry(carry);

    // 16 bit adds have a time of 8t/2m, so 1 more cycle is needed in addition to their
    // instruction load time.
    ctx.yield1m();

    ctx.cpu_mut().regs.set_hl(res);
    ctx.cpu_mut().regs.flags.merge(flags, MASK);
}

/// Runs an ALU operation: reads the argument from `operand`, then hands it to the runner
/// for `op`.
fn alu_op(ctx: &mut impl Ctx, operand: Operand8, op: AluOp) {
    let alu = op.runner();
    let rhs = operand.runner().read(ctx);
    alu.run(ctx, rhs);
}

/// Performs a conditional call.
///
/// The destination is always read. If the condition holds, the current PC (the address
/// after the call instruction) is pushed and PC is set to the destination. An
/// unconditional call is handled as a condition that always holds, timing included.
fn call(ctx: &mut impl Ctx, cond: ConditionCode) {
    let target = Operand16::Immediate.runner().read(ctx);
    if !cond.runner().check(ctx) {
        return;
    }

    // A taken call has one extra internal cycle before the return address is pushed.
    ctx.yield1m();
    let return_addr = ctx.cpu().regs.pc;
    push_helper(ctx, return_addr);
    ctx.cpu_mut().regs.pc = target;
}

/// Performs a conditional absolute jump.
///
/// The destination is always read; PC is only updated if the condition holds. An
/// unconditional jump is handled as a condition that always holds, timing included.
fn jump(ctx: &mut impl Ctx, cond: ConditionCode) {
    let target = Operand16::Immediate.runner().read(ctx);
    if !cond.runner().check(ctx) {
        return;
    }

    // Taking the branch costs one more cycle even though no memory is accessed.
    ctx.yield1m();
    ctx.cpu_mut().regs.pc = target;
}

/// Performs a conditional return.
///
/// Unlike jump and call, the timing depends on whether the return is conditional: a
/// conditional return spends one cycle before the condition is even evaluated, and stops
/// there if it fails. A taken return of either kind pops the destination, spends one
/// more cycle, and then updates PC.
fn ret(ctx: &mut impl Ctx, cond: ConditionCode) {
    let unconditional = cond == ConditionCode::Unconditional;
    if !unconditional {
        // Paid by every conditional return, taken or not.
        ctx.yield1m();
        if !cond.runner().check(ctx) {
            return;
        }
    }

    let return_addr = pop_helper(ctx);
    // Extra cycle after the pop, before PC is replaced.
    ctx.yield1m();
    ctx.cpu_mut().regs.pc = return_addr;
}

/// Implements the push instruction: reads the 16 bit operand (in practice always a
/// register), waits one extra cycle, then pushes the value onto the stack.
fn push(ctx: &mut impl Ctx, operand: Operand16) {
    let source = operand.runner();
    let word = source.read(ctx);
    // Extra delay between reading the operand and the first stack write.
    ctx.yield1m();
    push_helper(ctx, word);
}

/// Implements the pop instruction: pops a 16 bit value off the stack and stores it in
/// the operand (in practice always a register).
fn pop(ctx: &mut impl Ctx, operand: Operand16) {
    let word = pop_helper(ctx);
    let target = operand.runner();
    target.write(ctx, word);
}

/// Push helper, shared by the instructions that put a word on the stack. Writes the high
/// byte first and the low byte second, each one M cycle apart, pre-decrementing the stack
/// pointer (with wrapping) before each write, so SP ends up lower by 2.
fn push_helper(ctx: &mut impl Ctx, val: u16) {
    // Big-endian byte order is exactly the order the bytes are written: high, then low.
    for byte in val.to_be_bytes() {
        ctx.yield1m();
        let slot = ctx.cpu().regs.sp.wrapping_sub(1);
        ctx.cpu_mut().regs.sp = slot;
        ctx.mem_mut().write_byte(slot, byte);
    }
}

/// Pop helper, shared by the instructions that take a word off the stack. Reads the low
/// byte first and the high byte second, each one M cycle apart, post-incrementing the
/// stack pointer (with wrapping) for each byte, so SP ends up higher by 2.
fn pop_helper(ctx: &mut impl Ctx) -> u16 {
    // Filled in little-endian order: index 0 is the low byte, index 1 the high byte.
    let mut bytes = [0u8; 2];
    for slot in &mut bytes {
        ctx.yield1m();
        let addr = ctx.cpu().regs.sp;
        ctx.cpu_mut().regs.sp = addr.wrapping_add(1);
        *slot = ctx.mem().read_byte(addr);
    }
    u16::from_le_bytes(bytes)
}

/// Returns from an interrupt handler: pops the return address into PC (with one extra
/// cycle of delay after the pop) and sets the interrupt master enable.
fn interrupt_return(ctx: &mut impl Ctx) {
    let return_addr = pop_helper(ctx);
    // Same extra 1m delay as an ordinary return.
    ctx.yield1m();

    let cpu = ctx.cpu_mut();
    cpu.regs.pc = return_addr;
    cpu.interrupt_master_enable.set();
}

/// Offsets the stack pointer by an immediate value, replacing the flags register with
/// the flags produced by the offset computation.
fn offset_sp(ctx: &mut impl Ctx) {
    let delta = Operand8::Immediate.runner().read(ctx);
    let old_sp = ctx.cpu().regs.sp;
    let (new_sp, new_flags) = microcode::defs::offset_addr(old_sp, delta);

    // Two further cycles pass after the offset has been loaded.
    for _ in 0..2 {
        ctx.yield1m();
    }

    let regs = &mut ctx.cpu_mut().regs;
    regs.sp = new_sp;
    regs.flags = new_flags;
}

/// Loads the result of offsetting the stack pointer by an immediate value into HL,
/// leaving SP itself unchanged. The flags register is replaced with the flags produced
/// by the offset computation.
fn address_of_offset_sp(ctx: &mut impl Ctx) {
    let delta = Operand8::Immediate.runner().read(ctx);
    let base = ctx.cpu().regs.sp;
    let (addr, new_flags) = microcode::defs::offset_addr(base, delta);

    // Only one extra cycle here, so this is one cycle cheaper than `ADD SP,i8`.
    ctx.yield1m();

    let regs = &mut ctx.cpu_mut().regs;
    regs.set_hl(addr);
    regs.flags = new_flags;
}

/// Jumps to the address held in HL. Like an unconditional jump, but the target comes
/// from the register pair and no additional cycles are spent here.
fn jump_hl(ctx: &mut impl Ctx) {
    let target = ctx.cpu().regs.hl();
    ctx.cpu_mut().regs.pc = target;
}

/// Executes the reset instruction. Behaves like a call to a fixed destination: after one
/// cycle of delay, the current PC is pushed and PC is set to `dest` zero-extended to 16
/// bits.
fn reset(ctx: &mut impl Ctx, dest: u8) {
    // RST begins with an extra internal cycle.
    ctx.yield1m();
    let return_addr = ctx.cpu().regs.pc;
    push_helper(ctx, return_addr);
    ctx.cpu_mut().regs.pc = u16::from(dest);
}

impl Run<CBOpcode> {
    /// Reads the byte at the program counter as an immediate, decodes it as a CB-prefixed
    /// opcode, and executes it on the given context.
    fn load_and_run(ctx: &mut impl Ctx) {
        // Only used for logging where the opcode byte came from.
        let fetched_from = ctx.cpu().regs.pc;
        trace!("Loading CB-opcode at {:#6X}", fetched_from);
        let raw = Operand8::Immediate.runner().read(ctx);
        let decoded = CBOpcode::decode(raw);
        debug!("Executing CB @ {:#6X}: {}", fetched_from, decoded);
        Run(decoded).run(ctx);
    }

    /// Execute this opcode on the given context.
    ///
    /// The operand is always read first. Rotates, shifts and swap replace the whole flags
    /// register and write their result back to the operand. The bit operations are
    /// handled separately: `TestBit` only updates flags (leaving carry alone), while
    /// `ResetBit`/`SetBit` only write the operand back.
    fn run(self, ctx: &mut impl Ctx) {
        let target = self.operand.runner();
        let arg = target.read(ctx);

        let (res, flags) = match self.op {
            // Bit operations finish inside their own arm.
            CBOperation::TestBit(bit) => {
                // Doesn't affect the carry flag.
                const AFFECTED: Flags = Flags::all().difference(Flags::CARRY);
                let tested = microcode::defs::test_bit(bit, arg);
                ctx.cpu_mut().regs.flags.merge(tested, AFFECTED);
                return;
            }
            CBOperation::ResetBit(bit) => {
                let cleared = microcode::defs::reset_bit(bit, arg);
                target.write(ctx, cleared);
                return;
            }
            CBOperation::SetBit(bit) => {
                let raised = microcode::defs::set_bit(bit, arg);
                target.write(ctx, raised);
                return;
            }

            // Everything else yields a (result, flags) pair applied below. The 9 bit
            // rotates additionally take the current flags as input.
            CBOperation::RotateLeft8 => microcode::defs::rotate_left8(arg),
            CBOperation::RotateLeft9 => microcode::defs::rotate_left9(ctx.cpu().regs.flags, arg),
            CBOperation::RotateRight8 => microcode::defs::rotate_right8(arg),
            CBOperation::RotateRight9 => {
                microcode::defs::rotate_right9(ctx.cpu().regs.flags, arg)
            }
            CBOperation::ShiftLeft => microcode::defs::shift_left(arg),
            CBOperation::ShiftRightSignExt => microcode::defs::shift_right_sign_ext(arg),
            CBOperation::ShiftRight => microcode::defs::shift_right(arg),
            CBOperation::Swap => microcode::defs::swap(arg),
        };

        // Flags are replaced before the result is written back.
        ctx.cpu_mut().regs.flags = flags;
        target.write(ctx, res);
    }
}