; yagbas 0.2.0

; Yagbas is a programming language that builds programs for the Game Boy.
; Documentation
; VBlank (with OAM DMA) Example for the Nintendo Game Boy
; by Dave VanEe 2022
; Tested with RGBDS 1.0.0
; License: CC0 (https://creativecommons.org/publicdomain/zero/1.0/)

include "hardware.inc"  ; Include hardware definitions so we can use nice names for things

; The VBlank vector is where execution is passed when the VBlank interrupt fires
SECTION "VBlank Vector", ROM0[$40]
; We only have 8 bytes here, so this stub just saves the registers (four 1-byte pushes) and
;  jumps (3 bytes) to the rest of the handler.
; The push order used here must be mirrored, in reverse, by the pops in VBlankHandler.
; Note: Since the VBlank handler used here only affects A and F, we don't have to push/pop BC, DE, and HL,
;  but it's done here for demonstration purposes.
VBlank:
    push af             ; Save AF, BC, DE, and HL on the stack
    push bc
    push de
    push hl
    jp VBlankHandler    ; Continue in VBlankHandler, which restores the registers and returns


; The rest of the handler is contained in ROM0 to ensure it's always accessible without banking.
; It is entered with a `jp` from the VBlank vector after AF, BC, DE, and HL have been pushed
;  (in that order), so it is responsible for popping them and returning from the interrupt.
SECTION "VBlank Handler", ROM0
VBlankHandler:
    ; Initiate the OAM DMA routine so this frame's object data reaches OAM
    ld a, HIGH(wShadowOAM) ; hOAMDMA takes the high byte of the source buffer address in A
    call hOAMDMA         ; Call our OAM DMA routine (in HRAM), quickly copying from wShadowOAM to OAMRAM

    pop hl              ; Restore HL, DE, BC, and AF (reverse order from the pushes in the vector)
    pop de
    pop bc
    pop af
    reti                ; Return and enable interrupts (ret + ei)


; Define a section that starts at the point the bootrom execution ends
SECTION "Start", ROM0[$0100]
    jp EntryPoint       ; Jump past the header space to our actual code

    ds $150-@, 0        ; Zero-fill from our current location (@) up to the end of the
                        ;  header ($150), reserving the space RGBFIX uses to insert
                        ;  our ROM header

; Program entry: one-time hardware setup, performed with interrupts and the LCD disabled.
; Steps, in order: set up the stack, turn the LCD off, zero the frame counter, copy the OAM DMA
;  routine to HRAM, copy the tile to VRAM, set the object palette, fill Shadow OAM and DMA it to
;  OAM once, enable the VBlank interrupt, and finally turn the LCD back on.
; Execution then falls through into LoopForever.
EntryPoint:
    di                  ; Disable interrupts during setup
    ld sp, $e000        ; Set the stack pointer to the end of WRAM

    ; Turn off the LCD when it's safe to do so (during VBlank)
.waitVBlank
    ldh a, [rLY]        ; Read the LY register to check the current scanline
    cp SCREEN_HEIGHT_PX ; Compare the current scanline to the first scanline of VBlank
    jr c, .waitVBlank   ; Loop as long as the carry flag is set (LY < SCREEN_HEIGHT_PX, i.e. still drawing)
    xor a               ; Once we exit the loop we're safely in VBlank
    ldh [rLCDC], a      ; Disable the LCD (must be done during VBlank to protect the LCD)

    ldh [hFrameCounter], a ; Zero our frame counter just to be safe (A is already zero from earlier)

    ; Copy the OAMDMA routine to HRAM, since during DMA we're limited on which
    ;  memory the CPU can access (but HRAM is safe)
    ld hl, OAMDMA       ; Load the source address of our routine into HL
    ld b, OAMDMA.end - OAMDMA ; Load the length of the OAMDMA routine into B
    ld c, LOW(hOAMDMA)  ; Load the low byte of the destination into C
.oamdmaCopyLoop
    ld a, [hli]         ; Load a byte from the address HL points to into the register A, increment HL
    ldh [c], a          ; Load the byte in the A register to the address in HRAM with the low byte stored in C
    inc c               ; Increment the low byte of the HRAM pointer in C
    dec b               ; Decrement the loop counter in B
    jr nz, .oamdmaCopyLoop ; If B isn't zero, continue looping

    ; Copy our tile to VRAM (safe at any time here because the LCD is off)
    ld hl, TileData     ; Load the source address of our tiles into HL
    ld de, STARTOF(VRAM) ; Load the destination address in VRAM into DE
    ld b, 16            ; Load the number of bytes to copy into B (16 bytes per tile)
.copyLoop
    ld a, [hli]         ; Load a byte from the address HL points to into the register A, increment HL
    ld [de], a          ; Load the byte in the A register to the address DE points to
    inc de              ; Increment the destination pointer in DE
    dec b               ; Decrement the loop counter in B
    jr nz, .copyLoop    ; If B isn't zero, continue looping

    ; Setup an object palette
    ld a, %11100100     ; Define a 4-shade palette from darkest (11) to lightest (00)
    ldh [rOBP0], a      ; Set the object palette 0

    ; Ensure the sprite locations (in wShadowOAM) are initialized for OAM DMA
    call PopulateShadowOAM

    ; Perform OAM DMA once to ensure OAM doesn't contain garbage
    ld a, HIGH(wShadowOAM) ; Load the high byte of our Shadow OAM buffer into A
    call hOAMDMA         ; Call our OAM DMA routine (in HRAM), quickly copying from wShadowOAM to OAMRAM

    ; Setup the VBlank interrupt
    ld a, IE_VBLANK    ; Load the flag to enable the VBlank interrupt into A
    ldh [rIE], a        ; Load the prepared flag into the interrupt enable register
    xor a               ; Set A to zero
    ldh [rIF], a        ; Clear any lingering flags from the interrupt flag register to avoid false interrupts
    ei                  ; enable interrupts!

    ; Combine flag constants defined in hardware.inc into a single value with logical ORs and load it into A
    ; Note that some of these constants (LCDC_BG_OFF, LCDC_OBJ_8, LCDC_WIN_OFF) are zero, but are included for clarity
    ld a, LCDC_ON | LCDC_BG_OFF | LCDC_OBJ_8 | LCDC_OBJ_ON | LCDC_WIN_OFF
    ldh [rLCDC], a      ; Enable the LCD, configured to show objects only (background and window off)

; Main loop, entered by falling through from EntryPoint. Runs once per interrupt wake-up:
;  advance the frame counter, then rebuild Shadow OAM so the VBlank handler can DMA it to OAM.
LoopForever:
    halt                ; Halt the CPU, waiting until an interrupt fires (this will sync our loop with VBlank)

    ld hl, hFrameCounter ; Point HL to the frame counter in HRAM
    inc [hl]            ; Increment the contents of the memory address pointed to by HL (hFrameCounter)

    call PopulateShadowOAM ; Update the sprite locations for the next frame

    jr LoopForever      ; Loop forever

; Fill every entry of wShadowOAM with a position looked up in SineTable, phased by hFrameCounter.
; For each of the OAM_COUNT entries, with index I starting at hFrameCounter and stepping by ENTRY_GAP:
;   Y          = SineTable[I] + $14
;   X          = SineTable[I + hFrameCounter] + $14
;   tile index = 0
;   attributes = 0
; All index arithmetic is done on the low byte of the table pointer, so it wraps modulo 256.
; Clobbers: AF, BC, DE, HL
DEF ENTRY_GAP EQU 8     ; SineTable entries between consecutive OAM entries (try adjusting this value to see what changes)
PopulateShadowOAM:
    ; Because SineTable is page-aligned and exactly 256 bytes long, D can stay fixed at the table's
    ;  high byte while E alone selects the entry; overflowing E simply wraps around the table.

    ld hl, wShadowOAM   ; HL = write pointer into the Shadow OAM buffer
    ld b, OAM_COUNT     ; B = number of OAM entries still to write
    ld d, HIGH(SineTable) ; D = page of SineTable (never changes below)
    ldh a, [hFrameCounter] ; A = current frame number
    ld e, a             ; E = table index for the first entry's Y, so the pattern advances each frame
    ld c, a             ; C = distance from an entry's Y index to its X index
                        ;  Note: C changing every frame is what makes the overall shape pivot. Try a fixed C!
.nextEntry
    ; Y coordinate
    ld a, [de]          ; Look up the Y value at the current index
    add $14             ; Shift by 20 pixels to center everything on screen
    ld [hli], a         ; Store the Y coordinate, advance the write pointer

    ; X coordinate, read C entries further along the table
    ld a, e             ; E += C
    add c               ;  ...
    ld e, a             ;  ...
    ld a, [de]          ; Look up the X value at the offset index
    add $14             ; Shift by 20 pixels to center everything on screen
    ld [hli], a         ; Store the X coordinate, advance the write pointer

    ; Move the index to the next entry's Y position: undo the +C and step ENTRY_GAP forward
    ld a, e             ; E = E - C + ENTRY_GAP
    sub c               ;  ...
    add ENTRY_GAP       ;  ...
    ld e, a             ;  ...

    ; Tile index and attributes are both zero
    xor a               ; A = 0
    ld [hli], a         ; Store the tile index, advance the write pointer
    ld [hli], a         ; Store the attributes, advance the write pointer

    dec b               ; One fewer entry to write
    jr nz, .nextEntry   ; Keep going until all entries are filled

    ret


SECTION "Shadow OAM", WRAM0, ALIGN[8]
; Reserve page-aligned space for a Shadow OAM buffer, to which we can safely write OAM data at any time,
;  and then use our OAM DMA routine to copy it quickly to OAMRAM when desired. OAM DMA can only operate
;  on a block of data that starts at a page boundary, which is why we use ALIGN[8]; it is also why
;  callers only pass HIGH(wShadowOAM) to the DMA routine.
wShadowOAM:
    ds OAM_SIZE


SECTION "OAM DMA Routine", ROMX
; Initiate OAM DMA and then wait until the operation is complete, then return
; This is the ROM master copy and is never called here directly: EntryPoint copies the bytes
;  from OAMDMA up to OAMDMA.end into hOAMDMA (HRAM), and all callers call hOAMDMA instead.
; @param A High byte of the source data to DMA to OAM
; Clobbers: AF
OAMDMA:
    ldh [rDMA], a       ; Writing the source high byte to rDMA starts the transfer
    ld a, OAM_COUNT     ; Loop count for the wait below. The transfer takes 160 machine cycles and each
                        ;  iteration takes 4 (dec a = 1, taken jr = 3), so 40 iterations are needed.
                        ;  NOTE(review): this relies on OAM_COUNT being 40 in hardware.inc; the match
                        ;  with the number of OAM entries is a coincidence of the timing.
.waitLoop
    dec a               ; Count down one iteration
    jr nz, .waitLoop    ; Keep waiting until the counter reaches zero
    ret
.end                    ; Marks the end of the routine so its length can be computed as OAMDMA.end - OAMDMA


SECTION "OAM DMA", HRAM
; Reserve space in HRAM for the OAMDMA routine, equal in length to the routine.
; EntryPoint fills this with a copy of OAMDMA before it is first called; call it with
;  A = high byte of the page-aligned source buffer.
hOAMDMA:
    ds OAMDMA.end - OAMDMA


SECTION "Frame Counter", HRAM
; Reserve space in HRAM to track frame advancement.
; A single byte: zeroed by EntryPoint, incremented once per pass of LoopForever (wrapping at 256),
;  and read by PopulateShadowOAM to animate the sprite positions.
hFrameCounter:
    ds 1


SECTION "Tile Data", ROMX
; Our tile data in 2bpp planar format (https://gbdev.io/pandocs/Tile_Data.html)
; Each `dw` below is one 8-pixel row written as a graphics literal (digits 0-3 are color indices),
;  which assembles to 2 bytes; 8 rows make up the 16 bytes that EntryPoint copies to VRAM.
TileData:
.ball ; Use the "Game Boy Graphics" compact representation of the tile data
    dw `00333300
    dw `03011130
    dw `30001123
    dw `31011123
    dw `31111123
    dw `32111223
    dw `03222230
    dw `00333300


SECTION "Sine Table", ROMX, ALIGN[8]
; Generate a 256 byte lookup of sine values ranging from 0-128, aligned to a page for automatic wrapping
; (PopulateShadowOAM indexes this table by changing only the low byte of its pointer.)
; The table is computed at assembly time using fixed-point math: 64 * sin(angle) + 64.
; See: https://rgbds.gbdev.io/docs/master/rgbasm.5#Fixed-point_expressions
SineTable:
FOR ANGLE, 0.0, 1.0, 1.0 / 256 ; delta = 1 full turn / 256 entries
	; `>> 16` drops the fractional bits to get an integer byte.
	; NOTE(review): this assumes the default fixed-point precision of 16 fractional bits - confirm if built with a different -Q setting.
	db (MUL(64.0, SIN(ANGLE)) + 64.0) >> 16
ENDR