generator 0.8.8

Stackful Generator Library in Rust
Documentation
// Unfortunately, the IBM assembler just uses numbers for registers,
// making the assembly hard to read when registers are mixed with offsets.
// Therefore, here are some defines for readability:

#pragma region defines

#define r0   0
#define r1   1
#define fp   1
#define r2   2
#define r3   3
#define r4   4
#define r5   5
#define r6   6
#define r7   7
#define r8   8
#define r9   9
#define r10  10
#define r11  11
#define r12  12
#define r13  13
#define r14  14
#define r15  15
#define r16  16
#define r17  17
#define r18  18
#define r19  19
#define r20  20
#define r21  21
#define r22  22
#define r23  23
#define r24  24
#define r25  25
#define r26  26
#define r27  27
#define r28  28
#define r29  29
#define r30  30
#define r31  31

// floating-point registers
#define f14 14
#define f15 15
#define f16 16
#define f17 17
#define f18 18
#define f19 19
#define f20 20
#define f21 21
#define f22 22
#define f23 23
#define f24 24
#define f25 25
#define f26 26
#define f27 27
#define f28 28
#define f29 29
#define f30 30
#define f31 31

// vector registers
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31

#pragma endregion defines

.text
.globl prefetch
.type prefetch,@function
.align 16
prefetch:
    addis 2,12,.TOC.-prefetch@ha
    addi 2,2,.TOC.-prefetch@l
    .localentry prefetch, .-prefetch
    // NOTE: dcbt prefetches data, not instructions!
    dcbt 0, r3
    blr
.size prefetch,.-prefetch


.text
.globl bootstrap_green_task
.type bootstrap_green_task,@function
.align 16
bootstrap_green_task:
    // setting parameters from loaded non-volatile regs
    mr r3, r14 
    mr r4, r15

    mr r12, r16 // setup entrypoint since  position independent code can asssume 
                // r12 to contain its GEP address (page 61 Power ABI)

    mtlr r16
    blr

.size bootstrap_green_task,.-bootstrap_green_task


.text
.globl swap_registers
.type swap_registers,@function
.align 16
swap_registers:
    // save non-volatile registers to the buffer in Registers (r3)
    // load non-volatile registers from new context buffer given via r4
    
    // standard function preamble:
    addis 2,12,.TOC.-swap_registers@ha
    addi 2,2,.TOC.-swap_registers@l
    .localentry swap_registers, .-swap_registers

    // save link & control registers
    mflr r0
    std r0,0(r3)
    std r0, 2*8(r1)
    mfcr r0
    std r0,1*8(r3)


    // non-volatile registers: r1 (fp), r2 (toc), r13, r14-r31, f14-f31, v20-v31, vrsave,
    // arguments passed in r3-r10, stack
    // => previous reg list (r3), new reg list (r4)

    // saving non-volatile gprs:
    std r1,   2*8(r3) // stack pointer
    std r2,   3*8(r3) // TOC pointer
    std r12, 4*8(r3)  // gloabl entrypoint address (GEP)
    std r14,  5*8(r3) // local vars
    std r15,  6*8(r3)
    std r16,  7*8(r3)
    std r17,  8*8(r3)
    std r18, 9*8(r3)
    std r19, 10*8(r3)
    std r20, 11*8(r3)
    std r21, 12*8(r3)
    std r22, 13*8(r3)
    std r23, 14*8(r3)
    std r24, 15*8(r3)
    std r25, 16*8(r3)
    std r26, 17*8(r3)
    std r27, 18*8(r3)
    std r28, 19*8(r3)
    std r29, 20*8(r3)
    std r30, 21*8(r3)
    std r31, 22*8(r3) // end local vars

    // save non-volatile floating point registers
    addi r3, r3, 32*8  // start of fp array
    stfd f14,  0*8(r3) // local vars (floating point)
    stfd f15,  1*8(r3)
    stfd f16,  2*8(r3)
    stfd f17,  3*8(r3)
    stfd f18,  4*8(r3)
    stfd f19,  5*8(r3)
    stfd f20,  6*8(r3)
    stfd f21,  7*8(r3)
    stfd f22,  8*8(r3)
    stfd f23,  9*8(r3)
    stfd f24, 10*8(r3)
    stfd f25, 11*8(r3)
    stfd f26, 12*8(r3)
    stfd f27, 13*8(r3)
    stfd f28, 14*8(r3)
    stfd f29, 15*8(r3)
    stfd f30, 16*8(r3)
    stfd f31, 17*8(r3) // end local  vars (fp)

    // and finally the vector registers
    addi r3, r3, 18*8 // start of vr area
    li r6, 0
 
    stvx v20, r3, r6 // start of vr saving
    addi r6, r6, 16
    stvx v21, r3, r6
    addi r6, r6, 16
    stvx v22, r3, r6
    addi r6, r6, 16
    stvx v23, r3, r6
    addi r6, r6, 16
    stvx v24, r3, r6
    addi r6, r6, 16
    stvx v25, r3, r6
    addi r6, r6, 16
    stvx v26, r3, r6
    addi r6, r6, 16
    stvx v27, r3, r6
    addi r6, r6, 16
    stvx v28, r3, r6
    addi r6, r6, 16
    stvx v29, r3, r6
    addi r6, r6, 16
    stvx v30, r3, r6
    addi r6, r6, 16
    stvx v31, r3, r6 // end of vr saving

    // begin restoration

    // restore floating point registers
    mr r5, r4
    addi r4, r4, 32*8  // start of fp array
    lfd f14,  0*8(r4) // start of fp restore
    lfd f15,  1*8(r4)
    lfd f16,  2*8(r4)
    lfd f17,  3*8(r4)
    lfd f18,  4*8(r4)
    lfd f19,  5*8(r4)
    lfd f20,  6*8(r4)
    lfd f21,  7*8(r4)
    lfd f22,  8*8(r4)
    lfd f23,  9*8(r4)
    lfd f24, 10*8(r4)
    lfd f25, 11*8(r4)
    lfd f26, 12*8(r4)
    lfd f27, 13*8(r4)
    lfd f28, 14*8(r4)
    lfd f29, 15*8(r4)
    lfd f30, 16*8(r4)
    lfd f31, 17*8(r4) // end of fp restore

    // restore vector registers
    addi r4, r4, 18*8 // start of vr array
    li r6, 0

    lvx v20, r4, r6 // start of vr restore
    addi r6, r6, 16
    lvx v21, r4, r6
    addi r6, r6, 16
    lvx v22, r4, r6
    addi r6, r6, 16
    lvx v23, r4, r6
    addi r6, r6, 16
    lvx v24, r4, r6
    addi r6, r6, 16
    lvx v25, r4, r6
    addi r6, r6, 16
    lvx v26, r4, r6
    addi r6, r6, 16
    lvx v27, r4, r6
    addi r6, r6, 16
    lvx v28, r4, r6
    addi r6, r6, 16
    lvx v29, r4, r6
    addi r6, r6, 16
    lvx v30, r4, r6
    addi r6, r6, 16
    lvx v31, r4, r6 // end of vr restore

    // restore gpr registers
    mr r4, r5          
    ld r1,   2*8(r4) // start of gpr restore
    ld r2,   3*8(r4)
    ld r12, 4*8(r4)
    ld r14,  5*8(r4)
    ld r15,  6*8(r4)
    ld r16,  7*8(r4)
    ld r17,  8*8(r4)
    ld r18, 9*8(r4)
    ld r19, 10*8(r4)
    ld r20, 11*8(r4)
    ld r21, 12*8(r4)
    ld r22, 13*8(r4)
    ld r23, 14*8(r4)
    ld r24, 15*8(r4)
    ld r25, 16*8(r4)
    ld r26, 17*8(r4)
    ld r27, 18*8(r4)
    ld r28, 19*8(r4)
    ld r29, 20*8(r4)
    ld r30, 21*8(r4)
    ld r31, 22*8(r4) // end of gpr restore

    // restore/load lr and ctr registers
    ld r0, 8(r4)
    mtcr r0
    ld r0, 0(r4) // new link register
    mtlr r0

    blr

.size swap_registers,.-swap_registers

/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits