cairo-native 0.9.0-rc.7

A compiler that converts Sierra, Cairo's intermediate representation, to MLIR and executes it.
.text


# -----------------------------------------------------------------------------
# _invoke_trampoline(fn_ptr, args_ptr, args_len, ret_ptr)
#
# Calls `fn_ptr` with `args_len` 64-bit words read from `args_ptr`, then stores
# the callee's rax and rdx into `ret_ptr[0]` and `ret_ptr[1]`.
#
# Argument words with index 0 to 5 are loaded into rdi, rsi, rdx, rcx, r8 and r9
# (the System V AMD64 integer argument registers, in order). Words with index 6
# and above are pushed on the stack, highest index first, so that word 6 ends up
# at the lowest address.
#
# In:     rdi = fn_ptr, rsi = args_ptr, rdx = args_len, rcx = ret_ptr
# Out:    [ret_ptr] = rax, [ret_ptr + 8] = rdx, as left by the callee
# Saved:  rbp (pushed on entry, popped before returning)
# Writes: rax, rcx, rdx, r10, r11, plus rdi, rsi, r8, r9 depending on args_len,
#         plus whatever the callee itself clobbers.
#
# Only integer registers are handled: no vector register is loaded before the
# call or stored after it.
#
# NOTE(review): operands are written in Intel order without register prefixes,
# but no `.intel_syntax noprefix` directive is visible in this file. Presumably
# the build that includes it selects Intel syntax. Confirm.
# -----------------------------------------------------------------------------
.global _invoke_trampoline
_invoke_trampoline:
    # rdi <- fn_ptr: extern "C" fn()
    # rsi <- args_ptr: *const u64
    # rdx <- args_len: usize
    # rcx <- ret_ptr: &mut [u64; 2]

    # Prologue. Assuming the caller followed the System V rule (rsp % 16 == 8 on
    # entry), the two pushes leave rsp % 16 == 8 and the `sub` brings it to 0.
    push    rbp                     # Save rbp (callee-saved); it is used as the frame anchor.
    push    rcx                     # Save ret_ptr: rcx is overwritten by argument #4 and by the callee.
    mov     rbp,    rsp             # Remember this stack position; the epilogue unwinds to it.
    sub     rsp,    8               # Reserve 8 bytes so rsp is 16-byte aligned.

    mov     r10,    rdi             # Keep fn_ptr in r10: rdi will receive argument #1.
    mov     r11,    rsi             # Keep args_ptr in r11: rsi will receive argument #2.

    cmp     rdx,    6               # Check if there are more than 6 arguments.
    jbe     2f                      # If there are 6 or fewer, skip to register arguments.

    #
    # Process stack arguments.
    #

    # The stack arguments are args_len - 6 words, so their count is odd exactly
    # when args_len is odd. In that case give back the 8 bytes reserved above
    # (rsp becomes equal to rbp again); the odd number of pushes below then
    # leaves rsp 16-byte aligned at the call. For an even count rsp is unchanged
    # and the pushes preserve the alignment.
    mov     rax,    rdx
    and     rax,    1
    lea     rsp,    [rsp + 8 * rax]

    # Push loop. rdx > 6 on entry; each pass pushes the word at index rdx - 1.
    # The loop ends with rdx == 6, i.e. only the register arguments remain.
  1:
    dec     rdx                     # Index of the next word to push (walking downwards).
    mov     rax,    [r11 + 8 * rdx] # Load the value.
    push    rax                     # Push it into the stack.

    cmp     rdx,    6               # Check if there are still stack arguments left.
    ja      1b                      # If there still are, loop back and repeat.

  2:
    #
    # Process registers.
    #

    # Here rdx is the number of register arguments (0 to 6). The six loads below
    # form a table of 4-byte slots ending at label 3. Jumping to `3f - 4 * rdx`
    # executes exactly the last rdx loads, which are the ones for arguments
    # #rdx down to #1, and nothing at all when rdx is 0.
    #
    # This depends on the assembler emitting every slot as exactly 4 bytes, so
    # the instructions, their operand forms and their order must not be changed.
    shl     rdx,    2               # Multiply remaining length by 4 (bytes per slot).
    lea     rax,    [rip + 3f]      # Load the PC-relative address of `3f`.
    sub     rax,    rdx             # Subtract 4 * remaining_len (rdx).
    jmp     rax                     # Jump to the resulting address.

    mov     r9,     [r11 + 0x28]    # Load argument #6.
    mov     r8,     [r11 + 0x20]    # Load argument #5.
    mov     rcx,    [r11 + 0x18]    # Load argument #4.
    mov     rdx,    [r11 + 0x10]    # Load argument #3 (rdx is no longer needed as a counter).
    mov     rsi,    [r11 + 0x08]    # Load argument #2.
    nop                             # Note: The previous 5 `mov` instructions use 4 bytes each, but
                                    #   the last one only takes 3. This `nop` (1 byte) pads the last
                                    #   slot to 4 bytes so that the computed jump above always lands
                                    #   on an instruction boundary.
    mov     rdi,    [r11]           # Load argument #1.

  3:
    # Call the function.
    call    r10

    # Epilogue. Resetting rsp to rbp drops the stack arguments and any padding in
    # one step, leaving the saved ret_ptr and rbp on top of the stack.
    mov     rsp,    rbp
    pop     rcx                     # rcx = ret_ptr again.
    pop     rbp                     # Restore the caller's rbp.

    # Store return registers.
    mov     [rcx],      rax         # ret_ptr[0] = rax.
    mov     [rcx + 8],  rdx         # ret_ptr[1] = rdx.

    ret