1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This module defines the `__rust_probestack` intrinsic which is used in the
//! implementation of "stack probes" on certain platforms.
//!
//! The purpose of a stack probe is to provide a static guarantee that if a
//! thread has a guard page then a stack overflow is guaranteed to hit that
//! guard page. If a function did not have a stack probe then there's a risk of
//! having a stack frame *larger* than the guard page, so a function call could
//! skip over the guard page entirely and then later hit maybe the heap or
//! another thread, possibly leading to security vulnerabilities such as [The
//! Stack Clash], for example.
//!
//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash
//!
//! The `__rust_probestack` is called in the prologue of functions whose stack
//! size is larger than the guard page, for example larger than 4096 bytes on
//! x86. This function is then responsible for "touching" all pages relevant to
//! the stack to ensure that that if any of them are the guard page we'll hit
//! them guaranteed.
//!
//! The precise ABI for how this function operates is defined by LLVM. There's
//! no real documentation as to what this is, so you'd basically need to read
//! the LLVM source code for reference. Often though the test cases can be
//! illuminating as to the ABI that's generated, or just looking at the output
//! of `llc`.
//!
//! Note that `#[naked]` is typically used here for the stack probe because the
//! ABI corresponds to no actual ABI.
//!
//! Finally it's worth noting that at the time of this writing LLVM only has
//! support for stack probes on x86 and x86_64. There's no support for stack
//! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would
//! be more than welcome to accept such a change!

#![cfg(not(feature = "mangled-names"))]
// Windows already has builtins to do this.
#![cfg(not(windows))]
// We only define stack probing for these architectures today.
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]

extern "C" {
    pub fn __rust_probestack();
}

// A wrapper for our implementation of __rust_probestack, which allows us to
// keep the assembly inline while controlling all CFI directives in the assembly
// emitted for the function.
//
// This is the ELF version.
#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .pushsection .text.__rust_probestack
            .globl __rust_probestack
            .type  __rust_probestack, @function
            .hidden __rust_probestack
        __rust_probestack:
            ",
            $body,
            "
            .size __rust_probestack, . - __rust_probestack
            .popsection
            "
        )
    };
}

#[cfg(target_os = "uefi")]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl __rust_probestack
        __rust_probestack:
            ",
            $body
        )
    };
}

// Same as above, but for Mach-O. Note that the triple underscore
// is deliberate
#[cfg(target_vendor = "apple")]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}

// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
// ensuring that if any pages are unmapped we'll make a page fault.
//
// The ABI here is that the stack frame size is located in `%rax`. Upon
// return we're not supposed to modify `%rsp` or `%rax`.
#[cfg(target_arch = "x86_64")]
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    pushq  %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq   %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack orginally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers stackprobe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid segfault.
    cmp    $0x1000,%r11
    jna    3f
2:
    sub    $0x1000,%rsp
    test   %rsp,8(%rsp)
    sub    $0x1000,%r11
    cmp    $0x1000,%r11
    ja     2b

3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11
    sub    %r11,%rsp
    test   %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add    %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    ret
    .cfi_endproc
    "
));

#[cfg(target_arch = "x86")]
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
// that on Unix we're expected to restore everything as it was, this
// function basically can't tamper with anything.
//
// The ABI here is the same as x86_64, except everything is 32-bits large.
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b

3:
    sub    %ecx,%esp
    test   %esp,8(%esp)

    add    %eax,%esp
    pop    %ecx
    leave
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
));