origin/thread/
linux_raw.rs

1//! Thread startup and shutdown.
2//!
//! Why does this API look like `thread::join(t)` instead of `t.join()`? Either
4//! way could work, but free functions help emphasize that this API's
5//! [`Thread`] differs from `std::thread::Thread`. It does not detach or free
6//! its resources on drop, and does not guarantee validity. That gives users
7//! more control when creating efficient higher-level abstractions like
8//! pthreads or `std::thread::Thread`.
9
10use crate::arch::{
11    clone, munmap_and_exit_thread, set_thread_pointer, thread_pointer, STACK_ALIGNMENT, TLS_OFFSET,
12};
13#[cfg(feature = "thread-at-exit")]
14use alloc::boxed::Box;
15#[cfg(feature = "unstable-errno")]
16use core::cell::Cell;
17use core::cmp::max;
18use core::ffi::c_void;
19use core::mem::{align_of, offset_of, size_of};
20use core::ptr::{copy_nonoverlapping, drop_in_place, null, null_mut, NonNull};
21use core::slice;
22use core::sync::atomic::Ordering::SeqCst;
23use core::sync::atomic::{AtomicI32, AtomicPtr, AtomicU32, AtomicU8};
24use linux_raw_sys::elf::*;
25use rustix::io;
26use rustix::mm::{mmap_anonymous, mprotect, MapFlags, MprotectFlags, ProtFlags};
27use rustix::param::{linux_execfn, page_size};
28use rustix::process::{getrlimit, Resource};
29use rustix::runtime::{exe_phdrs, set_tid_address};
30#[cfg(feature = "signal")]
31use rustix::runtime::{kernel_sigprocmask, How, KernelSigSet};
32use rustix::thread::gettid;
33
34pub use rustix::thread::Pid as ThreadId;
35
/// An opaque pointer to a thread.
///
/// This type does not detach or free resources on drop. It just leaks the
/// thread. To detach or join, call [`detach`] or [`join`] explicitly.
///
/// Internally, this is a non-null pointer to the thread's `ThreadData`
/// record, which lives inside the thread's own `mmap` allocation.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Thread(NonNull<ThreadData>);
42
43impl Thread {
44    /// Convert to `Self` from a raw pointer that was returned from
45    /// `Thread::to_raw`.
46    #[inline]
47    pub fn from_raw(raw: *mut c_void) -> Self {
48        Self(NonNull::new(raw.cast()).unwrap())
49    }
50
51    /// Convert to `Self` from a raw non-null pointer.
52    ///
53    /// # Safety
54    ///
55    /// `raw` must be a valid non-null thread pointer.
56    #[inline]
57    pub unsafe fn from_raw_unchecked(raw: *mut c_void) -> Self {
58        Self(NonNull::new_unchecked(raw.cast()))
59    }
60
61    /// Convert to `Self` from a raw non-null pointer that was returned from
62    /// `Thread::to_raw_non_null`.
63    #[inline]
64    pub fn from_raw_non_null(raw: NonNull<c_void>) -> Self {
65        Self(raw.cast())
66    }
67
68    /// Convert to a raw pointer from a `Self`.
69    ///
70    /// This value is guaranteed to uniquely identify a thread, while it is
71    /// running. After a thread has exited, this value may be reused by new
72    /// threads.
73    #[inline]
74    pub fn to_raw(self) -> *mut c_void {
75        self.0.cast().as_ptr()
76    }
77
78    /// Convert to a raw non-null pointer from a `Self`.
79    ///
80    /// This value is guaranteed to uniquely identify a thread, while it is
81    /// running. After a thread has exited, this value may be reused by new
82    /// threads.
83    #[inline]
84    pub fn to_raw_non_null(self) -> NonNull<c_void> {
85        self.0.cast()
86    }
87}
88
/// Data associated with a thread.
///
/// This is not `repr(C)` and not ABI-exposed.
struct ThreadData {
    /// The thread's tid. `create` passes `CHILD_CLEARTID` to `clone`, so the
    /// kernel clears this (and wakes futex waiters) when the thread exits;
    /// `wait_for_exit` watches for that.
    thread_id: AtomicI32,
    /// Storage for the thread's `errno` value.
    #[cfg(feature = "unstable-errno")]
    errno_val: Cell<i32>,
    /// Detach state: one of `INITIAL`, `DETACHED`, or `ABANDONED`.
    detached: AtomicU8,
    /// Lowest address of the thread's stack region (just above the guard
    /// region).
    stack_addr: *mut c_void,
    /// Size in bytes of the stack region.
    stack_size: usize,
    /// Size in bytes of the inaccessible guard region below the stack.
    guard_size: usize,
    /// Total size of the thread's `mmap` allocation, or 0 for the main
    /// thread, whose memory is never freed by `exit`.
    map_size: usize,
    /// The value returned from the thread function, stored by `exit` for
    /// `join` to read.
    return_value: AtomicPtr<c_void>,

    // Support a few dtors before using dynamic allocation.
    #[cfg(feature = "thread-at-exit")]
    dtors: smallvec::SmallVec<[Box<dyn FnOnce()>; 4]>,
}
107
// Values for `ThreadData::detached`.
/// Neither detached nor exited yet; the thread is joinable.
const INITIAL: u8 = 0;
/// `detach` was called; the thread frees its own resources when it exits.
const DETACHED: u8 = 1;
/// The thread exited while joinable; the joiner (or a late `detach`) frees
/// its resources.
const ABANDONED: u8 = 2;
112
impl ThreadData {
    /// Construct a `ThreadData` describing a thread with the given stack and
    /// allocation geometry.
    ///
    /// The thread id starts as 0 and is filled in once the tid is known (via
    /// `set_tid_address` for the main thread, or `clone`'s `*_SETTID` flags
    /// for created threads). The detach state starts as `INITIAL` and the
    /// return value as null.
    #[inline]
    fn new(stack_addr: *mut c_void, stack_size: usize, guard_size: usize, map_size: usize) -> Self {
        Self {
            thread_id: AtomicI32::new(0),
            #[cfg(feature = "unstable-errno")]
            errno_val: Cell::new(0),
            detached: AtomicU8::new(INITIAL),
            stack_addr,
            stack_size,
            guard_size,
            map_size,
            return_value: AtomicPtr::new(null_mut()),
            #[cfg(feature = "thread-at-exit")]
            dtors: smallvec::SmallVec::new(),
        }
    }
}
131
/// Metadata describing a thread.
///
/// The field order depends on the target's TLS variant: Variant I targets
/// (aarch64, arm, riscv64) place TLS data above the TCB, so our private data
/// goes below it; Variant II targets (x86, x86_64) place TLS data below the
/// TCB, so our private data goes above it.
#[repr(C)]
struct Metadata {
    /// Crate-internal fields. On platforms where TLS data goes after the
    /// ABI-exposed fields, we store our fields before them.
    #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
    thread: ThreadData,

    /// ABI-exposed fields. This is allocated at a platform-specific offset
    /// from the platform thread-pointer register value.
    abi: Abi,

    /// Crate-internal fields. On platforms where TLS data goes before the
    /// ABI-exposed fields, we store our fields after them.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    thread: ThreadData,
}
149
/// Fields which are accessed by user code via well-known offsets from the
/// platform thread-pointer register. Specifically, the thread-pointer
/// register points to the `thread_pointee` field.
#[repr(C)]
#[cfg_attr(target_arch = "arm", repr(align(8)))]
struct Abi {
    /// The address the thread pointer points to.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    thread_pointee: [u8; 0],

    /// The ABI-exposed `canary` field.
    #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
    canary: usize,

    /// The address the thread pointer points to.
    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    thread_pointee: [u8; 0],

    /// The ABI-exposed `dtv` field (though we don't yet implement dynamic
    /// linking).
    #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
    dtv: *const c_void,

    /// The address the thread pointer points to.
    #[cfg(target_arch = "riscv64")]
    thread_pointee: [u8; 0],

    /// Padding to put the TLS data which follows at its well-known offset.
    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    _pad: [usize; 1],

    /// Padding to put the TLS data which follows at its well-known offset.
    #[cfg(target_arch = "riscv64")]
    _pad: [usize; 0],

    /// x86 and x86-64 put a copy of the thread-pointer register at the memory
    /// location pointed to by the thread-pointer register, because reading the
    /// thread-pointer register directly is slow.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    this: *mut c_void,

    /// The ABI-exposed `dtv` field (though we don't yet implement dynamic
    /// linking).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    dtv: *const c_void,

    /// Padding to put the `canary` field at its well-known offset.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    _pad: [usize; 3],

    /// The ABI-exposed `canary` field.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    canary: usize,
}
204
/// Information obtained from the `DT_TLS` segment of the executable.
///
/// This variable must be initialized with [`initialize_startup_info`] before
/// use. It is only written by that function, which runs before any other
/// threads exist, so subsequent reads need no synchronization.
static mut STARTUP_TLS_INFO: StartupTlsInfo = StartupTlsInfo {
    addr: null(),
    mem_size: 0,
    file_size: 0,
    align: 0,
};
215
/// The type of [`STARTUP_TLS_INFO`].
///
/// This is not `repr(C)` and not ABI-exposed.
struct StartupTlsInfo {
    /// The base address of the TLS segment. Once initialized, this is
    /// always non-null, even when the TLS data is absent, so that the
    /// `addr` and `file_size` fields are suitable for passing to
    /// `slice::from_raw_parts`.
    addr: *const c_void,
    /// The size of the memory region pointed to by `addr`.
    mem_size: usize,
    /// From this offset up to `mem_size` is zero-initialized.
    file_size: usize,
    /// The required alignment for the TLS segment.
    align: usize,
}
232
/// The requested minimum size for stacks.
///
/// This is taken from the `PT_GNU_STACK` program header's `p_memsz` field
/// (if present) by [`initialize_startup_info`].
static mut STARTUP_STACK_SIZE: usize = 0;
235
/// Initialize `STARTUP_TLS_INFO` and `STARTUP_STACK_SIZE`.
///
/// Read values from the main executable segment headers (“phdrs”) relevant
/// to initializing TLS provided to the program at startup, and store them in
/// `STARTUP_TLS_INFO`.
pub(super) fn initialize_startup_info() {
    let mut tls_phdr = null();
    let mut stack_size = 0;
    // Offset from the static virtual addresses recorded in the phdrs to the
    // actual runtime addresses.
    let mut offset = 0;

    let (first_phdr, phent, phnum) = exe_phdrs();
    let mut current_phdr = first_phdr.cast::<Elf_Phdr>();

    // The dynamic address of the dynamic section, which we can compare with
    // the `PT_DYNAMIC` header's static address, if present.
    //
    // SAFETY: We're just taking the address of `_DYNAMIC` for arithmetic
    // purposes, not dereferencing it.
    let dynamic_addr: *const u8 = crate::arch::dynamic_table_addr().cast();

    // SAFETY: We assume that the phdr array pointer and length the kernel
    // provided to the process describe a valid phdr array, and that there are
    // no other threads running so we can store to `STARTUP_TLS_INFO` and
    // `STARTUP_STACK_SIZE` without synchronization.
    unsafe {
        let phdrs_end = current_phdr.byte_add(phnum * phent);
        while current_phdr != phdrs_end {
            let phdr = &*current_phdr;
            // Advance by `phent`, the entry stride the kernel reported,
            // rather than assuming `size_of::<Elf_Phdr>()`.
            current_phdr = current_phdr.byte_add(phent);

            match phdr.p_type {
                // Compute the offset from the static virtual addresses in the
                // `p_vaddr` fields to the dynamic addresses. We don't always
                // get a `PT_PHDR` or `PT_DYNAMIC` header, so use whichever one
                // we get.
                PT_PHDR => offset = first_phdr.addr().wrapping_sub(phdr.p_vaddr),
                PT_DYNAMIC => offset = dynamic_addr.addr().wrapping_sub(phdr.p_vaddr),

                PT_TLS => tls_phdr = phdr,
                PT_GNU_STACK => stack_size = phdr.p_memsz,

                _ => {}
            }
        }

        STARTUP_TLS_INFO = if tls_phdr.is_null() {
            // No `PT_TLS` section. Assume an empty TLS.
            StartupTlsInfo {
                // A dangling-but-non-null pointer keeps `addr`/`file_size`
                // valid arguments for `slice::from_raw_parts`.
                addr: NonNull::dangling().as_ptr(),
                mem_size: 0,
                file_size: 0,
                align: 1,
            }
        } else {
            // We saw a `PT_TLS` section. Initialize the fields.
            let tls_phdr = &*tls_phdr;
            StartupTlsInfo {
                addr: first_phdr.with_addr(offset.wrapping_add(tls_phdr.p_vaddr)),
                mem_size: tls_phdr.p_memsz,
                file_size: tls_phdr.p_filesz,
                align: tls_phdr.p_align,
            }
        };

        STARTUP_STACK_SIZE = stack_size;
    }
}
303
extern "C" {
    /// Declare the `_DYNAMIC` symbol so that we can compare its address with
    /// the static address in the `PT_DYNAMIC` header to learn our offset. Use
    /// a weak symbol because `_DYNAMIC` is not always present.
    static _DYNAMIC: c_void;
}
// Rust has `extern_weak` but it isn't stable, so use a `global_asm` to make
// the declaration above weak.
core::arch::global_asm!(".weak _DYNAMIC");
312
/// Initialize the main thread.
///
/// This function is similar to `create_thread` except that the OS thread is
/// already created, and already has a stack (which we need to locate), and is
/// already running. We still need to create the thread [`Metadata`], copy in
/// the TLS initializers, and point the thread pointer to it so that it follows
/// the thread ABI that all the other threads follow.
///
/// # Safety
///
/// `initialize_startup_info` must be called before this. And `mem` must be the
/// initial value of the stack pointer in a new process, pointing to the
/// initial contents of the stack.
pub(super) unsafe fn initialize_main(mem: *mut c_void) {
    // Determine the top of the stack. Linux puts the `AT_EXECFN` string at
    // the top, so find the end of that, and then round up to the page size.
    // See <https://lwn.net/Articles/631631/> for details.
    let execfn = linux_execfn().to_bytes_with_nul();
    let stack_base = execfn.as_ptr().add(execfn.len());
    let stack_base = stack_base
        .map_addr(|ptr| round_up(ptr, page_size()))
        .cast_mut();

    // We're running before any user code, so the startup soft stack limit is
    // the effective stack size. Linux sets up inaccessible memory at the end
    // of the stack.
    let stack_map_size = getrlimit(Resource::Stack).current.unwrap() as usize;
    let stack_least = stack_base.sub(stack_map_size);
    // NOTE(review): `stack_least` is below `mem`, so `offset_from` here looks
    // negative before the `as usize` conversion — confirm the intended sign.
    let stack_size = stack_least.offset_from(mem.cast::<u8>()) as usize;
    let guard_size = page_size();

    // Initialize the canary value from the OS-provided random bytes.
    let random_ptr = rustix::runtime::random().cast::<usize>();
    let canary = random_ptr.read_unaligned();
    // `__stack_chk_guard` is the stack-protector canary global, defined
    // elsewhere in this crate.
    __stack_chk_guard = canary;

    let mut alloc_size = 0;
    let (tls_data_bottom, header) = calculate_tls_size(&mut alloc_size);

    // Allocate the thread data. Use `mmap_anonymous` rather than `alloc` here
    // as the allocator may depend on thread-local data, which is what we're
    // initializing here.
    let new = mmap_anonymous(
        null_mut(),
        alloc_size,
        ProtFlags::READ | ProtFlags::WRITE,
        MapFlags::PRIVATE,
    )
    .unwrap()
    .cast::<u8>();

    // `mmap` returns page-aligned memory, and `calculate_tls_size` asserts
    // that the required alignment is at most a page.
    let metadata_align = max(unsafe { STARTUP_TLS_INFO.align }, align_of::<Metadata>());
    debug_assert_eq!(new.addr() % metadata_align, 0);

    let tls_data = new.add(tls_data_bottom);
    let metadata: *mut Metadata = new.add(header).cast();

    // Fill in the metadata and TLS image. `map_size` is 0 because this
    // allocation is never freed when the main thread exits.
    let (newtls, thread_id_ptr) = initialize_tls(
        tls_data,
        metadata,
        canary,
        stack_least,
        stack_size,
        guard_size,
        0,
    );
    // Ask the kernel to clear the tid (and wake futex waiters) at thread
    // exit, and record our tid in the thread data.
    let tid = rustix::runtime::set_tid_address(thread_id_ptr.cast());
    *thread_id_ptr = tid.as_raw_nonzero().get();

    // Point the platform thread-pointer register at the new thread metadata.
    set_thread_pointer(newtls);
}
385
/// Compute the layout of a thread's combined TLS-and-metadata region.
///
/// On entry, `*map_size` holds the number of bytes already reserved at the
/// start of the mapping (the guard region and stack in `create`, or 0 in
/// `initialize_main`); on return it has been grown to also cover the TLS
/// data and the [`Metadata`]. Returns the offsets, from the start of the
/// mapping, of the TLS data and of the `Metadata`.
fn calculate_tls_size(map_size: &mut usize) -> (usize, usize) {
    // SAFETY: `STARTUP_TLS_INFO` is initialized at program startup before
    // we come here creating new threads.
    let (startup_tls_align, startup_tls_mem_size) =
        unsafe { (STARTUP_TLS_INFO.align, STARTUP_TLS_INFO.mem_size) };

    // Compute relevant alignments.
    let tls_data_align = startup_tls_align;
    let page_align = page_size();
    let header_align = align_of::<Metadata>();
    let metadata_align = max(tls_data_align, header_align);
    // Callers rely on page-aligned allocations satisfying this alignment.
    debug_assert!(metadata_align <= page_align);

    *map_size = round_up(*map_size, metadata_align);

    // Variant II: TLS data goes below the TCB.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    let tls_data_bottom = *map_size;

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        *map_size += round_up(startup_tls_mem_size, tls_data_align);
    }

    let header = *map_size;

    *map_size += size_of::<Metadata>();

    // Variant I: TLS data goes above the TCB.
    #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
    {
        *map_size = round_up(*map_size, tls_data_align);
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
    let tls_data_bottom = *map_size;

    #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
    {
        *map_size += round_up(startup_tls_mem_size, tls_data_align);
    }
    (tls_data_bottom, header)
}
429
/// Initialize a new thread's [`Metadata`] and TLS area.
///
/// Writes a fresh `Metadata` (ABI fields plus a `ThreadData` describing the
/// given stack geometry) at `metadata`, then copies the startup TLS image
/// into `tls_data` and zero-fills the remainder. Returns the value to load
/// into the platform thread-pointer register, and a pointer to the thread-id
/// field for use with `set_tid_address` or `clone`'s `*_SETTID` flags.
///
/// # Safety
///
/// `tls_data` and `metadata` must point into a writable allocation laid out
/// by [`calculate_tls_size`], and `STARTUP_TLS_INFO` must already be
/// initialized.
unsafe fn initialize_tls(
    tls_data: *mut u8,
    metadata: *mut Metadata,
    canary: usize,
    stack_least: *mut u8,
    stack_size: usize,
    guard_size: usize,
    map_size: usize,
) -> (*mut c_void, *mut i32) {
    // The thread pointer points at the `thread_pointee` field.
    let newtls: *mut c_void = (*metadata).abi.thread_pointee.as_mut_ptr().cast();

    // Initialize the thread metadata.
    metadata.write(Metadata {
        abi: Abi {
            canary,
            dtv: null(),
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            this: newtls,
            _pad: Default::default(),
            thread_pointee: [],
        },
        thread: ThreadData::new(stack_least.cast(), stack_size, guard_size, map_size),
    });

    // Initialize the TLS data with explicit initializer data.
    slice::from_raw_parts_mut(tls_data, STARTUP_TLS_INFO.file_size).copy_from_slice(
        slice::from_raw_parts(
            STARTUP_TLS_INFO.addr.cast::<u8>(),
            STARTUP_TLS_INFO.file_size,
        ),
    );

    // Initialize the TLS data beyond `file_size` which is zero-filled.
    // (Both current callers pass freshly-`mmap`ed memory, which is already
    // zeroed, so this is belt-and-braces.)
    slice::from_raw_parts_mut(
        tls_data.add(STARTUP_TLS_INFO.file_size),
        STARTUP_TLS_INFO.mem_size - STARTUP_TLS_INFO.file_size,
    )
    .fill(0);

    let thread_id_ptr = (*metadata).thread.thread_id.as_ptr().cast::<i32>();

    (newtls, thread_id_ptr)
}
473
/// Creates a new thread.
///
/// `fn_(args)` is called on the new thread, except that the argument values
/// are copied to memory that can be exclusively referenced by the thread.
///
/// # Safety
///
/// The values of `args` must be valid to send to the new thread, `fn_(args)`
/// on the new thread must have defined behavior, and the return value must be
/// valid to send to other threads.
pub unsafe fn create(
    fn_: unsafe fn(&mut [Option<NonNull<c_void>>]) -> Option<NonNull<c_void>>,
    args: &[Option<NonNull<c_void>>],
    stack_size: usize,
    guard_size: usize,
) -> io::Result<Thread> {
    // Compute relevant alignments.
    let page_align = page_size();
    // The child's stack pointer is additionally aligned to `STACK_ALIGNMENT`
    // below; 16 here just sizes the stack region.
    let stack_align = 16;

    // Compute the `mmap` size. The layout is, from the lowest address:
    // guard region, stack, then TLS data and metadata.
    let mut map_size = 0;

    map_size += round_up(guard_size, page_align);

    let stack_bottom = map_size;

    map_size += round_up(stack_size, stack_align);

    let stack_top = map_size;

    let (tls_data_bottom, header) = calculate_tls_size(&mut map_size);

    // Now we'll `mmap` the memory, initialize it, and create the OS thread.
    unsafe {
        // Allocate address space for the thread, including guard pages.
        let map = mmap_anonymous(
            null_mut(),
            map_size,
            ProtFlags::empty(),
            MapFlags::PRIVATE | MapFlags::STACK,
        )?
        .cast::<u8>();

        // Make the thread metadata and stack readable and writable, leaving
        // the guard region inaccessible.
        //
        // NOTE(review): if this `mprotect` fails, the mapping above is leaked
        // on the error path — consider unmapping it before returning.
        mprotect(
            map.add(stack_bottom).cast(),
            map_size - stack_bottom,
            MprotectFlags::READ | MprotectFlags::WRITE,
        )?;

        // Compute specific pointers into the thread's memory.
        let stack = map.add(stack_top);
        let stack_least = map.add(stack_bottom);

        let tls_data = map.add(tls_data_bottom);
        let metadata: *mut Metadata = map.add(header).cast();

        // Copy the current thread's canary to the new thread.
        let canary = (*current_metadata()).abi.canary;

        let (newtls, thread_id_ptr) = initialize_tls(
            tls_data,
            metadata,
            canary,
            stack_least,
            stack_size,
            guard_size,
            map_size,
        );

        // Allocate space for the thread arguments on the child's stack.
        let stack = stack.cast::<Option<NonNull<c_void>>>().sub(args.len());

        // Align the stack pointer (downward) to `STACK_ALIGNMENT`.
        let stack = stack.with_addr(stack.addr() & STACK_ALIGNMENT.wrapping_neg());

        // Store the thread arguments on the child's stack.
        copy_nonoverlapping(args.as_ptr(), stack, args.len());

        // The TLS region includes additional data beyond `file_size` which is
        // expected to be zero-initialized, but we don't need to do anything
        // here since we allocated the memory with `mmap_anonymous` so it's
        // already zeroed.

        // Create the OS thread. In Linux, this is a process that shares much
        // of its state with the current process. We also pass additional
        // flags:
        //  - `SETTLS` to set the platform thread register.
        //  - `CHILD_CLEARTID` to arrange for a futex wait for threads waiting
        //    in `join_thread`.
        //  - `PARENT_SETTID` to store the child's tid at the `parent_tid`
        //    location.
        //  - `CHILD_SETTID` to store the child's tid at the `child_tid`
        //    location.
        // We receive the tid in the same memory for the parent and the child,
        // but we set both `PARENT_SETTID` and `CHILD_SETTID` to ensure that
        // the store completes before either the parent or child reads the tid.
        let flags = CloneFlags::VM
            | CloneFlags::FS
            | CloneFlags::FILES
            | CloneFlags::SIGHAND
            | CloneFlags::THREAD
            | CloneFlags::SYSVSEM
            | CloneFlags::SETTLS
            | CloneFlags::CHILD_CLEARTID
            | CloneFlags::CHILD_SETTID
            | CloneFlags::PARENT_SETTID;
        // The arch-specific `clone` wrapper starts the child in `entry`,
        // transmuting `fn_` back to its real type there.
        let clone_res = clone(
            flags.bits(),
            stack.cast(),
            thread_id_ptr,
            thread_id_ptr,
            newtls,
            core::mem::transmute(fn_),
            args.len(),
        );
        if clone_res >= 0 {
            #[cfg(feature = "log")]
            {
                let id = current_id();
                log::trace!(
                    "Thread[{:?}] launched thread Thread[{:?}] with stack_size={} and guard_size={}",
                    id.as_raw_nonzero(),
                    clone_res,
                    stack_size,
                    guard_size
                );
                for (i, arg) in args.iter().enumerate() {
                    log::trace!("Thread[{:?}] args[{}]: {:?}", id.as_raw_nonzero(), i, arg);
                }
            }

            Ok(Thread(NonNull::from(&mut (*metadata).thread)))
        } else {
            // A negative return holds the error code, following the raw
            // syscall convention.
            Err(io::Errno::from_raw_os_error(-clone_res as i32))
        }
    }
}
614
/// The entrypoint where Rust code is first executed on a new thread.
///
/// This transmutes `fn_` to
/// `unsafe fn(&mut [*mut c_void]) -> Option<NonNull<c_void>>` and then calls
/// it on the new thread. When `fn_` returns, the thread exits.
///
/// # Safety
///
/// `fn_` must be valid to transmute the function as described above and call
/// it in the new thread.
///
/// After calling `fn_`, this terminates the thread.
pub(super) unsafe extern "C" fn entry(
    fn_: extern "C" fn(),
    args: *mut *mut c_void,
    num_args: usize,
) -> ! {
    #[cfg(feature = "log")]
    log::trace!("Thread[{:?}] launched", current_id().as_raw_nonzero());

    // Do some basic precondition checks, to ensure that our assembly code did
    // what we expect it to do. These are debug-only for now, to keep the
    // release-mode startup code simple to disassemble and inspect, while we're
    // getting started.
    #[cfg(debug_assertions)]
    {
        // If we have nightly, we can do additional checks.
        #[cfg(feature = "nightly")]
        {
            // Bind LLVM intrinsics so we can inspect the frame, return
            // address, and (on aarch64) the on-entry stack pointer.
            extern "C" {
                #[link_name = "llvm.frameaddress"]
                fn builtin_frame_address(level: i32) -> *const u8;
                #[link_name = "llvm.returnaddress"]
                fn builtin_return_address(level: i32) -> *const u8;
                #[cfg(target_arch = "aarch64")]
                #[link_name = "llvm.sponentry"]
                fn builtin_sponentry() -> *const u8;
            }

            // Check that the incoming stack pointer is where we expect it to be.
            debug_assert_eq!(builtin_return_address(0), null());
            debug_assert_ne!(builtin_frame_address(0), null());
            #[cfg(not(any(target_arch = "x86", target_arch = "arm")))]
            debug_assert_eq!(builtin_frame_address(0).addr() & 0xf, 0);
            #[cfg(target_arch = "arm")]
            debug_assert_eq!(builtin_frame_address(0).addr() & 0x3, 0);
            #[cfg(target_arch = "x86")]
            debug_assert_eq!(builtin_frame_address(0).addr() & 0xf, 8);
            debug_assert_eq!(builtin_frame_address(1), null());
            #[cfg(target_arch = "aarch64")]
            debug_assert_ne!(builtin_sponentry(), null());
            #[cfg(target_arch = "aarch64")]
            debug_assert_eq!(builtin_sponentry().addr() & 0xf, 0);
        }

        // Check that `clone` stored our thread id as we expected.
        debug_assert_eq!(current_id(), gettid());
    }

    // Call the user thread function. In `std`, this is `thread_start`. Ignore
    // the return value for now, as `std` doesn't need it.
    let fn_: unsafe fn(&mut [*mut c_void]) -> Option<NonNull<c_void>> = core::mem::transmute(fn_);
    let args = slice::from_raw_parts_mut(args, num_args);
    let return_value = fn_(args);

    // Run `at_exit` destructors and terminate the thread; never returns.
    exit(return_value)
}
682
/// Call the destructors registered with [`at_exit`] and exit the thread.
unsafe fn exit(return_value: Option<NonNull<c_void>>) -> ! {
    let current = current();

    #[cfg(feature = "log")]
    if log::log_enabled!(log::Level::Trace) {
        log::trace!(
            "Thread[{:?}] returned {:?}",
            current.0.as_ref().thread_id.load(SeqCst),
            return_value
        );
    }

    // Call functions registered with `at_exit`.
    #[cfg(feature = "thread-at-exit")]
    call_dtors(current);

    // Read the thread's state, and set it to `ABANDONED` if it was `INITIAL`,
    // which tells `join_thread` to free the memory. Otherwise, it's in the
    // `DETACHED` state, and we free the memory immediately.
    let state = current
        .0
        .as_ref()
        .detached
        .compare_exchange(INITIAL, ABANDONED, SeqCst, SeqCst);
    // On failure, `e` holds the previous state, which must be `DETACHED`.
    if let Err(e) = state {
        // The thread was detached. Prepare to free the memory. First read out
        // all the fields that we'll need before freeing it.
        #[cfg(feature = "log")]
        let current_thread_id = current.0.as_ref().thread_id.load(SeqCst);
        let current_map_size = current.0.as_ref().map_size;
        let current_stack_addr = current.0.as_ref().stack_addr;
        let current_guard_size = current.0.as_ref().guard_size;

        #[cfg(feature = "log")]
        log::trace!("Thread[{:?}] exiting as detached", current_thread_id);
        debug_assert_eq!(e, DETACHED);

        // Deallocate the `ThreadData`.
        drop_in_place(current.0.as_ptr());

        // Free the thread's `mmap` region, if we allocated it. (The main
        // thread has `map_size` of 0 and skips this.)
        let map_size = current_map_size;
        if map_size != 0 {
            // Null out the tid address so that the kernel doesn't write to
            // memory that we've freed trying to clear our tid when we exit.
            let _ = set_tid_address(null_mut());

            // In preparation for freeing the stack, block all signals, so that
            // no signals for the process are delivered to this thread.
            #[cfg(feature = "signal")]
            {
                let all = KernelSigSet::all();
                kernel_sigprocmask(How::BLOCK, Some(&all)).ok();
            }

            // `munmap` the memory, which also frees the stack we're currently
            // on, and do an `exit` carefully without touching the stack.
            // The mapping starts at the bottom of the guard region.
            let map = current_stack_addr.byte_sub(current_guard_size);
            munmap_and_exit_thread(map, map_size);
        }
    } else {
        // The thread was not detached, so its memory will be freed when it's
        // joined.
        #[cfg(feature = "log")]
        if log::log_enabled!(log::Level::Trace) {
            log::trace!(
                "Thread[{:?}] exiting as joinable",
                current.0.as_ref().thread_id.load(SeqCst)
            );
        }

        // Convert `return_value` into a `*mut c_void` so that we can store it
        // in an `AtomicPtr`.
        let return_value = match return_value {
            Some(return_value) => return_value.as_ptr(),
            None => null_mut(),
        };

        // Store the return value in the thread for `join_thread` to read.
        current.0.as_ref().return_value.store(return_value, SeqCst);
    }

    // Terminate the thread.
    rustix::runtime::exit_thread(0)
}
769
/// Call the destructors registered with [`at_exit`].
#[cfg(feature = "thread-at-exit")]
pub(crate) fn call_dtors(current: Thread) {
    let mut current = current;

    // Pop and run the registered destructors one at a time, newest first.
    // A destructor may itself register further destructors; those are run
    // too, before we return.
    loop {
        // SAFETY: `current` points to thread-local data which is valid as
        // long as the thread is alive.
        let popped = unsafe { current.0.as_mut().dtors.pop() };
        let Some(func) = popped else { break };

        #[cfg(feature = "log")]
        if log::log_enabled!(log::Level::Trace) {
            log::trace!(
                "Thread[{:?}] calling `thread::at_exit`-registered function",
                unsafe { current.0.as_ref().thread_id.load(SeqCst) },
            );
        }

        func();
    }
}
792
/// Marks a thread as “detached”.
///
/// Detached threads free their own resources automatically when they
/// exit, rather than when they are joined.
///
/// # Safety
///
/// `thread` must point to a valid thread record that has not yet been detached
/// and will not be joined.
#[inline]
pub unsafe fn detach(thread: Thread) {
    #[cfg(feature = "log")]
    let thread_id = thread.0.as_ref().thread_id.load(SeqCst);

    #[cfg(feature = "log")]
    if log::log_enabled!(log::Level::Trace) {
        log::trace!(
            "Thread[{:?}] marked as detached by Thread[{:?}]",
            thread_id,
            current_id().as_raw_nonzero()
        );
    }

    // Mark the thread detached. If it had already exited (`ABANDONED`), no
    // one else will free its resources, so we must do it here.
    if thread.0.as_ref().detached.swap(DETACHED, SeqCst) == ABANDONED {
        wait_for_exit(thread);

        #[cfg(feature = "log")]
        log_thread_to_be_freed(thread_id);

        free_memory(thread);
    }
}
825
/// Waits for a thread to finish.
///
/// The return value is the value returned from the call to the `fn_` passed to
/// `create_thread`.
///
/// # Safety
///
/// `thread` must point to a valid thread record that has not already been
/// detached or joined.
pub unsafe fn join(thread: Thread) -> Option<NonNull<c_void>> {
    let thread_data = thread.0.as_ref();

    #[cfg(feature = "log")]
    let thread_id = thread_data.thread_id.load(SeqCst);

    #[cfg(feature = "log")]
    if log::log_enabled!(log::Level::Trace) {
        log::trace!(
            "Thread[{:?}] is being joined by Thread[{:?}]",
            thread_id,
            current_id().as_raw_nonzero()
        );
    }

    wait_for_exit(thread);
    // The thread exited without being detached, so `exit` must have left it
    // in the `ABANDONED` state.
    debug_assert_eq!(thread_data.detached.load(SeqCst), ABANDONED);

    #[cfg(feature = "log")]
    log_thread_to_be_freed(thread_id);

    // Load the return value stored by `exit_thread`, before we free the
    // thread's memory.
    let return_value = thread_data.return_value.load(SeqCst);

    // `munmap` the stack and metadata for the thread.
    free_memory(thread);

    // Convert the `*mut c_void` we stored in the `AtomicPtr` back into an
    // `Option<NonNull<c_void>>`.
    NonNull::new(return_value)
}
867
/// Wait until `thread` has exited.
///
/// # Safety
///
/// `thread` must point to a valid thread record that has not already been
/// detached or joined.
unsafe fn wait_for_exit(thread: Thread) {
    use rustix::thread::futex;

    // Check whether the thread has exited already; we set the
    // `CloneFlags::CHILD_CLEARTID` flag on the clone syscall, so the kernel
    // stores 0 to `thread_id` and performs a futex wake on it when the
    // thread exits, and we can test for `NONE` here.
    let thread_data = thread.0.as_ref();
    let thread_id = &thread_data.thread_id;
    while let Some(id_value) = ThreadId::from_raw(thread_id.load(SeqCst)) {
        // This doesn't use any shared memory, but we can't use
        // `FutexFlags::PRIVATE` because the wake comes from Linux
        // as arranged by the `CloneFlags::CHILD_CLEARTID` flag,
        // and Linux doesn't use the private flag for the wake.
        match futex::wait(
            AtomicU32::from_ptr(thread_id.as_ptr().cast()),
            futex::Flags::empty(),
            id_value.as_raw_nonzero().get() as u32,
            None,
        ) {
            // Woken: the kernel cleared the tid and woke us, so the thread
            // has exited.
            Ok(_) => break,
            // Interrupted by a signal; retry the wait.
            Err(io::Errno::INTR) => continue,
            // `AGAIN` means the tid value changed before we could sleep,
            // i.e. the thread exited already; loop around and observe the
            // cleared (`None`) id.
            Err(e) => debug_assert_eq!(e, io::Errno::AGAIN),
        }
    }
}
897
/// Trace-log that `thread_id`'s memory is about to be released.
#[cfg(feature = "log")]
fn log_thread_to_be_freed(thread_id: i32) {
    if !log::log_enabled!(log::Level::Trace) {
        return;
    }
    log::trace!("Thread[{:?}] memory being freed", thread_id);
}
904
/// Free any dynamically-allocated memory for `thread`.
///
/// # Safety
///
/// `thread` must point to a valid thread record for a thread that has
/// already exited.
unsafe fn free_memory(thread: Thread) {
    use rustix::mm::munmap;

    // The thread was detached. Prepare to free the memory. First read out
    // all the fields that we'll need before freeing it — presumably the
    // `ThreadData` record lives inside the mapping we're about to drop and
    // unmap, so it must not be touched afterwards.
    let map_size = thread.0.as_ref().map_size;
    let stack_addr = thread.0.as_ref().stack_addr;
    let guard_size = thread.0.as_ref().guard_size;

    // Deallocate the `ThreadData`.
    drop_in_place(thread.0.as_ptr());

    // Free the thread's `mmap` region, if we allocated it (a `map_size` of
    // zero means there is nothing of ours to unmap).
    if map_size != 0 {
        // The mapping begins `guard_size` bytes below the stack address.
        let map = stack_addr.byte_sub(guard_size);
        munmap(map, map_size).unwrap();
    }
}
929
/// Registers a function to call when the current thread exits.
#[cfg(feature = "thread-at-exit")]
pub fn at_exit(func: Box<dyn FnOnce()>) {
    let mut thread = current();

    // SAFETY: `current()` refers to this thread's own thread-local data,
    // which remains valid for as long as the thread is alive.
    unsafe { thread.0.as_mut().dtors.push(func) };
}
939
/// Return a pointer to the current thread's `Metadata`.
///
/// The thread pointer register points at the `thread_pointee` field of the
/// `Abi` struct embedded in `Metadata`, so step back by the combined offset
/// of that field to recover the start of the `Metadata` itself.
#[inline]
#[must_use]
fn current_metadata() -> *mut Metadata {
    thread_pointer()
        .wrapping_byte_sub(offset_of!(Metadata, abi) + offset_of!(Abi, thread_pointee))
        .cast()
}
947
948/// Return a raw pointer to the data associated with the current thread.
949#[inline]
950#[must_use]
951pub fn current() -> Thread {
952    // SAFETY: This is only called after we've initialized all the thread
953    // state.
954    unsafe { Thread(NonNull::from(&mut (*current_metadata()).thread)) }
955}
956
957/// Return the current thread id.
958///
959/// This is the same as [`rustix::thread::gettid`], but loads the value from a
960/// field in the runtime rather than making a system call.
961#[inline]
962#[must_use]
963pub fn current_id() -> ThreadId {
964    // Don't use the `id` function here because it returns an `Option` to
965    // handle the case where the thread has exited. We're querying the current
966    // thread which we know is still running because we're on it.
967    //
968    // SAFETY: All threads have been initialized, including the main thread
969    // with `initialize_main`, so `current()` returns a valid pointer.
970    let tid = unsafe { ThreadId::from_raw_unchecked(current().0.as_ref().thread_id.load(SeqCst)) };
971    debug_assert_eq!(tid, gettid(), "`current_id` disagrees with `gettid`");
972    tid
973}
974
975/// Set the current thread id, after a `fork`.
976///
977/// The only valid use for this is in the implementation of libc-like `fork`
978/// wrappers such as the one in c-scape. `posix_spawn`-like uses of `fork`
979/// don't need to do this because they shouldn't do anything that cares about
980/// the thread id before doing their `execve`.
981///
982/// # Safety
983///
984/// This must only be called immediately after a `fork` before any other
985/// threads are created. `tid` must be the same value as what [`gettid`] would
986/// return.
987#[doc(hidden)]
988#[inline]
989pub unsafe fn set_current_id_after_a_fork(tid: ThreadId) {
990    let current = current();
991    debug_assert_ne!(
992        tid.as_raw_nonzero().get(),
993        current.0.as_ref().thread_id.load(SeqCst),
994        "current thread ID already matches new thread ID"
995    );
996    debug_assert_eq!(tid, gettid(), "new thread ID disagrees with `gettid`");
997    current
998        .0
999        .as_ref()
1000        .thread_id
1001        .store(tid.as_raw_nonzero().get(), SeqCst);
1002}
1003
/// Return the address of the thread-local `errno` state.
///
/// This is equivalent to `__errno_location()` in glibc and musl.
#[cfg(feature = "unstable-errno")]
#[inline]
pub fn errno_location() -> *mut i32 {
    // SAFETY: Thread state is initialized before this can be called, so
    // `current_metadata()` points to valid metadata for the current thread.
    // The cast assumes `errno_val` has the same in-memory representation as
    // an `i32` (presumably a `Cell<i32>`, given the `Cell` import above —
    // confirm against the `ThreadData` definition).
    unsafe { core::ptr::addr_of_mut!((*current_metadata()).thread.errno_val).cast::<i32>() }
}
1012
/// Return the TLS address for the given `module` and `offset` for the current
/// thread.
///
/// `module` is the TLS module index (always 1 here, since dynamic linking is
/// not supported) and `offset` is the byte offset of the variable within the
/// module's TLS segment.
#[inline]
#[must_use]
pub fn current_tls_addr(module: usize, offset: usize) -> *mut c_void {
    // Offset 0 is the generation field, and we don't support dynamic linking,
    // so we should only ever see 1 here.
    assert_eq!(module, 1);

    // Platforms where TLS data goes after the ABI-exposed fields: the TLS
    // block starts just past the end of `Abi`, relative to the thread
    // pointer (which points at `Abi::thread_pointee`).
    #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
    {
        thread_pointer()
            .wrapping_byte_add(size_of::<Abi>() - offset_of!(Abi, thread_pointee))
            .wrapping_byte_add(TLS_OFFSET)
            .wrapping_byte_add(offset)
    }

    // Platforms where TLS data goes before the ABI-exposed fields: the TLS
    // block ends at the thread pointer, so step back by its total size.
    //
    // SAFETY: `STARTUP_TLS_INFO` has already been initialized by
    // [`initialize_startup_info`].
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    unsafe {
        thread_pointer()
            .wrapping_byte_sub(STARTUP_TLS_INFO.mem_size)
            .wrapping_byte_add(TLS_OFFSET)
            .wrapping_byte_add(offset)
    }
}
1043
1044/// Return the id of a thread, or `None` if the thread has exited.
1045///
1046/// # Safety
1047///
1048/// `thread` must point to a valid thread record.
1049#[inline]
1050#[cfg_attr(docsrs, doc(cfg(feature = "take-charge")))]
1051pub unsafe fn id(thread: Thread) -> Option<ThreadId> {
1052    let raw = thread.0.as_ref().thread_id.load(SeqCst);
1053    ThreadId::from_raw(raw)
1054}
1055
/// Return a thread's stack address (lowest address), size, and guard
/// size.
///
/// # Safety
///
/// `thread` must point to a valid thread record.
#[inline]
#[must_use]
pub unsafe fn stack(thread: Thread) -> (*mut c_void, usize, usize) {
    let data = thread.0.as_ref();
    (data.stack_addr, data.stack_size, data.guard_size)
}
1068
1069/// Return the default stack size for new threads.
1070#[inline]
1071#[must_use]
1072pub fn default_stack_size() -> usize {
1073    // This is just something simple that works for now.
1074    //
1075    // SAFETY: `STARTUP_STACK_SIZE` has already been initialized by
1076    // [`initialize_startup_info`].
1077    unsafe { max(0x20000, STARTUP_STACK_SIZE) }
1078}
1079
1080/// Return the default guard size for new threads.
1081#[inline]
1082#[must_use]
1083pub fn default_guard_size() -> usize {
1084    // This is just something simple that works for now.
1085    page_size() * 4
1086}
1087
1088/// Yield the current thread, encouraging other threads to run.
1089#[inline]
1090pub fn yield_current() {
1091    rustix::thread::sched_yield()
1092}
1093
/// The ARM ABI expects this to be defined.
///
/// On Arm targets, compilers emit calls to `__aeabi_read_tp` to obtain the
/// thread pointer; forward to our `thread_pointer` implementation.
#[cfg(target_arch = "arm")]
#[no_mangle]
extern "C" fn __aeabi_read_tp() -> *mut c_void {
    thread_pointer()
}
1100
/// Some targets use this global variable instead of the TLS `canary` field.
///
/// NOTE(review): this is initialized to 0 here; presumably startup code
/// elsewhere stores a randomized value into it before user code runs —
/// confirm in the initialization path.
#[no_mangle]
static mut __stack_chk_guard: usize = 0;
1104
/// Round `addr` up to the next multiple of `boundary`.
///
/// `boundary` must be a nonzero power of two; the mask arithmetic below
/// silently produces a meaningless result otherwise, so we check in debug
/// builds. `addr + (boundary - 1)` must not overflow `usize`.
const fn round_up(addr: usize, boundary: usize) -> usize {
    debug_assert!(boundary.is_power_of_two());
    (addr + (boundary - 1)) & boundary.wrapping_neg()
}
1108
// We define `clone` and `CloneFlags` here in `origin` instead of `rustix`
// because `clone` needs custom assembly code that knows about what we're
// using it for.
bitflags::bitflags! {
    /// Flags for the `clone` system call, mirroring Linux's `CLONE_*`
    /// constants.
    struct CloneFlags: u32 {
        const NEWTIME        = linux_raw_sys::general::CLONE_NEWTIME; // since Linux 5.6
        const VM             = linux_raw_sys::general::CLONE_VM;
        const FS             = linux_raw_sys::general::CLONE_FS;
        const FILES          = linux_raw_sys::general::CLONE_FILES;
        const SIGHAND        = linux_raw_sys::general::CLONE_SIGHAND;
        const PIDFD          = linux_raw_sys::general::CLONE_PIDFD; // since Linux 5.2
        const PTRACE         = linux_raw_sys::general::CLONE_PTRACE;
        const VFORK          = linux_raw_sys::general::CLONE_VFORK;
        const PARENT         = linux_raw_sys::general::CLONE_PARENT;
        const THREAD         = linux_raw_sys::general::CLONE_THREAD;
        const NEWNS          = linux_raw_sys::general::CLONE_NEWNS;
        const SYSVSEM        = linux_raw_sys::general::CLONE_SYSVSEM;
        const SETTLS         = linux_raw_sys::general::CLONE_SETTLS;
        const PARENT_SETTID  = linux_raw_sys::general::CLONE_PARENT_SETTID;
        const CHILD_CLEARTID = linux_raw_sys::general::CLONE_CHILD_CLEARTID;
        const DETACHED       = linux_raw_sys::general::CLONE_DETACHED;
        const UNTRACED       = linux_raw_sys::general::CLONE_UNTRACED;
        const CHILD_SETTID   = linux_raw_sys::general::CLONE_CHILD_SETTID;
        const NEWCGROUP      = linux_raw_sys::general::CLONE_NEWCGROUP; // since Linux 4.6
        const NEWUTS         = linux_raw_sys::general::CLONE_NEWUTS;
        const NEWIPC         = linux_raw_sys::general::CLONE_NEWIPC;
        const NEWUSER        = linux_raw_sys::general::CLONE_NEWUSER;
        const NEWPID         = linux_raw_sys::general::CLONE_NEWPID;
        const NEWNET         = linux_raw_sys::general::CLONE_NEWNET;
        const IO             = linux_raw_sys::general::CLONE_IO;
    }
}