maniac_runtime/runtime/
preemption.rs

1//! Preemptive scheduling support for worker threads.
2//!
3//! This module provides platform-specific mechanisms to **interrupt** worker threads
4//! without terminating them, allowing generator context switching.
5//!
6//! # Architecture
7//!
8//! ## The "Trampoline" Approach
9//!
10//! To safely preempt a thread (on both Unix and Windows), we cannot simply run code
11//! inside a signal handler or a suspended thread context, because:
12//! 1. **Unix**: Signal handlers run with signals blocked and are extremely restricted (async-signal-safety).
13//!    Accessing Thread-Local Storage (TLS) or acquiring locks can deadlock or panic.
14//! 2. **Windows**: `SuspendThread` can stop a thread holding a lock (e.g., heap lock), leading to deadlocks
15//!    if we try to allocate or use locks. Register corruption is also a major risk.
16//!
17//! **Solution**: We "inject" a function call into the target thread's stream of execution.
18//!
19//! 1. **Interrupt**: We stop the thread (Signal on Unix, SuspendThread on Windows).
20//! 2. **Inject**: We modify the thread's stack and instruction pointer (RIP/PC) to simulate a call
21//!    to a `trampoline` function, saving the original RIP/PC on the stack.
22//! 3. **Resume**: The thread resumes execution at the `trampoline` (outside signal/suspend context).
23//! 4. **Trampoline**:
24//!    - Saves *all* volatile registers (preserving application state).
25//!    - Calls `rust_preemption_helper()` (safe Rust code, can touch TLS/Locks).
26//!    - Restores registers.
27//!    - Returns to the original code (via the saved RIP/PC).
28//!
29//! This ensures full safety: the actual preemption logic (checking flags, yielding) runs
30//! as normal thread code, not in an interrupt context.
31
32use crate::generator;
33use std::cell::Cell;
34use std::ptr;
35use std::sync::atomic::{AtomicBool, Ordering};
36
/// Why the worker generator handed control back to the scheduler/trampoline.
///
/// The discriminants are pinned (`repr(u8)`, explicit values) so a reason can be
/// carried through the `usize` value that the generator yields.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum GeneratorYieldReason {
    /// The worker loop reached a cooperative, scheduler-driven yield point.
    Cooperative = 0,
    /// The worker was interrupted non-cooperatively (signal/APC).
    Preempted = 1,
}

impl GeneratorYieldReason {
    /// Encodes the reason as its numeric discriminant.
    #[inline]
    pub const fn as_usize(self) -> usize {
        self as u8 as usize
    }

    /// Decodes a value produced by [`Self::as_usize`].
    ///
    /// Returns `None` for any value that is not a known discriminant.
    #[inline]
    pub const fn from_usize(value: usize) -> Option<Self> {
        if value == GeneratorYieldReason::Cooperative as usize {
            Some(Self::Cooperative)
        } else if value == GeneratorYieldReason::Preempted as usize {
            Some(Self::Preempted)
        } else {
            None
        }
    }
}
62
// Thread-local storage for the current worker's preemption state.
// These cells are read by `rust_preemption_helper`, which runs in normal thread
// context (not inside a signal handler), so TLS access is permitted there.
thread_local! {
    // Address of this worker's preemption flag; null until
    // `init_worker_thread_preemption` has been called on this thread.
    static CURRENT_WORKER_PREEMPTION_FLAG: Cell<*const AtomicBool> = const { Cell::new(ptr::null()) };
    // Type-erased pointer to the active generator scope; null whenever no scope is
    // registered (see `set_generator_scope` / `clear_generator_scope`).
    static CURRENT_GENERATOR_SCOPE: Cell<*mut ()> = const { Cell::new(ptr::null_mut()) };
}
69
70// ============================================================================
71// Public API
72// ============================================================================
73
74/// Initialize preemption for the current worker thread
75#[inline]
76pub(crate) fn init_worker_thread_preemption(flag: &AtomicBool) {
77    CURRENT_WORKER_PREEMPTION_FLAG.with(|cell| {
78        cell.set(flag as *const AtomicBool);
79    });
80}
81
82/// Set the current generator scope
83#[inline]
84pub(crate) fn set_generator_scope(scope_ptr: *mut ()) {
85    CURRENT_GENERATOR_SCOPE.with(|cell| cell.set(scope_ptr));
86}
87
88/// Clear the current generator scope
89#[inline]
90pub(crate) fn clear_generator_scope() {
91    CURRENT_GENERATOR_SCOPE.with(|cell| cell.set(ptr::null_mut()));
92}
93
/// Atomically resets `flag` to `false` and reports the value it held before.
///
/// Returns `true` when a preemption request was pending at the time of the call.
#[inline]
pub(crate) fn check_and_clear_preemption(flag: &AtomicBool) -> bool {
    AtomicBool::swap(flag, false, Ordering::AcqRel)
}
99
/// Failures reported while setting up or triggering worker preemption.
#[derive(Debug)]
pub enum PreemptionError {
    /// Installing the signal handler failed.
    SignalSetupFailed,
    /// Obtaining a usable handle for the worker thread failed.
    ThreadSetupFailed,
    /// The attempt to interrupt the worker thread failed.
    InterruptFailed,
    /// The target architecture has no preemption implementation.
    UnsupportedPlatform,
}

impl std::fmt::Display for PreemptionError {
    /// Writes the variant name, i.e. the same text as the `Debug` rendering.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let variant_name = match self {
            Self::SignalSetupFailed => "SignalSetupFailed",
            Self::ThreadSetupFailed => "ThreadSetupFailed",
            Self::InterruptFailed => "InterruptFailed",
            Self::UnsupportedPlatform => "UnsupportedPlatform",
        };
        f.write_str(variant_name)
    }
}

impl std::error::Error for PreemptionError {}
114
115// ============================================================================
116// Internal Helper (Called by Trampoline)
117// ============================================================================
118
119/// This function is called by the assembly trampoline.
120/// It runs on the worker thread's stack in a normal execution context.
121/// It is safe to access TLS and yield here.
122#[unsafe(no_mangle)]
123pub extern "C" fn rust_preemption_helper() {
124    // 1. Check and clear flag (for cooperative correctness / hygiene)
125    // We don't require it to be true to yield, because we might be here via Unix signal
126    // which couldn't set the flag.
127    CURRENT_WORKER_PREEMPTION_FLAG.with(|cell| {
128        let ptr = cell.get();
129        if !ptr.is_null() {
130            unsafe {
131                (*ptr).store(false, Ordering::Release);
132            }
133        }
134    });
135
136    // 2. Always yield if we have a generator scope
137    // The fact that this function executed means preemption was triggered.
138    CURRENT_GENERATOR_SCOPE.with(|cell| {
139        let scope_ptr = cell.get();
140        if scope_ptr.is_null() {
141            return;
142        }
143
144        // SAFETY: The pointer is set only while the worker generator is running and
145        // cleared immediately after it exits. The trampoline only executes while the
146        // worker is inside that generator, so the pointer is always valid here.
147        unsafe {
148            let scope = &mut *(scope_ptr as *mut generator::Scope<(), usize>);
149            let _ = scope.yield_(GeneratorYieldReason::Preempted.as_usize());
150        }
151    });
152}
153
154// ============================================================================
155// Platform Implementations
156// ============================================================================
157
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "riscv64",
    target_arch = "loongarch64"
)))]
mod unsupported {
    use super::*;

    /// Placeholder handle returned by [`init_worker_preemption`]; it carries no state.
    pub(crate) struct PreemptionHandle {}

    /// Stand-in thread handle for architectures without a preemption implementation.
    pub struct WorkerThreadHandle {}

    impl WorkerThreadHandle {
        /// Always fails with [`PreemptionError::UnsupportedPlatform`].
        pub fn interrupt(&self) -> Result<(), PreemptionError> {
            Err(PreemptionError::UnsupportedPlatform)
        }
    }

    /// Succeeds without installing anything and hands back an empty handle.
    pub(crate) fn init_worker_preemption() -> Result<PreemptionHandle, PreemptionError> {
        let handle = PreemptionHandle {};
        Ok(handle)
    }
}
178
179// ----------------------------------------------------------------------------
180// x86_64 Implementation
181// ----------------------------------------------------------------------------
182#[cfg(target_arch = "x86_64")]
183mod impl_x64 {
184    use super::*;
185
186    // x86_64 Trampoline (Unix)
187    #[cfg(unix)]
188    use std::arch::global_asm;
189
190    #[cfg(unix)]
191    macro_rules! define_x64_unix_trampoline {
192        ($section_directive:literal) => {
193            global_asm!(
194                $section_directive,
195                ".global preemption_trampoline",
196                "preemption_trampoline:",
197                // Context: RSP points to Saved RIP.
198                // SysV ABI Volatiles: RAX, RCX, RDX, RSI, RDI, R8-R11. XMM0-XMM15.
199                // We also save RBX for stack alignment.
200                "pushfq",
201                "push rax",
202                "push rcx",
203                "push rdx",
204                "push rsi",
205                "push rdi",
206                "push r8",
207                "push r9",
208                "push r10",
209                "push r11",
210                "push rbx",
211                // Save XMM0-XMM15 (16 regs * 16 bytes = 256 bytes)
212                "sub rsp, 256",
213                "movdqu [rsp + 240], xmm0",
214                "movdqu [rsp + 224], xmm1",
215                "movdqu [rsp + 208], xmm2",
216                "movdqu [rsp + 192], xmm3",
217                "movdqu [rsp + 176], xmm4",
218                "movdqu [rsp + 160], xmm5",
219                "movdqu [rsp + 144], xmm6",
220                "movdqu [rsp + 128], xmm7",
221                "movdqu [rsp + 112], xmm8",
222                "movdqu [rsp + 96], xmm9",
223                "movdqu [rsp + 80], xmm10",
224                "movdqu [rsp + 64], xmm11",
225                "movdqu [rsp + 48], xmm12",
226                "movdqu [rsp + 32], xmm13",
227                "movdqu [rsp + 16], xmm14",
228                "movdqu [rsp], xmm15",
229                // Align stack for call
230                "mov rbx, rsp",
231                "and rsp, -16",
232                "call rust_preemption_helper",
233                "mov rsp, rbx",
234                // Restore XMMs
235                "movdqu xmm15, [rsp]",
236                "movdqu xmm14, [rsp + 16]",
237                "movdqu xmm13, [rsp + 32]",
238                "movdqu xmm12, [rsp + 48]",
239                "movdqu xmm11, [rsp + 64]",
240                "movdqu xmm10, [rsp + 80]",
241                "movdqu xmm9, [rsp + 96]",
242                "movdqu xmm8, [rsp + 112]",
243                "movdqu xmm7, [rsp + 128]",
244                "movdqu xmm6, [rsp + 144]",
245                "movdqu xmm5, [rsp + 160]",
246                "movdqu xmm4, [rsp + 176]",
247                "movdqu xmm3, [rsp + 192]",
248                "movdqu xmm2, [rsp + 208]",
249                "movdqu xmm1, [rsp + 224]",
250                "movdqu xmm0, [rsp + 240]",
251                "add rsp, 256",
252                "pop rbx",
253                "pop r11",
254                "pop r10",
255                "pop r9",
256                "pop r8",
257                "pop rdi",
258                "pop rsi",
259                "pop rdx",
260                "pop rcx",
261                "pop rax",
262                "popfq",
263                // Restore Red Zone (128 bytes) + Return
264                "pop rax",      // Get RIP
265                "add rsp, 128", // Restore Red Zone
266                "jmp rax"       // Resume
267            );
268        };
269    }
270
271    #[cfg(all(unix, target_os = "macos"))]
272    define_x64_unix_trampoline!(".section __TEXT,__text");
273
274    #[cfg(all(unix, not(target_os = "macos")))]
275    define_x64_unix_trampoline!(".section .text");
276
277    // x86_64 Trampoline (Windows)
278    #[cfg(windows)]
279    use std::arch::global_asm;
280
281    #[cfg(windows)]
282    global_asm!(
283        ".section .text",
284        ".global preemption_trampoline",
285        "preemption_trampoline:",
286        // Windows x64 Volatiles: RAX, RCX, RDX, R8-R11. XMM0-XMM5.
287        // We also save RBX to use it for stack realignment.
288        "pushfq",
289        "push rax",
290        "push rcx",
291        "push rdx",
292        "push r8",
293        "push r9",
294        "push r10",
295        "push r11",
296        "push rbx",  // Save RBX (non-volatile, but we use it)
297        // Save XMM0-XMM5 (volatile on Windows)
298        "sub rsp, 96",
299        "movdqu [rsp + 80], xmm0",
300        "movdqu [rsp + 64], xmm1",
301        "movdqu [rsp + 48], xmm2",
302        "movdqu [rsp + 32], xmm3",
303        "movdqu [rsp + 16], xmm4",
304        "movdqu [rsp], xmm5",
305        // Align stack and allocate shadow space
306        "mov rbx, rsp",  // Save current RSP
307        "and rsp, -16",  // Align to 16 bytes
308        "sub rsp, 32",   // Shadow space
309        "call rust_preemption_helper",
310        // Restore stack
311        "mov rsp, rbx",  // Restore RSP (clears shadow space and alignment)
312        // Restore XMMs
313        "movdqu xmm5, [rsp]",
314        "movdqu xmm4, [rsp + 16]",
315        "movdqu xmm3, [rsp + 32]",
316        "movdqu xmm2, [rsp + 48]",
317        "movdqu xmm1, [rsp + 64]",
318        "movdqu xmm0, [rsp + 80]",
319        "add rsp, 96",
320        // Restore GPRs
321        "pop rbx",
322        "pop r11",
323        "pop r10",
324        "pop r9",
325        "pop r8",
326        "pop rdx",
327        "pop rcx",
328        "pop rax",
329        "popfq",
330        "ret"
331    );
332
333    #[cfg(unix)]
334    pub(crate) use unix_impl::init_worker_preemption;
335    #[cfg(unix)]
336    pub use unix_impl::{PreemptionHandle, WorkerThreadHandle};
337
338    #[cfg(unix)]
339    mod unix_impl {
340        use super::super::*;
341        use std::mem::MaybeUninit;
342
        unsafe extern "C" {
            // Assembly entry point emitted by the `global_asm!` trampoline for this
            // target. Rust code in this module only takes its address to install it as
            // the resume location; it does not call it directly.
            fn preemption_trampoline();
        }
346
347        pub struct WorkerThreadHandle {
348            pthread: libc::pthread_t,
349        }
350        unsafe impl Send for WorkerThreadHandle {}
351        unsafe impl Sync for WorkerThreadHandle {}
352
353        impl WorkerThreadHandle {
354            pub fn current() -> Result<Self, PreemptionError> {
355                Ok(Self {
356                    pthread: unsafe { libc::pthread_self() },
357                })
358            }
359            pub fn interrupt(&self) -> Result<(), PreemptionError> {
360                unsafe {
361                    if libc::pthread_kill(self.pthread, libc::SIGVTALRM) == 0 {
362                        Ok(())
363                    } else {
364                        Err(PreemptionError::InterruptFailed)
365                    }
366                }
367            }
368        }
369
370        pub struct PreemptionHandle {
371            old_handler: libc::sigaction,
372        }
373        impl Drop for PreemptionHandle {
374            fn drop(&mut self) {
375                unsafe {
376                    libc::sigaction(libc::SIGVTALRM, &self.old_handler, ptr::null_mut());
377                }
378            }
379        }
380        pub(crate) fn init_worker_preemption() -> Result<PreemptionHandle, PreemptionError> {
381            init_preemption()
382        }
383        fn init_preemption() -> Result<PreemptionHandle, PreemptionError> {
384            unsafe {
385                let mut sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
386                sa.sa_sigaction = sigalrm_handler as usize;
387                libc::sigemptyset(&mut sa.sa_mask);
388                sa.sa_flags = libc::SA_RESTART | libc::SA_SIGINFO;
389                let mut old_sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
390                if libc::sigaction(libc::SIGVTALRM, &sa, &mut old_sa) != 0 {
391                    return Err(PreemptionError::SignalSetupFailed);
392                }
393                Ok(PreemptionHandle {
394                    old_handler: old_sa,
395                })
396            }
397        }
398
399        extern "C" fn sigalrm_handler(
400            _signum: libc::c_int,
401            _info: *mut libc::siginfo_t,
402            context: *mut libc::c_void,
403        ) {
404            unsafe {
405                let ctx = context as *mut libc::ucontext_t;
406                let mcontext = &mut (*ctx).uc_mcontext;
407
408                #[cfg(target_os = "linux")]
409                let (rip_ptr, rsp_ptr) = (
410                    &mut mcontext.gregs[libc::REG_RIP as usize] as *mut _ as *mut u64,
411                    &mut mcontext.gregs[libc::REG_RSP as usize] as *mut _ as *mut u64,
412                );
413                #[cfg(target_os = "macos")]
414                let (rip_ptr, rsp_ptr) = {
415                    let mctx = *mcontext;
416                    (
417                        &mut (*mctx).__ss.__rip as *mut u64,
418                        &mut (*mctx).__ss.__rsp as *mut u64,
419                    )
420                };
421                #[cfg(target_os = "freebsd")]
422                let (rip_ptr, rsp_ptr) = (
423                    &mut mcontext.mc_rip as *mut _ as *mut u64,
424                    &mut mcontext.mc_rsp as *mut _ as *mut u64,
425                );
426                #[cfg(target_os = "openbsd")]
427                let (rip_ptr, rsp_ptr) = (
428                    &mut mcontext.sc_rip as *mut _ as *mut u64,
429                    &mut mcontext.sc_rsp as *mut _ as *mut u64,
430                );
431                #[cfg(target_os = "netbsd")]
432                let (rip_ptr, rsp_ptr) = (
433                    &mut mcontext.__gregs[libc::_REG_RIP as usize] as *mut _ as *mut u64,
434                    &mut mcontext.__gregs[libc::_REG_RSP as usize] as *mut _ as *mut u64,
435                );
436
437                let original_rip = *rip_ptr;
438                let mut sp = *rsp_ptr;
439                sp -= 128; // Red Zone
440                sp -= 8;
441                *(sp as *mut u64) = original_rip;
442                *rsp_ptr = sp;
443                *rip_ptr = preemption_trampoline as u64;
444            }
445        }
446    }
447
448    #[cfg(windows)]
449    pub(crate) use windows_impl::init_worker_preemption;
450    #[cfg(windows)]
451    pub use windows_impl::{PreemptionHandle, WorkerThreadHandle};
452
453    #[cfg(windows)]
454    mod windows_impl {
455        use super::super::*;
456        use winapi::shared::minwindef::DWORD;
457        use winapi::um::winnt::HANDLE;
458
        unsafe extern "C" {
            // Assembly entry point emitted by the Windows `global_asm!` trampoline.
            // `interrupt` only takes its address to install it as the new RIP; it is
            // not called directly from Rust.
            fn preemption_trampoline();
        }
462
        /// Handle used by another thread to interrupt a specific worker thread.
        pub struct WorkerThreadHandle {
            /// Duplicated (real) thread handle; closed when the handle is dropped.
            thread_handle: HANDLE,
            /// Address of the worker's preemption flag, set to `true` by `interrupt`.
            /// Stored as a raw pointer, so the flag must outlive this handle.
            preemption_flag: *const AtomicBool,
        }
        // Declared by hand because both fields are raw pointer types.
        unsafe impl Send for WorkerThreadHandle {}
        unsafe impl Sync for WorkerThreadHandle {}
469
470        impl WorkerThreadHandle {
471            pub fn current(preemption_flag: &AtomicBool) -> Result<Self, PreemptionError> {
472                use winapi::um::handleapi::DuplicateHandle;
473                use winapi::um::processthreadsapi::{GetCurrentProcess, GetCurrentThread};
474                use winapi::um::winnt::{
475                    THREAD_GET_CONTEXT, THREAD_QUERY_INFORMATION, THREAD_SET_CONTEXT,
476                    THREAD_SUSPEND_RESUME,
477                };
478                unsafe {
479                    let mut real_handle: HANDLE = std::ptr::null_mut();
480                    let pseudo_handle = GetCurrentThread();
481                    let current_process = GetCurrentProcess();
482                    // Request specific access rights explicitly
483                    let access = THREAD_SUSPEND_RESUME
484                        | THREAD_GET_CONTEXT
485                        | THREAD_SET_CONTEXT
486                        | THREAD_QUERY_INFORMATION;
487                    if DuplicateHandle(
488                        current_process,
489                        pseudo_handle,
490                        current_process,
491                        &mut real_handle,
492                        access,
493                        0,
494                        0,
495                    ) == 0
496                    {
497                        println!(
498                            "DuplicateHandle failed: {}",
499                            std::io::Error::last_os_error()
500                        );
501                        return Err(PreemptionError::ThreadSetupFailed);
502                    }
503                    println!("DuplicateHandle succeeded, handle: {:?}", real_handle);
504                    Ok(Self {
505                        thread_handle: real_handle,
506                        preemption_flag: preemption_flag as *const AtomicBool,
507                    })
508                }
509            }
510
511            pub fn interrupt(&self) -> Result<(), PreemptionError> {
512                use winapi::um::processthreadsapi::{
513                    GetThreadContext, ResumeThread, SetThreadContext, SuspendThread,
514                };
515                use winapi::um::winnt::{CONTEXT, CONTEXT_CONTROL, CONTEXT_INTEGER};
516                unsafe {
517                    if SuspendThread(self.thread_handle) == u32::MAX {
518                        println!("SuspendThread failed: {}", std::io::Error::last_os_error());
519                        return Err(PreemptionError::InterruptFailed);
520                    }
521                    (*self.preemption_flag).store(true, Ordering::Release);
522
523                    // Ensure 16-byte alignment for CONTEXT
524                    #[repr(align(16))]
525                    struct AlignedContext(CONTEXT);
526                    let mut aligned = std::mem::zeroed::<AlignedContext>();
527                    let context = &mut aligned.0;
528
529                    context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
530
531                    // Check alignment
532                    let addr = context as *const _ as usize;
533                    if addr % 16 != 0 {
534                        println!("CONTEXT not aligned: 0x{:x}", addr);
535                    }
536
537                    if GetThreadContext(self.thread_handle, context) == 0 {
538                        println!(
539                            "GetThreadContext failed: {} (addr: 0x{:x})",
540                            std::io::Error::last_os_error(),
541                            addr
542                        );
543                        ResumeThread(self.thread_handle);
544                        return Err(PreemptionError::InterruptFailed);
545                    }
546                    let original_rip = context.Rip;
547                    let mut rsp = context.Rsp;
548                    rsp -= 8;
549                    *(rsp as *mut u64) = original_rip;
550                    context.Rsp = rsp;
551                    context.Rip = preemption_trampoline as u64;
552                    if SetThreadContext(self.thread_handle, context) == 0 {
553                        println!(
554                            "SetThreadContext failed: {}",
555                            std::io::Error::last_os_error()
556                        );
557                        ResumeThread(self.thread_handle);
558                        return Err(PreemptionError::InterruptFailed);
559                    }
560                    ResumeThread(self.thread_handle);
561                    Ok(())
562                }
563            }
564        }
565        impl Drop for WorkerThreadHandle {
566            fn drop(&mut self) {
567                unsafe {
568                    winapi::um::handleapi::CloseHandle(self.thread_handle);
569                }
570            }
571        }
572        pub struct PreemptionHandle {
573            _marker: std::marker::PhantomData<()>,
574        }
575        pub(crate) fn init_preemption() -> Result<PreemptionHandle, PreemptionError> {
576            Ok(PreemptionHandle {
577                _marker: std::marker::PhantomData,
578            })
579        }
580        pub(crate) fn init_worker_preemption() -> Result<PreemptionHandle, PreemptionError> {
581            init_preemption()
582        }
583    }
584}
585
586// ----------------------------------------------------------------------------
587// AArch64 Implementation
588// ----------------------------------------------------------------------------
589#[cfg(target_arch = "aarch64")]
590mod impl_aarch64 {
591    use super::*;
592    use std::arch::global_asm;
593
594    // AArch64 Trampoline
595    // We use x18 as a temporary (platform reserve) or stick to caller-saved.
596    // AAPCS64 Volatiles: x0-x18, x30 (LR). SIMD v0-v7, v16-v31.
597    // We must save all of them.
598    // Stack must be 16-byte aligned.
599    #[cfg(not(windows))]
600    macro_rules! define_aarch64_trampoline {
601        ($section_directive:literal) => {
602            global_asm!(
603                $section_directive,
604                ".global preemption_trampoline",
605                "preemption_trampoline:",
606                // Save pair: x30 (LR), x0
607                "stp x30, x0, [sp, #-16]!",
608                "stp x1, x2, [sp, #-16]!",
609                "stp x3, x4, [sp, #-16]!",
610                "stp x5, x6, [sp, #-16]!",
611                "stp x7, x8, [sp, #-16]!",
612                "stp x9, x10, [sp, #-16]!",
613                "stp x11, x12, [sp, #-16]!",
614                "stp x13, x14, [sp, #-16]!",
615                "stp x15, x16, [sp, #-16]!",
616                "stp x17, x18, [sp, #-16]!",
617                // Note: We should also save FP/SIMD registers if the platform uses them for arguments.
618                // For safety in generic code interrupts, we'll save v0-v7 and v16-v31 (volatile).
619                // Each is 128-bit.
620                "stp q0, q1, [sp, #-32]!",
621                "stp q2, q3, [sp, #-32]!",
622                "stp q4, q5, [sp, #-32]!",
623                "stp q6, q7, [sp, #-32]!",
624                "stp q16, q17, [sp, #-32]!",
625                "stp q18, q19, [sp, #-32]!",
626                "stp q20, q21, [sp, #-32]!",
627                "stp q22, q23, [sp, #-32]!",
628                "stp q24, q25, [sp, #-32]!",
629                "stp q26, q27, [sp, #-32]!",
630                "stp q28, q29, [sp, #-32]!",
631                "stp q30, q31, [sp, #-32]!",
632                // Call helper
633                "bl rust_preemption_helper",
634                // Restore FP/SIMD
635                "ldp q30, q31, [sp], #32",
636                "ldp q28, q29, [sp], #32",
637                "ldp q26, q27, [sp], #32",
638                "ldp q24, q25, [sp], #32",
639                "ldp q22, q23, [sp], #32",
640                "ldp q20, q21, [sp], #32",
641                "ldp q18, q19, [sp], #32",
642                "ldp q16, q17, [sp], #32",
643                "ldp q6, q7, [sp], #32",
644                "ldp q4, q5, [sp], #32",
645                "ldp q2, q3, [sp], #32",
646                "ldp q0, q1, [sp], #32",
647                // Restore GPR
648                "ldp x17, x18, [sp], #16",
649                "ldp x15, x16, [sp], #16",
650                "ldp x13, x14, [sp], #16",
651                "ldp x11, x12, [sp], #16",
652                "ldp x9, x10, [sp], #16",
653                "ldp x7, x8, [sp], #16",
654                "ldp x5, x6, [sp], #16",
655                "ldp x3, x4, [sp], #16",
656                "ldp x1, x2, [sp], #16",
657                "ldp x30, x0, [sp], #16",
658                // Return to original PC (which we stored in LR/x30 when mocking the call)
659                "ret"
660            );
661        };
662    }
663
664    #[cfg(all(not(windows), target_os = "macos"))]
665    define_aarch64_trampoline!(".section __TEXT,__text");
666
667    #[cfg(all(not(windows), not(target_os = "macos")))]
668    define_aarch64_trampoline!(".section .text");
669
670    #[cfg(windows)]
671    use std::arch::global_asm;
672
673    #[cfg(windows)]
674    global_asm!(
675        ".section .text",
676        ".global preemption_trampoline",
677        "preemption_trampoline:",
678        // Save pair: x30 (LR), x0
679        "stp x30, x0, [sp, #-16]!",
680        "stp x1, x2, [sp, #-16]!",
681        "stp x3, x4, [sp, #-16]!",
682        "stp x5, x6, [sp, #-16]!",
683        "stp x7, x8, [sp, #-16]!",
684        "stp x9, x10, [sp, #-16]!",
685        "stp x11, x12, [sp, #-16]!",
686        "stp x13, x14, [sp, #-16]!",
687        "stp x15, x16, [sp, #-16]!",
688        "stp x17, x18, [sp, #-16]!",
689        // Save FP/SIMD
690        "stp q0, q1, [sp, #-32]!",
691        "stp q2, q3, [sp, #-32]!",
692        "stp q4, q5, [sp, #-32]!",
693        "stp q6, q7, [sp, #-32]!",
694        "stp q16, q17, [sp, #-32]!",
695        "stp q18, q19, [sp, #-32]!",
696        "stp q20, q21, [sp, #-32]!",
697        "stp q22, q23, [sp, #-32]!",
698        "stp q24, q25, [sp, #-32]!",
699        "stp q26, q27, [sp, #-32]!",
700        "stp q28, q29, [sp, #-32]!",
701        "stp q30, q31, [sp, #-32]!",
702        // Call helper
703        "sub sp, sp, #32", // shadow space for Windows/ARM64
704        "bl rust_preemption_helper",
705        "add sp, sp, #32",
706        // Restore FP/SIMD
707        "ldp q30, q31, [sp], #32",
708        "ldp q28, q29, [sp], #32",
709        "ldp q26, q27, [sp], #32",
710        "ldp q24, q25, [sp], #32",
711        "ldp q22, q23, [sp], #32",
712        "ldp q20, q21, [sp], #32",
713        "ldp q18, q19, [sp], #32",
714        "ldp q16, q17, [sp], #32",
715        "ldp q6, q7, [sp], #32",
716        "ldp q4, q5, [sp], #32",
717        "ldp q2, q3, [sp], #32",
718        "ldp q0, q1, [sp], #32",
719        // Restore GPR
720        "ldp x17, x18, [sp], #16",
721        "ldp x15, x16, [sp], #16",
722        "ldp x13, x14, [sp], #16",
723        "ldp x11, x12, [sp], #16",
724        "ldp x9, x10, [sp], #16",
725        "ldp x7, x8, [sp], #16",
726        "ldp x5, x6, [sp], #16",
727        "ldp x3, x4, [sp], #16",
728        "ldp x1, x2, [sp], #16",
729        "ldp x30, x0, [sp], #16",
730        "ret"
731    );
732
733    #[cfg(unix)]
734    pub(crate) use unix_impl::init_worker_preemption;
735    #[cfg(unix)]
736    pub use unix_impl::{PreemptionHandle, WorkerThreadHandle};
737
738    #[cfg(unix)]
739    mod unix_impl {
740        use super::super::*;
741        use std::mem::MaybeUninit;
742
        unsafe extern "C" {
            // Assembly entry point emitted by the AArch64 `global_asm!` trampoline.
            // NOTE(review): presumably only its address is used by the signal handler,
            // as in the x86_64 implementation — confirm against the handler body.
            fn preemption_trampoline();
        }
746
747        pub struct WorkerThreadHandle {
748            pthread: libc::pthread_t,
749        }
750        unsafe impl Send for WorkerThreadHandle {}
751        unsafe impl Sync for WorkerThreadHandle {}
752
753        impl WorkerThreadHandle {
754            pub fn current() -> Result<Self, PreemptionError> {
755                Ok(Self {
756                    pthread: unsafe { libc::pthread_self() },
757                })
758            }
759            pub fn interrupt(&self) -> Result<(), PreemptionError> {
760                unsafe {
761                    if libc::pthread_kill(self.pthread, libc::SIGVTALRM) == 0 {
762                        Ok(())
763                    } else {
764                        Err(PreemptionError::InterruptFailed)
765                    }
766                }
767            }
768        }
769
770        pub struct PreemptionHandle {
771            old_handler: libc::sigaction,
772        }
773        impl Drop for PreemptionHandle {
774            fn drop(&mut self) {
775                unsafe {
776                    libc::sigaction(libc::SIGVTALRM, &self.old_handler, ptr::null_mut());
777                }
778            }
779        }
780        pub(crate) fn init_worker_preemption() -> Result<PreemptionHandle, PreemptionError> {
781            init_preemption()
782        }
783        fn init_preemption() -> Result<PreemptionHandle, PreemptionError> {
784            unsafe {
785                let mut sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
786                sa.sa_sigaction = sigalrm_handler as usize;
787                libc::sigemptyset(&mut sa.sa_mask);
788                sa.sa_flags = libc::SA_RESTART | libc::SA_SIGINFO;
789                let mut old_sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
790                if libc::sigaction(libc::SIGVTALRM, &sa, &mut old_sa) != 0 {
791                    return Err(PreemptionError::SignalSetupFailed);
792                }
793                Ok(PreemptionHandle {
794                    old_handler: old_sa,
795                })
796            }
797        }
798
799        extern "C" fn sigalrm_handler(
800            _signum: libc::c_int,
801            _info: *mut libc::siginfo_t,
802            context: *mut libc::c_void,
803        ) {
804            unsafe {
805                let ctx = context as *mut libc::ucontext_t;
806                let mcontext = &mut (*ctx).uc_mcontext;
807
808                #[cfg(target_os = "linux")]
809                let (pc_ptr, sp_ptr, lr_ptr) = (
810                    &mut mcontext.pc as *mut u64,
811                    &mut mcontext.sp as *mut u64,
812                    &mut mcontext.regs[30] as *mut u64, // x30 is LR
813                );
814
815                #[cfg(target_os = "macos")]
816                let (pc_ptr, sp_ptr, lr_ptr) = {
817                    let mctx = *mcontext; // Deref &mut *mut -> *mut
818                    (
819                        &mut (*mctx).__ss.__pc as *mut u64,
820                        &mut (*mctx).__ss.__sp as *mut u64,
821                        &mut (*mctx).__ss.__lr as *mut u64,
822                    )
823                };
824
825                #[cfg(target_os = "freebsd")]
826                let (pc_ptr, sp_ptr, lr_ptr) = (
827                    &mut mcontext.mc_gpregs.gp_elr as *mut _ as *mut u64,
828                    &mut mcontext.mc_gpregs.gp_sp as *mut _ as *mut u64,
829                    &mut mcontext.mc_gpregs.gp_lr as *mut _ as *mut u64,
830                );
831
832                #[cfg(target_os = "openbsd")]
833                let (pc_ptr, sp_ptr, lr_ptr) = (
834                    &mut mcontext.sc_elr as *mut _ as *mut u64,
835                    &mut mcontext.sc_sp as *mut _ as *mut u64,
836                    &mut mcontext.sc_lr as *mut _ as *mut u64,
837                );
838
839                #[cfg(target_os = "netbsd")]
840                let (pc_ptr, sp_ptr, lr_ptr) = (
841                    &mut mcontext.__gregs[libc::_REG_PC] as *mut _ as *mut u64,
842                    &mut mcontext.__gregs[libc::_REG_SP] as *mut _ as *mut u64,
843                    &mut mcontext.__gregs[libc::_REG_LR] as *mut _ as *mut u64,
844                );
845
846                let original_pc = *pc_ptr;
847
848                // On AArch64, "BL" writes PC+4 to LR.
849                // We are simulating a BL to the trampoline.
850                // We want the trampoline to return to original_pc.
851                // The trampoline ends with "ret", which jumps to LR (x30).
852                // So we must set LR = original_pc.
853
854                *lr_ptr = original_pc;
855
856                // Set PC to trampoline
857                *pc_ptr = preemption_trampoline as u64;
858
859                // No stack modification needed because we use LR for return address,
860                // unlike x86 where return address is on stack.
861            }
862        }
863    }
864
865    #[cfg(windows)]
866    pub(crate) use windows_impl::init_worker_preemption;
867    #[cfg(windows)]
868    pub use windows_impl::{PreemptionHandle, WorkerThreadHandle};
869
870    #[cfg(windows)]
871    mod windows_impl {
872        use super::super::*;
873        use winapi::shared::minwindef::DWORD;
874        use winapi::um::winnt::HANDLE;
875
876        unsafe extern "C" {
877            fn preemption_trampoline();
878        }
879
880        pub struct WorkerThreadHandle {
881            thread_handle: HANDLE,
882            preemption_flag: *const AtomicBool,
883        }
884        unsafe impl Send for WorkerThreadHandle {}
885        unsafe impl Sync for WorkerThreadHandle {}
886
887        impl WorkerThreadHandle {
888            pub fn current(preemption_flag: &AtomicBool) -> Result<Self, PreemptionError> {
889                use winapi::um::handleapi::DuplicateHandle;
890                use winapi::um::processthreadsapi::{GetCurrentProcess, GetCurrentThread};
891                use winapi::um::winnt::{
892                    THREAD_GET_CONTEXT, THREAD_QUERY_INFORMATION, THREAD_SET_CONTEXT,
893                    THREAD_SUSPEND_RESUME,
894                };
895                unsafe {
896                    let mut real_handle: HANDLE = std::ptr::null_mut();
897                    let pseudo_handle = GetCurrentThread();
898                    let current_process = GetCurrentProcess();
899                    // Request specific access rights explicitly
900                    let access = THREAD_SUSPEND_RESUME
901                        | THREAD_GET_CONTEXT
902                        | THREAD_SET_CONTEXT
903                        | THREAD_QUERY_INFORMATION;
904                    if DuplicateHandle(
905                        current_process,
906                        pseudo_handle,
907                        current_process,
908                        &mut real_handle,
909                        access,
910                        0,
911                        0,
912                    ) == 0
913                    {
914                        println!(
915                            "DuplicateHandle failed: {}",
916                            std::io::Error::last_os_error()
917                        );
918                        return Err(PreemptionError::ThreadSetupFailed);
919                    }
920                    println!("DuplicateHandle succeeded, handle: {:?}", real_handle);
921                    Ok(Self {
922                        thread_handle: real_handle,
923                        preemption_flag: preemption_flag as *const AtomicBool,
924                    })
925                }
926            }
927
928            pub fn interrupt(&self) -> Result<(), PreemptionError> {
929                use winapi::um::processthreadsapi::{
930                    GetThreadContext, ResumeThread, SetThreadContext, SuspendThread,
931                };
932                use winapi::um::winnt::{CONTEXT, CONTEXT_CONTROL, CONTEXT_INTEGER};
933
934                // Note: winapi's CONTEXT structure for ARM64 is different.
935                // We need to check what winapi provides for ARM64.
936                // Typically CONTEXT_ARM64 (if configured) or just CONTEXT.
937                // Fields: Pc, Sp, Lr.
938
939                unsafe {
940                    if SuspendThread(self.thread_handle) == u32::MAX {
941                        println!("SuspendThread failed: {}", std::io::Error::last_os_error());
942                        return Err(PreemptionError::InterruptFailed);
943                    }
944                    (*self.preemption_flag).store(true, Ordering::Release);
945                    let mut context: CONTEXT = std::mem::zeroed();
946                    context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
947                    if GetThreadContext(self.thread_handle, &mut context) == 0 {
948                        println!(
949                            "GetThreadContext failed: {}",
950                            std::io::Error::last_os_error()
951                        );
952                        ResumeThread(self.thread_handle);
953                        return Err(PreemptionError::InterruptFailed);
954                    }
955
956                    // For ARM64 on Windows, standard field names in C headers are Pc, Sp, Lr.
957                    // In winapi-rs (which often mirrors the C union structure), access might depend on arch.
958                    // However, winapi::um::winnt::CONTEXT adapts to the target architecture.
959                    // For aarch64-pc-windows-msvc, it should expose these fields.
960
961                    // Simulate Call: Set LR to PC
962                    let original_pc = context.Pc;
963                    context.Lr = original_pc;
964
965                    // Jump to trampoline
966                    context.Pc = preemption_trampoline as u64;
967
968                    if SetThreadContext(self.thread_handle, &context) == 0 {
969                        println!(
970                            "SetThreadContext failed: {}",
971                            std::io::Error::last_os_error()
972                        );
973                        ResumeThread(self.thread_handle);
974                        return Err(PreemptionError::InterruptFailed);
975                    }
976                    ResumeThread(self.thread_handle);
977                    Ok(())
978                }
979            }
980        }
981        impl Drop for WorkerThreadHandle {
982            fn drop(&mut self) {
983                unsafe {
984                    winapi::um::handleapi::CloseHandle(self.thread_handle);
985                }
986            }
987        }
988        pub struct PreemptionHandle {
989            _marker: std::marker::PhantomData<()>,
990        }
991        pub(crate) fn init_preemption() -> Result<PreemptionHandle, PreemptionError> {
992            Ok(PreemptionHandle {
993                _marker: std::marker::PhantomData,
994            })
995        }
996        pub(crate) fn init_worker_preemption() -> Result<PreemptionHandle, PreemptionError> {
997            init_preemption()
998        }
999    }
1000}
1001
// ----------------------------------------------------------------------------
// RISC-V 64 Implementation (Unix: Linux, FreeBSD, OpenBSD, NetBSD)
// ----------------------------------------------------------------------------
1005#[cfg(target_arch = "riscv64")]
1006mod impl_riscv64 {
1007    use super::*;
1008    use std::arch::global_asm;
1009
1010    // RISC-V 64 Trampoline
1011    // Volatiles: ra, t0-t6, a0-a7.
1012    // Stack align: 16 bytes.
1013    macro_rules! define_riscv64_trampoline {
1014        ($section_directive:literal) => {
1015            global_asm!(
1016                $section_directive,
1017                ".global preemption_trampoline",
1018                "preemption_trampoline:",
1019                "addi sp, sp, -320", // Plenty of space for volatile state (GPR + FP)
1020                // Save GPRs (Volatile: ra, t0-t6, a0-a7)
1021                "sd ra, 0(sp)",
1022                "sd t0, 8(sp)",
1023                "sd t1, 16(sp)",
1024                "sd t2, 24(sp)",
1025                "sd a0, 32(sp)",
1026                "sd a1, 40(sp)",
1027                "sd a2, 48(sp)",
1028                "sd a3, 56(sp)",
1029                "sd a4, 64(sp)",
1030                "sd a5, 72(sp)",
1031                "sd a6, 80(sp)",
1032                "sd a7, 88(sp)",
1033                "sd t3, 96(sp)",
1034                "sd t4, 104(sp)",
1035                "sd t5, 112(sp)",
1036                "sd t6, 120(sp)",
1037                // Save Float/Vector volatiles if needed. Assuming standard float extension (F/D).
1038                // Volatiles: ft0-ft11, fa0-fa7.
1039                "fsd ft0, 128(sp)",
1040                "fsd ft1, 136(sp)",
1041                "fsd ft2, 144(sp)",
1042                "fsd ft3, 152(sp)",
1043                "fsd ft4, 160(sp)",
1044                "fsd ft5, 168(sp)",
1045                "fsd ft6, 176(sp)",
1046                "fsd ft7, 184(sp)",
1047                "fsd fa0, 192(sp)",
1048                "fsd fa1, 200(sp)",
1049                "fsd fa2, 208(sp)",
1050                "fsd fa3, 216(sp)",
1051                "fsd fa4, 224(sp)",
1052                "fsd fa5, 232(sp)",
1053                "fsd fa6, 240(sp)",
1054                "fsd fa7, 248(sp)",
1055                "fsd ft8, 256(sp)",
1056                "fsd ft9, 264(sp)",
1057                "fsd ft10, 272(sp)",
1058                "fsd ft11, 280(sp)",
1059                "call rust_preemption_helper",
1060                // Restore
1061                "fld ft11, 280(sp)",
1062                "fld ft10, 272(sp)",
1063                "fld ft9, 264(sp)",
1064                "fld ft8, 256(sp)",
1065                "fld fa7, 248(sp)",
1066                "fld fa6, 240(sp)",
1067                "fld fa5, 232(sp)",
1068                "fld fa4, 224(sp)",
1069                "fld fa3, 216(sp)",
1070                "fld fa2, 208(sp)",
1071                "fld fa1, 200(sp)",
1072                "fld fa0, 192(sp)",
1073                "fld ft7, 184(sp)",
1074                "fld ft6, 176(sp)",
1075                "fld ft5, 168(sp)",
1076                "fld ft4, 160(sp)",
1077                "fld ft3, 152(sp)",
1078                "fld ft2, 144(sp)",
1079                "fld ft1, 136(sp)",
1080                "fld ft0, 128(sp)",
1081                "ld t6, 120(sp)",
1082                "ld t5, 112(sp)",
1083                "ld t4, 104(sp)",
1084                "ld t3, 96(sp)",
1085                "ld a7, 88(sp)",
1086                "ld a6, 80(sp)",
1087                "ld a5, 72(sp)",
1088                "ld a4, 64(sp)",
1089                "ld a3, 56(sp)",
1090                "ld a2, 48(sp)",
1091                "ld a1, 40(sp)",
1092                "ld a0, 32(sp)",
1093                "ld t2, 24(sp)",
1094                "ld t1, 16(sp)",
1095                "ld t0, 8(sp)",
1096                "ld ra, 0(sp)",
1097                "addi sp, sp, 320",
1098                "ret"
1099            );
1100        };
1101    }
1102
1103    #[cfg(not(target_os = "macos"))]
1104    define_riscv64_trampoline!(".section .text");
1105
1106    #[cfg(unix)]
1107    pub(crate) use unix_impl::init_worker_preemption;
1108    #[cfg(unix)]
1109    pub use unix_impl::{PreemptionHandle, WorkerThreadHandle};
1110
1111    #[cfg(unix)]
1112    mod unix_impl {
1113        use super::super::*;
1114        use std::mem::MaybeUninit;
1115
1116        unsafe extern "C" {
1117            fn preemption_trampoline();
1118        }
1119
1120        pub struct WorkerThreadHandle {
1121            pthread: libc::pthread_t,
1122        }
1123        unsafe impl Send for WorkerThreadHandle {}
1124        unsafe impl Sync for WorkerThreadHandle {}
1125
1126        impl WorkerThreadHandle {
1127            pub fn current() -> Result<Self, PreemptionError> {
1128                Ok(Self {
1129                    pthread: unsafe { libc::pthread_self() },
1130                })
1131            }
1132            pub fn interrupt(&self) -> Result<(), PreemptionError> {
1133                unsafe {
1134                    if libc::pthread_kill(self.pthread, libc::SIGVTALRM) == 0 {
1135                        Ok(())
1136                    } else {
1137                        Err(PreemptionError::InterruptFailed)
1138                    }
1139                }
1140            }
1141        }
1142
1143        pub struct PreemptionHandle {
1144            old_handler: libc::sigaction,
1145        }
1146        impl Drop for PreemptionHandle {
1147            fn drop(&mut self) {
1148                unsafe {
1149                    libc::sigaction(libc::SIGVTALRM, &self.old_handler, ptr::null_mut());
1150                }
1151            }
1152        }
1153        pub(crate) fn init_worker_preemption() -> Result<PreemptionHandle, PreemptionError> {
1154            init_preemption()
1155        }
1156        fn init_preemption() -> Result<PreemptionHandle, PreemptionError> {
1157            unsafe {
1158                let mut sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
1159                sa.sa_sigaction = sigalrm_handler as usize;
1160                libc::sigemptyset(&mut sa.sa_mask);
1161                sa.sa_flags = libc::SA_RESTART | libc::SA_SIGINFO;
1162                let mut old_sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
1163                if libc::sigaction(libc::SIGVTALRM, &sa, &mut old_sa) != 0 {
1164                    return Err(PreemptionError::SignalSetupFailed);
1165                }
1166                Ok(PreemptionHandle {
1167                    old_handler: old_sa,
1168                })
1169            }
1170        }
1171
1172        extern "C" fn sigalrm_handler(
1173            _signum: libc::c_int,
1174            _info: *mut libc::siginfo_t,
1175            context: *mut libc::c_void,
1176        ) {
1177            unsafe {
1178                let ctx = context as *mut libc::ucontext_t;
1179                let mcontext = &mut (*ctx).uc_mcontext;
1180
1181                // Linux RISC-V 64
1182                #[cfg(target_os = "linux")]
1183                let (pc_ptr, ra_ptr) = (
1184                    &mut mcontext.__gregs[0] as *mut _ as *mut u64, // REG_PC = 0
1185                    &mut mcontext.__gregs[1] as *mut _ as *mut u64, // REG_RA = 1
1186                );
1187
1188                #[cfg(target_os = "freebsd")]
1189                let (pc_ptr, ra_ptr) = (
1190                    &mut mcontext.mc_gpregs.gp_sepc as *mut _ as *mut u64,
1191                    &mut mcontext.mc_gpregs.gp_ra as *mut _ as *mut u64,
1192                );
1193
1194                #[cfg(target_os = "openbsd")]
1195                let (pc_ptr, ra_ptr) = (
1196                    &mut mcontext.sc_sepc as *mut _ as *mut u64,
1197                    &mut mcontext.sc_ra as *mut _ as *mut u64,
1198                );
1199
1200                #[cfg(target_os = "netbsd")]
1201                let (pc_ptr, ra_ptr) = (
1202                    &mut mcontext.__gregs[libc::_REG_PC] as *mut _ as *mut u64,
1203                    &mut mcontext.__gregs[libc::_REG_RA] as *mut _ as *mut u64,
1204                );
1205
1206                let original_pc = *pc_ptr;
1207
1208                // Simulate CALL (JAL):
1209                // Set RA (Return Address) to original PC
1210                *ra_ptr = original_pc;
1211
1212                // Jump to Trampoline
1213                *pc_ptr = preemption_trampoline as u64;
1214            }
1215        }
1216    }
1217}
1218
1219#[cfg(target_arch = "loongarch64")]
1220mod impl_loongarch64 {
1221    use super::*;
1222    use std::arch::global_asm;
1223
1224    #[cfg(not(target_os = "linux"))]
1225    compile_error!("LoongArch64 preemption is currently only implemented for Linux.");
1226
1227    #[cfg(target_os = "linux")]
1228    global_asm!(
1229        ".section .text",
1230        ".global preemption_trampoline",
1231        "preemption_trampoline:",
1232        // Reserve space for 30 GPRs and 32 FPRs (496 bytes total).
1233        "addi.d $sp, $sp, -496",
1234        // GPR spill area (0..232)
1235        "st.d $ra, $sp, 0",
1236        "st.d $tp, $sp, 8",
1237        "st.d $a0, $sp, 16",
1238        "st.d $a1, $sp, 24",
1239        "st.d $a2, $sp, 32",
1240        "st.d $a3, $sp, 40",
1241        "st.d $a4, $sp, 48",
1242        "st.d $a5, $sp, 56",
1243        "st.d $a6, $sp, 64",
1244        "st.d $a7, $sp, 72",
1245        "st.d $t0, $sp, 80",
1246        "st.d $t1, $sp, 88",
1247        "st.d $t2, $sp, 96",
1248        "st.d $t3, $sp, 104",
1249        "st.d $t4, $sp, 112",
1250        "st.d $t5, $sp, 120",
1251        "st.d $t6, $sp, 128",
1252        "st.d $t7, $sp, 136",
1253        "st.d $t8, $sp, 144",
1254        "st.d $u0, $sp, 152",
1255        "st.d $fp, $sp, 160",
1256        "st.d $s0, $sp, 168",
1257        "st.d $s1, $sp, 176",
1258        "st.d $s2, $sp, 184",
1259        "st.d $s3, $sp, 192",
1260        "st.d $s4, $sp, 200",
1261        "st.d $s5, $sp, 208",
1262        "st.d $s6, $sp, 216",
1263        "st.d $s7, $sp, 224",
1264        "st.d $s8, $sp, 232",
1265        // FPR spill area (240..488)
1266        "fst.d $f0,  $sp, 240",
1267        "fst.d $f1,  $sp, 248",
1268        "fst.d $f2,  $sp, 256",
1269        "fst.d $f3,  $sp, 264",
1270        "fst.d $f4,  $sp, 272",
1271        "fst.d $f5,  $sp, 280",
1272        "fst.d $f6,  $sp, 288",
1273        "fst.d $f7,  $sp, 296",
1274        "fst.d $f8,  $sp, 304",
1275        "fst.d $f9,  $sp, 312",
1276        "fst.d $f10, $sp, 320",
1277        "fst.d $f11, $sp, 328",
1278        "fst.d $f12, $sp, 336",
1279        "fst.d $f13, $sp, 344",
1280        "fst.d $f14, $sp, 352",
1281        "fst.d $f15, $sp, 360",
1282        "fst.d $f16, $sp, 368",
1283        "fst.d $f17, $sp, 376",
1284        "fst.d $f18, $sp, 384",
1285        "fst.d $f19, $sp, 392",
1286        "fst.d $f20, $sp, 400",
1287        "fst.d $f21, $sp, 408",
1288        "fst.d $f22, $sp, 416",
1289        "fst.d $f23, $sp, 424",
1290        "fst.d $f24, $sp, 432",
1291        "fst.d $f25, $sp, 440",
1292        "fst.d $f26, $sp, 448",
1293        "fst.d $f27, $sp, 456",
1294        "fst.d $f28, $sp, 464",
1295        "fst.d $f29, $sp, 472",
1296        "fst.d $f30, $sp, 480",
1297        "fst.d $f31, $sp, 488",
1298        "bl rust_preemption_helper",
1299        // Restore FPRs (descending order)
1300        "fld.d $f31, $sp, 488",
1301        "fld.d $f30, $sp, 480",
1302        "fld.d $f29, $sp, 472",
1303        "fld.d $f28, $sp, 464",
1304        "fld.d $f27, $sp, 456",
1305        "fld.d $f26, $sp, 448",
1306        "fld.d $f25, $sp, 440",
1307        "fld.d $f24, $sp, 432",
1308        "fld.d $f23, $sp, 424",
1309        "fld.d $f22, $sp, 416",
1310        "fld.d $f21, $sp, 408",
1311        "fld.d $f20, $sp, 400",
1312        "fld.d $f19, $sp, 392",
1313        "fld.d $f18, $sp, 384",
1314        "fld.d $f17, $sp, 376",
1315        "fld.d $f16, $sp, 368",
1316        "fld.d $f15, $sp, 360",
1317        "fld.d $f14, $sp, 352",
1318        "fld.d $f13, $sp, 344",
1319        "fld.d $f12, $sp, 336",
1320        "fld.d $f11, $sp, 328",
1321        "fld.d $f10, $sp, 320",
1322        "fld.d $f9,  $sp, 312",
1323        "fld.d $f8,  $sp, 304",
1324        "fld.d $f7,  $sp, 296",
1325        "fld.d $f6,  $sp, 288",
1326        "fld.d $f5,  $sp, 280",
1327        "fld.d $f4,  $sp, 272",
1328        "fld.d $f3,  $sp, 264",
1329        "fld.d $f2,  $sp, 256",
1330        "fld.d $f1,  $sp, 248",
1331        "fld.d $f0,  $sp, 240",
1332        // Restore GPRs (reverse order)
1333        "ld.d $s8, $sp, 232",
1334        "ld.d $s7, $sp, 224",
1335        "ld.d $s6, $sp, 216",
1336        "ld.d $s5, $sp, 208",
1337        "ld.d $s4, $sp, 200",
1338        "ld.d $s3, $sp, 192",
1339        "ld.d $s2, $sp, 184",
1340        "ld.d $s1, $sp, 176",
1341        "ld.d $s0, $sp, 168",
1342        "ld.d $fp, $sp, 160",
1343        "ld.d $u0, $sp, 152",
1344        "ld.d $t8, $sp, 144",
1345        "ld.d $t7, $sp, 136",
1346        "ld.d $t6, $sp, 128",
1347        "ld.d $t5, $sp, 120",
1348        "ld.d $t4, $sp, 112",
1349        "ld.d $t3, $sp, 104",
1350        "ld.d $t2, $sp, 96",
1351        "ld.d $t1, $sp, 88",
1352        "ld.d $t0, $sp, 80",
1353        "ld.d $a7, $sp, 72",
1354        "ld.d $a6, $sp, 64",
1355        "ld.d $a5, $sp, 56",
1356        "ld.d $a4, $sp, 48",
1357        "ld.d $a3, $sp, 40",
1358        "ld.d $a2, $sp, 32",
1359        "ld.d $a1, $sp, 24",
1360        "ld.d $a0, $sp, 16",
1361        "ld.d $tp, $sp, 8",
1362        "ld.d $ra, $sp, 0",
1363        "addi.d $sp, $sp, 496",
1364        "jirl $zero, $ra, 0"
1365    );
1366
1367    #[cfg(target_os = "linux")]
1368    pub(crate) use unix_impl::init_worker_preemption;
1369    #[cfg(target_os = "linux")]
1370    pub use unix_impl::{PreemptionHandle, WorkerThreadHandle};
1371
1372    #[cfg(target_os = "linux")]
1373    mod unix_impl {
1374        use super::super::*;
1375        use std::mem::MaybeUninit;
1376
1377        unsafe extern "C" {
1378            fn preemption_trampoline();
1379        }
1380
1381        pub struct WorkerThreadHandle {
1382            pthread: libc::pthread_t,
1383        }
1384        unsafe impl Send for WorkerThreadHandle {}
1385        unsafe impl Sync for WorkerThreadHandle {}
1386
1387        impl WorkerThreadHandle {
1388            pub fn current() -> Result<Self, PreemptionError> {
1389                Ok(Self {
1390                    pthread: unsafe { libc::pthread_self() },
1391                })
1392            }
1393            pub fn interrupt(&self) -> Result<(), PreemptionError> {
1394                unsafe {
1395                    if libc::pthread_kill(self.pthread, libc::SIGVTALRM) == 0 {
1396                        Ok(())
1397                    } else {
1398                        Err(PreemptionError::InterruptFailed)
1399                    }
1400                }
1401            }
1402        }
1403
1404        pub struct PreemptionHandle {
1405            old_handler: libc::sigaction,
1406        }
1407        impl Drop for PreemptionHandle {
1408            fn drop(&mut self) {
1409                unsafe {
1410                    libc::sigaction(libc::SIGVTALRM, &self.old_handler, ptr::null_mut());
1411                }
1412            }
1413        }
1414        pub(crate) fn init_worker_preemption() -> Result<PreemptionHandle, PreemptionError> {
1415            init_preemption()
1416        }
1417        fn init_preemption() -> Result<PreemptionHandle, PreemptionError> {
1418            unsafe {
1419                let mut sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
1420                sa.sa_sigaction = sigalrm_handler as usize;
1421                libc::sigemptyset(&mut sa.sa_mask);
1422                sa.sa_flags = libc::SA_RESTART | libc::SA_SIGINFO;
1423                let mut old_sa: libc::sigaction = MaybeUninit::zeroed().assume_init();
1424                if libc::sigaction(libc::SIGVTALRM, &sa, &mut old_sa) != 0 {
1425                    return Err(PreemptionError::SignalSetupFailed);
1426                }
1427                Ok(PreemptionHandle {
1428                    old_handler: old_sa,
1429                })
1430            }
1431        }
1432
1433        extern "C" fn sigalrm_handler(
1434            _signum: libc::c_int,
1435            _info: *mut libc::siginfo_t,
1436            context: *mut libc::c_void,
1437        ) {
1438            unsafe {
1439                let ctx = context as *mut libc::ucontext_t;
1440                let mcontext = &mut (*ctx).uc_mcontext;
1441
1442                let pc_ptr = &mut mcontext.__pc as *mut _ as *mut u64;
1443                let ra_ptr = &mut mcontext.__gregs[1] as *mut _ as *mut u64; // $ra lives in r1
1444
1445                let original_pc = *pc_ptr;
1446                *ra_ptr = original_pc;
1447                *pc_ptr = preemption_trampoline as u64;
1448            }
1449        }
1450    }
1451}
1452
1453#[cfg(target_arch = "aarch64")]
1454pub use impl_aarch64::*;
1455#[cfg(target_arch = "riscv64")]
1456pub use impl_riscv64::*;
1457#[cfg(target_arch = "loongarch64")]
1458pub use impl_loongarch64::*;
1459#[cfg(target_arch = "x86_64")]
1460pub use impl_x64::*;
1461#[cfg(not(any(
1462    target_arch = "x86_64",
1463    target_arch = "aarch64",
1464    target_arch = "riscv64",
1465    target_arch = "loongarch64"
1466)))]
1467pub use unsupported::*;