stackaroo/
lib.rs

1//! # stackaroo
2//!
3//! A highly unsafe library for swapping out the OS-provided thread stack with custom stacks.
4//!
5//! ## Overview
6//!
7//! `stackaroo` provides platform-specific assembly implementations (x86_64 and AArch64) to perform
8//! direct stack pointer manipulation, allowing you to execute functions on arbitrarily large custom
9//! stacks that exceed OS-provided stack limitations.
10//!
11//! ## Safety
12//!
13//! This library is **extremely unsafe** and should only be used by people who understand the
14//! implications of manual stack management. It's primarily intended for research, kernel development,
15//! and testing scenarios where you need to bypass OS stack limitations.
16//!
17//! ## Supported Platforms
18//!
19//! - x86_64 (Intel/AMD 64-bit)
20//! - AArch64 (ARM 64-bit)
21//!
22//! ## Features
23//!
24//! - `std` (default): Enables standard library support (implies `alloc`)
25//! - `tls` (default): Enables thread-local storage for thread-safe stack swapping (requires `std`)
26//! - `alloc`: Enables heap allocation support (required for `swap_to_heap`)
27//!
28//! ## FFI Bindings
29//!
30//! For C/C++ interoperability, see the `stackaroo-ffi` crate in this workspace.
31//! It provides C-compatible bindings and generates a `stackaroo.h` header file.
32//!
33//! ## Concurrency
34//!
35//! - With `tls` feature: One swap per thread (uses thread-local storage, thread-safe)
36//! - Without `tls` feature: One swap globally (uses static variables, not thread-safe)
37//! - Does not support recursive/nested stack swaps
38//!
39//! ## Examples
40//!
41//! ### Basic usage with heap-allocated stack
42//!
43//! ```no_run
44//! use stackaroo::swap_to_heap;
45//!
46//! fn deep_recursion(depth: usize) {
47//!     let large_array = [0u8; 1024 * 1024]; // 1MB per frame
48//!     std::hint::black_box(&large_array);   // Don't let it be compiled-out
49//!     if depth > 0 {
50//!         deep_recursion(depth - 1);
51//!     }
52//! }
53//!
54//! fn main() {
55//!     unsafe {
56//!         swap_to_heap(|_: &mut ()| deep_recursion(1024), None, 4 << 30)
57//!     }.expect("Stack swap failed");
58//! }
59//! ```
60//!
61//! ### Usage with static stack
62//!
63//! ```no_run
64//! use stackaroo::swap_to_static;
65//!
66//! static mut STACK: [u8; 1 << 26] = [0; 1 << 26]; // 64MB global-backed stack
67//!
68//! fn compute(arg: &mut u32) {
69//!     *arg = *arg * 2 + 1;
70//! }
71//!
72//! fn main() {
73//!     unsafe {
74//!         let mut value = 100u32;
75//!         swap_to_static(compute, Some(&mut value), &mut STACK).unwrap();
76//!         println!("Result: {}", value);
77//!     }
78//! }
79//! ```
80//!
81//! ### Usage with argument passing
82//!
83//! ```no_run
84//! use stackaroo::swap_to_heap;
85//!
86//! struct Args {
87//!     input: u64,
88//!     output: u64,
89//! }
90//!
91//! fn fibonacci(args: &mut Args) {
92//!     fn fib(n: u64) -> u64 {
93//!         if n <= 1 { return n; }
94//!         fib(n - 1) + fib(n - 2)
95//!     }
96//!     args.output = fib(args.input);
97//! }
98//!
99//! fn main() {
100//!     unsafe {
101//!         let mut args = Args { input: 40, output: 0 };
102//!         swap_to_heap(fibonacci, Some(&mut args), 1 << 28).unwrap();
103//!         println!("Fibonacci({}) = {}", args.input, args.output);
104//!     }
105//! }
106//! ```
107
108#![cfg_attr(not(feature = "std"), no_std)]
109
110#[cfg(feature = "alloc")]
111extern crate alloc;
112
113use core::ffi::c_void;
114use core::result::Result;
115use core::{ptr, sync::atomic};
116
117pub(crate) mod helpers;
118use helpers::*;
119
120mod arch;
121
/// Error types that can occur during stack swap operations.
///
/// The enum carries no data, so it is cheap to copy, compare and order. It implements
/// [`core::fmt::Display`] (and `std::error::Error` when the `std` feature is enabled) so it
/// can be propagated with `?` into generic error types.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
pub enum Error {
    /// The provided stack pointer is not properly aligned.
    ///
    /// Stack pointers must be aligned to the platform's requirements
    /// (typically 16 bytes on x86_64 and AArch64).
    StackPtrNotAligned,

    /// A stack swap is already in progress on the current thread (with `tls` feature)
    /// or globally (without `tls` feature).
    ///
    /// Nested or concurrent stack swaps are not supported. You must complete
    /// the current stack swap before initiating another one.
    StackSwapInProgress,
}

impl core::fmt::Display for Error {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Exhaustive match (no `_` arm) so that adding a variant forces a message to be written.
        f.write_str(match self {
            Error::StackPtrNotAligned => "stack pointer is not properly aligned",
            Error::StackSwapInProgress => "a stack swap is already in progress",
        })
    }
}

#[cfg(feature = "std")]
impl std::error::Error for Error {}
138
139/// Swaps to a custom stack at a specific memory address, executes a function, then returns.
140///
141/// This is the low-level function that powers both `swap_to_heap` and
142/// `swap_to_static`. It allows you to provide an arbitrary stack pointer,
143/// giving you complete control over stack placement.
144///
145/// # Type Parameters
146///
147/// * `T` - The type of the argument passed to the callout function. Can be any type.
148///
149/// # Parameters
150///
151/// * `callout` - The function to execute on the new stack. It receives a mutable reference
152///   to the argument.
153/// * `arg` - An optional mutable reference to pass to the callout function. Use `None` if
154///   no argument is needed.
155/// * `new_stack_top` - A raw pointer to the top of the new stack (stacks grow downward).
156///   Must be properly aligned (typically 16 bytes) and point to valid, writable memory.
157///
158/// # Returns
159///
160/// * `Ok(())` - The stack swap completed successfully.
161/// * `Err(Error::StackPtrNotAligned)` - The provided stack pointer is not properly aligned.
162/// * `Err(Error::StackSwapInProgress)` - Another stack swap is already in progress.
163///
164/// # Safety
165///
166/// This function is **extremely unsafe** and requires careful attention:
167///
168/// * `new_stack_top` must point to valid, writable memory with sufficient size
169/// * The memory region must be properly aligned (typically 16 bytes for x86_64/AArch64)
170/// * The stack must be large enough for the callout function's needs, including all
171///   local variables, function calls, and recursion
172/// * The callout function must not attempt to use stack references from before the swap
173/// * The callout function must not unwind (panic) past the swap point
174/// * Nested stack swaps are not supported and will return an error
175/// * With the `tls` feature, this is thread-safe but only one swap per thread is allowed
176/// * Without the `tls` feature, only one swap can be active globally
177/// * The caller is responsible for ensuring the memory remains valid for the entire duration
178/// * Stack memory must not be freed or deallocated while the swap is active
179///
180/// # Platform-Specific Details
181///
182/// * **x86_64**: Uses `rsp` register for stack pointer manipulation
183/// * **AArch64**: Uses `sp` register for stack pointer manipulation
184/// * Stacks grow downward, so `new_stack_top` should point to the highest address
185///
186/// # Examples
187///
188/// ```no_run
189/// use stackaroo::swap_to;
190/// use std::alloc::{alloc, dealloc, Layout};
191///
192/// unsafe {
193///     // Allocate 1MB of memory for the stack
194///     let layout = Layout::from_size_align(1 << 20, 16).unwrap();
195///     let stack_bottom = alloc(layout);
196///     let stack_top = stack_bottom.add(1 << 20);
197///     
198///     // Execute function on custom stack
199///     let mut value = 42u32;
200///     swap_to(
201///         |v: &mut u32| { *v *= 2; },
202///         Some(&mut value),
203///         stack_top as *mut core::ffi::c_void
204///     ).unwrap();
205///     
206///     assert_eq!(value, 84);
207///     
208///     // Clean up
209///     dealloc(stack_bottom, layout);
210/// }
211/// ```
212///
213/// # Advanced Example: Memory-Mapped Stack
214///
215/// ```no_run
216/// # #[cfg(unix)]
217/// # {
218/// use stackaroo::swap_to;
219///
220/// unsafe {
221///     // Map 4MB of memory for a custom stack (Unix example)
222///     let size = 4 * 1024 * 1024;
223///     let addr = libc::mmap(
224///         std::ptr::null_mut(),
225///         size,
226///         libc::PROT_READ | libc::PROT_WRITE,
227///         libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
228///         -1,
229///         0,
230///     );
231///     
232///     if addr == libc::MAP_FAILED {
233///         panic!("mmap failed");
234///     }
235///     
236///     let stack_top = (addr as *mut u8).add(size);
237///     
238///     swap_to(
239///         |_: &mut ()| println!("Running on memory-mapped stack!"),
240///         None,
241///         stack_top as *mut core::ffi::c_void
242///     ).unwrap();
243///     
244///     libc::munmap(addr, size);
245/// }
246/// # }
247/// ```
248///
249/// # Notes
250///
251/// * Most users should prefer `swap_to_heap` or `swap_to_static` instead
252/// * This function uses compiler memory fences to ensure proper ordering of operations
253/// * The implementation uses platform-specific inline assembly for stack manipulation
254/// * Consider using this function when you need custom memory management (e.g., memory-mapped stacks)
255#[inline(never)]
256pub unsafe fn swap_to<T>(
257    callout: fn(&mut T) -> (),
258    arg: Option<&mut T>,
259    new_stack_top: *mut c_void,
260) -> Result<(), Error> {
261    if !new_stack_top.is_aligned() {
262        return Err(Error::StackPtrNotAligned);
263    }
264
265    if !get_old_rsp().is_null() || !get_callout_arg().is_null() {
266        return Err(Error::StackSwapInProgress);
267    }
268
269    // Preserve the callout function and argument in global variables
270    set_callout(core::mem::transmute(callout));
271
272    // Preserve the argument pointer in a global variable
273    if let Some(arg) = arg {
274        set_callout_arg(arg as *mut T as *mut c_void);
275    } else {
276        set_callout_arg(ptr::null_mut());
277    }
278
279    // Memory fence to ensure globals are written before stack switch
280    atomic::compiler_fence(atomic::Ordering::SeqCst);
281
282    // Save old stack and switch to new stack using platform-specific implementation
283    arch::swap(get_old_rsp_ptr(), new_stack_top);
284
285    // IMPORTANT NOTE:
286    // If by any chance the compiler inserts here instructions referencing the stack,
287    // this thing will burn and die.
288
289    // Memory fence to ensure global variables are written before function call
290    atomic::compiler_fence(atomic::Ordering::SeqCst);
291
292    // Call the callout function on the new stack, with an argument stored in a global variables
293    get_callout()(get_callout_arg());
294
295    // Memory fence to ensure function call completes before stack restoration
296    atomic::compiler_fence(atomic::Ordering::SeqCst);
297
298    // Restore old stack using platform-specific implementation
299    arch::restore(get_old_rsp_ptr());
300
301    // IMPORTANT NOTE:
302    // It's safe again for the compiler to insert stack-referencing instructions here.
303
304    // Memory fence to ensure stack is restored before clearing globals
305    atomic::compiler_fence(atomic::Ordering::SeqCst);
306
307    set_old_rsp(ptr::null_mut());
308    set_callout(dummy_callout);
309    set_callout_arg(ptr::null_mut());
310
311    Ok(())
312}
313
314/// Swaps to a heap-allocated stack, executes a function, then returns to the original stack.
315///
316/// This function allocates a new stack on the heap with the specified size, switches to it,
317/// executes the provided callout function, and then restores the original stack.
318///
319/// # Type Parameters
320///
321/// * `T` - The type of the argument passed to the callout function. Can be any type.
322///
323/// # Parameters
324///
325/// * `callout` - The function to execute on the new stack. It receives a mutable reference
326///   to the argument.
327/// * `arg` - An optional mutable reference to pass to the callout function. Use `None` if
328///   no argument is needed.
329/// * `stack_size` - The size in bytes of the stack to allocate. Must be large enough for
330///   the callout function's needs. Common sizes range from 1MB to 4GB.
331///
332/// # Returns
333///
334/// * `Ok(())` - The stack swap completed successfully.
335/// * `Err(Error::StackPtrNotAligned)` - The calculated stack pointer is not properly aligned.
336/// * `Err(Error::StackSwapInProgress)` - Another stack swap is already in progress.
337///
338/// # Safety
339///
340/// This function is highly unsafe for several reasons:
341///
342/// * The callout function must not attempt to use stack references from before the swap
343/// * The stack size must be sufficient for the callout function's needs, including all
344///   recursive calls and stack allocations
345/// * Nested stack swaps are not supported and will return an error
346/// * The callout function must not unwind (panic) past the swap point
347/// * With the `tls` feature, this is thread-safe but only one swap per thread is allowed
348/// * Without the `tls` feature, only one swap can be active globally
349///
350/// # Examples
351///
352/// ```no_run
353/// use stackaroo::swap_to_heap;
354///
355/// // Example 1: Without argument
356/// fn simple_function(_: &mut ()) {
357///     println!("Running on custom stack!");
358/// }
359///
360/// unsafe {
361///     swap_to_heap(simple_function, None, 1 << 20).unwrap();
362/// }
363///
364/// // Example 2: With argument
365/// fn compute(value: &mut u32) {
366///     *value = *value * 2;
367/// }
368///
369/// unsafe {
370///     let mut x = 42;
371///     swap_to_heap(compute, Some(&mut x), 1 << 20).unwrap();
372///     assert_eq!(x, 84);
373/// }
374/// ```
375///
376/// # Panics
377///
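/// Panics if `stack_size` exceeds `isize::MAX` bytes (capacity overflow). If the allocator
/// cannot provide the memory, the allocation error handler runs instead: it aborts the
/// process when `std` is linked and panics in `no_std` builds.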
379///
380/// # Feature Requirements
381///
382/// Requires the `alloc` feature to be enabled (enabled by default with `std`).
#[inline(never)]
#[cfg(feature = "alloc")]
pub unsafe fn swap_to_heap<T>(
    callout: fn(&mut T),
    arg: Option<&mut T>,
    stack_size: usize,
) -> Result<(), Error> {
    use alloc::vec;

    // Stack-pointer alignment (in bytes) documented for both supported targets.
    const STACK_ALIGN: usize = 16;

    // Zero-filled backing buffer. It is owned by this frame, so it stays allocated for the
    // whole `swap_to` call and is released when this function returns.
    let mut new_stack = vec![0u8; stack_size];
    let len = new_stack.len();
    let base = new_stack.as_mut_ptr();

    // A `Vec<u8>` only guarantees byte alignment and `stack_size` can be any value, so the
    // one-past-the-end address is not necessarily 16-byte aligned. Round the top down to
    // the nearest aligned address inside the buffer instead of handing out a misaligned one.
    let misalignment = (base as usize).wrapping_add(len) % STACK_ALIGN;

    // Nothing usable is left when the buffer is empty or shorter than the adjustment
    // (this also rejects the dangling pointer of a zero-sized `Vec`).
    if misalignment >= len {
        return Err(Error::StackPtrNotAligned);
    }

    let new_stack_top = base.add(len - misalignment);
    swap_to(callout, arg, new_stack_top as *mut c_void)
}
395
396/// Swaps to a static stack, executes a function, then returns to the original stack.
397///
398/// This function switches to a pre-allocated static stack buffer, executes the provided
399/// callout function, and then restores the original stack. Unlike `swap_to_heap`,
400/// this doesn't perform any allocation and works in `no_std` environments.
401///
402/// # Type Parameters
403///
404/// * `T` - The type of the argument passed to the callout function. Can be any type.
405///
406/// # Parameters
407///
408/// * `callout` - The function to execute on the new stack. It receives a mutable reference
409///   to the argument.
410/// * `arg` - An optional mutable reference to pass to the callout function. Use `None` if
411///   no argument is needed.
412/// * `stack` - A mutable reference to a static byte array to use as the stack. The array
413///   must have a `'static` lifetime and be large enough for the callout function's needs.
414///
415/// # Returns
416///
417/// * `Ok(())` - The stack swap completed successfully.
418/// * `Err(Error::StackPtrNotAligned)` - The calculated stack pointer is not properly aligned.
419/// * `Err(Error::StackSwapInProgress)` - Another stack swap is already in progress.
420///
421/// # Safety
422///
423/// This function is highly unsafe for several reasons:
424///
425/// * The callout function must not attempt to use stack references from before the swap
426/// * The static buffer must be large enough for the callout function's needs, including
427///   all recursive calls and stack allocations
428/// * Nested stack swaps are not supported and will return an error
429/// * The callout function must not unwind (panic) past the swap point
430/// * With the `tls` feature, this is thread-safe but only one swap per thread is allowed
431/// * Without the `tls` feature, only one swap can be active globally
432/// * The static buffer must not be used concurrently by multiple threads
433///
434/// # Examples
435///
436/// ```no_run
437/// use stackaroo::swap_to_static;
438///
439/// static mut MY_STACK: [u8; 1 << 26] = [0; 1 << 26]; // 64MB
440///
441/// fn compute(value: &mut u32) {
442///     *value = *value * 2 + 1;
443/// }
444///
445/// unsafe {
446///     let mut x = 100;
447///     swap_to_static(compute, Some(&mut x), &mut MY_STACK).unwrap();
448///     assert_eq!(x, 201);
449/// }
450/// ```
451///
452/// # Notes
453///
454/// * Stack grows downward, so the function uses the end of the buffer as the stack top
455/// * The buffer is not initialized or cleared between uses
456/// * This function works in both `std` and `no_std` environments
457#[inline(never)]
458pub unsafe fn swap_to_static<T>(
459    callout: fn(&mut T) -> (),
460    arg: Option<&mut T>,
461    stack: &'static mut [u8],
462) -> Result<(), Error> {
463    let stack_ptr = stack.as_ptr() as *mut u8;
464    let stack_top = stack_ptr.add(stack.len());
465    swap_to(callout, arg, stack_top as *mut c_void)
466}
467
#[cfg(test)]
#[allow(static_mut_refs)]
mod tests {
    use super::*;

    /// Recurses `depth` more levels; every frame holds a 1 MB array that is passed through
    /// `black_box` so it cannot be optimized away.
    fn deep_recursion(depth: usize) {
        let frame_ballast = [0u8; 1 << 20];
        core::hint::black_box(&frame_ballast);
        if depth > 0 {
            deep_recursion(depth - 1);
        }
    }

    /// Callout that needs more than 1 GB of stack (1024 frames of 1 MB each, plus one).
    fn callout(_: &mut ()) {
        deep_recursion(1024);
    }

    #[test]
    fn test_stack_swap() {
        // 4 GB heap-backed stack.
        let outcome = unsafe { swap_to_heap(callout, None, 1usize << 32) };
        outcome.unwrap();
    }

    mod fibbonacci {
        use super::*;

        /// Input/output pair handed to the callout through its `&mut` argument.
        #[derive(Debug)]
        struct Args {
            n: u64,
            result: u64,
        }

        /// Computes `fib(arg.n)` with naive double recursion and stores it in `arg.result`.
        fn fibonacci_callout(arg: &mut Args) {
            fn fibonacci(n: u64) -> u64 {
                match n {
                    0 | 1 => n,
                    _ => fibonacci(n - 1) + fibonacci(n - 2),
                }
            }
            arg.result = fibonacci(arg.n);
        }

        #[test]
        fn test_fibonacci() {
            const STACK_SIZE: usize = 1 << 28;
            // (input, expected Fibonacci number), checked in this order.
            let cases: [(u64, u64); 2] = [(35, 9227465), (40, 102334155)];

            for (n, expected) in cases {
                let mut args = Args { n, result: 0 };
                unsafe {
                    swap_to_heap(fibonacci_callout, Some(&mut args), STACK_SIZE).unwrap();
                }
                assert_eq!(args.result, expected);
            }
        }
    }

    #[test]
    fn test_no_concurrent_stack_swaps() {
        const STACK_SIZE: usize = 1 << 28;

        /// Runs on the swapped stack and tries to start a second swap from there.
        fn attempt_nested_swap(_: &mut ()) {
            let inner = unsafe { swap_to_heap(|_: &mut ()| {}, None, STACK_SIZE) };

            // The nested attempt must be rejected while the outer swap is active.
            assert_eq!(inner, Err(Error::StackSwapInProgress));
        }

        // The outer swap itself is expected to complete normally.
        let outer = unsafe { swap_to_heap(attempt_nested_swap, None, STACK_SIZE) };
        assert!(outer.is_ok());
    }

    mod global {
        use super::*;

        const STACK_SIZE: usize = 1 << 26;
        static mut GLOBAL_STACK: [u8; STACK_SIZE] = [255; STACK_SIZE];

        #[test]
        fn test_swap_to_with_global_static() {
            /// Maps `x` to `(x * 2 + 1) + 42` in place.
            fn global_stack_callout(arg: &mut u32) {
                let doubled_plus_one = *arg * 2 + 1;
                *arg = doubled_plus_one + 42;
            }

            let mut value = 100u32;
            unsafe {
                swap_to_static(global_stack_callout, Some(&mut value), &mut GLOBAL_STACK)
                    .unwrap();
            }
            // (100 * 2 + 1) + 42 = 243
            assert_eq!(value, 243);
        }
    }

    #[cfg(feature = "tls")]
    #[test]
    fn test_thread_safety() {
        use std::sync::{Arc, Barrier};
        use std::thread;

        const WORKERS: usize = 4;

        /// Does nothing; only proves that the swap and the return both work.
        fn simple_callout(_: &mut ()) {}

        let start_line = Arc::new(Barrier::new(WORKERS));

        // Spawn every worker before joining any of them so that all swaps overlap in time.
        let workers: Vec<_> = (0..WORKERS)
            .map(|_| {
                let gate = Arc::clone(&start_line);
                thread::spawn(move || {
                    // Release all workers at once.
                    gate.wait();

                    // Each thread performs its own, independent stack swap.
                    unsafe {
                        swap_to_heap(simple_callout, None, 1 << 28).unwrap();
                    }

                    true
                })
            })
            .collect();

        // Every worker must finish and report success.
        for worker in workers {
            let succeeded = worker.join().unwrap();
            assert!(succeeded);
        }
    }
}