stackaroo/lib.rs
1//! # stackaroo
2//!
3//! A highly unsafe library for swapping out the OS-provided thread stack with custom stacks.
4//!
5//! ## Overview
6//!
7//! `stackaroo` provides platform-specific assembly implementations (x86_64 and AArch64) to perform
8//! direct stack pointer manipulation, allowing you to execute functions on arbitrarily large custom
9//! stacks that exceed OS-provided stack limitations.
10//!
11//! ## Safety
12//!
13//! This library is **extremely unsafe** and should only be used by people who understand the
14//! implications of manual stack management. It's primarily intended for research, kernel development,
15//! and testing scenarios where you need to bypass OS stack limitations.
16//!
17//! ## Supported Platforms
18//!
19//! - x86_64 (Intel/AMD 64-bit)
20//! - AArch64 (ARM 64-bit)
21//!
22//! ## Features
23//!
24//! - `std` (default): Enables standard library support (implies `alloc`)
25//! - `tls` (default): Enables thread-local storage for thread-safe stack swapping (requires `std`)
26//! - `alloc`: Enables heap allocation support (required for `swap_to_heap`)
27//!
28//! ## FFI Bindings
29//!
30//! For C/C++ interoperability, see the `stackaroo-ffi` crate in this workspace.
31//! It provides C-compatible bindings and generates a `stackaroo.h` header file.
32//!
33//! ## Concurrency
34//!
35//! - With `tls` feature: One swap per thread (uses thread-local storage, thread-safe)
36//! - Without `tls` feature: One swap globally (uses static variables, not thread-safe)
37//! - Does not support recursive/nested stack swaps
38//!
39//! ## Examples
40//!
41//! ### Basic usage with heap-allocated stack
42//!
43//! ```no_run
44//! use stackaroo::swap_to_heap;
45//!
46//! fn deep_recursion(depth: usize) {
47//! let large_array = [0u8; 1024 * 1024]; // 1MB per frame
//!     std::hint::black_box(&large_array); // Prevent the array from being optimized away
49//! if depth > 0 {
50//! deep_recursion(depth - 1);
51//! }
52//! }
53//!
54//! fn main() {
55//! unsafe {
56//! swap_to_heap(|_: &mut ()| deep_recursion(1024), None, 4 << 30)
57//! }.expect("Stack swap failed");
58//! }
59//! ```
60//!
61//! ### Usage with static stack
62//!
63//! ```no_run
64//! use stackaroo::swap_to_static;
65//!
66//! static mut STACK: [u8; 1 << 26] = [0; 1 << 26]; // 64MB global-backed stack
67//!
68//! fn compute(arg: &mut u32) {
69//! *arg = *arg * 2 + 1;
70//! }
71//!
72//! fn main() {
73//! unsafe {
74//! let mut value = 100u32;
75//! swap_to_static(compute, Some(&mut value), &mut STACK).unwrap();
76//! println!("Result: {}", value);
77//! }
78//! }
79//! ```
80//!
81//! ### Usage with argument passing
82//!
83//! ```no_run
84//! use stackaroo::swap_to_heap;
85//!
86//! struct Args {
87//! input: u64,
88//! output: u64,
89//! }
90//!
91//! fn fibonacci(args: &mut Args) {
92//! fn fib(n: u64) -> u64 {
93//! if n <= 1 { return n; }
94//! fib(n - 1) + fib(n - 2)
95//! }
96//! args.output = fib(args.input);
97//! }
98//!
99//! fn main() {
100//! unsafe {
101//! let mut args = Args { input: 40, output: 0 };
102//! swap_to_heap(fibonacci, Some(&mut args), 1 << 28).unwrap();
103//! println!("Fibonacci({}) = {}", args.input, args.output);
104//! }
105//! }
106//! ```
107
108#![cfg_attr(not(feature = "std"), no_std)]
109
110#[cfg(feature = "alloc")]
111extern crate alloc;
112
113use core::ffi::c_void;
114use core::result::Result;
115use core::{ptr, sync::atomic};
116
117pub(crate) mod helpers;
118use helpers::*;
119
120mod arch;
121
/// Error types that can occur during stack swap operations.
///
/// The enum is `Copy` (it is a plain discriminant with no payload) and
/// implements [`core::fmt::Display`] so it can be reported in both `std`
/// and `no_std` environments.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub enum Error {
    /// The provided stack pointer is not properly aligned.
    ///
    /// Stack pointers must be aligned to the platform's requirements
    /// (typically 16 bytes on x86_64 and AArch64).
    StackPtrNotAligned,

    /// A stack swap is already in progress on the current thread (with `tls` feature)
    /// or globally (without `tls` feature).
    ///
    /// Nested or concurrent stack swaps are not supported. You must complete
    /// the current stack swap before initiating another one.
    StackSwapInProgress,
}

impl core::fmt::Display for Error {
    /// Formats a short, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Error::StackPtrNotAligned => write!(f, "stack pointer is not properly aligned"),
            Error::StackSwapInProgress => write!(f, "a stack swap is already in progress"),
        }
    }
}
138
139/// Swaps to a custom stack at a specific memory address, executes a function, then returns.
140///
141/// This is the low-level function that powers both `swap_to_heap` and
142/// `swap_to_static`. It allows you to provide an arbitrary stack pointer,
143/// giving you complete control over stack placement.
144///
145/// # Type Parameters
146///
147/// * `T` - The type of the argument passed to the callout function. Can be any type.
148///
149/// # Parameters
150///
151/// * `callout` - The function to execute on the new stack. It receives a mutable reference
152/// to the argument.
153/// * `arg` - An optional mutable reference to pass to the callout function. Use `None` if
154/// no argument is needed.
155/// * `new_stack_top` - A raw pointer to the top of the new stack (stacks grow downward).
156/// Must be properly aligned (typically 16 bytes) and point to valid, writable memory.
157///
158/// # Returns
159///
160/// * `Ok(())` - The stack swap completed successfully.
161/// * `Err(Error::StackPtrNotAligned)` - The provided stack pointer is not properly aligned.
162/// * `Err(Error::StackSwapInProgress)` - Another stack swap is already in progress.
163///
164/// # Safety
165///
166/// This function is **extremely unsafe** and requires careful attention:
167///
168/// * `new_stack_top` must point to valid, writable memory with sufficient size
169/// * The memory region must be properly aligned (typically 16 bytes for x86_64/AArch64)
170/// * The stack must be large enough for the callout function's needs, including all
171/// local variables, function calls, and recursion
172/// * The callout function must not attempt to use stack references from before the swap
173/// * The callout function must not unwind (panic) past the swap point
174/// * Nested stack swaps are not supported and will return an error
175/// * With the `tls` feature, this is thread-safe but only one swap per thread is allowed
176/// * Without the `tls` feature, only one swap can be active globally
177/// * The caller is responsible for ensuring the memory remains valid for the entire duration
178/// * Stack memory must not be freed or deallocated while the swap is active
179///
180/// # Platform-Specific Details
181///
182/// * **x86_64**: Uses `rsp` register for stack pointer manipulation
183/// * **AArch64**: Uses `sp` register for stack pointer manipulation
184/// * Stacks grow downward, so `new_stack_top` should point to the highest address
185///
186/// # Examples
187///
188/// ```no_run
189/// use stackaroo::swap_to;
190/// use std::alloc::{alloc, dealloc, Layout};
191///
192/// unsafe {
193/// // Allocate 1MB of memory for the stack
194/// let layout = Layout::from_size_align(1 << 20, 16).unwrap();
195/// let stack_bottom = alloc(layout);
196/// let stack_top = stack_bottom.add(1 << 20);
197///
198/// // Execute function on custom stack
199/// let mut value = 42u32;
200/// swap_to(
201/// |v: &mut u32| { *v *= 2; },
202/// Some(&mut value),
203/// stack_top as *mut core::ffi::c_void
204/// ).unwrap();
205///
206/// assert_eq!(value, 84);
207///
208/// // Clean up
209/// dealloc(stack_bottom, layout);
210/// }
211/// ```
212///
213/// # Advanced Example: Memory-Mapped Stack
214///
215/// ```no_run
216/// # #[cfg(unix)]
217/// # {
218/// use stackaroo::swap_to;
219///
220/// unsafe {
221/// // Map 4MB of memory for a custom stack (Unix example)
222/// let size = 4 * 1024 * 1024;
223/// let addr = libc::mmap(
224/// std::ptr::null_mut(),
225/// size,
226/// libc::PROT_READ | libc::PROT_WRITE,
227/// libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
228/// -1,
229/// 0,
230/// );
231///
232/// if addr == libc::MAP_FAILED {
233/// panic!("mmap failed");
234/// }
235///
236/// let stack_top = (addr as *mut u8).add(size);
237///
238/// swap_to(
239/// |_: &mut ()| println!("Running on memory-mapped stack!"),
240/// None,
241/// stack_top as *mut core::ffi::c_void
242/// ).unwrap();
243///
244/// libc::munmap(addr, size);
245/// }
246/// # }
247/// ```
248///
249/// # Notes
250///
251/// * Most users should prefer `swap_to_heap` or `swap_to_static` instead
252/// * This function uses compiler memory fences to ensure proper ordering of operations
253/// * The implementation uses platform-specific inline assembly for stack manipulation
254/// * Consider using this function when you need custom memory management (e.g., memory-mapped stacks)
255#[inline(never)]
256pub unsafe fn swap_to<T>(
257 callout: fn(&mut T) -> (),
258 arg: Option<&mut T>,
259 new_stack_top: *mut c_void,
260) -> Result<(), Error> {
261 if !new_stack_top.is_aligned() {
262 return Err(Error::StackPtrNotAligned);
263 }
264
265 if !get_old_rsp().is_null() || !get_callout_arg().is_null() {
266 return Err(Error::StackSwapInProgress);
267 }
268
269 // Preserve the callout function and argument in global variables
270 set_callout(core::mem::transmute(callout));
271
272 // Preserve the argument pointer in a global variable
273 if let Some(arg) = arg {
274 set_callout_arg(arg as *mut T as *mut c_void);
275 } else {
276 set_callout_arg(ptr::null_mut());
277 }
278
279 // Memory fence to ensure globals are written before stack switch
280 atomic::compiler_fence(atomic::Ordering::SeqCst);
281
282 // Save old stack and switch to new stack using platform-specific implementation
283 arch::swap(get_old_rsp_ptr(), new_stack_top);
284
285 // IMPORTANT NOTE:
286 // If by any chance the compiler inserts here instructions referencing the stack,
287 // this thing will burn and die.
288
289 // Memory fence to ensure global variables are written before function call
290 atomic::compiler_fence(atomic::Ordering::SeqCst);
291
292 // Call the callout function on the new stack, with an argument stored in a global variables
293 get_callout()(get_callout_arg());
294
295 // Memory fence to ensure function call completes before stack restoration
296 atomic::compiler_fence(atomic::Ordering::SeqCst);
297
298 // Restore old stack using platform-specific implementation
299 arch::restore(get_old_rsp_ptr());
300
301 // IMPORTANT NOTE:
302 // It's safe again for the compiler to insert stack-referencing instructions here.
303
304 // Memory fence to ensure stack is restored before clearing globals
305 atomic::compiler_fence(atomic::Ordering::SeqCst);
306
307 set_old_rsp(ptr::null_mut());
308 set_callout(dummy_callout);
309 set_callout_arg(ptr::null_mut());
310
311 Ok(())
312}
313
/// Swaps to a heap-allocated stack, executes a function, then returns to the original stack.
///
/// This function allocates a new stack on the heap with the specified size, switches to it,
/// executes the provided callout function, and then restores the original stack.
///
/// # Type Parameters
///
/// * `T` - The type of the argument passed to the callout function. Can be any type.
///
/// # Parameters
///
/// * `callout` - The function to execute on the new stack. It receives a mutable reference
///   to the argument.
/// * `arg` - An optional mutable reference to pass to the callout function. Use `None` if
///   no argument is needed.
/// * `stack_size` - The size in bytes of the stack to allocate. Must be large enough for
///   the callout function's needs. Common sizes range from 1MB to 4GB. Up to 15 bytes may
///   be sacrificed to align the stack top.
///
/// # Returns
///
/// * `Ok(())` - The stack swap completed successfully.
/// * `Err(Error::StackPtrNotAligned)` - The calculated stack pointer is not properly aligned
///   (not expected in practice, since the top is rounded down to a 16-byte boundary here).
/// * `Err(Error::StackSwapInProgress)` - Another stack swap is already in progress.
///
/// # Safety
///
/// This function is highly unsafe for several reasons:
///
/// * The callout function must not attempt to use stack references from before the swap
/// * The stack size must be sufficient for the callout function's needs, including all
///   recursive calls and stack allocations
/// * Nested stack swaps are not supported and will return an error
/// * The callout function must not unwind (panic) past the swap point
/// * With the `tls` feature, this is thread-safe but only one swap per thread is allowed
/// * Without the `tls` feature, only one swap can be active globally
///
/// # Examples
///
/// ```no_run
/// use stackaroo::swap_to_heap;
///
/// // Example 1: Without argument
/// fn simple_function(_: &mut ()) {
///     println!("Running on custom stack!");
/// }
///
/// unsafe {
///     swap_to_heap(simple_function, None, 1 << 20).unwrap();
/// }
///
/// // Example 2: With argument
/// fn compute(value: &mut u32) {
///     *value = *value * 2;
/// }
///
/// unsafe {
///     let mut x = 42;
///     swap_to_heap(compute, Some(&mut x), 1 << 20).unwrap();
///     assert_eq!(x, 84);
/// }
/// ```
///
/// # Panics
///
/// If the heap allocation for the stack fails (out of memory).
///
/// # Feature Requirements
///
/// Requires the `alloc` feature to be enabled (enabled by default with `std`).
#[inline(never)]
#[cfg(feature = "alloc")]
pub unsafe fn swap_to_heap<T>(
    callout: fn(&mut T) -> (),
    arg: Option<&mut T>,
    stack_size: usize,
) -> Result<(), Error> {
    use alloc::vec;
    // The buffer stays alive on this (original) stack frame for the whole swap.
    let mut new_stack = vec![0u8; stack_size];
    // A `Vec<u8>` only guarantees 1-byte alignment, so the end of the buffer may
    // not satisfy the 16-byte stack alignment the ABIs require. Round the top
    // *down* to the boundary — this wastes at most 15 bytes and stays inside
    // the allocation (assuming `stack_size >= 16`, trivially true for any
    // usable stack).
    let top_addr = new_stack.as_mut_ptr() as usize + new_stack.len();
    let aligned_top = top_addr & !15usize;
    swap_to(callout, arg, aligned_top as *mut c_void)
}
395
396/// Swaps to a static stack, executes a function, then returns to the original stack.
397///
398/// This function switches to a pre-allocated static stack buffer, executes the provided
399/// callout function, and then restores the original stack. Unlike `swap_to_heap`,
400/// this doesn't perform any allocation and works in `no_std` environments.
401///
402/// # Type Parameters
403///
404/// * `T` - The type of the argument passed to the callout function. Can be any type.
405///
406/// # Parameters
407///
408/// * `callout` - The function to execute on the new stack. It receives a mutable reference
409/// to the argument.
410/// * `arg` - An optional mutable reference to pass to the callout function. Use `None` if
411/// no argument is needed.
412/// * `stack` - A mutable reference to a static byte array to use as the stack. The array
413/// must have a `'static` lifetime and be large enough for the callout function's needs.
414///
415/// # Returns
416///
417/// * `Ok(())` - The stack swap completed successfully.
418/// * `Err(Error::StackPtrNotAligned)` - The calculated stack pointer is not properly aligned.
419/// * `Err(Error::StackSwapInProgress)` - Another stack swap is already in progress.
420///
421/// # Safety
422///
423/// This function is highly unsafe for several reasons:
424///
425/// * The callout function must not attempt to use stack references from before the swap
426/// * The static buffer must be large enough for the callout function's needs, including
427/// all recursive calls and stack allocations
428/// * Nested stack swaps are not supported and will return an error
429/// * The callout function must not unwind (panic) past the swap point
430/// * With the `tls` feature, this is thread-safe but only one swap per thread is allowed
431/// * Without the `tls` feature, only one swap can be active globally
432/// * The static buffer must not be used concurrently by multiple threads
433///
434/// # Examples
435///
436/// ```no_run
437/// use stackaroo::swap_to_static;
438///
439/// static mut MY_STACK: [u8; 1 << 26] = [0; 1 << 26]; // 64MB
440///
441/// fn compute(value: &mut u32) {
442/// *value = *value * 2 + 1;
443/// }
444///
445/// unsafe {
446/// let mut x = 100;
447/// swap_to_static(compute, Some(&mut x), &mut MY_STACK).unwrap();
448/// assert_eq!(x, 201);
449/// }
450/// ```
451///
452/// # Notes
453///
454/// * Stack grows downward, so the function uses the end of the buffer as the stack top
455/// * The buffer is not initialized or cleared between uses
456/// * This function works in both `std` and `no_std` environments
457#[inline(never)]
458pub unsafe fn swap_to_static<T>(
459 callout: fn(&mut T) -> (),
460 arg: Option<&mut T>,
461 stack: &'static mut [u8],
462) -> Result<(), Error> {
463 let stack_ptr = stack.as_ptr() as *mut u8;
464 let stack_top = stack_ptr.add(stack.len());
465 swap_to(callout, arg, stack_top as *mut c_void)
466}
467
#[cfg(test)]
#[allow(static_mut_refs)]
mod tests {
    use super::*;

    /// Consumes roughly `depth` MB of stack: one 1 MB frame per recursion level.
    fn deep_recursion(depth: usize) {
        let large_array = [0u8; 1 << 20]; // 1 MB per stack frame
        core::hint::black_box(&large_array); // Keep the array from being optimized out
        if depth == 0 {
            return;
        }
        deep_recursion(depth - 1);
    }

    fn callout(_: &mut ()) {
        deep_recursion(1024); // Should consume over 1GB of stack
    }

    #[test]
    fn test_stack_swap() {
        // Run on 4GB stack
        unsafe { swap_to_heap(callout, None, 1 << 32) }.unwrap();
    }

    mod fibonacci {
        use super::*;

        #[derive(Debug)]
        struct Args {
            n: u64,
            result: u64,
        }

        fn fibonacci_callout(arg: &mut Args) {
            // Deep recursive Fibonacci calculation that would overflow the regular stack
            fn fibonacci(n: u64) -> u64 {
                if n <= 1 {
                    return n;
                }
                fibonacci(n - 1) + fibonacci(n - 2)
            }
            arg.result = fibonacci(arg.n);
        }

        #[test]
        fn test_fibonacci() {
            const STACK_SIZE: usize = 1 << 28;
            unsafe {
                let mut args = Args { n: 35, result: 0 };
                swap_to_heap(fibonacci_callout, Some(&mut args), STACK_SIZE).unwrap();
                assert_eq!(args.result, 9227465);

                let mut args = Args { n: 40, result: 0 };
                swap_to_heap(fibonacci_callout, Some(&mut args), STACK_SIZE).unwrap();
                assert_eq!(args.result, 102334155);
            }
        }
    }

    #[test]
    fn test_no_concurrent_stack_swaps() {
        const STACK_SIZE: usize = 1 << 28;

        fn attempt_nested_swap(_: &mut ()) {
            // Try to initiate another stack swap while one is already in progress
            let result = unsafe { swap_to_heap(|_: &mut ()| {}, None, STACK_SIZE) };

            // This should fail with StackSwapInProgress error
            assert_eq!(result, Err(Error::StackSwapInProgress));
        }

        // Initiate the first stack swap which will attempt a nested swap
        let result = unsafe { swap_to_heap(attempt_nested_swap, None, STACK_SIZE) };

        // The outer stack swap should succeed
        assert!(result.is_ok());
    }

    mod global {
        use super::*;

        const STACK_SIZE: usize = 1 << 26;
        static mut GLOBAL_STACK: [u8; STACK_SIZE] = [255; STACK_SIZE];

        #[test]
        fn test_swap_to_with_global_static() {
            fn global_stack_callout(arg: &mut u32) {
                // Simple computation on the global static stack
                *arg = *arg * 2 + 1;
                // Add to our result for verification
                *arg += 42;
            }

            unsafe {
                let mut arg = 100u32;
                swap_to_static(global_stack_callout, Some(&mut arg), &mut GLOBAL_STACK).unwrap();
                // Expected: (100 * 2 + 1) + 42 = 243
                assert_eq!(arg, 243);
            }
        }
    }

    #[cfg(feature = "tls")]
    #[test]
    fn test_thread_safety() {
        use std::sync::{Arc, Barrier};
        use std::thread;

        fn simple_callout(_: &mut ()) {
            // Just a simple function that runs on the new stack
        }

        let barrier = Arc::new(Barrier::new(4));
        let mut handles = vec![];

        // Spawn multiple threads that all use stack swapping simultaneously
        for _ in 0..4 {
            let barrier_clone = Arc::clone(&barrier);
            let handle = thread::spawn(move || {
                barrier_clone.wait(); // Synchronize thread start

                // Each thread should be able to use stack swapping independently
                // This would fail with regular static variables due to race conditions
                unsafe {
                    swap_to_heap(simple_callout, None, 1 << 28).unwrap();
                }

                true // Return success
            });
            handles.push(handle);
        }

        // Wait for all threads to complete
        for handle in handles {
            let result = handle.join().unwrap();
            assert!(result); // All threads should complete successfully
        }
    }
}
606}