Skip to main content

tidepool_codegen/
signal_safety.rs

1//! JIT signal safety via sigsetjmp/siglongjmp.
2//!
3//! JIT-compiled code can crash with SIGILL (case trap) or SIGSEGV
4//! (bad memory access). This module provides `with_signal_protection` which
5//! wraps JIT calls so that these signals return a clean error instead of
6//! killing the process.
7//!
8//! The actual sigsetjmp call lives in C (`csrc/sigsetjmp_wrapper.c`) because
9//! sigsetjmp is a "returns_twice" function. LLVM requires the `returns_twice`
10//! attribute on the caller for correct codegen, but Rust doesn't expose this
11//! attribute. Calling sigsetjmp directly from Rust can cause the optimizer to
12//! break the second-return path, especially on aarch64.
13
14#[cfg(unix)]
15mod inner {
16    use std::cell::Cell;
17    use std::ptr::{self, addr_of, addr_of_mut, null_mut};
18    use std::sync::atomic::{AtomicUsize, Ordering};
19
/// Capacity, in bytes, of each pre-computed path buffer (trailing NUL included).
const CRASH_PATH_CAP: usize = 512;

/// NUL-terminated path of the crash log, filled in by `install()`.
///
/// Lives in a fixed-size static so the signal handler can hand it straight to
/// `open` without allocating.
static mut CRASH_LOG_PATH: [u8; CRASH_PATH_CAP] = [0; CRASH_PATH_CAP];
/// Length of the path in `CRASH_LOG_PATH`, not counting the NUL.
/// Zero means no path has been recorded, and crash-dump writers bail out.
static CRASH_LOG_PATH_LEN: AtomicUsize = AtomicUsize::new(0);
/// NUL-terminated path of the directory that holds the crash log.
static mut CRASH_DIR_PATH: [u8; CRASH_PATH_CAP] = [0; CRASH_PATH_CAP];
/// Length of the path in `CRASH_DIR_PATH`, not counting the NUL.
static CRASH_DIR_PATH_LEN: AtomicUsize = AtomicUsize::new(0);
26
27    /// Write a crash dump using only async-signal-safe syscalls.
28    /// No allocations, no locks, no std::fs — just raw libc open/write/close.
29    ///
30    // SAFETY: Called from signal handler context. Uses only async-signal-safe
31    // syscalls (open, write, close). Static buffers avoid heap allocation.
32    unsafe fn write_crash_dump(sig: libc::c_int, info: *mut libc::siginfo_t) {
33        let path_len = CRASH_LOG_PATH_LEN.load(Ordering::Relaxed);
34        if path_len == 0 {
35            return;
36        }
37
38        let fd = libc::open(
39            addr_of!(CRASH_LOG_PATH) as *const libc::c_char,
40            libc::O_WRONLY | libc::O_CREAT | libc::O_APPEND,
41            0o644,
42        );
43        if fd < 0 {
44            return;
45        }
46
47        // Write signal info
48        let sig_name: &[u8] = match sig {
49            libc::SIGILL => b"SIGILL",
50            libc::SIGSEGV => b"SIGSEGV",
51            libc::SIGBUS => b"SIGBUS",
52            libc::SIGFPE => b"SIGFPE",
53            libc::SIGTRAP => b"SIGTRAP",
54            _ => b"UNKNOWN",
55        };
56
57        let mut buf = [0u8; 512];
58        let mut pos = 0;
59
60        // "[tidepool-crash] sig="
61        let prefix = b"[tidepool-crash] sig=";
62        buf[pos..pos + prefix.len()].copy_from_slice(prefix);
63        pos += prefix.len();
64
65        buf[pos..pos + sig_name.len()].copy_from_slice(sig_name);
66        pos += sig_name.len();
67
68        // " addr="
69        let addr_prefix = b" addr=0x";
70        buf[pos..pos + addr_prefix.len()].copy_from_slice(addr_prefix);
71        pos += addr_prefix.len();
72
73        // Faulting address as hex
74        let si_addr = if !info.is_null() {
75            (*info).si_addr() as usize
76        } else {
77            0
78        };
79        // Write hex digits
80        let hex_digits = b"0123456789abcdef";
81        let mut hex_buf = [b'0'; 16];
82        let mut val = si_addr;
83        for i in (0..16).rev() {
84            hex_buf[i] = hex_digits[val & 0xf];
85            val >>= 4;
86        }
87        buf[pos..pos + 16].copy_from_slice(&hex_buf);
88        pos += 16;
89
90        // " jmpbuf="
91        let jmp_prefix = b" jmpbuf=";
92        buf[pos..pos + jmp_prefix.len()].copy_from_slice(jmp_prefix);
93        pos += jmp_prefix.len();
94
95        let jmp_set = JMP_BUF.with(|cell| !cell.get().is_null());
96        if jmp_set {
97            buf[pos..pos + 3].copy_from_slice(b"set");
98            pos += 3;
99        } else {
100            buf[pos..pos + 4].copy_from_slice(b"null");
101            pos += 4;
102        }
103
104        // " ts="
105        let ts_prefix = b" ts=";
106        buf[pos..pos + ts_prefix.len()].copy_from_slice(ts_prefix);
107        pos += ts_prefix.len();
108
109        // Unix timestamp as decimal
110        let mut ts = libc::time(ptr::null_mut()) as u64;
111        let mut ts_buf = [0u8; 20];
112        let mut ts_len = 0;
113        if ts == 0 {
114            ts_buf[0] = b'0';
115            ts_len = 1;
116        } else {
117            while ts > 0 {
118                ts_buf[ts_len] = b'0' + (ts % 10) as u8;
119                ts /= 10;
120                ts_len += 1;
121            }
122            ts_buf[..ts_len].reverse();
123        }
124        buf[pos..pos + ts_len].copy_from_slice(&ts_buf[..ts_len]);
125        pos += ts_len;
126
127        // " ctx="
128        let ctx_prefix = b" ctx=";
129        buf[pos..pos + ctx_prefix.len()].copy_from_slice(ctx_prefix);
130        pos += ctx_prefix.len();
131
132        crate::host_fns::SIGNAL_SAFE_CTX_LEN.with(|c| {
133            let len = c.get();
134            crate::host_fns::SIGNAL_SAFE_CTX.with(|buf_cell| {
135                let s_buf = buf_cell.get();
136                if len > 0 {
137                    buf[pos..pos + len].copy_from_slice(&s_buf[..len]);
138                    pos += len;
139                }
140            });
141        });
142
143        buf[pos] = b'\n';
144        pos += 1;
145
146        libc::write(fd, buf.as_ptr() as *const libc::c_void, pos);
147        libc::close(fd);
148    }
149
150    /// Write a simple crash message (for panics in trampoline).
151    // SAFETY: Same constraints as write_crash_dump — async-signal-safe syscalls only.
152    unsafe fn write_crash_dump_msg(msg: &[u8]) {
153        let path_len = CRASH_LOG_PATH_LEN.load(Ordering::Relaxed);
154        if path_len == 0 {
155            return;
156        }
157
158        let fd = libc::open(
159            addr_of!(CRASH_LOG_PATH) as *const libc::c_char,
160            libc::O_WRONLY | libc::O_CREAT | libc::O_APPEND,
161            0o644,
162        );
163        if fd < 0 {
164            return;
165        }
166
167        let prefix = b"[tidepool-crash] ";
168        libc::write(fd, prefix.as_ptr() as *const libc::c_void, prefix.len());
169        libc::write(fd, msg.as_ptr() as *const libc::c_void, msg.len());
170        let nl = b"\n";
171        libc::write(fd, nl.as_ptr() as *const libc::c_void, 1);
172        libc::close(fd);
173    }
174
// Bytes reserved for a platform `sigjmp_buf`. Recorded per-platform sizes:
//   - Linux x86_64 (glibc): __jmp_buf_tag[1] = 200 bytes
//   - macOS x86_64: 37 ints + signal mask ≈ 296 bytes
//   - macOS aarch64: int[49] = 196 bytes
// 512 bytes covers all of them with headroom.
// NOTE(review): these figures are hand-recorded, not derived from headers —
// re-check against <setjmp.h> before adding a new target.
const SIGJMP_BUF_BYTES: usize = 512;

/// Opaque, over-sized, 16-byte-aligned storage handed to C as a `sigjmp_buf`.
///
/// Rust never inspects the contents; it only passes a pointer to the C
/// `sigsetjmp` wrapper and to `siglongjmp`.
#[repr(C, align(16))]
pub struct SigJmpBuf {
    _buf: [u8; SIGJMP_BUF_BYTES],
}
184
185    extern "C" {
186        fn siglongjmp(env: *mut SigJmpBuf, val: libc::c_int) -> !;
187
188        /// C wrapper: calls sigsetjmp, then callback(userdata) if it returns 0.
189        /// Returns 0 on normal completion, or the signal number on siglongjmp.
190        fn tidepool_sigsetjmp_call(
191            buf: *mut SigJmpBuf,
192            callback: unsafe extern "C" fn(*mut libc::c_void),
193            userdata: *mut libc::c_void,
194        ) -> libc::c_int;
195    }
196
197    /// Signal number that caused the jump.
198    #[derive(Debug, Clone, Copy)]
199    pub struct SignalError(pub i32);
200
201    impl std::fmt::Display for SignalError {
202        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
203            let name = match self.0 {
204                libc::SIGILL => "SIGILL (illegal instruction — likely exhausted case branch)",
205                libc::SIGSEGV => "SIGSEGV (segmentation fault — likely invalid memory access)",
206                libc::SIGBUS => "SIGBUS (bus error)",
207                libc::SIGFPE => "SIGFPE (arithmetic exception — likely division by zero)",
208                libc::SIGTRAP => "SIGTRAP (trap — likely Cranelift trap instruction)",
209                _ => return write!(f, "JIT signal: signal {} (unknown)", self.0),
210            };
211            write!(f, "JIT signal: {}", name)
212        }
213    }
214
215    // Thread-local jump buffer pointer. Synchronous signals (SIGILL, SIGSEGV,
216    // SIGBUS) are delivered to the faulting thread, so per-thread storage is
217    // correct. The `const` initializer avoids any lazy-init allocation, making
218    // the thread-local read async-signal-safe in practice.
219    thread_local! {
220        static JMP_BUF: Cell<*mut SigJmpBuf> = const { Cell::new(ptr::null_mut()) };
221        pub(crate) static FAULTING_ADDR: Cell<usize> = const { Cell::new(0) };
222    }
223
224    /// Trampoline called from C after sigsetjmp returns 0.
225    /// Casts userdata back to a `Box<dyn FnOnce()>` and calls it.
226    /// Panics are caught to prevent unwinding across the C FFI boundary (which is UB).
227    // SAFETY: userdata was created via Box::into_raw in with_signal_protection and
228    // points to a valid Box<Box<dyn FnOnce()>>. Panics are caught to prevent UB
229    // from unwinding across the C FFI boundary.
230    unsafe extern "C" fn trampoline(userdata: *mut libc::c_void) {
231        let closure: Box<Box<dyn FnOnce()>> = Box::from_raw(userdata as *mut Box<dyn FnOnce()>);
232        if std::panic::catch_unwind(std::panic::AssertUnwindSafe(move || {
233            (*closure)();
234        }))
235        .is_err()
236        {
237            // Panic crossed into the trampoline. We can't propagate it across C,
238            // so abort. The caller (with_signal_protection) already wraps JIT calls
239            // in catch_unwind at a higher level, so this should never fire.
240            write_crash_dump_msg(b"panic in JIT trampoline");
241            std::process::abort();
242        }
243    }
244
245    /// Wrap a JIT call with signal protection.
246    ///
247    /// If SIGILL/SIGSEGV/SIGBUS/SIGFPE fires during `f()`, returns `Err(SignalError)`
248    /// instead of crashing the process.
249    ///
250    /// # Safety
251    ///
252    /// The closure `f` must not hold Rust objects with Drop impls that would be
253    /// skipped by siglongjmp. Raw pointers and references are fine.
254    pub unsafe fn with_signal_protection<F, R>(f: F) -> Result<R, SignalError>
255    where
256        F: FnOnce() -> R,
257    {
258        // We need to pass the closure through C's void* callback interface.
259        // Use an UnsafeCell to get the return value out of the type-erased closure.
260        let result_cell = std::cell::UnsafeCell::new(None::<R>);
261        let result_ptr = &result_cell as *const std::cell::UnsafeCell<Option<R>>;
262
263        let wrapper: Box<dyn FnOnce()> = Box::new(move || {
264            let r = f();
265            // SAFETY: we're the only writer, and the reader waits until after we return.
266            unsafe { *(*result_ptr).get() = Some(r) };
267        });
268
269        // SAFETY: SigJmpBuf is repr(C) POD — zeroed is a valid initial state for sigsetjmp.
270        let mut buf: SigJmpBuf = std::mem::zeroed();
271
272        // Store the jump buffer so the signal handler can find it.
273        JMP_BUF.with(|cell| cell.set(&mut buf as *mut SigJmpBuf));
274        FAULTING_ADDR.with(|c| c.set(0));
275
276        // Double-box: outer Box for the fat pointer, inner Box<dyn FnOnce()>.
277        let boxed: Box<Box<dyn FnOnce()>> = Box::new(wrapper);
278        let userdata = Box::into_raw(boxed) as *mut libc::c_void;
279
280        let val = tidepool_sigsetjmp_call(&mut buf, trampoline, userdata);
281
282        JMP_BUF.with(|cell| cell.set(null_mut()));
283
284        if val != 0 {
285            // SAFETY: Signal was caught before trampoline ran. The Box is still valid
286            // because it was never consumed — reclaim and drop it to avoid a leak.
287            drop(Box::from_raw(userdata as *mut Box<dyn FnOnce()>));
288            return Err(SignalError(val));
289        }
290
291        // Closure completed normally — result_cell is guaranteed to be Some.
292        result_cell.into_inner().ok_or(SignalError(-1))
293    }
294
295    extern "C" fn handler(sig: libc::c_int, _info: *mut libc::siginfo_t, _ctx: *mut libc::c_void) {
296        // Synchronous signals (SIGILL, SIGSEGV, SIGBUS) are delivered to the
297        // faulting thread, so the thread-local read returns this thread's buf.
298        // SAFETY: _info is provided by the kernel signal delivery and is valid when non-null.
299        let si_addr = if !_info.is_null() {
300            unsafe { (*_info).si_addr() as usize }
301        } else {
302            0
303        };
304        FAULTING_ADDR.with(|c| c.set(si_addr));
305
306        let buf = JMP_BUF.with(|cell| cell.get());
307        if !buf.is_null() {
308            // SAFETY: buf is a valid sigjmp_buf set by tidepool_sigsetjmp_call on this thread.
309            // siglongjmp to a valid buf is the intended recovery path for JIT signal handling.
310            unsafe {
311                siglongjmp(buf, sig);
312            }
313        }
314        // SAFETY: Not in JIT context — writing crash dump with async-signal-safe calls,
315        // then terminating this thread only (not the process) to avoid killing the MCP server.
316        unsafe {
317            write_crash_dump(sig, _info);
318            #[cfg(target_os = "linux")]
319            libc::syscall(libc::SYS_exit, 0);
320            #[cfg(not(target_os = "linux"))]
321            libc::pthread_exit(std::ptr::null_mut());
322        }
323    }
324
325    /// Install signal handlers for SIGILL, SIGSEGV, SIGBUS on an alternate stack.
326    ///
327    /// Safe to call multiple times. Uses `sigaltstack` so the handler works even
328    /// on stack overflow.
329    pub fn install() {
330        use std::alloc::{alloc, Layout};
331        use std::sync::Once;
332
333        const ALT_STACK_SIZE: usize = 64 * 1024;
334
335        // Pre-compute crash log path once (safe, non-signal context).
336        static PATHS_INIT: Once = Once::new();
337        PATHS_INIT.call_once(|| {
338            if let Ok(cwd) = std::env::current_dir() {
339                let cwd_bytes = cwd.as_os_str().as_encoded_bytes();
340                let log_suffix = b"/.tidepool/crash.log\0";
341                let dir_suffix = b"/.tidepool\0";
342
343                if cwd_bytes.len() + log_suffix.len() < 512 {
344                    // SAFETY: CRASH_LOG_PATH/CRASH_DIR_PATH are static mut, accessed only
345                    // once here inside Once::call_once (no concurrent access). The copies
346                    // stay within the 512-byte buffer bounds (checked above).
347                    unsafe {
348                        let log_ptr = addr_of_mut!(CRASH_LOG_PATH) as *mut u8;
349                        ptr::copy_nonoverlapping(cwd_bytes.as_ptr(), log_ptr, cwd_bytes.len());
350                        ptr::copy_nonoverlapping(
351                            log_suffix.as_ptr(),
352                            log_ptr.add(cwd_bytes.len()),
353                            log_suffix.len(),
354                        );
355                        CRASH_LOG_PATH_LEN
356                            .store(cwd_bytes.len() + log_suffix.len() - 1, Ordering::Relaxed);
357
358                        let dir_ptr = addr_of_mut!(CRASH_DIR_PATH) as *mut u8;
359                        ptr::copy_nonoverlapping(cwd_bytes.as_ptr(), dir_ptr, cwd_bytes.len());
360                        ptr::copy_nonoverlapping(
361                            dir_suffix.as_ptr(),
362                            dir_ptr.add(cwd_bytes.len()),
363                            dir_suffix.len(),
364                        );
365                        CRASH_DIR_PATH_LEN
366                            .store(cwd_bytes.len() + dir_suffix.len() - 1, Ordering::Relaxed);
367
368                        // Ensure .tidepool/ directory exists (safe, non-signal context).
369                        libc::mkdir(addr_of!(CRASH_DIR_PATH) as *const libc::c_char, 0o755);
370                    }
371                }
372            }
373        });
374
375        // sigaltstack is per-thread, so each calling thread needs its own.
376        // Use a thread-local to allocate once per thread and leak (signal
377        // stacks must outlive the handler).
378        thread_local! {
379            static ALT_STACK_INSTALLED: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
380        }
381        ALT_STACK_INSTALLED.with(|installed| {
382            if !installed.get() {
383                // SAFETY: Allocating an alternate signal stack via the global allocator.
384                // The memory is intentionally leaked (signal stacks must outlive the handler).
385                // sigaltstack is per-thread and called once per thread via the thread-local guard.
386                unsafe {
387                    let Ok(layout) = Layout::from_size_align(ALT_STACK_SIZE, 16) else {
388                        return;
389                    };
390                    let alt_stack_ptr = alloc(layout);
391                    if alt_stack_ptr.is_null() {
392                        return;
393                    }
394
395                    let stack = libc::stack_t {
396                        ss_sp: alt_stack_ptr as *mut libc::c_void,
397                        ss_flags: 0,
398                        ss_size: ALT_STACK_SIZE,
399                    };
400                    libc::sigaltstack(&stack, ptr::null_mut());
401                }
402                installed.set(true);
403            }
404        });
405
406        // Always (re)install signal handlers. Other code (Rust panic runtime,
407        // test harness) may overwrite them, so we reinstall on every call.
408        // SAFETY: sigaction with SA_SIGINFO|SA_ONSTACK installs our handler on the alternate
409        // stack. The handler function pointer is valid for the process lifetime.
410        unsafe {
411            let mut sa: libc::sigaction = std::mem::zeroed();
412            sa.sa_flags = libc::SA_SIGINFO | libc::SA_ONSTACK;
413            sa.sa_sigaction = handler as *const () as usize;
414            libc::sigemptyset(&mut sa.sa_mask);
415
416            libc::sigaction(libc::SIGILL, &sa, ptr::null_mut());
417            libc::sigaction(libc::SIGSEGV, &sa, ptr::null_mut());
418            libc::sigaction(libc::SIGBUS, &sa, ptr::null_mut());
419            libc::sigaction(libc::SIGFPE, &sa, ptr::null_mut());
420            libc::sigaction(libc::SIGTRAP, &sa, ptr::null_mut());
421        }
422    }
423}
424
425#[cfg(not(unix))]
mod inner {
    /// Placeholder error type for platforms without unix signals.
    ///
    /// Mirrors the unix definition so callers compile everywhere; the
    /// fallback `with_signal_protection` below never produces one.
    #[derive(Debug, Clone, Copy)]
    pub struct SignalError(pub i32);

    impl std::fmt::Display for SignalError {
        /// Renders `JIT signal: <number>`.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let Self(code) = *self;
            write!(f, "JIT signal: {}", code)
        }
    }

    /// Fallback without signal protection: runs `f` and wraps its result in `Ok`.
    ///
    /// # Safety
    ///
    /// This implementation imposes no requirements of its own; the function is
    /// `unsafe` only so its signature matches the unix version.
    pub unsafe fn with_signal_protection<F, R>(f: F) -> Result<R, SignalError>
    where
        F: FnOnce() -> R,
    {
        let value = f();
        Ok(value)
    }

    /// No-op: there are no signal handlers to install on this platform.
    pub fn install() {}
}
445
446pub use inner::*;