cee_scape/
lib.rs

1//! The cee-scape crate provides access to `setjmp` and `sigsetjmp`
2//! functionality, via an interface that ensures LLVM won't miscompile things.
3//!
4//! # Example usage
5//!
6//! The main intention is for this interface to be used with C code that expects
7//! to longjmp via jump buffers established at Rust-to-C FFI boundaries.
8//!
9//! Here is an example, where we are using `extern "C"` functions as stand-ins
10//! for the code you would normally expect to find in an external C library.
11//!
12//! ```rust
13//! mod pretend_this_comes_from_c {
14//!     use cee_scape::JmpBuf;
15//!
16//!     // Returns sum of a and b, but longjmps through `env` if either argument
17//!     // is negative (passing -1) or if the sum overflows (passing -2).
18//!     pub extern "C" fn careful_sum(env: JmpBuf, a: i32, b: i32) -> i32 {
19//!         check_values(env, a, b);
20//!         return a + b;
21//!     }
22//!
23//!     extern "C" fn check_values(env: JmpBuf, a: i32, b: i32) {
24//!         use cee_scape::longjmp;
25//!         if a < 0 || b < 0 { unsafe { longjmp(env, -1); } }
26//!         if (i32::MAX - a) < b { unsafe { longjmp(env, -2); } }
27//!     }
28//! }
29//!
30//! use pretend_this_comes_from_c::careful_sum as sum;
31//! use cee_scape::call_with_setjmp;
32//!
33//! assert_eq!(call_with_setjmp(|env| { sum(env, 10, 20) + 1000 }), 1030);
34//! assert_eq!(call_with_setjmp(|env| { sum(env, -10, 20) + 1000 }), -1);
35//! assert_eq!(call_with_setjmp(|env| { sum(env, 10, -20) + 1000 }), -1);
36//! assert_eq!(call_with_setjmp(|env| { sum(env, i32::MAX, 1) + 1000 }), -2);
37//! ```
38//!
39//! # Background on `setjmp` and `longjmp`.
40//!
41//! The `setjmp` and `longjmp` functions in C are used as the basis for
42//! "non-local jumps", also known as "escape continuations". It is a way to have
43//! a chain of calls "`entry` calls `middle_1` calls `middle_2` calls
44//! `innermost`", where the bodies of `middle_1` or `middle_2` or `innermost`
45//! might at some point decide that they want to jump all the way back to
46//! `entry` without having to pass through the remaining code that they would
47//! normally have to execute when returning via each of their respective
48//! callers.
49//!
50//! In C, this is done by having `entry` first call `setjmp` to initialize a
51//! jump environment (which would hold, for example, the current stack pointer
52//! and, if present, the current frame pointer), and then passing a pointer to
53//! that jump environment along to each of the child subroutines of `entry`. If
54//! at any point a child subroutine wants to jump back to the point where
55//! `setjmp` had first returned, that child subroutine invokes `longjmp`, which
56//! restores the stack to the position it had when `setjmp` originally returned.
57//!
58//! # Safety (or lack thereof)
59//!
60//! This crate cannot ensure that the usual Rust control-flow rules are upheld,
61//! which means that the act of actually doing a longjmp/siglongjmp to a
62//! non-local jump environment (aka continuation) is *unsafe*.
63//!
64//! For example, several Rust APIs rely on an assumption that they will always
65//! run some specific cleanup code after a callback is done. Such cleanup is
66//! sometimes encoded as a Rust destructor, but it can also just be directly
67//! encoded as straight-line code waiting to be run.
68//!
69//! Calls to `longjmp` blatantly break these assumptions. A `longjmp` invocation
70//! does not invoke any Rust destructors, and it does not "unwind the stack".
71//! All pending cleanup code between the `longjmp` invocation and the target
72//! jump environment (i.e. the place where the relevant `setjmp` first returned)
73//! is skipped.
74//!
75//! ```rust
76//! use std::cell::Cell;
77//! // This emulates a data structure that has an ongoing invariant:
78//! // the `depth` is incremented/decremented according to entry/exit
79//! // to a given callback (see `DepthTracker::enter` below).
80//! pub struct DepthTracker { depth: Cell<usize>, }
81//!
82//! let track = DepthTracker::new();
83//! cee_scape::call_with_setjmp(|env| {
84//!     track.enter(|| {
85//!         // This is what we expect: depth is larger in context of
86//!         // DepthTracker::enter callback
87//!         assert_eq!(track.depth(), 1);
88//!         "normal case"
89//!     });
90//!     0
91//! });
92//!
93//! // Normal case: the tracked depth has returned to zero.
94//! assert_eq!(track.depth(), 0);
95//!
96//! assert_eq!(cee_scape::call_with_setjmp(|env| {
97//!     track.enter(|| {
98//!         // This is what we expect: depth is larger in context of
99//!         // DepthTracker::enter callback
100//!         assert_eq!(track.depth(), 1);
101//!         // DIFFERENT: Now we bypass the DepthTracker's cleanup code.
102//!         unsafe { cee_scape::longjmp(env, 4) }
103//!         "abnormal case"
104//!     });
105//!     0
106//! }), 4);
107//!
108//! // This is the "surprise" due to the DIFFERENT line: longjmp skipped
109//! // over the decrement from returning from the callback, and so the count
110//! // is not consistent with what the data structure expects.
111//! assert_eq!(track.depth(), 1 /* not 0 */);
112//!
113//! // (These are just support routines for the `DepthTracker` above.)
114//! impl DepthTracker {
115//!     pub fn depth(&self) -> usize {
116//!         self.depth.get()
117//!     }
118//!     pub fn enter<X>(&self, callback: impl FnOnce() -> X) -> X {
119//!         self.update(|x|x+1);
120//!         let ret = callback();
121//!         self.update(|x|x-1);
122//!         ret
123//!     }
124//!     fn update(&self, effect: impl Fn(usize) -> usize) {
125//!         self.depth.set(effect(self.depth.get()));
126//!     }
127//!     pub fn new() -> Self {
128//!         DepthTracker { depth: Cell::new(0) }
129//!     }
130//! }
131//! ```
132//!
133//! In short, the `longjmp` routine is a blunt instrument. When a `longjmp`
134//! invocation skips some cleanup code, the compiler cannot know whether
135//! skipping that cleanup code was exactly what the program author intended, or
136//! if it represents a programming error.
137//!
138//! Furthermore, much cleanup code of this form is enforcing *Rust safety
139//! invariants*. This is why `longjmp` is provided here as an *unsafe* method;
140//! that is a reminder that while one can invoke `call_with_setjmp` safely, the
141//! obligation remains to audit whether any invocations of `longjmp` on the
142//! provided jump environment are breaking those safety invariants by skipping
143//! over such cleanup code.
144//!
145//! # Some static checking
146//!
147//! While not all of Rust's safety rules are statically enforced, one important
148//! one is enforced: When invoking `call_with_setjmp`, the saved jump
149//! environment is not allowed to escape the scope of the callback that is fed
150//! to `call_with_setjmp`:
151//!
152//! ```compile_fail
153//! let mut escaped = None;
154//! cee_scape::call_with_setjmp(|env| {
155//!     // If `env` were allowed to escape...
156//!     escaped = Some(env);
157//!     0
158//! });
159//! // ... it would be bad if we could then do this with it.
160//! unsafe { cee_scape::longjmp(escaped.unwrap(), 1); }
161//! ```
162//!
163//! We also cannot share jump environments across threads, because it is
164//! undefined behavior to `longjmp` via a jump environment that was initialized
165//! by a call to `setjmp` in a different thread.
166//!
167//! ```compile_fail
168//! cee_scape::call_with_setjmp(move |env| {
169//!     std::thread::scope(|s| {
170//!         s.spawn(move || {
171//!             unsafe { cee_scape::longjmp(env, 1); }
172//!         });
173//!         0
174//!     })
175//! });
176//! ```
177
178use libc::c_int;
179
180#[cfg_attr(not(target_os = "linux"), allow(dead_code))]
181mod glibc_compat;
182#[cfg_attr(not(target_os = "macos"), allow(dead_code))]
183mod macos_compat;
184#[cfg(target_os = "linux")]
185use glibc_compat as struct_defs;
186#[cfg(target_os = "macos")]
187use macos_compat as struct_defs;
188
189pub use crate::struct_defs::{JmpBufFields, JmpBufStruct};
190pub use crate::struct_defs::{SigJmpBufFields, SigJmpBufStruct};
191
192
193
194/// This is the type of the first argument that is fed to longjmp.
///
/// It is a raw `*const` pointer to [`JmpBufFields`]. A value of this type is
/// what the callback given to `call_with_setjmp` receives as its argument.
195pub type JmpBuf = *const JmpBufFields;
196
197/// This is the type of the first argument that is fed to siglongjmp.
///
/// It is a raw `*const` pointer to [`SigJmpBufFields`]. A value of this type
/// is what the callback given to `call_with_sigsetjmp` receives as its
/// argument.
198pub type SigJmpBuf = *const SigJmpBufFields;
199
200extern "C" {
201    /// Given a calling environment `jbuf` (which one can acquire via
202    /// `call_with_setjmp`) and a non-zero value `val`, moves the stack and
203    /// program counters to match the return position of where `jbuf` was
204    /// established via a call to `setjmp`, and then returns `val` from that
205    /// spot. This function never returns to its own caller.
206    ///
207    /// You should only provide non-zero values for `val`. A zero-value may or
208    /// may not be replaced with a non-zero value for the return to the
209    /// non-local jump environment, depending on the underlying C library that
210    /// is linked in. (It may be silently replaced with a non-zero value, as a
211    /// non-zero value is the only way for the internal machinery to distinguish
212    /// between the first return from the initial call versus a non-local
213    /// return).
214    ///
215    /// # Safety
    ///
    /// A `longjmp` does not run any Rust destructors and does not unwind the
    /// stack: all pending cleanup code between this call and the point where
    /// `jbuf` was established is skipped. The caller must ensure that skipping
    /// that cleanup does not break an invariant that other code relies on.
    ///
    /// `jbuf` must not be used once the `call_with_setjmp` invocation that
    /// supplied it has finished, and it must have been established on the
    /// current thread: jumping via an environment that was initialized by a
    /// different thread is undefined behavior.
216    pub fn longjmp(jbuf: JmpBuf, val: c_int) -> !;
217
218    /// Given a calling environment `jbuf` (which one can acquire via
219    /// `call_with_sigsetjmp`) and a non-zero value `val`, moves the stack and
220    /// program counters to match the return position of where `jbuf` was
221    /// established via a call to `sigsetjmp`, and then returns `val` from that
222    /// spot. This function never returns to its own caller.
223    ///
224    /// You should only provide non-zero values for `val`. A zero-value may or
225    /// may not be replaced with a non-zero value for the return to the
226    /// non-local jump environment, depending on the underlying C library that
227    /// is linked in. (It may be silently replaced with a non-zero value, as a
228    /// non-zero value is the only way for the internal machinery to distinguish
229    /// between the first return from the initial call versus a non-local
230    /// return).
231    ///
232    /// # Safety
    ///
    /// A `siglongjmp` is a non-local jump, so this crate cannot ensure that
    /// the usual Rust control-flow rules are upheld: no Rust destructors are
    /// run and the stack is not unwound, so all pending cleanup code between
    /// this call and the point where `jbuf` was established is skipped. The
    /// caller must ensure that skipping that cleanup does not break an
    /// invariant that other code relies on.
    ///
    /// `jbuf` must not be used once the `call_with_sigsetjmp` invocation that
    /// supplied it has finished, and it must have been established on the
    /// current thread.
233    pub fn siglongjmp(jbuf: SigJmpBuf, val: c_int) -> !;
234}
235
236// FIXME: figure out how to access feature cfg'ing. (And then, look into linting
237// against people trying to do "the obvious things".)
238
// The public `call_with_setjmp` / `call_with_sigsetjmp` entry points come from
// exactly one of two private modules, chosen at compile time by the
// `use_c_to_interface_with_setjmp` cargo feature.

// Feature off: re-export the implementations from `asm_based`.
239#[cfg(not(feature = "use_c_to_interface_with_setjmp"))]
240mod asm_based;
241#[cfg(not(feature = "use_c_to_interface_with_setjmp"))]
242pub use asm_based::{call_with_setjmp, call_with_sigsetjmp};
243
// Feature on: re-export the implementations from `cee_based` instead.
244#[cfg(feature = "use_c_to_interface_with_setjmp")]
245mod cee_based;
246#[cfg(feature = "use_c_to_interface_with_setjmp")]
247pub use cee_based::{call_with_setjmp, call_with_sigsetjmp};
248
249#[cfg(test)]
250mod tests {
251    // longjmp never returns, and its signature reflects that. But it's noisy to
252    // be warned about it in the tests below, where the whole point is to ensure
253    // that everything *is* skipped in the expected manner.
254    #![allow(unreachable_code)]
255
256    use super::*;
257    use expect_test::expect;
258
    // Without a longjmp, the closure's own return value is the result of
    // `call_with_setjmp`; with one, the value handed to `longjmp` is the
    // result and the rest of the closure (the trailing `3`) is skipped.
259    #[test]
260    fn setjmp_basically_works() {
261        assert_eq!(call_with_setjmp(|_env| { 0 }), 0);
262        assert_eq!(call_with_setjmp(|_env| { 3 }), 3);
263        assert_eq!(
264            call_with_setjmp(|env| {
265                unsafe {
266                    longjmp(env, 4);
267                }
268                3
269            }),
270            4
271        );
272    }
273
    // Same three checks as `setjmp_basically_works`, but through the
    // `call_with_sigsetjmp` / `siglongjmp` pair (first argument `true`).
274    #[test]
275    fn sigsetjmp_basically_works() {
276        assert_eq!(call_with_sigsetjmp(true, |_env| { 0 }), 0);
277        assert_eq!(call_with_sigsetjmp(true, |_env| { 3 }), 3);
278        assert_eq!(
279            call_with_sigsetjmp(true, |env| {
280                unsafe {
281                    siglongjmp(env, 4);
282                }
283                3
284            }),
285            4
286        );
287    }
288
    // Single level: code before the longjmp runs ("A"), code after it (" B")
    // must not.
289    #[test]
290    fn check_control_flow_details_1() {
291        // The basic test template: record control flow points via record, and
292        // compare them in the test output.
293        let mut record = String::new();
294        let result = call_with_setjmp(|env| {
295            record.push_str("A");
296            unsafe {
297                longjmp(env, 4);
298            }
299            record.push_str(" B");
300            0
301        });
302        assert_eq!(result, 4);
303        expect![["A"]].assert_eq(&record);
304    }
305
    // Two nested levels, jumping to the *inner* environment: only the rest of
    // the inner closure (" C") is skipped. The inner call yields 4, the outer
    // closure carries on (" D") and returns `4 + 1`.
306    #[test]
307    fn check_control_flow_details_2() {
308        let mut record = String::new();
309        let result = call_with_setjmp(|_env1| {
310            record.push_str("A");
311            let ret = call_with_setjmp(|env2| {
312                record.push_str(" B");
313                unsafe {
314                    longjmp(env2, 4);
315                }
316                record.push_str(" C");
317                0
318            });
319            record.push_str(" D");
320            ret + 1
321        });
322        assert_eq!(result, 5);
323        expect![["A B D"]].assert_eq(&record);
324    }
325
    // Two nested levels, jumping from the inner closure to the *outer*
    // environment: the rest of both closures (" C", " D" and the `ret + 1`)
    // is skipped, so the outer call yields the raw longjmp value 4.
326    #[test]
327    fn check_control_flow_details_3() {
328        let mut record = String::new();
329        let result = call_with_setjmp(|env1| {
330            record.push_str("A");
331            let ret = call_with_setjmp(|_env2| {
332                record.push_str(" B");
333                unsafe {
334                    longjmp(env1, 4);
335                }
336                record.push_str(" C");
337                0
338            });
339            record.push_str(" D");
340            ret + 1
341        });
342        assert_eq!(result, 4);
343        expect![["A B"]].assert_eq(&record);
344    }
345
    // Passes the jump environment to a function that is only declared here;
    // its definition lives outside this file. Only built when the
    // `test_c_integration` feature is enabled.
346    #[cfg(feature = "test_c_integration")]
347    #[test]
348    fn c_integration() {
349        extern "C" {
350            fn subtract_but_longjmp_if_underflow(env: JmpBuf, a: u32, b: u32) -> u32;
351        }
        // No underflow: the helper returns normally and its result (10 - 3)
        // is the closure's return value.
352        assert_eq!(
353            call_with_setjmp(|env| {
354                (unsafe { subtract_but_longjmp_if_underflow(env, 10, 3) }) as c_int
355            }),
356            7
357        );
358
        // Underflow: the helper is expected to longjmp instead of returning,
        // so the `panic!` must never execute.
        // NOTE(review): the expected 7 is presumably the value the helper
        // passes to longjmp (10 - 3?) -- confirm against its definition.
359        assert_eq!(
360            call_with_setjmp(|env| {
361                unsafe {
362                    subtract_but_longjmp_if_underflow(env, 3, 10);
363                    panic!("should never get here.");
364                }
365            }),
366            7
367        );
368    }
369
    // Checks that the Rust-side `JmpBufStruct` / `SigJmpBufStruct` have the
    // same size and alignment as the values reported by
    // `get_c_jmpbuf_layout`, which is defined outside this file.
370    #[cfg(feature = "test_c_integration")]
371    #[test]
372    fn check_c_layout() {
373        // This type is defined in test_c_integration
374        #[repr(C)]
375        #[derive(Copy, Clone, Default, Debug)]
376        struct LayoutOfJmpBufs {
377            jb_size: usize,
378            jb_align: usize,
379            sigjb_size: usize,
380            sigjb_align: usize,
381        }
382
383        extern "C" {
384            fn get_c_jmpbuf_layout() -> LayoutOfJmpBufs;
385        }
386
387        let cinfo = unsafe { get_c_jmpbuf_layout() };
388        // Dump the info so that if the test fails the right values are easy
389        // enough to find.
390        eprintln!("Note: C jmp_buf/sigjmp_buf layout info: {cinfo:?}");
391
392        assert_eq!(cinfo.jb_size, core::mem::size_of::<JmpBufStruct>());
393        assert_eq!(cinfo.jb_align, core::mem::align_of::<JmpBufStruct>());
394        assert_eq!(cinfo.sigjb_size, core::mem::size_of::<SigJmpBufStruct>());
395        assert_eq!(cinfo.sigjb_align, core::mem::align_of::<SigJmpBufStruct>());
396    }
397}
398
399#[cfg(test)]
400mod tests_of_drop_interaction {
401    use std::sync::atomic::{AtomicUsize, Ordering};
402    use super::{call_with_setjmp, call_with_sigsetjmp};
    // Test helper: field 0 is a name used only in the printed trace, field 1
    // is a counter that is incremented by one every time a value is dropped.
403    struct IncrementOnDrop(&'static str, &'static AtomicUsize);
404    impl IncrementOnDrop {
        // Builds a helper and logs the construction; does not touch the
        // counter.
405        fn new(name: &'static str, state: &'static AtomicUsize) -> Self {
406            println!("called new for {name}");
407            IncrementOnDrop(name, state)
408        }
409    }
410    impl Drop for IncrementOnDrop {
411        fn drop(&mut self) {
412            println!("called drop on {}", self.0);
413            self.1.fetch_add(1, Ordering::Relaxed);
414        }
415    }
416
    // Moves a drop-counting value into the closure and lets the closure
    // return normally (no longjmp). The counter must advance by exactly one
    // per call, i.e. the captured value is dropped once, not zero or two
    // times.
417    #[test]
418    fn does_ptr_read_cause_a_double_drop_for_setjmp() {
419        static STATE: AtomicUsize = AtomicUsize::new(0);
420        let iod = IncrementOnDrop::new("iod", &STATE);
421        call_with_setjmp(move |_env| {
422            println!("at callback 1 start: {}", iod.1.load(Ordering::Relaxed));
423            let _own_it = iod;
424            0
425        });
426        println!("callback done, drop counter: {}", STATE.load(Ordering::Relaxed));
427        assert_eq!(STATE.load(Ordering::Relaxed), 1);
428        let iod = IncrementOnDrop::new("iod", &STATE);
429        call_with_setjmp(move |_env| {
430            println!("at callback 2 start: {}", iod.1.load(Ordering::Relaxed));
431            let _own_it = iod;
432            0
433        });
434        println!("callback done, drop counter: {}", STATE.load(Ordering::Relaxed));
435        assert_eq!(STATE.load(Ordering::Relaxed), 2);
436    }
437
    // Same exactly-once drop check for `call_with_sigsetjmp`, exercised with
    // both `false` and `true` as its first argument.
438    #[test]
439    fn does_ptr_read_cause_a_double_drop_for_sigsetjmp() {
440        static STATE: AtomicUsize = AtomicUsize::new(0);
441        let iod = IncrementOnDrop::new("iod", &STATE);
442        call_with_sigsetjmp(false, move |_env| {
443            println!("at callback 3 start: {}", iod.1.load(Ordering::Relaxed));
444            let _own_it = iod;
445            0
446        });
447        println!("callback done, drop counter: {}", STATE.load(Ordering::Relaxed));
448        assert_eq!(STATE.load(Ordering::Relaxed), 1);
449        let iod = IncrementOnDrop::new("iod", &STATE);
450        call_with_sigsetjmp(true, move |_env| {
451            println!("at callback 4 start: {}", iod.1.load(Ordering::Relaxed));
452            let _own_it = iod;
453            0
454        });
455        println!("callback done, drop counter: {}", STATE.load(Ordering::Relaxed));
456        assert_eq!(STATE.load(Ordering::Relaxed), 2);
457    }
458
459    // FIXME: This test probably shouldn't be written this way. The intended safety property
460    // for calling longjmp is that there *are no* destructors waiting to run between the
461    // longjmp and its associated setjmp (and that we otherwise have UB).
462    #[test]
463    fn mix_drop_with_longjmp() {
464        use crate::longjmp;
465
466        static STATE: AtomicUsize = AtomicUsize::new(0);
467        // The above cases were checking that "normal" control flow,
468        // with no longjmp's involved, would not cause a double-drop.
469        // But as soon as longjmp is in the mix, we can no longer
470        // guarantee that the closure passed into call_with_setjmp will be dropped
471        let iod = IncrementOnDrop::new("iod", &STATE);
472        call_with_setjmp(move |env1| {
473            println!("at callback 1 start: {}", iod.1.load(Ordering::Relaxed));
474            let _own_it = iod;
475            unsafe { longjmp(env1, 4) }
476        });
477        println!("callback done, drop counter: {}", STATE.load(Ordering::Relaxed));
        // `_own_it` was still live when the longjmp fired, so its destructor
        // was skipped: the counter is expected to still be 0.
478        assert_eq!(STATE.load(Ordering::Relaxed), 0);
479    }
480}