1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
/*!
panic_monitor helps you monitor your threads and deal with panics.

You might be tempted to use libstd's [`JoinHandle`]s for this use-case; however, they have two
major limitations:

 * [`JoinHandle::join`] blocks the current thread. If you want to monitor multiple threads from a
   single "supervisor" thread, you would need something like `try_join`, and ideally you'd have an
   "epoll for [`JoinHandle`]s" as well to avoid busy-waiting. [`JoinHandle`] doesn't implement
   these, however.
 * You can't clone a [`JoinHandle`]. If you want multiple threads to be notified when a particular
   thread panics, you can't use its [`JoinHandle`] to achieve it.

panic_monitor handles both of these issues. [`PanicMonitor::wait`] allows you to specify a number
of threads. As soon as one of them panics, it returns a [`Thread`] struct (which contains the name
and ID of the panicking thread). When calling [`PanicMonitor::wait`], you specify the watch-list in
terms of [`ThreadId`]s. Since these are clonable, multiple supervisor threads can monitor the same
worker thread.

Some other differences between [`PanicMonitor::wait`] and [`JoinHandle::join`]:

 * You don't receive the value which was passed to [`panic`]. (This would be impossible, given
   that such values are not required to implement [`Clone`].)
 * You aren't notified when a thread shuts down normally. `PanicMonitor` is for handling panicking
   threads only.

[`PanicMonitor::wait`]: struct.PanicMonitor.html#method.wait
[`JoinHandle`]: https://doc.rust-lang.org/std/thread/struct.JoinHandle.html
[`JoinHandle::join`]: https://doc.rust-lang.org/std/thread/struct.JoinHandle.html#method.join
[`panic`]: https://doc.rust-lang.org/std/macro.panic.html
[`Clone`]: https://doc.rust-lang.org/std/clone/trait.Clone.html
[`Thread`]: https://doc.rust-lang.org/std/thread/struct.Thread.html
[`ThreadId`]: https://doc.rust-lang.org/std/thread/struct.ThreadId.html

## Usage

Create a global [`PanicMonitor`] using [`lazy_static`], and initialise it from your main thread.
Ideally you should do this before spawning any new threads.

[`PanicMonitor`]: struct.PanicMonitor.html
[`lazy_static`]: https://docs.rs/lazy_static/1.0.0/lazy_static/macro.lazy_static.html

```
#[macro_use] extern crate lazy_static;
extern crate panic_monitor;

use panic_monitor::PanicMonitor;
use std::thread;
use std::time::Duration;

lazy_static! {
    static ref PANIC_MONITOR: PanicMonitor = PanicMonitor::new();
}

fn main() {
    // Install a panic hook
    PANIC_MONITOR.init();

    let h = thread::spawn(|| {
        thread::sleep(Duration::from_millis(100));
        panic!();
    });

    PANIC_MONITOR.wait(&[h.thread().id()]);
    // ^ this will block until the thread panics

    PANIC_MONITOR.wait(&[h.thread().id()]);
    // ^ this will return immediately, since the thread is already dead

    h.join().unwrap_err();
}
```
*/

use std::collections::HashMap;
use std::panic;
use std::sync::*;
use std::thread::{self, Thread, ThreadId};
use std::time::*;

const POISON_MSG: &str = "panic_monitor: Inner lock poisoned (please submit a bug report)";

/// A list of all threads which have panicked, with the ability to notify interested parties when
/// this list is updated.
pub struct PanicMonitor {
    // All threads which have historically panicked
    panicked: Mutex<HashMap<ThreadId, Thread>>,
    // Signalled by the panic hook every time a thread is added to `panicked`
    cvar: Condvar,
}

impl Default for PanicMonitor {
    /// Equivalent to [`PanicMonitor::new`](struct.PanicMonitor.html#method.new).
    fn default() -> PanicMonitor {
        PanicMonitor::new()
    }
}

impl PanicMonitor {
    /// Create a new `PanicMonitor`.
    ///
    /// Call this inside a [`lazy_static`] block. You must call [`init`] after this.
    ///
    /// [`init`]: #method.init
    /// [`lazy_static`]: https://docs.rs/lazy_static/1.0.0/lazy_static/macro.lazy_static.html
    pub fn new() -> PanicMonitor {
        PanicMonitor {
            panicked: Mutex::new(HashMap::new()),
            cvar: Condvar::new(),
        }
    }

    /// Initialise the `PanicMonitor`.
    ///
    /// Call this method as early as you can: a thread which panics before the `PanicMonitor` is
    /// initialised will not trigger wake-ups. Calling `init` multiple times is relatively
    /// harmless.
    ///
    /// The hook which was installed before this call is preserved: it is invoked after the
    /// panicking thread has been recorded and all waiters have been notified.
    //
    // If you need to uninstall some existing handlers by calling `std::panic::set_hook(|_| {})`,
    // or something, you can call `init` again afterwards to re-add `PanicMonitor`'s hook.
    pub fn init(&'static self) {
        // Install a panic hook which makes a record of the panicking thread and notifies all
        // threads waiting on the PanicMonitor
        let hook = panic::take_hook();
        panic::set_hook(Box::new(move |x| {
            let current = thread::current();
            {
                let mut panicked = self.panicked.lock().expect(POISON_MSG);
                panicked.insert(current.id(), current);
                // The guard is dropped here, *before* the previous hook runs. The previous hook
                // is arbitrary user code: it may be slow, and it may itself call into this
                // monitor, which would deadlock if we were still holding the (non-reentrant) lock.
            }
            self.cvar.notify_all();
            hook(x);
        }));
    }

    /// Block the current thread until one of the watched threads panics. The returned vector is
    /// always non-empty.
    ///
    /// Note that this function returns as soon as one or more of the threads on the watch list has
    /// panicked. This means that if you specify a thread which has already panicked, this
    /// function will return immediately. Think of it as level-triggered, not edge-triggered.
    ///
    /// An empty watch list can never be satisfied, so passing one blocks forever.
    pub fn wait(&self, watch_list: &[ThreadId]) -> Vec<Thread> {
        let mut panicked = self.panicked.lock().expect(POISON_MSG);
        loop {
            let found = Self::collect_panicked(&panicked, watch_list);
            if !found.is_empty() {
                return found;
            }
            // Releases the lock while sleeping; wake-ups may be spurious, hence the loop.
            panicked = self.cvar.wait(panicked).expect(POISON_MSG);
        }
    }

    /// Block the current thread until one of the watched threads panic, or the timeout expires.
    /// The returned vector is empty if and only if the timeout expired.
    ///
    /// See [`wait`] for more information.
    ///
    /// [`wait`]: #method.wait
    pub fn wait_timeout(&self, watch_list: &[ThreadId], mut dur: Duration) -> Vec<Thread> {
        let mut panicked = self.panicked.lock().expect(POISON_MSG);
        loop {
            let found = Self::collect_panicked(&panicked, watch_list);
            if !found.is_empty() {
                return found;
            }
            let started = Instant::now();
            let (guard, res) = self.cvar.wait_timeout(panicked, dur).expect(POISON_MSG);
            let elapsed = started.elapsed();
            panicked = guard;
            match dur.checked_sub(elapsed) {
                // Woken early (by a panic elsewhere or spuriously): keep waiting for what's left.
                Some(remaining) if !res.timed_out() => dur = remaining,
                // Out of time. A watched thread may have panicked in the window between the
                // timeout firing and the lock being re-acquired, so look one last time rather
                // than unconditionally reporting "nothing".
                _ => return Self::collect_panicked(&panicked, watch_list),
            }
        }
    }

    /// Check if any of the specified threads have panicked. This function may block, but only
    /// very briefly. The returned vector may be empty.
    ///
    /// See [`wait`] for more information.
    ///
    /// [`wait`]: #method.wait
    pub fn check(&self, watch_list: &[ThreadId]) -> Vec<Thread> {
        let panicked = self.panicked.lock().expect(POISON_MSG);
        Self::collect_panicked(&panicked, watch_list)
    }

    /// Look up every entry of `watch_list` in `panicked`, returning the handles of those which
    /// are present. Results follow the order of `watch_list` (duplicates included).
    fn collect_panicked(
        panicked: &HashMap<ThreadId, Thread>,
        watch_list: &[ThreadId],
    ) -> Vec<Thread> {
        watch_list
            .iter()
            .filter_map(|tid| panicked.get(tid).cloned())
            .collect()
    }
}