1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
//! A library for awaiting and killing child processes from multiple threads.
//!
//! - [Docs](https://docs.rs/shared_child)
//! - [Crate](https://crates.io/crates/shared_child)
//! - [Repo](https://github.com/oconnor663/shared_child.rs)
//!
//! The
//! [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html)
//! type in the standard library provides
//! [`wait`](https://doc.rust-lang.org/std/process/struct.Child.html#method.wait)
//! and
//! [`kill`](https://doc.rust-lang.org/std/process/struct.Child.html#method.kill)
//! methods that take `&mut self`, making it impossible to kill a child process
//! while another thread is waiting on it. That design works around a race
//! condition in Unix's `waitpid` function, where a PID might get reused as soon
//! as the wait returns, so a signal sent around the same time could
//! accidentally get delivered to the wrong process.
//!
//! However with the newer POSIX `waitid` function, we can wait on a child
//! without freeing its PID for reuse. That makes it safe to send signals
//! concurrently. Windows has actually always supported this, by preventing PID
//! reuse while there are still open handles to a child process. This library
//! wraps `std::process::Child` for concurrent use, backed by these APIs.
//!
//! Compatibility note: The `libc` crate doesn't currently support `waitid` on
//! NetBSD or OpenBSD, or on older versions of OSX. There [might also
//! be](https://bugs.python.org/msg167016) some version of OSX where the
//! `waitid` function exists but is broken. We can add a "best effort"
//! workaround using `waitpid` for these platforms as we run into them. Please
//! [file an issue](https://github.com/oconnor663/shared_child.rs/issues/new) if
//! you hit this.
//!
//! # Example
//!
//! ```rust
//! use shared_child::SharedChild;
//! use std::process::Command;
//! use std::sync::Arc;
//!
//! // Spawn a child that will just sleep for a long time,
//! // and put it in an Arc to share between threads.
//! let mut command = Command::new("python");
//! command.arg("-c").arg("import time; time.sleep(1000000000)");
//! let shared_child = SharedChild::spawn(&mut command).unwrap();
//! let child_arc = Arc::new(shared_child);
//!
//! // On another thread, wait on the child process.
//! let child_arc_clone = child_arc.clone();
//! let thread = std::thread::spawn(move || {
//!     child_arc_clone.wait().unwrap()
//! });
//!
//! // While the other thread is waiting, kill the child process.
//! // This wouldn't be possible with e.g. Arc<Mutex<Child>> from
//! // the standard library, because the waiting thread would be
//! // holding the mutex.
//! child_arc.kill().unwrap();
//!
//! // Join the waiting thread and get the exit status.
//! let exit_status = thread.join().unwrap();
//! assert!(!exit_status.success());
//! ```

use std::io;
use std::process::{Command, Child, ExitStatus};
use std::sync::{Condvar, Mutex};

mod sys;

#[cfg(unix)]
pub mod unix;

pub struct SharedChild {
    // This lock provides shared access to kill() and wait(). We never hold it
    // during a blocking wait, though, so that non-blocking waits and kills can
    // go through. (Blocking waits use libc::waitid with the WNOWAIT flag.)
    child: Mutex<Child>,

    // When there are multiple waiting threads, one of them will actually wait
    // on the child, and the rest will block on this condvar.
    state_lock: Mutex<ChildState>,
    state_condvar: Condvar,
}

impl SharedChild {
    /// Spawn a new `SharedChild` from a `std::process::Command`.
    pub fn spawn(command: &mut Command) -> io::Result<SharedChild> {
        let child = command.spawn()?;
        Ok(SharedChild {
               child: Mutex::new(child),
               state_lock: Mutex::new(NotWaiting),
               state_condvar: Condvar::new(),
           })
    }

    /// Return the child process ID.
    pub fn id(&self) -> u32 {
        self.child.lock().unwrap().id()
    }

    fn get_handle(&self) -> sys::Handle {
        sys::get_handle(&self.child.lock().unwrap())
    }

    /// Wait for the child to exit, blocking the current thread, and return its
    /// exit status.
    pub fn wait(&self) -> io::Result<ExitStatus> {
        let mut state = self.state_lock.lock().unwrap();
        loop {
            match *state {
                NotWaiting => {
                    // Either no one is waiting on the child yet, or a previous
                    // waiter failed. That means we need to do it ourselves.
                    // Break out of this loop.
                    break;
                }
                Waiting => {
                    // Another thread is already waiting on the child. We'll
                    // block until it signal us on the condvar, then loop again.
                    // Spurious wakeups could bring us here multiple times
                    // though, see the Condvar docs.
                    state = self.state_condvar.wait(state).unwrap();
                }
                Exited(exit_status) => return Ok(exit_status),
            }
        }

        // If we get here, we have the state lock, and we're the thread
        // responsible for waiting on the child. Set the state to Waiting and
        // then release the state lock, so that other threads can observe it
        // while we block. Afterwards we must leave the Waiting state before
        // this function exits, or other waiters will deadlock.
        *state = Waiting;
        drop(state);

        // Block until the child exits without reaping it. (On Unix, that means
        // we need to call libc::waitid with the WNOWAIT flag. On Windows
        // waiting never reaps.) That makes it safe for another thread to kill
        // while we're here, without racing against some process reusing the
        // child's PID. Having only one thread in this section is important,
        // because POSIX doesn't guarantee much about what happens when multiple
        // threads wait on a child at the same time:
        // http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_13
        let noreap_result = sys::wait_without_reaping(self.get_handle());

        // Now either we hit an error, or the child has exited and needs to be
        // reaped. Retake the state lock and handle all the different exit
        // cases. No matter what happened/happens, we'll leave the Waiting state
        // and signal the state condvar.
        let mut state = self.state_lock.lock().unwrap();
        // The child has already exited, so this wait should clean up without blocking.
        let final_result = noreap_result.and_then(|_| self.child.lock().unwrap().wait());
        *state = if let Ok(exit_status) = final_result {
            Exited(exit_status)
        } else {
            NotWaiting
        };
        self.state_condvar.notify_all();
        final_result
    }

    /// Return the child's exit status if it has already exited. If the child is
    /// still running, return `Ok(None)`.
    pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> {
        let mut status = self.state_lock.lock().unwrap();

        // Unlike wait() above, we don't loop on the Condvar here. If the status
        // is Waiting or Exited, we return immediately. However, if the status
        // is NotWaiting, we'll do a non-blocking wait below, in case the child
        // has already exited.
        match *status {
            NotWaiting => {}
            Waiting => return Ok(None),
            Exited(exit_status) => return Ok(Some(exit_status)),
        };

        // No one is waiting on the child. Check to see if it's already exited.
        // If it has, put ourselves in the Exited state. (There can't be any
        // other waiters to signal, because the state was NotWaiting when we
        // started, and we're still holding the status lock.)
        if sys::try_wait_without_reaping(self.get_handle())? {
            // The child has exited. Reap it. This should not block.
            let exit_status = self.child.lock().unwrap().wait()?;
            *status = Exited(exit_status);
            Ok(Some(exit_status))
        } else {
            Ok(None)
        }
    }

    /// Send a kill signal to the child. You should call `wait` afterwards to
    /// avoid leaving a zombie on Unix.
    pub fn kill(&self) -> io::Result<()> {
        let status = self.state_lock.lock().unwrap();
        if let Exited(_) = *status {
            return Ok(());
        }
        // The child is still running. Kill it. This assumes that the wait
        // functions above will never hold the child lock during a blocking
        // wait.
        self.child.lock().unwrap().kill()
    }

    /// Consume the `SharedChild` and return the `std::process::Child` it
    /// contains.
    ///
    /// We never reap the child process except through `Child::wait`, so the
    /// child object's inner state is correct, even if it was waited on while it
    /// was shared.
    pub fn into_inner(self) -> Child {
        self.child.into_inner().unwrap()
    }
}

enum ChildState {
    NotWaiting,
    Waiting,
    Exited(ExitStatus),
}

use ChildState::*;

#[cfg(test)]
mod tests {
    use std;
    use std::process::Command;
    use std::sync::Arc;
    use super::{SharedChild, sys};

    pub fn true_cmd() -> Command {
        let mut cmd = Command::new("python");
        cmd.arg("-c").arg("");
        cmd
    }

    pub fn sleep_forever_cmd() -> Command {
        let mut cmd = Command::new("python");
        cmd.arg("-c").arg("import time; time.sleep(1000000000)");
        cmd
    }

    #[test]
    fn test_wait() {
        let child = SharedChild::spawn(&mut true_cmd()).unwrap();
        // Test the id() function while we're at it.
        let id = child.id();
        assert!(id > 0);
        let status = child.wait().unwrap();
        assert_eq!(status.code().unwrap(), 0);
    }

    #[test]
    fn test_kill() {
        let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        child.kill().unwrap();
        let status = child.wait().unwrap();
        assert!(!status.success());
    }

    #[test]
    fn test_try_wait() {
        let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        let maybe_status = child.try_wait().unwrap();
        assert_eq!(maybe_status, None);
        child.kill().unwrap();
        // The child will handle that signal asynchronously, so we check it
        // repeatedly in a busy loop.
        let mut maybe_status = None;
        while let None = maybe_status {
            maybe_status = child.try_wait().unwrap();
        }
        assert!(maybe_status.is_some());
        assert!(!maybe_status.unwrap().success());
    }

    #[test]
    fn test_many_waiters() {
        let child = Arc::new(SharedChild::spawn(&mut sleep_forever_cmd()).unwrap());
        let mut threads = Vec::new();
        for _ in 0..10 {
            let clone = child.clone();
            threads.push(std::thread::spawn(move || clone.wait()));
        }
        child.kill().unwrap();
        for thread in threads {
            thread.join().unwrap().unwrap();
        }
    }

    #[test]
    fn test_waitid_after_exit_doesnt_hang() {
        // There are ominous reports (https://bugs.python.org/issue10812) of a
        // broken waitid implementation on OSX, which might hang forever if it
        // tries to wait on a child that's already exited.
        let child = true_cmd().spawn().unwrap();
        sys::wait_without_reaping(sys::get_handle(&child)).unwrap();
        // At this point the child has definitely exited. Wait again to test
        // that a second wait doesn't block.
        sys::wait_without_reaping(sys::get_handle(&child)).unwrap();
    }

    #[test]
    fn test_into_inner_before_wait() {
        let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        let mut child = shared_child.into_inner();
        child.kill().unwrap();
        child.wait().unwrap();
    }

    #[test]
    fn test_into_inner_after_wait() {
        // This makes sure the child's inner state is valid. If we used waitpid
        // on the side, the inner child would try to wait again and cause an
        // error.
        let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        shared_child.kill().unwrap();
        shared_child.wait().unwrap();
        let mut child = shared_child.into_inner();
        // The child has already been waited on, so kill should be an error.
        let kill_err = child.kill().unwrap_err();
        if cfg!(windows) {
            assert_eq!(std::io::ErrorKind::PermissionDenied, kill_err.kind());
        } else {
            assert_eq!(std::io::ErrorKind::InvalidInput, kill_err.kind());
        }
        // But wait should succeed.
        child.wait().unwrap();
    }
}