1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
//! tokio-task-tracker is a simple graceful shutdown solution for tokio.
//!
//! The basic idea is to use a `TaskSpawner` to create `TaskTracker` objects, and hold
//! on to them in spawned tasks. Inside the task, you can await `tracker.cancelled()`
//! to wait for the task to be cancelled.
//!
//! The `TaskWaiter` can be used to wait for an interrupt and then wait for all
//! `TaskTracker`s to be dropped.
//!
//! # Examples
//!
//! ```no_run
//! # use std::time::Duration;
//! #
//! #[tokio::main(flavor = "current_thread")]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let (spawner, waiter) = tokio_task_tracker::new();
//!
//!     // Start a task
//!     spawner.spawn(|tracker| async move {
//!         tokio::select! {
//!             _ = tracker.cancelled() => {
//!                 // The token was cancelled, task should shut down.
//!             }
//!             _ = tokio::time::sleep(Duration::from_secs(9999)) => {
//!                 // Long work has completed
//!             }
//!         }
//!     });
//!
//!     // Wait for someone to hit ctrl-c, then wait for all tasks to complete.
//!     // If tasks don't complete within 5 seconds, we'll quit anyway.
//!     waiter.wait_for_shutdown(Duration::from_secs(5)).await?;
//!
//!     Ok(())
//! }
//! ```
//!
//! If you do not wish to allow a task to be aborted, you still need to make sure
//! the task captures the tracker, because TaskWaiter will wait for all trackers to be dropped:
//!
//! ```no_run
//! # use std::time::Duration;
//! #
//! # #[tokio::main(flavor = "current_thread")]
//! # async fn main() {
//! #     let (spawner, waiter) = tokio_task_tracker::new();
//! #
//!     // Start a task
//!     spawner.spawn(|tracker| async move {
//!         // Move the tracker into the task.
//!         let _tracker = tracker;
//!
//!         // Do some work that we don't want to abort.
//!         tokio::time::sleep(Duration::from_secs(9999)).await;
//!     });
//!
//! # }
//! ```
//!
//! You can also create a tracker via the `task` method:
//!
//! ```no_run
//! # use std::time::Duration;
//! #
//! # #[tokio::main(flavor = "current_thread")]
//! # async fn main() {
//! #     let (spawner, waiter) = tokio_task_tracker::new();
//! #
//!     // Start a task
//!     let tracker = spawner.task();
//!     tokio::task::spawn(async move {
//!         // Move the tracker into the task.
//!         let _tracker = tracker;
//!
//!         // ...
//!     });
//!
//! # }
//! ```
//!
//! Trackers can be used to spawn subtasks via `tracker.subtask()` or
//! `tracker.spawn()`.

use std::{
    future::Future,
    sync::{Arc, Mutex},
    time::Duration,
};

use shutdown::wait_for_shutdown_signal;
use tokio::{select, sync::mpsc, task::JoinHandle};
use tokio_util::sync::CancellationToken;

mod shutdown;

/// Builder is used to create a TaskSpawner and TaskWaiter.
///
/// Start from `Builder::new()` (or `Builder::default()`), optionally supply a
/// token with `set_cancellation_token`, then call `build()`.
pub struct Builder {
    /// Token to hand to the built TaskSpawner and TaskWaiter. When `None`,
    /// `build()` creates a new `CancellationToken`.
    token: Option<CancellationToken>,
}

/// TaskSpawner is used to spawn new task trackers.
///
/// Clones of a TaskSpawner hold a clone of the same token and point at the
/// same shared `stop_tx` slot.
#[derive(Clone)]
pub struct TaskSpawner {
    /// Token cancelled by `cancel()`; every tracker created by this spawner
    /// receives a clone of it.
    token: CancellationToken,
    /// Sender slot shared with the TaskWaiter. Each new tracker gets a clone of
    /// whatever is in the slot; `TaskWaiter::wait()` empties the slot.
    stop_tx: Arc<Mutex<Option<mpsc::Sender<()>>>>,
}

/// TaskWaiter is used to wait until all task trackers have been dropped.
pub struct TaskWaiter {
    /// Token cancelled by `cancel()` and by `wait_for_shutdown()` once the
    /// first shutdown signal arrives.
    token: CancellationToken,
    /// Shared stop_tx is shared between all TaskSpawners and the TaskWaiter, so that
    /// when we call TaskWaiter::wait() we can drop the tx from all spawners.
    stop_tx: Arc<Mutex<Option<mpsc::Sender<()>>>>,
    /// Receiving half of the stop channel. Nothing in this file sends on the
    /// channel; `wait()` only awaits `recv()` on it after dropping the shared
    /// sender.
    stop_rx: mpsc::Receiver<()>,
}

/// A TaskTracker is used both as a token to keep track of active tasks, and
/// as a cancellation token to check to see if the current task should quit.
///
/// Cloning a tracker (directly or via `subtask()`) clones both the token and
/// the held sender.
#[derive(Clone)]
pub struct TaskTracker {
    /// Cancellation token consulted by `cancelled()` and `is_cancelled()`.
    token: CancellationToken,
    // Hang on to an instance of tx. We do this so we can know when all tasks
    // have been completed.
    //
    // This is `None` when the tracker was created after `TaskWaiter::wait()`
    // had already taken the shared sender out of its slot.
    _stop_tx: Option<mpsc::Sender<()>>,
}

/// Errors returned by the waiting methods on `TaskWaiter`.
///
/// Every variant is a unit variant, so the type is cheap to copy and supports
/// full equality; callers can compare errors or keep a copy without borrowing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// Returned when we timeout waiting for all tasks to shut down.
    Timeout,
    /// Returned when we cannot bind to the interrupt/terminate signals.
    CouldNotBindInterrupt,
    /// Returned when we were waiting for graceful shutdown, but received a
    /// second interrupt signal.
    ShutdownEarly,
}

// Marker impl with no overridden methods: it makes `Error` usable wherever a
// `std::error::Error` is expected, e.g. converted into
// `Box<dyn std::error::Error>` by the `?` operator.
impl std::error::Error for Error {}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Timeout => write!(f, "Not all tasks finished before timeout"),
            Error::CouldNotBindInterrupt => write!(f, "Could not bind interrupt handler"),
            Error::ShutdownEarly => write!(f, "Skipping graceful shutdown due to second interrupt"),
        }
    }
}

/// Create a connected TaskSpawner / TaskWaiter pair with default settings.
///
/// Shorthand for building from an unconfigured `Builder`, so the pair gets
/// its own new cancellation token.
pub fn new() -> (TaskSpawner, TaskWaiter) {
    let builder = Builder::new();
    builder.build()
}

impl Builder {
    /// Create a new Builder with no cancellation token configured.
    pub fn new() -> Self {
        Builder { token: None }
    }

    /// Use an existing CancellationToken for the returned TaskWaiter and TaskSpawner.
    /// If the given token is cancelled, all associated TaskTrackers will be cancelled
    /// as well.
    ///
    /// Calling this again replaces any previously supplied token.
    pub fn set_cancellation_token(mut self, token: CancellationToken) -> Self {
        self.token = Some(token);
        self
    }

    /// Create a new TaskSpawner and TaskWaiter.
    ///
    /// Both halves share one cancellation token (the one supplied via
    /// `set_cancellation_token`, or a new one) and one stop channel.
    pub fn build(self) -> (TaskSpawner, TaskWaiter) {
        // The sender goes into a shared slot so the waiter can later take it
        // out on behalf of every spawner clone.
        let (stop_tx, stop_rx) = mpsc::channel(1);
        let stop_tx = Arc::new(Mutex::new(Some(stop_tx)));
        // Build a fresh token lazily: only when the caller did not supply one.
        let token = self.token.unwrap_or_else(CancellationToken::new);

        (
            TaskSpawner {
                token: token.clone(),
                stop_tx: stop_tx.clone(),
            },
            TaskWaiter {
                token,
                stop_tx,
                stop_rx,
            },
        )
    }
}

impl Default for Builder {
    fn default() -> Self {
        Self::new()
    }
}

impl TaskSpawner {
    /// Hand out a new TaskTracker tied to this spawner.
    ///
    /// The tracker receives a clone of the cancellation token and a clone of
    /// the shared stop sender, if the slot still holds one.
    pub fn task(&self) -> TaskTracker {
        // Copy the current contents of the shared slot; the lock is released
        // at the end of this statement.
        let stop_tx = self.stop_tx.lock().unwrap().clone();

        TaskTracker {
            token: self.token.clone(),
            _stop_tx: stop_tx,
        }
    }

    /// Spawn a task on the tokio runtime.
    ///
    /// `f` is invoked immediately with a new task tracker; the future it
    /// returns is what gets spawned. The task's `JoinHandle` is returned.
    pub fn spawn<T, F>(&self, f: F) -> JoinHandle<T::Output>
    where
        F: FnOnce(TaskTracker) -> T,
        T: Future + Send + 'static,
        T::Output: Send + 'static,
    {
        let task_future = f(self.task());
        tokio::task::spawn(task_future)
    }

    /// Ask every task created through this TaskSpawner to abort by
    /// cancelling the shared token.
    pub fn cancel(&self) {
        self.token.cancel();
    }
}

impl TaskWaiter {
    /// Notify all tasks this TaskWaiter is waiting on that they should abort.
    pub fn cancel(&self) {
        self.token.cancel();
    }

    /// Wait for the application to be interrupted, and then gracefully shutdown
    /// allowing a timeout for all tasks to quit.  A second interrupt will cause
    /// an immediate shutdown.
    ///
    /// On Unix systems, "interrupt" means a SIGINT or SIGTERM. On all other
    /// platforms the current implementation uses `tokio::signal::ctrl_c()`
    /// to wait for an interrupt.
    pub async fn wait_for_shutdown(self, timeout: Duration) -> Result<(), Error> {
        // Wait for the ctrl-c.
        match wait_for_shutdown_signal().await {
            Ok(()) => {
                // time to shut down...
            }
            Err(_) => return Err(Error::CouldNotBindInterrupt),
        }

        // Let tasks know they should shut down.
        self.token.cancel();

        // Wait for everything to finish.
        select! {
            res = self.wait_with_timeout(timeout) => res,
            _ = wait_for_shutdown_signal() => Err(Error::ShutdownEarly),
        }
    }

    /// Wait for all tasks to finish.  If tasks do not finish before the timeout,
    /// `Error::Timeout` will be returned.
    pub async fn wait_with_timeout(self, timeout: Duration) -> Result<(), Error> {
        // Wait for all tasks to be dropped.
        tokio::time::timeout(timeout, self.wait())
            .await
            .map_err(|_| Error::Timeout {})?;

        Ok(())
    }

    /// Wait for all tasks to finish.
    pub async fn wait(mut self) {
        // Drop the tx half of the channel.
        drop(self.stop_tx.lock().unwrap().take());

        // Wait for all tasks to be dropped.
        let _ = self.stop_rx.recv().await;
    }
}

impl TaskTracker {
    /// Create a tracker for a subtask.
    ///
    /// The new tracker holds clones of this tracker's token and stop sender.
    pub fn subtask(&self) -> Self {
        Self {
            token: self.token.clone(),
            _stop_tx: self._stop_tx.clone(),
        }
    }

    /// Spawn a subtask on the tokio runtime.
    ///
    /// `f` is invoked immediately with a subtask tracker; the future it
    /// returns is what gets spawned. The task's `JoinHandle` is returned.
    pub fn spawn<T, F>(&self, f: F) -> JoinHandle<T::Output>
    where
        F: FnOnce(TaskTracker) -> T,
        T: Future + Send + 'static,
        T::Output: Send + 'static,
    {
        let subtask_future = f(self.subtask());
        tokio::task::spawn(subtask_future)
    }

    /// Wait until this task has been cancelled.
    pub async fn cancelled(&self) {
        self.token.cancelled().await
    }

    /// Returns true if this tracker's token has been cancelled.
    pub fn is_cancelled(&self) -> bool {
        let token = &self.token;
        token.is_cancelled()
    }
}

// Unit tests covering cancellation, waiting for tasks, and the simulated
// interrupt flow provided by the `shutdown` module's test hooks.
#[cfg(test)]
mod tests {
    use super::*;
    use std::{
        sync::atomic::{AtomicBool, Ordering},
        time::Duration,
    };

    // Cancelling through the waiter must be visible on a tracker created by
    // the paired spawner.
    #[tokio::test]
    async fn tracker_should_be_cancelled() {
        let (spawner, waiter) = super::new();

        let task = spawner.task();
        waiter.cancel();
        assert!(task.is_cancelled());
    }

    // A caller-supplied token must drive cancellation of trackers.
    #[tokio::test]
    async fn should_work_with_existing_cancellation_token() {
        let token = CancellationToken::new();
        let (spawner, _) = super::Builder::new()
            .set_cancellation_token(token.clone())
            .build();
        let task = spawner.task();

        // Cancelling the token should cancel the task.
        token.cancel();
        assert!(task.is_cancelled());
    }

    // `wait()` must not return until the spawned task has dropped its tracker.
    #[tokio::test]
    async fn should_wait_for_tasks_to_complete() -> Result<(), Box<dyn std::error::Error>> {
        let (spawner, waiter) = super::new();

        // Set by the task only if its sleep branch runs to completion.
        let done = Arc::new(AtomicBool::new(false));

        // Start a task
        {
            let done = done.clone();
            spawner.spawn(|tracker| async move {
                tokio::select! {
                    _ = tracker.cancelled() => {
                        // The token was cancelled, task should shut down.
                    }
                    _ = tokio::time::sleep(Duration::from_millis(100)) => {
                        // Short task has completed.
                        done.store(true, Ordering::SeqCst);
                    }
                }
            });
        }

        // Wait for all tasks to complete.
        waiter.wait().await;

        // Should have completed.
        assert!(done.load(Ordering::SeqCst));

        Ok(())
    }

    // Cancelling must make the task take its `cancelled()` branch, so the
    // long sleep never finishes and `done` stays false.
    #[tokio::test]
    async fn should_cancel_tasks() -> Result<(), Box<dyn std::error::Error>> {
        let (spawner, waiter) = super::new();

        let done = Arc::new(AtomicBool::new(false));

        // Start a task
        {
            let done = done.clone();
            spawner.spawn(|tracker| async move {
                tokio::select! {
                    _ = tracker.cancelled() => {
                        // The token was cancelled, task should shut down.
                    }
                    _ = tokio::time::sleep(Duration::from_secs(9999)) => {
                        // Long work has completed
                        done.store(true, Ordering::SeqCst);
                    }
                }
            });
        }

        // Cancel the task after a short while.
        tokio::time::sleep(Duration::from_millis(100)).await;
        waiter.cancel();

        // Wait for all tasks to complete.
        waiter.wait().await;

        // The task was cancelled, so the long-sleep branch must not have run.
        assert!(!done.load(Ordering::SeqCst));

        Ok(())
    }

    #[tokio::test]
    async fn interrupt_tests() -> Result<(), Box<dyn std::error::Error>> {
        // Interrupt tests rely on global state in shutdown.rs to simulate
        // SIGINT.  Need to run these serially.
        should_wait_for_tasks_on_interrupt().await?;
        should_stop_immediately_on_second_interrupt().await?;

        Ok(())
    }

    // One simulated interrupt: the task is cancelled and `wait_for_shutdown`
    // returns successfully once it has exited.
    async fn should_wait_for_tasks_on_interrupt() -> Result<(), Box<dyn std::error::Error>> {
        shutdown::reset_before_test();

        let (spawner, waiter) = super::new();

        let done = Arc::new(AtomicBool::new(false));

        // Start a task
        {
            let done = done.clone();
            spawner.spawn(|tracker| async move {
                tokio::select! {
                    _ = tracker.cancelled() => {
                        // The token was cancelled, task should shut down.
                    }
                    _ = tokio::time::sleep(Duration::from_secs(9999)) => {
                        // Long running task...
                        done.store(true, Ordering::SeqCst);
                    }
                }
            });
        }

        // Send a fake shutdown signal.
        tokio::spawn(async {
            shutdown::send_shutdown().await;
        });

        // Wait for the shutdown signal, then for all tasks to complete.
        waiter.wait_for_shutdown(Duration::from_secs(10)).await?;

        // Task should have been cancelled before its long sleep finished.
        assert!(!done.load(Ordering::SeqCst));

        Ok(())
    }

    // Two simulated interrupts with a task that ignores cancellation:
    // `wait_for_shutdown` must give up with `Error::ShutdownEarly`.
    async fn should_stop_immediately_on_second_interrupt() -> Result<(), Box<dyn std::error::Error>>
    {
        shutdown::reset_before_test();

        let (spawner, waiter) = super::new();

        let done = Arc::new(AtomicBool::new(false));

        // Start a task
        {
            let done = done.clone();
            spawner.spawn(|tracker| async move {
                // Hold the tracker so the waiter keeps waiting on this task.
                let _tracker = tracker;

                // Long running task that can't be cancelled.
                tokio::time::sleep(Duration::from_secs(99)).await;
                done.store(true, Ordering::SeqCst);
            });
        }

        // Send two shutdown signals. The second should cause us to die immediately.
        tokio::spawn(async move {
            shutdown::send_shutdown().await;
            shutdown::send_shutdown().await;
        });

        // We shouldn't wait here, because of the second interrupt.
        let err = waiter
            .wait_for_shutdown(Duration::from_secs(99))
            .await
            .unwrap_err();
        assert_eq!(err, Error::ShutdownEarly);

        // The task is still sleeping, so it must not have set `done`.
        assert!(!done.load(Ordering::SeqCst));

        Ok(())
    }
}