1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//! A global, auto-scaling, preemptive scheduler based on `async-executor`.
//!
//! `smolscale` is a fairly thin wrapper around a global [`async-executor`]. Unlike `async-global-executor` and friends, however, it has a **preemptive** thread pool that ensures that tasks cannot block other tasks no matter what. This means that you can do things like run expensive computations or even do blocking I/O within a task without worrying about causing deadlocks. Even with "traditional" tasks that do not block, this approach can reduce worst-case latency.
//!
//! Furthermore, the thread pool is **adaptive**, using the fewest threads required to "get the job done". This minimizes OS-level context switching, increasing performance in I/O-bound tasks compared to the usual approach of spawning OS threads matching the number of CPUs.
//!
//! Finally, this crate has seriously minimal dependencies, and will not add significantly to your compilation times.
//!
//! This crate is heavily inspired by Stjepan Glavina's [previous work on async-std](https://async.rs/blog/stop-worrying-about-blocking-the-new-async-std-runtime/).

use futures_lite::prelude::*;
use futures_lite::Future;
use once_cell::sync::OnceCell;
use pin_project_lite::pin_project;
use std::{
    pin::Pin,
    sync::atomic::AtomicUsize,
    sync::atomic::Ordering,
    task::{Context, Poll},
    time::Duration,
};

/// Consecutive monitor ticks of sustained over/underload required before the
/// worker pool is grown or shrunk (see `monitor_loop`).
const CHANGE_THRESH: u32 = 10;
/// Interval, in milliseconds, between monitor-thread load checks.
const MONITOR_MS: u64 = 10;

/// The single global executor onto which all tasks are spawned.
static EXEC: async_executor::Executor<'static> = async_executor::Executor::new();

/// Number of `WrappedFuture`s currently inside `poll` across all worker threads.
static FUTURES_BEING_POLLED: AtomicUsize = AtomicUsize::new(0);
/// Signaled when `FUTURES_BEING_POLLED` transitions from zero to nonzero, so
/// the monitor can sleep while the executor is idle.
static FBP_NONZERO: event_listener::Event = event_listener::Event::new();
/// Monotonic counter bumped at the start of every wrapped poll; the monitor
/// uses it to detect that no polls are completing (i.e. a worker is stuck).
static POLL_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Join handle of the lazily-started `sscale-mon` monitor thread.
static MONITOR: OnceCell<std::thread::JoinHandle<()>> = OnceCell::new();

/// Signaled by the monitor to ask exactly one worker thread to shut down.
static UNDERLOAD: event_listener::Event = event_listener::Event::new();

/// Idempotently starts the `sscale-mon` monitor thread.
///
/// The `OnceCell` guarantees the thread is spawned at most once no matter how
/// many times this is called; every call after the first is a no-op.
fn start_monitor() {
    MONITOR.get_or_init(|| {
        let monitor = std::thread::Builder::new().name("sscale-mon".into());
        monitor.spawn(monitor_loop).unwrap()
    });
}

/// Body of the `sscale-mon` monitor thread: adaptively grows and shrinks the
/// worker-thread pool based on observed executor load.
///
/// Every `MONITOR_MS` milliseconds it inspects `FUTURES_BEING_POLLED`:
/// * while the count is zero, it sheds one worker per `CHANGE_THRESH`
///   consecutive idle iterations (never dropping below one worker);
/// * when every live worker is mid-poll and either no new poll started since
///   the last tick or the saturation persisted for `CHANGE_THRESH` ticks, it
///   spawns an additional worker.
fn monitor_loop() {
    // Spawn one worker thread. The worker drives the global executor until it
    // wins an `UNDERLOAD` notification, at which point the `or` race resolves,
    // `block_on` returns, and the thread exits (shrinking the pool).
    fn start_thread() {
        std::thread::Builder::new()
            .name("sscale-wkr".into())
            .spawn(|| {
                let listener = UNDERLOAD.listen();
                futures_lite::future::block_on(EXEC.run(futures_lite::future::pending::<()>()).or(
                    async {
                        listener.await;
                    },
                ))
            })
            .unwrap();
    }
    start_thread();

    // Number of worker threads we believe are currently alive.
    let mut running_threads: usize = 1;
    // `POLL_COUNT` snapshot taken on the previous monitor tick.
    let mut last_count: usize = 0;
    // Consecutive ticks of saturation / idleness, respectively.
    let mut overload_iters = 0;
    let mut underload_iters = 0;
    loop {
        std::thread::sleep(Duration::from_millis(MONITOR_MS));
        // Block until at least one future is being polled, retiring workers
        // while the executor stays idle.
        let fbp = loop {
            let fbp = FUTURES_BEING_POLLED.load(Ordering::SeqCst);
            if fbp > 0 {
                break fbp;
            }
            // Listen-then-recheck: taking the listener *before* re-reading the
            // counter ensures a concurrent 0 -> 1 notification cannot be lost.
            let listener = FBP_NONZERO.listen();
            let fbp = FUTURES_BEING_POLLED.load(Ordering::SeqCst);
            if fbp > 0 {
                break fbp;
            }
            overload_iters = 0;
            underload_iters += 1;
            if underload_iters > CHANGE_THRESH {
                underload_iters = 0;
                // Ask one worker to exit, but always keep at least one alive.
                if running_threads > 1 {
                    UNDERLOAD.notify_additional_relaxed(1);
                    running_threads -= 1;
                }
            }
            listener.wait();
        };
        // Sanity check: more futures mid-poll than workers would mean our
        // bookkeeping of `running_threads` has drifted.
        debug_assert!(fbp <= running_threads);
        let new_count = POLL_COUNT.load(Ordering::Relaxed);
        if FUTURES_BEING_POLLED.load(Ordering::Relaxed) == running_threads {
            // Every worker is mid-poll. If the poll counter did not move since
            // the last tick, a task is presumably blocking its worker thread;
            // add a worker so other tasks can make progress.
            underload_iters = 0;
            overload_iters += 1;
            if new_count == last_count || overload_iters > CHANGE_THRESH {
                start_thread();
                running_threads += 1;
            }
        }
        last_count = new_count;
    }
}

/// Spawns a task onto the lazily-initialized global executor.
///
/// The task can block or run CPU-intensive code if needed --- it will not block other tasks.
/// Spawns a task onto the lazily-initialized global executor.
///
/// The task can block or run CPU-intensive code if needed --- it will not
/// block other tasks, because the monitor thread adds workers when it detects
/// that all current workers are stuck mid-poll.
pub fn spawn<T: Send + 'static>(
    future: impl Future<Output = T> + Send + 'static,
) -> async_executor::Task<T> {
    // Ensure the watchdog thread exists before the first task can run.
    start_monitor();
    let instrumented = WrappedFuture::new(future);
    EXEC.spawn(instrumented)
}

// Wrapper that instruments every poll of the inner future, maintaining the
// global `FUTURES_BEING_POLLED` / `POLL_COUNT` statistics the monitor thread
// reads to decide when to resize the worker pool.
pin_project! {
struct WrappedFuture<T, F: Future<Output = T>> {
    #[pin]
    fut: F,
}
}

impl<T, F: Future<Output = T>> Future for WrappedFuture<T, F> {
    type Output = T;

    /// Polls the inner future while bracketing the call with bookkeeping for
    /// the monitor thread: the in-flight counter is raised for the duration
    /// of the poll, and the total poll counter is bumped once per call.
    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let projected = self.project();
        // Raise the in-flight count; on the 0 -> 1 edge, wake the monitor,
        // which may be parked waiting on `FBP_NONZERO`.
        if FUTURES_BEING_POLLED.fetch_add(1, Ordering::SeqCst) == 0 {
            FBP_NONZERO.notify(1);
        }
        POLL_COUNT.fetch_add(1, Ordering::Relaxed);
        // Lower the in-flight count on every exit path, including a panic in
        // the inner future.
        scopeguard::defer!({
            FUTURES_BEING_POLLED.fetch_sub(1, Ordering::Relaxed);
        });
        projected.fut.poll(cx)
    }
}

impl<T, F> WrappedFuture<T, F>
where
    F: Future<Output = T> + 'static,
{
    /// Wraps `fut` so that each of its polls is tracked by the global load
    /// counters used for adaptive pool sizing.
    pub fn new(fut: F) -> Self {
        Self { fut }
    }
}