switchyard 0.3.1

//! Real-time compute-focused async executor with job pools, thread-local data, and priorities.
//!
//! # Example
//!
//! ```rust
//! use switchyard::Switchyard;
//! use switchyard::threads::{thread_info, one_to_one};
//! // Create a new switchyard with one job pool and empty thread local data
//! let yard = Switchyard::new(one_to_one(thread_info(), Some("thread-name")), ||()).unwrap();
//!
//! // Spawn a task with priority 10 and get a JoinHandle
//! let handle = yard.spawn(10, async move { 5 + 5 });
//! // Spawn a lower priority task on the same pool
//! let handle2 = yard.spawn(0, async move { 2 + 2 });
//!
//! // Wait on the results
//! # futures_executor::block_on(async {
//! assert_eq!(handle.await + handle2.await, 14);
//! # });
//! ```
//!
//! # How Switchyard is Different
//!
//! Switchyard is different from other existing async executors, focusing on situations where
//! precise control of threads and execution order is needed. One such situation is using
//! task parallelism to parallelize a compute workload.
//!
//! ## Priorities
//!
//! Each task has a priority and tasks are run in order from high priority to low priority.
//!
//! ```rust
//! # use switchyard::{Switchyard, threads::{thread_info, one_to_one}};
//! # let yard = Switchyard::new(one_to_one(thread_info(), Some("thread-name")), ||()).unwrap();
//! // Spawn task with lowest priority.
//! yard.spawn(0, async move { /* ... */ });
//! // Spawn task with higher priority. If both tasks are waiting, this one will run first.
//! yard.spawn(10, async move { /* ... */ });
//! ```
//!
//! ## Thread Local Data
//!
//! Each yard has some thread local data that can be accessed using [`spawn_local`](Switchyard::spawn_local).
//! Both the thread local data and the future generated by the async function passed to [`spawn_local`](Switchyard::spawn_local)
//! may be `!Send` and `!Sync`. The future will only be resumed on the thread that created it.
//!
//! ```rust
//! # use switchyard::{Switchyard, threads::{thread_info, one_to_one}};
//! # use std::cell::Cell;
//! // Create yard with thread local data. The data is !Sync.
//! let yard = Switchyard::new(one_to_one(thread_info(), Some("thread-name")), || Cell::new(42)).unwrap();
//!
//! // Spawn task that uses thread local data. Each running thread will get their own copy.
//! yard.spawn_local(0, |data| async move { data.set(10) });
//! ```
//!
//! # MSRV
//! 1.51
//!
//! Future MSRV bumps will be breaking changes.

#![deny(future_incompatible)]
#![deny(nonstandard_style)]
#![deny(rust_2018_idioms)]

use crate::{
    task::{Job, Task, ThreadLocalJob, ThreadLocalTask},
    threads::ThreadAllocationOutput,
    util::ThreadLocalPointer,
};
use futures_intrusive::{
    channel::shared::{oneshot_channel, ChannelReceiveFuture, OneshotReceiver},
    sync::ManualResetEvent,
};
use futures_task::{Context, Poll};
use parking_lot::{Condvar, Mutex, RawMutex};
use priority_queue::PriorityQueue;
use slotmap::{DefaultKey, DenseSlotMap};
use std::{
    any::Any,
    future::Future,
    panic::{catch_unwind, AssertUnwindSafe, UnwindSafe},
    pin::Pin,
    sync::{
        atomic::{AtomicBool, AtomicUsize, Ordering},
        Arc,
    },
};

pub mod affinity;
mod error;
mod task;
pub mod threads;
mod util;
mod worker;

pub use error::*;

/// Integer alias for a priority.
///
/// Tasks spawned with a numerically higher priority are run sooner.
pub type Priority = u32;
/// Integer alias for the maximum amount of pools.
// NOTE(review): nothing in the visible part of this file refers to this alias — confirm that
// other modules or downstream users still need it.
pub type PoolCount = u8;

/// Handle to a currently running task.
///
/// Awaiting this future will give the return value of the task.
///
/// If the task panicked, awaiting the handle panics as well.
pub struct JoinHandle<T: 'static> {
    /// Receiving half of the oneshot channel the task sends its result through.
    ///
    /// It is never read after construction; it is stored next to the future created from it.
    /// NOTE(review): presumably kept so the channel stays referenced for the handle's
    /// lifetime — confirm against `futures_intrusive`.
    _receiver: OneshotReceiver<Result<T, Box<dyn Any + Send + 'static>>>,
    /// Future obtained from `receiver.receive()`; this is what `poll` actually drives.
    /// The `Err` variant carries the payload of a panic caught while running the task.
    receiver_future: ChannelReceiveFuture<RawMutex, Result<T, Box<dyn Any + Send + 'static>>>,
}
impl<T: 'static> Future for JoinHandle<T> {
    type Output = T;

    /// Drives the channel receive future and unwraps the task's result.
    ///
    /// # Panics
    ///
    /// Panics with `"Job panicked!"` if the task delivered a caught panic instead of a value.
    fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Self::Output> {
        // SAFETY: this is a structural pin projection onto `receiver_future`. The field is only
        // reached through the pinned reference and is not moved out of `self` here.
        let this = unsafe { self.get_unchecked_mut() };
        let receive = unsafe { Pin::new_unchecked(&mut this.receiver_future) };

        match receive.poll(ctx) {
            Poll::Ready(Some(Ok(output))) => Poll::Ready(output),
            Poll::Ready(Some(Err(_))) => panic!("Job panicked!"),
            // `Ready(None)` means the channel was closed without a value ever being sent.
            // No result will ever arrive, so the handle simply stays pending forever.
            Poll::Ready(None) | Poll::Pending => Poll::Pending,
        }
    }
}

/// Future adapter that turns a panic raised while polling the wrapped future into an `Err`.
///
/// Vendored from `futures-util` so that the whole crate does not have to be pulled in.
struct CatchUnwind<Fut>(Fut);

impl<Fut> CatchUnwind<Fut>
where
    Fut: Future + UnwindSafe,
{
    /// Wraps `future` so that panics during `poll` are captured instead of propagated.
    fn new(future: Fut) -> Self {
        Self(future)
    }
}

impl<Fut> Future for CatchUnwind<Fut>
where
    Fut: Future + UnwindSafe,
{
    /// `Ok` with the inner future's output, or `Err` with the payload of the caught panic.
    type Output = Result<Fut::Output, Box<dyn Any + Send>>;

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        // SAFETY: structural projection onto the only field; the inner future is never moved
        // out of the pinned wrapper.
        let inner = unsafe { self.map_unchecked_mut(|this| &mut this.0) };

        match catch_unwind(AssertUnwindSafe(|| inner.poll(cx))) {
            Ok(Poll::Ready(output)) => Poll::Ready(Ok(output)),
            Ok(Poll::Pending) => Poll::Pending,
            Err(payload) => Poll::Ready(Err(payload)),
        }
    }
}

/// Queue state for thread-local tasks.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads this type;
/// it is presumably owned by the worker module — confirm.
struct ThreadLocalQueue<TD> {
    /// Slot map of shared thread-local tasks.
    /// NOTE(review): going by the name, these are tasks parked until they are woken — confirm.
    waiting: Mutex<DenseSlotMap<DefaultKey, Arc<ThreadLocalTask<TD>>>>,
    /// Thread-local jobs ordered by their `u32` priority.
    inner: Mutex<PriorityQueue<ThreadLocalJob<TD>, u32>>,
}
/// A condition variable paired with a flag telling whether its thread is currently running.
///
/// One instance is created per worker thread in `Switchyard::new`.
struct FlaggedCondvar {
    /// Initialised to `true`. `Queue::notify_one` only notifies condvars whose flag reads `false`.
    /// NOTE(review): presumably the owning worker clears this before it waits — confirm in `worker`.
    running: AtomicBool,
    /// The condition variable that gets notified.
    inner: Condvar,
}
/// The job queue shared between the `Switchyard` handle and its worker threads.
struct Queue<TD> {
    /// Slot map of shared tasks.
    /// NOTE(review): going by the name, these are tasks parked until they are woken — confirm.
    waiting: Mutex<DenseSlotMap<DefaultKey, Arc<Task<TD>>>>,
    /// Jobs ordered by their `u32` priority; `spawn` and `spawn_local` push into it.
    /// Its lock must be held while calling `notify_one` / `notify_all`.
    inner: Mutex<PriorityQueue<Job<TD>, u32>>,
    /// One entry per worker thread, in spawn order; each worker is handed its index at creation.
    condvars: Vec<FlaggedCondvar>,
}
impl<TD> Queue<TD> {
    /// Notifies the first condvar (in worker order) whose `running` flag is currently `false`.
    /// Does nothing when every flag reads `true`.
    ///
    /// Must be called with `queue.inner`'s lock held.
    fn notify_one(&self) {
        let not_running = self
            .condvars
            .iter()
            .find(|flagged| !flagged.running.load(Ordering::Relaxed));

        if let Some(flagged) = not_running {
            flagged.inner.notify_one();
        }
    }

    /// Notifies every condvar, regardless of its `running` flag.
    ///
    /// Must be called with `queue.inner`'s lock held.
    fn notify_all(&self) {
        // Notifying only the non-running workers would be cheaper, but waking everyone is
        // more surefire, and this is only called on shutdown.
        self.condvars.iter().for_each(|flagged| {
            flagged.inner.notify_all();
        });
    }
}

/// State shared (through an `Arc`) between the `Switchyard` handle and every worker thread.
struct Shared<TD> {
    /// Initialised to the number of worker threads; reported by `Switchyard::active_threads`.
    active_threads: AtomicUsize,
    /// Event awaited by `Switchyard::wait_for_idle`; reset every time a task is spawned.
    idle_wait: ManualResetEvent,
    /// Incremented once per spawned task; reported by `Switchyard::jobs`.
    /// NOTE(review): presumably decremented by workers when a job completes — confirm.
    job_count: AtomicUsize,
    /// Set by `Switchyard::finish`; spawning asserts that it is still unset.
    death_signal: AtomicBool,
    /// The job queue and the per-worker condvars.
    queue: Queue<TD>,
}

/// Compute focused async executor.
///
/// See crate documentation for more details.
pub struct Switchyard<TD: 'static> {
    /// State shared with the worker threads; each worker receives its own clone of this `Arc`.
    shared: Arc<Shared<TD>>,
    /// Join handles of the spawned worker threads; drained and joined by `finish`.
    threads: Vec<std::thread::JoinHandle<()>>,
    /// Raw pointers reported by the workers over a channel during `new`.
    ///
    /// Within this file they are only collected and later cleared by `finish`; they are never
    /// dereferenced here.
    /// NOTE(review): ownership of the pointees presumably stays with the worker threads — confirm.
    thread_local_data: Vec<*mut Arc<TD>>,
}
impl<TD: 'static> Switchyard<TD> {
    /// Create a new switchyard.
    ///
    /// One worker thread is spawned for every element yielded by `thread_allocations`, using the
    /// settings (name, stack size, affinity) of that element. Helper functions in [`threads`]
    /// can generate these iterators for common situations.
    ///
    /// `thread_local_data_creation` will be called on each thread to create the thread local
    /// data accessible by `spawn_local`.
    ///
    /// # Errors
    ///
    /// Returns [`SwitchyardCreationError::InvalidAffinity`] if any allocation requests an
    /// affinity that is not smaller than the number of logical cpus. No thread is spawned in
    /// that case.
    ///
    /// # Panics
    ///
    /// Panics if the operating system fails to spawn one of the worker threads.
    pub fn new<TDFunc>(
        thread_allocations: impl IntoIterator<Item = ThreadAllocationOutput>,
        thread_local_data_creation: TDFunc,
    ) -> Result<Self, SwitchyardCreationError>
    where
        TDFunc: Fn() -> TD + Send + Sync + 'static,
    {
        let allocations: Vec<_> = thread_allocations.into_iter().collect();

        // Validate every requested affinity up front, before any thread exists.
        let logical_cpus = num_cpus::get();
        let out_of_range = allocations
            .iter()
            .filter_map(|allocation| allocation.affinity)
            .find(|&affinity| affinity >= logical_cpus);
        if let Some(affinity) = out_of_range {
            return Err(SwitchyardCreationError::InvalidAffinity {
                affinity,
                total_threads: logical_cpus,
            });
        }

        // One flagged condvar per worker; the worker's position in `allocations` is its index.
        let worker_count = allocations.len();
        let condvars = (0..worker_count)
            .map(|_| FlaggedCondvar {
                running: AtomicBool::new(true),
                inner: Condvar::new(),
            })
            .collect();

        let shared = Arc::new(Shared {
            active_threads: AtomicUsize::new(worker_count),
            idle_wait: ManualResetEvent::new(false),
            job_count: AtomicUsize::new(0),
            death_signal: AtomicBool::new(false),
            queue: Queue {
                waiting: Mutex::new(DenseSlotMap::new()),
                inner: Mutex::new(PriorityQueue::new()),
                condvars,
            },
        });

        let (pointer_sender, pointer_receiver) = std::sync::mpsc::channel();
        let data_factory = Arc::new(thread_local_data_creation);

        let mut threads = Vec::with_capacity(worker_count);
        for (queue_local_index, mut allocation) in allocations.into_iter().enumerate() {
            let mut builder = std::thread::Builder::new();
            if let Some(name) = allocation.name.take() {
                builder = builder.name(name);
            }
            if let Some(stack_size) = allocation.stack_size.take() {
                builder = builder.stack_size(stack_size);
            }

            let body = worker::body::<TD, TDFunc>(
                Arc::clone(&shared),
                allocation,
                queue_local_index,
                pointer_sender.clone(),
                Arc::clone(&data_factory),
            );
            let handle = builder
                .spawn(body)
                .unwrap_or_else(|_| panic!("Could not spawn thread"));
            threads.push(handle);
        }

        // Give up our own sender: the receive loop below then ends as soon as every worker
        // has dropped its clone.
        drop(pointer_sender);

        let mut thread_local_data = Vec::with_capacity(threads.len());
        while let Ok(ThreadLocalPointer(pointer)) = pointer_receiver.recv() {
            thread_local_data.push(pointer);
        }

        Ok(Self {
            shared,
            threads,
            thread_local_data,
        })
    }

    /// Bookkeeping that every spawn function performs before queueing its job.
    ///
    /// # Panics
    ///
    /// Panics if the death signal has already been raised by `finish`.
    fn spawn_header(&self) {
        let shared = &*self.shared;

        let finished = shared.death_signal.load(Ordering::Acquire);
        assert!(
            !finished,
            "finish() has been called on this Switchyard. No more jobs may be added."
        );

        shared.job_count.fetch_add(1, Ordering::AcqRel);

        // Mark the yard as non-idle right away so that `yard.spawn(); yard.wait_for_idle()`
        // cannot "return early". Otherwise, if no thread has fully woken up by the time
        // wait_for_idle is called, it would return immediately even though there is logically
        // still an outstanding, active, job.
        shared.idle_wait.reset();
    }

    /// Spawn a future which can migrate between threads during executionat the given `priority`.
    ///
    /// A higher `priority` will cause the task to be run sooner.
    ///
    /// # Example
    ///
    /// ```rust
    /// use switchyard::{Switchyard, threads::single_thread};
    ///
    /// // Create a yard with a single pool
    /// let yard: Switchyard<()> = Switchyard::new(single_thread(None, None), || ()).unwrap();
    ///
    /// // Spawn a task with priority 0 and get a handle to the result.
    /// let handle = yard.spawn(0, async move { 2 * 2 });
    ///
    /// // Await result
    /// # futures_executor::block_on(async move {
    /// assert_eq!(handle.await, 4);
    /// # });
    /// ```
    ///
    /// # Panics
    ///
    /// - [`finish`](Switchyard::finish) has been called on the pool.
    pub fn spawn<Fut, T>(&self, priority: Priority, fut: Fut) -> JoinHandle<T>
    where
        Fut: Future<Output = T> + Send + 'static,
        T: Send + 'static,
    {
        self.spawn_header();

        let (sender, receiver) = oneshot_channel();
        let job = Job::Future(Task::new(
            Arc::clone(&self.shared),
            async move {
                // We don't care about the result, if this fails, that just means the join handle
                // has been dropped.
                let _ = sender.send(CatchUnwind::new(std::panic::AssertUnwindSafe(fut)).await);
            },
            priority,
        ));

        let queue: &Queue<TD> = &self.shared.queue;

        let mut queue_guard = queue.inner.lock();
        queue_guard.push(job, priority);
        // the required guard is held in `queue_guard`
        queue.notify_one();
        drop(queue_guard);

        JoinHandle {
            receiver_future: receiver.receive(),
            _receiver: receiver,
        }
    }

    /// Spawns an async function which is tied to a single thread during execution.
    ///
    /// The task is queued at the given `priority`. A higher `priority` will cause the task to
    /// be run sooner.
    ///
    /// The given async function will be provided an `Arc` to the thread-local data to create its future with.
    ///
    /// The function must be `Send`, but the future returned by that function may be `!Send`.
    ///
    /// A panic raised by `async_fn` itself, or by the future it returns, is caught; awaiting the
    /// returned [`JoinHandle`] then panics.
    ///
    /// # Example
    ///
    /// ```rust
    /// use std::{cell::Cell, sync::Arc};
    /// use switchyard::{Switchyard, threads::single_thread};
    ///
    /// // Create a yard with thread local data.
    /// let yard: Switchyard<Cell<u64>> = Switchyard::new(
    ///     single_thread(None, None),
    ///     || Cell::new(42)
    /// ).unwrap();
    /// # let mut yard = yard;
    ///
    /// // Spawn an async function using the data.
    /// yard.spawn_local(0, |data: Arc<Cell<u64>>| async move {data.set(12);});
    /// # futures_executor::block_on(yard.wait_for_idle());
    ///
    /// async fn some_async(data: Arc<Cell<u64>>) -> u64 {
    ///     data.set(15);
    ///     2 * 2
    /// }
    ///
    /// // Works with normal async functions too
    /// let handle = yard.spawn_local(0, some_async);
    /// # futures_executor::block_on(yard.wait_for_idle());
    /// # futures_executor::block_on(async move {
    /// assert_eq!(handle.await, 4);
    /// # });
    /// ```
    ///
    /// # Panics
    ///
    /// - [`finish`](Switchyard::finish) has been called on the pool.
    pub fn spawn_local<Func, Fut, T>(&self, priority: Priority, async_fn: Func) -> JoinHandle<T>
    where
        Func: FnOnce(Arc<TD>) -> Fut + Send + 'static,
        Fut: Future<Output = T>,
        T: Send + 'static,
    {
        self.spawn_header();

        let (sender, receiver) = oneshot_channel();
        let job = Job::Local(Box::new(move |td| {
            Box::pin(async move {
                // Creating the future already runs user code, so guard it against panics too.
                // The closure as a whole is wrapped in `AssertUnwindSafe`: wrapping individual
                // captures and reaching through `.0` inside the closure is defeated by the
                // field-precise closure captures of edition 2021.
                let created = catch_unwind(AssertUnwindSafe(move || async_fn(td)));

                let ret = match created {
                    Ok(fut) => CatchUnwind::new(AssertUnwindSafe(fut)).await,
                    Err(panic) => Err(panic),
                };

                // We don't care about the result, if this fails, that just means the join handle
                // has been dropped.
                let _ = sender.send(ret);
            })
        }));

        let queue: &Queue<TD> = &self.shared.queue;

        let mut queue_guard = queue.inner.lock();
        queue_guard.push(job, priority);
        // the required guard is held in `queue_guard`
        queue.notify_one();
        drop(queue_guard);

        JoinHandle {
            receiver_future: receiver.receive(),
            _receiver: receiver,
        }
    }

    /// Resolves once the yard is idle: all working threads are starved of work, either
    /// because there are no jobs or because all jobs are waiting.
    ///
    /// # Safety
    ///
    /// - This function provides no safety guarantees.
    /// - Jobs may be added while the future returns.
    /// - Jobs may be woken while the future returns.
    pub async fn wait_for_idle(&self) {
        // The event is not reset here; it is reset again whenever the yard becomes active.
        let idle_event = &self.shared.idle_wait;
        idle_event.wait().await;
    }

    /// Number of jobs currently in flight, read with relaxed ordering.
    ///
    /// # Safety
    ///
    /// - This function provides no safety guarantees.
    /// - Jobs may be added after the value is received and before it is returned.
    pub fn jobs(&self) -> usize {
        let counter = &self.shared.job_count;
        counter.load(Ordering::Relaxed)
    }

    /// Number of threads currently processing jobs, read with relaxed ordering.
    ///
    /// # Safety
    ///
    /// - This function provides no safety guarantees.
    /// - Jobs may be added after the value is received and before it is returned re-activating threads.
    pub fn active_threads(&self) -> usize {
        let counter = &self.shared.active_threads;
        counter.load(Ordering::Relaxed)
    }

    /// Kill all threads as soon as they finish their jobs. All calls to spawn and spawn_local will
    /// panic after this function is called.
    ///
    /// This is equivalent to calling drop. Calling this function twice will be a no-op
    /// the second time.
    ///
    /// # Panics
    ///
    /// Panics if joining a worker thread reports that the thread panicked.
    pub fn finish(&mut self) {
        let shared = &*self.shared;

        // Raise the death signal first, then wake everyone up while holding the queue lock
        // that `notify_all` requires.
        shared.death_signal.store(true, Ordering::Release);
        {
            let _queue_guard = shared.queue.inner.lock();
            shared.queue.notify_all();
        }

        self.thread_local_data.clear();
        self.threads
            .drain(..)
            .for_each(|worker| worker.join().unwrap());
    }
}

impl<TD: 'static> Drop for Switchyard<TD> {
    fn drop(&mut self) {
        self.finish()
    }
}

// SAFETY (review note): `Switchyard` does not get `Send`/`Sync` automatically, among other
// things because it stores raw `*mut Arc<TD>` pointers in `thread_local_data`. Within this
// file those pointers are only collected in `new` and cleared in `finish`; they are never
// dereferenced here.
// NOTE(review): these impls place no bound on `TD`, so their soundness also depends on how
// the `task` and `worker` modules move and access `TD` values across threads — confirm there.
unsafe impl<TD> Send for Switchyard<TD> {}
unsafe impl<TD> Sync for Switchyard<TD> {}