//! Lockless Work Stealing Job System.
//!
//! # Features
//! - Lockless work stealing queue
//! - 0 runtime allocations
//! - Jobs are executed from closures
//! - Chaining and grouping of jobs
//! - Parallel for_each abstraction
//!
//! # General Notes
//! This crate provides a job system with 0 allocation overhead at runtime. What this means in
//! practice is that, once the instance is created, no more memory allocations are required
//! to execute jobs.
//!
//! Jobs are allocated from a Job Pool with a fixed capacity. Each thread gets their own pool and
//! queue. The system uses a fixed storage space where it stores the closure for each job. The
//! default storage is set to 64 bytes. This can be extended to 128 by enabling the feature
//! _"job_storage_128"_.
//!
//! When a Job finishes execution there is a possibility to start other jobs. See
//! [JobScope::chain()] for more details.
//!
//! Additionally, in order to avoid running out of jobs during execution, it's recommended that you
//! regularly ensure that all your previous jobs have finished executing.
//!
//! Finally, this crate runs on rust stable and has been tested with rust 1.50.0 with the 2018 edition.
//!
//! # Safety
//! Due to the implementation performing type erasure to store the closures in the pre-allocated
//! space, I have not been able to figure out a way to make the compiler aware of this so that it
//! can enforce lifetime and ownership checks. Until that is addressed, [JobScope::create],
//! [JobScope::create_with_parent] and [JobScope::run] are marked as unsafe and it is up to the
//! caller to guarantee that their safety requirements are upheld.
//!
//! # Panics
//!
//! Due to the queues having a fixed size, if we start filling all the queues up to full capacity there
//! is a small chance that a worker thread may not be able to queue chained jobs. When this happens
//! the system will panic as there is no way to recover from this.
//! The queues come with debug asserts which will detect this situation.
//!
//! # Examples
//! ## Safe
//! These examples provide safe variants that uphold the safety requirements.
//!
//! ### Parallel
//! See [for_each()][JobScope::for_each()] and [for_each_with_result()][JobScope::for_each_with_result]
//! for more details.
//! ```
//! let mut job_sys = jobsys::JobSystem::new(4, 512).unwrap();
//! let job_scope = jobsys::JobScope::new_from_system(&job_sys);
//! let mut array = [0_u32; 100];
//! job_scope.for_each(&mut array, |slice: &mut [u32], start, _end| {
//!         for i in 0..slice.len() {
//!             slice[i] = (start + i) as u32;
//!         }
//!     }).expect("Failed to start jobs");
//! ```
//!
//! ### Single Work Item
//! See [JobInstance][JobInstance] for more details.
//! ```
//! let job_sys = jobsys::JobSystem::new(2, 128).expect("Failed to init job system");
//! let job_scope = jobsys::JobScope::new_from_system(&job_sys);
//! let job_instance = jobsys::JobInstance::create(&job_scope, || {
//!     println!("Hello from Job Instance");
//!     }).unwrap();
//! job_instance.wait_with(|| println!("Waiting on Job to Finish")).expect("Failed to wait on job");
//! ```
//!
//! ## Unsafe
//! The following examples are unsafe due to certain requirements that need to be upheld by the
//! caller, but they can be used as building blocks for safer alternatives.
//! ### Start and Wait
//! ```
//! let job_sys = jobsys::JobSystem::new(4, 512).unwrap();
//! let job_scope = jobsys::JobScope::new_from_system(&job_sys);
//! let mut handle = unsafe{job_scope.create(|| { println!("Hello World!");}).unwrap()};
//! unsafe{job_scope.run(&mut handle).expect("Failed to run job");}
//! job_scope.wait(&mut handle);
//! ```
//! ### Grouping
//! Ensure one job does not complete until other jobs have finished as well.
//! ```
//! let job_sys= jobsys::JobSystem::new(4, 512).unwrap();
//! let job_scope = jobsys::JobScope::new_from_system(&job_sys);
//! let mut parent = job_scope.create_noop().unwrap(); // Create a job that does nothing
//! let mut child = unsafe{job_scope.create_with_parent(&mut parent, || { println!("Hello World!");}).unwrap()};
//! unsafe{job_scope.run(&child).expect("Failed to start child");}
//! unsafe{job_scope.run(&parent).expect("Failed to start parent");}
//! job_scope.wait(&parent); // Parent will only finish when both it and its child have finished
//! ```
//! ### Chaining
//! Launch new jobs as soon as one job completes.
//! ```
//! let job_sys = jobsys::JobSystem::new(4, 512).unwrap();
//! let job_scope = jobsys::JobScope::new_from_system(&job_sys);
//! let mut first = unsafe{job_scope.create(|| { println!("Hello World Chained!");})}.unwrap();
//! let mut second = unsafe{job_scope.create(|| { println!("Hello World Chained!");})}.unwrap();
//! job_scope.chain(&mut first, &second).expect("Failed to chain job, maximum chain count exceeded");
//! unsafe{job_scope.run(&first).expect("Failed to start job");}
//! job_scope.wait(&second); // Second will only be executed after first completes
//! ```
//! ### Creating jobs on job threads
//! ```
//! let job_sys = jobsys::JobSystem::new(4, 512).unwrap();
//! let job_scope = jobsys::JobScope::new_from_system(&job_sys);
//! let mut handle = unsafe{job_scope.create(|| {
//!     let thread_job_scope = jobsys::JobScope::new_from_thread().unwrap();
//!     let thread_job_handle = thread_job_scope.create(|| {
//!         println!("Created on job thread");
//!     }).unwrap();
//!     thread_job_scope.run(&thread_job_handle).expect("Failed to run job");
//! }).unwrap()};
//! unsafe{job_scope.run(&mut handle).expect("Failed to run job");}
//! job_scope.wait(&mut handle);
//! ```
mod job;
mod queues;
mod thread;

use crate::thread::JobThread;
use crate::thread::ThreadData;
use std::marker::PhantomData;
use std::sync::Arc;

type ThreadDataList = Vec<ThreadData>;
/// Work Stealing JobSystem.
pub struct JobSystem {
    thread_data: std::sync::Arc<ThreadDataList>,
    threads: Vec<JobThread>,
}

/// Errors reported by the job system.
///
/// The type implements [`std::error::Error`] so it can be propagated with `?` into
/// `Box<dyn std::error::Error>` or wrapped by other error types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// Failed to create/start a job Thread
    ThreadCreate,
    /// The submitted closure exceeds the storage capacity
    StorageSizeExceeded,
    /// The thread queue is full and can't be submitted to
    QueueFull,
    /// The number of chained jobs has exceeded the limit
    ChainCountExceeded,
    /// Job handle could not be resolved
    HandleResolve,
    /// We have detected multiple instances of a job system created on the same thread. This is not
    /// supported.
    MultipleInstances,
    /// Parent and child handles are the same
    ParentEqualsChild,
    /// Thread Local Job Pool has not been initialized
    InvalidThread,
}

impl std::fmt::Display for Error {
    /// Writes a short, human readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Error::ThreadCreate => "failed to create/start a job thread",
            Error::StorageSizeExceeded => "the submitted closure exceeds the storage capacity",
            Error::QueueFull => "the thread queue is full and can't be submitted to",
            Error::ChainCountExceeded => "the number of chained jobs has exceeded the limit",
            Error::HandleResolve => "job handle could not be resolved",
            Error::MultipleInstances => {
                "multiple instances of a job system were created on the same thread"
            }
            Error::ParentEqualsChild => "parent and child handles are the same",
            Error::InvalidThread => "thread local job pool has not been initialized",
        };
        f.write_str(message)
    }
}

impl std::error::Error for Error {}

impl JobSystem {
    /// Create a new instance of a JobSystem. To create jobs, please create a new instance of a
    /// [JobScope].
    ///
    /// * `thread_count` - Number of worker threads.
    /// * `job_capacity` - Maximum number of jobs the system can allocate. If this is not a power of
    /// 2 it will be rounded up to the next value which is a power of 2.
    pub fn new(thread_count: usize, job_capacity: usize) -> Result<Self, Error> {
        let final_job_capacity = job_capacity.next_power_of_two();
        if !thread::tls_set_main_thread_index() {
            return Err(Error::MultipleInstances);
        }
        let actual_thread_count = thread_count.max(1) + 1;
        let mut data_vec: ThreadDataList = Vec::with_capacity(actual_thread_count);
        data_vec.resize_with(actual_thread_count, || ThreadData::new(final_job_capacity));
        let thread_data = Arc::new(data_vec);
        let mut threads = Vec::with_capacity(thread_count);
        for index in 1..actual_thread_count {
            let mut thread = JobThread::new(thread_data.clone(), index);
            if thread.start(final_job_capacity).is_err() {
                return Err(Error::ThreadCreate);
            }
            threads.push(thread);
        }
        thread::tls_setup_job_pool(final_job_capacity);
        thread::tls_set_thread_data(thread_data.clone());
        Ok(Self {
            thread_data: thread_data.clone(),
            threads,
        })
    }

    fn shutdown(&mut self) {
        for thread in &mut self.threads {
            thread.finish().unwrap()
        }
        self.threads.clear();
        self.thread_data = Arc::new(vec![]);
        thread::tls_reset_main_thread_index();
        thread::tls_reset_job_pool();
        thread::tls_reset_thread_data();
    }
}

impl Drop for JobSystem {
    /// Finishes the worker threads and resets the thread-local state of the owning thread by
    /// delegating to `JobSystem::shutdown`.
    fn drop(&mut self) {
        Self::shutdown(self);
    }
}

/// Handle which represents an allocated job.
///
/// `T` is the type of the closure stored in the job and `'scope` is tied to the borrow of the
/// [JobScope] the handle was created from.
#[derive(Debug, Hash, Eq, PartialEq, Default)]
pub struct ScopedJobHandle<'scope, T> {
    // Identifier used to look the job up in the thread-local job pool.
    h: usize,
    // The phantom data here is required to prevent rust from happily accepting mutable borrows
    // to the same value. E.g.: Without PhantomData<T> the code below will compile without errors.
    // let job_sys = JobSystem::new(2, 128)?;
    // let js = JobScope::new_from_system(&job_sys);
    // let mut v: u32 = 0;
    // for _ in 0..100 {
    //     let x = &mut v;
    //     let handle = unsafe { js.create(move || {
    //         *x += 10;
    //     }) }?;
    // }
    // Note: This only works correctly if the handles are stored somewhere for the duration
    // of the task.
    p: PhantomData<T>,
    // Carries the `'scope` lifetime without storing an actual reference.
    s: PhantomData<&'scope mut ()>,
}

/// Manages the creation and scheduling of jobs. This type can either be created from a JobSystem
/// instance or retrieved from a worker thread.
/// If you tried to use this with a thread that is not being tracked by the job system, all
/// the functions will fail with `Error::InvalidThread`.
#[derive(Debug)]
pub struct JobScope {
    thread_data: std::sync::Arc<ThreadDataList>,
}

impl JobScope {
    /// Create a new instance from a job system.
    pub fn new_from_system(job_system: &JobSystem) -> Self {
        Self {
            thread_data: job_system.thread_data.clone(),
        }
    }

    /// Create a new instance on a thread managed by a job system.
    /// If this calling thread is not managed by the job system, this will fail
    pub fn new_from_thread() -> Result<Self, Error> {
        if let Some(thread_data) = thread::tls_get_thread_data() {
            return Ok(Self { thread_data });
        }
        Err(Error::InvalidThread)
    }

    /// Allocates a new Job that does nothing.
    ///
    /// This can be useful in cases where you need a parent job for grouping control.
    pub fn create_noop(&self) -> Result<ScopedJobHandle<fn()>, Error> {
        /// Job body which intentionally does nothing.
        fn noop() {}
        // SAFETY: `noop` captures nothing, so there are no captures that could dangle or alias.
        unsafe { self.create(noop as fn()) }
    }

    /// Allocate a new Job
    ///
    /// This function will check whether the closure fits in the job storage space and return an
    /// error if that's the case.
    /// # Safety
    /// This function is unsafe since it's not possible to accurately determine whether the provided
    /// closure's capture will be captured multiple times or be alive until this closure has
    /// finished executing.
    /// This method is safe to call provided that you guarantee:
    /// * The closure captures are valid during the execution of the job
    /// * The closure doesn't capture a writeable reference more than once
    pub unsafe fn create<T>(&self, job: T) -> Result<ScopedJobHandle<T>, Error>
    where
        T: Sized + FnOnce() + Send,
    {
        if !job::can_store_type::<T>() {
            return Err(Error::StorageSizeExceeded);
        }
        let tls_job_pool = thread::tls_get_job_pool();
        tls_job_pool.with(|tls| {
            if let Some(pool) = tls.borrow_mut().as_mut() {
                let (job_instance, handle) = pool.allocate();
                job_instance.store(job);
                return Ok(ScopedJobHandle {
                    h: handle,
                    p: PhantomData,
                    s: PhantomData,
                });
            }
            Err(Error::InvalidThread)
        })
    }

    /// Allocate a new Job as a child of another job.
    ///
    /// The newly allocated will be created as a child of a parent job. What this means in practice
    /// is that the new job (child) can run parallel with the parent job and the parent job will
    /// not reach completion status until all of it's children have finished.
    /// # Safety
    /// This function is unsafe since it's not possible to accurately determine whether the provided
    /// closure's capture will be captured multiple times or be alive until this closure has
    /// finished executing.
    /// This method is safe to call provided that you guarantee:
    /// * The closure captures are valid during the execution of the job
    /// * The closure doesn't capture a writeable reference more than once
    pub unsafe fn create_with_parent<'a, T, Y>(
        &'a self,
        parent: &ScopedJobHandle<'a, Y>,
        job: T,
    ) -> Result<ScopedJobHandle<'a, T>, Error>
    where
        T: Sized + FnOnce() + Send,
    {
        if !job::can_store_type::<T>() {
            return Err(Error::StorageSizeExceeded);
        }
        let tls_job_pool = thread::tls_get_job_pool();
        tls_job_pool.with(|tls| {
            if let Some(pool) = tls.borrow_mut().as_mut() {
                if let Some(parent_job) = pool.get_mut_ptr(parent.h) {
                    let (job_instance, handle) = pool.allocate();
                    job_instance.store(job);
                    job_instance.set_parent_job(parent_job);
                    return Ok(ScopedJobHandle {
                        h: handle,
                        p: PhantomData,
                        s: PhantomData,
                    });
                }
                return Err(Error::HandleResolve);
            }
            Err(Error::InvalidThread)
        })
    }

    /// Register the child job as a follow up to the parent job.
    ///
    /// Every Job has the ability to chain follow up jobs on completion. This ensures the child
    /// job only runs when the parent is finished.
    ///
    /// There is a limited capacity for the number of jobs you can chain with one job. This function
    /// will return error if we are unable to register the child job.
    pub fn chain<T, Y>(
        &self,
        parent: &mut ScopedJobHandle<'_, T>,
        child: &ScopedJobHandle<'_, Y>,
    ) -> Result<(), Error>
    where
        T: Sized + FnOnce() + Send,
        Y: Sized + FnOnce() + Send,
    {
        if parent.h == child.h {
            return Err(Error::ParentEqualsChild);
        }

        let tls_job_pool = thread::tls_get_job_pool();
        tls_job_pool.with(|tls| {
            if let Some(job_pool) = tls.borrow_mut().as_mut() {
                let child_job = if let Some(child_job) = job_pool.get_mut_ptr(child.h) {
                    child_job
                } else {
                    return Err(Error::HandleResolve);
                };
                if let Some(parent_job) = job_pool.get_mut(parent.h) {
                    return match parent_job.chain_job(child_job) {
                        Err(_) => Err(Error::ChainCountExceeded),
                        _ => Ok(()),
                    };
                }
                return Err(Error::HandleResolve);
            }
            Err(Error::InvalidThread)
        })
    }

    /// Execute a job.
    /// # Safety
    /// This is currently marked unsafe due to the reasons described in
    /// [JobScope::create()] and [JobScope::create_with_parent()].
    pub unsafe fn run<T>(&self, handle: &ScopedJobHandle<T>) -> Result<(), Error>
    where
        T: Sized + FnOnce() + Send,
    {
        let tls_job_pool = thread::tls_get_job_pool();
        tls_job_pool.with(|tls| {
            if let Some(job_pool) = tls.borrow_mut().as_mut() {
                if let Some(job) = job_pool.get_mut(handle.h) {
                    return match thread::start_job(&self.thread_data, job) {
                        true => Ok(()),
                        false => Err(Error::QueueFull),
                    };
                }
                return Err(Error::HandleResolve);
            }
            Err(Error::InvalidThread)
        })
    }

    /// Block and wait until the job has finished.
    ///
    /// While we wait, the system will try to execute other jobs on the calling thread. Any error
    /// reported by [JobScope::is_finished] ends the wait and is returned.
    pub fn wait<T>(&self, handle: &ScopedJobHandle<T>) -> Result<(), Error>
    where
        T: Sized + FnOnce() + Send,
    {
        // The thread-local pool must not stay borrowed across iterations: a job executed below
        // may itself need the pool of this thread, which would trigger the RefCell panic. The
        // state is therefore polled through `is_finished`, which releases the borrow on return.
        loop {
            if self.is_finished(handle)? {
                return Ok(());
            }
            if let Some(other_job) = thread::get_job(&self.thread_data) {
                thread::run_job(&self.thread_data, other_job);
            }
        }
    }

    /// Check if a job has finished execution. Does not block.
    pub fn is_finished<T>(&self, handle: &ScopedJobHandle<T>) -> Result<bool, Error>
    where
        T: Sized + FnOnce() + Send,
    {
        let tls_job_pool = thread::tls_get_job_pool();
        tls_job_pool.with(|tls| {
            if let Some(job_pool) = tls.borrow().as_ref() {
                if let Some(job) = job_pool.get(handle.h) {
                    return Ok(job.is_finished());
                }
                return Err(Error::HandleResolve);
            }
            Err(Error::InvalidThread)
        })
    }

    /// Given a slice of data and read-only closure, divide the slice into unique sub-slices which
    /// are distributed to the worker threads.
    ///
    /// The closure will receive the following parameters in order:
    /// * unique sub-slice over which the the current thread is operating on
    /// * start index of the full slice
    /// * end index of the the full slice
    /// Note use [for_each_with_result()][JobScope::for_each_with_result()] if you wish to
    /// produce output.
    pub fn for_each<'env, T, Y>(&self, slice: &'env mut [Y], cb: T) -> Result<(), Error>
    where
        T: Fn(&mut [Y], usize, usize) + 'env + Send + Sync,
        Y: Send,
    {
        let parent_job = self.create_noop()?;
        const DEFAULT_GROUP_SIZE: usize = 64;
        let divisor = self.thread_data.len() - 1;
        let group_size = (slice.len() / divisor).max(DEFAULT_GROUP_SIZE);
        let mut offset = 0_usize;

        for work_slice in slice.chunks_mut(group_size) {
            let callback = &cb;
            let offset_copy = offset;
            offset += work_slice.len();
            let child_job = unsafe {
                // This is safe since we guarantee that in this function that we wait until
                // all children have finished executing and the closure's captures are
                // non-overlapping
                self.create_with_parent(&parent_job, move || {
                    callback(work_slice, offset_copy, offset_copy + work_slice.len());
                })?
            };
            unsafe {
                // See safety comment above
                self.run(&child_job)?
            };
        }
        unsafe {
            // See safety comment above
            self.run(&parent_job)?
        };
        self.wait(&parent_job)?;
        Ok(())
    }

    /// Same as [for_each()][JobScope::for_each()] but allows a result type to be returned for every individual group.
    pub fn for_each_with_result<'env, T, Y, Z>(
        &self,
        slice: &'env mut [Y],
        cb: T,
    ) -> Result<Vec<Z>, Error>
    where
        T: Fn(&[Y], usize, usize) -> Z + 'env + Send + Sync,
        Z: Sized + Default + Send,
        Y: Send,
    {
        let mut parent_job = self.create_noop().unwrap();

        const DEFAULT_GROUP_SIZE: usize = 64;
        let divisor = self.thread_data.len() - 1;
        let group_size = (slice.len() / divisor).max(DEFAULT_GROUP_SIZE);
        let group_count = (slice.len() as f64 / group_size as f64).ceil() as usize;
        let mut offset = 0_usize;
        let mut group_index = 0_usize;
        // While we could use Vec![z::default;group_count] here, this would not work for types that
        // are not cloneable
        let mut result_vec = Vec::<Z>::default();
        result_vec.resize_with(group_count, || Z::default());
        let mut result_iter = result_vec.iter_mut();
        for work_slice in slice.chunks_mut(group_size) {
            debug_assert!(group_index < group_count);
            let result_ref = result_iter
                .next()
                .expect("Result Vector calculation is incorrect");
            let offset_copy = offset;
            offset += work_slice.len();
            let callback = &cb;
            let child_job = unsafe {
                // This is safe since we guarantee that in this function that we wait until
                // all children have finished executing and the closure's captures are
                // non-overlapping
                self.create_with_parent(&mut parent_job, move || {
                    *result_ref = callback(work_slice, offset_copy, offset_copy + work_slice.len());
                })
            }?;
            unsafe {
                // See safety comment above
                self.run(&child_job)?;
            }
            group_index += 1;
        }
        unsafe {
            // See safety comment above
            self.run(&parent_job)?;
        }
        self.wait(&parent_job)?;
        Ok(result_vec)
    }
}

/// JobInstance provides a safe interface to schedule a Job and ensures that we wait until the job
/// has finished execution before it goes out of scope.
///
/// **NOTE** It is recommended you call one of the [wait][JobInstance::wait] or [wait_with][JobInstance::wait_with]
/// functions before the type goes out of scope and gets dropped. If we detect an error during the
/// `drop()` call, we will issue a panic.
#[derive(Debug)]
pub struct JobInstance<'scope, Job>
where
    Job: Sized + Send + FnOnce(),
{
    // Scope used to start the job and to wait on it.
    scope: &'scope JobScope,
    // Handle of the job that was scheduled by `create`.
    handle: ScopedJobHandle<'scope, Job>,
}

impl<'scope, Job> JobInstance<'scope, Job>
where
    Job: Sized + Send + FnOnce(),
{
    /// Attempt to create a new instance of job. The job handle will be allocated and scheduled
    /// to run immediately.
    ///
    /// Any error from allocating or starting the job is returned unchanged.
    pub fn create(
        job_scope: &'scope JobScope,
        job: Job,
    ) -> Result<JobInstance<'scope, Job>, Error> {
        // The handle ends up inside the returned instance, whose `Drop` implementation waits for
        // the job before the instance goes away.
        let handle = unsafe { job_scope.create(job)? };
        unsafe { job_scope.run(&handle)? };
        Ok(JobInstance {
            scope: job_scope,
            handle,
        })
    }

    /// Blocking wait until the job has finished.
    pub fn wait(&self) -> Result<(), Error> {
        let scope = self.scope;
        scope.wait(&self.handle)
    }

    /// Run the provided closure once, then perform a blocking wait until the job has finished.
    pub fn wait_with<FN>(&self, cb: FN) -> Result<(), Error>
    where
        FN: FnOnce(),
    {
        cb();
        self.wait()
    }

    /// Check whether the job has finished, does not block.
    pub fn is_finished(&self) -> Result<bool, Error> {
        let scope = self.scope;
        scope.is_finished(&self.handle)
    }
}

impl<'scope, Job: FnOnce() + Sized + Send> Drop for JobInstance<'scope, Job> {
    /// Waits for the job to finish. If the wait reports an error, this function will panic. It is
    /// recommended that you call one of the [wait][JobInstance::wait] or
    /// [wait_with][JobInstance::wait_with] functions on the [JobInstance][JobInstance] type
    /// instead of relying on this.
    fn drop(&mut self) {
        let outcome = self.scope.wait(&self.handle);
        if outcome.is_err() {
            panic!("Failed to wait on job handle");
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::{JobInstance, JobScope, JobSystem};
    use std::cell::RefCell;

    const THREAD_COUNT: usize = 4;
    const JOB_CAPACITY: usize = 1024;

    /// The system can be created; dropping the result shuts it down again.
    #[test]
    fn start_stop() {
        let created = JobSystem::new(THREAD_COUNT, JOB_CAPACITY);
        assert!(created.is_ok());
    }

    /// Repeatedly launches batches of jobs on a system with a very small job capacity to make
    /// sure job slots can be reused once the previous batch has finished.
    #[test]
    fn launch_jobs_check_overflow() {
        const ROUNDS: u32 = 64;
        // One less than the job capacity requested below.
        const JOB_COUNT: usize = 7;

        let job_sys = JobSystem::new(THREAD_COUNT, 8).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        for _ in 0..ROUNDS {
            let mut jobs = Vec::with_capacity(JOB_COUNT);
            for _ in 0..JOB_COUNT {
                let handle = job_scope.create_noop().unwrap();
                unsafe {
                    assert!(job_scope.run(&handle).is_ok());
                }
                jobs.push(handle);
            }
            // Finish the whole batch before starting the next round.
            for job in &jobs {
                job_scope.wait(job).expect("Wait Failed");
            }
        }
    }

    /// Jobs running on job threads can create and start further jobs through a scope obtained
    /// with `JobScope::new_from_thread`.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn launch_jobs_from_job_threads() {
        // This test fails on miri with an error related to multiple write accesses to the job
        // pool's job array. As long as the system doesn't overflow, this is safe since the
        // writes are independent and the array does not change size during execution. This is
        // most likely related to the mutable borrow from the tls key, and it only happens when
        // the thread that is actively waiting on a job to finish schedules a job during
        // JobScope::wait() on the same thread.
        const JOB_COUNT: u32 = 32;
        let job_sys = JobSystem::new(THREAD_COUNT, 128).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);

        let jobs: Vec<_> = (0..JOB_COUNT)
            .map(|_| {
                // Body of the outer job: start a nested no-op job from whichever thread runs it.
                let spawn_nested = || {
                    let thread_job_scope = JobScope::new_from_thread().unwrap();
                    let thread_job_handle = thread_job_scope.create_noop().unwrap();
                    let started = unsafe { thread_job_scope.run(&thread_job_handle) };
                    started.expect("Failed to run job");
                };
                let created = unsafe { job_scope.create(spawn_nested) };
                let handle = created.unwrap();
                unsafe { assert!(job_scope.run(&handle).is_ok()) };
                handle
            })
            .collect();

        for job in &jobs {
            job_scope.wait(job).expect("Wait Failed");
        }
    }

    /// Every job adds 10 to a shared atomic counter; the total must match once all jobs are done.
    #[test]
    fn launch_jobs_with_ref() {
        use std::sync::atomic::{AtomicU32, Ordering};
        use std::sync::Arc;

        const JOB_COUNT: usize = 100;
        let job_sys =
            JobSystem::new(THREAD_COUNT, JOB_CAPACITY).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        let total = Arc::new(AtomicU32::new(0));

        let jobs: Vec<_> = (0..JOB_COUNT)
            .map(|_| {
                let shared = Arc::clone(&total);
                let add_ten = move || {
                    shared.fetch_add(10, Ordering::Release);
                };
                let created = unsafe { job_scope.create(add_ten) };
                let handle = created.unwrap();
                unsafe {
                    assert!(job_scope.run(&handle).is_ok());
                }
                handle
            })
            .collect();

        for job in &jobs {
            job_scope.wait(job).expect("Wait failed");
        }
        let expected = 10 * JOB_COUNT as u32;
        assert_eq!(total.load(Ordering::Acquire), expected);
    }

    /// Builds a chain of jobs where each job is registered as the follow up of the previous one,
    /// starts the first job and waits for the last one.
    #[test]
    fn launch_jobs_chained() {
        const JOB_COUNT: usize = 20;
        let job_sys =
            JobSystem::new(THREAD_COUNT, JOB_CAPACITY).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        // RefCell allows borrowing the previous handle mutably while the current one is borrowed
        // immutably from the same vector.
        let mut jobs = Vec::with_capacity(JOB_COUNT);
        for i in 0..JOB_COUNT {
            let created = unsafe {
                job_scope.create(move || {
                    println!("Chained {:?}: Job {:02}", std::thread::current().id(), i);
                })
            };
            jobs.push(RefCell::new(created.unwrap()));
            if let Some(previous_index) = i.checked_sub(1) {
                let mut previous = jobs[previous_index].borrow_mut();
                let current = jobs[i].borrow();
                job_scope
                    .chain(&mut previous, &current)
                    .expect("Failed to chain");
            }
        }
        {
            let first = jobs.first().unwrap().borrow_mut();
            unsafe {
                assert!(job_scope.run(&first).is_ok());
            }
        }
        let last = jobs.last().unwrap().borrow_mut();
        job_scope.wait(&last).expect("Wait failed");
    }

    /// `for_each` must hand every element to exactly one job together with correct offsets:
    /// afterwards every element holds its own index.
    #[test]
    fn parallel_for() {
        let job_sys =
            JobSystem::new(THREAD_COUNT, JOB_CAPACITY).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        let mut array = [0_u32; 100];
        let outcome = job_scope.for_each(&mut array, |slice: &mut [u32], start, _end| {
            for (i, value) in slice.iter_mut().enumerate() {
                *value = (start + i) as u32;
            }
        });
        assert!(outcome.is_ok());
        for (i, value) in array.iter().enumerate() {
            assert_eq!(*value as usize, i);
        }
    }

    /// Creates a no-op parent with a number of empty children, starts the parent before the
    /// children and then waits on the parent.
    #[test]
    fn launch_with_parent() {
        const JOB_COUNT: usize = 20;
        let job_sys =
            JobSystem::new(THREAD_COUNT, JOB_CAPACITY).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        let parent = job_scope.create_noop().unwrap();

        let mut jobs = Vec::with_capacity(JOB_COUNT);
        for _ in 1..JOB_COUNT {
            // The children intentionally do no work.
            let child = unsafe { job_scope.create_with_parent(&parent, move || {}) };
            jobs.push(child.unwrap());
        }

        unsafe {
            assert!(job_scope.run(&parent).is_ok());
        }
        jobs.iter().for_each(|job| unsafe {
            assert!(job_scope.run(job).is_ok());
        });
        job_scope.wait(&parent).expect("Wait failed");
    }

    /// Every group reports how many elements it covered; the sum must equal the slice length.
    #[test]
    fn parallel_for_with_result() {
        let job_sys =
            JobSystem::new(THREAD_COUNT, JOB_CAPACITY).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        let mut array = [0_u32; 200];
        let group_len = |_slice: &[u32], start: usize, end: usize| -> u32 { (end - start) as u32 };
        let outcome = job_scope.for_each_with_result(&mut array, group_len);
        assert!(outcome.is_ok());
        let total = outcome.unwrap().into_iter().fold(0_u32, |sum, len| sum + len);
        assert_eq!(total, 200_u32);
    }

    /* This test is actually unsafe as it captures multiple references to the same variable. At
    the moment I haven't figured out a way to make this safe, but it is kept here for reference.
    #[test]
    fn test_ref_creation() {
        //let mut jobs = Vec::<_>::with_capacity(200);
        {
            let job_sys = JobSystem::new(2, 128).unwrap();
            let js = JobScope::new_from_system(&job_sys);
            let mut v: u32 = 0;
            for _ in 0..100 {
                let x = &mut v;
                let handle = unsafe {
                    js
                        .create(move || {
                            *x += 10;
                        })
                        .unwrap()
                };
                js.run(&handle).unwrap();
                //jobs.push(handle);
            }
        }
    }*/

    /// A `JobInstance` that is never waited on explicitly must wait for its job when dropped.
    #[test]
    fn job_instance_wait_drop() {
        let job_sys =
            JobSystem::new(THREAD_COUNT, JOB_CAPACITY).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        // Unwrap the result: otherwise a failed `create` would go unnoticed and no instance
        // would exist whose drop behaviour could be exercised.
        let _job_instance = JobInstance::create(&job_scope, || {
            println!("Hello from Job Instance (wait drop)");
        })
        .expect("Failed to create job instance");
    }

    /// `wait_with` runs the provided closure and then waits for the job to finish.
    #[test]
    fn job_instance_wait_with() {
        let job_sys =
            JobSystem::new(THREAD_COUNT, JOB_CAPACITY).expect("Failed to init job system");
        let job_scope = JobScope::new_from_system(&job_sys);
        let created = JobInstance::create(&job_scope, || {
            println!("Hello from Job Instance");
        });
        let job_instance = created.unwrap();
        let waited = job_instance.wait_with(|| println!("Waiting on Job to Finish"));
        waited.expect("Failed to wait on job");
    }
}