1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#![forbid(warnings)]
#![forbid(unsafe_code)]

extern crate crossbeam_channel;
extern crate num_cpus;

use crossbeam_channel::bounded;
use std::marker::Send;
use std::marker::Sync;
use std::sync::Arc;
use std::thread;
use std::thread::JoinHandle;

/// Iterator over results that are computed by background threads.
///
/// `O` is the type of the items yielded by the iterator.
pub struct ParallelIterator<O> {
    /// Receiving side of the results channel, consumed as a blocking iterator.
    channel: crossbeam_channel::IntoIter<O>,
    /// Join handles of the spawned child threads that have not been joined
    /// yet.
    threads: Vec<JoinHandle<()>>,
}

impl<O> ParallelIterator<O> {
    /// Spawns one producer thread and `num_cpus::get()` worker threads, and
    /// returns an iterator over the values the workers compute.
    ///
    /// Results are yielded in whatever order the workers send them; the order
    /// in which the producer emitted the jobs is not restored.
    ///
    /// If the returned iterator is dropped before it is exhausted, the child
    /// threads notice that their channel got disconnected and exit quietly.
    ///
    /// # Type parameters
    ///
    /// * `PC` - Producer constructor. Called exactly once, on the producer
    ///   thread, so the producer it returns may be `!Send` and `!Sync`.
    /// * `XC` - Xform constructor. Called once on every worker thread, so the
    ///   xform closure it returns may be `!Send` and `!Sync`. This can be
    ///   useful for thread local caches and for re-using large allocations
    ///   between different tasks, packaged as a closure.
    /// * `P` - Producer. Anything that can be turned into an iterator of jobs;
    ///   consumed on the producer thread.
    /// * `X` - Xform closure. Applied to each job produced by the producer, in
    ///   parallel by the worker threads.
    /// * `I` - Input item, i.e. a job produced by the producer and consumed by
    ///   an xform closure.
    /// * `O` - Output item. Returned by the xform closures and by
    ///   `Iterator::next`.
    pub fn new<PC, XC, P, X, I>(producer_ctor: PC, xform_ctor: XC) -> Self
    where
        PC: 'static + Send + FnOnce() -> P,
        XC: 'static + Send + Sync + Fn() -> X,
        X: FnMut(I) -> O,
        I: 'static + Send,
        O: 'static + Send,
        P: IntoIterator<Item = I>,
    {
        let mut threads = vec![];
        let jobs_rx = {
            let (tx, rx) = bounded(1);
            let join_handle = thread::spawn(move || {
                for e in producer_ctor() {
                    // A send can only fail once every worker has dropped its
                    // receiver, i.e. all workers are gone (the iterator was
                    // dropped early, or the workers panicked). There is
                    // nobody left to hand jobs to, so stop producing. Worker
                    // panics are still reported when the threads are joined.
                    if tx.send(e).is_err() {
                        break;
                    }
                }
            });
            threads.push(join_handle);
            rx
        };
        let results_rx = {
            let (tx, rx) = bounded(1);
            // Shared so that every worker can build its own xform closure.
            let xform_ctor = Arc::new(xform_ctor);
            for _ in 0..num_cpus::get() {
                let tx = tx.clone();
                let jobs_rx = jobs_rx.clone();
                let xform_ctor = Arc::clone(&xform_ctor);
                let join_handle = thread::spawn(move || {
                    let mut xform = xform_ctor();
                    for e in jobs_rx {
                        // A send can only fail once the results receiver has
                        // been dropped, i.e. the iterator itself is gone.
                        // That is a normal early shutdown, not an error.
                        if tx.send(xform(e)).is_err() {
                            break;
                        }
                    }
                });
                threads.push(join_handle);
            }
            // The original `tx` is dropped at the end of this block, so the
            // results channel disconnects as soon as all workers are done.
            rx
        };
        Self {
            channel: results_rx.into_iter(),
            threads,
        }
    }

    /// Joins all child threads that have not been joined yet.
    ///
    /// # Panics
    ///
    /// Panics if any of the child threads has panicked.
    fn join_threads(&mut self) {
        while let Some(join_handle) = self.threads.pop() {
            // Using expect() here since trying to get the inner panic message
            // in a typesafe way is not possible?
            join_handle.join().expect("A child thread has paniced.");
        }
    }
}

impl<T> Iterator for ParallelIterator<T> {
    type Item = T;

    /// Returns the next result received from the worker threads.
    ///
    /// Once the results channel has nothing more to yield, all child threads
    /// are joined and `None` is returned.
    ///
    /// # Panics
    ///
    /// Panics if a child thread is found to have panicked while joining.
    fn next(&mut self) -> Option<T> {
        match self.channel.next() {
            Some(value) => Some(value),
            None => {
                // The channel is exhausted: collect the child threads so that
                // a panic in any of them is surfaced to the caller.
                self.join_threads();
                None
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::ParallelIterator;

    /// Test helper: adds every integer in `0..1000` to `i`, wrapping on
    /// overflow.
    fn do_some_work(i: u32) -> u32 {
        let mut total = i;
        for step in 0..1000u32 {
            total = total.wrapping_add(step);
        }
        total
    }

    /// The wrapping sum of all results must be the same whether the jobs are
    /// processed by `ParallelIterator` or by a plain sequential iterator.
    #[test]
    fn test_parallel_vs_sequential() {
        let make_jobs = || 0u32..100;
        let make_worker = || do_some_work;
        // Wrapping addition is commutative, so the order in which the results
        // arrive does not influence the sum.
        let combine = |sum: u32, item: u32| sum.wrapping_add(item);

        let sequential = make_jobs().map(do_some_work).fold(0, &combine);
        let parallel =
            ParallelIterator::new(make_jobs, make_worker).fold(0, &combine);

        assert_eq!(parallel, sequential);
    }
}