1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*!
Crate `ordered-parallel-iterator` provides an iterator that runs tasks in parallel and yields their results in the order of the source iterator. It is useful when you need to process data in parallel but still need the results in their order of appearance (FIFO).

# Installation

Add the following dependency to your `Cargo.toml`:

```toml,ignore
[dependencies]
ordered-parallel-iterator = "0.1"
```

# Usage

```rust
use ordered_parallel_iterator::OrderedParallelIterator;

fn run_me(x: usize) -> usize {
    x + 1
}

fn main() {
    for i in OrderedParallelIterator::new(|| 0..10, || run_me) {
        println!("Result from iterator: {}", i);
    }
}
```

In this example each `run_me` call runs in its own thread, but the results are returned sequentially, each one as soon as the earliest outstanding task has finished. The number of pending tasks running in parallel is tied to the number of CPU cores.
*/

use crossbeam::channel::bounded;
use crossbeam::deque::{Steal, Stealer, Worker};
use std::sync::atomic::{AtomicBool, Ordering};

use std::marker::Send;
use std::marker::Sync;
use std::sync::Arc;
use std::thread;
use std::thread::JoinHandle;

use std_semaphore::Semaphore;

/// Iterator that yields the results of tasks executed on worker threads, in
/// the order in which the tasks were scheduled.
pub struct OrderedParallelIterator<O> {
    /// Handle of the scheduler thread, which pulls items from the producer and
    /// spawns one worker thread per item. `next` takes and joins it once the
    /// iteration is over.
    scheduler_thread: Option<JoinHandle<()>>,
    /// Stealing end of the FIFO queue of worker handles filled by the scheduler.
    tasks: Stealer<JoinHandle<O>>,
    /// Permits the scheduler acquires before spawning a worker; `next`
    /// releases one permit per call.
    semaphore: Arc<Semaphore>,
    /// Set to `false` by the scheduler thread once it has scheduled every item.
    running: Arc<AtomicBool>,
}

impl<O> OrderedParallelIterator<O> {
    /// Creates the iterator and immediately starts scheduling work in the
    /// background.
    ///
    /// * `producer_ctor` is called once, on a dedicated scheduler thread, and
    ///   returns the source of input items.
    /// * `xform_ctor` is called once per item, on that item's worker thread,
    ///   and returns the transformation applied to the item.
    ///
    /// The scheduler acquires one semaphore permit per item before spawning
    /// its worker. The semaphore starts with one permit per CPU core, and the
    /// consumer hands a permit back on every call to `next`.
    ///
    /// This call blocks until the scheduler has queued the first task, or has
    /// found the producer to be empty.
    ///
    /// # Panics
    ///
    /// Panics if the scheduler thread dies before handing over the task queue,
    /// for example when `producer_ctor` panics before yielding its first item.
    pub fn new<PC, XC, P, X, I>(producer_ctor: PC, xform_ctor: XC) -> Self
    where
        PC: 'static + Send + FnOnce() -> P,
        XC: 'static + Send + Sync + Fn() -> X,
        X: FnMut(I) -> O,
        I: 'static + Send,
        O: 'static + Send,
        P: IntoIterator<Item = I>,
    {
        /// Clears the shared "still scheduling" flag when dropped, so the
        /// consumer is released even if the scheduler thread unwinds.
        struct ClearOnDrop(Arc<AtomicBool>);

        impl Drop for ClearOnDrop {
            fn drop(&mut self) {
                self.0.store(false, Ordering::Release);
            }
        }

        let semaphore = Arc::new(Semaphore::new(num_cpus::get() as isize));
        // Exactly one message (the stealer) ever travels through this channel.
        let (tx, rx) = bounded(1);
        let semaphore_copy = semaphore.clone();
        let xform_ctor = Arc::new(xform_ctor);
        let running_flag = Arc::new(AtomicBool::new(true));
        let running = running_flag.clone();
        let scheduler_thread = Some(thread::spawn(move || {
            // Without this guard a panicking producer would leave the flag set
            // forever and the consumer would spin on an empty queue.
            let scheduling = ClearOnDrop(running_flag);
            let tasks = Worker::new_fifo();
            let mut first = true;
            for e in producer_ctor() {
                // Wait for a free slot before starting another worker.
                semaphore_copy.acquire();
                let xform_ctor = xform_ctor.clone();
                let worker_thread = thread::spawn(move || {
                    let mut xform = xform_ctor();
                    xform(e)
                });
                tasks.push(worker_thread);
                if first {
                    // Hand the stealing end over as soon as there is something
                    // to steal; this unblocks the constructor.
                    let stealer = tasks.stealer();
                    tx.send(stealer).unwrap();
                    first = false;
                }
            }
            // Every item has been scheduled: publish that before anything else.
            drop(scheduling);
            if first {
                // means empty range
                let stealer = tasks.stealer();
                tx.send(stealer).unwrap();
            }
        }));

        let tasks = rx.recv().unwrap();

        Self {
            scheduler_thread,
            tasks,
            semaphore,
            running,
        }
    }
}

impl<T> Iterator for OrderedParallelIterator<T> {
    type Item = T;

    /// Returns the result of the oldest scheduled task, blocking until that
    /// task has finished, or `None` once every task has been consumed.
    ///
    /// Calling `next` again after it has returned `None` keeps returning
    /// `None`.
    ///
    /// # Panics
    ///
    /// Panics if the worker thread whose result is due has panicked, or if the
    /// scheduler thread has panicked.
    fn next(&mut self) -> Option<T> {
        // Hand one permit back so the scheduler may start another worker.
        self.semaphore.release();

        // Becomes true once the scheduler thread is known to have terminated.
        // Only then does an empty queue mean the iteration is over.
        let mut scheduler_done = false;
        loop {
            match self.tasks.steal() {
                Steal::Success(worker) => {
                    return Some(worker.join().expect("Cannot get data from thread"));
                }
                Steal::Retry => (),
                Steal::Empty if scheduler_done => return None,
                Steal::Empty => {
                    if self.running.load(Ordering::Acquire) {
                        // The scheduler is still producing; give it CPU time
                        // instead of spinning hot.
                        thread::yield_now();
                    } else {
                        // The scheduler may have queued its last task between
                        // the failed steal above and the flag check. Joining
                        // the thread synchronises with everything it did, so
                        // one more pass over the queue is conclusive.
                        if let Some(scheduler) = self.scheduler_thread.take() {
                            scheduler
                                .join()
                                .expect("The scheduler thread has paniced.");
                        }
                        scheduler_done = true;
                    }
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::OrderedParallelIterator;

    /// Transformation used by the tests: maps `x` to its successor.
    fn run_me(x: usize) -> usize {
        x + 1
    }

    /// The first ten results come back in the order of the input range.
    #[test]
    fn it_works() {
        let mut results = OrderedParallelIterator::new(|| 0..10, || run_me);
        for expected in 1..=10 {
            assert_eq!(results.next(), Some(expected));
        }
    }

    /// An empty producer must not yield a single item.
    #[test]
    fn empty() {
        let mut results = OrderedParallelIterator::new(|| 0..0, || run_me);
        if results.next().is_some() {
            panic!("Must not reach this point");
        }
    }
}