//! Work-stealing queue for efficient multi-threaded data loading
//!
//! This module provides a work-stealing queue implementation that allows
//! worker threads to steal work from other threads when they run out of tasks,
//! leading to better load balancing and higher throughput.
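//!
//! A minimal usage sketch (marked `ignore` because the crate path for the
//! doctest depends on where this module is mounted):
//!
//! ```ignore
//! let queue = WorkStealingQueue::new(2);
//! queue.push(1); // round-robin: goes to worker 0's queue
//! queue.push(2); // goes to worker 1's queue
//! // Worker 0 drains its own queue first, then steals from worker 1.
//! while let Some(item) = queue.get_work(0) {
//!     println!("processing {}", item);
//! }
//! ```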
use std::collections::VecDeque;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Condvar, Mutex};
/// A work-stealing queue that allows multiple workers to efficiently share work.
/// Workers can push/pop from their own queue and steal from others when idle.
pub struct WorkStealingQueue<T> {
/// Local queues for each worker thread
worker_queues: Vec<Arc<Mutex<VecDeque<T>>>>,
/// Number of worker threads
num_workers: usize,
    /// Atomic counter for round-robin work distribution (shared across clones)
    next_worker: Arc<AtomicUsize>,
/// Signal for shutdown
shutdown: Arc<AtomicBool>,
/// Condition variable for blocking when no work is available
work_available: Arc<(Mutex<bool>, Condvar)>,
    /// Total number of tasks in the system (shared across clones)
    total_tasks: Arc<AtomicUsize>,
}
impl<T> WorkStealingQueue<T> {
    /// Create a new work-stealing queue with the specified number of workers.
    /// Panics if `num_workers` is zero.
    pub fn new(num_workers: usize) -> Self {
        assert!(num_workers > 0, "num_workers must be at least 1");
let mut worker_queues = Vec::with_capacity(num_workers);
for _ in 0..num_workers {
worker_queues.push(Arc::new(Mutex::new(VecDeque::new())));
}
Self {
worker_queues,
num_workers,
            next_worker: Arc::new(AtomicUsize::new(0)),
shutdown: Arc::new(AtomicBool::new(false)),
work_available: Arc::new((Mutex::new(false), Condvar::new())),
            total_tasks: Arc::new(AtomicUsize::new(0)),
}
}
/// Push work to the queue. Uses round-robin distribution to balance load.
pub fn push(&self, item: T) {
let worker_id = self.next_worker.fetch_add(1, Ordering::Relaxed) % self.num_workers;
{
let mut queue = self.worker_queues[worker_id]
.lock()
.expect("lock should not be poisoned");
queue.push_back(item);
}
// Update task count and notify waiting workers
self.total_tasks.fetch_add(1, Ordering::Relaxed);
let (lock, cvar) = &*self.work_available;
{
let mut available = lock.lock().expect("lock should not be poisoned");
*available = true;
}
cvar.notify_all();
}
/// Pop work from a specific worker's queue (used by the worker itself)
pub fn pop(&self, worker_id: usize) -> Option<T> {
if worker_id >= self.num_workers {
return None;
}
let mut queue = self.worker_queues[worker_id]
.lock()
.expect("lock should not be poisoned");
let item = queue.pop_front();
if item.is_some() {
self.total_tasks.fetch_sub(1, Ordering::Relaxed);
}
item
}
/// Steal work from other workers when this worker has no work
pub fn steal(&self, worker_id: usize) -> Option<T> {
if worker_id >= self.num_workers {
return None;
}
        // Scan the other workers round-robin, starting from the next worker
        // after this one; the scan visits every queue except our own.
        let start_offset = (worker_id + 1) % self.num_workers;
        for i in 0..self.num_workers - 1 {
            let target_worker = (start_offset + i) % self.num_workers;
let mut queue = self.worker_queues[target_worker]
.lock()
.expect("lock should not be poisoned");
            // Steal from the back: the owner pops from the front, so thieves
            // take the items the owner would reach last.
if let Some(item) = queue.pop_back() {
self.total_tasks.fetch_sub(1, Ordering::Relaxed);
return Some(item);
}
}
None
}
/// Try to get work for a specific worker (pop from own queue or steal from others)
pub fn get_work(&self, worker_id: usize) -> Option<T> {
// First try to pop from own queue
if let Some(item) = self.pop(worker_id) {
return Some(item);
}
// If no local work, try to steal from others
self.steal(worker_id)
}
    /// Wait for work to become available, blocking until an item arrives.
    /// Returns `None` if the optional timeout elapses, the system has no
    /// tasks left, or shutdown is signaled.
    pub fn wait_for_work(&self, worker_id: usize, timeout_ms: Option<u64>) -> Option<T> {
        let (lock, cvar) = &*self.work_available;
        loop {
            // Check for shutdown first so workers can exit promptly.
            if self.shutdown.load(Ordering::Relaxed) {
                return None;
            }
            // Try to get work (own queue first, then steal from others).
            if let Some(item) = self.get_work(worker_id) {
                return Some(item);
            }
            let mut available = lock.lock().expect("lock should not be poisoned");
            // A push may have raced with the get_work attempt above. If the
            // flag was set in the meantime, consume it and retry instead of
            // waiting, so that notification is not lost.
            if *available {
                *available = false;
                continue;
            }
            // No pending notification and no tasks anywhere: report exhaustion.
            if self.total_tasks.load(Ordering::Relaxed) == 0 {
                return None;
            }
            // Tasks exist but another worker may grab them first; block until
            // the next push (or shutdown) notifies us, then retry.
            if let Some(timeout) = timeout_ms {
                let (_guard, result) = cvar
                    .wait_timeout(available, std::time::Duration::from_millis(timeout))
                    .expect("condvar wait_timeout should not fail");
                if result.timed_out() {
                    return None;
                }
            } else {
                let _guard = cvar.wait(available).expect("condvar wait should not fail");
            }
        }
    }
/// Signal shutdown to all workers
pub fn shutdown(&self) {
self.shutdown.store(true, Ordering::Relaxed);
let (lock, cvar) = &*self.work_available;
{
let mut available = lock.lock().expect("lock should not be poisoned");
*available = true;
}
cvar.notify_all();
}
/// Check if the queue is empty (all workers have no work)
pub fn is_empty(&self) -> bool {
self.total_tasks.load(Ordering::Relaxed) == 0
}
/// Get the total number of tasks currently in the system
pub fn total_tasks(&self) -> usize {
self.total_tasks.load(Ordering::Relaxed)
}
    /// Get the current length of each worker's local queue
pub fn queue_lengths(&self) -> Vec<usize> {
self.worker_queues
.iter()
.map(|queue| queue.lock().expect("lock should not be poisoned").len())
.collect()
}
/// Get the number of workers
pub fn num_workers(&self) -> usize {
self.num_workers
}
}
/// Cloning yields another handle to the same queue: the worker queues, the
/// task counter, the round-robin cursor, and the shutdown signal are all
/// shared, so clones stay consistent with each other.
impl<T> Clone for WorkStealingQueue<T> {
    fn clone(&self) -> Self {
        Self {
            worker_queues: self.worker_queues.clone(),
            num_workers: self.num_workers,
            next_worker: Arc::clone(&self.next_worker),
            shutdown: Arc::clone(&self.shutdown),
            work_available: Arc::clone(&self.work_available),
            total_tasks: Arc::clone(&self.total_tasks),
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread;
use std::time::Duration;
#[test]
fn test_basic_push_pop() {
let queue = WorkStealingQueue::new(2);
queue.push(1);
queue.push(2);
assert_eq!(queue.total_tasks(), 2);
assert_eq!(queue.pop(0), Some(1));
assert_eq!(queue.total_tasks(), 1);
assert_eq!(queue.pop(1), Some(2));
assert_eq!(queue.total_tasks(), 0);
assert!(queue.is_empty());
}
#[test]
fn test_work_stealing() {
let queue = WorkStealingQueue::new(3);
// Push work to worker 0
queue.push(1);
queue.push(2);
queue.push(3);
// Worker 1 should be able to steal work
assert!(queue.steal(1).is_some());
assert_eq!(queue.total_tasks(), 2);
}
#[test]
fn test_get_work() {
let queue = WorkStealingQueue::new(2);
queue.push(1);
queue.push(2);
// Worker 0 should get work (either from own queue or by stealing)
assert!(queue.get_work(0).is_some());
assert!(queue.get_work(1).is_some());
assert!(queue.get_work(0).is_none());
}
#[test]
fn test_concurrent_access() {
let queue = Arc::new(WorkStealingQueue::new(4));
// Add work first to ensure it's available when workers start
for i in 0..100 {
queue.push(i);
}
let mut handles = Vec::new();
// Spawn workers after work is added
for worker_id in 0..4 {
let queue_clone = Arc::clone(&queue);
let handle = thread::spawn(move || {
let mut processed = 0;
// Use wait_for_work to handle the case where work might not be immediately available
while let Some(_item) = queue_clone.wait_for_work(worker_id, Some(50)) {
processed += 1;
thread::sleep(Duration::from_millis(1));
}
processed
});
handles.push(handle);
}
        // Workers exit on their own once the queue drains (wait_for_work
        // returns None when no tasks remain), so joining is sufficient and
        // the test does not depend on timing.
// Collect results
let total_processed: usize = handles
.into_iter()
.map(|h| h.join().expect("thread join should succeed"))
.sum();
assert_eq!(total_processed, 100);
}
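    // A sketch of the exit paths of wait_for_work: an empty system returns
    // None without burning the full timeout, and shutdown releases workers.
    #[test]
    fn test_wait_for_work_exits() {
        let queue: WorkStealingQueue<i32> = WorkStealingQueue::new(2);
        // No tasks anywhere: reports exhaustion instead of blocking.
        assert!(queue.wait_for_work(0, Some(10)).is_none());
        queue.push(7);
        assert_eq!(queue.wait_for_work(0, Some(10)), Some(7));
        // Once shutdown is signaled, waiting workers are told to stop.
        queue.shutdown();
        assert!(queue.wait_for_work(0, None).is_none());
    }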
#[test]
fn test_queue_lengths() {
let queue = WorkStealingQueue::new(3);
// Add items and check distribution
for i in 0..6 {
queue.push(i);
}
let lengths = queue.queue_lengths();
assert_eq!(lengths.len(), 3);
assert_eq!(lengths.iter().sum::<usize>(), 6);
}
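    // A sketch of the Clone semantics: a cloned handle observes pushes made
    // through the original because all underlying state is shared.
    #[test]
    fn test_clone_shares_state() {
        let queue = WorkStealingQueue::new(2);
        let handle = queue.clone();
        queue.push(1);
        assert_eq!(handle.total_tasks(), 1);
        assert_eq!(handle.get_work(0), Some(1));
        assert!(queue.is_empty());
    }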
}