// oxiphysics_gpu/parallel/types.rs
#[allow(unused_imports)]
use super::functions::*;
#[allow(unused_imports)]
use super::functions_2::*;

/// A partition of work across workers, together with a list of weights.
///
/// The worker count is defined by `ranges`; the balance metrics
/// ([`LoadBalancePlan::max_weight`], [`LoadBalancePlan::imbalance_ratio`])
/// are computed from `weights` only.
///
/// NOTE(review): `weights` presumably holds one entry per range, but nothing
/// in this type enforces that — confirm against the code that builds plans.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
pub struct LoadBalancePlan {
    /// Index ranges; the number of entries is the number of workers.
    pub ranges: Vec<std::ops::Range<usize>>,
    /// Weights used by the balance metrics.
    pub weights: Vec<f64>,
}
impl LoadBalancePlan {
    /// Returns the number of workers, i.e. the number of entries in `ranges`.
    #[allow(dead_code)]
    pub fn num_workers(&self) -> usize {
        self.ranges.len()
    }

    /// Returns the largest value in `weights`.
    ///
    /// Returns `f64::NEG_INFINITY` when `weights` is empty. `NaN` entries are
    /// skipped, because `f64::max` returns its non-`NaN` argument.
    #[allow(dead_code)]
    pub fn max_weight(&self) -> f64 {
        self.weights
            .iter()
            .copied()
            .fold(f64::NEG_INFINITY, f64::max)
    }

    /// Returns the largest weight divided by the mean weight.
    ///
    /// Returns `1.0` when there are no weights or when the mean is below
    /// `1e-15`, so the division is never performed with a (near-)zero mean.
    #[allow(dead_code)]
    pub fn imbalance_ratio(&self) -> f64 {
        if self.weights.is_empty() {
            return 1.0;
        }
        let total: f64 = self.weights.iter().sum();
        let mean = total / self.weights.len() as f64;
        if mean < 1e-15 {
            return 1.0;
        }
        self.max_weight() / mean
    }
}
46#[allow(dead_code)]
52pub struct WorkStealQueue<T> {
53 pub(super) items: std::collections::VecDeque<T>,
54}
55#[allow(dead_code)]
56impl<T: Send> WorkStealQueue<T> {
57 pub fn new() -> Self {
59 Self {
60 items: std::collections::VecDeque::new(),
61 }
62 }
63 pub fn push(&mut self, task: T) {
65 self.items.push_back(task);
66 }
67 pub fn pop(&mut self) -> Option<T> {
69 self.items.pop_back()
70 }
71 pub fn steal(&mut self) -> Option<T> {
73 self.items.pop_front()
74 }
75 pub fn len(&self) -> usize {
77 self.items.len()
78 }
79 pub fn is_empty(&self) -> bool {
81 self.items.is_empty()
82 }
83}
84#[allow(dead_code)]
89pub struct WorkGroupConfig {
90 pub preferred_size: usize,
92 pub max_size: usize,
94 pub min_size: usize,
96}
97impl WorkGroupConfig {
98 #[allow(dead_code)]
100 pub fn new(preferred_size: usize) -> Self {
101 Self {
102 preferred_size: preferred_size.max(1),
103 max_size: 1024,
104 min_size: 32,
105 }
106 }
107 #[allow(dead_code)]
109 pub fn cpu_default() -> Self {
110 let threads = rayon::current_num_threads().max(1);
111 Self {
112 preferred_size: 64,
113 max_size: 1024,
114 min_size: threads,
115 }
116 }
117 #[allow(dead_code)]
122 pub fn optimal_size(&self, total: usize) -> usize {
123 if total == 0 {
124 return self.min_size;
125 }
126 if total <= self.preferred_size {
127 return total.max(self.min_size).min(self.max_size);
128 }
129 let preferred_groups = total.div_ceil(self.preferred_size);
130 let preferred_waste = preferred_groups * self.preferred_size - total;
131 let preferred_waste_ratio = preferred_waste as f64 / total as f64;
132 if preferred_waste_ratio < 0.25 {
133 return self.preferred_size;
134 }
135 let mut best_size = self.preferred_size;
136 let mut best_waste = preferred_waste;
137 for candidate in (self.min_size..=self.max_size).step_by(self.min_size) {
138 let groups = total.div_ceil(candidate);
139 let waste = groups * candidate - total;
140 if waste < best_waste {
141 best_waste = waste;
142 best_size = candidate;
143 }
144 }
145 best_size
146 }
147 #[allow(dead_code)]
149 pub fn num_groups(&self, total: usize) -> usize {
150 let size = self.optimal_size(total);
151 total.div_ceil(size)
152 }
153 #[allow(dead_code)]
155 pub fn group_ranges(&self, total: usize) -> Vec<std::ops::Range<usize>> {
156 let size = self.optimal_size(total);
157 (0..total)
158 .step_by(size.max(1))
159 .map(|start| start..(start + size).min(total))
160 .collect()
161 }
162}
/// Selects a load-balancing strategy.
///
/// NOTE(review): the meaning of each variant is defined by the code that
/// consumes this enum, which is not part of this file — confirm there before
/// relying on the variant names alone.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub enum LoadBalanceStrategy {
    /// The `Static` strategy.
    Static,
    /// The `Weighted` strategy.
    Weighted,
    /// The `Guided` strategy.
    Guided,
}
174pub struct WorkChunker {
179 pub n: usize,
181 pub chunk_size: usize,
183}
184impl WorkChunker {
185 pub fn new(n: usize) -> Self {
189 let threads = rayon::current_num_threads().max(1);
190 let chunk_size = n / threads + 1;
191 Self { n, chunk_size }
192 }
193 pub fn chunks(&self) -> Vec<std::ops::Range<usize>> {
195 let cs = self.chunk_size.max(1);
196 (0..self.n)
197 .step_by(cs)
198 .map(|start| start..(start + cs).min(self.n))
199 .collect()
200 }
201}