oxiphysics_gpu/parallel/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5#[allow(unused_imports)]
6use super::functions::*;
7#[allow(unused_imports)]
8use super::functions_2::*;
9
/// Outcome of a load-balancing pass: one index range per worker.
#[allow(dead_code)]
pub struct LoadBalancePlan {
    /// Index range handed to each worker.
    pub ranges: Vec<std::ops::Range<usize>>,
    /// Total weight carried by each worker (used by the Weighted strategy).
    pub weights: Vec<f64>,
}
impl LoadBalancePlan {
    /// How many workers the plan covers, i.e. the number of entries in
    /// `ranges` (the `weights` vector is not consulted).
    #[allow(dead_code)]
    pub fn num_workers(&self) -> usize {
        let worker_count = self.ranges.len();
        worker_count
    }
    /// Heaviest per-worker weight, usable as a measure of imbalance.
    ///
    /// Yields `f64::NEG_INFINITY` when `weights` is empty, because that is
    /// the seed value of the running maximum.
    #[allow(dead_code)]
    pub fn max_weight(&self) -> f64 {
        let mut heaviest = f64::NEG_INFINITY;
        for &weight in &self.weights {
            heaviest = f64::max(heaviest, weight);
        }
        heaviest
    }
    /// Ratio of the heaviest worker to the mean weight; `1.0` means the
    /// load is perfectly even.
    ///
    /// Also reports `1.0` when there are no weights at all, or when the mean
    /// weight is below `1e-15`, so the division is never by (near) zero.
    #[allow(dead_code)]
    pub fn imbalance_ratio(&self) -> f64 {
        let worker_count = self.weights.len();
        if worker_count == 0 {
            return 1.0;
        }
        let mean = self.weights.iter().sum::<f64>() / worker_count as f64;
        if mean < 1e-15 {
            1.0
        } else {
            self.max_weight() / mean
        }
    }
}
46/// A simple work-stealing queue for task-parallel scheduling.
47///
48/// Internally backed by a `Vec`T` with a front/back cursor pair.  "Stealing"
49/// takes from the front (like a deque), while the owner pushes/pops from the
50/// back.
51#[allow(dead_code)]
52pub struct WorkStealQueue<T> {
53    pub(super) items: std::collections::VecDeque<T>,
54}
55#[allow(dead_code)]
56impl<T: Send> WorkStealQueue<T> {
57    /// Create an empty work-steal queue.
58    pub fn new() -> Self {
59        Self {
60            items: std::collections::VecDeque::new(),
61        }
62    }
63    /// Push a task onto the owner end (back).
64    pub fn push(&mut self, task: T) {
65        self.items.push_back(task);
66    }
67    /// Pop a task from the owner end (back).  Returns `None` if empty.
68    pub fn pop(&mut self) -> Option<T> {
69        self.items.pop_back()
70    }
71    /// Steal a task from the thief end (front).  Returns `None` if empty.
72    pub fn steal(&mut self) -> Option<T> {
73        self.items.pop_front()
74    }
75    /// Number of pending tasks.
76    pub fn len(&self) -> usize {
77        self.items.len()
78    }
79    /// Whether the queue is empty.
80    pub fn is_empty(&self) -> bool {
81        self.items.is_empty()
82    }
83}
84/// Configuration for choosing optimal work group sizes.
85///
86/// Models GPU-like work group sizing where the total work is divided into
87/// groups of a fixed size, potentially with padding in the last group.
88#[allow(dead_code)]
89pub struct WorkGroupConfig {
90    /// Preferred work group size (e.g. 64, 128, 256).
91    pub preferred_size: usize,
92    /// Maximum work group size supported.
93    pub max_size: usize,
94    /// Minimum work group size (avoid groups too small for efficiency).
95    pub min_size: usize,
96}
97impl WorkGroupConfig {
98    /// Create a new config with preferred group size.
99    #[allow(dead_code)]
100    pub fn new(preferred_size: usize) -> Self {
101        Self {
102            preferred_size: preferred_size.max(1),
103            max_size: 1024,
104            min_size: 32,
105        }
106    }
107    /// Create a default config suitable for CPU-side Rayon parallelism.
108    #[allow(dead_code)]
109    pub fn cpu_default() -> Self {
110        let threads = rayon::current_num_threads().max(1);
111        Self {
112            preferred_size: 64,
113            max_size: 1024,
114            min_size: threads,
115        }
116    }
117    /// Compute the optimal work group size for `total` items.
118    ///
119    /// Returns a size in `\[min_size, max_size\]` that balances occupancy.
120    /// Prefers `preferred_size` but adjusts if `total` is small.
121    #[allow(dead_code)]
122    pub fn optimal_size(&self, total: usize) -> usize {
123        if total == 0 {
124            return self.min_size;
125        }
126        if total <= self.preferred_size {
127            return total.max(self.min_size).min(self.max_size);
128        }
129        let preferred_groups = total.div_ceil(self.preferred_size);
130        let preferred_waste = preferred_groups * self.preferred_size - total;
131        let preferred_waste_ratio = preferred_waste as f64 / total as f64;
132        if preferred_waste_ratio < 0.25 {
133            return self.preferred_size;
134        }
135        let mut best_size = self.preferred_size;
136        let mut best_waste = preferred_waste;
137        for candidate in (self.min_size..=self.max_size).step_by(self.min_size) {
138            let groups = total.div_ceil(candidate);
139            let waste = groups * candidate - total;
140            if waste < best_waste {
141                best_waste = waste;
142                best_size = candidate;
143            }
144        }
145        best_size
146    }
147    /// Compute the number of work groups needed for `total` items.
148    #[allow(dead_code)]
149    pub fn num_groups(&self, total: usize) -> usize {
150        let size = self.optimal_size(total);
151        total.div_ceil(size)
152    }
153    /// Return ranges for each work group covering `0..total`.
154    #[allow(dead_code)]
155    pub fn group_ranges(&self, total: usize) -> Vec<std::ops::Range<usize>> {
156        let size = self.optimal_size(total);
157        (0..total)
158            .step_by(size.max(1))
159            .map(|start| start..(start + size).min(total))
160            .collect()
161    }
162}
/// Policy used to distribute work across threads.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum LoadBalanceStrategy {
    /// Split the work evenly by index count.
    Static,
    /// Split the work so every thread receives roughly the same total weight.
    Weighted,
    /// Hand out large chunks first and shrink the chunk size as work
    /// progresses.
    Guided,
}
174/// Splits `n` particle indices into chunks suitable for parallel work.
175///
176/// `chunk_size` is chosen so that each Rayon worker thread gets at least one
177/// chunk.
178pub struct WorkChunker {
179    /// Total number of items.
180    pub n: usize,
181    /// Size of each chunk.
182    pub chunk_size: usize,
183}
184impl WorkChunker {
185    /// Create a new `WorkChunker` for `n` items.
186    ///
187    /// `chunk_size` is set to `n / rayon::current_num_threads() + 1`.
188    pub fn new(n: usize) -> Self {
189        let threads = rayon::current_num_threads().max(1);
190        let chunk_size = n / threads + 1;
191        Self { n, chunk_size }
192    }
193    /// Return contiguous index ranges covering `0..n` without gaps or overlaps.
194    pub fn chunks(&self) -> Vec<std::ops::Range<usize>> {
195        let cs = self.chunk_size.max(1);
196        (0..self.n)
197            .step_by(cs)
198            .map(|start| start..(start + cs).min(self.n))
199            .collect()
200    }
201}
oxiphysics_gpu/parallel/types.rs

oxiphysics_gpu/parallel/
types.rs