ibverbs_rs/network/barrier/
mod.rs1use crate::channel::TransportError;
2use crate::ibverbs::error::IbvResult;
3use crate::ibverbs::protection_domain::ProtectionDomain;
4use crate::multi_channel::MultiChannel;
5use crate::multi_channel::PeerRemoteMemoryRegion;
6use crate::network::barrier::binary_tree::{BinaryTreeBarrier, PreparedBinaryTreeBarrier};
7use crate::network::barrier::dissemination::{DisseminationBarrier, PreparedDisseminationBarrier};
8use crate::network::barrier::linear::{LinearBarrier, PreparedLinearBarrier};
9use std::time::Duration;
10use thiserror::Error;
11
12mod binary_tree;
13mod dissemination;
14mod linear;
15mod memory;
16
17#[derive(Debug, Error)]
24pub enum BarrierError {
25 #[error("Barrier is poisoned from a previous error")]
27 Poisoned,
28 #[error("Self not in the barrier group")]
30 SelfNotInGroup,
31 #[error("Peers not in ascending order in barrier group")]
33 UnorderedPeers,
34 #[error("Duplicate peers in barrier group")]
36 DuplicatePeers,
37 #[error("Barrier timeout")]
39 Timeout,
40 #[error("Transport error: {0}")]
42 TransportError(#[from] TransportError),
43}
44
/// Selects which barrier implementation [`BarrierAlgorithm::instance`] prepares.
///
/// The enum is a plain field-less tag. In addition to `Debug`, `Copy` and
/// `Clone` it derives `PartialEq`, `Eq` and `Hash`, so a selection can be
/// compared in assertions or configuration checks and used as a map/set key.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum BarrierAlgorithm {
    /// Barrier implemented by the `linear` submodule.
    Centralized,
    /// Barrier implemented by the `binary_tree` submodule.
    BinaryTree,
    /// Barrier implemented by the `dissemination` submodule.
    Dissemination,
}
72
73impl BarrierAlgorithm {
74 pub fn instance(
76 &self,
77 pd: &ProtectionDomain,
78 rank: usize,
79 world_size: usize,
80 ) -> IbvResult<PreparedBarrier> {
81 match self {
82 BarrierAlgorithm::Centralized => Barrier::new_centralized(pd, rank, world_size),
83 BarrierAlgorithm::BinaryTree => Barrier::new_binary_tree(pd, rank, world_size),
84 BarrierAlgorithm::Dissemination => Barrier::new_dissemination(pd, rank, world_size),
85 }
86 }
87}
88
89#[derive(Debug)]
110pub enum Barrier {
111 Centralized(LinearBarrier),
113 BinaryTree(BinaryTreeBarrier),
115 Dissemination(DisseminationBarrier),
117}
118
119impl Barrier {
120 fn new_centralized(
121 pd: &ProtectionDomain,
122 rank: usize,
123 world_size: usize,
124 ) -> IbvResult<PreparedBarrier> {
125 Ok(PreparedBarrier::Centralized(PreparedLinearBarrier::new(
126 pd, rank, world_size,
127 )?))
128 }
129
130 fn new_binary_tree(
131 pd: &ProtectionDomain,
132 rank: usize,
133 world_size: usize,
134 ) -> IbvResult<PreparedBarrier> {
135 Ok(PreparedBarrier::BinaryTree(PreparedBinaryTreeBarrier::new(
136 pd, rank, world_size,
137 )?))
138 }
139
140 fn new_dissemination(
141 pd: &ProtectionDomain,
142 rank: usize,
143 world_size: usize,
144 ) -> IbvResult<PreparedBarrier> {
145 Ok(PreparedBarrier::Dissemination(
146 PreparedDisseminationBarrier::new(pd, rank, world_size)?,
147 ))
148 }
149
150 pub fn barrier(
152 &mut self,
153 multi_channel: &mut MultiChannel,
154 peers: &[usize],
155 timeout: Duration,
156 ) -> Result<(), BarrierError> {
157 match self {
158 Barrier::Centralized(b) => b.barrier(multi_channel, peers, timeout),
159 Barrier::BinaryTree(b) => b.barrier(multi_channel, peers, timeout),
160 Barrier::Dissemination(b) => b.barrier(multi_channel, peers, timeout),
161 }
162 }
163
164 pub fn barrier_unchecked(
166 &mut self,
167 multi_channel: &mut MultiChannel,
168 peers: &[usize],
169 timeout: Duration,
170 ) -> Result<(), BarrierError> {
171 match self {
172 Barrier::Centralized(b) => b.barrier_unchecked(multi_channel, peers, timeout),
173 Barrier::BinaryTree(b) => b.barrier_unchecked(multi_channel, peers, timeout),
174 Barrier::Dissemination(b) => b.barrier_unchecked(multi_channel, peers, timeout),
175 }
176 }
177}
178
179#[derive(Debug)]
184pub enum PreparedBarrier {
185 Centralized(PreparedLinearBarrier),
187 BinaryTree(PreparedBinaryTreeBarrier),
189 Dissemination(PreparedDisseminationBarrier),
191}
192
193pub(super) fn validate_peer_list(peers: &[usize]) -> Result<(), BarrierError> {
197 if !peers.is_sorted() {
198 return Err(BarrierError::UnorderedPeers);
199 }
200 if peers.windows(2).any(|w| w[0] == w[1]) {
201 return Err(BarrierError::DuplicatePeers);
202 }
203 Ok(())
204}
205
206impl PreparedBarrier {
207 pub fn remote_mr(&self) -> PeerRemoteMemoryRegion {
209 match self {
210 PreparedBarrier::Centralized(p) => p.remote(),
211 PreparedBarrier::BinaryTree(p) => p.remote(),
212 PreparedBarrier::Dissemination(p) => p.remote(),
213 }
214 }
215
216 pub fn link_remote(self, remote_mrs: Box<[PeerRemoteMemoryRegion]>) -> Barrier {
218 match self {
219 PreparedBarrier::Centralized(p) => Barrier::Centralized(p.link_remote(remote_mrs)),
220 PreparedBarrier::BinaryTree(p) => Barrier::BinaryTree(p.link_remote(remote_mrs)),
221 PreparedBarrier::Dissemination(p) => Barrier::Dissemination(p.link_remote(remote_mrs)),
222 }
223 }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `peers` passes validation.
    fn assert_accepted(peers: &[usize]) {
        assert!(validate_peer_list(peers).is_ok());
    }

    /// Validates `peers`, expecting a failure, and returns the error.
    fn rejection(peers: &[usize]) -> BarrierError {
        validate_peer_list(peers).unwrap_err()
    }

    #[test]
    fn valid_sorted_unique_peers() {
        assert_accepted(&[0, 1, 2, 3]);
    }

    #[test]
    fn empty_peer_list_is_valid() {
        assert_accepted(&[]);
    }

    #[test]
    fn single_peer_is_valid() {
        assert_accepted(&[5]);
    }

    #[test]
    fn non_contiguous_ranks_are_valid() {
        assert_accepted(&[0, 3, 7, 100]);
    }

    #[test]
    fn unsorted_peers_rejected() {
        assert!(matches!(rejection(&[2, 1, 3]), BarrierError::UnorderedPeers));
    }

    #[test]
    fn duplicate_peers_rejected() {
        assert!(matches!(rejection(&[0, 1, 1, 2]), BarrierError::DuplicatePeers));
    }

    #[test]
    fn unsorted_takes_precedence_over_duplicate() {
        // The list is both descending and contains a repeated rank; the
        // ordering violation is the one that gets reported.
        assert!(matches!(rejection(&[2, 1, 1]), BarrierError::UnorderedPeers));
    }

    #[test]
    fn all_same_ranks_detected_as_unsorted_or_duplicate() {
        // Equal neighbours are not descending, so the ordering check passes
        // and the duplicate check is the one that fires.
        assert!(matches!(rejection(&[3, 3, 3]), BarrierError::DuplicatePeers));
    }
}