ibverbs_rs/multi_channel/mod.rs
1//! Multi-peer RDMA channels — indexed connections sharing a protection domain, with scatter/gather and one-sided operation support.
2//!
3//! A [`MultiChannel`] holds one [`Channel`] per peer and routes each operation to the
4//! correct channel based on the peer index embedded in the work request. All channels
5//! share a single [`ProtectionDomain`], so memory regions registered once can be used
6//! with any peer without re-registration.
7//!
8//! # Connection lifecycle
9//!
10//! Construction mirrors [`Channel`] but establishes a separate queue
11//! pair for each peer instead of a single one.
12//!
13//! 1. **Build** — call [`MultiChannel::builder`] (or
14//! [`ProtectionDomain::create_multi_channel`]) and set the number of peers with
15//! [`num_channels`](MultiChannelBuilder::num_channels). [`build`](MultiChannelBuilder::build)
16//! returns a [`PreparedMultiChannel`].
17//! 2. **Handshake** — collect the local [`endpoints`](PreparedMultiChannel::endpoints),
18//! exchange them with every peer out-of-band, then call
19//! [`PreparedMultiChannel::handshake`] with the full list of remote endpoints to
20//! obtain the connected [`MultiChannel`].
21//!
22//! # Peer-indexed work requests
23//!
24//! Every operation takes a peer-aware wrapper that pairs a standard work request with
25//! a target (or source) peer index:
26//!
27//! * [`PeerSendWorkRequest`] / [`PeerReceiveWorkRequest`] — two-sided messaging.
28//! * [`PeerWriteWorkRequest`] / [`PeerReadWorkRequest`] — one-sided RDMA.
29//!
30//! # Posting operations
31//!
32//! The same three control levels as [`channel`](crate::channel) are available, extended
33//! to operate over multiple peers at once:
34//!
35//! * **Blocking** — [`scatter_send`](MultiChannel::scatter_send),
36//! [`scatter_write`](MultiChannel::scatter_write),
37//! [`gather_receive`](MultiChannel::gather_receive),
38//! [`gather_read`](MultiChannel::gather_read) post an iterator of per-peer work
39//! requests and block until all complete.
40//! [`multicast_send`](MultiChannel::multicast_send) fans the same send out to an
41//! arbitrary set of peers.
42//! * **Scoped** — [`MultiChannel::scope`] and [`MultiChannel::manual_scope`] open a
43//! [`PollingScope`](crate::channel::PollingScope) whose `post_scatter_*` /
44//! `post_gather_*` / `post_multicast_send` methods return
45//! [`ScopedPendingWork`](crate::channel::ScopedPendingWork) handles for fine-grained
46//! polling. All outstanding work is automatically polled when the scope exits.
47//! * **Unpolled** — `unsafe` `scatter_*_unpolled` / `gather_*_unpolled` variants
48//! return raw [`PendingWork`](crate::channel::PendingWork) handles for maximum
49//! control. Prefer the scoped API unless you need direct access to these primitives.
50//!
51//! # Example: scatter/gather across peers
52//!
53//! ```no_run
54//! use ibverbs_rs::ibverbs;
55//! use ibverbs_rs::channel::{ScopeError, TransportError};
56//! use ibverbs_rs::multi_channel::{MultiChannel, PeerSendWorkRequest, PeerReceiveWorkRequest};
57//!
58//! let ctx = ibverbs::open_device("mlx5_0")?;
59//! let pd = ctx.allocate_pd()?;
60//!
61//! let prepared = MultiChannel::builder().pd(&pd).num_channels(2).build()?;
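//! let endpoints = prepared.endpoints();
//! // For brevity the local endpoints are fed straight back into the handshake.
//! // In a real deployment, exchange endpoints with every peer out-of-band first.
//! let mut mc = prepared.handshake(endpoints)?;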
64//!
65//! let mut buf = [0u8; 4];
66//! let mr = pd.register_local_mr_slice(&buf)?;
67//!
68//! let (tx, rx) = buf.split_at_mut(2);
69//!
70//! // Pre-build SGE lists (they must outlive the work requests)
71//! let send_sges: Vec<Vec<_>> = tx.chunks(1)
72//! .map(|chunk| vec![mr.gather_element(chunk)])
73//! .collect();
74//! let mut recv_sges: Vec<Vec<_>> = rx.chunks_mut(1)
75//! .map(|chunk| vec![mr.scatter_element(chunk)])
76//! .collect();
77//!
78//! mc.scope(|s| {
79//! let sends = send_sges.iter().enumerate()
80//! .map(|(peer, sges)| PeerSendWorkRequest::new(peer, sges));
81//! s.post_scatter_send(sends)?;
82//!
83//! let recvs = recv_sges.iter_mut().enumerate()
84//! .map(|(peer, sges)| PeerReceiveWorkRequest::new(peer, sges));
85//! s.post_gather_receive(recvs)?;
86//!
87//! Ok::<(), ScopeError<TransportError>>(())
88//! })?;
89//! # Ok::<(), Box<dyn std::error::Error>>(())
90//! ```
91//!
92//! See also [`examples/multi_channel_scatter_gather.rs`](https://github.com/Tikitikitikidesuka/ibverbs-rs/blob/main/examples/multi_channel_scatter_gather.rs)
93//! for a complete runnable example.
94//!
95//! [`ProtectionDomain`]: crate::ibverbs::protection_domain::ProtectionDomain
96
97mod builder;
98mod ops;
99mod polling_scope;
100mod remote_memory_region;
101mod work_request;
102
103#[doc(hidden)]
104pub use builder::multi_channel_builder::{
105 Empty, SetAccess, SetAckTimeout, SetMaxAckRetries, SetMaxRecvSge, SetMaxRecvWr,
106 SetMaxRnrRetries, SetMaxSendSge, SetMaxSendWr, SetMinCqEntries, SetMinRnrTimer, SetMtu,
107 SetNumChannels, SetPd, SetRecvPsn, SetSendPsn,
108};
109pub use builder::{MultiChannelBuilder, PreparedMultiChannel};
110pub use remote_memory_region::PeerRemoteMemoryRegion;
111pub use work_request::{
112 PeerReadWorkRequest, PeerReceiveWorkRequest, PeerSendWorkRequest, PeerWriteWorkRequest,
113};
114
115use crate::channel::Channel;
116use crate::ibverbs::error::{IbvError, IbvResult};
117use crate::ibverbs::protection_domain::ProtectionDomain;
118
/// A set of [`Channel`]s to different peers, sharing a single [`ProtectionDomain`].
///
/// Each peer is identified by its index, which is its position in the internal
/// channel list. Operations are routed to the correct channel based on the peer
/// index in the work request; an index outside `0..num_channels()` is reported as
/// a "not found" error by the internal lookup rather than panicking.
///
/// Use [`ProtectionDomain::create_multi_channel`] or [`MultiChannel::builder`] to construct one.
#[derive(Debug)]
pub struct MultiChannel {
    /// One channel per peer; a peer's index is its position in this slice.
    /// The boxed slice has a fixed length, which `num_channels()` reports.
    channels: Box<[Channel]>,
    /// Protection domain returned by `pd()`.
    // NOTE(review): struct fields drop in declaration order, so `channels` is
    // dropped before `pd` — presumably intentional; confirm before reordering.
    pd: ProtectionDomain,
}
130
131impl MultiChannel {
132 /// Returns the number of peer channels.
133 pub fn num_channels(&self) -> usize {
134 self.channels.len()
135 }
136
137 /// Returns a reference to the shared [`ProtectionDomain`].
138 pub fn pd(&self) -> &ProtectionDomain {
139 &self.pd
140 }
141
142 pub(crate) fn channel(&mut self, peer: usize) -> IbvResult<&mut Channel> {
143 self.channels
144 .get_mut(peer)
145 .ok_or_else(|| IbvError::NotFound(format!("Peer {peer} not found")))
146 }
147}
148
149impl ProtectionDomain {
150 /// Returns a builder with this protection domain already set.
151 pub fn create_multi_channel(&self) -> MultiChannelBuilder<'_, SetPd> {
152 MultiChannel::builder().pd(self)
153 }
154}