// Source: ibverbs_rs/channel/mod.rs

//! Point-to-point RDMA channel — builder API with lifetime-safe operation posting and scope-based completion polling.
//!
//! A [`Channel`] wraps an InfiniBand [`QueuePair`] and uses Rust's borrow checker
//! to statically prevent data races between the CPU and the NIC: memory passed to a
//! work request is borrowed for the duration of the operation, so the compiler rejects
//! any attempt to read or drop it while the hardware may still be performing DMA.
//!
//! # Connection lifecycle
//!
//! A channel is established in two steps so that the endpoints can exchange connection
//! information out-of-band (typically over TCP) before the RDMA link is brought up.
//!
//! 1. **Build** — call [`Channel::builder`] (or [`ProtectionDomain::create_channel`])
//!    and configure the queue pair parameters. [`build`](ChannelBuilder::build) returns
//!    a [`PreparedChannel`] whose [`endpoint`](PreparedChannel::endpoint) contains the
//!    local connection information.
//! 2. **Handshake** — exchange [`QueuePairEndpoint`]s with the remote peer, then call
//!    [`PreparedChannel::handshake`] to bring the queue pair to the Ready-To-Send state
//!    and obtain the connected [`Channel`].
//!
//! # Posting operations
//!
//! Once connected, operations can be posted at three levels of control:
//!
//! * **Blocking** — [`Channel::send`], [`Channel::receive`], [`Channel::write`],
//!   [`Channel::read`] each post a single operation and spin-poll until it completes.
//!   Best for simple, sequential use cases.
//! * **Scoped** — [`Channel::scope`] and [`Channel::manual_scope`] open a
//!   [`PollingScope`] through which multiple operations can be posted and polled
//!   independently as [`ScopedPendingWork`] handles. The scope guarantees all
//!   outstanding work is polled to completion before it returns, even if the closure
//!   panics. This mirrors the design of [`std::thread::scope`].
//! * **Unpolled** — [`Channel::send_unpolled`], [`Channel::receive_unpolled`],
//!   [`Channel::write_unpolled`], [`Channel::read_unpolled`] are `unsafe` and return
//!   raw [`PendingWork`] handles. These are the primitives that the two higher levels
//!   are built on; prefer the blocking or scoped APIs unless you need direct control.
//!
//! # Memory safety
//!
//! Work requests borrow their data buffers for the lifetime of the operation. That
//! borrow is released only once the operation has been polled to completion; if a
//! handle is dropped without being polled, the drop blocks until the hardware finishes.
//! It is therefore impossible in safe code to free or reuse a buffer that the NIC is
//! still reading from or writing to.
//!
//! # Choosing `scope` vs `manual_scope`
//!
//! * Use [`scope`](Channel::scope) when you want automatic cleanup: any work not
//!   manually polled is polled to completion when the scope exits, even on panic.
//!   Errors are wrapped in [`ScopeError`] to distinguish closure errors from
//!   auto-poll errors.
//! * Use [`manual_scope`](Channel::manual_scope) when you want to poll everything
//!   yourself and get `Result<T, E>` directly. It panics if you leave work unpolled
//!   on the success path, acting as a safety net against forgotten completions.
//!
//! # Error handling
//!
//! Transport-layer errors are reported as [`TransportError`], which covers both
//! low-level ibverbs call failures and work completion errors.
//! [`Channel::scope`] wraps errors further in [`ScopeError`] to distinguish between
//! closure errors and errors discovered during automatic polling at scope exit.
//!
//! # Examples
//!
//! ## Blocking send and receive
//!
//! ```no_run
//! use ibverbs_rs::ibverbs;
//! use ibverbs_rs::channel::Channel;
//! use ibverbs_rs::ibverbs::work::{SendWorkRequest, ReceiveWorkRequest};
//!
//! let ctx = ibverbs::open_device("mlx5_0")?;
//! let pd = ctx.allocate_pd()?;
//! let prepared = Channel::builder().pd(&pd).build()?;
//!
//! // Exchange endpoints out-of-band (loopback for illustration)
//! let endpoint = prepared.endpoint();
//! let mut channel = prepared.handshake(endpoint)?;
//!
//! let mut buf = [0u8; 64];
//! let mr = pd.register_local_mr_slice(&buf)?;
//!
//! // Blocking receive (posts one WR and spins until complete).
//! // NOTE: this only returns once a peer's send arrives; on the self-connected
//! // loopback channel above nothing has been sent yet, so if actually run it
//! // would spin here. With a real remote peer the calls work as written.
//! channel.receive(ReceiveWorkRequest::new(&mut [mr.scatter_element(&mut buf)]))?;
//!
//! // Blocking send
//! channel.send(SendWorkRequest::new(&[mr.gather_element(&buf)]))?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! ## Scoped operations
//!
//! ```no_run
//! use ibverbs_rs::ibverbs;
//! use ibverbs_rs::channel::{Channel, ScopeError, TransportError};
//! use ibverbs_rs::ibverbs::work::{SendWorkRequest, ReceiveWorkRequest};
//!
//! let ctx = ibverbs::open_device("mlx5_0")?;
//! let pd = ctx.allocate_pd()?;
//! let prepared = Channel::builder().pd(&pd).build()?;
//! let endpoint = prepared.endpoint();
//! let mut channel = prepared.handshake(endpoint)?;
//!
//! let mut buf = [0u8; 64];
//! let mr = pd.register_local_mr_slice(&buf)?;
//!
//! channel.scope(|s| {
//!     let (tx, rx) = buf.split_at_mut(32);
//!
//!     // Post both operations — they execute concurrently on the NIC
//!     let send = s.post_send(SendWorkRequest::new(&[mr.gather_element(tx)]))?;
//!     let recv = s.post_receive(ReceiveWorkRequest::new(&mut [mr.scatter_element(rx)]))?;
//!
//!     // Optionally poll individual handles for fine-grained control
//!     while send.poll().is_none() {}   // spin until complete
//!     while recv.poll().is_none() {}   // spin until complete
//!
//!     Ok::<(), ScopeError<TransportError>>(())
//! })?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! See also the [`examples/channel.rs`](https://github.com/Tikitikitikidesuka/ibverbs-rs/blob/main/examples/channel.rs) file
//! for a complete runnable example.
//!
//! [`QueuePair`]: crate::ibverbs::queue_pair::QueuePair
//! [`QueuePairEndpoint`]: crate::ibverbs::queue_pair::builder::QueuePairEndpoint

129use crate::channel::cached_completion_queue::CachedCompletionQueue;
130use crate::ibverbs::error::IbvError;
131use crate::ibverbs::protection_domain::ProtectionDomain;
132use crate::ibverbs::queue_pair::QueuePair;
133use crate::ibverbs::work::WorkError;
134use std::cell::RefCell;
135use std::rc::Rc;
136use thiserror::Error;
137
138mod builder;
139mod cached_completion_queue;
140mod ops;
141mod pending_work;
142mod polling_scope;
143
144#[doc(hidden)]
145pub use builder::channel_builder::{
146    Empty, SetAccess, SetAckTimeout, SetMaxAckRetries, SetMaxRecvSge, SetMaxRecvWr,
147    SetMaxRnrRetries, SetMaxSendSge, SetMaxSendWr, SetMinCqEntries, SetMinRnrTimer, SetMtu, SetPd,
148    SetRecvPsn, SetSendPsn,
149};
150pub use builder::{ChannelBuilder, PreparedChannel};
151pub use pending_work::PendingWork;
152pub use polling_scope::{PollingScope, ScopeError, ScopeResult, ScopedPendingWork};
153
154/// A safe RDMA communication endpoint built on top of a [`QueuePair`].
155///
156/// `Channel` wraps a queue pair with lifetime-safe operation posting through
157/// [`scope`](Self::scope) and [`manual_scope`](Self::manual_scope).
158///
159/// A channel belongs to a [`ProtectionDomain`] and can share memory regions with
160/// other channels under the same domain.
161/// Use [`ProtectionDomain::create_channel`] or [`Channel::builder`] to construct one.
162#[derive(Debug)]
163pub struct Channel {
164    qp: QueuePair,
165    cq: Rc<RefCell<CachedCompletionQueue>>,
166    pd: ProtectionDomain,
167    next_wr_id: u64,
168}
169
170impl Channel {
171    /// Returns a reference to the channel's [`ProtectionDomain`].
172    pub fn pd(&self) -> &ProtectionDomain {
173        &self.pd
174    }
175}
176
177impl ProtectionDomain {
178    /// Returns a builder with this protection domain already set.
179    pub fn create_channel(&self) -> ChannelBuilder<'_, SetPd> {
180        Channel::builder().pd(self)
181    }
182}
183
184/// An error from an RDMA transport operation.
185///
186/// Wraps both low-level ibverbs errors and work completion errors into a single type.
187#[derive(Debug, Error)]
188pub enum TransportError {
189    #[error(transparent)]
190    IbvError(#[from] IbvError),
191    #[error(transparent)]
192    WorkError(#[from] WorkError),
193}
194
195/// Convenience alias for a [`Result`] with [`TransportError`].
196pub type TransportResult<T> = Result<T, TransportError>;