ibverbs_rs/ibverbs/numa.rs
1//! NUMA affinity — thread and memory binding for RDMA devices.
2//!
3//! This module requires the **`numa`** Cargo feature and the `libnuma` system library.
4//!
5//! Enabling the `"numa"` Cargo feature provides two ways to apply NUMA affinity:
6//!
7//! - **Device-relative** — [`Device::bind_thread_to_numa`] and
8//! [`Device::bind_thread_to_numa_strict`] look up the NUMA node of an InfiniBand device
9//! via sysfs and call the free functions below on your behalf.
10//!
11//! - **Node-relative** — [`set_numa_node`] and [`set_numa_node_strict`] accept a NUMA node
12//! number directly, for cases where you already know the node or want to pin independently
13//! of a specific device.
14
15use crate::ibverbs::device::Device;
16use std::io;
17
18impl<'a> Device<'a> {
19 /// Bind the calling task (OS thread) to the NUMA node local to this InfiniBand device.
20 ///
21 /// This reads the device’s NUMA node from sysfs (`/sys/class/infiniband/<dev>/device/numa_node`)
22 /// and then applies the affinity using [`set_numa_node`], which calls both `numa_run_on_node()`
23 /// and `numa_set_localalloc()`.
24 ///
25 /// # Errors
26 ///
27 /// Returns an error if:
28 /// - The device name is not available (`self.name()` is `None`).
29 /// - The sysfs file cannot be read (I/O error).
30 /// - The sysfs contents cannot be parsed as an `i32` (reported as `InvalidData`).
31 /// - `numa_run_on_node()` fails (returns `-1` and sets `errno`; returned via
32 /// [`io::Error::last_os_error`]).
33 pub fn bind_thread_to_numa(&self) -> io::Result<()> {
34 let dev = self
35 .name()
36 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid device name"))?;
37
38 let numa = get_numa_node(dev)?;
39
40 set_numa_node(numa)?;
41
42 log::debug!("Task bound to numa node {numa}");
43 Ok(())
44 }
45
46 /// Like [`bind_thread_to_numa`](Self::bind_thread_to_numa), but also sets a strict bind
47 /// policy — memory allocations will only be served from the local NUMA node, with no
48 /// fallback to other nodes.
49 ///
50 /// # Errors
51 ///
52 /// Same as [`bind_thread_to_numa`](Self::bind_thread_to_numa).
53 pub fn bind_thread_to_numa_strict(&self) -> io::Result<()> {
54 let dev = self
55 .name()
56 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid device name"))?;
57
58 let numa = get_numa_node(dev)?;
59
60 set_numa_node_strict(numa)?;
61
62 log::debug!("Task bound to numa node {numa}");
63 Ok(())
64 }
65}
66
/// Pins the current task (OS thread) to the specified NUMA node and sets the memory allocation
/// policy to local allocation via `numa_set_localalloc()`.
///
/// First checks `numa_available()`, because libnuma documents every other call as undefined
/// when NUMA support is missing. Then calls `numa_run_on_node()` to restrict CPU scheduling,
/// followed by `numa_set_localalloc()` so that subsequent memory allocations are served from
/// the local node.
///
/// Passing `-1` to `numa_run_on_node()` permits the kernel to schedule the task on all nodes
/// again, effectively resetting the CPU affinity (but the local-alloc policy set by
/// `numa_set_localalloc()` remains in effect).
///
/// # Errors
///
/// - Returns an [`io::ErrorKind::Unsupported`] error if `numa_available()` reports that NUMA
///   is not available on this system.
/// - Returns the OS error (from `errno`) if `numa_run_on_node()` fails.
pub fn set_numa_node(node: i32) -> io::Result<()> {
    apply_numa_affinity(node, false)
}

/// Like [`set_numa_node`], but also enables libnuma's strict bind policy via
/// `numa_set_bind_policy(1)` before selecting local allocation.
///
/// NOTE(review): per numa(3), `numa_set_bind_policy` governs libnuma calls that bind memory
/// to a specific node, while `numa_set_localalloc` installs a local-preferred policy. It is
/// therefore unclear whether this combination actually prevents fallback to other nodes for
/// ordinary allocations — confirm, and consider `numa_set_membind` if a hard guarantee is
/// required.
///
/// # Errors
///
/// - Returns an [`io::ErrorKind::Unsupported`] error if `numa_available()` reports that NUMA
///   is not available on this system.
/// - Returns the OS error (from `errno`) if `numa_run_on_node()` fails.
pub fn set_numa_node_strict(node: i32) -> io::Result<()> {
    apply_numa_affinity(node, true)
}

/// Shared implementation of [`set_numa_node`] and [`set_numa_node_strict`]: verifies libnuma is
/// usable, pins the task to `node`, optionally enables the strict bind policy, and selects
/// local allocation.
fn apply_numa_affinity(node: i32, strict: bool) -> io::Result<()> {
    // SAFETY: `numa_available` takes no arguments and is the one libnuma call that is valid
    // before availability has been established.
    if unsafe { numa_available() } < 0 {
        return Err(io::Error::new(
            io::ErrorKind::Unsupported,
            "NUMA is not available on this system",
        ));
    }

    // SAFETY: NUMA availability was confirmed above; the function takes a plain integer and
    // reports invalid nodes through its return value and `errno`.
    if unsafe { numa_run_on_node(node) } != 0 {
        // `errno` is still the one set by `numa_run_on_node`; nothing ran in between.
        return Err(io::Error::last_os_error());
    }

    if strict {
        // 1 = strict binding (no fallback to other nodes)
        // SAFETY: NUMA availability was confirmed above; the argument is a plain flag.
        unsafe { numa_set_bind_policy(1) };
    }

    // Allocate future memory from this node
    // SAFETY: NUMA availability was confirmed above; the function takes no arguments.
    unsafe { numa_set_localalloc() };

    Ok(())
}

#[link(name = "numa")]
unsafe extern "C" {
    /// Returns `-1` when NUMA support is unavailable; all other libnuma calls are undefined
    /// in that case.
    fn numa_available() -> std::os::raw::c_int;
    /// Restricts the calling task to the CPUs of `node`; returns `0` on success, `-1` on error
    /// with `errno` set.
    fn numa_run_on_node(node: std::os::raw::c_int) -> std::os::raw::c_int;
    /// Sets the calling task's memory policy to local allocation.
    fn numa_set_localalloc();
    /// Selects strict (`1`) or preferred (`0`) policy for libnuma's node-binding calls.
    fn numa_set_bind_policy(strict: std::os::raw::c_int);
}
119
/// Looks up the NUMA node an InfiniBand device is attached to, as reported by sysfs.
///
/// The value is read from `/sys/class/infiniband/<dev>/device/numa_node`, trimmed of
/// surrounding whitespace, and parsed as an `i32`.
///
/// # Errors
///
/// - Any I/O error from reading the sysfs file is returned as-is.
/// - Contents that do not parse as an `i32` produce an `InvalidData` error wrapping the
///   parse error.
/// - A negative node number produces a `NotFound` error.
fn get_numa_node(dev: &str) -> io::Result<i32> {
    let contents = std::fs::read_to_string(format!(
        "/sys/class/infiniband/{dev}/device/numa_node"
    ))?;

    match contents.trim().parse::<i32>() {
        Ok(node) if node >= 0 => Ok(node),
        // Parsed fine, but a negative value is not a usable node number.
        Ok(_) => Err(io::Error::new(
            io::ErrorKind::NotFound,
            format!("numa node for {dev} not found"),
        )),
        Err(parse_err) => Err(io::Error::new(io::ErrorKind::InvalidData, parse_err)),
    }
}
143}