Skip to main content

ibverbs_rs/ibverbs/
numa.rs

1//! NUMA affinity — thread and memory binding for RDMA devices.
2//!
3//! This module requires the **`numa`** Cargo feature and the `libnuma` system library.
4//!
5//! Enabling the `"numa"` Cargo feature provides two ways to apply NUMA affinity:
6//!
7//! - **Device-relative** — [`Device::bind_thread_to_numa`] and
8//!   [`Device::bind_thread_to_numa_strict`] look up the NUMA node of an InfiniBand device
9//!   via sysfs and call the free functions below on your behalf.
10//!
11//! - **Node-relative** — [`set_numa_node`] and [`set_numa_node_strict`] accept a NUMA node
12//!   number directly, for cases where you already know the node or want to pin independently
13//!   of a specific device.
14
15use crate::ibverbs::device::Device;
16use std::io;
17
18impl<'a> Device<'a> {
19    /// Bind the calling task (OS thread) to the NUMA node local to this InfiniBand device.
20    ///
21    /// This reads the device’s NUMA node from sysfs (`/sys/class/infiniband/<dev>/device/numa_node`)
22    /// and then applies the affinity using [`set_numa_node`], which calls both `numa_run_on_node()`
23    /// and `numa_set_localalloc()`.
24    ///
25    /// # Errors
26    ///
27    /// Returns an error if:
28    /// - The device name is not available (`self.name()` is `None`).
29    /// - The sysfs file cannot be read (I/O error).
30    /// - The sysfs contents cannot be parsed as an `i32` (reported as `InvalidData`).
31    /// - `numa_run_on_node()` fails (returns `-1` and sets `errno`; returned via
32    ///   [`io::Error::last_os_error`]).
33    pub fn bind_thread_to_numa(&self) -> io::Result<()> {
34        let dev = self
35            .name()
36            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid device name"))?;
37
38        let numa = get_numa_node(dev)?;
39
40        set_numa_node(numa)?;
41
42        log::debug!("Task bound to numa node {numa}");
43        Ok(())
44    }
45
46    /// Like [`bind_thread_to_numa`](Self::bind_thread_to_numa), but also sets a strict bind
47    /// policy — memory allocations will only be served from the local NUMA node, with no
48    /// fallback to other nodes.
49    ///
50    /// # Errors
51    ///
52    /// Same as [`bind_thread_to_numa`](Self::bind_thread_to_numa).
53    pub fn bind_thread_to_numa_strict(&self) -> io::Result<()> {
54        let dev = self
55            .name()
56            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid device name"))?;
57
58        let numa = get_numa_node(dev)?;
59
60        set_numa_node_strict(numa)?;
61
62        log::debug!("Task bound to numa node {numa}");
63        Ok(())
64    }
65}
66
/// Pins the current task (OS thread) to the specified NUMA node and sets the memory allocation
/// policy to local allocation via `numa_set_localalloc()`.
///
/// The call sequence is:
/// 1. `numa_available()` — verifies that libnuma can be used at all;
/// 2. `numa_run_on_node(node)` — restricts CPU scheduling to `node`;
/// 3. `numa_set_localalloc()` — makes subsequent allocations prefer the local node.
///
/// Passing `-1` to `numa_run_on_node()` permits the kernel to schedule the task on all nodes
/// again, effectively resetting the CPU affinity (but the local-alloc policy set by
/// `numa_set_localalloc()` remains in effect).
///
/// # Errors
///
/// - [`io::ErrorKind::Unsupported`] if `numa_available()` reports that NUMA is not available.
/// - The OS error (`errno`) from `numa_run_on_node()` if it fails.
pub fn set_numa_node(node: i32) -> io::Result<()> {
    pin_task_to_node(node)?;

    // Allocate future memory from this node.
    // SAFETY: `pin_task_to_node` confirmed libnuma is usable; the function takes no arguments.
    unsafe { numa_set_localalloc() };

    Ok(())
}

/// Like [`set_numa_node`], but also enables libnuma's strict bind policy via
/// `numa_set_bind_policy(1)` before selecting local allocation, with the intent that memory
/// allocations do not fall back to other NUMA nodes.
///
/// NOTE(review): numa(3) describes `numa_set_bind_policy()` as governing calls that bind memory
/// to a specific node. Confirm it has the intended "no fallback" effect when combined with
/// `numa_set_localalloc()`.
///
/// # Errors
///
/// - [`io::ErrorKind::Unsupported`] if `numa_available()` reports that NUMA is not available.
/// - The OS error (`errno`) from `numa_run_on_node()` if it fails.
pub fn set_numa_node_strict(node: i32) -> io::Result<()> {
    pin_task_to_node(node)?;

    // 1 = strict binding (no fallback to other nodes).
    // SAFETY: `pin_task_to_node` confirmed libnuma is usable; the argument is a plain integer.
    unsafe { numa_set_bind_policy(1) };
    // Allocate future memory from this node.
    // SAFETY: as above; the function takes no arguments.
    unsafe { numa_set_localalloc() };

    Ok(())
}

/// Checks that libnuma is usable and then restricts the calling task to the CPUs of `node`.
///
/// Shared first step of [`set_numa_node`] and [`set_numa_node_strict`].
fn pin_task_to_node(node: i32) -> io::Result<()> {
    // numa(3): `numa_available()` must be called before any other libnuma function; if it
    // returns -1, the behaviour of every other function in the library is undefined.
    // SAFETY: the function takes no arguments and has no preconditions.
    if unsafe { numa_available() } < 0 {
        return Err(io::Error::new(
            io::ErrorKind::Unsupported,
            "NUMA is not available on this system",
        ));
    }

    // SAFETY: availability was checked above; the node is passed by value and no pointers
    // are involved.
    if unsafe { numa_run_on_node(node) } != 0 {
        // `numa_run_on_node` reports failure through `errno`; read it right away.
        return Err(io::Error::last_os_error());
    }

    Ok(())
}

// Raw bindings to the subset of libnuma used by this module.
#[link(name = "numa")]
unsafe extern "C" {
    /// Returns `-1` when the NUMA API is unavailable; must precede every other libnuma call.
    fn numa_available() -> std::os::raw::c_int;
    /// Runs the calling task on the CPUs of `node`; a non-zero return signals failure.
    fn numa_run_on_node(node: std::os::raw::c_int) -> std::os::raw::c_int;
    /// Sets the calling task's memory policy to local allocation.
    fn numa_set_localalloc();
    /// Selects strict (`1`) or preferred (`0`) binding for libnuma's memory-binding calls.
    fn numa_set_bind_policy(strict: std::os::raw::c_int);
}
119
/// Looks up the NUMA node of an InfiniBand device through sysfs.
///
/// The value is taken from `/sys/class/infiniband/<dev>/device/numa_node`, trimmed of
/// surrounding whitespace and parsed as an `i32`. A negative value is treated as "no node"
/// and turned into an error rather than returned to the caller.
///
/// # Errors
///
/// - The underlying I/O error if the sysfs file cannot be read.
/// - `InvalidData` if the contents do not parse as an `i32`.
/// - `NotFound` if the parsed node number is negative.
fn get_numa_node(dev: &str) -> io::Result<i32> {
    let contents =
        std::fs::read_to_string(format!("/sys/class/infiniband/{dev}/device/numa_node"))?;

    match contents.trim().parse::<i32>() {
        Ok(node) if node >= 0 => Ok(node),
        // Parsed fine, but the value is negative: no usable node for this device.
        Ok(_) => Err(io::Error::new(
            io::ErrorKind::NotFound,
            format!("numa node for {dev} not found"),
        )),
        Err(parse_err) => Err(io::Error::new(io::ErrorKind::InvalidData, parse_err)),
    }
}