ibverbs-rs 0.4.1

Safe, ergonomic Rust bindings for the InfiniBand libibverbs API
Documentation
//! NUMA affinity — thread and memory binding for RDMA devices.
//!
//! This module requires the **`numa`** Cargo feature and the `libnuma` system library.
//!
//! Enabling the `"numa"` Cargo feature provides two ways to apply NUMA affinity:
//!
//! - **Device-relative** — [`Device::bind_thread_to_numa`] and
//!   [`Device::bind_thread_to_numa_strict`] look up the NUMA node of an InfiniBand device
//!   via sysfs and call the free functions below on your behalf.
//!
//! - **Node-relative** — [`set_numa_node`] and [`set_numa_node_strict`] accept a NUMA node
//!   number directly, for cases where you already know the node or want to pin independently
//!   of a specific device.

use crate::ibverbs::device::Device;
use std::io;

impl<'a> Device<'a> {
    /// Bind the calling task (OS thread) to the NUMA node local to this InfiniBand device.
    ///
    /// This reads the device’s NUMA node from sysfs (`/sys/class/infiniband/<dev>/device/numa_node`)
    /// and then applies the affinity using [`set_numa_node`], which calls both `numa_run_on_node()`
    /// and `numa_set_localalloc()`.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The device name is not available (`self.name()` is `None`).
    /// - The sysfs file cannot be read (I/O error).
    /// - The sysfs contents cannot be parsed as an `i32` (reported as `InvalidData`).
    /// - `numa_run_on_node()` fails (returns `-1` and sets `errno`; returned via
    ///   [`io::Error::last_os_error`]).
    pub fn bind_thread_to_numa(&self) -> io::Result<()> {
        let dev = self
            .name()
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid device name"))?;

        let numa = get_numa_node(dev)?;

        set_numa_node(numa)?;

        log::debug!("Task bound to numa node {numa}");
        Ok(())
    }

    /// Like [`bind_thread_to_numa`](Self::bind_thread_to_numa), but also sets a strict bind
    /// policy — memory allocations will only be served from the local NUMA node, with no
    /// fallback to other nodes.
    ///
    /// # Errors
    ///
    /// Same as [`bind_thread_to_numa`](Self::bind_thread_to_numa).
    pub fn bind_thread_to_numa_strict(&self) -> io::Result<()> {
        let dev = self
            .name()
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid device name"))?;

        let numa = get_numa_node(dev)?;

        set_numa_node_strict(numa)?;

        log::debug!("Task bound to numa node {numa}");
        Ok(())
    }
}

/// Restricts the calling task (OS thread) to `node` and switches its memory policy to
/// local allocation.
///
/// Two libnuma calls are made, in this order:
///
/// 1. `numa_run_on_node(node)` — limits CPU scheduling to the given node.
/// 2. `numa_set_localalloc()` — makes subsequent allocations come from the local node.
///
/// The second call is only reached when the first one reports success.
///
/// Passing `-1` as `node` lets the kernel schedule the task on all nodes again, which
/// effectively resets the CPU affinity; the local-allocation policy installed by
/// `numa_set_localalloc()` stays in effect.
///
/// # Errors
///
/// If `numa_run_on_node()` returns a non-zero status, the current `errno` is returned via
/// [`io::Error::last_os_error`] and the memory policy is left untouched.
pub fn set_numa_node(node: i32) -> io::Result<()> {
    // NOTE(review): numa(3) says `numa_available()` must be consulted before any other
    // libnuma call — confirm whether the crate does that elsewhere.
    //
    // SAFETY: the declared signature takes a plain C int by value and returns one; no
    // pointers or borrowed memory cross the FFI boundary.
    let status = unsafe { numa_run_on_node(node) };

    match status {
        0 => {
            // SAFETY: the function takes no arguments and returns nothing.
            unsafe { numa_set_localalloc() };
            Ok(())
        }
        // `errno` is read immediately, before any other call can overwrite it.
        _ => Err(io::Error::last_os_error()),
    }
}

/// Strict variant of [`set_numa_node`]: pins the calling task to `node`, enables the strict
/// bind policy, and then selects local allocation.
///
/// After `numa_run_on_node(node)` succeeds, `numa_set_bind_policy(1)` is called before
/// `numa_set_localalloc()`, with the intent that memory allocations do not fall back to other
/// NUMA nodes.
///
/// # Errors
///
/// If `numa_run_on_node()` returns a non-zero status, the current `errno` is returned via
/// [`io::Error::last_os_error`]; neither the bind policy nor the allocation policy is changed
/// in that case.
pub fn set_numa_node_strict(node: i32) -> io::Result<()> {
    // SAFETY: the declared signature takes a plain C int by value and returns one; no
    // pointers or borrowed memory cross the FFI boundary.
    let rc = unsafe { numa_run_on_node(node) };

    if rc == 0 {
        // NOTE(review): confirm against numa(3) that the bind policy also governs the policy
        // installed by `numa_set_localalloc()` and not only explicit memory-binding calls.
        //
        // SAFETY: takes a single C int by value; `1` requests strict binding.
        unsafe { numa_set_bind_policy(1) };
        // SAFETY: the function takes no arguments and returns nothing.
        unsafe { numa_set_localalloc() };
        Ok(())
    } else {
        // `errno` is read immediately, before any other call can overwrite it.
        Err(io::Error::last_os_error())
    }
}

// Raw declarations for the subset of the `libnuma` C API used by this module.
// All three functions are unsafe to call; the safe wrappers are `set_numa_node` and
// `set_numa_node_strict`.
#[link(name = "numa")]
unsafe extern "C" {
    /// Restricts the calling task to run on `node`. The wrappers in this module treat any
    /// non-zero return value as failure and then read `errno`.
    fn numa_run_on_node(node: std::os::raw::c_int) -> std::os::raw::c_int;
    /// Sets the calling task's memory allocation policy to local allocation. Returns nothing,
    /// so the wrappers cannot observe a failure from it.
    fn numa_set_localalloc();
    /// Selects the bind policy; this module only ever passes `1` (strict binding, no fallback
    /// to other nodes).
    fn numa_set_bind_policy(strict: std::os::raw::c_int);
}

/// Looks up the NUMA node of the InfiniBand device `dev` through sysfs.
///
/// The file `/sys/class/infiniband/<dev>/device/numa_node` is read in full, surrounding
/// whitespace is trimmed, and the remainder is parsed as an `i32`. Only non-negative node
/// numbers are returned.
///
/// # Errors
///
/// - Any I/O error from reading the sysfs file is propagated unchanged.
/// - Contents that do not parse as an `i32` are reported as `InvalidData`, wrapping the
///   parse error.
/// - A negative node number is reported as `NotFound`.
fn get_numa_node(dev: &str) -> io::Result<i32> {
    let contents =
        std::fs::read_to_string(format!("/sys/class/infiniband/{dev}/device/numa_node"))?;

    match contents.trim().parse::<i32>() {
        Ok(node) if node >= 0 => Ok(node),
        // The file parsed fine but holds a negative value: treat the node as unknown.
        Ok(_) => Err(io::Error::new(
            io::ErrorKind::NotFound,
            format!("numa node for {dev} not found"),
        )),
        Err(parse_err) => Err(io::Error::new(io::ErrorKind::InvalidData, parse_err)),
    }
}