nornir 0.3.1

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
Documentation
//! Make the ort CUDA execution provider "just work" without a manual
//! `LD_LIBRARY_PATH`.
//!
//! The onnxruntime CUDA provider (`libonnxruntime_providers_cuda.so`) is
//! `dlopen`ed lazily when the session is built, and it lists the CUDA
//! runtime libs (`libcudart`, `libcublas`, `libcudnn`, …) as `NEEDED`
//! dependencies. glibc fixes the loader search path at process start, so
//! setting `LD_LIBRARY_PATH` from inside the process has no effect. The
//! robust fix is to **`dlopen` the CUDA libs ourselves with `RTLD_GLOBAL`**
//! before the provider loads — then its `NEEDED` sonames resolve against the
//! already-loaded globals.
//!
//! Discovery order (first dir that contains a given lib wins):
//!   1. `NORNIR_CUDA_LIBS` — explicit colon-separated dirs (highest priority).
//!   2. Active Python env: `$VIRTUAL_ENV` / `$CONDA_PREFIX` →
//!      `…/site-packages/nvidia/*/lib` (pip `nvidia-*-cu12` wheels).
//!   3. `NORNIR_CUDA_SCAN_ROOTS` — colon-separated roots; each root and each
//!      of its immediate subdirectories is treated as a venv root and checked
//!      for `…/site-packages/nvidia/*/lib`.
//!   4. Known system dirs: ollama bundles, `/usr/local/cuda*/lib64`, etc.
//!
//! Best-effort: anything missing just means ort falls back to CPU (no panic).
//!
//! Cargo feature: `embed-ort`.

use std::path::{Path, PathBuf};
use std::sync::OnceLock;

/// Sonames of the CUDA runtime libraries to preload, in load order.
///
/// Leaf-first: a lib's own `NEEDED` CUDA deps must already be global when it
/// loads, so `run` resolves each entry with `find_lib` and `dlopen`s it in
/// exactly this array order.
///
/// cuDNN is intentionally *not* listed here: `run` loads `libcudnn.so.9` and
/// its split sub-libs in a separate step after every entry of this list.
const ORDERED_SONAMES: &[&str] = &[
    "libcudart.so.12",
    "libnvJitLink.so.12",
    "libcublasLt.so.12",
    "libcublas.so.12",
    "libcufft.so.11",
    "libcurand.so.10",
    "libcusparse.so.12",
    "libcusolver.so.11",
];

/// Diagnostic record of one CUDA preload attempt.
///
/// Purely informational (logging / troubleshooting); an empty `loaded` list
/// simply means nothing could be preloaded.
#[derive(Clone, Debug, Default)]
pub struct CudaPreload {
    /// Sonames whose `dlopen` succeeded, in the order they were loaded. The
    /// libraries stay resident for the rest of the process.
    pub loaded: Vec<String>,
    /// The directories that were searched, highest priority first.
    pub dirs: Vec<PathBuf>,
    /// `true` once the cuDNN umbrella library was found and loaded.
    pub cudnn: bool,
}

// Process-wide cache of the single preload attempt: `ensure()` fills it on
// first use via `run` and hands out the same report on every later call.
//
// Loaded libraries are leaked on purpose: they must stay resident for the
// whole process so the provider can resolve against them.
static PRELOAD: OnceLock<CudaPreload> = OnceLock::new();

/// Run CUDA library discovery and preloading at most once per process.
///
/// The first call performs the work; every later call returns the cached
/// report, so it is cheap and safe to invoke before each session build.
pub fn ensure() -> &'static CudaPreload {
    match PRELOAD.get() {
        // Already initialised by an earlier call.
        Some(report) => report,
        // First caller (or a concurrent one): let the cell run the preload.
        None => PRELOAD.get_or_init(run),
    }
}

/// Perform the actual preload: discover candidate directories, `dlopen` the
/// CUDA runtime libraries in dependency order, then the cuDNN 9 stack.
///
/// Returns a report of what was searched and what loaded. Every failure is
/// tolerated silently: a library that cannot be found or opened is simply
/// absent from `loaded`.
fn run() -> CudaPreload {
    /// True for a cuDNN **9** shared object other than the umbrella
    /// `libcudnn.so.9` itself (e.g. `libcudnn_ops.so.9`, `libcudnn.so.9.1.0`).
    /// Files of another major version (`libcudnn_ops_infer.so.8`, …) that
    /// happen to share the directory are rejected so they are never mixed
    /// into the global symbol namespace alongside cuDNN 9.
    fn is_cudnn9_sublib(name: &str) -> bool {
        let Some((stem, version)) = name.split_once(".so.") else {
            return false;
        };
        stem.starts_with("libcudnn")
            && version.split('.').next() == Some("9")
            && name != "libcudnn.so.9"
    }

    let dirs = candidate_dirs();
    let mut loaded: Vec<String> = Vec::new();
    let mut cudnn = false;

    // Core CUDA runtime libs, strictly in the leaf-first order of the table.
    for soname in ORDERED_SONAMES {
        let Some(path) = find_lib(&dirs, soname) else {
            continue;
        };
        if dlopen_global(&path) {
            loaded.push((*soname).to_owned());
        }
    }

    // cuDNN 9 is split: load its sub-libs (precompiled engines, ops, …) before
    // the umbrella `libcudnn.so.9`, all from the same dir.
    if let Some(dir) = dirs.iter().find(|d| d.join("libcudnn.so.9").exists()) {
        let mut pending: Vec<PathBuf> = std::fs::read_dir(dir)
            .into_iter()
            .flatten()
            .flatten()
            .map(|e| e.path())
            .filter(|p| {
                p.file_name()
                    .and_then(|n| n.to_str())
                    .is_some_and(is_cudnn9_sublib)
            })
            .collect();
        pending.sort();

        // Alphabetical order is not dependency order: a sub-lib may need a
        // sibling that sorts after it. Retry the failures until a full pass
        // makes no progress, so the outcome does not depend on file names.
        loop {
            let before = pending.len();
            pending.retain(|p| !dlopen_global(p));
            if pending.is_empty() || pending.len() == before {
                break;
            }
        }

        if dlopen_global(&dir.join("libcudnn.so.9")) {
            loaded.push("libcudnn.so.9".to_owned());
            cudnn = true;
        }
    }

    CudaPreload { loaded, dirs, cudnn }
}

/// Build the ordered, de-duplicated list of directories to search for CUDA
/// libraries. Earlier entries have higher priority; paths that are not
/// existing directories are dropped.
fn candidate_dirs() -> Vec<PathBuf> {
    /// Append `candidate` when it is an existing directory not yet listed.
    fn add(found: &mut Vec<PathBuf>, candidate: PathBuf) {
        if !candidate.is_dir() || found.contains(&candidate) {
            return;
        }
        found.push(candidate);
    }

    // Fixed fallback locations: ollama bundles first, then system CUDA.
    const SYSTEM_DIRS: [&str; 6] = [
        "/usr/local/lib/ollama/cuda_v12",
        "/usr/local/lib/ollama/cuda_v13",
        "/usr/local/cuda/lib64",
        "/usr/local/cuda-12/lib64",
        "/opt/cuda/lib64",
        "/usr/lib/x86_64-linux-gnu",
    ];

    let mut found: Vec<PathBuf> = Vec::new();

    // 1. Explicit override wins over everything else.
    if let Some(raw) = std::env::var_os("NORNIR_CUDA_LIBS") {
        for dir in std::env::split_paths(&raw) {
            add(&mut found, dir);
        }
    }

    // 2. pip nvidia wheels inside the currently active Python environment.
    for env_key in ["VIRTUAL_ENV", "CONDA_PREFIX"] {
        let Some(prefix) = std::env::var_os(env_key) else {
            continue;
        };
        for dir in nvidia_pkg_dirs(Path::new(&prefix)) {
            add(&mut found, dir);
        }
    }

    // 3. User-supplied scan roots, each checked for venvs one level deep.
    if let Some(raw) = std::env::var_os("NORNIR_CUDA_SCAN_ROOTS") {
        for scan_root in std::env::split_paths(&raw) {
            for dir in scan_root_for_nvidia(&scan_root) {
                add(&mut found, dir);
            }
        }
    }

    // 4. Well-known system locations, lowest priority.
    for sys_dir in SYSTEM_DIRS {
        add(&mut found, PathBuf::from(sys_dir));
    }

    found
}

/// Collect every `<root>/{lib,lib64}/<any>/site-packages/nvidia/<pkg>/lib`
/// directory under a venv/conda `root`.
///
/// Unreadable or missing levels are skipped; the result is empty when nothing
/// matches. Order follows directory iteration order.
fn nvidia_pkg_dirs(root: &Path) -> Vec<PathBuf> {
    ["lib", "lib64"]
        .iter()
        // Interpreter directories (`python3.x`) under lib / lib64.
        .filter_map(|libname| std::fs::read_dir(root.join(libname)).ok())
        .flat_map(|interpreters| interpreters.flatten())
        // Each interpreter's `site-packages/nvidia` holds one dir per wheel.
        .filter_map(|interp| std::fs::read_dir(interp.path().join("site-packages/nvidia")).ok())
        .flat_map(|packages| packages.flatten())
        // Keep only wheels that actually ship a `lib` directory.
        .map(|pkg| pkg.path().join("lib"))
        .filter(|lib| lib.is_dir())
        .collect()
}

/// Treat `root` itself, and then each of its immediate subdirectories, as a
/// candidate venv root and gather their nvidia wheel `lib` directories.
fn scan_root_for_nvidia(root: &Path) -> Vec<PathBuf> {
    let mut found = nvidia_pkg_dirs(root);
    let Ok(children) = std::fs::read_dir(root) else {
        // Unreadable root: whatever the direct probe found is all we have.
        return found;
    };
    let venv_roots = children
        .flatten()
        .map(|child| child.path())
        .filter(|path| path.is_dir());
    for venv in venv_roots {
        found.extend(nvidia_pkg_dirs(&venv));
    }
    found
}

/// Locate `soname` in the first directory of `dirs` that provides it.
///
/// Within a directory the exact file name wins. Otherwise a versioned variant
/// `soname.<minor>…` is accepted (e.g. `libcudart.so.12` →
/// `libcudart.so.12.8.90`). The variant must continue with a `.` right after
/// the soname, so `libcudart.so.120` is *not* taken for `libcudart.so.12`.
/// When several variants exist the highest version is chosen (numeric,
/// component-wise), making the result independent of directory iteration
/// order.
///
/// Returns `None` when no directory contains a match. Unreadable directories
/// and non-UTF-8 file names are skipped.
fn find_lib(dirs: &[PathBuf], soname: &str) -> Option<PathBuf> {
    /// Numeric components of a version suffix: `".8.90"` → `[8, 90]`.
    fn version_key(suffix: &str) -> Vec<u64> {
        suffix.split('.').filter_map(|c| c.parse().ok()).collect()
    }

    for d in dirs {
        let exact = d.join(soname);
        if exact.exists() {
            return Some(exact);
        }
        let Ok(entries) = std::fs::read_dir(d) else {
            continue;
        };
        let best = entries
            .flatten()
            .filter_map(|e| e.file_name().into_string().ok())
            .filter(|name| {
                name.strip_prefix(soname)
                    .is_some_and(|rest| rest.starts_with('.'))
            })
            // Every surviving name starts with `soname`, so slicing is safe.
            .max_by(|a, b| {
                version_key(&a[soname.len()..])
                    .cmp(&version_key(&b[soname.len()..]))
                    .then_with(|| a.cmp(b))
            });
        if let Some(name) = best {
            return Some(d.join(name));
        }
    }
    None
}

/// `dlopen` the library at `path` with `RTLD_NOW | RTLD_GLOBAL` and keep it
/// loaded forever. Returns `true` on success; the error of a failed load is
/// discarded.
fn dlopen_global(path: &Path) -> bool {
    use libloading::os::unix::{Library, RTLD_GLOBAL, RTLD_NOW};

    let flags = RTLD_NOW | RTLD_GLOBAL;
    // SAFETY: loading a CUDA shared lib from an explicit file path; the handle
    // is deliberately leaked below so it stays resident for the provider to
    // resolve against.
    let opened = unsafe { Library::open(Some(path), flags) };
    // Forgetting the handle skips its destructor, so the library is never
    // closed again.
    opened.map(std::mem::forget).is_ok()
}