Skip to main content

ripvec_core/backend/
blas_info.rs

//! Runtime BLAS detection and optimization recommendations.
//!
//! Identifies the BLAS library linked into the running process (on Linux by
//! probing for vendor-specific symbols via `dlsym`) and recommends the
//! optimal BLAS for the current CPU vendor.
5
/// BLAS implementation identified for the running process.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BlasKind {
    /// `OpenBLAS`: a good default, but not the optimal choice on AMD or Intel.
    OpenBlas,
    /// Intel MKL / oneMKL: optimal on Intel CPUs, crippled on AMD CPUs.
    IntelMkl,
    /// BLIS or AMD AOCL-BLAS: optimal on AMD CPUs.
    Blis,
    /// Apple Accelerate: optimal on Apple Silicon.
    Accelerate,
    /// No recognised external BLAS (pure Rust ndarray fallback).
    Unknown,
}
20
/// CPU vendor identified for the current machine.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CpuVendor {
    /// AMD parts (Ryzen, EPYC, Threadripper).
    Amd,
    /// Intel parts (Core, Xeon).
    Intel,
    /// Apple Silicon (M-series).
    Apple,
    /// The vendor could not be determined.
    Unknown,
}
33
34impl std::fmt::Display for BlasKind {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36        match self {
37            Self::OpenBlas => write!(f, "OpenBLAS"),
38            Self::IntelMkl => write!(f, "Intel MKL"),
39            Self::Blis => write!(f, "BLIS/AOCL"),
40            Self::Accelerate => write!(f, "Apple Accelerate"),
41            Self::Unknown => write!(f, "pure Rust (no external BLAS)"),
42        }
43    }
44}
45
46impl std::fmt::Display for CpuVendor {
47    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48        match self {
49            Self::Amd => write!(f, "AMD"),
50            Self::Intel => write!(f, "Intel"),
51            Self::Apple => write!(f, "Apple"),
52            Self::Unknown => write!(f, "unknown"),
53        }
54    }
55}
56
57/// Detect the CPU vendor from `/proc/cpuinfo` (Linux) or sysctl (macOS).
58#[must_use]
59pub fn detect_cpu_vendor() -> CpuVendor {
60    #[cfg(target_os = "macos")]
61    return CpuVendor::Apple;
62
63    #[cfg(target_os = "linux")]
64    {
65        if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
66            if cpuinfo.contains("AuthenticAMD") {
67                return CpuVendor::Amd;
68            }
69            if cpuinfo.contains("GenuineIntel") {
70                return CpuVendor::Intel;
71            }
72        }
73        CpuVendor::Unknown
74    }
75
76    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
77    CpuVendor::Unknown
78}
79
80/// Detect which BLAS library is linked at runtime.
81///
82/// Probes for vendor-specific symbols using `dlsym(RTLD_DEFAULT, ...)`.
83///
84/// # Panics
85///
86/// Panics if a BLAS symbol name contains an interior NUL byte (should never
87/// happen since all probed symbols are ASCII literals).
88#[must_use]
89pub fn detect_blas() -> BlasKind {
90    #[cfg(target_os = "macos")]
91    {
92        // On macOS, ndarray links Accelerate by default
93        BlasKind::Accelerate
94    }
95
96    #[cfg(target_os = "linux")]
97    {
98        // Probe for vendor-specific symbols in the loaded libraries
99        use std::ffi::CString;
100
101        let probe = |symbol: &str| -> bool {
102            // SAFETY: BLAS symbol names are ASCII-only, no interior NULs.
103            let c_sym = CString::new(symbol).expect("BLAS symbol contains NUL byte");
104            #[expect(unsafe_code, reason = "dlsym probe for BLAS detection")]
105            unsafe {
106                !libc::dlsym(libc::RTLD_DEFAULT, c_sym.as_ptr()).is_null()
107            }
108        };
109
110        // BLIS / AOCL-BLAS
111        if probe("bli_info_get_version_str") {
112            return BlasKind::Blis;
113        }
114
115        // Intel MKL
116        if probe("mkl_get_version") {
117            return BlasKind::IntelMkl;
118        }
119
120        // OpenBLAS
121        if probe("openblas_get_config") {
122            return BlasKind::OpenBlas;
123        }
124
125        BlasKind::Unknown
126    }
127
128    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
129    {
130        BlasKind::Unknown
131    }
132}
133
134/// Return a recommendation string if the user isn't using the optimal BLAS.
135///
136/// Returns `None` if the current BLAS is optimal for the CPU, or if
137/// we can't determine a better option.
138#[must_use]
139pub fn recommend_blas() -> Option<String> {
140    let blas = detect_blas();
141    let cpu = detect_cpu_vendor();
142
143    match (cpu, blas) {
144        // Suboptimal — recommend better
145        (CpuVendor::Amd, BlasKind::OpenBlas) => Some(
146            "tip: AOCL-BLAS is 10-15% faster than OpenBLAS on AMD CPUs. \
147             Install: https://developer.amd.com/amd-aocl/"
148                .to_string(),
149        ),
150        (CpuVendor::Amd, BlasKind::IntelMkl) => Some(
151            "warning: Intel MKL is intentionally slow on AMD CPUs (CPUID check). \
152             Use AOCL-BLAS or OpenBLAS instead."
153                .to_string(),
154        ),
155        (CpuVendor::Intel, BlasKind::OpenBlas) => Some(
156            "tip: Intel MKL is faster than OpenBLAS on Intel CPUs. \
157             Install: sudo apt install libmkl-dev"
158                .to_string(),
159        ),
160
161        // No BLAS at all
162        (CpuVendor::Amd, BlasKind::Unknown) => Some(
163            "warning: no BLAS library detected — CPU inference will be slow. \
164             Install: sudo apt install libopenblas-dev (or AOCL-BLAS for best AMD performance)"
165                .to_string(),
166        ),
167        (CpuVendor::Intel, BlasKind::Unknown) => Some(
168            "warning: no BLAS library detected — CPU inference will be slow. \
169             Install: sudo apt install libmkl-dev"
170                .to_string(),
171        ),
172        (_, BlasKind::Unknown) => Some(
173            "warning: no BLAS library detected — CPU inference will be slow. \
174             Install: sudo apt install libopenblas-dev"
175                .to_string(),
176        ),
177
178        // Everything else is fine enough
179        _ => None,
180    }
181}