Skip to main content

iqdb_distance/
features.rs

1//! Runtime CPU-feature detection and the test-only scalar override.
2//!
3//! [`detect_features`] probes the host once via `std::sync::OnceLock` and
4//! returns a [`CpuFeatures`] snapshot. The per-metric dispatch in
5//! [`crate::metrics`] consults it to pick AVX2 (x86_64), NEON (aarch64), or
6//! the scalar reference. `force_scalar` is a sticky global override used
7//! by tests to exercise the scalar path on a host that would otherwise pick
8//! a SIMD kernel.
9
10use core::sync::atomic::{AtomicBool, Ordering};
11use std::sync::OnceLock;
12
/// Host CPU capabilities relevant to kernel selection, as reported by
/// [`detect_features`].
///
/// Kept deliberately tiny and `Copy` because every distance call reads it
/// on the hot path. Additive releases may grow the field list, so code
/// that destructures it exhaustively may stop compiling after an upgrade.
///
/// `forced_scalar` is sampled when [`detect_features`] returns and is not
/// updated afterwards. A value held across a `force_scalar` call is
/// therefore stale — ask [`detect_features`] again instead of caching.
///
/// # Examples
///
/// ```
/// let features = iqdb_distance::detect_features();
/// // With no `force_scalar` call in between, two snapshots compare equal:
/// // the hardware probe is cached and the override has not changed.
/// assert_eq!(features, iqdb_distance::detect_features());
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CpuFeatures {
    /// Whether the host reports AVX2. Only ever `true` on x86_64.
    pub avx2: bool,
    /// Whether the host reports NEON. Only ever `true` on aarch64.
    pub neon: bool,
    /// Whether `force_scalar` had been called in this process when the
    /// snapshot was taken. Dispatch consults the flag on every call — see
    /// the `force_scalar` docs.
    pub forced_scalar: bool,
}
43
44static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
45static FORCED_SCALAR: AtomicBool = AtomicBool::new(false);
46
47/// Return the host CPU-feature snapshot, computing it on first call.
48///
49/// The probe runs at most once per process; subsequent calls return the
50/// cached value. The `forced_scalar` field reflects the *current* state of
51/// the `force_scalar` override, so the snapshot remains accurate even if
52/// the override is set after the probe ran.
53///
54/// # Examples
55///
56/// ```
57/// let features = iqdb_distance::detect_features();
58/// // On a host without AVX2 the flag is false; on a host without NEON
59/// // the flag is false. Both fields are always observable.
60/// let _ = (features.avx2, features.neon, features.forced_scalar);
61/// ```
62#[must_use]
63pub fn detect_features() -> CpuFeatures {
64    let probed = *CPU_FEATURES.get_or_init(probe);
65    CpuFeatures {
66        forced_scalar: forced_scalar(),
67        ..probed
68    }
69}
70
71/// Return `true` if `force_scalar` has been called in this process.
72///
73/// Reads an atomic flag — cheap, allocation-free, monotonic once set.
74/// `Relaxed` is sufficient: the flag is set-once `false → true` and the
75/// test harness coordinates the set/observe boundary through
76/// `std::sync::Once`, whose `call_once` provides happens-before for
77/// observers.
78///
79/// # Examples
80///
81/// ```
82/// // This crate never calls `force_scalar` itself, so the flag is normally
83/// // false unless a test has set it.
84/// let _ = iqdb_distance::forced_scalar();
85/// ```
86#[must_use]
87pub fn forced_scalar() -> bool {
88    FORCED_SCALAR.load(Ordering::Relaxed)
89}
90
/// Route every dispatched distance call in this process to the scalar
/// reference path.
///
/// The flag is **sticky**: once set it stays set until the process exits.
/// There is deliberately no `unforce_scalar`. The override exists so test
/// suites can exercise the scalar path on hardware that would otherwise
/// pick a SIMD kernel, and a flag that cannot be cleared keeps that test
/// state visible. Calling this more than once is harmless.
///
/// Compiled only for the crate's own unit tests (`cfg(test)`) or when the
/// `testing` feature is enabled. A build with neither cannot reach the
/// override, so SIMD cannot be disabled at runtime by accident.
///
/// The store is `Relaxed`, matching the `Relaxed` load in `forced_scalar`.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "testing")]
/// # {
/// use iqdb_distance::{Cosine, Distance};
///
/// let a = [1.0_f32, 0.0];
/// let b = [0.0_f32, 1.0];
/// let before = Cosine::compute(&a, &b).expect("valid pair");
///
/// // From here on, every dispatched call takes the scalar path.
/// iqdb_distance::force_scalar();
/// assert!(iqdb_distance::forced_scalar());
///
/// let after = Cosine::compute(&a, &b).expect("valid pair");
/// assert!((before - after).abs() < 1e-6);
/// # }
/// ```
#[cfg(any(test, feature = "testing"))]
pub fn force_scalar() {
    FORCED_SCALAR.store(true, Ordering::Relaxed);
}
126
/// Which implementation a distance call would run at this moment.
///
/// Crate-internal: [`select_kernel`] produces it and the per-metric
/// dispatch in [`crate::metrics`] consumes it. Each SIMD variant is
/// compiled only for the architecture that can reach it, which lets every
/// metric's `dispatch` `match` be exhaustive with no `_` arm — a catch-all
/// there could hide a routing mistake.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum Kernel {
    /// The scalar reference implementation; exists on every target.
    Scalar,
    /// The AVX2 implementation; the variant exists only on x86_64.
    #[cfg(target_arch = "x86_64")]
    Avx2,
    /// The NEON implementation; the variant exists only on aarch64.
    #[cfg(target_arch = "aarch64")]
    Neon,
}
145
146/// Decide which kernel a distance call should route to, given a snapshot
147/// of the host CPU features.
148///
149/// This is the **single source of truth** for the dispatch decision. The
150/// per-metric `dispatch` fns in [`crate::metrics`] and the testing-only
151/// [`which_kernel`] accessor both call this function — they cannot drift,
152/// so the differential test's "SIMD actually ran" assertion is asserting
153/// the real path, not a copy of it.
154pub(crate) fn select_kernel(features: CpuFeatures) -> Kernel {
155    if features.forced_scalar {
156        return Kernel::Scalar;
157    }
158    #[cfg(target_arch = "x86_64")]
159    if features.avx2 {
160        return Kernel::Avx2;
161    }
162    #[cfg(target_arch = "aarch64")]
163    if features.neon {
164        return Kernel::Neon;
165    }
166    Kernel::Scalar
167}
168
/// Name the kernel a distance call would dispatch to right now:
/// `"scalar"`, `"avx2"`, or `"neon"`.
///
/// The differential SIMD-vs-scalar test uses this to prove that dispatch
/// really picked the host's SIMD kernel before it collects its "SIMD"
/// samples. Without that check, a detection regression that quietly fell
/// back to scalar would turn the test into a scalar-vs-scalar comparison
/// that passes vacuously.
///
/// Compiled only under `cfg(any(test, feature = "testing"))`. **Not part
/// of the stable public surface** — the return type and the strings are
/// testing internals and may change.
///
/// The answer comes from the crate-private `select_kernel`, the same
/// function the production dispatch path calls, so this accessor cannot
/// disagree with what actually runs.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "testing")]
/// # {
/// // Whatever the host, the result is one of the three known names.
/// let kernel = iqdb_distance::which_kernel();
/// assert!(matches!(kernel, "scalar" | "avx2" | "neon"));
/// # }
/// ```
#[cfg(any(test, feature = "testing"))]
#[must_use]
pub fn which_kernel() -> &'static str {
    let chosen = select_kernel(detect_features());
    // No `_` arm on purpose: a new `Kernel` variant must be named here.
    match chosen {
        Kernel::Scalar => "scalar",
        #[cfg(target_arch = "x86_64")]
        Kernel::Avx2 => "avx2",
        #[cfg(target_arch = "aarch64")]
        Kernel::Neon => "neon",
    }
}
207
208fn probe() -> CpuFeatures {
209    CpuFeatures {
210        avx2: probe_avx2(),
211        neon: probe_neon(),
212        forced_scalar: false,
213    }
214}
215
/// Whether the host CPU advertises AVX2.
///
/// On x86_64 this asks the standard library's runtime feature detection;
/// on every other architecture the answer is a constant `false`.
fn probe_avx2() -> bool {
    #[cfg(target_arch = "x86_64")]
    {
        std::is_x86_feature_detected!("avx2")
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        false
    }
}
225
/// Whether the host CPU advertises NEON.
///
/// On aarch64 this asks the standard library's runtime feature detection;
/// on every other architecture the answer is a constant `false`.
fn probe_neon() -> bool {
    #[cfg(target_arch = "aarch64")]
    {
        std::arch::is_aarch64_feature_detected!("neon")
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        false
    }
}