1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
// SPDX-FileCopyrightText: Copyright 2026 Au-Zone Technologies
// SPDX-License-Identifier: Apache-2.0
use crate::DecoderError;
/// Detected CPU instruction-set features.
///
/// Probed once at `DecoderBuilder::build()` and cached in the plan.
/// `from_env_or_probe()` reads `EDGEFIRST_DECODER_FORCE_KERNEL` for
/// debugging / benchmarking overrides.
///
/// Every flag defaults to `false` (the derived `Default`), which is what
/// the `scalar` override tier resolves to.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[allow(dead_code)] // Consumed by Phase 1 dispatch tables in later tasks.
pub(crate) struct CpuFeatures {
/// Set unconditionally by `probe()` on `aarch64` targets; `false` elsewhere.
pub(crate) neon_baseline: bool,
/// Runtime detection result for the aarch64 `fp16` feature.
pub(crate) neon_fp16: bool,
/// Runtime detection result for the aarch64 `dotprod` feature.
pub(crate) neon_dotprod: bool,
/// Runtime detection result for the aarch64 `i8mm` feature.
pub(crate) neon_i8mm: bool,
/// Runtime detection result for the x86_64 `avx2` feature.
pub(crate) avx2: bool,
/// Runtime detection result for the x86_64 `f16c` feature.
pub(crate) f16c: bool,
/// Runtime detection result for the x86_64 `avx512f` feature.
pub(crate) avx512f: bool,
}
impl CpuFeatures {
    /// Detect features supported by the current CPU.
    ///
    /// On `aarch64`, `neon_baseline` is set unconditionally and the `fp16`,
    /// `dotprod` and `i8mm` extensions are detected at runtime. On `x86_64`,
    /// `avx2`, `f16c` and `avx512f` are detected at runtime. On every other
    /// architecture all flags stay `false`.
    pub(crate) fn probe() -> Self {
        // Only the cfg-gated blocks below mutate `f`; on any other target the
        // binding is never written and would otherwise trip `unused_mut`.
        #[allow(unused_mut)]
        let mut f = Self::default();
        #[cfg(target_arch = "aarch64")]
        {
            f.neon_baseline = true;
            f.neon_fp16 = std::arch::is_aarch64_feature_detected!("fp16");
            f.neon_dotprod = std::arch::is_aarch64_feature_detected!("dotprod");
            f.neon_i8mm = std::arch::is_aarch64_feature_detected!("i8mm");
        }
        #[cfg(target_arch = "x86_64")]
        {
            f.avx2 = std::arch::is_x86_feature_detected!("avx2");
            f.f16c = std::arch::is_x86_feature_detected!("f16c");
            f.avx512f = std::arch::is_x86_feature_detected!("avx512f");
        }
        f
    }

    /// Honour `EDGEFIRST_DECODER_FORCE_KERNEL=<tier>` if set.
    ///
    /// When the variable is not present, the probed feature set is returned
    /// unchanged. Recognised tiers (case-insensitive): `scalar`, `neon`
    /// (alias `neon_baseline`), `neon_fp16`, `neon_dotprod`. A forced tier
    /// yields exactly the flags that tier needs; everything else is cleared.
    ///
    /// # Errors
    ///
    /// Returns `DecoderError::ForcedKernelUnavailable` when:
    /// - the requested tier needs a feature the current CPU does not report;
    /// - the value is not a recognised tier name, including a value that is
    ///   set but is not valid Unicode (previously such a value was silently
    ///   ignored as if the variable were unset).
    pub(crate) fn from_env_or_probe() -> Result<Self, DecoderError> {
        let probed = Self::probe();

        // `None` stands for "set, but not valid Unicode": it cannot name any
        // known tier, so it is routed to the unknown-tier error below rather
        // than being treated like an absent variable.
        let forced = match std::env::var("EDGEFIRST_DECODER_FORCE_KERNEL") {
            Ok(value) => Some(value.to_ascii_lowercase()),
            Err(std::env::VarError::NotPresent) => return Ok(probed),
            Err(std::env::VarError::NotUnicode(_)) => None,
        };

        // Accept the forced feature set only if the CPU actually has the
        // feature the tier depends on.
        let require = |available: bool,
                       tier: &'static str,
                       missing_feature: &'static str,
                       features: Self|
         -> Result<Self, DecoderError> {
            if available {
                Ok(features)
            } else {
                Err(DecoderError::ForcedKernelUnavailable {
                    tier,
                    missing_feature,
                })
            }
        };

        match forced.as_deref() {
            Some("scalar") => Ok(Self::default()),
            Some("neon" | "neon_baseline") => require(
                probed.neon_baseline,
                "neon",
                "neon",
                Self {
                    neon_baseline: true,
                    ..Self::default()
                },
            ),
            Some("neon_fp16") => require(
                probed.neon_fp16,
                "neon_fp16",
                "fp16",
                Self {
                    neon_baseline: true,
                    neon_fp16: true,
                    ..Self::default()
                },
            ),
            Some("neon_dotprod") => require(
                probed.neon_dotprod,
                "neon_dotprod",
                "dotprod",
                Self {
                    neon_baseline: true,
                    neon_dotprod: true,
                    ..Self::default()
                },
            ),
            _ => Err(DecoderError::ForcedKernelUnavailable {
                tier: "unknown",
                missing_feature: "unknown tier name",
            }),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;

    /// Name of the override variable read by `from_env_or_probe()`.
    const FORCE_KERNEL_VAR: &str = "EDGEFIRST_DECODER_FORCE_KERNEL";

    /// RAII guard that overrides an env var for the lifetime of the
    /// guard, capturing the prior value on construction and restoring it
    /// on drop. Restores even on panic, and treats "unset" as a distinct
    /// state from "set to empty string" so we don't accidentally leave a
    /// stray empty value behind.
    ///
    /// The prior value is captured as an `OsString` so that a pre-existing
    /// value that is not valid Unicode is restored verbatim instead of
    /// being mistaken for "unset" and removed.
    ///
    /// The repo runs tests with `--test-threads=1`, so the per-test
    /// mutation is serialized with respect to other tests in this
    /// process. The guard ensures we also don't leak state to test
    /// invocations that follow this one (or to a developer's shell when
    /// the env var was set externally before `cargo test`).
    struct EnvGuard {
        key: &'static str,
        prev: Option<OsString>,
    }

    impl EnvGuard {
        /// Set `key` to `value`, remembering whatever was there before.
        fn set(key: &'static str, value: &str) -> Self {
            let prev = std::env::var_os(key);
            std::env::set_var(key, value);
            Self { key, prev }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            match self.prev.take() {
                Some(v) => std::env::set_var(self.key, v),
                None => std::env::remove_var(self.key),
            }
        }
    }

    #[test]
    fn probe_does_not_panic() {
        let _ = CpuFeatures::probe();
    }

    #[test]
    fn probe_on_aarch64_has_neon_baseline() {
        let f = CpuFeatures::probe();
        #[cfg(target_arch = "aarch64")]
        assert!(f.neon_baseline, "aarch64 builds always have NEON baseline");
        #[cfg(not(target_arch = "aarch64"))]
        assert!(!f.neon_baseline);
    }

    #[test]
    fn from_env_with_scalar_clears_all_simd() {
        let _g = EnvGuard::set(FORCE_KERNEL_VAR, "scalar");
        let f = CpuFeatures::from_env_or_probe().unwrap();
        assert!(!f.neon_baseline);
        assert!(!f.neon_fp16);
        assert!(!f.neon_dotprod);
        assert!(!f.avx2);
        // Whole-struct comparison also covers `neon_i8mm`, `f16c`,
        // `avx512f` and any flag added later.
        assert_eq!(f, CpuFeatures::default());
    }

    #[test]
    fn from_env_tier_name_is_case_insensitive() {
        let _g = EnvGuard::set(FORCE_KERNEL_VAR, "SCALAR");
        let f = CpuFeatures::from_env_or_probe().unwrap();
        assert_eq!(f, CpuFeatures::default());
    }

    #[test]
    fn from_env_with_unknown_tier_errors() {
        let _g = EnvGuard::set(FORCE_KERNEL_VAR, "wibble");
        let r = CpuFeatures::from_env_or_probe();
        assert!(r.is_err());
        assert!(matches!(
            r,
            Err(crate::DecoderError::ForcedKernelUnavailable { .. })
        ));
    }
}