Skip to main content

keyhog_scanner/hw_probe/
mod.rs

1//! Hardware capability probing with once-cached results.
2//!
3//! Detects CPU features (AVX-512, AVX2, NEON), GPU compute (wgpu/Vulkan),
4//! Hyperscan availability, io_uring support, memory, and core counts.
5//! All detection is done once at startup and cached for the process
6//! lifetime.
7//!
8//! Split into focused submodules so no single file exceeds the
9//! 500-line cap:
10//!
11//!   * [`thresholds`] - GPU routing crossover constants (pub, also
12//!     consumed by tests and the `keyhog backend` debug subcommand).
13//!   * [`tier`] - [`GpuTier`] enum + `classify_gpu_tier` substring
14//!     heuristics + tier→threshold lookups.
15//!   * [`select`] - [`select_backend`] routing logic + env-override
16//!     parsing.
17//!   * [`banner`] - `startup_banner` formatter for the CLI header.
18//!   * [`platform`] - per-OS detection of physical cores, memory,
19//!     and io_uring availability.
20
21use std::sync::OnceLock;
22
23mod banner;
24mod platform;
25mod select;
26mod tier;
27
28pub mod thresholds;
29
30pub use banner::startup_banner;
31pub use select::{
32    clear_test_backend_override, forced_backend_from_env, forced_backend_from_env_uncached,
33    select_backend, set_test_backend_override,
34};
35pub use tier::{
36    classify_gpu_tier, gpu_min_bytes_for_tier, gpu_pattern_breakeven_for_tier,
37    gpu_solo_bytes_for_tier, GpuTier,
38};
39
/// Scan execution backend selected for a given workload.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm. It is a plain fieldless `Copy` type and derives `Hash`
/// so it can be used directly as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ScanBackend {
    /// GPU pattern matching via vyre's literal-set engine
    /// (`GpuLiteralSet`). The default GPU path; <~1500 patterns,
    /// literal-prefix matching only.
    Gpu,
    /// GPU regex multimatch via vyre's `RulePipeline` mega-scan
    /// pipeline (NFA-based). Activated by `KEYHOG_BACKEND=mega-scan`;
    /// the regex-completion path that handles patterns
    /// `GpuLiteralSet`'s literal prefix can't reduce to a literal.
    MegaScan,
    /// Hyperscan NFA multi-pattern matching + SIMD prefilter.
    /// This is the primary high-throughput path on all platforms.
    SimdCpu,
    /// Pure CPU: vyre AC + regex. No Hyperscan, no GPU.
    CpuFallback,
}

impl ScanBackend {
    /// Stable label for logs and CLI startup banner.
    ///
    /// Every variant maps to a fixed `'static` string. The function is
    /// `const`, so the label can also be evaluated in constant contexts.
    #[must_use]
    pub const fn label(self) -> &'static str {
        match self {
            Self::Gpu => "gpu-zero-copy",
            Self::MegaScan => "gpu-mega-scan",
            Self::SimdCpu => "simd-regex",
            Self::CpuFallback => "cpu-fallback",
        }
    }
}
72
/// Hardware capabilities detected at startup.
///
/// A plain value snapshot; the process-wide instance is produced by
/// [`probe_hardware`]. Derives `PartialEq`/`Eq` so two snapshots can be
/// compared field by field (useful in tests and diagnostics).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HardwareCaps {
    /// Physical core count. [`probe_hardware`] falls back to
    /// `logical_cores` when the platform query returns nothing.
    pub physical_cores: usize,
    /// Logical core count from `std::thread::available_parallelism`,
    /// or `1` when that call fails.
    pub logical_cores: usize,
    /// AVX2 detected at runtime (only ever true on `x86_64`).
    pub has_avx2: bool,
    /// AVX-512 Foundation (`avx512f`) detected at runtime (only ever
    /// true on `x86_64`).
    pub has_avx512: bool,
    /// NEON available. [`probe_hardware`] sets this unconditionally on
    /// `aarch64` builds and to `false` everywhere else.
    pub has_neon: bool,
    /// Whether the GPU probe reported a usable adapter.
    pub gpu_available: bool,
    /// Adapter name reported by the GPU probe, if any.
    pub gpu_name: Option<String>,
    /// Adapter memory as reported by the GPU probe, if known
    /// (presumably megabytes, per the field name).
    pub gpu_vram_mb: Option<u64>,
    /// True when the GPU is a software renderer
    /// (llvmpipe/lavapipe/swiftshader) - always slower than CPU.
    pub gpu_is_software: bool,
    /// Total system memory, if the platform layer could determine it
    /// (presumably megabytes, per the field name).
    pub total_memory_mb: Option<u64>,
    /// Whether the platform layer detected io_uring support.
    pub io_uring_available: bool,
    /// True when the `simd` feature is compiled in.
    ///
    /// NOTE(review): as populated by [`probe_hardware`] this is a
    /// compile-time `cfg!(feature = "simd")` check only; it does not
    /// prove that Hyperscan initialized successfully at runtime.
    pub hyperscan_available: bool,
}
91
92static HW_PROBE: OnceLock<HardwareCaps> = OnceLock::new();
93
94/// Probe hardware once and cache the result.
95pub fn probe_hardware() -> &'static HardwareCaps {
96    HW_PROBE.get_or_init(|| {
97        let logical_cores = std::thread::available_parallelism()
98            .map(|n| n.get())
99            .unwrap_or(1);
100        let physical_cores = platform::physical_core_count().unwrap_or(logical_cores);
101
102        #[cfg(target_arch = "x86_64")]
103        let (has_avx2, has_avx512, has_neon) = (
104            std::arch::is_x86_feature_detected!("avx2"),
105            std::arch::is_x86_feature_detected!("avx512f"),
106            false,
107        );
108        #[cfg(target_arch = "aarch64")]
109        let (has_avx2, has_avx512, has_neon) = (false, false, true);
110        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
111        let (has_avx2, has_avx512, has_neon) = (false, false, false);
112
113        let (gpu_available, gpu_name, gpu_vram_mb) = crate::gpu::gpu_probe();
114
115        let gpu_is_software = gpu_name.as_deref().is_some_and(|name: &str| {
116            let lower = name.to_ascii_lowercase();
117            lower.contains("llvmpipe")
118                || lower.contains("lavapipe")
119                || lower.contains("swiftshader")
120        });
121        if gpu_is_software {
122            tracing::warn!(
123                gpu = ?gpu_name,
124                "Software GPU detected: GPU scanning disabled (slower than CPU)"
125            );
126        }
127
128        let hyperscan_available = cfg!(feature = "simd");
129        let total_memory_mb = platform::detect_total_memory_mb();
130        let io_uring_available = platform::detect_io_uring();
131
132        let caps = HardwareCaps {
133            physical_cores,
134            logical_cores,
135            has_avx2,
136            has_avx512,
137            has_neon,
138            gpu_available,
139            gpu_name: gpu_name.clone(),
140            gpu_vram_mb,
141            gpu_is_software,
142            total_memory_mb,
143            io_uring_available,
144            hyperscan_available,
145        };
146
147        tracing::info!(
148            physical_cores,
149            logical_cores,
150            gpu_available,
151            gpu_name = ?gpu_name,
152            has_avx512 = caps.has_avx512,
153            has_avx2 = caps.has_avx2,
154            has_neon = caps.has_neon,
155            hyperscan = hyperscan_available,
156            io_uring = io_uring_available,
157            "hardware probe complete"
158        );
159
160        caps
161    })
162}