Skip to main content

keyhog_scanner/hw_probe/
mod.rs

//! Hardware capability probing with once-cached results.
//!
//! Detects CPU features (AVX-512, AVX2, NEON), GPU compute (wgpu/Vulkan),
//! Hyperscan availability, io_uring support, memory, and core counts.
//! All detection is done once, on the first call to `probe_hardware`
//! (normally at startup), and cached for the process lifetime.
//!
//! Split into focused submodules so no single file exceeds the
//! 500-line cap:
//!
//!   * [`thresholds`] - GPU routing crossover constants (pub, also
//!     consumed by tests and the `keyhog backend` debug subcommand).
//!   * [`tier`] - [`GpuTier`] enum + `classify_gpu_tier` substring
//!     heuristics + tier→threshold lookups.
//!   * [`select`] - [`select_backend`] routing logic + env-override
//!     parsing.
//!   * [`banner`] - `startup_banner` formatter for the CLI header.
//!   * [`platform`] - per-OS detection of physical cores, memory,
//!     and io_uring availability.
20
21use std::sync::OnceLock;
22
23mod banner;
24mod platform;
25mod select;
26mod tier;
27
28pub mod thresholds;
29
30pub use banner::startup_banner;
31pub use select::{
32    clear_test_backend_override, forced_backend_from_env, forced_backend_from_env_uncached,
33    gpu_could_engage, parse_backend_str, select_backend, select_backend_for_batch,
34    set_test_backend_override,
35};
36pub use tier::{
37    classify_gpu_tier, gpu_min_bytes_for_tier, gpu_pattern_breakeven_for_tier,
38    gpu_solo_bytes_for_tier, GpuTier,
39};
40
/// Scan execution backend selected for a given workload.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include
/// a wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ScanBackend {
    /// GPU pattern matching via vyre's literal-set engine
    /// (`GpuLiteralSet`). The default GPU path; fewer than roughly 1500
    /// patterns, literal-prefix matching only.
    Gpu,
    /// GPU regex multimatch via vyre's `RulePipeline` mega-scan
    /// pipeline (NFA-based). Activated by `KEYHOG_BACKEND=mega-scan`;
    /// the regex-completion path that handles patterns
    /// `GpuLiteralSet`'s literal prefix can't reduce to a literal.
    MegaScan,
    /// Hyperscan NFA multi-pattern matching + SIMD prefilter.
    /// This is the primary high-throughput path on all platforms.
    SimdCpu,
    /// Pure CPU: vyre AC + regex. No Hyperscan, no GPU.
    CpuFallback,
}

impl ScanBackend {
    /// Stable label for logs and CLI startup banner.
    ///
    /// Returns a fixed `'static` string per variant:
    ///
    /// | variant       | label           |
    /// |---------------|-----------------|
    /// | `Gpu`         | `gpu-zero-copy` |
    /// | `MegaScan`    | `gpu-mega-scan` |
    /// | `SimdCpu`     | `simd-regex`    |
    /// | `CpuFallback` | `cpu-fallback`  |
    #[must_use]
    pub fn label(self) -> &'static str {
        // Deliberately no wildcard arm: adding a variant must fail to
        // compile here until it is given a label.
        match self {
            Self::Gpu => "gpu-zero-copy",
            Self::MegaScan => "gpu-mega-scan",
            Self::SimdCpu => "simd-regex",
            Self::CpuFallback => "cpu-fallback",
        }
    }
}
73
/// Hardware capabilities detected at startup.
///
/// The field descriptions below state how `probe_hardware` in this module
/// populates each value; instances built by other code (for example in
/// tests) may carry arbitrary values.
#[derive(Debug, Clone)]
pub struct HardwareCaps {
    /// Physical core count from platform detection; falls back to
    /// `logical_cores` when the platform query yields nothing.
    pub physical_cores: usize,
    /// Value of `std::thread::available_parallelism()`, or `1` when that
    /// call fails.
    pub logical_cores: usize,
    /// Runtime-detected AVX2 support on `x86_64`; `false` on every other
    /// architecture.
    pub has_avx2: bool,
    /// Runtime-detected AVX-512 Foundation (`avx512f`) support on
    /// `x86_64`; `false` on every other architecture.
    pub has_avx512: bool,
    /// `true` on `aarch64` builds (set unconditionally, no runtime check);
    /// `false` on every other architecture.
    pub has_neon: bool,
    /// First element of the tuple returned by `crate::gpu::gpu_probe()`.
    pub gpu_available: bool,
    /// Adapter name reported by `crate::gpu::gpu_probe()`, if any.
    pub gpu_name: Option<String>,
    /// VRAM size reported by `crate::gpu::gpu_probe()`, if any.
    /// NOTE(review): presumably megabytes, per the field name - confirm
    /// against `gpu_probe`.
    pub gpu_vram_mb: Option<u64>,
    /// True when the GPU is a software renderer (llvmpipe/lavapipe) - always slower than CPU.
    ///
    /// Set when `gpu_name`, compared case-insensitively (ASCII), contains
    /// `llvmpipe`, `lavapipe` or `swiftshader`.
    pub gpu_is_software: bool,
    /// Result of the per-OS total-memory detection, if available.
    /// NOTE(review): presumably megabytes, per the field name - confirm
    /// against the `platform` module.
    pub total_memory_mb: Option<u64>,
    /// Result of the per-OS io_uring availability detection.
    pub io_uring_available: bool,
    /// True when the crate was built with the `simd` feature.
    ///
    /// The probe derives this from `cfg!(feature = "simd")` alone; it does
    /// not verify at runtime that Hyperscan actually initialized.
    pub hyperscan_available: bool,
}
92
93static HW_PROBE: OnceLock<HardwareCaps> = OnceLock::new();
94
95/// Probe hardware once and cache the result.
96pub fn probe_hardware() -> &'static HardwareCaps {
97    HW_PROBE.get_or_init(|| {
98        let logical_cores = std::thread::available_parallelism()
99            .map(|n| n.get())
100            .unwrap_or(1);
101        let physical_cores = platform::physical_core_count().unwrap_or(logical_cores);
102
103        #[cfg(target_arch = "x86_64")]
104        let (has_avx2, has_avx512, has_neon) = (
105            std::arch::is_x86_feature_detected!("avx2"),
106            std::arch::is_x86_feature_detected!("avx512f"),
107            false,
108        );
109        #[cfg(target_arch = "aarch64")]
110        let (has_avx2, has_avx512, has_neon) = (false, false, true);
111        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
112        let (has_avx2, has_avx512, has_neon) = (false, false, false);
113
114        let (gpu_available, gpu_name, gpu_vram_mb) = crate::gpu::gpu_probe();
115
116        let gpu_is_software = gpu_name.as_deref().is_some_and(|name: &str| {
117            let lower = name.to_ascii_lowercase();
118            lower.contains("llvmpipe")
119                || lower.contains("lavapipe")
120                || lower.contains("swiftshader")
121        });
122        if gpu_is_software {
123            tracing::warn!(
124                gpu = ?gpu_name,
125                "Software GPU detected: GPU scanning disabled (slower than CPU)"
126            );
127        }
128
129        let hyperscan_available = cfg!(feature = "simd");
130        let total_memory_mb = platform::detect_total_memory_mb();
131        let io_uring_available = platform::detect_io_uring();
132
133        let caps = HardwareCaps {
134            physical_cores,
135            logical_cores,
136            has_avx2,
137            has_avx512,
138            has_neon,
139            gpu_available,
140            gpu_name: gpu_name.clone(),
141            gpu_vram_mb,
142            gpu_is_software,
143            total_memory_mb,
144            io_uring_available,
145            hyperscan_available,
146        };
147
148        tracing::info!(
149            physical_cores,
150            logical_cores,
151            gpu_available,
152            gpu_name = ?gpu_name,
153            has_avx512 = caps.has_avx512,
154            has_avx2 = caps.has_avx2,
155            has_neon = caps.has_neon,
156            hyperscan = hyperscan_available,
157            io_uring = io_uring_available,
158            "hardware probe complete"
159        );
160
161        caps
162    })
163}