Skip to main content

keyhog_scanner/
hw_probe.rs

1//! Hardware capability probing with once-cached results.
2//!
3//! Detects CPU features (AVX-512, AVX2, NEON), GPU compute (wgpu/Vulkan),
4//! Hyperscan availability, io_uring support, memory, and core counts.
//! All detection runs once, on the first call to `probe_hardware`, and the result is
//! cached for the process lifetime.
6
7use std::sync::OnceLock;
8
/// Scan execution backend selected for a given workload.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ScanBackend {
    /// GPU pattern matching via warpstate.
    ///
    /// `select_backend` returns this only for batch workloads (at least 16 files and
    /// at least 10 patterns) on an available, non-software GPU.
    Gpu,
    /// Hyperscan NFA multi-pattern matching + SIMD prefilter.
    /// This is the primary high-throughput path on all platforms.
    ///
    /// `select_backend` also returns this variant when Hyperscan is not available but
    /// AVX-512, AVX2, or NEON was detected.
    SimdCpu,
    /// Pure CPU: warpstate AC + regex. No Hyperscan, no GPU.
    CpuFallback,
}
21
22impl ScanBackend {
23    /// Stable label for logs and CLI startup banner.
24    #[must_use]
25    pub fn label(self) -> &'static str {
26        match self {
27            Self::Gpu => "gpu-zero-copy",
28            Self::SimdCpu => "simd-regex",
29            Self::CpuFallback => "cpu-fallback",
30        }
31    }
32}
33
/// Hardware capabilities detected at startup.
///
/// Built once by `probe_hardware` and handed out as a shared `'static` reference.
#[derive(Debug, Clone)]
pub struct HardwareCaps {
    /// Physical core count; `probe_hardware` falls back to `logical_cores` when the
    /// platform-specific lookup yields nothing.
    pub physical_cores: usize,
    /// Value of `std::thread::available_parallelism()`, or 1 when that call fails.
    pub logical_cores: usize,
    /// AVX2 detected at runtime (x86_64 only; `false` on other architectures).
    pub has_avx2: bool,
    /// AVX-512F detected at runtime (x86_64 only; `false` on other architectures).
    pub has_avx512: bool,
    /// Set to `true` on every aarch64 build without a runtime check; `false` elsewhere.
    pub has_neon: bool,
    /// GPU availability as reported by `crate::gpu::gpu_probe`; `false` when the `gpu`
    /// feature is not compiled in. Stays as reported even for a software renderer —
    /// check `gpu_is_software` as well.
    pub gpu_available: bool,
    /// Adapter name reported by the GPU probe, if any.
    pub gpu_name: Option<String>,
    /// GPU memory size reported by the GPU probe, if any.
    pub gpu_vram_mb: Option<u64>,
    /// True when the GPU is a software renderer (llvmpipe/lavapipe) — always slower than CPU.
    /// Decided by a case-insensitive match of `gpu_name` against "llvmpipe", "lavapipe",
    /// or "swiftshader".
    pub gpu_is_software: bool,
    /// Total system memory, integer-divided down to 1024-based megabytes; `None` when it
    /// could not be determined.
    pub total_memory_mb: Option<u64>,
    /// Result of the io_uring probe; always `false` off Linux.
    pub io_uring_available: bool,
    /// True when the `simd` feature is compiled in.
    ///
    /// NOTE(review): `probe_hardware` sets this from `cfg!(feature = "simd")` only; it
    /// performs no runtime Hyperscan initialization check.
    pub hyperscan_available: bool,
}
52
/// Process-wide cache for the probe result; filled by the first `probe_hardware` call.
static HW_PROBE: OnceLock<HardwareCaps> = OnceLock::new();
54
55/// Probe hardware once and cache the result.
56pub fn probe_hardware() -> &'static HardwareCaps {
57    HW_PROBE.get_or_init(|| {
58        let logical_cores = std::thread::available_parallelism()
59            .map(|n| n.get())
60            .unwrap_or(1);
61        let physical_cores = physical_core_count().unwrap_or(logical_cores);
62
63        #[cfg(target_arch = "x86_64")]
64        let (has_avx2, has_avx512, has_neon) = (
65            std::arch::is_x86_feature_detected!("avx2"),
66            std::arch::is_x86_feature_detected!("avx512f"),
67            false,
68        );
69        #[cfg(target_arch = "aarch64")]
70        let (has_avx2, has_avx512, has_neon) = (false, false, true);
71        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
72        let (has_avx2, has_avx512, has_neon) = (false, false, false);
73
74        #[cfg(feature = "gpu")]
75        let (gpu_available, gpu_name, gpu_vram_mb) = crate::gpu::gpu_probe();
76        #[cfg(not(feature = "gpu"))]
77        let (gpu_available, gpu_name, gpu_vram_mb) = (false, None, None);
78
79        let gpu_is_software = gpu_name.as_deref().map_or(false, |name: &str| {
80            let lower = name.to_ascii_lowercase();
81            lower.contains("llvmpipe") || lower.contains("lavapipe") || lower.contains("swiftshader")
82        });
83        if gpu_is_software {
84            tracing::warn!(
85                gpu = ?gpu_name,
86                "Software GPU detected — GPU scanning disabled (slower than CPU)"
87            );
88        }
89
90        let hyperscan_available = cfg!(feature = "simd");
91        let total_memory_mb = detect_total_memory_mb();
92        let io_uring_available = detect_io_uring();
93
94        let caps = HardwareCaps {
95            physical_cores,
96            logical_cores,
97            has_avx2,
98            has_avx512,
99            has_neon,
100            gpu_available,
101            gpu_name: gpu_name.clone(),
102            gpu_vram_mb,
103            gpu_is_software,
104            total_memory_mb,
105            io_uring_available,
106            hyperscan_available,
107        };
108
109        tracing::info!(
110            physical_cores,
111            logical_cores,
112            gpu_available,
113            gpu_name = ?gpu_name,
114            has_avx512 = caps.has_avx512,
115            has_avx2 = caps.has_avx2,
116            has_neon = caps.has_neon,
117            hyperscan = hyperscan_available,
118            io_uring = io_uring_available,
119            "hardware probe complete"
120        );
121
122        caps
123    })
124}
125
126/// Select the best scan backend for the current hardware.
127///
128/// Priority (highest first):
129///   1. **GPU** — wgpu AC automaton on GPU cores. Pattern count is irrelevant;
130///      the automaton is the same size regardless. With cudagrep (GPUDirect
131///      Storage), data flows NVMe → GPU VRAM via DMA. Fastest path.
132///   2. **Hyperscan/SIMD** — NFA multi-pattern matching at ~500 MB/s on
133///      AVX-512/AVX2/NEON. Primary path for most deployments.
134///   3. **CPU fallback** — warpstate Aho-Corasick + regex. Works everywhere.
135///
136/// The `scan_coalesced` pipeline calls this once per scan. Individual files
137/// are routed through the selected backend automatically.
138#[must_use]
139pub fn select_backend(caps: &HardwareCaps, file_count: u64, pattern_count: usize) -> ScanBackend {
140    // GPU is fastest for batch workloads (many files, many patterns).
141    // Below the threshold, GPU dispatch overhead exceeds the parallelism benefit.
142    // Software GPUs (llvmpipe/lavapipe) are always slower than real CPU scanning.
143    const GPU_MIN_FILES: u64 = 16;
144    const GPU_MIN_PATTERNS: usize = 10;
145
146    if caps.gpu_available
147        && !caps.gpu_is_software
148        && file_count >= GPU_MIN_FILES
149        && pattern_count >= GPU_MIN_PATTERNS
150    {
151        return ScanBackend::Gpu;
152    }
153
154    // Hyperscan is always preferred when available — handles any pattern count.
155    if caps.hyperscan_available {
156        return ScanBackend::SimdCpu;
157    }
158
159    // SIMD prefilter available (AVX-512/AVX2/NEON) but no Hyperscan.
160    if caps.has_avx512 || caps.has_avx2 || caps.has_neon {
161        return ScanBackend::SimdCpu;
162    }
163
164    ScanBackend::CpuFallback
165}
166
167/// Format a one-line startup banner summarizing detected hardware.
168pub fn startup_banner(caps: &HardwareCaps, detector_count: usize, pattern_count: usize) -> String {
169    let gpu = if let Some(name) = &caps.gpu_name {
170        format!("GPU: {name}")
171    } else {
172        "GPU: none".to_string()
173    };
174
175    let simd = if caps.has_avx512 {
176        "AVX-512"
177    } else if caps.has_avx2 {
178        "AVX2"
179    } else if caps.has_neon {
180        "NEON"
181    } else {
182        "scalar"
183    };
184
185    let hs = if caps.hyperscan_available {
186        "Hyperscan"
187    } else {
188        "AC"
189    };
190    let uring = if caps.io_uring_available {
191        " io_uring"
192    } else {
193        ""
194    };
195
196    format!(
197        "{} cores | {} | SIMD: {} | {} | {detector_count} detectors ({pattern_count} patterns){uring}",
198        caps.physical_cores, gpu, simd, hs,
199    )
200}
201
202// ── Platform-specific detection ─────────────────────────────────────
203
204fn physical_core_count() -> Option<usize> {
205    #[cfg(target_os = "linux")]
206    {
207        linux_physical_cores()
208    }
209    #[cfg(target_os = "macos")]
210    {
211        macos_physical_cores()
212    }
213    #[cfg(target_os = "windows")]
214    {
215        windows_physical_cores()
216    }
217    #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))]
218    {
219        None
220    }
221}
222
/// Counts physical cores on Linux by reading `/proc/cpuinfo`.
///
/// Returns `None` when the file cannot be read or when it exposes no
/// `physical id` / `core id` pairs; the caller then falls back to the logical
/// core count.
#[cfg(target_os = "linux")]
fn linux_physical_cores() -> Option<usize> {
    let content = std::fs::read_to_string("/proc/cpuinfo").ok()?;
    count_cpuinfo_physical_cores(&content)
}

/// Counts distinct `(physical id, core id)` pairs in `/proc/cpuinfo`-style text.
///
/// Records are separated by blank lines. A record contributes only when both
/// fields are present and numeric, so sibling hardware threads that share a
/// core collapse into one entry. Returns `None` when no complete pair is found.
#[cfg(target_os = "linux")]
fn count_cpuinfo_physical_cores(content: &str) -> Option<usize> {
    /// Parses the number after the first `:`; a malformed line yields `None`
    /// for that field only instead of aborting the whole scan.
    fn field_value(line: &str) -> Option<usize> {
        line.split_once(':')?.1.trim().parse().ok()
    }

    let mut pairs = std::collections::HashSet::new();
    let mut physical_id = None::<usize>;
    let mut core_id = None::<usize>;

    // The extra empty line flushes the final record even when the text does not
    // end with a blank separator line.
    for line in content.lines().chain(std::iter::once("")) {
        if line.starts_with("physical id") {
            physical_id = field_value(line);
        } else if line.starts_with("core id") {
            core_id = field_value(line);
        } else if line.trim().is_empty() {
            // `take()` resets both fields for the next record in every case.
            if let (Some(p), Some(c)) = (physical_id.take(), core_id.take()) {
                pairs.insert((p, c));
            }
        }
    }

    if pairs.is_empty() {
        None
    } else {
        Some(pairs.len())
    }
}
248
/// Asks `sysctl -n hw.physicalcpu` for the physical core count on macOS.
///
/// Returns `None` when the command cannot be run or its output is not a number.
#[cfg(target_os = "macos")]
fn macos_physical_cores() -> Option<usize> {
    let output = std::process::Command::new("sysctl")
        .args(["-n", "hw.physicalcpu"])
        .output()
        .ok()?;
    let text = String::from_utf8_lossy(&output.stdout);
    text.trim().parse().ok()
}
257
/// Determines the physical core count on Windows.
///
/// Queries `Win32_Processor.NumberOfCores` through PowerShell first (modern) and
/// falls back to `wmic` (legacy). The query yields one value per processor
/// instance, so on multi-socket machines all values are summed rather than
/// only the first one being used.
///
/// Returns `None` when neither tool can be run or no numeric count is produced.
#[cfg(target_os = "windows")]
fn windows_physical_cores() -> Option<usize> {
    /// Sums one core count per entry. `None` when there are no entries, any
    /// entry is not a number, or the sum overflows.
    fn sum_core_counts<'a>(entries: impl Iterator<Item = &'a str>) -> Option<usize> {
        let mut total: Option<usize> = None;
        for entry in entries {
            let cores: usize = entry.trim().parse().ok()?;
            total = Some(total.unwrap_or(0).checked_add(cores)?);
        }
        total
    }

    std::process::Command::new("powershell")
        .args([
            "-NoProfile",
            "-Command",
            "(Get-CimInstance Win32_Processor).NumberOfCores",
        ])
        .output()
        .ok()
        .and_then(|o| {
            let text = String::from_utf8_lossy(&o.stdout);
            // One number per line, one line per processor instance.
            sum_core_counts(text.lines().filter(|l| !l.trim().is_empty()))
        })
        .or_else(|| {
            std::process::Command::new("wmic")
                .args(["cpu", "get", "NumberOfCores", "/value"])
                .output()
                .ok()
                .and_then(|o| {
                    let text = String::from_utf8_lossy(&o.stdout);
                    // `/value` prints one `NumberOfCores=<n>` line per processor instance.
                    sum_core_counts(
                        text.lines()
                            .filter_map(|l| l.strip_prefix("NumberOfCores=")),
                    )
                })
        })
}
284
/// Reports total system memory in 1024-based megabytes, rounded down.
///
/// * Linux: the `MemTotal:` entry of `/proc/meminfo` (kB value divided by 1024).
/// * macOS: `sysctl -n hw.memsize` (bytes).
/// * Windows: `Win32_ComputerSystem.TotalPhysicalMemory` (bytes) through PowerShell,
///   then `wmic` if that fails.
///
/// Returns `None` on other platforms or when the value cannot be read or parsed.
fn detect_total_memory_mb() -> Option<u64> {
    #[cfg(target_os = "linux")]
    {
        let meminfo = std::fs::read_to_string("/proc/meminfo").ok()?;
        let total_line = meminfo.lines().find(|l| l.starts_with("MemTotal:"))?;
        // Second whitespace-separated token is the size in kB.
        let kb = total_line.split_whitespace().nth(1)?.parse::<u64>().ok()?;
        Some(kb / 1024)
    }
    #[cfg(target_os = "macos")]
    {
        let output = std::process::Command::new("sysctl")
            .args(["-n", "hw.memsize"])
            .output()
            .ok()?;
        let bytes = String::from_utf8_lossy(&output.stdout)
            .trim()
            .parse::<u64>()
            .ok()?;
        Some(bytes / 1024 / 1024)
    }
    #[cfg(target_os = "windows")]
    {
        // Each probe yields the raw byte count; the conversion happens once at the end.
        let via_powershell = || -> Option<u64> {
            let output = std::process::Command::new("powershell")
                .args([
                    "-NoProfile",
                    "-Command",
                    "(Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory",
                ])
                .output()
                .ok()?;
            String::from_utf8_lossy(&output.stdout).trim().parse().ok()
        };
        let via_wmic = || -> Option<u64> {
            let output = std::process::Command::new("wmic")
                .args(["computersystem", "get", "TotalPhysicalMemory", "/value"])
                .output()
                .ok()?;
            let text = String::from_utf8_lossy(&output.stdout);
            let entry = text
                .lines()
                .find(|l| l.starts_with("TotalPhysicalMemory="))?;
            entry.split('=').nth(1)?.trim().parse().ok()
        };

        via_powershell()
            .or_else(via_wmic)
            .map(|bytes| bytes / 1024 / 1024)
    }
    #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))]
    {
        None
    }
}
342
343fn detect_io_uring() -> bool {
344    #[cfg(target_os = "linux")]
345    {
346        let kernel_ok = std::fs::read_to_string("/proc/sys/kernel/osrelease")
347            .ok()
348            .and_then(|s| {
349                let parts: Vec<&str> = s.trim().split('.').collect();
350                if parts.len() >= 2 {
351                    let major = parts[0].parse::<u32>().ok()?;
352                    let minor = parts[1].parse::<u32>().ok()?;
353                    Some(major > 5 || (major == 5 && minor >= 1))
354                } else {
355                    None
356                }
357            })
358            .unwrap_or(false);
359        if !kernel_ok {
360            return false;
361        }
362        io_uring::IoUring::new(1).is_ok()
363    }
364    #[cfg(not(target_os = "linux"))]
365    {
366        false
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline machine with every accelerator switched off; tests enable what they need.
    fn caps() -> HardwareCaps {
        HardwareCaps {
            physical_cores: 8,
            logical_cores: 16,
            has_avx2: false,
            has_avx512: false,
            has_neon: false,
            gpu_available: false,
            gpu_name: None,
            gpu_vram_mb: None,
            gpu_is_software: false,
            total_memory_mb: Some(32 * 1024),
            io_uring_available: false,
            hyperscan_available: false,
        }
    }

    #[test]
    fn gpu_preferred_for_batch_workloads() {
        let hw = HardwareCaps {
            gpu_available: true,
            ..caps()
        };
        // Workloads above both thresholds go to the GPU.
        for (files, patterns) in [(100, 50), (1000, 1000)] {
            assert_eq!(select_backend(&hw, files, patterns), ScanBackend::Gpu);
        }
        // A single file is too small: dispatch overhead dominates.
        assert_ne!(select_backend(&hw, 1, 50), ScanBackend::Gpu);
    }

    #[test]
    fn software_gpu_rejected() {
        let hw = HardwareCaps {
            gpu_available: true,
            gpu_is_software: true,
            gpu_name: Some("llvmpipe (LLVM 15.0.7, 256 bits)".to_string()),
            ..caps()
        };
        assert_ne!(select_backend(&hw, 1000, 1000), ScanBackend::Gpu);
    }

    #[test]
    fn simd_when_no_hyperscan() {
        let hw = HardwareCaps {
            has_avx2: true,
            ..caps()
        };
        assert_eq!(select_backend(&hw, 0, 10), ScanBackend::SimdCpu);
    }

    #[test]
    fn fallback_when_nothing_available() {
        let hw = caps();
        assert_eq!(select_backend(&hw, 0, 10), ScanBackend::CpuFallback);
    }

    #[test]
    fn startup_banner_format() {
        let hw = HardwareCaps {
            has_avx2: true,
            hyperscan_available: true,
            io_uring_available: true,
            ..caps()
        };
        let banner = startup_banner(&hw, 896, 1509);
        for expected in ["AVX2", "Hyperscan", "io_uring", "896 detectors"] {
            assert!(banner.contains(expected));
        }
    }

    #[test]
    fn windows_powershell_fallback() {
        // Smoke test only: the probe must compile and must not panic.
        #[cfg(target_os = "windows")]
        {
            let _ = windows_physical_cores();
        }
    }
}