Skip to main content

keyhog_scanner/engine/
compile.rs

1use super::*;
2
3impl CompiledScanner {
4    pub fn compile(detectors: Vec<DetectorSpec>) -> Result<Self> {
5        let mut state = build_compile_state(&detectors)?;
6        let ac = build_ac_pattern_set(&state.ac_literals)?;
7        // GPU is unconditional in the build; runtime probe decides whether to
8        // actually use it. `gpu_available` is set by hw_probe based on adapter
9        // detection (excluding software renderers like llvmpipe/lavapipe).
10        // Resolve the active GPU backend with the cascade
11        //     CUDA (when `cuda` feature on + libcuda.so loadable)
12        //     → wgpu (any-vendor cross-platform fallback)
13        //     → None (auto-routes to SIMD/CPU).
14        // CUDA bypasses the wgpu validation layers + naga IR + WGSL
15        // text + driver shader compile; the path through CUDA driver
16        // API + PTX is empirically 5-10× faster on NVIDIA hardware
17        // and is the headline path. CUDA acquisition is opaque to
18        // failures: if libcuda.so is missing or the driver refuses,
19        // `acquire()` returns Err and we fall through to wgpu so
20        // nothing regresses on non-CUDA hosts.
21        // `crate::gpu::env_no_gpu()` is the single source of truth for
22        // "skip every GPU init path". Explicit KEYHOG_NO_GPU wins both
23        // directions; in its absence the helper auto-detects CI runners
24        // (CI=true + a dozen platform-specific markers) and returns
25        // true, since CI runners have no discrete GPU - the wgpu probe
26        // would enumerate llvmpipe, get rejected as software, and the
27        // operator would see a confusing "GPU MoE init failed" warning
28        // after burning ~250ms on cold-start. Set KEYHOG_NO_GPU=0 in CI
29        // to opt back in on self-hosted GPU runners.
30        let gpu_disabled = crate::gpu::env_no_gpu();
31        if gpu_disabled {
32            let in_ci = crate::gpu::is_ci_environment() && std::env::var("KEYHOG_NO_GPU").is_err();
33            if in_ci {
34                tracing::info!(
35                    target: "keyhog::routing",
36                    "CI environment detected (CI= or platform-specific marker set); bypassing CUDA/wgpu init. \
37                     Set KEYHOG_NO_GPU=0 to force GPU on self-hosted GPU runners."
38                );
39            } else {
40                tracing::info!(
41                    target: "keyhog::routing",
42                    "KEYHOG_NO_GPU set: bypassing CUDA/wgpu init, routing every chunk through the CPU/SIMD path"
43                );
44            }
45        }
46        let (gpu_literals, gpu_backend, wgpu_backend) =
47            if !gpu_disabled && crate::hw_probe::probe_hardware().gpu_available {
48                let literals = build_gpu_literals(&state.ac_literals);
49                let cuda_backend: Option<Arc<dyn vyre::VyreBackend>> = {
50                    #[cfg(target_os = "linux")]
51                    {
52                        match vyre_driver_cuda::cuda_factory() {
53                            Ok(boxed) => {
54                                tracing::info!(
55                                    target: "keyhog::routing",
56                                    "CUDA backend acquired, bypassing wgpu/naga/WGSL path"
57                                );
58                                Some(Arc::from(boxed))
59                            }
60                            Err(error) => {
61                                surface_cuda_acquisition_failure(&error);
62                                None
63                            }
64                        }
65                    }
66                    #[cfg(not(target_os = "linux"))]
67                    {
68                        None
69                    }
70                };
71                match cuda_backend {
72                    Some(cuda) => (literals, Some(cuda), None),
73                    None => match vyre_driver_wgpu::WgpuBackend::shared() {
74                        Ok(wgpu) => {
75                            let trait_obj: Arc<dyn vyre::VyreBackend> = wgpu.clone();
76                            (literals, Some(trait_obj), Some(wgpu))
77                        }
78                        Err(error) => {
79                            tracing::warn!(
80                                target: "keyhog::routing",
81                                %error,
82                                "wgpu backend unavailable; scan will use CPU-only path"
83                            );
84                            (literals, None, None)
85                        }
86                    },
87                }
88            } else {
89                (None, None, None)
90            };
91        let prefix_propagation = build_prefix_propagation(&state.ac_literals);
92        let same_prefix_patterns = build_same_prefix_patterns(&state.ac_literals);
93
94        // Build the Hyperscan scanner BEFORE the keyword fallback so we
95        // learn which ac_map patterns Hyperscan rejected (over-long, or an
96        // unsupported construct like a large `{100,200}` bounded repeat).
97        // A rejected pattern produces zero HS matches, and because it took
98        // the literal-prefix (ac_map) branch in build_compile_state it is
99        // NOT in the keyword fallback either - so it is silently dead under
100        // the HS backend (the default on Linux/CI). Reroute each one into
101        // the keyword fallback, gated by its detector's keywords, so it
102        // fires via the backend-independent regex sweep. Closes the
103        // contracts_runner recall hole on line/paloalto/tower/keystonejs/
104        // snowflake/bandwidth and the matching adversarial-wrapper misses.
105        #[cfg(feature = "simd")]
106        let (simd_prefilter, hs_index_map) =
107            match super::build_simd_scanner(&state.ac_map, &state.fallback) {
108                Some((scanner, index_map, unsupported_ac)) => {
109                    for ac_idx in unsupported_ac {
110                        let pattern = state.ac_map[ac_idx].clone();
111                        let keywords = detectors[pattern.detector_index].keywords.clone();
112                        state.fallback.push((pattern, keywords));
113                    }
114                    (Some(scanner), index_map)
115                }
116                None => (None, Vec::new()),
117            };
118
119        let (fallback_keyword_ac, fallback_keyword_to_patterns) =
120            build_fallback_keyword_ac(&state.fallback);
121        // Precompute the per-pattern "always-active" bitmap so the per-chunk
122        // hot path avoids walking every pattern's keyword list. See the
123        // doc comment on the field for rationale.
124        let fallback_always_active: Vec<bool> = state
125            .fallback
126            .iter()
127            // Mirrors `compiler::build_fallback_keyword_ac`'s
128            // 4-char floor - see the rationale comment there. The
129            // experimental 3-char floor measured a net F1 regression
130            // on SecretBench-medium, so both checks stay at 4.
131            .map(|(_, keywords)| !keywords.iter().any(|k| k.len() >= 4))
132            .collect();
133
134        log_quality_warnings(&state.quality_warnings);
135
136        let mut alphabet_targets = state.ac_literals.clone();
137        for (_, keywords) in &state.fallback {
138            alphabet_targets.extend(keywords.clone());
139        }
140        let alphabet_screen = if alphabet_targets.is_empty() {
141            None
142        } else {
143            Some(crate::alphabet_filter::AlphabetScreen::new(
144                &alphabet_targets,
145            ))
146        };
147
148        let bigram_bloom =
149            crate::bigram_bloom::BigramBloom::from_literal_prefixes(&alphabet_targets);
150        tracing::debug!(
151            popcount = bigram_bloom.popcount(),
152            "bigram bloom built (4096 bits, lower popcount = stronger filter)"
153        );
154
155        // Pre-intern detector metadata strings into a CHD perfect
156        // hash so per-scan `intern_metadata` calls hand out shared
157        // `Arc<str>` without touching the global allocator. Built
158        // once per scanner; lock-free on read.
159        let static_intern_strings: Vec<&str> = detectors
160            .iter()
161            .flat_map(|d| [d.id.as_str(), d.name.as_str(), d.service.as_str()].into_iter())
162            .collect();
163        let static_intern = Arc::new(crate::static_intern::StaticInterner::from_detector_strings(
164            static_intern_strings,
165        ));
166
167        // Precise-regex validators for the simdsieve hot fast-path. Built here
168        // (before `detectors` is moved into the struct) so the fast path can
169        // reject literal-prefix candidates the detector's own regex would not
170        // match - see `build_hot_pattern_validators`.
171        #[cfg(feature = "simdsieve")]
172        let hot_pattern_validators =
173            crate::simdsieve_prefilter::build_hot_pattern_validators(&detectors);
174
175        Ok(Self {
176            ac,
177            gpu_backend,
178            wgpu_backend,
179            gpu_literals,
180            gpu_matcher: OnceLock::new(),
181            gpu_const_packs: OnceLock::new(),
182            gpu_ac_const_packs: OnceLock::new(),
183            ac_gpu_program: OnceLock::new(),
184
185            rule_pipeline: OnceLock::new(),
186            fused_program: OnceLock::new(),
187            fused_decode_programs: OnceLock::new(),
188            static_intern,
189            ac_map: state.ac_map,
190            prefix_propagation,
191            fallback: state.fallback,
192            companions: state.companions,
193            detectors,
194            same_prefix_patterns,
195            fallback_keyword_ac,
196            fallback_keyword_to_patterns,
197            fallback_always_active,
198            #[cfg(feature = "simd")]
199            simd_prefilter,
200            #[cfg(feature = "simd")]
201            hs_index_map,
202            #[cfg(feature = "simdsieve")]
203            hot_pattern_validators,
204            config: ScannerConfig::default(),
205            alphabet_screen,
206            bigram_bloom,
207            fragment_cache: crate::fragment_cache::FragmentCache::new(1000),
208        })
209    }
210
211    /// Apply a custom configuration to the compiled scanner.
212    pub fn with_config(mut self, config: ScannerConfig) -> Self {
213        self.config = config;
214        self
215    }
216}
217
/// One-shot guard for the user-facing "CUDA backend unavailable" notice
/// printed by `surface_cuda_acquisition_failure`: only the first successful
/// `set(())` lets the notice through, so it appears at most once per
/// process instead of on every recompile. The CUDA factory is called inside
/// `compile()`, and a binary that re-compiles a scanner per-job (daemon
/// mode, watch mode) would otherwise spam.
#[cfg(target_os = "linux")]
static CUDA_FALLBACK_WARNED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
224
225/// Surface a CUDA-backend acquisition failure when the host looks
226/// like it should have a working CUDA stack. We don't want to warn
227/// on plain non-NVIDIA Linux (the wgpu fall-through is the right
228/// path); we DO want to warn when the user is on an NVIDIA box with
229/// libcuda.so or /proc/driver/nvidia present, because in that case
230/// they paid for the CUDA stack and we just dropped them onto the
231/// 5-10x slower wgpu path silently. KEYHOG_REQUIRE_GPU=1 turns the
232/// warning into a hard exit, matching the contract used by the MoE
233/// init and the scan dispatch paths.
234#[cfg(target_os = "linux")]
235fn surface_cuda_acquisition_failure(error: &dyn std::fmt::Display) {
236    let on_nvidia_host = nvidia_userland_present();
237    let require_gpu = std::env::var("KEYHOG_REQUIRE_GPU").as_deref() == Ok("1");
238    let no_gpu = std::env::var("KEYHOG_NO_GPU").as_deref() == Ok("1");
239
240    if require_gpu && on_nvidia_host {
241        eprintln!(
242            "keyhog: KEYHOG_REQUIRE_GPU=1 but CUDA backend acquisition failed on \
243an NVIDIA host: {error}. Refusing to fall back to WGPU."
244        );
245        std::process::exit(2);
246    }
247
248    if no_gpu {
249        return;
250    }
251
252    if on_nvidia_host && CUDA_FALLBACK_WARNED.set(()).is_ok() {
253        eprintln!(
254            "keyhog: CUDA backend unavailable on this NVIDIA host ({error}); \
255falling back to WGPU (typically 5-10x slower than CUDA on the same hardware). \
256This is usually a libcuda.so version mismatch or a driver upgrade pending a \
257reboot. Set KEYHOG_NO_GPU=1 to silence this warning, or KEYHOG_REQUIRE_GPU=1 \
258to hard-fail next time."
259        );
260    }
261    tracing::warn!("CUDA backend unavailable, falling back to wgpu: {error}");
262}
263
/// Report whether this host appears to have an NVIDIA CUDA userland
/// installed.
///
/// Returns `true` as soon as `/proc/driver/nvidia` or any of the common
/// `libcuda.so` locations exists on disk, and `false` when none do. The
/// probe list mirrors the one install.sh uses so the runtime view matches
/// the install-time view.
#[cfg(target_os = "linux")]
fn nvidia_userland_present() -> bool {
    /// Kernel-driver marker, probed first.
    const DRIVER_PROC_DIR: &str = "/proc/driver/nvidia";
    /// Userland library locations, probed in this order.
    const LIBCUDA_CANDIDATES: [&str; 6] = [
        "/usr/lib/x86_64-linux-gnu/libcuda.so",
        "/usr/lib/x86_64-linux-gnu/libcuda.so.1",
        "/usr/lib64/libcuda.so",
        "/usr/lib64/libcuda.so.1",
        "/usr/local/cuda/lib64/libcuda.so",
        "/opt/cuda/lib64/libcuda.so",
    ];

    // `any` stops at the first hit, so later paths are not probed.
    std::iter::once(DRIVER_PROC_DIR)
        .chain(LIBCUDA_CANDIDATES)
        .any(|candidate| std::path::Path::new(candidate).exists())
}