1#[cfg(feature = "gpu")]
27#[path = "gpu_shader.rs"]
28mod gpu_shader;
29
30#[cfg(feature = "gpu")]
31#[path = "gpu_moe_backend.rs"]
32mod backend;
33
34#[path = "gpu_env.rs"]
35mod env;
36pub use env::*;
37
38pub fn batch_ml_inference(
59 candidates: &[(&str, &str)],
60 config: &crate::types::ScannerConfig,
61) -> Vec<f64> {
62 if candidates.is_empty() {
63 return Vec::new();
64 }
65
66 #[cfg(feature = "ml")]
67 {
68 use rayon::prelude::*;
69 let features: Vec<[f32; crate::ml_scorer::NUM_FEATURES]> = candidates
72 .par_iter()
73 .map(|(text, ctx)| {
74 if text.is_empty() {
75 [0.0; crate::ml_scorer::NUM_FEATURES]
76 } else {
77 crate::ml_scorer::compute_features_with_config(
78 text,
79 ctx,
80 &config.known_prefixes,
81 &config.secret_keywords,
82 &config.test_keywords,
83 &config.placeholder_keywords,
84 )
85 }
86 })
87 .collect();
88
89 #[cfg(feature = "gpu")]
90 if let Some(mut scores) = backend::batch_score_features(&features) {
91 for ((text, _ctx), score) in candidates.iter().zip(scores.iter_mut()) {
92 if text.is_empty() {
93 *score = 0.0;
94 }
95 }
96 return scores;
97 }
98
99 candidates
100 .par_iter()
101 .zip(features.par_iter())
102 .map(|((text, _ctx), features)| {
103 if text.is_empty() {
104 0.0
105 } else {
106 crate::ml_scorer::score_features(features)
107 }
108 })
109 .collect()
110 }
111
112 #[cfg(not(feature = "ml"))]
113 {
114 let _ = candidates;
115 let _ = config;
116 Vec::new()
117 }
118}
119
/// Reports whether a GPU can be used by the scoring backend.
///
/// Always `false` when the crate is built without the `gpu` feature; otherwise `true`
/// exactly when `backend::get_gpu()` yields a device.
pub fn gpu_available() -> bool {
    #[cfg(feature = "gpu")]
    let available = backend::get_gpu().is_some();

    #[cfg(not(feature = "gpu"))]
    let available = false;

    available
}
139
/// Successful outcome of `gpu_self_test`: which adapter ran the probe batch and how
/// many scores came back.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GpuSelfTest {
    /// Adapter name as reported by the GPU handle's `gpu_name()`.
    pub adapter_name: String,
    /// Value of the GPU handle's `vram_mb()`; presumably video memory in megabytes,
    /// with `None` when the backend cannot report it (confirm against the backend).
    pub vram_mb: Option<u64>,
    /// Number of scores returned by the probe dispatch.
    pub scores: usize,
}
150
/// Successful outcome of `vyre_gpu_self_test`: match counts from its two probe scans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VyreGpuSelfTest {
    /// Number of matches from scanning a haystack that is exactly the probe pattern.
    pub direct_matches: usize,
    /// Number of matches from scanning the single buffer built by concatenating all
    /// generated probe items.
    pub coalesced_matches: usize,
}
159
/// Process-wide memo for `gpu_self_test`: holds the outcome of the first probe run,
/// whether it succeeded or failed, so later calls can return a clone of it.
#[cfg(feature = "gpu")]
static GPU_SELF_TEST_CACHE: std::sync::OnceLock<std::result::Result<GpuSelfTest, String>> =
    std::sync::OnceLock::new();
163
164pub fn gpu_self_test() -> Result<GpuSelfTest, String> {
170 #[cfg(not(feature = "gpu"))]
171 {
172 return Err(
173 "GPU support not compiled in (lean ci build). Rebuild with `--features gpu` \
174 (or the default profile) to exercise the wgpu/CUDA path."
175 .to_string(),
176 );
177 }
178 #[cfg(feature = "gpu")]
179 GPU_SELF_TEST_CACHE
180 .get_or_init(|| {
181 const SELF_TEST_BATCH: usize = 64;
182
183 let gpu = backend::get_gpu().ok_or_else(|| {
184 "GPU adapter unavailable; install or enable a non-software GPU adapter and driver"
185 .to_string()
186 })?;
187
188 let features = [[0.0_f32; crate::ml_scorer::NUM_FEATURES]; SELF_TEST_BATCH];
189 let scores = backend::batch_score_features(&features)
190 .ok_or_else(|| "GPU dispatch produced no result".to_string())?;
191
192 if scores.len() != SELF_TEST_BATCH {
193 return Err(format!(
194 "GPU dispatch returned {} scores for {SELF_TEST_BATCH} inputs",
195 scores.len()
196 ));
197 }
198
199 if let Some((index, score)) = scores
200 .iter()
201 .enumerate()
202 .find(|(_, score)| !score.is_finite() || !(0.0..=1.0).contains(*score))
203 {
204 return Err(format!(
205 "GPU dispatch returned invalid score {score} at index {index}"
206 ));
207 }
208
209 Ok(GpuSelfTest {
210 adapter_name: gpu.gpu_name().to_string(),
211 vram_mb: gpu.vram_mb(),
212 scores: scores.len(),
213 })
214 })
215 .clone()
216}
217
218#[cfg(not(feature = "gpu"))]
225pub fn vyre_gpu_self_test() -> Result<VyreGpuSelfTest, String> {
226 Err(
227 "vyre GPU self-test not available in the lean ci build (no wgpu driver compiled in). \
228 Rebuild with `--features gpu`."
229 .to_string(),
230 )
231}
232
/// Runs two literal scans for the pattern `needle` through the shared wgpu backend.
///
/// 1. A *direct* scan of a haystack that is exactly `needle`; it must yield a single
///    match for pattern 0 at offset 0.
/// 2. A *coalesced* scan of one buffer made by concatenating 100 generated items of the
///    form `id-NNN-needle`; its match count is reported without further validation.
///
/// # Errors
///
/// Returns a message when the wgpu backend cannot be initialised, when either scan
/// fails, or when the direct scan returns anything other than the expected match.
#[cfg(feature = "gpu")]
pub fn vyre_gpu_self_test() -> Result<VyreGpuSelfTest, String> {
    use vyre_driver_wgpu::WgpuBackend;
    use vyre_libs::scan::GpuLiteralSet;

    let patterns: Vec<Vec<u8>> = vec![b"needle".to_vec()];
    let pattern_refs: Vec<&[u8]> = patterns
        .iter()
        .map(|pattern| pattern.as_slice())
        .collect();

    let wgpu = WgpuBackend::shared().map_err(|e| format!("failed to init wgpu backend: {e}"))?;
    let scanner = GpuLiteralSet::compile(&pattern_refs);

    // Direct scan: the haystack is the pattern itself.
    let direct = scanner
        .scan(wgpu.as_ref(), b"needle", 100)
        .map_err(|error| format!("vyre direct GPU scan failed: {error}"))?;
    let direct_is_expected =
        direct.len() == 1 && direct[0].pattern_id == 0 && direct[0].start == 0;
    if !direct_is_expected {
        return Err(format!(
            "vyre direct GPU scan returned unexpected matches: {direct:?}"
        ));
    }

    // Coalesced scan: every generated item laid end to end in one buffer.
    let buffer: Vec<u8> = (0..100)
        .flat_map(|index| format!("id-{index:03}-needle").into_bytes())
        .collect();
    let coalesced = scanner
        .scan(wgpu.as_ref(), &buffer, 10_000)
        .map_err(|error| format!("vyre coalesced GPU scan failed: {error}"))?;

    let direct_matches = direct.len();
    let coalesced_matches = coalesced.len();
    Ok(VyreGpuSelfTest {
        direct_matches,
        coalesced_matches,
    })
}
271
/// Successful outcome of `vyre_ac_kernel_self_test`.
///
/// Derives the same traits as the other self-test result types in this module, so the
/// report can be logged with `{:?}`, compared in assertions, and so callers can use
/// `Result::unwrap_err` / `expect_err` on the self-test's return value (both require
/// the `Ok` type to implement `Debug`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VyreAcKernelSelfTest {
    /// Total number of phase-1 hits reported for the probe chunk (always non-zero on
    /// success).
    pub matches: usize,
    /// Label of the GPU backend the compiled scanner acquired.
    pub backend_id: &'static str,
}
284
285#[cfg(not(feature = "gpu"))]
300pub fn vyre_ac_kernel_self_test() -> Result<VyreAcKernelSelfTest, String> {
301 Err(
302 "vyre AC-kernel self-test not available in the lean ci build. \
303 Rebuild with `--features gpu` to exercise the GPU AC phase-1 path."
304 .to_string(),
305 )
306}
307
/// Compiles a one-detector scanner for the literal `needle` and runs the coalesced GPU
/// AC phase-1 scan over a single chunk that contains it.
///
/// On success the report carries the total hit count and the label of the GPU backend
/// the scanner acquired.
///
/// # Errors
///
/// Returns a message when
/// * the scanner fails to compile,
/// * the compiled scanner has no GPU backend label,
/// * phase 1 returns `GpuPhase1Output::Done` instead of hits (the message includes the
///   scanner's last recorded degrade reason, if any), or
/// * phase 1 returns hits but their total is zero.
#[cfg(feature = "gpu")]
pub fn vyre_ac_kernel_self_test() -> Result<VyreAcKernelSelfTest, String> {
    use crate::engine::{CompiledScanner, GpuPhase1Output};
    use keyhog_core::{Chunk, ChunkMetadata, DetectorSpec, PatternSpec, Severity};

    // Minimal detector whose only pattern and keyword are the planted literal.
    let needle_pattern = PatternSpec {
        regex: "needle".into(),
        description: None,
        group: None,
        client_safe: false,
    };
    let detector = DetectorSpec {
        tests: Vec::new(),
        id: "kh-gpu-self-test".into(),
        name: "GPU self-test".into(),
        service: "test".into(),
        severity: Severity::Low,
        patterns: vec![needle_pattern],
        keywords: vec!["needle".into()],
        min_confidence: None,
        ..Default::default()
    };

    let scanner = CompiledScanner::compile(vec![detector])
        .map_err(|e| format!("CompiledScanner::compile failed during self-test: {e}"))?;

    let Some(backend_id) = scanner.gpu_backend_label() else {
        return Err("no GPU backend acquired during self-test compile".to_string());
    };

    let chunks = [Chunk {
        data: "the quick brown needle jumps over the lazy fox".into(),
        metadata: ChunkMetadata::default(),
    }];

    let hits = match scanner.scan_coalesced_gpu_ac_phase1(&chunks) {
        GpuPhase1Output::Hits(hits) => hits,
        GpuPhase1Output::Done(_) => {
            // Anything other than hits is reported as a runtime degrade, together
            // with whatever reason the scanner recorded.
            let detail = scanner
                .last_gpu_degrade_reason()
                .unwrap_or_else(|| "no concrete degrade reason was recorded".to_string());
            return Err(format!(
                "AC phase 1 degraded to SIMD/CPU at runtime despite an acquired GPU stack: {detail}"
            ));
        }
    };

    let total: usize = hits.iter().map(|per_chunk| per_chunk.len()).sum();
    if total == 0 {
        return Err(
            "AC kernel ran on GPU but reported zero hits for the planted 'needle' \
             literal. Indicates either a phase-1 lowering regression or a workgroup-size mismatch."
                .to_string(),
        );
    }

    Ok(VyreAcKernelSelfTest {
        matches: total,
        backend_id,
    })
}