keyhog_scanner/engine/gpu_decode_scan.rs
1//! Fused GPU decode→scan: base64 and hex decode + Aho-Corasick match in a
2//! single GPU dispatch.
3//!
4//! # Motivation
5//!
6//! keyhog's CPU decode pipeline (`decode/pipeline.rs`) extracts base64/hex
7//! blobs, decodes them on the CPU, and re-scans the decoded output through
8//! the GPU literal-set engine. This creates a full CPU→GPU round-trip per
9//! encoded chunk. Vyre's fused decode builders compose decode + AC-scan
10//! into a single `vyre::Program` where decoded bytes never leave VRAM:
11//!
12//! ```text
13//! encoded bytes (host)
14//! ↓ upload once
15//! ↓ base64_decode_then_aho_corasick (one GPU dispatch)
16//! ↓ readback match triples only
17//! host match offsets
18//! ```
19//!
20//! Eliminates ~4 GiB of throwaway allocations on a 1 GiB scan with
21//! 512 × 2 MiB shards.
22//!
23//! # Architecture
24//!
25//! The fused programs are built at scanner compile time alongside the
26//! `GpuLiteralSet`. They share the same DFA transition/accept tables
27//! (from the literal-set AC automaton) but prepend a decode stage
28//! that transforms the encoded input in-place before the AC walk.
29//!
30//! Two encoding variants are supported:
31//! - **Base64** via `vyre_libs::decode::base64_decode_then_aho_corasick`
32//! - **Hex** via `vyre_libs::decode::hex_decode_then_aho_corasick`
33//!
34//! # Fallback
35//!
36//! If GPU dispatch fails (no backend, device lost, program compilation
37//! error), the caller falls back to the existing CPU decode pipeline.
38//! This module never panics on GPU failure.
39
/// Text encodings that the fused GPU decode→scan path knows how to unwrap.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum FusedEncoding {
    /// Base64 with the standard alphabet (RFC 4648 §4).
    Base64,
    /// Hexadecimal text; lowercase and uppercase digits are treated alike.
    Hex,
}
48
49impl FusedEncoding {
50 /// Human-readable label for logging.
51 #[must_use]
52 pub fn label(self) -> &'static str {
53 match self {
54 Self::Base64 => "base64",
55 Self::Hex => "hex",
56 }
57 }
58}
59
60/// Compiled fused decode+scan programs, lazily built and cached.
61///
62/// Holds the vyre `Program` objects for base64-then-AC and hex-then-AC.
63/// These programs share the same DFA tables as the literal-set AC engine
64/// but prepend an on-GPU decode stage.
65pub struct FusedDecodeScanPrograms {
66 /// Fused base64 decode + AC scan program. `None` if the DFA tables
67 /// are not available (no patterns compiled).
68 pub base64_program: Option<vyre::Program>,
69 /// Fused hex decode + AC scan program.
70 pub hex_program: Option<vyre::Program>,
71 /// Number of DFA states in the shared AC automaton.
72 pub state_count: u32,
73}
74
75/// Build fused decode→scan programs from the same DFA tables the
76/// `GpuLiteralSet` uses.
77///
78/// # Arguments
79///
80/// * `transitions` - Flattened `state_count × 256` DFA transition table
81/// * `accept` - Per-state accept/output array
82/// * `state_count` - Number of DFA states
83/// * `input_len` - Maximum input buffer length (bytes)
84///
85/// # Returns
86///
87/// `FusedDecodeScanPrograms` with both base64 and hex fused programs.
88/// If construction fails for either, that field is `None`.
89pub fn build_fused_programs(state_count: u32, input_len: u32) -> FusedDecodeScanPrograms {
90 // Buffer names follow vyre convention for interop with existing
91 // dispatch infrastructure.
92 let base64_program = std::panic::catch_unwind(|| {
93 vyre_libs::decode::base64_decode_then_aho_corasick(
94 "haystack",
95 "decoded",
96 "transitions",
97 "accept",
98 "matches",
99 input_len,
100 state_count,
101 )
102 })
103 .ok();
104
105 let hex_program = std::panic::catch_unwind(|| {
106 vyre_libs::decode::hex_decode_then_aho_corasick(
107 "haystack",
108 "decoded",
109 "transitions",
110 "accept",
111 "matches",
112 input_len,
113 state_count,
114 )
115 })
116 .ok();
117
118 if base64_program.is_none() {
119 tracing::debug!(
120 target: "keyhog::gpu",
121 "fused base64 decode+scan program build failed - will use CPU decode path"
122 );
123 }
124 if hex_program.is_none() {
125 tracing::debug!(
126 target: "keyhog::gpu",
127 "fused hex decode+scan program build failed - will use CPU decode path"
128 );
129 }
130
131 FusedDecodeScanPrograms {
132 base64_program,
133 hex_program,
134 state_count,
135 }
136}
137
138impl FusedDecodeScanPrograms {
139 /// Get the fused program for the given encoding, if available.
140 #[must_use]
141 pub fn program_for(&self, encoding: FusedEncoding) -> Option<&vyre::Program> {
142 match encoding {
143 FusedEncoding::Base64 => self.base64_program.as_ref(),
144 FusedEncoding::Hex => self.hex_program.as_ref(),
145 }
146 }
147
148 /// Returns `true` if at least one fused program was built successfully.
149 #[must_use]
150 pub fn any_available(&self) -> bool {
151 self.base64_program.is_some() || self.hex_program.is_some()
152 }
153}
154
155/// Detect likely encoding of a byte slice.
156///
157/// Returns `Some(FusedEncoding::Base64)` if the input looks like base64,
158/// `Some(FusedEncoding::Hex)` if it looks like hex, or `None` if neither.
159/// Uses fast heuristics (character frequency, length modular checks).
160#[must_use]
161pub fn detect_encoding(data: &[u8]) -> Option<FusedEncoding> {
162 if data.is_empty() {
163 return None;
164 }
165
166 // Quick length checks.
167 let len = data.len();
168
169 // Count character classes for classification.
170 let mut hex_chars = 0usize;
171 let mut b64_chars = 0usize;
172 let mut other = 0usize;
173
174 // Sample up to 256 bytes for speed on large inputs.
175 let sample = &data[..len.min(256)];
176 for &b in sample {
177 match b {
178 b'0'..=b'9' => {
179 hex_chars += 1;
180 b64_chars += 1;
181 }
182 b'a'..=b'f' | b'A'..=b'F' => {
183 hex_chars += 1;
184 b64_chars += 1;
185 }
186 b'g'..=b'z' | b'G'..=b'Z' => {
187 b64_chars += 1;
188 }
189 b'+' | b'/' | b'=' => {
190 b64_chars += 1;
191 }
192 b'\n' | b'\r' | b' ' | b'\t' => {
193 // Whitespace is neutral.
194 }
195 _ => {
196 other += 1;
197 }
198 }
199 }
200
201 // If >20% is non-alphanumeric non-whitespace, it's not encoded.
202 if other * 5 > sample.len() {
203 return None;
204 }
205
206 // Pure hex: all chars are 0-9a-fA-F and length is even.
207 if hex_chars == b64_chars && hex_chars > 0 && len % 2 == 0 {
208 return Some(FusedEncoding::Hex);
209 }
210
211 // Base64: includes chars outside hex range, length is multiple of 4
212 // or has padding.
213 if b64_chars > hex_chars && (len % 4 == 0 || data.ends_with(b"=")) {
214 return Some(FusedEncoding::Base64);
215 }
216
217 // Default to base64 if it has any base64-only chars.
218 if b64_chars > hex_chars {
219 return Some(FusedEncoding::Base64);
220 }
221
222 None
223}