Skip to main content

keyhog_scanner/engine/
gpu_decode_scan.rs

1//! Fused GPU decode→scan: base64 and hex decode + Aho-Corasick match in a
2//! single GPU dispatch.
3//!
4//! # Motivation
5//!
6//! keyhog's CPU decode pipeline (`decode/pipeline.rs`) extracts base64/hex
7//! blobs, decodes them on the CPU, and re-scans the decoded output through
8//! the GPU literal-set engine. This creates a full CPU→GPU round-trip per
9//! encoded chunk. Vyre's fused decode builders compose decode + AC-scan
10//! into a single `vyre::Program` where decoded bytes never leave VRAM:
11//!
12//! ```text
13//! encoded bytes (host)
14//!   ↓  upload once
15//!   ↓  base64_decode_then_aho_corasick (one GPU dispatch)
16//!   ↓  readback match triples only
17//! host match offsets
18//! ```
19//!
20//! Eliminates ~4 GiB of throwaway allocations on a 1 GiB scan with
21//! 512 × 2 MiB shards.
22//!
23//! # Architecture
24//!
25//! The fused programs are built at scanner compile time alongside the
26//! `GpuLiteralSet`. They share the same DFA transition/accept tables
27//! (from the literal-set AC automaton) but prepend a decode stage
28//! that transforms the encoded input in-place before the AC walk.
29//!
30//! Two encoding variants are supported:
31//! - **Base64** via `vyre_libs::decode::base64_decode_then_aho_corasick`
32//! - **Hex** via `vyre_libs::decode::hex_decode_then_aho_corasick`
33//!
34//! # Fallback
35//!
36//! If GPU dispatch fails (no backend, device lost, program compilation
37//! error), the caller falls back to the existing CPU decode pipeline.
38//! This module never panics on GPU failure.
39
/// Encodings that the fused GPU decode→scan path can be asked to handle.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FusedEncoding {
    /// Standard base64 alphabet (RFC 4648 §4).
    Base64,
    /// Hexadecimal digits, upper or lower case.
    Hex,
}

impl FusedEncoding {
    /// Short lowercase name of the encoding, intended for log output.
    ///
    /// Returns `"base64"` for [`FusedEncoding::Base64`] and `"hex"` for
    /// [`FusedEncoding::Hex`].
    #[must_use]
    pub fn label(self) -> &'static str {
        // Exhaustive on purpose: a new variant must pick its own label.
        match self {
            FusedEncoding::Hex => "hex",
            FusedEncoding::Base64 => "base64",
        }
    }
}
59
60/// Compiled fused decode+scan programs, lazily built and cached.
61///
62/// Holds the vyre `Program` objects for base64-then-AC and hex-then-AC.
63/// These programs share the same DFA tables as the literal-set AC engine
64/// but prepend an on-GPU decode stage.
65pub struct FusedDecodeScanPrograms {
66    /// Fused base64 decode + AC scan program. `None` if the DFA tables
67    /// are not available (no patterns compiled).
68    pub base64_program: Option<vyre::Program>,
69    /// Fused hex decode + AC scan program.
70    pub hex_program: Option<vyre::Program>,
71    /// Number of DFA states in the shared AC automaton.
72    pub state_count: u32,
73}
74
75/// Build fused decode→scan programs from the same DFA tables the
76/// `GpuLiteralSet` uses.
77///
78/// # Arguments
79///
80/// * `transitions` - Flattened `state_count × 256` DFA transition table
81/// * `accept` - Per-state accept/output array
82/// * `state_count` - Number of DFA states
83/// * `input_len` - Maximum input buffer length (bytes)
84///
85/// # Returns
86///
87/// `FusedDecodeScanPrograms` with both base64 and hex fused programs.
88/// If construction fails for either, that field is `None`.
89pub fn build_fused_programs(state_count: u32, input_len: u32) -> FusedDecodeScanPrograms {
90    // Buffer names follow vyre convention for interop with existing
91    // dispatch infrastructure.
92    let base64_program = std::panic::catch_unwind(|| {
93        vyre_libs::decode::base64_decode_then_aho_corasick(
94            "haystack",
95            "decoded",
96            "transitions",
97            "accept",
98            "matches",
99            input_len,
100            state_count,
101        )
102    })
103    .ok();
104
105    let hex_program = std::panic::catch_unwind(|| {
106        vyre_libs::decode::hex_decode_then_aho_corasick(
107            "haystack",
108            "decoded",
109            "transitions",
110            "accept",
111            "matches",
112            input_len,
113            state_count,
114        )
115    })
116    .ok();
117
118    if base64_program.is_none() {
119        tracing::debug!(
120            target: "keyhog::gpu",
121            "fused base64 decode+scan program build failed - will use CPU decode path"
122        );
123    }
124    if hex_program.is_none() {
125        tracing::debug!(
126            target: "keyhog::gpu",
127            "fused hex decode+scan program build failed - will use CPU decode path"
128        );
129    }
130
131    FusedDecodeScanPrograms {
132        base64_program,
133        hex_program,
134        state_count,
135    }
136}
137
138impl FusedDecodeScanPrograms {
139    /// Get the fused program for the given encoding, if available.
140    #[must_use]
141    pub fn program_for(&self, encoding: FusedEncoding) -> Option<&vyre::Program> {
142        match encoding {
143            FusedEncoding::Base64 => self.base64_program.as_ref(),
144            FusedEncoding::Hex => self.hex_program.as_ref(),
145        }
146    }
147
148    /// Returns `true` if at least one fused program was built successfully.
149    #[must_use]
150    pub fn any_available(&self) -> bool {
151        self.base64_program.is_some() || self.hex_program.is_some()
152    }
153}
154
155/// Detect likely encoding of a byte slice.
156///
157/// Returns `Some(FusedEncoding::Base64)` if the input looks like base64,
158/// `Some(FusedEncoding::Hex)` if it looks like hex, or `None` if neither.
159/// Uses fast heuristics (character frequency, length modular checks).
160#[must_use]
161pub fn detect_encoding(data: &[u8]) -> Option<FusedEncoding> {
162    if data.is_empty() {
163        return None;
164    }
165
166    // Quick length checks.
167    let len = data.len();
168
169    // Count character classes for classification.
170    let mut hex_chars = 0usize;
171    let mut b64_chars = 0usize;
172    let mut other = 0usize;
173
174    // Sample up to 256 bytes for speed on large inputs.
175    let sample = &data[..len.min(256)];
176    for &b in sample {
177        match b {
178            b'0'..=b'9' => {
179                hex_chars += 1;
180                b64_chars += 1;
181            }
182            b'a'..=b'f' | b'A'..=b'F' => {
183                hex_chars += 1;
184                b64_chars += 1;
185            }
186            b'g'..=b'z' | b'G'..=b'Z' => {
187                b64_chars += 1;
188            }
189            b'+' | b'/' | b'=' => {
190                b64_chars += 1;
191            }
192            b'\n' | b'\r' | b' ' | b'\t' => {
193                // Whitespace is neutral.
194            }
195            _ => {
196                other += 1;
197            }
198        }
199    }
200
201    // If >20% is non-alphanumeric non-whitespace, it's not encoded.
202    if other * 5 > sample.len() {
203        return None;
204    }
205
206    // Pure hex: all chars are 0-9a-fA-F and length is even.
207    if hex_chars == b64_chars && hex_chars > 0 && len % 2 == 0 {
208        return Some(FusedEncoding::Hex);
209    }
210
211    // Base64: includes chars outside hex range, length is multiple of 4
212    // or has padding.
213    if b64_chars > hex_chars && (len % 4 == 0 || data.ends_with(b"=")) {
214        return Some(FusedEncoding::Base64);
215    }
216
217    // Default to base64 if it has any base64-only chars.
218    if b64_chars > hex_chars {
219        return Some(FusedEncoding::Base64);
220    }
221
222    None
223}