Skip to main content

keyhog_scanner/engine/
mod.rs

1//! Core scanning engine implementation.
2
3mod backend;
4mod backend_dispatch;
5mod backend_pattern_hits;
6mod backend_prepared;
7mod backend_triggered;
8pub mod boundary;
9mod compile;
10mod extract;
11mod fallback;
12mod fallback_entropy;
13mod fallback_entropy_helpers;
14mod fallback_generic;
15mod gpu_ac_phase1;
16mod gpu_cache;
17mod gpu_coalesce;
18pub mod gpu_decode_scan;
19mod gpu_dispatch;
20mod gpu_forced;
21mod gpu_lazy;
22mod gpu_literal_phase1;
23mod gpu_megascan;
24mod gpu_phase2;
25pub(crate) mod gpu_postprocess;
26pub mod gpu_program_fusion;
27pub mod gpu_regex_dfa;
28mod gpu_scan_wrappers;
29mod hot_patterns;
30mod process;
31mod rule_pipeline;
32mod scan;
33mod scan_filters;
34mod scan_postprocess;
35pub mod segment_attribution;
36mod windowed;
37
38// `build_simd_scanner` only exists under the `simd` (Hyperscan) feature; its
39// sole call site in compile.rs is `#[cfg(feature = "simd")]` too. Gate the
40// import to match, or non-simd builds (the `portable` feature used for the
41// macOS/Windows/musl release assets) fail with E0432.
42#[cfg(feature = "simd")]
43pub(crate) use backend_prepared::build_simd_scanner;
44pub(crate) use backend_prepared::PreparedChunk;
45pub use gpu_cache::{AcConstPacks, GpuConstPacks};
46pub use gpu_coalesce::coalesce_chunks;
47pub use gpu_regex_dfa::{build_regex_dfa, RegexDfaError};
48pub use gpu_scan_wrappers::GpuPhase1Output;
49pub use rule_pipeline::{
50    build_rule_pipeline, megascan_input_len, rule_pipeline_cached, AC_GPU_MAX_MATCHES_PER_DISPATCH,
51    MEGASCAN_INPUT_LEN, MEGASCAN_INPUT_LEN_DEFAULT,
52};
53pub use windowed::{
54    floor_char_boundary, line_number_for_offset, next_window_offset, record_window_match,
55    window_chunk, window_end_offset,
56};
57
58use crate::compiler::*;
59use crate::error::Result;
60use crate::pipeline::*;
61use crate::types::*;
62use aho_corasick::AhoCorasick;
63use keyhog_core::{Chunk, DetectorSpec, RawMatch};
64use std::sync::Arc;
65use std::sync::OnceLock;
66
67pub use vyre_libs::scan::LiteralMatch;
68
/// Per-chunk scan deadline derived from `KEYHOG_PER_CHUNK_TIMEOUT_MS`.
///
/// The variable is read and parsed once per process (the parsed budget is
/// cached in a `OnceLock`); each call then turns that budget into a fresh
/// `Instant` measured from "now". Returns `None` - the historical "scan
/// until done" behavior - when the variable is unset, is not a positive
/// integer number of milliseconds, or is so large that the resulting
/// deadline cannot be represented as an `Instant`.
///
/// Wired into the public `scan` / `scan_with_backend` entry points so a
/// hostile or pathological input (e.g. the Apple Silicon regex-DFA
/// construction stall surfaced during cross-platform dogfood - a single
/// 171-byte line with `var token = identifier.Flag(...)` shape spends
/// minutes inside the multiline preprocessor) bails after the configured
/// budget instead of hanging the entire `keyhog scan <repo>` run. The CLI
/// orchestrator path runs scans in parallel via rayon; a stuck worker would
/// otherwise keep one core pinned at 100% indefinitely.
///
/// Default unset (no timeout) preserves prior behavior. Recommend
/// `export KEYHOG_PER_CHUNK_TIMEOUT_MS=30000` (30 s) for production scans
/// where bounded latency matters more than scan completeness.
fn env_per_chunk_deadline() -> Option<std::time::Instant> {
    static BUDGET_MS: std::sync::OnceLock<Option<u64>> = std::sync::OnceLock::new();
    let budget_ms = (*BUDGET_MS.get_or_init(|| {
        std::env::var("KEYHOG_PER_CHUNK_TIMEOUT_MS")
            .ok()
            // Tolerate stray whitespace (`.env` files, trailing spaces in
            // shell `set` commands) instead of silently dropping the timeout.
            .and_then(|raw| raw.trim().parse::<u64>().ok())
            // Zero means "no budget", same as unset.
            .filter(|&ms| ms > 0)
    }))?;
    // `Instant + Duration` panics on overflow; an absurdly large budget is
    // treated as "no deadline" rather than crashing the scan.
    std::time::Instant::now().checked_add(std::time::Duration::from_millis(budget_ms))
}
96
/// Result of scoring a candidate match: either a score that is already
/// final, or (with the `ml` feature) the inputs held back so ML scoring can
/// be batched at the end of the scan.
pub enum MlScoreResult<'a> {
    /// The score is final; the match can be pushed immediately.
    Final(f64),
    /// ML scoring is deferred and batched at the end of the scan; the fields
    /// carry what that later pass needs.
    #[cfg(feature = "ml")]
    Pending {
        heuristic_conf: f64,
        code_context: crate::context::CodeContext,
        credential: std::borrow::Cow<'a, str>,
        ml_context: std::borrow::Cow<'a, str>,
    },
    /// Zero-sized stand-in that keeps the `'a` lifetime in use when ML batch
    /// scoring is compiled out (lean / `--no-default-features` build). It is
    /// never constructed: without the `ml` feature the borrowing `Pending`
    /// variant is gone, and this variant exists solely so the type still
    /// carries `'a`.
    #[doc(hidden)]
    #[cfg(not(feature = "ml"))]
    _Lifetime(std::marker::PhantomData<&'a ()>),
}
116
/// Compressed-sparse-row (CSR) index table: a flattened replacement for a
/// `Vec<Vec<usize>>` whose rows are pattern/literal indices.
///
/// The detector-side index maps (`prefix_propagation`, `same_prefix_patterns`,
/// `fallback_keyword_to_patterns`, and the simd `hs_index_map`) are each
/// indexed parallel to the ~1000+ AC literals / fallback patterns. Stored as
/// `Vec<Vec<usize>>` that is ~1000+ separate heap allocations per table, each
/// inner `Vec` carrying a 24-byte (ptr+len+cap) header plus capacity slack -
/// even for the overwhelmingly common empty or single-element row. That
/// fragments the heap, forces pointer-chasing on the hot lookup path, and
/// wastes 8-byte `usize` where the values are corpus-bounded indices that fit
/// in `u32`.
///
/// CSR collapses each table to exactly two allocations: `data` holds every
/// row concatenated, and `offsets` (length `n + 1`) records where each row
/// starts, so `row(i) == &data[offsets[i]..offsets[i + 1]]`. Empty rows cost
/// zero data bytes instead of a header, element width halves to `u32`, and
/// lookups are contiguous. Build it once from the existing
/// `Vec<Vec<usize>>`-producing builders via `From` (or directly with
/// `from_rows`); reads go through [`CsrU32::get`] or indexing, mirroring the
/// slice/`Vec` API the old field type exposed.
///
/// `CsrU32::default()` is the zero-row table: its `offsets` vector is empty,
/// so every `get` returns `None`.
#[derive(Clone, Debug, Default)]
pub(crate) struct CsrU32 {
    /// All rows concatenated, in row order.
    data: Vec<u32>,
    /// `offsets[i]..offsets[i + 1]` is the slice of `data` for row `i`.
    /// A table built from `n` rows has `n + 1` offsets.
    offsets: Vec<u32>,
}

impl CsrU32 {
    /// Build a CSR table from per-row index lists in a single pass.
    ///
    /// Accepts any iterator of rows so the existing builders can feed their
    /// `Vec<Vec<usize>>` (or borrowed slices) straight in without an
    /// intermediate allocation. Values are narrowed to `u32`.
    ///
    /// # Panics
    ///
    /// Panics if a value, or the running element count, does not fit in
    /// `u32`. A corpus index can never exceed the pattern count, so hitting
    /// this is a broken invariant; failing loudly beats silently truncating
    /// into a table that points at the wrong patterns.
    pub(crate) fn from_rows<R, I>(rows: R) -> Self
    where
        R: IntoIterator<Item = I>,
        I: IntoIterator<Item = usize>,
    {
        let rows = rows.into_iter();
        let mut data = Vec::new();
        // One offset per row plus the leading zero; the lower size-hint bound
        // is exact for the `Vec`-backed builders.
        let mut offsets = Vec::with_capacity(rows.size_hint().0.saturating_add(1));
        offsets.push(0u32);
        for row in rows {
            data.extend(row.into_iter().map(|value| Self::narrow(value, "value")));
            offsets.push(Self::narrow(data.len(), "offset"));
        }
        Self { data, offsets }
    }

    /// Checked `usize -> u32` narrowing used while building the table.
    fn narrow(value: usize, what: &str) -> u32 {
        match <u32 as std::convert::TryFrom<usize>>::try_from(value) {
            Ok(narrowed) => narrowed,
            Err(_) => panic!("CsrU32 {} {} does not fit in u32", what, value),
        }
    }

    /// Row `i` as a contiguous slice, or `None` when `i` is out of range.
    /// Replaces `Vec::get(i) -> Option<&Vec<usize>>` on the hot lookup path.
    #[inline]
    pub(crate) fn get(&self, i: usize) -> Option<&[u32]> {
        // The `i` lookup runs first, so an out-of-range `i` returns before
        // `i + 1` is ever computed.
        let start = *self.offsets.get(i)? as usize;
        let end = *self.offsets.get(i + 1)? as usize;
        Some(&self.data[start..end])
    }
}

impl From<Vec<Vec<usize>>> for CsrU32 {
    /// Flatten an owned `Vec<Vec<usize>>` table; see [`CsrU32::from_rows`].
    fn from(rows: Vec<Vec<usize>>) -> Self {
        Self::from_rows(rows)
    }
}

impl std::ops::Index<usize> for CsrU32 {
    type Output = [u32];

    /// Row `i` as a slice.
    ///
    /// # Panics
    ///
    /// Panics when `i` is not a valid row index; use [`CsrU32::get`] for a
    /// non-panicking lookup.
    #[inline]
    fn index(&self, i: usize) -> &[u32] {
        let start = self.offsets[i] as usize;
        let end = self.offsets[i + 1] as usize;
        &self.data[start..end]
    }
}
196
/// How GPU backend acquisition should be handled for a scanner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuInitPolicy {
    /// Follow the environment: honor KEYHOG_NO_GPU / CI auto-disable.
    FromEnvironment,
    /// Acquire a GPU backend whenever hardware is present, even if
    /// KEYHOG_NO_GPU is set. Used when the operator explicitly forces GPU.
    ForceEnabled,
    /// Skip CUDA/wgpu acquisition entirely. Used when the selected CLI path
    /// cannot route to GPU, which avoids the startup and RSS overhead without
    /// changing scan results.
    ForceDisabled,
}
209
/// A compiled detector set together with the matcher state and optional GPU
/// handles that the scan entry points (`scan`, `scan_with_backend`, ...) run
/// against.
///
/// NOTE(review): construction is not visible in this part of the file -
/// presumably `compile.rs` populates these fields; confirm there before
/// relying on any invariant not spelled out on a field below.
pub struct CompiledScanner {
    /// Cross-file fragment-reassembly cache; reset via `clear_fragment_cache`.
    pub(crate) fragment_cache: crate::fragment_cache::FragmentCache,
    /// Aho-Corasick automaton - presumably built over the literals that
    /// `ac_map` is index-parallel to; verify against the builder.
    pub(crate) ac: Option<AhoCorasick>,
    /// GPU backend acquired at compile time, or `None` when scanning routes
    /// to CPU/SIMD only (this is what `gpu_backend_label` reports).
    pub(crate) gpu_backend: Option<Arc<dyn vyre::VyreBackend>>,
    // Only the `gpu` build holds a concrete wgpu handle — its sole purpose
    // is to reach `dispatch_borrowed_batch`, which the trait object can't
    // express. Without the feature, the CUDA / wgpu drivers aren't linked
    // at all and `gpu_backend` is always None.
    #[cfg(feature = "gpu")]
    pub(crate) wgpu_backend: Option<Arc<vyre_driver_wgpu::WgpuBackend>>,
    /// Literal byte strings for the GPU matcher - presumably the same
    /// literal set the CPU automaton uses; TODO confirm.
    pub(crate) gpu_literals: Option<Arc<Vec<Vec<u8>>>>,
    /// GPU literal matcher, initialized at most once through the `OnceLock`;
    /// the inner `Option` can record that no matcher is available.
    pub(crate) gpu_matcher: OnceLock<Option<vyre_libs::scan::GpuLiteralSet>>,
    /// Once-initialized constant packs for the GPU path (see `gpu_cache`).
    pub(crate) gpu_const_packs: OnceLock<GpuConstPacks>,
    /// Once-initialized constant packs for the GPU AC path (see `gpu_cache`).
    pub(crate) gpu_ac_const_packs: OnceLock<AcConstPacks>,
    /// Once-initialized GPU program for the AC phase; `None` inside the lock
    /// presumably means the program could not be built - verify.
    pub(crate) ac_gpu_program: OnceLock<Option<vyre::Program>>,
    /// Most recent concrete GPU runtime-degrade reason, if one has occurred;
    /// read by `last_gpu_degrade_reason`.
    pub(crate) gpu_last_degrade_reason: std::sync::Mutex<Option<String>>,
    /// Once-initialized rule pipeline; `warm_backend` treats an empty value
    /// as "regex pipeline compile rejected the detector set".
    pub(crate) rule_pipeline: OnceLock<Option<vyre_libs::scan::RulePipeline>>,
    /// Fused AC + rule pipeline program (single GPU dispatch instead of two).
    /// Lazily built on first access via `fused_program()`.
    pub(crate) fused_program: OnceLock<Option<vyre::Program>>,
    /// Fused decode→scan programs for base64/hex GPU decode.
    /// Lazily built on first access.
    pub(crate) fused_decode_programs: OnceLock<Option<gpu_decode_scan::FusedDecodeScanPrograms>>,
    /// Frozen interner that backs the pre-interned metadata tables below.
    pub(crate) static_intern: Arc<crate::static_intern::StaticInterner>,
    /// Per-detector interned `(id, name, service)` metadata triple, indexed by
    /// `detector_index`. Built ONCE at scanner construction from the same
    /// frozen `StaticInterner` the per-match path used to re-hash against.
    /// Every emission site has the detector index in hand, so emitting metadata
    /// is three `Arc::clone`s (atomic refcount bumps) instead of three CHD
    /// perfect-hash lookups (2x FNV-1a + verify-hash + full string compare per
    /// field). The strings are byte-identical to `static_intern.lookup(...)`
    /// because they ARE its arena entries — see `perf_locality_intern.rs`.
    pub(crate) metadata_by_index: Vec<(Arc<str>, Arc<str>, Arc<str>)>,
    /// AC-backed compiled patterns; counted together with `fallback` by
    /// `pattern_count`.
    pub(crate) ac_map: Vec<CompiledPattern>,
    /// CSR index table (see `CsrU32`), one row per AC literal.
    pub(crate) prefix_propagation: CsrU32,
    /// Fallback compiled patterns, each paired with a list of strings -
    /// presumably that pattern's trigger keywords; TODO confirm.
    pub(crate) fallback: Vec<(CompiledPattern, Vec<String>)>,
    /// Compiled companions - presumably one inner list per detector; verify
    /// the indexing against the builder.
    pub(crate) companions: Vec<Vec<CompiledCompanion>>,
    /// Loaded detector specs; `detector_count` is this vector's length.
    pub(crate) detectors: Vec<DetectorSpec>,
    /// CSR index table (see `CsrU32`) relating patterns that share a prefix.
    pub(crate) same_prefix_patterns: CsrU32,
    /// Aho-Corasick automaton over the fallback keywords, when one was built.
    pub(crate) fallback_keyword_ac: Option<AhoCorasick>,
    /// CSR index table (see `CsrU32`) from fallback keyword to pattern rows.
    pub(crate) fallback_keyword_to_patterns: CsrU32,
    /// Indices of fallback patterns that are always active - presumably the
    /// ones with no keyword gate; TODO confirm.
    pub(crate) fallback_always_active_indices: Vec<usize>,
    /// SIMD (Hyperscan) prefilter; only present under the `simd` feature.
    /// `has_simd_prefilter` reports whether it is live.
    #[cfg(feature = "simd")]
    pub(crate) simd_prefilter: Option<crate::simd::backend::HsScanner>,
    /// CSR index table (see `CsrU32`) for the simd prefilter's indices.
    #[cfg(feature = "simd")]
    pub(crate) hs_index_map: CsrU32,
    /// Precise-regex validator per hot-pattern slot (index-parallel with
    /// `simdsieve_prefilter::HOT_PATTERNS`). The hot fast-path runs each
    /// literal-prefix candidate through these before emitting so it can never
    /// surface a token the detector's own regex rejects (the length floor
    /// alone let `ghp_…_…`/`xoxp-123-456-789-abc` through). `None` for the one
    /// slot with no canonical detector (square).
    #[cfg(feature = "simdsieve")]
    pub(crate) hot_pattern_validators: Vec<Option<regex::Regex>>,
    /// Pre-interned `(detector_id, detector_name, service)` triple per
    /// hot-pattern slot, index-parallel with `simdsieve_prefilter::HOT_PATTERNS`
    /// / `HOT_PATTERN_NAMES`. The simdsieve fast path emits directly and used to
    /// re-hash the three `&'static str` metadata constants through the CHD
    /// interner on every hot hit; this caches the resolved `Arc<str>` once so
    /// each emission is three `Arc::clone`s (PERF-locality_intern-1). Byte-
    /// identical to `static_intern.lookup(HOT_PATTERN_*[idx])`.
    #[cfg(feature = "simdsieve")]
    pub(crate) hot_metadata_by_index: Vec<(Arc<str>, Arc<str>, Arc<str>)>,
    /// Pre-interned `(detector_id, detector_name, service)` triple for each of
    /// the four synthetic entropy-fallback classes, indexed by
    /// `classify_entropy_detector_index` (0 generic / 1 password / 2 token /
    /// 3 api-key). The entropy fallback emits directly and used to re-intern
    /// these fixed `&'static str` constants per finding; caching the four
    /// `Arc<str>` triples once turns each emit into three `Arc::clone`s
    /// (PERF-locality_intern-1). String values are unchanged.
    #[cfg(feature = "entropy")]
    pub(crate) entropy_metadata_by_index: [(Arc<str>, Arc<str>, Arc<str>); 4],
    /// Scanner configuration; the scan path reads `max_decode_depth` and
    /// `max_decode_bytes` from it.
    pub config: ScannerConfig,
    /// Optional direct-match prefilter over a chunk's bytes; when `None` the
    /// scan path treats every chunk as passing this screen.
    pub alphabet_screen: Option<crate::alphabet_filter::AlphabetScreen>,
    /// Bigram bloom prefilter consulted by the scan path for chunks of at
    /// least 64 bytes.
    pub(crate) bigram_bloom: crate::bigram_bloom::BigramBloom,
}
286
287const _: () = {
288    const fn assert_send_sync<T: Send + Sync>() {}
289    let _ = assert_send_sync::<CompiledScanner>;
290};
291
292impl CompiledScanner {
293    /// Whether a SIMD (Hyperscan/Vectorscan) prefilter is compiled in and live.
294    ///
295    /// The GPU phase-1 paths reroute a batch through the SIMD coalesced scan
296    /// when the GPU prefix output is too dense for phase 2. That reroute only
297    /// exists when the `simd` feature is on; in `--no-default-features`
298    /// (portable / macOS no-system-libs) builds the `simd_prefilter` field is
299    /// `#[cfg]`-compiled out entirely, so there is nothing to reroute into and
300    /// the answer is always `false`. This accessor keeps the reroute guards
301    /// compiling in every feature combination without scattering
302    /// `#[cfg(feature = "simd")]` across each call site.
303    #[cfg(feature = "simd")]
304    #[inline]
305    pub(crate) fn has_simd_prefilter(&self) -> bool {
306        self.simd_prefilter.is_some()
307    }
308
309    #[cfg(not(feature = "simd"))]
310    #[inline]
311    pub(crate) fn has_simd_prefilter(&self) -> bool {
312        false
313    }
314
315    /// Number of loaded detectors.
316    pub fn detector_count(&self) -> usize {
317        self.detectors.len()
318    }
319
320    /// Pre-interned `(detector_id, detector_name, service)` triple for the
321    /// detector at `detector_index`. Three `Arc::clone`s, zero hashing — the
322    /// hot-path replacement for three `ScanState::intern_metadata` calls on
323    /// frozen detector metadata (PERF-locality_intern-1). Returns byte-for-byte
324    /// the same `Arc<str>` values `static_intern.lookup(...)` would, because
325    /// they ARE the same arena entries, so emitted findings are unchanged.
326    #[inline]
327    pub(crate) fn interned_detector_metadata(
328        &self,
329        detector_index: usize,
330    ) -> (Arc<str>, Arc<str>, Arc<str>) {
331        let (id, name, service) = &self.metadata_by_index[detector_index];
332        (Arc::clone(id), Arc::clone(name), Arc::clone(service))
333    }
334
335    /// Total number of patterns (AC + fallback).
336    pub fn pattern_count(&self) -> usize {
337        self.ac_map.len() + self.fallback.len()
338    }
339
340    /// Eagerly compile every pattern's regex, in parallel, up front.
341    ///
342    /// Patterns compile lazily on first use (see [`crate::types::LazyRegex`]),
343    /// which makes a one-shot CLI scan start in milliseconds instead of
344    /// paying ~450ms-2.3s to build the whole corpus. For a LONG-lived or
345    /// LARGE scan - the daemon, `watch`, `scan-system`, or a big repo where a
346    /// detector fires across thousands of files - it's better to pay the
347    /// compile once, in parallel, before the hot loop rather than stalling
348    /// the first file that touches each detector. Callers on those paths
349    /// should `warm()` after building the scanner.
350    ///
351    /// Idempotent and cheap to repeat: an already-compiled pattern is a
352    /// `OnceLock` hit. Also the correct setup for a per-scan perf benchmark,
353    /// which means to measure match throughput, not one-time compilation.
354    pub fn warm(&self) {
355        use rayon::prelude::*;
356        // Warm the lazy regex transition caches in parallel so the first real
357        // source batch does not serialize DFA first-touch under worker load.
358        const WARM_SAMPLE: &str = concat!(
359            "int main(void){ char *buf = malloc(4096); for(size_t i=0;i<len;i++){ ",
360            "config.timeout_ms = 30000; user_id=0x1f3b9c; const KEY = \"abcDEF0123456789\"; ",
361            "https://example.org/api/v2?token=eyJhbGciOi&id=550e8400-e29b-41d4-a716; ",
362            "base64=QUtJQUlPU0ZPRE5ON0VYQU1QTEU= sha=da39a3ee5e6b4b0d3255bfef95601890; ",
363            "snake_case_name camelCaseName SCREAMING_CASE path/to/file.rs node_modules ",
364            "} /* comment */ // trailing\n\t<xml attr='v'>text</xml> {\"json\":true,\"n\":42}"
365        );
366        self.ac_map.par_iter().for_each(|p| {
367            let _ = p.regex.get().find(WARM_SAMPLE);
368        });
369        self.fallback.par_iter().for_each(|(p, _)| {
370            let _ = p.regex.get().find(WARM_SAMPLE);
371        });
372        crate::shared_regexes::warm_runtime_regexes();
373        fallback_generic::warm_generic_assignment_runtime();
374        crate::multiline::warm_runtime_regexes();
375        crate::checksum::warm_runtime_regexes();
376    }
377
378    /// Iterator over the FINAL regex source strings (post anchoring /
379    /// group extraction / normalization) the scanner uses.
380    pub fn pattern_regex_strs(&self) -> Vec<&str> {
381        let mut out = Vec::with_capacity(self.ac_map.len() + self.fallback.len());
382        out.extend(self.ac_map.iter().map(|p| p.regex.as_str()));
383        out.extend(self.fallback.iter().map(|(p, _)| p.regex.as_str()));
384        out
385    }
386
387    /// Return the preferred backend for a file of the given size.
388    #[must_use]
389    pub fn select_backend_for_file(&self, file_size: u64) -> crate::hw_probe::ScanBackend {
390        crate::hw_probe::select_backend(
391            crate::hw_probe::probe_hardware(),
392            file_size,
393            self.pattern_count(),
394        )
395    }
396
397    /// Identifier of the GPU backend acquired at compile time, or
398    /// None if scanning routes to CPU/SIMD only. Mirrors
399    /// `VyreBackend::id()` which returns "cuda", "wgpu", or the
400    /// driver-defined name. The startup banner uses this so the
401    /// operator can tell at a glance whether they got CUDA (the
402    /// headline 5-10x faster path on NVIDIA hardware) or the WGPU
403    /// fallback, rather than just "Gpu" which collapses both.
404    #[must_use]
405    pub fn gpu_backend_label(&self) -> Option<&'static str> {
406        self.gpu_backend.as_ref().map(|b| b.id())
407    }
408
409    /// Most recent concrete GPU runtime-degrade reason for this compiled
410    /// scanner, if one has occurred. Used by health probes to emit
411    /// machine-readable failure causes without scraping stderr.
412    pub fn last_gpu_degrade_reason(&self) -> Option<String> {
413        self.gpu_last_degrade_reason
414            .lock()
415            .ok()
416            .and_then(|guard| guard.clone())
417    }
418
419    /// Return the steady-state backend label used for startup reporting.
420    #[must_use]
421    pub fn preferred_backend_label(&self) -> &'static str {
422        self.select_backend_for_file(0).label()
423    }
424
425    /// Warm backend resources that are initialized lazily during scanning.
426    pub fn warm_backend(&self, backend: crate::hw_probe::ScanBackend) -> bool {
427        let ready = match backend {
428            crate::hw_probe::ScanBackend::Gpu => self.gpu_stack_usable(),
429            crate::hw_probe::ScanBackend::MegaScan => {
430                let pipeline_ready = self.rule_pipeline().is_some();
431                let stack_ready = self.gpu_stack_usable();
432                if !pipeline_ready && stack_ready {
433                    gpu_forced::deny_silent_megascan_degrade(
434                        "regex pipeline compile rejected the detector set",
435                    );
436                }
437                pipeline_ready && stack_ready
438            }
439            crate::hw_probe::ScanBackend::SimdCpu | crate::hw_probe::ScanBackend::CpuFallback => {
440                true
441            }
442        };
443        if !ready {
444            gpu_forced::deny_silent_gpu_degrade(self, backend);
445        }
446        ready
447    }
448
449    /// Scan a chunk of text and return all raw credential matches.
450    pub fn scan(&self, chunk: &Chunk) -> Vec<RawMatch> {
451        self.scan_with_deadline(chunk, env_per_chunk_deadline())
452    }
453
454    /// Scan a chunk using a caller-selected backend.
455    pub fn scan_with_backend(
456        &self,
457        chunk: &Chunk,
458        backend: crate::hw_probe::ScanBackend,
459    ) -> Vec<RawMatch> {
460        self.scan_with_deadline_and_backend(chunk, env_per_chunk_deadline(), Some(backend))
461    }
462
463    /// Scan multiple chunks using a caller-selected backend.
464    pub fn scan_chunks_with_backend(
465        &self,
466        chunks: &[Chunk],
467        backend: crate::hw_probe::ScanBackend,
468    ) -> Vec<Vec<RawMatch>> {
469        gpu_forced::deny_silent_gpu_degrade(self, backend);
470        self.scan_chunks_with_backend_internal(chunks, backend)
471    }
472
473    /// Reset the cross-file fragment-reassembly cache.
474    pub fn clear_fragment_cache(&self) {
475        self.fragment_cache.clear();
476    }
477
478    /// Scan a chunk of text against all compiled detectors.
479    pub fn scan_with_deadline(
480        &self,
481        chunk: &Chunk,
482        deadline: Option<std::time::Instant>,
483    ) -> Vec<RawMatch> {
484        self.scan_with_deadline_and_backend(chunk, deadline, None)
485    }
486
487    pub fn scan_with_deadline_and_backend(
488        &self,
489        chunk: &Chunk,
490        deadline: Option<std::time::Instant>,
491        backend: Option<crate::hw_probe::ScanBackend>,
492    ) -> Vec<RawMatch> {
493        if let Some(path) = chunk.metadata.path.as_deref() {
494            let filename = path.rsplit(['/', '\\']).next().unwrap_or(path);
495            if filename == ".keyhog"
496                || filename == ".keyhogignore"
497                || path.split(['/', '\\']).any(|c| c == "detectors")
498            {
499                crate::telemetry::record_file_skipped();
500                return Vec::new();
501            }
502        }
503
504        // Direct-match prefilters: skip chunks that carry none of any
505        // detector's literal bytes (`AlphabetScreen`) or bigrams (bloom). A
506        // FULLY-ENCODED secret (e.g. `data = "<base64-of-ghp_…>"`) carries none
507        // of those - its plaintext prefix only appears AFTER decoding - so the
508        // prefilters would drop it before decode-through could recover it,
509        // silently defeating the decode-through feature on the encoded-only
510        // case. When the prefilter rejects but decode is enabled AND the chunk
511        // carries a long base64/hex run, fall through to a DECODE-ONLY pass
512        // instead of skipping. Bounded: only encoded-looking rejected chunks
513        // pay the decode cost, so normal traffic keeps the fast skip.
514        let alphabet_ok = self
515            .alphabet_screen
516            .as_ref()
517            .map_or(true, |screen| screen.screen(chunk.data.as_bytes()));
518        let bigram_ok =
519            chunk.data.len() < 64 || self.bigram_bloom.maybe_overlaps(chunk.data.as_bytes());
520        if !(alphabet_ok && bigram_ok) {
521            #[cfg(feature = "decode")]
522            if self.config.max_decode_depth > 0
523                && chunk.data.len() <= self.config.max_decode_bytes
524                && crate::decode::has_decodable_payload(chunk.data.as_bytes())
525            {
526                // Direct scan is skipped (the outer bytes match nothing); only
527                // the decoded sub-chunks are scanned, inside post_process.
528                let mut matches = Vec::new();
529                self.post_process_matches(chunk, &mut matches, deadline);
530                return matches;
531            }
532            crate::telemetry::record_file_skipped();
533            return Vec::new();
534        }
535
536        let selected_backend =
537            backend.unwrap_or_else(|| self.select_backend_for_file(chunk.data.len() as u64));
538        gpu_forced::deny_silent_gpu_degrade(self, selected_backend);
539        tracing::trace!(
540            target: "keyhog::routing",
541            backend = selected_backend.label(),
542            chunk_bytes = chunk.data.len(),
543            source_type = chunk.metadata.source_type.as_str(),
544            "scan dispatch"
545        );
546        let mut matches = if chunk.data.len() > MAX_SCAN_CHUNK_BYTES {
547            self.scan_windowed(chunk, deadline)
548        } else {
549            self.scan_inner(chunk, selected_backend, deadline)
550        };
551
552        self.post_process_matches(chunk, &mut matches, deadline);
553
554        matches
555    }
556}