1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
use super::*;
use crate::hw_probe::ScanBackend;
use keyhog_core::Chunk;
impl CompiledScanner {
pub(crate) fn scan_chunks_with_backend_internal(
&self,
chunks: &[Chunk],
backend: ScanBackend,
) -> Vec<Vec<RawMatch>> {
// GPU paths: literal-set (Gpu) and regex-NFA (MegaScan). Both
// require a working GPU adapter + compiled matchers; the lazy
// compile is gated below so a missing GPU silently degrades to
// SIMD via `scan_with_backend` per chunk.
let gpu_path = matches!(backend, ScanBackend::Gpu | ScanBackend::MegaScan);
if !gpu_path || chunks.is_empty() {
// Parallel CPU path: rayon's global pool is configured by the
// CLI orchestrator with --threads / KEYHOG_THREADS / physical
// core count. Hyperscan + AC scans are CPU-bound and trivially
// independent per-chunk, so par_iter() saturates cores cleanly
// - was previously a serial iter().map() that pinned to one
// worker even on 32-core boxes.
use rayon::prelude::*;
let mut results: Vec<Vec<RawMatch>> = chunks
.par_iter()
.map(|chunk| self.scan_with_backend(chunk, backend))
.collect();
// Cross-chunk window-boundary reassembly. Without this, a
// secret straddling the seam between two adjacent gapless
// chunks from the same file is invisible - both halves are
// too short to match the regex on their own. The GPU paths
// below call `scan_chunk_boundaries` after their batch
// dispatch (see `scan_coalesced_megascan`/`scan_coalesced_gpu`);
// the CPU path historically did NOT, so callers using
// `scan_chunks_with_backend(_, SimdCpu | CpuFallback)` lost
// boundary recall silently. P3 proptest regression: a 38-byte
// tail chunk plus 911-byte head chunk dropped an ASIA…
// credential that straddled byte 911. Boundary scan
// synthesises a 2 KiB tail+head buffer per adjacent pair
// (`MAX_BOUNDARY` per side) and runs a fresh in-chunk scan;
// cost is `(N-1) × ~2 KiB` total, negligible vs per-chunk
// scan cost on the same dataset.
super::boundary::scan_chunk_boundaries(self, chunks, &mut results);
return results;
}
// GPU batch path: `scan_coalesced_gpu` produces full per-chunk
// RawMatch results in one device dispatch + parallel post-process.
// The previous `populate_gpu_batch_triggers` was a comment-only TODO
// that threw the GPU results away - see audit release-2026-04-26.
if self.gpu_literals.is_none() || self.gpu_backend.is_none() {
super::gpu_forced::deny_silent_gpu_degrade(self, backend);
let fallback_backend = self.degraded_backend_after_gpu_failure();
use rayon::prelude::*;
let mut results: Vec<Vec<RawMatch>> = chunks
.par_iter()
.map(|chunk| self.scan_with_backend(chunk, fallback_backend))
.collect();
super::boundary::scan_chunk_boundaries(self, chunks, &mut results);
return results;
}
match backend {
ScanBackend::MegaScan => self.scan_coalesced_megascan(chunks),
_ => self.scan_coalesced_gpu(chunks),
}
}
pub(crate) fn prepare_chunk<'a>(&self, chunk: &'a Chunk) -> PreparedChunk<'a> {
// Note: non-ASCII normalization used to swap `chunk` to an
// owned `Chunk` via `normalize_scannable_chunk`. That path
// is rarely-hit (most source code is pure ASCII) and the
// returned Chunk was immediately consumed via clone into the
// owned PreparedChunk anyway, so the borrow design works:
// for non-ASCII inputs we still feed the normalization
// through `unicode_hardening::normalize_homoglyphs` Cow
// below, which lands the normalized text in
// `preprocessed.text`. The raw `chunk.data` borrow remains
// intact for the few downstream consumers that read it
// (extract_confirmed_patterns uses preprocessed.text by
// default; raw `chunk.data` only via the drift fallback).
// Homoglyph normalization: zero-allocation Cow fast path. Pure-ASCII
// and evasion-free inputs (the 99% case) borrow `chunk.data` directly.
// Only inputs containing actual homoglyphs/zero-width/RTL allocate.
//
// The Cow MUST borrow `chunk.data` (lifetime `'a`) on the no-op path,
// not a local, so the borrowed passthrough text below can outlive this
// call inside `PreparedChunk<'a>`. We therefore chain the two
// normalization stages explicitly: a stage that rewrites bytes yields
// `Cow::Owned`; a no-op stage preserves the `&'a chunk.data` borrow.
let data_to_pp: std::borrow::Cow<'a, str> = if self.config.unicode_normalization {
match crate::unicode_hardening::normalize_homoglyphs(&chunk.data) {
// Homoglyph stage rewrote the bytes: the owned String is the
// canonical text. The interior-control strip then operates on
// that owned buffer; either outcome stays owned.
std::borrow::Cow::Owned(normalized) => {
match crate::unicode_hardening::strip_interior_evasion_controls(&normalized) {
std::borrow::Cow::Owned(stripped) => std::borrow::Cow::Owned(stripped),
std::borrow::Cow::Borrowed(_) => std::borrow::Cow::Owned(normalized),
}
}
// Homoglyph stage was a no-op: bytes are still `chunk.data`.
// Run the interior-control strip against `chunk.data` itself so
// a no-op there preserves the `'a` borrow on the chunk.
std::borrow::Cow::Borrowed(_) => {
crate::unicode_hardening::strip_interior_evasion_controls(&chunk.data)
}
}
} else {
std::borrow::Cow::Borrowed(&chunk.data)
};
// For the structured / multiline-join paths the preprocessed text is
// freshly synthesized (owned regardless of `data_to_pp`), so they read
// it through a plain `&str`. The passthrough path, by contrast, is
// byte-identical to `data_to_pp` and carries the Cow through unchanged
// so a borrowed chunk stays borrowed (no full-body copy).
let preprocessed = if let Some(pp) =
crate::structured::preprocess(&data_to_pp, chunk.metadata.path.as_deref())
{
pp
} else {
#[cfg(feature = "multiline")]
{
if crate::multiline::has_concatenation_indicators(&data_to_pp) {
crate::multiline::preprocess_multiline(
data_to_pp,
&self.config.multiline,
&self.fragment_cache,
)
} else {
ScannerPreprocessedText::passthrough(data_to_pp)
}
}
#[cfg(not(feature = "multiline"))]
ScannerPreprocessedText::passthrough(data_to_pp)
};
PreparedChunk {
chunk,
preprocessed,
line_offsets: std::sync::OnceLock::new(),
}
}
}