use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use keyhog_core::{Chunk, ChunkMetadata, DetectorFile};
use keyhog_scanner::CompiledScanner;
/// Running total of bytes requested through [`CountingAlloc`] while
/// [`COUNTING`] is set. Deallocations never decrease it.
static BYTES_ALLOCATED: AtomicUsize = AtomicUsize::new(0);

/// Switch that turns byte accounting on and off; accounting starts disabled.
static COUNTING: AtomicBool = AtomicBool::new(false);

/// Allocator wrapper that forwards every request to [`System`] and, while
/// [`COUNTING`] is set, adds the requested sizes to [`BYTES_ALLOCATED`].
struct CountingAlloc;

impl CountingAlloc {
    /// Adds `bytes` to the running total, but only while accounting is enabled.
    ///
    /// Touches nothing but the two atomics, so it is safe to call from inside
    /// the allocator itself (it never allocates).
    fn tally(bytes: usize) {
        if COUNTING.load(Ordering::Relaxed) {
            BYTES_ALLOCATED.fetch_add(bytes, Ordering::Relaxed);
        }
    }
}

// SAFETY: all memory management is delegated to `System` with the caller's
// arguments passed through unchanged; the wrapper only updates atomic counters.
unsafe impl GlobalAlloc for CountingAlloc {
    /// Records `layout.size()` (when accounting is on), then allocates via `System`.
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        Self::tally(layout.size());
        // SAFETY: the caller upholds the `GlobalAlloc::alloc` contract, which is
        // forwarded verbatim.
        unsafe { System.alloc(layout) }
    }

    /// Frees via `System`; deallocations are not tracked.
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // SAFETY: `ptr` and `layout` come straight from the caller, who
        // guarantees they describe a live block from this allocator.
        unsafe { System.dealloc(ptr, layout) }
    }

    /// Records only the number of bytes the block grows by (shrinking or
    /// same-size requests add nothing), then reallocates via `System`.
    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        let old_size = layout.size();
        if new_size > old_size {
            Self::tally(new_size - old_size);
        }
        // SAFETY: the caller upholds the `GlobalAlloc::realloc` contract, which
        // is forwarded verbatim.
        unsafe { System.realloc(ptr, layout, new_size) }
    }
}

/// Installs the counting wrapper as the allocator for this whole binary.
#[global_allocator]
static ALLOC: CountingAlloc = CountingAlloc;
/// Parses the detector TOML sources embedded in `keyhog_core` into detector specs.
///
/// Entries that fail to parse as a [`DetectorFile`] are skipped without any
/// diagnostic, so the returned list may be shorter than the embedded set.
///
/// # Panics
///
/// Panics if `keyhog_core` reports no embedded detector sources at all.
fn load_embedded_detectors() -> Vec<keyhog_core::DetectorSpec> {
    let sources = keyhog_core::embedded_detector_tomls();
    assert!(
        !sources.is_empty(),
        "no embedded detectors - rebuild keyhog-core with detectors directory"
    );

    let mut specs = Vec::new();
    for (_, text) in sources.iter() {
        // Parse failures are intentionally ignored; only well-formed files contribute.
        if let Ok(file) = toml::from_str::<DetectorFile>(text) {
            specs.push(file.detector);
        }
    }
    specs
}
/// Builds a fixture [`Chunk`] whose body is one fixed source-like line repeated
/// until the text is at least `target_bytes` long.
///
/// The body length is therefore the smallest multiple of the line length that
/// is `>= target_bytes` (empty when `target_bytes` is 0). The metadata
/// describes a filesystem source at a fixed path, with every optional field
/// left as `None`.
fn passthrough_chunk(target_bytes: usize) -> Chunk {
    const LINE: &str = "let value_name = compute_label(index_position, lookup_table);\n";

    // Whole lines needed to reach `target_bytes`: integer division rounded up.
    let repeats = target_bytes / LINE.len() + usize::from(target_bytes % LINE.len() != 0);

    // Reserve up front so filling the body never has to regrow the buffer.
    let mut body = String::with_capacity(target_bytes + LINE.len());
    body.extend(std::iter::repeat(LINE).take(repeats));

    let metadata = ChunkMetadata {
        base_offset: 0,
        base_line: 0,
        source_type: "filesystem".into(),
        path: Some("src/module/component_helper.rs".into()),
        commit: None,
        author: None,
        date: None,
        mtime_ns: None,
        size_bytes: None,
    };

    Chunk {
        data: body.into(),
        metadata,
    }
}
/// Scans `chunk` once and returns the number of bytes tallied in
/// `BYTES_ALLOCATED` during that single `scanner.scan` call.
///
/// # Panics
///
/// Panics if the scan reports any match: the fixture is expected to be
/// match-free so that match post-processing does not distort the measurement.
fn scan_alloc_bytes(scanner: &CompiledScanner, chunk: &Chunk) -> usize {
    // Measurement window: start from zero and count only while the scan runs.
    BYTES_ALLOCATED.store(0, Ordering::Relaxed);
    COUNTING.store(true, Ordering::Relaxed);
    let findings = scanner.scan(chunk);
    COUNTING.store(false, Ordering::Relaxed);

    // Counting is off from here on, so the tally is final; snapshot it before
    // the assertion below gets a chance to build its message.
    let counted = BYTES_ALLOCATED.load(Ordering::Relaxed);

    assert!(
        findings.is_empty(),
        "fixture chunk should produce no matches (got {}); a matching chunk \
         would add body-proportional post-processing allocations and pollute \
         the per-chunk-copy signal",
        findings.len()
    );

    counted
}
/// Regression guard for PERF-alloc_perchunk-1.
///
/// Scans two match-free fixture chunks of `N` and `2 * N` bytes and compares
/// the bytes allocated per scan. After a few untimed warm-up scans, each size
/// is measured `K` times (interleaved) and the minimum is kept; the test fails
/// if doubling the chunk size increases the allocated bytes by `N / 2` or more.
#[test]
fn passthrough_prepare_does_not_copy_whole_chunk_body() {
    const N: usize = 256 * 1024;
    const FLOOR_BYTES: usize = N / 2;
    const WARMUP_ROUNDS: usize = 2;
    const K: usize = 3;

    let scanner = CompiledScanner::compile(load_embedded_detectors()).expect("compile scanner");
    let chunk_n = passthrough_chunk(N);
    let chunk_2n = passthrough_chunk(2 * N);

    // Unmeasured warm-up scans, alternating small and large chunk.
    for _ in 0..WARMUP_ROUNDS {
        for chunk in [&chunk_n, &chunk_2n] {
            let _ = scanner.scan(chunk);
        }
    }

    // Keep the smallest observation per size; the two sizes are measured
    // alternately within each round.
    let (min_n, min_2n) = (0..K).fold((usize::MAX, usize::MAX), |(best_n, best_2n), _| {
        let bytes_n = scan_alloc_bytes(&scanner, &chunk_n);
        let bytes_2n = scan_alloc_bytes(&scanner, &chunk_2n);
        (best_n.min(bytes_n), best_2n.min(bytes_2n))
    });

    // Saturating: a larger chunk allocating *less* counts as zero growth.
    let growth = min_2n.saturating_sub(min_n);
    assert!(
        growth < FLOOR_BYTES,
        "PERF-alloc_perchunk-1: per-chunk allocation grows with chunk body \
         size — the passthrough path copies the whole chunk into a fresh \
         String.\n scan({N}B) allocated {min_n} B; scan({}B) allocated {min_2n} B\n \
         growth (2N - N) = {growth} B (must be < {FLOOR_BYTES} B = N/2)\n \
         Defect: ScannerPreprocessedText::passthrough does `text.to_string()` \
         (crates/scanner/src/multiline/config.rs:80, \
         preprocessor.rs:168, types.rs:147), reached from \
         prepare_chunk (engine/backend_dispatch.rs:116) on every non-multiline \
         chunk. growth >= N here means one full-body copy per chunk.\n \
         Fix: make the passthrough preprocessed text BORROW chunk.data \
         (Cow<'a, str> / &str) instead of owning a copy, so per-chunk \
         allocation is size-independent and `growth` drops to the line-mapping \
         bookkeeping (a few KiB).",
        2 * N
    );
}