use keyhog_core::{load_detectors, Chunk, ChunkMetadata, RawMatch};
use keyhog_scanner::{CompiledScanner, ScanBackend};
use std::collections::BTreeSet;
use std::path::PathBuf;
/// Returns the path `../../detectors` appended to this crate's manifest
/// directory (`CARGO_MANIFEST_DIR`, captured at compile time via `env!`).
///
/// The path is not canonicalized; the `..` components are left as-is.
fn detectors_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("../../detectors");
    dir
}
/// Builds a [`Chunk`] whose payload is a copy of `data`.
///
/// The metadata is a fixed placeholder: zero base offset and line, source type
/// `"perf-tripwire"`, path `"perf_simd_scan.txt"`, and every remaining optional
/// field set to `None`.
fn make_chunk(data: &str) -> Chunk {
    let metadata = ChunkMetadata {
        base_offset: 0,
        base_line: 0,
        source_type: "perf-tripwire".into(),
        path: Some("perf_simd_scan.txt".into()),
        commit: None,
        author: None,
        date: None,
        mtime_ns: None,
        size_bytes: None,
    };

    Chunk {
        data: String::from(data).into(),
        metadata,
    }
}
/// Reduces a list of matches to the ordered set of
/// `(detector_id, credential)` string pairs it contains.
///
/// Duplicate pairs collapse into one entry, so two result lists can be
/// compared with set operations regardless of order or multiplicity.
fn finding_keys(ms: &[RawMatch]) -> BTreeSet<(String, String)> {
    let mut keys = BTreeSet::new();
    for found in ms {
        keys.insert((found.detector_id.to_string(), found.credential.to_string()));
    }
    keys
}
/// Fixtures scanned by the tests in this file with both backends.
///
/// Each entry is `(detector_id, text, credential)`: `text` is the chunk content
/// that gets scanned, and `credential` is the exact value a match reported
/// under `detector_id` is expected to carry.
const HS_MINUS_AC_FIXTURES: &[(&str, &str, &str)] = &[
    // Datadog API key assignment.
    (
        "datadog-api-key",
        "DD_API_KEY=7b3e5d8c1a9f4e2b6c8d3a5e9f1b7c4d",
        "7b3e5d8c1a9f4e2b6c8d3a5e9f1b7c4d",
    ),
    // Twilio auth token; the text is two lines, with the account SID first.
    (
        "twilio-auth-token",
        concat!(
            "TWILIO_ACCOUNT_SID=AC7b3e5d8c1a9f4e2b6c8d3a5e9f1b7c4d\n",
            "TWILIO_AUTH_TOKEN=4c9a8f6e3b7d1a2c5e8f0b9d6a3c4e1f",
        ),
        "4c9a8f6e3b7d1a2c5e8f0b9d6a3c4e1f",
    ),
    // Africa's Talking API key assignment.
    (
        "africastalking-api-key",
        "africastalking_api_key=Kp4Qx7Rm2Sn5Tb8Vw3YzKp4Qx7Rm2Sn5",
        "Kp4Qx7Rm2Sn5Tb8Vw3YzKp4Qx7Rm2Sn5",
    ),
];
/// Scans every fixture with both backends and requires that:
///
/// * `ScanBackend::SimdCpu` reports the fixture's `(detector_id, credential)` pair, and
/// * across all fixtures, `ScanBackend::CpuFallback` misses at least one pair that
///   `SimdCpu` found.
///
/// When the `simd` feature is not compiled in, the test logs why and returns
/// without asserting anything.
#[test]
fn simd_union_is_load_bearing_for_recall() {
    if !cfg!(feature = "simd") {
        eprintln!(
            "perf_simd_scan: `simd` feature not compiled — no Hyperscan prefilter, \
             SimdCpu == CpuFallback (AC only), so the HS\\AC differential does not \
             exist; skipping the load-bearing assertion."
        );
        return;
    }

    let loaded = load_detectors(&detectors_dir()).expect("load detectors");
    let compiled = CompiledScanner::compile(loaded).expect("compile scanner");

    // Number of fixtures that SimdCpu found but CpuFallback did not.
    let mut union_load_bearing = 0usize;

    for &(detector_id, text, credential) in HS_MINUS_AC_FIXTURES {
        let input = make_chunk(text);
        let simd_matches = compiled.scan_with_backend(&input, ScanBackend::SimdCpu);
        let cpu_matches = compiled.scan_with_backend(&input, ScanBackend::CpuFallback);

        // A match counts only if both the detector id and the credential agree.
        let is_target = |m: &RawMatch| {
            m.detector_id.as_ref() == detector_id && m.credential.as_ref() == credential
        };
        let simd_found = simd_matches.iter().any(&is_target);
        let cpu_found = cpu_matches.iter().any(&is_target);

        assert!(
            simd_found,
            "SimdCpu (AC ∪ Hyperscan) FAILED to find the `{detector_id}` credential \
             `{credential}`. The Hyperscan union in collect_triggered_patterns_simd \
             (backend_triggered.rs) is the only thing that triggers this no-literal \
             detector — if it was dropped, this is the recall regression PERF-simd_scan-1 \
             caused (49 detectors lost). Restore the AC ∪ HS union."
        );
        // Already implied by the assertion above (`simd_found` is known to be true
        // here); kept as an explicit statement of the superset property.
        assert!(
            !cpu_found || simd_found,
            "SimdCpu dropped a `{detector_id}` finding that CpuFallback made — the SIMD \
             path must be a superset of the scalar path."
        );

        let simd_only = simd_found && !cpu_found;
        if simd_only {
            union_load_bearing += 1;
            eprintln!(
                "perf_simd_scan: union load-bearing for `{detector_id}` \
                 (SimdCpu finds it, CpuFallback/AC-only misses it)."
            );
        }
    }

    assert!(
        union_load_bearing >= 1,
        "NONE of the {} known HS\\AC fixtures was missed by CpuFallback — either every \
         one gained a usable AC literal (update the fixtures) or the CpuFallback path \
         silently started using Hyperscan. The union must remain provably load-bearing.",
        HS_MINUS_AC_FIXTURES.len()
    );
}
/// Requires that `ScanBackend::SimdCpu` never drops a finding that
/// `ScanBackend::CpuFallback` makes.
///
/// First a control chunk containing an `AKIA…` key is scanned: `CpuFallback` must
/// report at least one finding and all of its findings must also be reported by
/// `SimdCpu`. Then the same subset relation is checked for every fixture text.
///
/// When the `simd` feature is not compiled in, the test logs why and returns
/// without asserting anything.
#[test]
fn simd_findings_are_a_superset_of_scalar() {
    if !cfg!(feature = "simd") {
        eprintln!(
            "perf_simd_scan: `simd` feature not compiled — SimdCpu == CpuFallback; \
             superset assertion is vacuous, skipping."
        );
        return;
    }

    let loaded = load_detectors(&detectors_dir()).expect("load detectors");
    let compiled = CompiledScanner::compile(loaded).expect("compile scanner");

    // Control: compare the two backends on the AKIA chunk.
    let control = make_chunk("const AWS_KEY = \"AKIAQYLPMN5HFIQR7XYA\";\n");
    let control_simd = finding_keys(&compiled.scan_with_backend(&control, ScanBackend::SimdCpu));
    let control_cpu =
        finding_keys(&compiled.scan_with_backend(&control, ScanBackend::CpuFallback));
    let control_ok = !control_cpu.is_empty() && control_cpu.is_subset(&control_simd);
    assert!(
        control_ok,
        "literal-anchored control regressed: CpuFallback={control_cpu:?} must be non-empty \
         and a subset of SimdCpu={control_simd:?} — the union must not drop the AC fast path."
    );

    for &(detector_id, text, _) in HS_MINUS_AC_FIXTURES {
        let input = make_chunk(text);
        let simd_keys = finding_keys(&compiled.scan_with_backend(&input, ScanBackend::SimdCpu));
        let cpu_keys =
            finding_keys(&compiled.scan_with_backend(&input, ScanBackend::CpuFallback));

        // Scalar findings absent from the SIMD result, in sorted order.
        let dropped: Vec<_> = cpu_keys
            .iter()
            .filter(|key| !simd_keys.contains(*key))
            .collect();
        assert!(
            dropped.is_empty(),
            "on the `{detector_id}` fixture, SimdCpu dropped findings the scalar CpuFallback \
             path made (CpuFallback ⊄ SimdCpu): {dropped:?}. The SIMD path must be a recall \
             superset of the scalar path."
        );
    }

    eprintln!(
        "perf_simd_scan: superset OK — control (AKIA) found by both backends; SimdCpu drops \
         no scalar finding on any HS\\AC fixture. (Strictness proven by the load-bearing test.)"
    );
}