mod support;
use support::paths::detector_dir;
use keyhog_core::{Chunk, ChunkMetadata};
use keyhog_scanner::{CompiledScanner, ScanBackend};
use std::collections::BTreeSet;
/// Loads the detectors found under `detector_dir()` and compiles them into a
/// [`CompiledScanner`] for the tests in this file.
///
/// # Panics
///
/// Panics with `"detectors"` when loading fails and with `"compile"` when
/// compilation fails.
fn scanner() -> CompiledScanner {
    let dir = detector_dir();
    let loaded = keyhog_core::load_detectors(&dir).expect("detectors");
    CompiledScanner::compile(loaded).expect("compile")
}
/// Wraps `text` in a [`Chunk`] attributed to `path`.
///
/// The metadata uses the fixed source type `"api-consistency"` and a base
/// offset of zero; every other metadata field keeps its `Default` value.
fn make_chunk(text: &str, path: &str) -> Chunk {
    let metadata = ChunkMetadata {
        source_type: "api-consistency".into(),
        path: Some(path.into()),
        base_offset: 0,
        ..Default::default()
    };
    Chunk {
        data: text.into(),
        metadata,
    }
}
/// Comparable identity of a single finding, as built by `key` and `key_chunks`:
/// `(credential text, file path or "" when the match has none, match offset)`.
type FindingKey = (String, String, usize);
/// Reduces a list of raw matches to an ordered set of [`FindingKey`]s.
///
/// Each key holds the credential text, the match's file path (an empty string
/// when the match carries no path) and the match offset. Duplicate keys
/// collapse because the result is a set.
fn key(matches: &[keyhog_core::RawMatch]) -> BTreeSet<FindingKey> {
    let mut keys = BTreeSet::new();
    for found in matches {
        let credential = found.credential.as_ref().to_string();
        let path = match found.location.file_path.as_deref() {
            Some(p) => p.to_string(),
            None => String::new(),
        };
        keys.insert((credential, path, found.location.offset));
    }
    keys
}
/// Reduces per-chunk match lists to a single ordered set of [`FindingKey`]s.
///
/// Matches from all chunks are merged into one set; each key holds the
/// credential text, the file path (empty string when absent) and the offset.
fn key_chunks(per_chunk: &[Vec<keyhog_core::RawMatch>]) -> BTreeSet<FindingKey> {
    per_chunk
        .iter()
        .flatten()
        .map(|found| {
            let path = found
                .location
                .file_path
                .as_deref()
                .map(str::to_string)
                .unwrap_or_default();
            (
                found.credential.as_ref().to_string(),
                path,
                found.location.offset,
            )
        })
        .collect()
}
/// `scan` and `scan_with_deadline(.., None)` must yield the same finding keys
/// for the same chunk.
///
/// Note: the comparison is on key sets only, so it also holds when both calls
/// return no findings at all.
#[test]
fn scan_and_scan_with_deadline_none_agree() {
    let compiled = scanner();
    let source = "const AWS = \"AKIAQYLPMN5HFIQR7XYA\";\nconst PAT = \"ghp_aBcD1234EFgh5678ijklMNop9012qrSTuvWX\";\n";
    let input = make_chunk(source, "fixtures/aws_pat.rs");

    let plain_hits = compiled.scan(&input);
    let via_scan = key(&plain_hits);
    let deadline_hits = compiled.scan_with_deadline(&input, None);
    let via_deadline = key(&deadline_hits);

    assert_eq!(
        via_scan, via_deadline,
        "scan() and scan_with_deadline(None) must produce identical findings"
    );
}
/// For each explicitly selected backend, scanning one chunk directly and
/// scanning it as a one-element batch must yield the same finding keys.
#[test]
fn scan_with_backend_each_matches_scan_chunks_with_backend() {
    let compiled = scanner();
    let input = make_chunk(
        "auth: \"sk_live_4eC39HqLyjWDarjtT1zdp7dc\"\npayload: \"AKIAQYLPMN5HFIQR7BBB\"\n",
        "fixtures/stripe_aws.yml",
    );
    let backends = [ScanBackend::SimdCpu, ScanBackend::CpuFallback];

    // `backend` keeps its name because the assertion message captures it.
    for backend in backends {
        let direct_hits = compiled.scan_with_backend(&input, backend);
        let one_shot = key(&direct_hits);

        let batch_hits = compiled.scan_chunks_with_backend(std::slice::from_ref(&input), backend);
        let batched = key_chunks(&batch_hits);

        assert_eq!(
            one_shot,
            batched,
            "scan_with_backend({backend:?}) and scan_chunks_with_backend(&[chunk], {backend:?}) \
             must produce identical findings: single={} multi={}",
            one_shot.len(),
            batched.len()
        );
    }
}
/// Scanning the same chunk three times with one scanner must yield the same
/// finding keys every time.
#[test]
fn scan_repeated_invocations_produce_identical_findings() {
    let compiled = scanner();
    let input = make_chunk(
        "GITHUB_TOKEN=ghp_aBcD1234EFgh5678ijklMNop9012qrSTuvWX\n",
        "env.txt",
    );

    // Run all three scans first, then compare neighbours (1st vs 2nd, 2nd vs 3rd).
    let runs: Vec<BTreeSet<FindingKey>> = (0..3).map(|_| key(&compiled.scan(&input))).collect();
    for pair in runs.windows(2) {
        assert_eq!(pair[0], pair[1]);
    }
}
/// Passing an empty slice of chunks to the batch API must produce an empty
/// result collection.
#[test]
fn empty_chunks_slice_returns_empty_results() {
    let compiled = scanner();
    let backend = ScanBackend::SimdCpu;
    let per_chunk = compiled.scan_chunks_with_backend(&[], backend);
    assert!(
        per_chunk.is_empty(),
        "empty input slice must return empty result slice"
    );
}
/// A batch scan must return one result list per input chunk, in input order,
/// with every finding attributed to the path of the chunk it came from.
///
/// The batch interleaves two chunks without secrets (`a.txt`, `c.txt`) with
/// two chunks that each hold one credential (`b.txt`, `d.txt`).
#[test]
fn multi_chunk_input_preserves_per_chunk_attribution() {
    let compiled = scanner();
    let chunks = vec![
        make_chunk("noise\n", "a.txt"),
        make_chunk("AWS = \"AKIAQYLPMN5HFIQR7XYA\"\n", "b.txt"),
        make_chunk("more noise\n", "c.txt"),
        make_chunk(
            "PAT = \"ghp_aBcD1234EFgh5678ijklMNop9012qrSTuvWX\"\n",
            "d.txt",
        ),
    ];

    let results = compiled.scan_chunks_with_backend(&chunks, ScanBackend::SimdCpu);

    // The length check runs first so the positional indexing below is in range.
    assert_eq!(
        results.len(),
        chunks.len(),
        "per-chunk results slice length mismatch"
    );

    // Findings must land only in the slots of the chunks that hold secrets.
    assert!(
        results[0].is_empty(),
        "a.txt should have no findings, has {}",
        results[0].len()
    );
    assert!(
        !results[1].is_empty(),
        "b.txt should have AKIA finding, has 0"
    );
    assert!(
        results[2].is_empty(),
        "c.txt should have no findings, has {}",
        results[2].len()
    );
    assert!(
        !results[3].is_empty(),
        "d.txt should have ghp_ finding, has 0"
    );

    // Every finding must carry the path of the chunk at the same position.
    // `idx` and `expected_path` keep their names: the message captures them.
    for (idx, (source_chunk, chunk_results)) in chunks.iter().zip(results.iter()).enumerate() {
        let expected_path = source_chunk.metadata.path.as_deref().unwrap();
        for found in chunk_results {
            assert_eq!(
                found.location.file_path.as_deref(),
                Some(expected_path),
                "chunk {idx} finding {:?} attributed to wrong path: got {:?}, want {expected_path}",
                found.credential.as_ref(),
                found.location.file_path
            );
        }
    }
}