mod support;
use support::paths::detector_dir;
use std::collections::BTreeMap;
use std::path::PathBuf;
use base64::{engine::general_purpose, Engine as _};
use keyhog_core::{Chunk, ChunkMetadata};
use keyhog_scanner::CompiledScanner;
use serde::Deserialize;
/// One detector contract, deserialized from a `*.toml` file in the contracts directory.
///
/// The compound-encoding sweep in this file iterates `positive`; the fields marked
/// `#[allow(dead_code)]` are deserialized but not read by the code visible here.
#[derive(Debug, Deserialize)]
struct Contract {
/// Required key (no serde default); unused in this file, hence the lint allowance.
#[allow(dead_code)]
schema_version: u32,
/// Required key naming the detector; presumably the detector's id — confirm against the
/// contract schema.
detector_id: String,
/// Required key; unused in this file, hence the lint allowance.
#[allow(dead_code)]
service: String,
/// Required key; unused in this file, hence the lint allowance.
#[allow(dead_code)]
severity: String,
/// Positive samples; an absent key deserializes to an empty list because of `serde(default)`.
#[serde(default)]
positive: Vec<Positive>,
}
/// A single positive sample from a contract.
#[derive(Debug, Deserialize)]
struct Positive {
/// Sample text handed to the scanner; the sweep replaces `credential` inside it with an
/// encoded form before scanning.
text: String,
/// Plaintext credential the sweep encodes and then expects to see in a scanner match.
credential: String,
/// Required key; unused in this file, hence the lint allowance. Presumably a
/// human-readable justification for the sample — confirm against the contract schema.
#[allow(dead_code)]
reason: String,
}
/// Returns `<crate root>/tests/contracts`, resolved from the compile-time
/// `CARGO_MANIFEST_DIR` of this crate.
fn contracts_dir() -> PathBuf {
    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("tests")
        .join("contracts")
}
/// Loads every `*.toml` contract found directly inside [`contracts_dir`].
///
/// Returns `(path, contract)` pairs sorted by path so the result does not depend on the
/// platform's directory iteration order. Loading stays best-effort: an unreadable directory
/// yields an empty list, and unreadable or malformed files are skipped. Every skip is
/// reported on stderr so a broken contract cannot silently drop out of the sweep.
fn load_contracts() -> Vec<(PathBuf, Contract)> {
    let dir = contracts_dir();
    let entries = match std::fs::read_dir(&dir) {
        Ok(entries) => entries,
        Err(err) => {
            eprintln!(
                "warning: cannot read contracts dir {}: {err}",
                dir.display()
            );
            return Vec::new();
        }
    };

    let mut out = Vec::new();
    for entry in entries {
        let path = match entry {
            Ok(entry) => entry.path(),
            Err(err) => {
                eprintln!(
                    "warning: skipping unreadable entry in {}: {err}",
                    dir.display()
                );
                continue;
            }
        };
        // Only TOML files are contracts; anything else in the directory is ignored quietly.
        if path.extension().and_then(|e| e.to_str()) != Some("toml") {
            continue;
        }
        let text = match std::fs::read_to_string(&path) {
            Ok(text) => text,
            Err(err) => {
                eprintln!(
                    "warning: skipping unreadable contract {}: {err}",
                    path.display()
                );
                continue;
            }
        };
        let contract = match toml::from_str::<Contract>(&text) {
            Ok(contract) => contract,
            Err(err) => {
                eprintln!(
                    "warning: skipping malformed contract {}: {err}",
                    path.display()
                );
                continue;
            }
        };
        out.push((path, contract));
    }

    // `read_dir` gives no ordering guarantee; sort for reproducible runs.
    out.sort_by(|a, b| a.0.cmp(&b.0));
    out
}
/// Builds the scanner under test from the detectors found in `detector_dir()`.
///
/// # Panics
///
/// Panics if the detectors cannot be loaded or the scanner fails to compile.
fn scanner() -> CompiledScanner {
    let detector_root = detector_dir();
    let loaded = keyhog_core::load_detectors(&detector_root)
        .expect("detectors directory loadable from compound encoding runner");
    let compiled = CompiledScanner::compile(loaded);
    compiled.expect("scanner compile from compound encoding runner")
}
/// One encoding layer that the sweep can apply to a credential.
#[derive(Debug, Clone, Copy)]
enum Layer {
/// Base64 via `general_purpose::STANDARD`.
Base64Std,
/// Base64 via `general_purpose::URL_SAFE_NO_PAD`.
Base64Url,
/// Hexadecimal via `hex::encode`.
Hex,
/// Percent-encoding of every byte (not just reserved ones) via `percent_encode_all`.
UrlPercent,
}
impl Layer {
    /// Every layer, in the order the sweep iterates them.
    const ALL: &'static [Layer] = &[
        Self::Base64Std,
        Self::Base64Url,
        Self::Hex,
        Self::UrlPercent,
    ];

    /// Short name for this layer; used as the key in the per-pair summary table.
    fn label(self) -> &'static str {
        match self {
            Self::Base64Std => "base64-std",
            Self::Base64Url => "base64-url",
            Self::Hex => "hex",
            Self::UrlPercent => "url-percent",
        }
    }

    /// Encodes the UTF-8 bytes of `input` with this layer and returns the encoded text.
    fn encode(self, input: &str) -> String {
        let raw = input.as_bytes();
        match self {
            Self::Base64Std => general_purpose::STANDARD.encode(raw),
            Self::Base64Url => general_purpose::URL_SAFE_NO_PAD.encode(raw),
            Self::Hex => hex::encode(raw),
            Self::UrlPercent => percent_encode_all(raw),
        }
    }
}
/// Percent-encodes every byte of `bytes` as `%XX` with uppercase hex digits.
///
/// Unlike a conventional URL encoder, no byte is left literal: unreserved characters such as
/// ASCII letters are escaped too. The result is exactly three characters per input byte.
fn percent_encode_all(bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    let mut out = String::with_capacity(bytes.len() * 3);
    for byte in bytes {
        // Format straight into the preallocated buffer instead of building a temporary
        // `String` per byte. Writing to a `String` cannot fail, so the result is ignored.
        let _ = write!(out, "%{byte:02X}");
    }
    out
}
/// Wraps `text` in a [`Chunk`] tagged with the fixed source type `compound-encoding` and
/// the fixed path `compound.txt`; all other metadata fields take their defaults.
fn make_chunk(text: &str) -> Chunk {
    let metadata = ChunkMetadata {
        path: Some("compound.txt".into()),
        source_type: "compound-encoding".into(),
        ..Default::default()
    };
    Chunk {
        data: text.into(),
        metadata,
    }
}
/// Returns `true` if the credential of at least one match contains `expected` as a
/// substring; an empty `matches` slice yields `false`.
fn any_credential_contains(matches: &[keyhog_core::RawMatch], expected: &str) -> bool {
    for found in matches {
        if found.credential.as_ref().contains(expected) {
            return true;
        }
    }
    false
}
/// Returns `text` with the first occurrence of `raw` replaced by `encoded`.
///
/// Only the first occurrence is substituted; any later copies of `raw` stay in plaintext.
/// If `raw` does not occur in `text`, an unchanged copy of `text` is returned.
fn wrap_with_encoded_cred(text: &str, raw: &str, encoded: &str) -> String {
    // A replacement count of 1 limits the substitution to the leftmost match.
    text.replacen(raw, encoded, 1)
}
/// Sweeps every contract positive through each ordered pair of distinct encoding layers and
/// reports how often the scanner still surfaces the plaintext credential.
///
/// For each pair, the credential is encoded with the inner layer, then with the outer layer,
/// and the result replaces the plaintext credential inside the positive's text. A run counts
/// as a hit when any match's credential contains the original plaintext. The per-pair table
/// is printed to stderr.
///
/// Recall only fails the test when `KEYHOG_COMPOUND_STRICT` is `1` or `true`
/// (case-insensitive) and the overall hit rate is below 50%. An empty contract set always
/// fails.
#[test]
fn every_positive_swept_through_two_layer_encodings() {
    let scanner = scanner();
    let contracts = load_contracts();
    assert!(
        !contracts.is_empty(),
        "tests/contracts/ has no *.toml - compound runner has nothing to drive"
    );

    // (outer label, inner label) -> (runs, hits)
    let mut per_pair: BTreeMap<(&'static str, &'static str), (usize, usize)> = BTreeMap::new();
    let mut total_runs: usize = 0;
    let mut total_hits: usize = 0;

    for (_path, contract) in &contracts {
        for positive in &contract.positive {
            for inner in Layer::ALL {
                // The inner encoding is the same for every outer layer, so compute it once.
                let inner_encoded = inner.encode(&positive.credential);
                for outer in Layer::ALL {
                    // Same layer twice is not a compound pair. Labels are unique per
                    // variant, so comparing them is enough to detect it.
                    if inner.label() == outer.label() {
                        continue;
                    }
                    let outer_encoded = outer.encode(&inner_encoded);
                    let text = wrap_with_encoded_cred(
                        &positive.text,
                        &positive.credential,
                        &outer_encoded,
                    );

                    // Reset the fragment cache before each scan, as the original sweep does.
                    scanner.clear_fragment_cache();
                    let chunk = make_chunk(&text);
                    let matches = scanner.scan(&chunk);
                    let hit = any_credential_contains(&matches, &positive.credential);

                    let bucket = per_pair
                        .entry((outer.label(), inner.label()))
                        .or_insert((0, 0));
                    bucket.0 += 1;
                    total_runs += 1;
                    if hit {
                        bucket.1 += 1;
                        total_hits += 1;
                    }
                }
            }
        }
    }

    let mut summary = String::from("compound-encoding per (outer × inner) pair decode-hit rate:\n");
    for ((outer, inner), (runs, hits)) in &per_pair {
        // `max(1)` guards the division; every bucket has at least one run in practice.
        let pct = (*hits as f64 / (*runs).max(1) as f64) * 100.0;
        summary.push_str(&format!(
            " {outer:<14} ∘ {inner:<14} {hits:>4}/{runs:<4} ({pct:5.1}%)\n"
        ));
    }
    // With zero runs (contracts but no positives) this evaluates to 0%.
    let overall = (total_hits as f64 / total_runs.max(1) as f64) * 100.0;
    summary.push_str(&format!(
        " TOTAL {total_hits}/{total_runs} ({overall:.1}%) across {} pairs\n",
        per_pair.len(),
    ));
    eprintln!("{summary}");

    // Recall is informational by default; the floor is enforced only in strict mode.
    let strict = std::env::var("KEYHOG_COMPOUND_STRICT")
        .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
        .unwrap_or(false);
    if strict && overall < 50.0 {
        panic!("compound-encoding overall recall {overall:.1}% dropped below 50% floor");
    }
}