use void_crawl_core::classify_antibot;
/// One canned HTTP response together with the classification expected for it.
struct Case {
    /// Short identifier used to label this fixture in failure output.
    name: &'static str,
    /// HTTP status code of the canned response.
    status: u16,
    /// Response headers as `(name, value)` pairs.
    headers: &'static [(&'static str, &'static str)],
    /// Response body text.
    body: &'static str,
    /// Vendor that should be reported, or `None` when no detection is expected.
    expect_vendor: Option<&'static str>,
    /// Expected value of the classifier's `challenged` flag.
    expect_challenged: bool,
}
/// Builds the fixture corpus: canned responses paired with the vendor and
/// `challenged` flag each one is expected to produce.
///
/// The fixtures come in three groups, returned in this order:
/// 1. block / challenge pages (`expect_challenged: true`),
/// 2. CDN responses that name a vendor but are not challenged,
/// 3. plain origin responses with no expected vendor.
fn cases() -> Vec<Case> {
    // Group 1: block or challenge pages; a vendor is expected and the flag must be set.
    let mut corpus = vec![
        Case {
            name: "datadome_block_page",
            status: 403,
            headers: &[
                ("server", "DataDome"),
                ("x-datadome", "protected"),
                ("x-dd-b", "1"),
            ],
            body: "<html><body>Please enable JS and cookies. <script src=\"https://geo.captcha-delivery.com/captcha/\"></script></body></html>",
            expect_vendor: Some("datadome"),
            expect_challenged: true,
        },
        Case {
            name: "imperva_incapsula_incident",
            status: 403,
            headers: &[
                ("x-iinfo", "12-3456-7890"),
                ("set-cookie", "visid_incap_123=abc; path=/"),
            ],
            body: "<html>Request unsuccessful. Incident ID: 1234-5678. Powered by _Incapsula_Resource</html>",
            expect_vendor: Some("imperva"),
            expect_challenged: true,
        },
        Case {
            name: "akamai_access_denied",
            status: 403,
            headers: &[("server", "AkamaiGHost"), ("mime-version", "1.0")],
            body: "<html><head><title>Access Denied</title></head><body>Reference #18.abcd1234 generated by errors.edgesuite.net</body></html>",
            expect_vendor: Some("akamai"),
            expect_challenged: true,
        },
        Case {
            name: "perimeterx_captcha",
            status: 403,
            headers: &[("set-cookie", "_pxhd=abc; path=/")],
            body: "<html><div id=\"px-captcha\"></div><script>window._pxAppId='PXabc'</script></html>",
            expect_vendor: Some("perimeterx"),
            expect_challenged: true,
        },
        Case {
            name: "f5_bigip_rejected",
            status: 200,
            headers: &[("set-cookie", "BIGipServerpool=123.456.789; path=/")],
            body: "<html><body>The requested URL was rejected. Please consult with your administrator. Your support ID is: 1234567890</body></html>",
            expect_vendor: Some("f5"),
            expect_challenged: true,
        },
        Case {
            name: "awswaf_token_challenge",
            status: 202,
            headers: &[("x-amzn-waf-action", "challenge")],
            body: "<html><script src=\"https://abc.token.awswaf.com/abc/challenge.js\"></script></html>",
            expect_vendor: Some("awswaf"),
            expect_challenged: true,
        },
        Case {
            name: "hcaptcha_wall",
            status: 200,
            headers: &[("content-type", "text/html")],
            body: "<html><div class=\"h-captcha\" data-sitekey=\"abc\"></div><script src=\"https://hcaptcha.com/1/api.js\"></script></html>",
            expect_vendor: Some("hcaptcha"),
            expect_challenged: true,
        },
        Case {
            name: "sucuri_firewall_block",
            status: 403,
            headers: &[
                ("server", "Sucuri/Cloudproxy"),
                ("x-sucuri-id", "12001"),
                ("x-sucuri-block", "RBL"),
            ],
            body: "<html>Access Denied - Sucuri Website Firewall</html>",
            expect_vendor: Some("sucuri"),
            expect_challenged: true,
        },
    ];

    // Group 2: CDN responses; the vendor should still be named, but the flag must stay clear.
    corpus.extend([
        Case {
            name: "cloudflare_cdn_passthrough",
            status: 200,
            headers: &[
                ("server", "cloudflare"),
                ("cf-ray", "8aabbccdd1122-IAD"),
                ("cf-cache-status", "HIT"),
            ],
            body: "<html><body>Real content served fine.</body></html>",
            expect_vendor: Some("cloudflare"),
            expect_challenged: false,
        },
        Case {
            name: "akamai_cdn_passthrough",
            status: 200,
            headers: &[
                ("server", "AkamaiGHost"),
                ("x-akamai-transformed", "9 1234 0 pmb=mTOPb"),
            ],
            body: "<html><body>Article text, no wall.</body></html>",
            expect_vendor: Some("akamai"),
            expect_challenged: false,
        },
        Case {
            name: "cloudfront_cdn_presence",
            status: 200,
            headers: &[
                ("via", "1.1 abcdef.cloudfront.net (CloudFront)"),
                ("x-amz-cf-id", "abcdef=="),
            ],
            body: "<html><body>Static asset.</body></html>",
            expect_vendor: Some("cloudfront"),
            expect_challenged: false,
        },
    ]);

    // Group 3: plain origin servers; no vendor and no challenge expected.
    corpus.extend([
        Case {
            name: "plain_nginx",
            status: 200,
            headers: &[
                ("server", "nginx/1.25.3"),
                ("content-type", "text/html; charset=utf-8"),
            ],
            body: "<html><body><h1>Welcome</h1></body></html>",
            expect_vendor: None,
            expect_challenged: false,
        },
        Case {
            name: "plain_apache_404",
            status: 404,
            headers: &[("server", "Apache/2.4.59")],
            body: "<html><body>Not Found</body></html>",
            expect_vendor: None,
            expect_challenged: false,
        },
    ]);

    corpus
}
/// Runs every fixture from `cases()` through `classify_antibot` and asserts that
/// vendor precision, vendor recall, and challenged-flag accuracy each reach 0.95.
///
/// Scoring per fixture:
/// - a vendor is expected and appears among the reported vendors: true positive;
///   expected but absent: false negative (other reported vendors are not penalised);
/// - no vendor is expected but `detected()` is true: false positive;
/// - `challenged` equals `expect_challenged`: counts towards flag accuracy.
///
/// Every mismatch is recorded and appended to whichever assertion message fires.
#[test]
// The counters are converted to `f64` only to compute the ratios below.
#[allow(clippy::cast_precision_loss)]
fn detection_accuracy_meets_threshold() {
    let corpus = cases();
    let total = corpus.len();

    let (mut true_pos, mut false_neg, mut false_pos) = (0usize, 0usize, 0usize);
    let mut flag_hits = 0usize;
    let mut failures: Vec<String> = Vec::new();

    for case in &corpus {
        // The fixture headers are static string pairs; hand the classifier owned copies.
        let headers: Vec<(String, String)> = case
            .headers
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect();
        let verdict = classify_antibot(case.status, &headers, case.body);

        if let Some(want) = case.expect_vendor {
            if verdict.vendors.iter().any(|found| found == want) {
                true_pos += 1;
            } else {
                false_neg += 1;
                failures.push(format!(
                    "MISS [{}] expected {want:?}, got {:?}",
                    case.name, verdict.vendors
                ));
            }
        } else if verdict.detected() {
            false_pos += 1;
            failures.push(format!(
                "FALSE [{}] expected clean, got {:?}",
                case.name, verdict.vendors
            ));
        }

        if verdict.challenged == case.expect_challenged {
            flag_hits += 1;
        } else {
            failures.push(format!(
                "FLAG [{}] challenged: expected {}, got {}",
                case.name, case.expect_challenged, verdict.challenged
            ));
        }
    }

    // Clamp the denominator to at least 1 so an empty tally gives 0.0 instead of NaN.
    let ratio = |hits: usize, out_of: usize| hits as f64 / out_of.max(1) as f64;
    let precision = ratio(true_pos, true_pos + false_pos);
    let recall = ratio(true_pos, true_pos + false_neg);
    let challenged_acc = ratio(flag_hits, total);

    let detail = failures.join("\n ");
    assert!(precision >= 0.95, "vendor precision {precision:.3} below 0.95\n {detail}");
    assert!(recall >= 0.95, "vendor recall {recall:.3} below 0.95\n {detail}");
    assert!(
        challenged_acc >= 0.95,
        "challenged-flag accuracy {challenged_acc:.3} below 0.95\n {detail}"
    );
}