/// Vendor that a detected challenge reference is attributed to.
///
/// Each variant has a string identifier available through `code()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChallengeVendor {
/// AWS WAF (code `aws_waf`).
AwsWaf,
/// DataDome (code `datadome`).
DataDome,
/// Cloudflare (code `cloudflare`).
Cloudflare,
/// PerimeterX (code `perimeterx`).
PerimeterX,
/// Akamai (code `akamai`).
Akamai,
}
impl ChallengeVendor {
#[must_use]
pub fn code(self) -> &'static str {
match self {
Self::AwsWaf => "aws_waf",
Self::DataDome => "datadome",
Self::Cloudflare => "cloudflare",
Self::PerimeterX => "perimeterx",
Self::Akamai => "akamai",
}
}
}
/// One reference to a challenge vendor found while scanning a document.
///
/// Two references compare equal when both the vendor and the snippet match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChallengeReference {
/// Vendor paired with the needle that matched.
pub vendor: ChallengeVendor,
/// Text surrounding the match, sliced from the input as written (original casing).
pub snippet: String,
}
/// Substrings that mark a challenge reference, each paired with the vendor it is attributed to.
///
/// `scan_for_challenges` searches for these in an ASCII-lowercased copy of the
/// input, so every entry must itself be lowercase. The scanner walks the table
/// in order, which makes the entry order the primary ordering of its results.
const CHALLENGE_NEEDLES: &[(&str, ChallengeVendor)] = &[
(".awswaf.com", ChallengeVendor::AwsWaf),
(".datadome.co", ChallengeVendor::DataDome),
("challenges.cloudflare.com", ChallengeVendor::Cloudflare),
// Three distinct host suffixes all map to PerimeterX.
(".perimeterx.net", ChallengeVendor::PerimeterX),
(".px-cdn.net", ChallengeVendor::PerimeterX),
(".px-cloud.net", ChallengeVendor::PerimeterX),
// Unlike the entries above, this one is a path fragment rather than a host name.
("/akam/", ChallengeVendor::Akamai),
];
/// Scans `html` for references to known challenge-vendor assets.
///
/// Matching is ASCII-case-insensitive: each needle in `CHALLENGE_NEEDLES` is
/// searched for in a lowercased copy of `html`. For every non-overlapping match
/// the surrounding token is captured as the snippet, in the input's original
/// casing:
///
/// * it starts just after the nearest preceding `"`, `'`, `<` or space, or at
///   most 16 bytes before the match when there is no such delimiter;
/// * it ends just before the next `"`, `'`, `>` or space, or at most 32 bytes
///   past the needle (clamped to the end of the input) when there is none.
///
/// The byte-count fallbacks are snapped inward to UTF-8 character boundaries, so
/// multi-byte text next to a match cannot turn the snippet into an empty string.
///
/// Returns the hits in needle-table order (document order within one needle),
/// with exact duplicates (same vendor and same snippet) removed. The result is
/// empty when nothing matches.
#[must_use]
pub fn scan_for_challenges(html: &str) -> Vec<ChallengeReference> {
    const OPENERS: [char; 4] = ['"', '\'', '<', ' '];
    const CLOSERS: [char; 4] = ['"', '\'', '>', ' '];
    const LOOKBEHIND: usize = 16;
    const LOOKAHEAD: usize = 32;

    // ASCII lowercasing only rewrites single-byte characters, so byte offsets
    // and char boundaries in `lower` are identical to those in `html`.
    let lower = html.to_ascii_lowercase();
    let mut hits: Vec<ChallengeReference> = Vec::new();

    for &(needle, vendor) in CHALLENGE_NEEDLES {
        for (abs, _) in lower.match_indices(needle) {
            let window_start = match lower[..abs].rfind(OPENERS) {
                // Delimiters are single-byte, so `i + 1` is always a boundary.
                Some(i) => i + 1,
                None => {
                    // Move forward to the next boundary; `abs` itself is one,
                    // so this stops at or before the match.
                    let mut at = abs.saturating_sub(LOOKBEHIND);
                    while !html.is_char_boundary(at) {
                        at += 1;
                    }
                    at
                }
            };
            let window_end = match lower[abs..].find(CLOSERS) {
                Some(i) => abs + i,
                None => {
                    // Move back to the previous boundary; the end of the needle
                    // is one, so the needle is never truncated.
                    let mut at = lower.len().min(abs + needle.len() + LOOKAHEAD);
                    while !html.is_char_boundary(at) {
                        at -= 1;
                    }
                    at
                }
            };

            let snippet = html.get(window_start..window_end).unwrap_or("").to_string();
            let reference = ChallengeReference { vendor, snippet };
            if !hits.contains(&reference) {
                hits.push(reference);
            }
        }
    }

    hits
}
/// Returns the vendor of the first hit produced by `scan_for_challenges`, or
/// `None` when the scan finds nothing.
///
/// "First" follows the scanner's own ordering. The scanner walks its needle
/// table before the document, so this is not necessarily the vendor whose
/// reference appears earliest in `html`.
#[must_use]
pub fn first_vendor(html: &str) -> Option<ChallengeVendor> {
    let hits = scan_for_challenges(html);
    hits.first().map(|hit| hit.vendor)
}
// Unit tests for the challenge scanner: one detection case per vendor needle
// that is exercised, plus the negative, case-insensitivity, dedup and
// multi-vendor cases.
#[cfg(test)]
mod tests {
use super::{ChallengeVendor, first_vendor, scan_for_challenges};
// A single AWS WAF script tag yields exactly one hit whose snippet keeps the host.
#[test]
fn detects_aws_waf_script() {
let html = r#"<script src="https://abc123.awswaf.com/xyz/challenge.js"></script>"#;
let hits = scan_for_challenges(html);
assert_eq!(hits.len(), 1);
assert_eq!(hits[0].vendor, ChallengeVendor::AwsWaf);
assert!(hits[0].snippet.contains("awswaf.com"));
}
// Single-quoted attribute values are handled as well as double-quoted ones.
#[test]
fn detects_datadome_script() {
let html = r"<script src='https://js.datadome.co/boot.js'></script>";
assert_eq!(first_vendor(html), Some(ChallengeVendor::DataDome));
}
// The Cloudflare needle is a full host name and matches inside an iframe src.
#[test]
fn detects_cloudflare_turnstile_iframe() {
let html = r#"<iframe src="https://challenges.cloudflare.com/cdn-cgi/challenge-platform/..."></iframe>"#;
assert_eq!(first_vendor(html), Some(ChallengeVendor::Cloudflare));
}
// Covers the `.perimeterx.net` needle only; the two `px-*` needles are not exercised here.
#[test]
fn detects_perimeterx_cdn() {
let html = r#"<script src="https://client.perimeterx.net/PX12345/main.min.js"></script>"#;
assert_eq!(first_vendor(html), Some(ChallengeVendor::PerimeterX));
}
// Markup without any needle produces no hits and no vendor.
#[test]
fn ignores_clean_html() {
let html = "<html><body><h1>Welcome</h1><p>No challenge here.</p></body></html>";
assert!(scan_for_challenges(html).is_empty());
assert_eq!(first_vendor(html), None);
}
// An upper-cased host must still match the lowercase needle.
#[test]
fn is_case_insensitive() {
let html = r#"<SCRIPT SRC="HTTPS://ABC.AWSWAF.COM/x.js"></SCRIPT>"#;
assert_eq!(first_vendor(html), Some(ChallengeVendor::AwsWaf));
}
// Two identical script tags give identical (vendor, snippet) pairs, so only one hit is kept.
#[test]
fn deduplicates_identical_snippets() {
let html = r#"
<script src="https://a.awswaf.com/x.js"></script>
<script src="https://a.awswaf.com/x.js"></script>
"#;
let hits = scan_for_challenges(html);
assert_eq!(hits.len(), 1, "expected dedup on identical snippets");
}
// References to different vendors are all reported; the order is not asserted here.
#[test]
fn reports_multiple_vendors() {
let html = r#"
<script src="https://abc.awswaf.com/c.js"></script>
<script src="https://js.datadome.co/b.js"></script>
"#;
let hits = scan_for_challenges(html);
assert_eq!(hits.len(), 2);
let vendors: Vec<_> = hits.iter().map(|h| h.vendor).collect();
assert!(vendors.contains(&ChallengeVendor::AwsWaf));
assert!(vendors.contains(&ChallengeVendor::DataDome));
}
}