use super::signatures::{match_vendor_url, PxSignal, PX_SIGNALS};
use super::ChallengeVendor;
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::time::{Duration, SystemTime};
/// Vendor-specific summary of a request payload, as produced by the
/// `classify_*` helpers in this file.
///
/// Serialized internally tagged: a `kind` field carries the variant name,
/// renamed by serde's `snake_case` rule.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum PayloadShape {
/// Akamai body whose scanned prefix contains the marker `sensor_data`.
AkamaiSensorDataV1_7 {
/// Top-level keys of the body when it parses as a JSON object; falls back
/// to the single entry `sensor_data` when no keys were extracted.
keys_found: Vec<String>,
},
/// Akamai body whose scanned prefix contains the marker `sbsd_ek`.
/// `classify_akamai` always sets `has_sbsd_ek` to `true` for this variant.
AkamaiSensorDataV2 { has_sbsd_ek: bool },
/// PerimeterX body; `event_ids` holds the ids from `PX_SIGNALS` whose text
/// occurs in the scanned prefix (first 8192 bytes) of the body.
PerimeterXCollector { event_ids: Vec<String> },
/// DataDome body; `signal_count` is the number of `,` bytes in the body.
DataDomeReport { signal_count: usize },
/// Cloudflare body (JS challenge or Turnstile); `has_tk` is `true` when the
/// body contains the byte sequence `"tk"` (including the quotes).
CloudflareChallenge { has_tk: bool },
/// hCaptcha request; `sitekey` is the value of the first `sitekey` or `k`
/// query parameter of the URL, if any.
HCaptchaExecute { sitekey: Option<String> },
/// reCAPTCHA request (standard or enterprise vendor).
RecaptchaReload {
/// Value of the `k` query parameter, if present (last occurrence wins).
k: Option<String>,
/// Value of the `v` query parameter, if present (last occurrence wins).
v: Option<String>,
},
/// Vendor whose payload is not inspected. `classify_shape` uses this with
/// the note `generic` for the `GenericCaptcha` and `AccessDenied` vendors.
OpaqueVendor { note: &'static str },
/// No recognizable shape, e.g. an empty Akamai or PerimeterX body, or an
/// Akamai body without a known marker.
Unknown,
}
/// Summary record for an Akamai sensor payload.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type; the field notes below are inferred from names and types only and
/// should be confirmed against the code that fills them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AkamaiSensorInfo {
/// Detected payload version.
pub version: AkamaiVersion,
/// Presumably the payload length in bytes — TODO confirm.
pub payload_len: usize,
/// Presumably the top-level JSON keys of the payload — TODO confirm.
pub top_level_keys: Vec<String>,
/// Presumably the output of `infer_akamai_fields` — TODO confirm.
pub likely_fields: Vec<AkamaiField>,
}
/// Version tag for an Akamai sensor payload; serialized with serde's
/// `snake_case` renaming.
///
/// NOTE(review): presumably `V1_7` and `V2` mirror the
/// `PayloadShape::AkamaiSensorDataV1_7` / `AkamaiSensorDataV2` variants;
/// no mapping is visible in this part of the file — confirm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AkamaiVersion {
V1_7,
V2,
Unknown,
}
/// Category of signal reported by `infer_akamai_fields` when one of its
/// marker substrings occurs in a payload. Serialized with serde's
/// `snake_case` renaming.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum AkamaiField {
/// Reported when the payload contains `mmd` or `mouse`.
MouseEvents,
/// Reported when the payload contains `touch` or `doa`.
TouchEvents,
/// Reported when the payload contains `kact` or `key`.
Typing,
/// Reported when the payload contains `sc;` or `screen`.
Screen,
/// Reported when the payload contains `acc;` or `gyro`.
Sensor,
/// Reported when the payload contains `uaend` or `fpValstr`.
Fingerprint,
}
/// One classified vendor telemetry request, as built by `classify_request`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VendorTelemetry {
/// Vendor of the URL pattern that matched the request.
pub vendor: ChallengeVendor,
/// Full URL of the observed request (cloned from the request).
pub endpoint: url::Url,
/// Request method string, copied verbatim from the observed request.
pub method: String,
/// Length of the request body in bytes.
pub payload_size: usize,
/// Vendor-specific summary of the body / URL.
pub payload_shape: PayloadShape,
/// Wall-clock time at which the request was classified; (de)serialized
/// through the `super::system_time_serde` module.
#[serde(with = "super::system_time_serde")]
pub observed_at: SystemTime,
/// Session identifier copied from the observed request.
pub session_id: String,
/// Label of the URL pattern that matched.
pub pattern_label: &'static str,
}
/// Borrowed view of a single request, used as the input to
/// `classify_request`. All fields borrow from the caller for `'a`.
#[derive(Debug, Clone)]
pub struct ObservedRequest<'a> {
/// URL the request was sent to; matched against the vendor URL patterns.
pub url: &'a url::Url,
/// Request method string (copied as-is into the telemetry record).
pub method: &'a str,
/// Raw request body bytes; may be empty.
pub body: &'a [u8],
/// Identifier of the session the request belongs to.
pub session_id: &'a str,
}
/// Classifies one observed request as vendor telemetry.
///
/// Returns `None` when `match_vendor_url` does not recognize `req.url`.
/// Otherwise returns a [`VendorTelemetry`] describing the matched vendor, the
/// request (URL, method, body length, session id), the vendor-specific
/// payload shape, and the current wall-clock time as `observed_at`.
pub fn classify_request(req: &ObservedRequest<'_>) -> Option<VendorTelemetry> {
    match_vendor_url(req.url).map(|matched| {
        let vendor = matched.vendor;
        VendorTelemetry {
            vendor,
            endpoint: req.url.clone(),
            method: req.method.to_owned(),
            payload_size: req.body.len(),
            // Shape is computed before the timestamp is taken, so
            // `observed_at` marks the end of classification.
            payload_shape: classify_shape(vendor, req.url, req.body),
            observed_at: SystemTime::now(),
            session_id: req.session_id.to_owned(),
            pattern_label: matched.label,
        }
    })
}
/// Dispatches to the vendor-specific payload classifier.
///
/// Body-based vendors (Akamai, PerimeterX, DataDome, Cloudflare) only look at
/// `body`; the captcha vendors (hCaptcha, reCAPTCHA) are classified from the
/// query string of `url`. Generic captcha and access-denied pages are not
/// inspected and yield an opaque shape with the note `generic`.
fn classify_shape(vendor: ChallengeVendor, url: &url::Url, body: &[u8]) -> PayloadShape {
    match vendor {
        // Vendors with no payload inspection.
        ChallengeVendor::GenericCaptcha | ChallengeVendor::AccessDenied => {
            PayloadShape::OpaqueVendor { note: "generic" }
        }
        // Vendors classified from the URL query string.
        ChallengeVendor::HCaptcha => classify_hcaptcha(url, body),
        ChallengeVendor::Recaptcha | ChallengeVendor::RecaptchaEnterprise => {
            classify_recaptcha(url, body)
        }
        // Vendors classified from the request body.
        ChallengeVendor::Akamai => classify_akamai(body),
        ChallengeVendor::PerimeterX => classify_perimeterx(body),
        ChallengeVendor::DataDome => classify_datadome(body),
        ChallengeVendor::CloudflareJsChallenge | ChallengeVendor::CloudflareTurnstile => {
            classify_cloudflare(body)
        }
    }
}
/// Classifies an Akamai request body by looking for known marker strings.
///
/// Only the first 4096 bytes are scanned for markers:
/// - `sbsd_ek` yields [`PayloadShape::AkamaiSensorDataV2`];
/// - otherwise `sensor_data` yields [`PayloadShape::AkamaiSensorDataV1_7`],
///   with `keys_found` set to the top-level keys when the whole body parses
///   as a JSON object, or to the single entry `sensor_data` when no keys
///   could be extracted;
/// - an empty body or a body without either marker yields
///   [`PayloadShape::Unknown`].
fn classify_akamai(body: &[u8]) -> PayloadShape {
    if body.is_empty() {
        return PayloadShape::Unknown;
    }
    let head = &body[..body.len().min(4096)];
    // Decode lossily: the 4096-byte cut can land inside a multi-byte UTF-8
    // sequence, and bodies may carry stray non-UTF-8 bytes. A strict decode
    // would reject the whole prefix and hide ASCII markers that are present.
    let head_text = String::from_utf8_lossy(head);
    if head_text.contains("sbsd_ek") {
        return PayloadShape::AkamaiSensorDataV2 { has_sbsd_ek: true };
    }
    if !head_text.contains("sensor_data") {
        return PayloadShape::Unknown;
    }
    // Key extraction is best effort and uses the full body, not the prefix.
    let mut keys: Vec<String> = serde_json::from_slice::<serde_json::Value>(body)
        .ok()
        .and_then(|value| {
            value
                .as_object()
                .map(|obj| obj.keys().cloned().collect::<Vec<String>>())
        })
        .unwrap_or_default();
    if keys.is_empty() {
        keys.push("sensor_data".to_string());
    }
    PayloadShape::AkamaiSensorDataV1_7 { keys_found: keys }
}
/// Guesses which signal categories an Akamai payload carries.
///
/// Each category is reported when at least one of its two marker substrings
/// occurs anywhere in `payload`. The result lists the categories in a fixed
/// order (mouse, touch, typing, screen, sensor, fingerprint), each at most
/// once, and is empty when no marker is present.
pub fn infer_akamai_fields(payload: &str) -> Vec<AkamaiField> {
    // (category, marker substrings); table order defines output order.
    const MARKERS: [(AkamaiField, [&str; 2]); 6] = [
        (AkamaiField::MouseEvents, ["mmd", "mouse"]),
        (AkamaiField::TouchEvents, ["touch", "doa"]),
        (AkamaiField::Typing, ["kact", "key"]),
        (AkamaiField::Screen, ["sc;", "screen"]),
        (AkamaiField::Sensor, ["acc;", "gyro"]),
        (AkamaiField::Fingerprint, ["uaend", "fpValstr"]),
    ];
    MARKERS
        .iter()
        .filter(|(_, needles)| needles.iter().any(|&needle| payload.contains(needle)))
        .map(|(field, _)| *field)
        .collect()
}
/// Classifies a PerimeterX collector body by listing the known signal ids
/// it mentions.
///
/// An empty body yields [`PayloadShape::Unknown`]. Otherwise the first 8192
/// bytes are scanned and every entry of `PX_SIGNALS` whose `id` occurs in
/// that prefix is recorded, in catalog order, in
/// [`PayloadShape::PerimeterXCollector`] (the list may be empty).
fn classify_perimeterx(body: &[u8]) -> PayloadShape {
    if body.is_empty() {
        return PayloadShape::Unknown;
    }
    let head = &body[..body.len().min(8192)];
    // Decode lossily: the 8192-byte cut can land inside a multi-byte UTF-8
    // sequence, and bodies may carry stray non-UTF-8 bytes. A strict decode
    // would reject the whole prefix and report no signals at all.
    let head_text = String::from_utf8_lossy(head);
    let event_ids: Vec<String> = PX_SIGNALS
        .iter()
        .filter(|sig| head_text.contains(sig.id))
        .map(|sig| sig.id.to_string())
        .collect();
    PayloadShape::PerimeterXCollector { event_ids }
}
/// Summarizes a DataDome body by counting its `,` bytes.
///
/// The count is reported as `signal_count`; an empty body yields a count
/// of zero.
fn classify_datadome(body: &[u8]) -> PayloadShape {
    let mut signal_count = 0usize;
    for &byte in body {
        if byte == b',' {
            signal_count += 1;
        }
    }
    PayloadShape::DataDomeReport { signal_count }
}
/// Summarizes a Cloudflare challenge body.
///
/// `has_tk` is `true` when the body contains the quoted key `"tk"` as a raw
/// byte sequence, and `false` otherwise (including for an empty body).
fn classify_cloudflare(body: &[u8]) -> PayloadShape {
    // The quotes are part of the needle.
    const TK_KEY: &[u8] = b"\"tk\"";
    let has_tk = if body.is_empty() {
        false
    } else {
        std_bstr_contains(body, TK_KEY)
    };
    PayloadShape::CloudflareChallenge { has_tk }
}
/// Summarizes an hCaptcha request from its URL.
///
/// `sitekey` is the decoded value of the first query parameter named
/// `sitekey` or `k`, or `None` when neither is present. The body is ignored.
fn classify_hcaptcha(url: &url::Url, _body: &[u8]) -> PayloadShape {
    let sitekey = url.query_pairs().find_map(|(name, value)| {
        let is_sitekey_param = matches!(&*name, "sitekey" | "k");
        is_sitekey_param.then(|| value.into_owned())
    });
    PayloadShape::HCaptchaExecute { sitekey }
}
/// Summarizes a reCAPTCHA request from its URL.
///
/// Collects the decoded values of the `k` and `v` query parameters. When a
/// parameter is repeated, the last occurrence wins; a missing parameter is
/// reported as `None`. The body is ignored.
fn classify_recaptcha(url: &url::Url, _body: &[u8]) -> PayloadShape {
    let (mut k_param, mut v_param) = (None, None);
    for (name, value) in url.query_pairs() {
        match &*name {
            "k" => k_param = Some(value.into_owned()),
            "v" => v_param = Some(value.into_owned()),
            _ => {}
        }
    }
    PayloadShape::RecaptchaReload {
        k: k_param,
        v: v_param,
    }
}
/// Reports whether `needle` occurs as a contiguous byte run in `haystack`.
///
/// An empty `needle` is never considered a match, and a `needle` longer than
/// `haystack` cannot match; both cases return `false`.
fn std_bstr_contains(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        0 => false,
        len if len > haystack.len() => false,
        len => haystack.windows(len).any(|candidate| candidate == needle),
    }
}
/// Sliding-window counter of telemetry observations per
/// `(session id, vendor)` pair.
#[derive(Debug)]
pub struct TelemetryTracker {
// Observations older than this (relative to the newest one) are pruned.
window: Duration,
// `observe` returns `true` once a bucket holds at least this many entries.
threshold: usize,
// Timestamps of retained observations, keyed by (session id, vendor).
buckets: std::collections::HashMap<(String, ChallengeVendor), VecDeque<SystemTime>>,
}
impl TelemetryTracker {
pub fn new() -> Self {
Self {
window: Duration::from_secs(30),
threshold: 20,
buckets: std::collections::HashMap::new(),
}
}
pub fn with_config(window: Duration, threshold: usize) -> Self {
Self {
window,
threshold,
buckets: std::collections::HashMap::new(),
}
}
pub fn observe(&mut self, session_id: &str, vendor: ChallengeVendor, at: SystemTime) -> bool {
let key = (session_id.to_string(), vendor);
let bucket = self.buckets.entry(key).or_default();
while let Some(front) = bucket.front().copied() {
if at
.duration_since(front)
.map(|d| d > self.window)
.unwrap_or(false)
{
bucket.pop_front();
} else {
break;
}
}
bucket.push_back(at);
bucket.len() >= self.threshold
}
pub fn hits(&self, session_id: &str, vendor: ChallengeVendor) -> usize {
self.buckets
.get(&(session_id.to_string(), vendor))
.map(|b| b.len())
.unwrap_or(0)
}
}
impl Default for TelemetryTracker {
fn default() -> Self {
Self::new()
}
}
/// Returns the `PX_SIGNALS` table imported from `super::signatures`, i.e. the
/// same catalog `classify_perimeterx` scans request bodies against.
pub fn px_catalog() -> &'static [PxSignal] {
PX_SIGNALS
}
// Unit tests for request classification, the sliding-window tracker and the
// Akamai field heuristics. URL expectations rely on the vendor patterns in
// `super::signatures`, which are not defined in this file.
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
// Builds an `ObservedRequest` with the fixed session id "s1".
fn req<'a>(url: &'a url::Url, method: &'a str, body: &'a [u8]) -> ObservedRequest<'a> {
ObservedRequest {
url,
method,
body,
session_id: "s1",
}
}
// A URL expected to match no vendor pattern must not be classified.
#[test]
fn classify_none_on_unknown_url() {
let u = url::Url::parse("https://example.com/foo").unwrap();
assert!(classify_request(&req(&u, "GET", b"")).is_none());
}
// A body carrying the `sensor_data` marker is classified as the v1.7 shape.
#[test]
fn classify_akamai_v17() {
let u = url::Url::parse("https://www.example.com/_bm/_data").unwrap();
let body = br#"{"sensor_data":"1.7,-1,0,0,...garbage"}"#;
let t = classify_request(&req(&u, "POST", body)).unwrap();
assert_eq!(t.vendor, ChallengeVendor::Akamai);
assert!(matches!(
t.payload_shape,
PayloadShape::AkamaiSensorDataV1_7 { .. }
));
}
// A body carrying the `sbsd_ek` marker is classified as the v2 shape.
#[test]
fn classify_akamai_v2_sbsd() {
let u = url::Url::parse("https://www.example.com/akam/11/abc").unwrap();
let body = br#"{"sbsd_ek":"encblobhere","t":"..."}"#;
let t = classify_request(&req(&u, "POST", body)).unwrap();
assert_eq!(
t.payload_shape,
PayloadShape::AkamaiSensorDataV2 { has_sbsd_ek: true }
);
}
// Signal ids present in the body must show up in `event_ids`; this assumes
// PX320, PX333 and PX346 are entries of `PX_SIGNALS`.
#[test]
fn classify_perimeterx_extracts_signal_ids() {
let u = url::Url::parse("https://client.perimeterx.net/api/v2/collector?appId=PX").unwrap();
let body = br#"{"PX320":"1","PX333":"Intel Iris","PX346":"false"}"#;
let t = classify_request(&req(&u, "POST", body)).unwrap();
match t.payload_shape {
PayloadShape::PerimeterXCollector { event_ids } => {
assert!(event_ids.contains(&"PX320".to_string()));
assert!(event_ids.contains(&"PX333".to_string()));
assert!(event_ids.contains(&"PX346".to_string()));
}
other => panic!("expected PerimeterXCollector, got {other:?}"),
}
}
// The `sitekey` query parameter is extracted even when the body is empty.
#[test]
fn classify_hcaptcha_picks_sitekey() {
let u = url::Url::parse(
"https://hcaptcha.com/checkcaptcha/xyz?sitekey=10000000-ffff-ffff-ffff-000000000001",
)
.unwrap();
let t = classify_request(&req(&u, "POST", b"")).unwrap();
match t.payload_shape {
PayloadShape::HCaptchaExecute { sitekey } => assert!(sitekey.is_some()),
_ => panic!("expected HCaptchaExecute"),
}
}
// Both `k` and `v` query parameters are captured; the body is irrelevant.
#[test]
fn classify_recaptcha_reload() {
let u =
url::Url::parse("https://www.google.com/recaptcha/api2/reload?k=SITEKEY&v=V").unwrap();
let t = classify_request(&req(&u, "POST", b"foo")).unwrap();
match t.payload_shape {
PayloadShape::RecaptchaReload { k, v } => {
assert_eq!(k.as_deref(), Some("SITEKEY"));
assert_eq!(v.as_deref(), Some("V"));
}
_ => panic!("expected RecaptchaReload"),
}
}
// With threshold 5, the first four observations inside the window must not
// fire and the fifth must.
#[test]
fn tracker_fires_at_threshold() {
let mut t = TelemetryTracker::with_config(Duration::from_secs(30), 5);
let now = SystemTime::now();
for i in 0..4 {
assert!(
!t.observe(
"s1",
ChallengeVendor::PerimeterX,
now + Duration::from_millis(i * 10)
),
"should not fire at {i}"
);
}
assert!(
t.observe(
"s1",
ChallengeVendor::PerimeterX,
now + Duration::from_millis(50)
),
"5th hit should fire"
);
}
// Two observations at `now` fall outside a 1-second window once a third
// arrives 2 seconds later, so only that third one remains (below threshold 3).
#[test]
fn tracker_window_expires_old_entries() {
let mut t = TelemetryTracker::with_config(Duration::from_secs(1), 3);
let now = SystemTime::now();
t.observe("s1", ChallengeVendor::Akamai, now);
t.observe("s1", ChallengeVendor::Akamai, now);
let later = now + Duration::from_secs(2);
assert!(!t.observe("s1", ChallengeVendor::Akamai, later));
assert_eq!(t.hits("s1", ChallengeVendor::Akamai), 1);
}
// Checks four of the categories the sample string triggers. The sample also
// contains `touch`, so `TouchEvents` is returned as well but is not asserted.
#[test]
fn infer_akamai_fields_basic() {
let s = "uaend;mmd=1;touch=no;sc;1920,1080;kact;abc;fpValstr=x";
let fields = infer_akamai_fields(s);
assert!(fields.contains(&AkamaiField::MouseEvents));
assert!(fields.contains(&AkamaiField::Screen));
assert!(fields.contains(&AkamaiField::Typing));
assert!(fields.contains(&AkamaiField::Fingerprint));
}
}