use http::HeaderMap;
use serde::{Deserialize, Serialize};
use crate::antibot::{ChallengeLevel, ChallengeSignal, ChallengeVendor, SessionState};
use crate::config::RenderSessionScope;
use crate::escalation::detect_antibot_vendor;
use crate::policy::profile::PolicyThresholds;
use crate::policy::reason::{Decision, DecisionReason};
use crate::queue::FetchMethod;
/// Observations fed into [`decide_scope`] to decide whether the current
/// render-session scope should change.
#[derive(Debug, Clone)]
pub enum ScopeSignal {
/// `decide_scope` narrows any scope broader than `Origin` down to `Origin`.
LoginPageDetected,
/// Anti-bot signal carrying the vendor and challenge level. `decide_scope`
/// ignores the vendor and reacts to the level only.
AntibotHostility(ChallengeVendor, ChallengeLevel),
/// `decide_scope` answers this with `Force(Url)` regardless of the current scope.
HostQuarantined,
/// `decide_scope` currently always answers this with `Keep`.
CrossOriginFetch,
}
/// Outcome of [`decide_scope`]: what to do with the current render-session scope.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScopeDecision {
/// Leave the current scope unchanged.
Keep,
/// Narrow to the given scope; `decide_scope` only returns this when the
/// current scope ranks strictly broader than the target.
DemoteTo(RenderSessionScope),
/// Widen to the given scope. NOTE(review): not produced by `decide_scope`
/// as written here — confirm whether another caller constructs it.
PromoteTo(RenderSessionScope),
/// Switch to the given scope without comparing against the current one.
Force(RenderSessionScope),
}
/// Maps a session scope to its breadth rank so scopes can be compared.
///
/// A higher rank means a broader scope: `Url` (0) is the narrowest and
/// `RegistrableDomain` (3) the broadest.
fn scope_rank(s: RenderSessionScope) -> u8 {
    // Listed broadest-first; the numeric values are what callers compare.
    match s {
        RenderSessionScope::RegistrableDomain => 3,
        RenderSessionScope::Host => 2,
        RenderSessionScope::Origin => 1,
        RenderSessionScope::Url => 0,
    }
}
/// Decides how the render-session scope should react to `signal`.
///
/// * `HostQuarantined` forces `Url` scope unconditionally.
/// * A `HardBlock` anti-bot level narrows to `Url`.
/// * A detected login page, or a `ChallengePage` / `WidgetPresent` anti-bot
///   level, narrows to `Origin`.
/// * `CrossOriginFetch` and a merely `Suspected` anti-bot level keep the scope.
///
/// Narrowing only happens when `current` is strictly broader than the target
/// (per `scope_rank`); otherwise the answer is `Keep`.
pub fn decide_scope(current: RenderSessionScope, signal: &ScopeSignal) -> ScopeDecision {
    // Demote to `ceiling` only if the current scope ranks above it.
    let narrow_to = |ceiling: RenderSessionScope| {
        if scope_rank(current) > scope_rank(ceiling) {
            ScopeDecision::DemoteTo(ceiling)
        } else {
            ScopeDecision::Keep
        }
    };

    match signal {
        ScopeSignal::HostQuarantined => ScopeDecision::Force(RenderSessionScope::Url),
        ScopeSignal::AntibotHostility(_, ChallengeLevel::HardBlock) => {
            narrow_to(RenderSessionScope::Url)
        }
        ScopeSignal::LoginPageDetected
        | ScopeSignal::AntibotHostility(
            _,
            ChallengeLevel::ChallengePage | ChallengeLevel::WidgetPresent,
        ) => narrow_to(RenderSessionScope::Origin),
        ScopeSignal::CrossOriginFetch
        | ScopeSignal::AntibotHostility(_, ChallengeLevel::Suspected) => ScopeDecision::Keep,
    }
}
/// What to do with the current browser/proxy session after a challenge or
/// telemetry signal.
///
/// Serialized by serde in `snake_case`; `SessionAction::as_str` returns the
/// same spellings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SessionAction {
/// Serialized as `reuse_session`.
ReuseSession,
/// Serialized as `rotate_proxy`.
RotateProxy,
/// Serialized as `kill_context`.
KillContext,
/// Serialized as `reopen_browser`.
ReopenBrowser,
/// Serialized as `give_up`. This is the only action for which
/// `PolicyEngine::maybe_human_handoff` considers a handoff.
GiveUp,
}
impl SessionAction {
pub fn as_str(&self) -> &'static str {
match self {
Self::ReuseSession => "reuse_session",
Self::RotateProxy => "rotate_proxy",
Self::KillContext => "kill_context",
Self::ReopenBrowser => "reopen_browser",
Self::GiveUp => "give_up",
}
}
}
/// Borrowed snapshot of everything the `PolicyEngine` decision functions
/// look at for a single fetch.
pub struct PolicyContext<'a> {
/// URL being fetched. Not read by the `PolicyEngine` methods in this file.
pub url: &'a url::Url,
/// Host of the request. Not read by the `PolicyEngine` methods in this file.
pub host: &'a str,
/// Fetch method the job was queued with; selects the pre-fetch decision and
/// gates render escalation after the fetch.
pub initial_method: FetchMethod,
/// HTTP status of the response, if any. `decide_post_fetch` treats `None` as `0`.
pub response_status: Option<u16>,
/// Response headers, if any; used for anti-bot detection and the HTML
/// content-type check.
pub response_headers: Option<&'a HeaderMap>,
/// Response body bytes, if any; used for anti-bot detection and JS-shell detection.
pub response_body: Option<&'a [u8]>,
/// Score of the proxy used; compared against `thresholds.proxy_score_floor`.
pub proxy_score: Option<f32>,
/// Attempt counter; drives the retry cap and the exponential backoff exponent.
pub attempts: u32,
/// Remaining render budget. `None` is treated as "no limit"; `Some(0)` forbids rendering.
pub render_budget_left: Option<u64>,
/// Remaining host cooldown in milliseconds. Not read by the `PolicyEngine`
/// methods in this file.
pub host_cooldown_ms_left: u64,
/// Policy thresholds (`always_capture_artifacts`, `max_render_jobs`,
/// `proxy_score_floor`, `max_retries`, `retry_base_ms` are read here).
pub thresholds: &'a PolicyThresholds,
}
/// Stateless holder for the policy decision functions; every method in its
/// `impl` is an associated function that takes no `self`.
pub struct PolicyEngine;
impl PolicyEngine {
/// Chooses the fetch strategy before any request has been made.
///
/// * If the profile sets `always_capture_artifacts`, returns
///   `CollectArtifacts` immediately.
/// * `Render` is honoured only when rendering is allowed, i.e. neither the
///   remaining render budget nor `max_render_jobs` is `Some(0)`.
/// * Every other case (`HttpSpoof`, `Auto`, or a `Render` request that is
///   not allowed) starts with plain HTTP.
pub fn decide_pre_fetch(ctx: &PolicyContext<'_>) -> (Decision, DecisionReason) {
    let thresholds = ctx.thresholds;
    if thresholds.always_capture_artifacts {
        let reason = DecisionReason::new("collect_artifacts:profile").with_detail("forensics");
        return (Decision::CollectArtifacts, reason);
    }

    // An absent budget means "unlimited"; only an explicit zero blocks rendering.
    let budget_exhausted = matches!(ctx.render_budget_left, Some(0));
    let renders_disabled = matches!(thresholds.max_render_jobs, Some(0));
    let render_allowed = !(budget_exhausted || renders_disabled);

    match ctx.initial_method {
        FetchMethod::Render if render_allowed => {
            (Decision::Render, DecisionReason::initial_render())
        }
        FetchMethod::Render | FetchMethod::HttpSpoof | FetchMethod::Auto => {
            (Decision::Http, DecisionReason::initial_http())
        }
    }
}
/// Chooses the next step after an HTTP response has been received.
///
/// Checks run in this order; the first that matches wins:
///
/// 1. Proxy score below `proxy_score_floor` → `SwitchProxy`.
/// 2. `detect_antibot_vendor` reports a vendor (needs both headers and body)
///    → `Render` if escalation is allowed, otherwise `Drop` with detail
///    `render_forbidden`.
/// 3. Status 200, `Auto` method, HTML content type and a JS-shell body
///    → `Render`.
/// 4. Status 401/403 → `CollectArtifacts`.
/// 5. Status 429/500/502/503/504 → `Drop` once the retry cap is reached,
///    otherwise `Retry` with exponential backoff.
/// 6. Anything else → `Http`.
///
/// Render escalation is allowed when neither the render budget nor
/// `max_render_jobs` is zero and the job was not already a `Render` fetch.
pub fn decide_post_fetch(ctx: &PolicyContext<'_>) -> (Decision, DecisionReason) {
    // A missing status becomes 0, which matches none of the status checks below.
    let status = ctx.response_status.unwrap_or(0);
    let headers = ctx.response_headers;
    let body = ctx.response_body;

    if let Some(score) = ctx.proxy_score {
        if score < ctx.thresholds.proxy_score_floor {
            return (Decision::SwitchProxy, DecisionReason::proxy_bad_score());
        }
    }

    // Computed once; both the anti-bot and the JS-shell escalation use it.
    let render_allowed = ctx.render_budget_left.is_none_or(|n| n > 0)
        && !matches!(ctx.thresholds.max_render_jobs, Some(0))
        && ctx.initial_method != FetchMethod::Render;

    if let (Some(hdrs), Some(body)) = (headers, body) {
        if let Some(vendor) = detect_antibot_vendor(status, hdrs, body) {
            let reason = DecisionReason::antibot_challenge(vendor.as_str());
            return if render_allowed {
                (Decision::Render, reason)
            } else {
                (Decision::Drop, reason.with_detail("render_forbidden"))
            };
        }
    }

    if status == 200
        && render_allowed
        && ctx.initial_method == FetchMethod::Auto
        && headers_look_html(headers)
        && body.is_some_and(looks_like_js_shell)
    {
        return (Decision::Render, DecisionReason::js_only_content());
    }

    if matches!(status, 401 | 403) {
        return (
            Decision::CollectArtifacts,
            DecisionReason::new("collect_artifacts:status").with_detail(status.to_string()),
        );
    }

    if matches!(status, 429 | 500 | 502 | 503 | 504) {
        // saturating_add: a plain `+ 1` would panic in debug builds and wrap to 0
        // in release builds when `attempts` is u32::MAX, skipping the retry cap.
        if ctx.attempts.saturating_add(1) >= ctx.thresholds.max_retries {
            return (
                Decision::Drop,
                DecisionReason::status_transient(status).with_detail("max_retries"),
            );
        }
        // Exponential backoff: base * 2^attempts, with the exponent capped at 8.
        let backoff = ctx
            .thresholds
            .retry_base_ms
            .saturating_mul(1u64 << ctx.attempts.min(8));
        return (
            Decision::Retry { after_ms: backoff },
            DecisionReason::status_transient(status),
        );
    }

    (Decision::Http, DecisionReason::initial_http())
}
/// Picks the session action to take after an anti-bot challenge was observed.
///
/// * `AccessDenied`, or any vendor at `HardBlock` level → `GiveUp`.
/// * Captcha-style vendors (Turnstile, reCAPTCHA variants, hCaptcha, generic
///   captcha): `Suspected` → `RotateProxy`; `WidgetPresent` /
///   `ChallengePage` → `KillContext`.
/// * Cloudflare JS challenge, DataDome, PerimeterX, Akamai: `Suspected` →
///   `RotateProxy`; `WidgetPresent` → `KillContext`; `ChallengePage` →
///   `ReopenBrowser` for a `Clean`/`Warm` session, `GiveUp` for a
///   `Contaminated`/`Blocked` one.
///
/// `_proxy` is accepted but not used.
pub fn decide_post_challenge(
    signal: &ChallengeSignal,
    session: SessionState,
    _proxy: Option<&url::Url>,
) -> SessionAction {
    match signal.vendor {
        ChallengeVendor::AccessDenied => SessionAction::GiveUp,

        ChallengeVendor::CloudflareTurnstile
        | ChallengeVendor::Recaptcha
        | ChallengeVendor::RecaptchaEnterprise
        | ChallengeVendor::HCaptcha
        | ChallengeVendor::GenericCaptcha => match signal.level {
            ChallengeLevel::HardBlock => SessionAction::GiveUp,
            ChallengeLevel::Suspected => SessionAction::RotateProxy,
            ChallengeLevel::WidgetPresent | ChallengeLevel::ChallengePage => {
                SessionAction::KillContext
            }
        },

        ChallengeVendor::CloudflareJsChallenge
        | ChallengeVendor::DataDome
        | ChallengeVendor::PerimeterX
        | ChallengeVendor::Akamai => match signal.level {
            ChallengeLevel::HardBlock => SessionAction::GiveUp,
            ChallengeLevel::Suspected => SessionAction::RotateProxy,
            ChallengeLevel::WidgetPresent => SessionAction::KillContext,
            // Only a full challenge page takes the session's health into account.
            ChallengeLevel::ChallengePage => match session {
                SessionState::Clean | SessionState::Warm => SessionAction::ReopenBrowser,
                SessionState::Contaminated | SessionState::Blocked => SessionAction::GiveUp,
            },
        },
    }
}
/// Optionally converts a `GiveUp` outcome into a human-handoff decision.
///
/// Returns `None` for every action other than `GiveUp`. With the
/// `cdp-backend` feature enabled, a handoff request is built from `signal`
/// and `screenshot_path` when `handoff_enabled()` is true. Without the
/// feature the result is always `None`.
pub fn maybe_human_handoff(
    action: SessionAction,
    signal: &ChallengeSignal,
    screenshot_path: Option<std::path::PathBuf>,
) -> Option<Decision> {
    if action != SessionAction::GiveUp {
        return None;
    }

    #[cfg(feature = "cdp-backend")]
    {
        crate::render::handoff::handoff_enabled().then(|| {
            crate::render::handoff::HandoffRequest::from_signal(signal, screenshot_path)
                .into_policy_decision()
        })
    }
    #[cfg(not(feature = "cdp-backend"))]
    {
        // No handoff backend compiled in; consume the inputs so they do not
        // trigger unused-variable warnings.
        let _ = signal;
        drop(screenshot_path);
        None
    }
}
/// Picks a session action for a telemetry-volume signal.
///
/// A `Blocked` session always yields `GiveUp`. Otherwise hCaptcha and the
/// reCAPTCHA variants keep the session (`ReuseSession`) and every other
/// vendor rotates the proxy.
pub fn decide_on_telemetry_volume(
    vendor: crate::antibot::ChallengeVendor,
    session: SessionState,
) -> SessionAction {
    match (session, vendor) {
        // Session state takes precedence over the vendor.
        (SessionState::Blocked, _) => SessionAction::GiveUp,
        (
            _,
            crate::antibot::ChallengeVendor::HCaptcha
            | crate::antibot::ChallengeVendor::Recaptcha
            | crate::antibot::ChallengeVendor::RecaptchaEnterprise,
        ) => SessionAction::ReuseSession,
        (_, _) => SessionAction::RotateProxy,
    }
}
/// Chooses the next step after a fetch failed with a transport-level error.
///
/// `err_kind` is a short error-class label. Once `attempts + 1` reaches
/// `max_retries` the job is dropped with reason `drop:{err_kind}:max_retries`,
/// whatever the error class. Below the cap, `dns`, `tls`, `io`, `http` and
/// `request-timeout` are retried with exponential backoff
/// (`retry_base_ms * 2^min(attempts, 8)`, saturating); any other kind is
/// dropped with reason `drop:{err_kind}`.
pub fn decide_post_error(
    ctx: &PolicyContext<'_>,
    err_kind: &str,
) -> (Decision, DecisionReason) {
    // saturating_add: a plain `+ 1` would panic in debug builds and wrap to 0
    // in release builds when `attempts` is u32::MAX, skipping the retry cap.
    if ctx.attempts.saturating_add(1) >= ctx.thresholds.max_retries {
        return (
            Decision::Drop,
            DecisionReason::new(format!("drop:{err_kind}:max_retries")),
        );
    }

    match err_kind {
        "dns" | "tls" | "io" | "http" | "request-timeout" => {
            // Exponent is capped at 8 so the shift can never overflow.
            let backoff = ctx
                .thresholds
                .retry_base_ms
                .saturating_mul(1u64 << ctx.attempts.min(8));
            (
                Decision::Retry { after_ms: backoff },
                DecisionReason::new(format!("retry:{err_kind}")),
            )
        }
        _ => (
            Decision::Drop,
            DecisionReason::new(format!("drop:{err_kind}")),
        ),
    }
}
}
/// Returns `true` when the `content-type` header names an HTML or XHTML body.
///
/// Missing headers, a missing `content-type`, or a value that `to_str()`
/// rejects all yield `false`. The comparison is ASCII case-insensitive.
fn headers_look_html(headers: Option<&HeaderMap>) -> bool {
    let Some(map) = headers else {
        return false;
    };
    let Some(raw) = map.get("content-type") else {
        return false;
    };
    let Ok(text) = raw.to_str() else {
        return false;
    };

    let content_type = text.to_ascii_lowercase();
    ["text/html", "application/xhtml"]
        .iter()
        .any(|needle| content_type.contains(*needle))
}
/// Heuristically decides whether an HTML body is an empty client-side-rendered
/// shell, i.e. a page that only shows content after JavaScript runs.
///
/// Only the first 96 KiB of `body` are inspected, decoded lossily as UTF-8
/// and ASCII-lowercased. The body counts as a shell when:
///
/// * it contains one of the usual "JavaScript required" notices, or
/// * it has a known framework mount point (`id="root"`, `id="app"`,
///   `id="__next"`, single- or double-quoted) and a `<script` tag, and shows
///   no sign of static content: no `<article`/`<main`, fewer than three
///   `<a ` anchors and fewer than two `<p>` paragraphs.
///
/// Paragraphs are counted as real `<p>` open tags (`<p` followed by `>`, `/`
/// or whitespace). A bare `<p` prefix would also count `<path>`, `<pre>`,
/// `<picture>` and similar tags, so a shell with an inline SVG spinner would
/// be misread as having content.
fn looks_like_js_shell(body: &[u8]) -> bool {
    const SCAN_LIMIT: usize = 96 * 1024;
    const NOSCRIPT_NOTICES: [&str; 3] = [
        "enable javascript to run this app",
        "requires javascript",
        "javascript is required",
    ];
    const MOUNT_POINTS: [&str; 6] = [
        "id=\"root\"",
        "id='root'",
        "id=\"app\"",
        "id='app'",
        "id=\"__next\"",
        "id='__next'",
    ];

    let window = &body[..body.len().min(SCAN_LIMIT)];
    let lower = String::from_utf8_lossy(window).to_ascii_lowercase();

    if NOSCRIPT_NOTICES.iter().any(|notice| lower.contains(*notice)) {
        return true;
    }

    let has_mount = MOUNT_POINTS.iter().any(|mount| lower.contains(*mount));
    if !has_mount || !lower.contains("<script") {
        return false;
    }

    // Counting stops as soon as the threshold is reached.
    let anchor_count = lower.matches("<a ").take(3).count();
    let paragraph_count = lower
        .match_indices("<p")
        .filter(|&(at, tag)| {
            // Require a tag-name boundary so `<path`, `<pre`, ... are not counted.
            matches!(
                lower.as_bytes().get(at + tag.len()),
                Some(b'>' | b'/' | b' ' | b'\t' | b'\n' | b'\r')
            )
        })
        .take(2)
        .count();

    let content_markers = lower.contains("<article")
        || lower.contains("<main")
        || anchor_count >= 3
        || paragraph_count >= 2;
    !content_markers
}