use serde_json::Value;
use crate::{error::Result, page::Page};
/// Captcha / anti-bot challenge families distinguished by this module.
///
/// Each variant corresponds to a string tag (see [`CaptchaKind::as_str`]);
/// anything without a dedicated variant is carried in [`CaptchaKind::Unknown`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CaptchaKind {
    /// Tag `"recaptcha"`.
    Recaptcha,
    /// Tag `"hcaptcha"`.
    Hcaptcha,
    /// Tag `"turnstile"`.
    Turnstile,
    /// Tag `"cloudflare_challenge"`.
    CloudflareChallenge,
    /// Tag `"datadome"`.
    DatadomeBlock,
    /// Any other tag; holds the label with a leading `unknown:` removed.
    Unknown(String),
}

impl CaptchaKind {
    /// Returns the string tag for this kind.
    ///
    /// For [`CaptchaKind::Unknown`] this is the stored label itself, without
    /// any `unknown:` prefix.
    pub fn as_str(&self) -> &str {
        match self {
            CaptchaKind::Unknown(label) => label.as_str(),
            CaptchaKind::DatadomeBlock => "datadome",
            CaptchaKind::CloudflareChallenge => "cloudflare_challenge",
            CaptchaKind::Turnstile => "turnstile",
            CaptchaKind::Hcaptcha => "hcaptcha",
            CaptchaKind::Recaptcha => "recaptcha",
        }
    }

    /// Maps a string tag to its kind.
    ///
    /// Unrecognised tags become [`CaptchaKind::Unknown`]; one leading
    /// `unknown:` prefix, if present, is dropped from the stored label.
    fn from_tag(tag: &str) -> Self {
        // No known tag starts with "unknown:", so handling the prefix first
        // cannot shadow any of the arms below.
        if let Some(label) = tag.strip_prefix("unknown:") {
            return Self::Unknown(label.to_owned());
        }
        match tag {
            "recaptcha" => Self::Recaptcha,
            "hcaptcha" => Self::Hcaptcha,
            "turnstile" => Self::Turnstile,
            "cloudflare_challenge" => Self::CloudflareChallenge,
            "datadome" => Self::DatadomeBlock,
            unrecognised => Self::Unknown(unrecognised.to_owned()),
        }
    }
}
/// Bounding box of a captcha widget element.
///
/// Within this file the values are filled from the `left`, `top`, `width`
/// and `height` of `getBoundingClientRect()` as reported by the capture
/// script.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct WidgetRect {
/// Left edge (`left` of the bounding rect).
pub x: f64,
/// Top edge (`top` of the bounding rect).
pub y: f64,
/// Width of the bounding rect.
pub width: f64,
/// Height of the bounding rect.
pub height: f64,
}
/// Everything the capture script reports about a captcha found on a page.
///
/// Produced by `capture_captcha`; optional fields are `None` when the script
/// returned no string (or, for `widget_rect`, no complete rectangle) for them.
#[derive(Debug, Clone, PartialEq)]
pub struct CaptchaInfo {
/// Which captcha / challenge family was detected.
pub kind: CaptchaKind,
/// Site key, read from a `data-sitekey` attribute or (reCAPTCHA / hCaptcha)
/// from the widget iframe's URL.
pub sitekey: Option<String>,
/// CSS selector the capture script associates with the widget.
pub widget_selector: Option<String>,
/// Bounding box of the widget element; `None` when the script reported no
/// rectangle or any of its four coordinates was missing or non-numeric.
pub widget_rect: Option<WidgetRect>,
/// Whether the script reported the widget as rendered; `false` when the
/// flag is absent.
pub widget_rendered: bool,
/// CSS selector of the hidden field that holds the solved token.
pub response_field_selector: Option<String>,
/// Token already present in the response field; empty strings are mapped
/// to `None`.
pub existing_token: Option<String>,
/// `data-action` attribute (only reported for Turnstile by the script).
pub action: Option<String>,
/// `data-cdata` attribute (only reported for Turnstile by the script).
pub cdata: Option<String>,
/// URL of the page as seen by the script; empty string when not reported.
pub page_url: String,
}
/// Runs the capture script on `page` and decodes its result.
///
/// Returns `Ok(None)` when the script's result is not an object or has no
/// string `kind` entry (the script returns `null` when nothing is detected).
/// Otherwise every reported field is copied into a [`CaptchaInfo`].
///
/// # Errors
///
/// Propagates any error returned by `page.evaluate_js`.
pub async fn capture_captcha(page: &Page) -> Result<Option<CaptchaInfo>> {
    let raw = page.evaluate_js(CAPTURE_JS).await?;
    let fields = match raw.as_object() {
        Some(map) => map,
        None => return Ok(None),
    };

    // Owned copy of a string-valued entry; `None` if missing or not a string.
    let text = |key: &str| -> Option<String> {
        fields.get(key).and_then(Value::as_str).map(str::to_owned)
    };

    let kind = match fields.get("kind").and_then(Value::as_str) {
        Some(tag) => CaptchaKind::from_tag(tag),
        None => return Ok(None),
    };

    // The rectangle is all-or-nothing: one missing or non-numeric coordinate
    // discards the whole thing.
    let widget_rect = fields
        .get("widget_rect")
        .and_then(Value::as_object)
        .and_then(|rect| {
            let coord = |name: &str| -> Option<f64> { rect.get(name)?.as_f64() };
            Some(WidgetRect {
                x: coord("x")?,
                y: coord("y")?,
                width: coord("width")?,
                height: coord("height")?,
            })
        });

    let widget_rendered = fields
        .get("widget_rendered")
        .and_then(Value::as_bool)
        .unwrap_or(false);

    let info = CaptchaInfo {
        kind,
        sitekey: text("sitekey"),
        widget_selector: text("widget_selector"),
        widget_rect,
        widget_rendered,
        response_field_selector: text("response_field_selector"),
        // An empty token means "nothing solved yet".
        existing_token: text("existing_token").filter(|tok| !tok.is_empty()),
        action: text("action"),
        cdata: text("cdata"),
        page_url: text("page_url").unwrap_or_default(),
    };
    Ok(Some(info))
}
/// Reports only the kind of captcha present on `page`, if any.
///
/// Runs the full capture via [`capture_captcha`] and discards everything but
/// the detected kind.
///
/// # Errors
///
/// Propagates any error returned by [`capture_captcha`].
pub async fn detect_captcha(page: &Page) -> Result<Option<CaptchaKind>> {
    let captured = capture_captcha(page).await?;
    Ok(captured.map(|found| found.kind))
}
/// Writes a solved captcha `token` into the page's hidden response field(s).
///
/// A script is evaluated on `page` that, for `turnstile`, `recaptcha` and
/// `hcaptcha`, sets the value of every matching response input/textarea and
/// dispatches bubbling `input` and `change` events on it. Other kinds have no
/// selectors, so nothing is written for them. For Turnstile, the function
/// named by a `data-callback` attribute is additionally invoked with the
/// token when it exists on `window`.
///
/// The script's return value (number of fields written) is discarded, so
/// `Ok(())` is returned even when no field matched.
///
/// # Errors
///
/// Propagates any error returned by `page.evaluate_js`.
pub async fn inject_captcha_token(page: &Page, kind: CaptchaKind, token: &str) -> Result<()> {
    // Escapes `raw` so it can sit between double quotes in a JavaScript
    // string literal without terminating or altering it.
    fn js_escape(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '\\' => out.push_str("\\\\"),
                '"' => out.push_str("\\\""),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                '\t' => out.push_str("\\t"),
                // Remaining control characters would be invalid or invisible
                // inside the literal; U+2028/U+2029 are line terminators in
                // pre-ES2019 engines.
                c if c.is_control() || c == '\u{2028}' || c == '\u{2029}' => {
                    out.push_str(&format!("\\u{:04x}", u32::from(c)));
                }
                c => out.push(c),
            }
        }
        out
    }

    let token_lit = js_escape(token);
    let kind_lit = js_escape(kind.as_str());
    let js = format!(
        r#"
(function(kind, token) {{
  const selectors = {{
    turnstile: ['input[name="cf-turnstile-response"]', 'textarea[name="cf-turnstile-response"]'],
    recaptcha: ['textarea[name="g-recaptcha-response"]', '#g-recaptcha-response'],
    hcaptcha: ['textarea[name="h-captcha-response"]', '[name="h-captcha-response"]'],
  }};
  // Own-property lookup: an unrecognised kind must not resolve to an
  // inherited Object member.
  const list = Object.prototype.hasOwnProperty.call(selectors, kind) ? selectors[kind] : [];
  // One combined query yields each element once, even when it matches
  // several of the selectors, so it is written and notified a single time.
  const nodes = list.length ? document.querySelectorAll(list.join(', ')) : [];
  let written = 0;
  for (const el of nodes) {{
    const proto = el.tagName === 'TEXTAREA'
      ? window.HTMLTextAreaElement.prototype
      : window.HTMLInputElement.prototype;
    const desc = Object.getOwnPropertyDescriptor(proto, 'value');
    try {{
      // Call the prototype's native value setter directly.
      desc.set.call(el, token);
    }} catch (e) {{
      // The element is neither an input nor a textarea: assign plainly.
      el.value = token;
    }}
    el.dispatchEvent(new Event('input', {{ bubbles: true }}));
    el.dispatchEvent(new Event('change', {{ bubbles: true }}));
    written += 1;
  }}
  // Turnstile: invoke the widget's success callback if the page
  // registered one via data-callback="fnName". Prefer the Turnstile
  // container's own attribute over an unrelated element's.
  if (kind === 'turnstile') {{
    const cb_el = document.querySelector('.cf-turnstile[data-callback]')
      || document.querySelector('[data-callback]');
    const fn_name = cb_el ? cb_el.getAttribute('data-callback') : null;
    if (fn_name && typeof window[fn_name] === 'function') {{
      try {{ window[fn_name](token); }} catch (e) {{}}
    }}
  }}
  return written;
}})("{kind_lit}", "{token_lit}")
"#
    );
    page.evaluate_js(&js).await?;
    Ok(())
}
/// JavaScript evaluated in the page by `capture_captcha`.
///
/// The script is a self-invoking function that checks, in this order, for
/// Turnstile, reCAPTCHA, hCaptcha, a Cloudflare interstitial, DataDome and
/// PerimeterX, and returns a description of the first one it finds. The
/// returned object has the keys `kind`, `sitekey`, `widget_selector`,
/// `widget_rect`, `widget_rendered`, `response_field_selector`,
/// `existing_token`, `action`, `cdata` and `page_url`. It returns `null` when
/// nothing matches or when any exception is thrown during detection.
///
/// The whole body is a runtime string: the `//` lines inside it are JavaScript
/// comments that are sent to the page together with the code.
const CAPTURE_JS: &str = r#"
(function() {
try {
const page_url = (typeof location !== 'undefined') ? location.href : '';
function rectOf(el) {
if (!el || !el.getBoundingClientRect) return null;
const r = el.getBoundingClientRect();
if (r.width < 1 && r.height < 1) return null;
return { x: r.left, y: r.top, width: r.width, height: r.height };
}
function readHidden(sel) {
const el = document.querySelector(sel);
if (!el) return '';
return el.value || el.textContent || '';
}
// ── Turnstile ───────────────────────────────────────────
// Rendered widget OR a data-sitekey container OR just the
// runtime loaded (Ahrefs-style lazy mount).
const ts_iframe = document.querySelector('iframe[src*="challenges.cloudflare.com/turnstile"]');
const ts_container = document.querySelector('.cf-turnstile, [data-sitekey][class*="turnstile" i]');
const ts_runtime_loaded = !!document.querySelector('script[src*="challenges.cloudflare.com/turnstile"]')
|| (typeof window.turnstile === 'object');
if (ts_iframe || ts_container || ts_runtime_loaded) {
const rect_src = ts_iframe || ts_container;
const sk_node = document.querySelector('.cf-turnstile[data-sitekey], [data-sitekey]');
return {
kind: 'turnstile',
sitekey: sk_node ? sk_node.getAttribute('data-sitekey') : null,
widget_selector: ts_container ? '.cf-turnstile' : (ts_iframe ? 'iframe[src*="challenges.cloudflare.com/turnstile"]' : null),
widget_rect: rectOf(rect_src),
widget_rendered: !!(ts_iframe || ts_container),
response_field_selector: 'input[name="cf-turnstile-response"]',
existing_token: readHidden('input[name="cf-turnstile-response"], textarea[name="cf-turnstile-response"]'),
action: sk_node ? sk_node.getAttribute('data-action') : null,
cdata: sk_node ? sk_node.getAttribute('data-cdata') : null,
page_url,
};
}
// ── reCAPTCHA v2/v3 ─────────────────────────────────────
const rc_iframe = document.querySelector('iframe[src*="google.com/recaptcha"], iframe[src*="recaptcha/api2"]');
const rc_container = document.querySelector('.g-recaptcha, #g-recaptcha');
if (rc_iframe || rc_container) {
const rect_src = rc_iframe || rc_container;
const sk_node = document.querySelector('.g-recaptcha[data-sitekey], [data-sitekey]');
// reCAPTCHA iframe src carries `k=<sitekey>` too
let sitekey = sk_node ? sk_node.getAttribute('data-sitekey') : null;
if (!sitekey && rc_iframe) {
const m = rc_iframe.src.match(/[?&]k=([^&]+)/);
if (m) sitekey = decodeURIComponent(m[1]);
}
return {
kind: 'recaptcha',
sitekey,
widget_selector: rc_container ? '.g-recaptcha' : 'iframe[src*="recaptcha"]',
widget_rect: rectOf(rect_src),
widget_rendered: true,
response_field_selector: 'textarea[name="g-recaptcha-response"]',
existing_token: readHidden('textarea[name="g-recaptcha-response"], #g-recaptcha-response'),
action: null,
cdata: null,
page_url,
};
}
// ── hCaptcha ────────────────────────────────────────────
const hc_iframe = document.querySelector('iframe[src*="hcaptcha.com"]');
const hc_container = document.querySelector('.h-captcha, [data-hcaptcha-widget-id]');
if (hc_iframe || hc_container) {
const rect_src = hc_iframe || hc_container;
const sk_node = document.querySelector('[data-sitekey]');
let sitekey = sk_node ? sk_node.getAttribute('data-sitekey') : null;
if (!sitekey && hc_iframe) {
const m = hc_iframe.src.match(/[?&]sitekey=([^&]+)/);
if (m) sitekey = decodeURIComponent(m[1]);
}
return {
kind: 'hcaptcha',
sitekey,
widget_selector: hc_container ? '.h-captcha' : 'iframe[src*="hcaptcha"]',
widget_rect: rectOf(rect_src),
widget_rendered: true,
response_field_selector: 'textarea[name="h-captcha-response"]',
existing_token: readHidden('textarea[name="h-captcha-response"], [name="h-captcha-response"]'),
action: null,
cdata: null,
page_url,
};
}
// ── Cloudflare interstitial (managed challenge) ─────────
const cf_el = document.querySelector('#cf-challenge-running, #challenge-running, #cf-chl-widget, .cf-browser-verification');
const title_l = (document.title || '').toLowerCase();
if (cf_el || title_l.includes('just a moment') || title_l.includes('attention required')) {
return {
kind: 'cloudflare_challenge',
sitekey: null,
widget_selector: cf_el ? '#cf-challenge-running, #challenge-running, #cf-chl-widget' : null,
widget_rect: rectOf(cf_el),
widget_rendered: !!cf_el,
response_field_selector: null,
existing_token: '',
action: null,
cdata: null,
page_url,
};
}
// ── DataDome ────────────────────────────────────────────
const dd_el = document.querySelector('#datadome-captcha, [id^="dd_"]');
if (dd_el) {
return {
kind: 'datadome',
sitekey: null,
widget_selector: '#datadome-captcha',
widget_rect: rectOf(dd_el),
widget_rendered: true,
response_field_selector: null,
existing_token: '',
action: null,
cdata: null,
page_url,
};
}
// ── PerimeterX ──────────────────────────────────────────
const px_el = document.querySelector('#px-captcha');
if (px_el) {
return {
kind: 'unknown:perimeterx',
sitekey: null,
widget_selector: '#px-captcha',
widget_rect: rectOf(px_el),
widget_rendered: true,
response_field_selector: null,
existing_token: '',
action: null,
cdata: null,
page_url,
};
}
return null;
} catch (e) { return null; }
})()
"#;