Skip to main content

voidcrawl_mcp/tools/
actions.rs

1//! Session-scoped interaction primitives: click, type, eval JS, read
2//! title, extract text, capture network entries, wait for network idle.
3//!
4//! Each fn takes an existing session (already opened via `session_open`)
5//! and runs one action against its page. These are the Claude-Code-facing
6//! primitives — small, composable, no hidden state.
7
8use std::{sync::Arc, time::Duration};
9
10use rmcp::ErrorData;
11use schemars::JsonSchema;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14use tokio::time::{Instant, sleep};
15use void_crawl_core::{
16    CaptchaInfo, CaptchaKind, DispatchMouseEventType, MouseButton, ax, capture_captcha,
17    detect_captcha, inject_captcha_token,
18};
19
20use crate::{
21    errors::map_err, server::VoidCrawlServer, sessions::DedicatedSession,
22    tools::session::DEFAULT_TIMEOUT_SECS,
23};
24
25// ── Schema helpers ───────────────────────────────────────────────────────
26//
27// `serde_json::Value` fields make schemars emit a boolean `true` sub-schema,
28// which Claude Code's tool-output validator rejects — and one bad tool schema
29// fails the ENTIRE `tools/list`, so the client connects but registers zero
30// tools. These emit an explicit permissive object schema (`{}`) instead, which
31// validates cleanly across hosts.
32
33fn any_value_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
34    schemars::json_schema!({})
35}
36
37fn any_value_array_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
38    schemars::json_schema!({ "type": "array", "items": {} })
39}
40
41// ── Click ───────────────────────────────────────────────────────────────
42
// Arguments accepted by `click`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct ClickArgs {
    // Id of the session whose page receives the click; resolved via `lookup`.
    pub session_id: String,
    /// CSS selector of the element to click.
    pub selector:   String,
}
49
// Payload-free acknowledgement returned by the action tools in this file.
// Every construction site visible here sets `ok: true`; failures surface as
// `Err(ErrorData)` instead of `ok: false`.
#[derive(Debug, Serialize, JsonSchema)]
pub struct OkResult {
    pub ok: bool,
}
54
55pub async fn click(server: &VoidCrawlServer, args: ClickArgs) -> Result<OkResult, ErrorData> {
56    let handle = lookup(server, &args.session_id).await?;
57    let page = handle.page.lock().await;
58    page.click_element(&args.selector).await.map_err(map_err)?;
59    Ok(OkResult { ok: true })
60}
61
62// ── Click visual coords ─────────────────────────────────────────────────
63
// Arguments accepted by `click_visual_coords`: a point on the page plus an
// optional humanized cursor approach.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct ClickVisualCoordsArgs {
    // Id of the session whose page receives the click; resolved via `lookup`.
    pub session_id: String,
    /// X coordinate in CSS pixels (pre-DPR).
    pub x:          f64,
    /// Y coordinate in CSS pixels (pre-DPR).
    pub y:          f64,
    /// When true, the cursor first travels a humanized curved path to (x, y)
    /// (multiple MouseMoved events) before the click. Off by default.
    #[serde(default)]
    pub humanize:   bool,
}
76
77pub async fn click_visual_coords(
78    server: &VoidCrawlServer,
79    args: ClickVisualCoordsArgs,
80) -> Result<OkResult, ErrorData> {
81    let handle = lookup(server, &args.session_id).await?;
82    let page = handle.page.lock().await;
83    // Trusted compositor click at (x, y): the CDP recipe that React-rendered
84    // forms respond to when CSS selector clicks fail silently. `humanize` adds a
85    // realistic cursor approach first.
86    page.click_xy(args.x, args.y, args.humanize).await.map_err(map_err)?;
87    Ok(OkResult { ok: true })
88}
89
90// ── Type text ───────────────────────────────────────────────────────────
91
// Arguments accepted by `type_text`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct TypeTextArgs {
    // Id of the session whose page receives the keystrokes.
    pub session_id: String,
    /// CSS selector of the target input. When omitted, keys are
    /// dispatched to whatever currently has focus.
    #[serde(default)]
    pub selector:   Option<String>,
    // Text to enter; without a selector it is sent one `char` at a time.
    pub text:       String,
}
101
102pub async fn type_text(
103    server: &VoidCrawlServer,
104    args: TypeTextArgs,
105) -> Result<OkResult, ErrorData> {
106    let handle = lookup(server, &args.session_id).await?;
107    let page = handle.page.lock().await;
108    if let Some(sel) = args.selector {
109        page.type_into(&sel, &args.text).await.map_err(map_err)?;
110    } else {
111        // No selector: dispatch each character as a keypress to the
112        // currently-focused element (matches the React recipe where
113        // you click first, then type).
114        for ch in args.text.chars() {
115            let s = ch.to_string();
116            page.dispatch_key_event(
117                void_crawl_core::DispatchKeyEventType::Char,
118                Some(&s),
119                None,
120                Some(&s),
121                None,
122            )
123            .await
124            .map_err(map_err)?;
125        }
126    }
127    Ok(OkResult { ok: true })
128}
129
130// ── Eval JS ─────────────────────────────────────────────────────────────
131
// Arguments accepted by `eval_js`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct EvalJsArgs {
    // Id of the session whose page evaluates the expression.
    pub session_id: String,
    /// A JavaScript expression. Its value is returned as JSON.
    pub expression: String,
}
138
// Result of `eval_js` / `eval_js_in_frame`: whatever JSON value the
// evaluation produced.
#[derive(Debug, Serialize, JsonSchema)]
pub struct EvalJsResult {
    // `schema_with` swaps in the explicit `{}` schema from `any_value_schema`
    // instead of the schema schemars derives for `Value`.
    #[schemars(schema_with = "any_value_schema")]
    pub value: Value,
}
144
145pub async fn eval_js(
146    server: &VoidCrawlServer,
147    args: EvalJsArgs,
148) -> Result<EvalJsResult, ErrorData> {
149    let handle = lookup(server, &args.session_id).await?;
150    let page = handle.page.lock().await;
151    let value = page.evaluate_js(&args.expression).await.map_err(map_err)?;
152    Ok(EvalJsResult { value })
153}
154
// Arguments accepted by `eval_js_in_frame`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct EvalJsInFrameArgs {
    // Id of the session whose page hosts the target frame.
    pub session_id:        String,
    /// Substring of the target frame's URL (e.g. "recaptcha/api2/bframe").
    /// The expression runs inside the first frame whose URL contains this —
    /// the way to reach a **cross-origin** iframe whose `contentDocument` is
    /// null from the parent.
    pub frame_url_pattern: String,
    /// A JavaScript expression. Runs as the frame's own page script
    /// (`document` is the frame's document). Its value is returned as JSON.
    pub expression:        String,
}
167
168pub async fn eval_js_in_frame(
169    server: &VoidCrawlServer,
170    args: EvalJsInFrameArgs,
171) -> Result<EvalJsResult, ErrorData> {
172    let handle = lookup(server, &args.session_id).await?;
173    let page = handle.page.lock().await;
174    let value = page
175        .evaluate_js_in_frame(&args.frame_url_pattern, &args.expression)
176        .await
177        .map_err(map_err)?;
178    Ok(EvalJsResult { value })
179}
180
181// ── Title ───────────────────────────────────────────────────────────────
182
// Argument struct for tools that need nothing but a session. In this file it
// is taken by `title`, `network_capture`, `detect_captcha_tool` and
// `capture_captcha_tool`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct SessionIdArgs {
    // Id of the session to act on; resolved via `lookup`.
    pub session_id: String,
}
187
// Result of `title`.
#[derive(Debug, Serialize, JsonSchema)]
pub struct TitleResult {
    // `None` both when the page reports no title and when reading the title
    // failed — `title` folds the error case into `None`.
    pub title: Option<String>,
}
192
193pub async fn title(
194    server: &VoidCrawlServer,
195    args: SessionIdArgs,
196) -> Result<TitleResult, ErrorData> {
197    let handle = lookup(server, &args.session_id).await?;
198    let page = handle.page.lock().await;
199    Ok(TitleResult { title: page.title().await.ok().flatten() })
200}
201
202// ── Extract ─────────────────────────────────────────────────────────────
203
// Arguments accepted by `extract`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct ExtractArgs {
    // Id of the session whose page is queried.
    pub session_id: String,
    /// CSS selector. Uses `document.querySelectorAll` — returns text
    /// content (not inner HTML) for each matching element.
    pub selector:   String,
}
211
// Result of `extract`.
#[derive(Debug, Serialize, JsonSchema)]
pub struct ExtractResult {
    // One entry per element returned by the query, in the order the script
    // returned them; an element without text content yields "".
    pub texts: Vec<String>,
}
216
217pub async fn extract(
218    server: &VoidCrawlServer,
219    args: ExtractArgs,
220) -> Result<ExtractResult, ErrorData> {
221    let handle = lookup(server, &args.session_id).await?;
222    let page = handle.page.lock().await;
223    let js = format!(
224        "Array.from(document.querySelectorAll({sel:?})).map(e => e.textContent || '')",
225        sel = args.selector
226    );
227    let value = page.evaluate_js(&js).await.map_err(map_err)?;
228    let texts = match value {
229        Value::Array(arr) => {
230            arr.into_iter().map(|v| v.as_str().unwrap_or("").to_string()).collect()
231        }
232        _ => Vec::new(),
233    };
234    Ok(ExtractResult { texts })
235}
236
237// ── Accessibility tree ────────────────────────────────────────────────
238
// Arguments accepted by `ax_tree`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct AxTreeArgs {
    // Id of the session whose page is inspected.
    pub session_id: String,
    /// "compact" (default): a pruned, indented role/name outline meant for an
    /// agent to read. "raw": the full CDP AX nodes for programmatic use.
    #[serde(default)]
    pub mode:       Option<String>,
    /// Maximum descendant depth to traverse; omit for the whole tree.
    #[serde(default)]
    pub depth:      Option<i64>,
}
250
// Result of `ax_tree`. Exactly one of `tree` / `nodes` carries data,
// depending on the requested mode; the two counts are filled in both modes.
#[derive(Debug, Serialize, JsonSchema)]
pub struct AxTreeResult {
    /// Indented `role "name"` outline. Populated in compact mode only.
    pub tree:        String,
    /// Raw CDP AX nodes. Populated in raw mode only.
    // Uses the explicit array-of-anything schema from `any_value_array_schema`.
    #[schemars(schema_with = "any_value_array_schema")]
    pub nodes:       Vec<Value>,
    /// Total AX nodes the browser returned.
    pub node_count:  usize,
    /// Non-ignored nodes carrying a non-empty accessible name. A low ratio of
    /// `named_count` to `node_count` signals a thin/poor AX tree — prefer
    /// falling back to HTML, screenshot, or CSS selectors on such pages.
    pub named_count: usize,
}
265
266pub async fn ax_tree(
267    server: &VoidCrawlServer,
268    args: AxTreeArgs,
269) -> Result<AxTreeResult, ErrorData> {
270    let handle = lookup(server, &args.session_id).await?;
271    let page = handle.page.lock().await;
272    let value = page.get_full_ax_tree(args.depth).await.map_err(map_err)?;
273    let nodes = match value {
274        Value::Array(arr) => arr,
275        _ => Vec::new(),
276    };
277    let (node_count, named_count) = ax::richness(&nodes);
278
279    let raw = args.mode.as_deref() == Some("raw");
280    let (tree, nodes) =
281        if raw { (String::new(), nodes) } else { (ax::compact_outline(&nodes), Vec::new()) };
282    Ok(AxTreeResult { tree, nodes, node_count, named_count })
283}
284
// Arguments accepted by `click_by_role`: an accessibility role + name pair,
// with an optional index to disambiguate and an optional humanized click.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct ClickByRoleArgs {
    // Id of the session whose page receives the click.
    pub session_id: String,
    /// Computed accessibility role, e.g. "button", "link", "checkbox".
    pub role:       String,
    /// Computed accessible name (exact match).
    pub name:       String,
    /// 0-based index when several nodes match the same role + name.
    // `click_by_role` substitutes 0 when this is omitted.
    #[serde(default)]
    pub nth:        Option<usize>,
    /// When true, click at the element's box-model centre with a humanized
    /// compositor pointer path instead of a DOM `.click()`. Off by default.
    #[serde(default)]
    pub humanize:   bool,
}
300
301pub async fn click_by_role(
302    server: &VoidCrawlServer,
303    args: ClickByRoleArgs,
304) -> Result<OkResult, ErrorData> {
305    let handle = lookup(server, &args.session_id).await?;
306    let page = handle.page.lock().await;
307    page.click_by_role(&args.role, &args.name, args.nth.unwrap_or(0), args.humanize)
308        .await
309        .map_err(map_err)?;
310    Ok(OkResult { ok: true })
311}
312
313// ── Wait for network idle ───────────────────────────────────────────────
314
// Arguments accepted by `wait_for_network_idle`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct WaitIdleArgs {
    // Id of the session whose page is waited on.
    pub session_id:   String,
    // Upper bound on the wait, in whole seconds. When omitted,
    // `DEFAULT_TIMEOUT_SECS` (imported from `tools::session`) is used.
    #[serde(default)]
    pub timeout_secs: Option<u64>,
}
321
322pub async fn wait_for_network_idle(
323    server: &VoidCrawlServer,
324    args: WaitIdleArgs,
325) -> Result<OkResult, ErrorData> {
326    let handle = lookup(server, &args.session_id).await?;
327    let page = handle.page.lock().await;
328    let timeout = Duration::from_secs(args.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));
329    page.wait_for_network_idle(timeout).await.map_err(map_err)?;
330    Ok(OkResult { ok: true })
331}
332
333// ── Network capture ─────────────────────────────────────────────────────
334
// One Resource Timing entry as reported by `network_capture`. Each field is
// copied from the matching property of a `performance` resource entry.
#[derive(Debug, Serialize, JsonSchema)]
pub struct NetworkEntry {
    // From `e.name`.
    pub url:            String,
    // From `e.initiatorType`; "" when absent.
    pub initiator_type: String,
    // From `e.transferSize`; 0 when absent.
    pub transfer_size:  f64,
    // From `e.duration`; 0 when absent.
    pub duration_ms:    f64,
}
342
// Result of `network_capture`.
#[derive(Debug, Serialize, JsonSchema)]
pub struct NetworkCaptureResult {
    // Entries in the order the page returned them; rows that cannot be
    // parsed are skipped by `network_capture`.
    pub entries: Vec<NetworkEntry>,
}
347
348pub async fn network_capture(
349    server: &VoidCrawlServer,
350    args: SessionIdArgs,
351) -> Result<NetworkCaptureResult, ErrorData> {
352    let handle = lookup(server, &args.session_id).await?;
353    let page = handle.page.lock().await;
354    // Pull from the Resource Timing API — same source DevTools uses for
355    // the Network panel's "transferred" column.
356    const JS: &str = r#"
357        performance.getEntriesByType('resource').map(e => ({
358            url: e.name,
359            initiator_type: e.initiatorType || '',
360            transfer_size: e.transferSize || 0,
361            duration_ms: e.duration || 0,
362        }))
363    "#;
364    let value = page.evaluate_js(JS).await.map_err(map_err)?;
365    let entries = match value {
366        Value::Array(arr) => arr
367            .into_iter()
368            .filter_map(|v| {
369                let obj = v.as_object()?;
370                Some(NetworkEntry {
371                    url:            obj.get("url")?.as_str()?.to_string(),
372                    initiator_type: obj.get("initiator_type")?.as_str().unwrap_or("").to_string(),
373                    transfer_size:  obj.get("transfer_size").and_then(Value::as_f64).unwrap_or(0.0),
374                    duration_ms:    obj.get("duration_ms").and_then(Value::as_f64).unwrap_or(0.0),
375                })
376            })
377            .collect(),
378        _ => Vec::new(),
379    };
380    Ok(NetworkCaptureResult { entries })
381}
382
383// ── Detect captcha ──────────────────────────────────────────────────────
384
// Result of `detect_captcha_tool`.
#[derive(Debug, Serialize, JsonSchema)]
pub struct DetectCaptchaResult {
    // `as_str()` tag of the detected captcha kind; `None` when the detector
    // found nothing.
    pub kind: Option<String>,
}
389
390pub async fn detect_captcha_tool(
391    server: &VoidCrawlServer,
392    args: SessionIdArgs,
393) -> Result<DetectCaptchaResult, ErrorData> {
394    let handle = lookup(server, &args.session_id).await?;
395    let page = handle.page.lock().await;
396    let kind = detect_captcha(&page).await.map_err(map_err)?;
397    Ok(DetectCaptchaResult { kind: kind.map(|k| k.as_str().to_string()) })
398}
399
400// ── Capture captcha (full structured) ───────────────────────────────────
401
// Serializable copy of the `widget_rect` carried by `CaptchaInfo`; the four
// fields are copied one-to-one in `capture_captcha_tool`.
// NOTE(review): presumably CSS pixels relative to the viewport — confirm
// against `void_crawl_core`.
#[derive(Debug, Serialize, JsonSchema)]
pub struct WidgetRectJson {
    pub x:      f64,
    pub y:      f64,
    pub width:  f64,
    pub height: f64,
}
409
// Result of `capture_captcha_tool`: the structured captcha description from
// `capture_captcha`, flattened into JSON-friendly fields. When no captcha is
// found every optional field is `None`, `widget_rendered` is false and
// `page_url` is the empty string.
#[derive(Debug, Serialize, JsonSchema)]
pub struct CaptureCaptchaResult {
    /// Kind tag (same values as detect_captcha). Null when no captcha.
    pub kind:                    Option<String>,
    /// Site key for third-party solver APIs (2Captcha, CapSolver, etc.).
    pub sitekey:                 Option<String>,
    /// CSS selector of the widget container.
    pub widget_selector:         Option<String>,
    // Bounding rect of the widget, when the core reported one.
    pub widget_rect:             Option<WidgetRectJson>,
    /// True when the widget element is actually in the DOM.
    /// False when only the runtime is loaded (Ahrefs-style lazy mount).
    pub widget_rendered:         bool,
    /// Field to write a solved token into (via `inject_captcha_token`).
    pub response_field_selector: Option<String>,
    /// Token already present — skip solving when set.
    pub existing_token:          Option<String>,
    /// Turnstile action / cdata attributes (pass through to solver).
    pub action:                  Option<String>,
    pub cdata:                   Option<String>,
    /// Current document URL — required by most solver APIs.
    // Empty (not the real URL) when no captcha was captured.
    pub page_url:                String,
}
432
433pub async fn capture_captcha_tool(
434    server: &VoidCrawlServer,
435    args: SessionIdArgs,
436) -> Result<CaptureCaptchaResult, ErrorData> {
437    let handle = lookup(server, &args.session_id).await?;
438    let page = handle.page.lock().await;
439    let info: Option<CaptchaInfo> = capture_captcha(&page).await.map_err(map_err)?;
440    Ok(match info {
441        None => CaptureCaptchaResult {
442            kind:                    None,
443            sitekey:                 None,
444            widget_selector:         None,
445            widget_rect:             None,
446            widget_rendered:         false,
447            response_field_selector: None,
448            existing_token:          None,
449            action:                  None,
450            cdata:                   None,
451            page_url:                String::new(),
452        },
453        Some(i) => CaptureCaptchaResult {
454            kind:                    Some(i.kind.as_str().to_string()),
455            sitekey:                 i.sitekey,
456            widget_selector:         i.widget_selector,
457            widget_rect:             i.widget_rect.map(|r| WidgetRectJson {
458                x:      r.x,
459                y:      r.y,
460                width:  r.width,
461                height: r.height,
462            }),
463            widget_rendered:         i.widget_rendered,
464            response_field_selector: i.response_field_selector,
465            existing_token:          i.existing_token,
466            action:                  i.action,
467            cdata:                   i.cdata,
468            page_url:                i.page_url,
469        },
470    })
471}
472
473// ── Inject captcha token ────────────────────────────────────────────────
474
// Arguments accepted by `inject_captcha_token_tool`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct InjectCaptchaTokenArgs {
    // Id of the session whose page receives the token.
    pub session_id: String,
    /// Token returned by your solver (e.g. 2Captcha's `gRecaptchaResponse`).
    pub token:      String,
    /// Kind tag. Must match the captcha on the page: one of
    /// "turnstile", "recaptcha", "hcaptcha". Defaults to whatever
    /// `capture_captcha` currently detects.
    // Any other string is rejected with `invalid_params`.
    #[serde(default)]
    pub kind:       Option<String>,
}
486
487pub async fn inject_captcha_token_tool(
488    server: &VoidCrawlServer,
489    args: InjectCaptchaTokenArgs,
490) -> Result<OkResult, ErrorData> {
491    let handle = lookup(server, &args.session_id).await?;
492    let page = handle.page.lock().await;
493    let kind = match args.kind.as_deref() {
494        Some("turnstile") => CaptchaKind::Turnstile,
495        Some("recaptcha") => CaptchaKind::Recaptcha,
496        Some("hcaptcha") => CaptchaKind::Hcaptcha,
497        Some(other) => {
498            return Err(ErrorData::invalid_params(
499                format!(
500                    "unknown captcha kind {other:?} — expected 'turnstile', 'recaptcha', or 'hcaptcha'"
501                ),
502                None,
503            ));
504        }
505        None => {
506            // Auto-detect from the page.
507            let info = capture_captcha(&page).await.map_err(map_err)?;
508            info.map(|i| i.kind).ok_or_else(|| {
509                ErrorData::invalid_params(
510                    String::from("no captcha detected on page — pass `kind` explicitly"),
511                    None,
512                )
513            })?
514        }
515    };
516    inject_captcha_token(&page, kind, &args.token).await.map_err(map_err)?;
517    Ok(OkResult { ok: true })
518}
519
520// ── Solve captcha ───────────────────────────────────────────────────────
521
// Arguments accepted by `solve_captcha`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct SolveCaptchaArgs {
    // Id of the session whose page hosts the captcha.
    pub session_id:        String,
    /// How long to wait (seconds) for the response token to appear after
    /// clicking the widget. Default 20.
    #[serde(default)]
    pub wait_secs:         Option<u64>,
    /// Click offset inside the widget's bounding rect from the left edge,
    /// in CSS pixels. Default 28 — matches the checkbox position for
    /// Turnstile / reCAPTCHA-v2 / hCaptcha anchor iframes. Override only
    /// when a site customises widget size.
    #[serde(default)]
    pub checkbox_offset_x: Option<f64>,
}
536
// Result of `solve_captcha`. `kind`, `clicked` and `token` are filled in as
// far as the attempt got; `solved` is the overall verdict.
#[derive(Debug, Serialize, JsonSchema)]
pub struct SolveCaptchaResult {
    /// Detected captcha kind (same tags as detect_captcha).
    pub kind:    Option<String>,
    /// Click coordinates dispatched in CSS pixels, if a widget rect was
    /// found.
    // Tuple order is (x, y).
    pub clicked: Option<(f64, f64)>,
    /// Response token value, if one was observed within wait_secs.
    /// Turnstile: `input[name=cf-turnstile-response]`.
    /// reCAPTCHA: `#g-recaptcha-response`.
    /// hCaptcha: `textarea[name=h-captcha-response]`.
    pub token:   Option<String>,
    /// True when a token was obtained (widget solved) or when the
    /// detector no longer reports a captcha (page passed the wall).
    // Also true when no captcha was detected in the first place.
    pub solved:  bool,
}
553
554pub async fn solve_captcha(
555    server: &VoidCrawlServer,
556    args: SolveCaptchaArgs,
557) -> Result<SolveCaptchaResult, ErrorData> {
558    let handle = lookup(server, &args.session_id).await?;
559    let page = handle.page.lock().await;
560
561    // 1. Identify what's on the page.
562    let kind = detect_captcha(&page).await.map_err(map_err)?;
563    let Some(kind) = kind else {
564        return Ok(SolveCaptchaResult {
565            kind:    None,
566            clicked: None,
567            token:   None,
568            solved:  true,
569        });
570    };
571    let kind_tag = kind.as_str().to_string();
572
573    // 2. Locate the widget's bounding rect. We try candidate selectors specific to
574    //    the detected kind, then fall back to generic iframe queries. Returns {x,
575    //    y, w, h} of the widget's *on-screen* box in CSS pixels, already offset by
576    //    any enclosing iframe origins.
577    const RECT_JS: &str = r#"
578        (function(kind) {
579            function rectOf(el) {
580                if (!el) return null;
581                const r = el.getBoundingClientRect();
582                if (r.width < 4 || r.height < 4) return null;
583                return { x: r.left, y: r.top, w: r.width, h: r.height };
584            }
585            const SELS = {
586                turnstile: [
587                    '.cf-turnstile iframe',
588                    'iframe[src*="challenges.cloudflare.com/turnstile"]',
589                    '.cf-turnstile',
590                ],
591                recaptcha: [
592                    'iframe[src*="recaptcha/api2/anchor"]',
593                    'iframe[src*="google.com/recaptcha"]',
594                    '.g-recaptcha',
595                ],
596                hcaptcha: [
597                    'iframe[src*="hcaptcha.com"][data-hcaptcha-widget-id]',
598                    'iframe[src*="hcaptcha.com"]',
599                    '.h-captcha',
600                ],
601            };
602            const list = SELS[kind] || [];
603            for (const sel of list) {
604                const el = document.querySelector(sel);
605                const r = rectOf(el);
606                if (r) return r;
607            }
608            return null;
609        })(arguments_kind_placeholder)
610    "#;
611    // The evaluate_js API takes a bare expression; inject the literal.
612    let rect_expr = RECT_JS.replace("arguments_kind_placeholder", &format!("{kind_tag:?}"));
613    let rect_val = page.evaluate_js(&rect_expr).await.map_err(map_err)?;
614
615    let Some(rect) = rect_val.as_object() else {
616        return Ok(SolveCaptchaResult {
617            kind:    Some(kind_tag),
618            clicked: None,
619            token:   None,
620            solved:  false,
621        });
622    };
623    let rx = rect.get("x").and_then(Value::as_f64).unwrap_or(0.0);
624    let ry = rect.get("y").and_then(Value::as_f64).unwrap_or(0.0);
625    let rh = rect.get("h").and_then(Value::as_f64).unwrap_or(0.0);
626
627    // 3. Compute click point — the standard checkbox sits ~28px from the iframe's
628    //    left edge, vertically centred. Small jitter keeps the event looking less
629    //    mechanical.
630    let offset_x = args.checkbox_offset_x.unwrap_or(28.0);
631    let jitter_x: f64 = (rx.fract() * 100.0) % 3.0 - 1.5; // deterministic ±1.5px
632    let jitter_y: f64 = (ry.fract() * 100.0) % 3.0 - 1.5;
633    let cx = rx + offset_x + jitter_x;
634    let cy = ry + rh / 2.0 + jitter_y;
635
636    // 4. Move, press, release — distinct MouseMoved first gives the widget's JS a
637    //    chance to observe a realistic pointer track.
638    page.dispatch_mouse_event(
639        void_crawl_core::DispatchMouseEventType::MouseMoved,
640        cx,
641        cy,
642        None,
643        None,
644        None,
645        None,
646        None,
647    )
648    .await
649    .map_err(map_err)?;
650    sleep(Duration::from_millis(60)).await;
651    page.dispatch_mouse_event(
652        DispatchMouseEventType::MousePressed,
653        cx,
654        cy,
655        Some(MouseButton::Left),
656        Some(1),
657        None,
658        None,
659        None,
660    )
661    .await
662    .map_err(map_err)?;
663    sleep(Duration::from_millis(50)).await;
664    page.dispatch_mouse_event(
665        DispatchMouseEventType::MouseReleased,
666        cx,
667        cy,
668        Some(MouseButton::Left),
669        Some(1),
670        None,
671        None,
672        None,
673    )
674    .await
675    .map_err(map_err)?;
676
677    // 5. Poll for the response token. Each family writes its solved token into a
678    //    known hidden input/textarea — presence of a non-empty value is the
679    //    canonical "solved" signal.
680    const TOKEN_JS: &str = r#"
681        (function() {
682            const q = (s) => { const el = document.querySelector(s); return el ? (el.value || el.textContent || '') : ''; };
683            const t = q('input[name="cf-turnstile-response"]') || q('textarea[name="cf-turnstile-response"]');
684            if (t) return t;
685            const r = q('#g-recaptcha-response') || q('textarea[name="g-recaptcha-response"]');
686            if (r) return r;
687            const h = q('textarea[name="h-captcha-response"]') || q('[name="h-captcha-response"]');
688            if (h) return h;
689            return '';
690        })()
691    "#;
692    let wait_for = Duration::from_secs(args.wait_secs.unwrap_or(20));
693    let deadline = Instant::now() + wait_for;
694    let mut token: Option<String> = None;
695    let mut solved = false;
696    while Instant::now() < deadline {
697        let v = page.evaluate_js(TOKEN_JS).await.map_err(map_err)?;
698        if let Some(s) = v.as_str()
699            && !s.is_empty()
700        {
701            token = Some(s.to_string());
702            solved = true;
703            break;
704        }
705        // Also accept: detector no longer sees a captcha (page passed
706        // the interstitial entirely, e.g. Cloudflare managed challenge).
707        if detect_captcha(&page).await.map_err(map_err)?.is_none() {
708            solved = true;
709            break;
710        }
711        sleep(Duration::from_millis(500)).await;
712    }
713
714    Ok(SolveCaptchaResult { kind: Some(kind_tag), clicked: Some((cx, cy)), token, solved })
715}
716
717// ── Teleport (geolocation / timezone / locale override) ─────────────────
718
// Arguments accepted by `teleport`: mandatory coordinates plus optional
// timezone, locale and accuracy overrides.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct TeleportArgs {
    // Id of the session whose page is overridden.
    pub session_id: String,
    /// Latitude in decimal degrees (e.g. 30.2672 for Austin, TX).
    pub latitude:   f64,
    /// Longitude in decimal degrees (e.g. -97.7431 for Austin, TX).
    pub longitude:  f64,
    /// IANA timezone matching the coordinates (e.g. "America/Chicago").
    /// Omit to leave the session's timezone unchanged.
    #[serde(default)]
    pub timezone:   Option<String>,
    /// Accept-Language / locale to match (e.g. "en-US"). Omit to leave it.
    #[serde(default)]
    pub locale:     Option<String>,
    /// GPS accuracy in meters reported to `navigator.geolocation`. Default 50.
    // Forwarded as-is (possibly `None`) to `set_geolocation`; the default is
    // not applied in this file.
    #[serde(default)]
    pub accuracy:   Option<f64>,
}
737
738/// Override the session's geolocation (and optionally timezone + locale) so
739/// `navigator.geolocation` and location-aware sites resolve to the given
740/// coordinates — "teleport" the browser. The geolocation permission is granted
741/// automatically. Apply AFTER `session_open` and BEFORE navigating; the
742/// override persists across navigations on the session.
743pub async fn teleport(server: &VoidCrawlServer, args: TeleportArgs) -> Result<OkResult, ErrorData> {
744    let handle = lookup(server, &args.session_id).await?;
745    let page = handle.page.lock().await;
746    page.set_geolocation(args.latitude, args.longitude, args.accuracy).await.map_err(map_err)?;
747    if let Some(tz) = args.timezone.as_deref() {
748        page.set_timezone(tz).await.map_err(map_err)?;
749    }
750    if let Some(loc) = args.locale.as_deref() {
751        page.set_locale(loc).await.map_err(map_err)?;
752    }
753    Ok(OkResult { ok: true })
754}
755
756// ── Helper ──────────────────────────────────────────────────────────────
757
758async fn lookup(server: &VoidCrawlServer, id: &str) -> Result<Arc<DedicatedSession>, ErrorData> {
759    server
760        .state()
761        .sessions
762        .get(id)
763        .await
764        .ok_or_else(|| ErrorData::invalid_params(format!("unknown session_id: {id}"), None))
765}