Skip to main content

voidcrawl_mcp/tools/
actions.rs

1//! Session-scoped interaction primitives: click, type, eval JS, read
2//! title, extract text, capture network entries, wait for network idle.
3//!
4//! Each fn takes an existing session (already opened via `session_open`)
5//! and runs one action against its page. These are the Claude-Code-facing
6//! primitives — small, composable, no hidden state.
7
8use std::{sync::Arc, time::Duration};
9
10use rmcp::ErrorData;
11use schemars::JsonSchema;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14use tokio::time::{Instant, sleep};
15use void_crawl_core::{
16    CaptchaInfo, CaptchaKind, DispatchMouseEventType, MouseButton, ax, capture_captcha,
17    detect_captcha, inject_captcha_token,
18};
19
20use crate::{
21    errors::map_err, server::VoidCrawlServer, sessions::DedicatedSession,
22    tools::session::DEFAULT_TIMEOUT_SECS,
23};
24
25// ── Schema helpers ───────────────────────────────────────────────────────
26//
27// `serde_json::Value` fields make schemars emit a boolean `true` sub-schema,
28// which Claude Code's tool-output validator rejects — and one bad tool schema
29// fails the ENTIRE `tools/list`, so the client connects but registers zero
30// tools. These emit an explicit permissive object schema (`{}`) instead, which
31// validates cleanly across hosts.
32
33fn any_value_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
34    schemars::json_schema!({})
35}
36
37fn any_value_array_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
38    schemars::json_schema!({ "type": "array", "items": {} })
39}
40
41// ── Click ───────────────────────────────────────────────────────────────
42
43#[derive(Debug, Deserialize, JsonSchema, Default)]
44pub struct ClickArgs {
45    pub session_id: String,
46    /// CSS selector of the element to click.
47    pub selector:   String,
48}
49
50#[derive(Debug, Serialize, JsonSchema)]
51pub struct OkResult {
52    pub ok: bool,
53}
54
55pub async fn click(server: &VoidCrawlServer, args: ClickArgs) -> Result<OkResult, ErrorData> {
56    let handle = lookup(server, &args.session_id).await?;
57    let page = handle.page.lock().await;
58    page.click_element(&args.selector).await.map_err(map_err)?;
59    Ok(OkResult { ok: true })
60}
61
62// ── Click visual coords ─────────────────────────────────────────────────
63
64#[derive(Debug, Deserialize, JsonSchema, Default)]
65pub struct ClickVisualCoordsArgs {
66    pub session_id: String,
67    /// X coordinate in CSS pixels (pre-DPR).
68    pub x:          f64,
69    /// Y coordinate in CSS pixels (pre-DPR).
70    pub y:          f64,
71}
72
73pub async fn click_visual_coords(
74    server: &VoidCrawlServer,
75    args: ClickVisualCoordsArgs,
76) -> Result<OkResult, ErrorData> {
77    let handle = lookup(server, &args.session_id).await?;
78    let page = handle.page.lock().await;
79    // mousePressed + mouseReleased at (x, y) with left button. Matches
80    // the CDP recipe that React-rendered forms respond to when CSS
81    // selector clicks fail silently.
82    page.dispatch_mouse_event(
83        DispatchMouseEventType::MousePressed,
84        args.x,
85        args.y,
86        Some(MouseButton::Left),
87        Some(1),
88        None,
89        None,
90        None,
91    )
92    .await
93    .map_err(map_err)?;
94    page.dispatch_mouse_event(
95        DispatchMouseEventType::MouseReleased,
96        args.x,
97        args.y,
98        Some(MouseButton::Left),
99        Some(1),
100        None,
101        None,
102        None,
103    )
104    .await
105    .map_err(map_err)?;
106    Ok(OkResult { ok: true })
107}
108
109// ── Type text ───────────────────────────────────────────────────────────
110
111#[derive(Debug, Deserialize, JsonSchema, Default)]
112pub struct TypeTextArgs {
113    pub session_id: String,
114    /// CSS selector of the target input. When omitted, keys are
115    /// dispatched to whatever currently has focus.
116    #[serde(default)]
117    pub selector:   Option<String>,
118    pub text:       String,
119}
120
121pub async fn type_text(
122    server: &VoidCrawlServer,
123    args: TypeTextArgs,
124) -> Result<OkResult, ErrorData> {
125    let handle = lookup(server, &args.session_id).await?;
126    let page = handle.page.lock().await;
127    if let Some(sel) = args.selector {
128        page.type_into(&sel, &args.text).await.map_err(map_err)?;
129    } else {
130        // No selector: dispatch each character as a keypress to the
131        // currently-focused element (matches the React recipe where
132        // you click first, then type).
133        for ch in args.text.chars() {
134            let s = ch.to_string();
135            page.dispatch_key_event(
136                void_crawl_core::DispatchKeyEventType::Char,
137                Some(&s),
138                None,
139                Some(&s),
140                None,
141            )
142            .await
143            .map_err(map_err)?;
144        }
145    }
146    Ok(OkResult { ok: true })
147}
148
149// ── Eval JS ─────────────────────────────────────────────────────────────
150
151#[derive(Debug, Deserialize, JsonSchema, Default)]
152pub struct EvalJsArgs {
153    pub session_id: String,
154    /// A JavaScript expression. Its value is returned as JSON.
155    pub expression: String,
156}
157
158#[derive(Debug, Serialize, JsonSchema)]
159pub struct EvalJsResult {
160    #[schemars(schema_with = "any_value_schema")]
161    pub value: Value,
162}
163
164pub async fn eval_js(
165    server: &VoidCrawlServer,
166    args: EvalJsArgs,
167) -> Result<EvalJsResult, ErrorData> {
168    let handle = lookup(server, &args.session_id).await?;
169    let page = handle.page.lock().await;
170    let value = page.evaluate_js(&args.expression).await.map_err(map_err)?;
171    Ok(EvalJsResult { value })
172}
173
174#[derive(Debug, Deserialize, JsonSchema, Default)]
175pub struct EvalJsInFrameArgs {
176    pub session_id:        String,
177    /// Substring of the target frame's URL (e.g. "recaptcha/api2/bframe").
178    /// The expression runs inside the first frame whose URL contains this —
179    /// the way to reach a **cross-origin** iframe whose `contentDocument` is
180    /// null from the parent.
181    pub frame_url_pattern: String,
182    /// A JavaScript expression. Runs as the frame's own page script
183    /// (`document` is the frame's document). Its value is returned as JSON.
184    pub expression:        String,
185}
186
187pub async fn eval_js_in_frame(
188    server: &VoidCrawlServer,
189    args: EvalJsInFrameArgs,
190) -> Result<EvalJsResult, ErrorData> {
191    let handle = lookup(server, &args.session_id).await?;
192    let page = handle.page.lock().await;
193    let value = page
194        .evaluate_js_in_frame(&args.frame_url_pattern, &args.expression)
195        .await
196        .map_err(map_err)?;
197    Ok(EvalJsResult { value })
198}
199
200// ── Title ───────────────────────────────────────────────────────────────
201
202#[derive(Debug, Deserialize, JsonSchema, Default)]
203pub struct SessionIdArgs {
204    pub session_id: String,
205}
206
207#[derive(Debug, Serialize, JsonSchema)]
208pub struct TitleResult {
209    pub title: Option<String>,
210}
211
212pub async fn title(
213    server: &VoidCrawlServer,
214    args: SessionIdArgs,
215) -> Result<TitleResult, ErrorData> {
216    let handle = lookup(server, &args.session_id).await?;
217    let page = handle.page.lock().await;
218    Ok(TitleResult { title: page.title().await.ok().flatten() })
219}
220
221// ── Extract ─────────────────────────────────────────────────────────────
222
223#[derive(Debug, Deserialize, JsonSchema, Default)]
224pub struct ExtractArgs {
225    pub session_id: String,
226    /// CSS selector. Uses `document.querySelectorAll` — returns text
227    /// content (not inner HTML) for each matching element.
228    pub selector:   String,
229}
230
231#[derive(Debug, Serialize, JsonSchema)]
232pub struct ExtractResult {
233    pub texts: Vec<String>,
234}
235
236pub async fn extract(
237    server: &VoidCrawlServer,
238    args: ExtractArgs,
239) -> Result<ExtractResult, ErrorData> {
240    let handle = lookup(server, &args.session_id).await?;
241    let page = handle.page.lock().await;
242    let js = format!(
243        "Array.from(document.querySelectorAll({sel:?})).map(e => e.textContent || '')",
244        sel = args.selector
245    );
246    let value = page.evaluate_js(&js).await.map_err(map_err)?;
247    let texts = match value {
248        Value::Array(arr) => {
249            arr.into_iter().map(|v| v.as_str().unwrap_or("").to_string()).collect()
250        }
251        _ => Vec::new(),
252    };
253    Ok(ExtractResult { texts })
254}
255
256// ── Accessibility tree ────────────────────────────────────────────────
257
258#[derive(Debug, Deserialize, JsonSchema, Default)]
259pub struct AxTreeArgs {
260    pub session_id: String,
261    /// "compact" (default): a pruned, indented role/name outline meant for an
262    /// agent to read. "raw": the full CDP AX nodes for programmatic use.
263    #[serde(default)]
264    pub mode:       Option<String>,
265    /// Maximum descendant depth to traverse; omit for the whole tree.
266    #[serde(default)]
267    pub depth:      Option<i64>,
268}
269
270#[derive(Debug, Serialize, JsonSchema)]
271pub struct AxTreeResult {
272    /// Indented `role "name"` outline. Populated in compact mode only.
273    pub tree:        String,
274    /// Raw CDP AX nodes. Populated in raw mode only.
275    #[schemars(schema_with = "any_value_array_schema")]
276    pub nodes:       Vec<Value>,
277    /// Total AX nodes the browser returned.
278    pub node_count:  usize,
279    /// Non-ignored nodes carrying a non-empty accessible name. A low ratio of
280    /// `named_count` to `node_count` signals a thin/poor AX tree — prefer
281    /// falling back to HTML, screenshot, or CSS selectors on such pages.
282    pub named_count: usize,
283}
284
285pub async fn ax_tree(
286    server: &VoidCrawlServer,
287    args: AxTreeArgs,
288) -> Result<AxTreeResult, ErrorData> {
289    let handle = lookup(server, &args.session_id).await?;
290    let page = handle.page.lock().await;
291    let value = page.get_full_ax_tree(args.depth).await.map_err(map_err)?;
292    let nodes = match value {
293        Value::Array(arr) => arr,
294        _ => Vec::new(),
295    };
296    let (node_count, named_count) = ax::richness(&nodes);
297
298    let raw = args.mode.as_deref() == Some("raw");
299    let (tree, nodes) =
300        if raw { (String::new(), nodes) } else { (ax::compact_outline(&nodes), Vec::new()) };
301    Ok(AxTreeResult { tree, nodes, node_count, named_count })
302}
303
304#[derive(Debug, Deserialize, JsonSchema, Default)]
305pub struct ClickByRoleArgs {
306    pub session_id: String,
307    /// Computed accessibility role, e.g. "button", "link", "checkbox".
308    pub role:       String,
309    /// Computed accessible name (exact match).
310    pub name:       String,
311    /// 0-based index when several nodes match the same role + name.
312    #[serde(default)]
313    pub nth:        Option<usize>,
314}
315
316pub async fn click_by_role(
317    server: &VoidCrawlServer,
318    args: ClickByRoleArgs,
319) -> Result<OkResult, ErrorData> {
320    let handle = lookup(server, &args.session_id).await?;
321    let page = handle.page.lock().await;
322    page.click_by_role(&args.role, &args.name, args.nth.unwrap_or(0)).await.map_err(map_err)?;
323    Ok(OkResult { ok: true })
324}
325
326// ── Wait for network idle ───────────────────────────────────────────────
327
328#[derive(Debug, Deserialize, JsonSchema, Default)]
329pub struct WaitIdleArgs {
330    pub session_id:   String,
331    #[serde(default)]
332    pub timeout_secs: Option<u64>,
333}
334
335pub async fn wait_for_network_idle(
336    server: &VoidCrawlServer,
337    args: WaitIdleArgs,
338) -> Result<OkResult, ErrorData> {
339    let handle = lookup(server, &args.session_id).await?;
340    let page = handle.page.lock().await;
341    let timeout = Duration::from_secs(args.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));
342    page.wait_for_network_idle(timeout).await.map_err(map_err)?;
343    Ok(OkResult { ok: true })
344}
345
346// ── Network capture ─────────────────────────────────────────────────────
347
348#[derive(Debug, Serialize, JsonSchema)]
349pub struct NetworkEntry {
350    pub url:            String,
351    pub initiator_type: String,
352    pub transfer_size:  f64,
353    pub duration_ms:    f64,
354}
355
356#[derive(Debug, Serialize, JsonSchema)]
357pub struct NetworkCaptureResult {
358    pub entries: Vec<NetworkEntry>,
359}
360
361pub async fn network_capture(
362    server: &VoidCrawlServer,
363    args: SessionIdArgs,
364) -> Result<NetworkCaptureResult, ErrorData> {
365    let handle = lookup(server, &args.session_id).await?;
366    let page = handle.page.lock().await;
367    // Pull from the Resource Timing API — same source DevTools uses for
368    // the Network panel's "transferred" column.
369    const JS: &str = r#"
370        performance.getEntriesByType('resource').map(e => ({
371            url: e.name,
372            initiator_type: e.initiatorType || '',
373            transfer_size: e.transferSize || 0,
374            duration_ms: e.duration || 0,
375        }))
376    "#;
377    let value = page.evaluate_js(JS).await.map_err(map_err)?;
378    let entries = match value {
379        Value::Array(arr) => arr
380            .into_iter()
381            .filter_map(|v| {
382                let obj = v.as_object()?;
383                Some(NetworkEntry {
384                    url:            obj.get("url")?.as_str()?.to_string(),
385                    initiator_type: obj.get("initiator_type")?.as_str().unwrap_or("").to_string(),
386                    transfer_size:  obj.get("transfer_size").and_then(Value::as_f64).unwrap_or(0.0),
387                    duration_ms:    obj.get("duration_ms").and_then(Value::as_f64).unwrap_or(0.0),
388                })
389            })
390            .collect(),
391        _ => Vec::new(),
392    };
393    Ok(NetworkCaptureResult { entries })
394}
395
396// ── Detect captcha ──────────────────────────────────────────────────────
397
398#[derive(Debug, Serialize, JsonSchema)]
399pub struct DetectCaptchaResult {
400    pub kind: Option<String>,
401}
402
403pub async fn detect_captcha_tool(
404    server: &VoidCrawlServer,
405    args: SessionIdArgs,
406) -> Result<DetectCaptchaResult, ErrorData> {
407    let handle = lookup(server, &args.session_id).await?;
408    let page = handle.page.lock().await;
409    let kind = detect_captcha(&page).await.map_err(map_err)?;
410    Ok(DetectCaptchaResult { kind: kind.map(|k| k.as_str().to_string()) })
411}
412
413// ── Capture captcha (full structured) ───────────────────────────────────
414
415#[derive(Debug, Serialize, JsonSchema)]
416pub struct WidgetRectJson {
417    pub x:      f64,
418    pub y:      f64,
419    pub width:  f64,
420    pub height: f64,
421}
422
423#[derive(Debug, Serialize, JsonSchema)]
424pub struct CaptureCaptchaResult {
425    /// Kind tag (same values as detect_captcha). Null when no captcha.
426    pub kind:                    Option<String>,
427    /// Site key for third-party solver APIs (2Captcha, CapSolver, etc.).
428    pub sitekey:                 Option<String>,
429    /// CSS selector of the widget container.
430    pub widget_selector:         Option<String>,
431    pub widget_rect:             Option<WidgetRectJson>,
432    /// True when the widget element is actually in the DOM.
433    /// False when only the runtime is loaded (Ahrefs-style lazy mount).
434    pub widget_rendered:         bool,
435    /// Field to write a solved token into (via `inject_captcha_token`).
436    pub response_field_selector: Option<String>,
437    /// Token already present — skip solving when set.
438    pub existing_token:          Option<String>,
439    /// Turnstile action / cdata attributes (pass through to solver).
440    pub action:                  Option<String>,
441    pub cdata:                   Option<String>,
442    /// Current document URL — required by most solver APIs.
443    pub page_url:                String,
444}
445
446pub async fn capture_captcha_tool(
447    server: &VoidCrawlServer,
448    args: SessionIdArgs,
449) -> Result<CaptureCaptchaResult, ErrorData> {
450    let handle = lookup(server, &args.session_id).await?;
451    let page = handle.page.lock().await;
452    let info: Option<CaptchaInfo> = capture_captcha(&page).await.map_err(map_err)?;
453    Ok(match info {
454        None => CaptureCaptchaResult {
455            kind:                    None,
456            sitekey:                 None,
457            widget_selector:         None,
458            widget_rect:             None,
459            widget_rendered:         false,
460            response_field_selector: None,
461            existing_token:          None,
462            action:                  None,
463            cdata:                   None,
464            page_url:                String::new(),
465        },
466        Some(i) => CaptureCaptchaResult {
467            kind:                    Some(i.kind.as_str().to_string()),
468            sitekey:                 i.sitekey,
469            widget_selector:         i.widget_selector,
470            widget_rect:             i.widget_rect.map(|r| WidgetRectJson {
471                x:      r.x,
472                y:      r.y,
473                width:  r.width,
474                height: r.height,
475            }),
476            widget_rendered:         i.widget_rendered,
477            response_field_selector: i.response_field_selector,
478            existing_token:          i.existing_token,
479            action:                  i.action,
480            cdata:                   i.cdata,
481            page_url:                i.page_url,
482        },
483    })
484}
485
486// ── Inject captcha token ────────────────────────────────────────────────
487
488#[derive(Debug, Deserialize, JsonSchema, Default)]
489pub struct InjectCaptchaTokenArgs {
490    pub session_id: String,
491    /// Token returned by your solver (e.g. 2Captcha's `gRecaptchaResponse`).
492    pub token:      String,
493    /// Kind tag. Must match the captcha on the page: one of
494    /// "turnstile", "recaptcha", "hcaptcha". Defaults to whatever
495    /// `capture_captcha` currently detects.
496    #[serde(default)]
497    pub kind:       Option<String>,
498}
499
500pub async fn inject_captcha_token_tool(
501    server: &VoidCrawlServer,
502    args: InjectCaptchaTokenArgs,
503) -> Result<OkResult, ErrorData> {
504    let handle = lookup(server, &args.session_id).await?;
505    let page = handle.page.lock().await;
506    let kind = match args.kind.as_deref() {
507        Some("turnstile") => CaptchaKind::Turnstile,
508        Some("recaptcha") => CaptchaKind::Recaptcha,
509        Some("hcaptcha") => CaptchaKind::Hcaptcha,
510        Some(other) => {
511            return Err(ErrorData::invalid_params(
512                format!(
513                    "unknown captcha kind {other:?} — expected 'turnstile', 'recaptcha', or 'hcaptcha'"
514                ),
515                None,
516            ));
517        }
518        None => {
519            // Auto-detect from the page.
520            let info = capture_captcha(&page).await.map_err(map_err)?;
521            info.map(|i| i.kind).ok_or_else(|| {
522                ErrorData::invalid_params(
523                    String::from("no captcha detected on page — pass `kind` explicitly"),
524                    None,
525                )
526            })?
527        }
528    };
529    inject_captcha_token(&page, kind, &args.token).await.map_err(map_err)?;
530    Ok(OkResult { ok: true })
531}
532
533// ── Solve captcha ───────────────────────────────────────────────────────
534
535#[derive(Debug, Deserialize, JsonSchema, Default)]
536pub struct SolveCaptchaArgs {
537    pub session_id:        String,
538    /// How long to wait (seconds) for the response token to appear after
539    /// clicking the widget. Default 20.
540    #[serde(default)]
541    pub wait_secs:         Option<u64>,
542    /// Click offset inside the widget's bounding rect from the left edge,
543    /// in CSS pixels. Default 28 — matches the checkbox position for
544    /// Turnstile / reCAPTCHA-v2 / hCaptcha anchor iframes. Override only
545    /// when a site customises widget size.
546    #[serde(default)]
547    pub checkbox_offset_x: Option<f64>,
548}
549
550#[derive(Debug, Serialize, JsonSchema)]
551pub struct SolveCaptchaResult {
552    /// Detected captcha kind (same tags as detect_captcha).
553    pub kind:    Option<String>,
554    /// Click coordinates dispatched in CSS pixels, if a widget rect was
555    /// found.
556    pub clicked: Option<(f64, f64)>,
557    /// Response token value, if one was observed within wait_secs.
558    /// Turnstile: `input[name=cf-turnstile-response]`.
559    /// reCAPTCHA: `#g-recaptcha-response`.
560    /// hCaptcha: `textarea[name=h-captcha-response]`.
561    pub token:   Option<String>,
562    /// True when a token was obtained (widget solved) or when the
563    /// detector no longer reports a captcha (page passed the wall).
564    pub solved:  bool,
565}
566
567pub async fn solve_captcha(
568    server: &VoidCrawlServer,
569    args: SolveCaptchaArgs,
570) -> Result<SolveCaptchaResult, ErrorData> {
571    let handle = lookup(server, &args.session_id).await?;
572    let page = handle.page.lock().await;
573
574    // 1. Identify what's on the page.
575    let kind = detect_captcha(&page).await.map_err(map_err)?;
576    let Some(kind) = kind else {
577        return Ok(SolveCaptchaResult {
578            kind:    None,
579            clicked: None,
580            token:   None,
581            solved:  true,
582        });
583    };
584    let kind_tag = kind.as_str().to_string();
585
586    // 2. Locate the widget's bounding rect. We try candidate selectors specific to
587    //    the detected kind, then fall back to generic iframe queries. Returns {x,
588    //    y, w, h} of the widget's *on-screen* box in CSS pixels, already offset by
589    //    any enclosing iframe origins.
590    const RECT_JS: &str = r#"
591        (function(kind) {
592            function rectOf(el) {
593                if (!el) return null;
594                const r = el.getBoundingClientRect();
595                if (r.width < 4 || r.height < 4) return null;
596                return { x: r.left, y: r.top, w: r.width, h: r.height };
597            }
598            const SELS = {
599                turnstile: [
600                    '.cf-turnstile iframe',
601                    'iframe[src*="challenges.cloudflare.com/turnstile"]',
602                    '.cf-turnstile',
603                ],
604                recaptcha: [
605                    'iframe[src*="recaptcha/api2/anchor"]',
606                    'iframe[src*="google.com/recaptcha"]',
607                    '.g-recaptcha',
608                ],
609                hcaptcha: [
610                    'iframe[src*="hcaptcha.com"][data-hcaptcha-widget-id]',
611                    'iframe[src*="hcaptcha.com"]',
612                    '.h-captcha',
613                ],
614            };
615            const list = SELS[kind] || [];
616            for (const sel of list) {
617                const el = document.querySelector(sel);
618                const r = rectOf(el);
619                if (r) return r;
620            }
621            return null;
622        })(arguments_kind_placeholder)
623    "#;
624    // The evaluate_js API takes a bare expression; inject the literal.
625    let rect_expr = RECT_JS.replace("arguments_kind_placeholder", &format!("{kind_tag:?}"));
626    let rect_val = page.evaluate_js(&rect_expr).await.map_err(map_err)?;
627
628    let Some(rect) = rect_val.as_object() else {
629        return Ok(SolveCaptchaResult {
630            kind:    Some(kind_tag),
631            clicked: None,
632            token:   None,
633            solved:  false,
634        });
635    };
636    let rx = rect.get("x").and_then(Value::as_f64).unwrap_or(0.0);
637    let ry = rect.get("y").and_then(Value::as_f64).unwrap_or(0.0);
638    let rh = rect.get("h").and_then(Value::as_f64).unwrap_or(0.0);
639
640    // 3. Compute click point — the standard checkbox sits ~28px from the iframe's
641    //    left edge, vertically centred. Small jitter keeps the event looking less
642    //    mechanical.
643    let offset_x = args.checkbox_offset_x.unwrap_or(28.0);
644    let jitter_x: f64 = (rx.fract() * 100.0) % 3.0 - 1.5; // deterministic ±1.5px
645    let jitter_y: f64 = (ry.fract() * 100.0) % 3.0 - 1.5;
646    let cx = rx + offset_x + jitter_x;
647    let cy = ry + rh / 2.0 + jitter_y;
648
649    // 4. Move, press, release — distinct MouseMoved first gives the widget's JS a
650    //    chance to observe a realistic pointer track.
651    page.dispatch_mouse_event(
652        void_crawl_core::DispatchMouseEventType::MouseMoved,
653        cx,
654        cy,
655        None,
656        None,
657        None,
658        None,
659        None,
660    )
661    .await
662    .map_err(map_err)?;
663    sleep(Duration::from_millis(60)).await;
664    page.dispatch_mouse_event(
665        DispatchMouseEventType::MousePressed,
666        cx,
667        cy,
668        Some(MouseButton::Left),
669        Some(1),
670        None,
671        None,
672        None,
673    )
674    .await
675    .map_err(map_err)?;
676    sleep(Duration::from_millis(50)).await;
677    page.dispatch_mouse_event(
678        DispatchMouseEventType::MouseReleased,
679        cx,
680        cy,
681        Some(MouseButton::Left),
682        Some(1),
683        None,
684        None,
685        None,
686    )
687    .await
688    .map_err(map_err)?;
689
690    // 5. Poll for the response token. Each family writes its solved token into a
691    //    known hidden input/textarea — presence of a non-empty value is the
692    //    canonical "solved" signal.
693    const TOKEN_JS: &str = r#"
694        (function() {
695            const q = (s) => { const el = document.querySelector(s); return el ? (el.value || el.textContent || '') : ''; };
696            const t = q('input[name="cf-turnstile-response"]') || q('textarea[name="cf-turnstile-response"]');
697            if (t) return t;
698            const r = q('#g-recaptcha-response') || q('textarea[name="g-recaptcha-response"]');
699            if (r) return r;
700            const h = q('textarea[name="h-captcha-response"]') || q('[name="h-captcha-response"]');
701            if (h) return h;
702            return '';
703        })()
704    "#;
705    let wait_for = Duration::from_secs(args.wait_secs.unwrap_or(20));
706    let deadline = Instant::now() + wait_for;
707    let mut token: Option<String> = None;
708    let mut solved = false;
709    while Instant::now() < deadline {
710        let v = page.evaluate_js(TOKEN_JS).await.map_err(map_err)?;
711        if let Some(s) = v.as_str()
712            && !s.is_empty()
713        {
714            token = Some(s.to_string());
715            solved = true;
716            break;
717        }
718        // Also accept: detector no longer sees a captcha (page passed
719        // the interstitial entirely, e.g. Cloudflare managed challenge).
720        if detect_captcha(&page).await.map_err(map_err)?.is_none() {
721            solved = true;
722            break;
723        }
724        sleep(Duration::from_millis(500)).await;
725    }
726
727    Ok(SolveCaptchaResult { kind: Some(kind_tag), clicked: Some((cx, cy)), token, solved })
728}
729
730// ── Teleport (geolocation / timezone / locale override) ─────────────────
731
732#[derive(Debug, Deserialize, JsonSchema, Default)]
733pub struct TeleportArgs {
734    pub session_id: String,
735    /// Latitude in decimal degrees (e.g. 30.2672 for Austin, TX).
736    pub latitude:   f64,
737    /// Longitude in decimal degrees (e.g. -97.7431 for Austin, TX).
738    pub longitude:  f64,
739    /// IANA timezone matching the coordinates (e.g. "America/Chicago").
740    /// Omit to leave the session's timezone unchanged.
741    #[serde(default)]
742    pub timezone:   Option<String>,
743    /// Accept-Language / locale to match (e.g. "en-US"). Omit to leave it.
744    #[serde(default)]
745    pub locale:     Option<String>,
746    /// GPS accuracy in meters reported to `navigator.geolocation`. Default 50.
747    #[serde(default)]
748    pub accuracy:   Option<f64>,
749}
750
751/// Override the session's geolocation (and optionally timezone + locale) so
752/// `navigator.geolocation` and location-aware sites resolve to the given
753/// coordinates — "teleport" the browser. The geolocation permission is granted
754/// automatically. Apply AFTER `session_open` and BEFORE navigating; the
755/// override persists across navigations on the session.
756pub async fn teleport(server: &VoidCrawlServer, args: TeleportArgs) -> Result<OkResult, ErrorData> {
757    let handle = lookup(server, &args.session_id).await?;
758    let page = handle.page.lock().await;
759    page.set_geolocation(args.latitude, args.longitude, args.accuracy).await.map_err(map_err)?;
760    if let Some(tz) = args.timezone.as_deref() {
761        page.set_timezone(tz).await.map_err(map_err)?;
762    }
763    if let Some(loc) = args.locale.as_deref() {
764        page.set_locale(loc).await.map_err(map_err)?;
765    }
766    Ok(OkResult { ok: true })
767}
768
769// ── Helper ──────────────────────────────────────────────────────────────
770
771async fn lookup(server: &VoidCrawlServer, id: &str) -> Result<Arc<DedicatedSession>, ErrorData> {
772    server
773        .state()
774        .sessions
775        .get(id)
776        .await
777        .ok_or_else(|| ErrorData::invalid_params(format!("unknown session_id: {id}"), None))
778}