Skip to main content

voidcrawl_mcp/
server.rs

1//! Top-level MCP service. Owns `AppState` and the `ToolRouter`.
2//!
3//! Each tool method is a thin adapter that delegates to a free
4//! function in `crate::tools::*`; the heavy lifting lives there so
5//! this file stays focused on wire-protocol concerns.
6
7use std::sync::Arc;
8
9use rmcp::{
10    ErrorData,
11    handler::server::{
12        ServerHandler,
13        router::tool::ToolRouter,
14        wrapper::{Json, Parameters},
15    },
16    model::{CallToolResult, Implementation, ProtocolVersion, ServerCapabilities, ServerInfo},
17    tool, tool_handler, tool_router,
18};
19
20use crate::{
21    errors::map_err,
22    state::AppState,
23    tools,
24    tools::{
25        actions::{
26            AxTreeArgs, AxTreeResult, CaptureCaptchaResult, ClickArgs, ClickByRoleArgs,
27            ClickVisualCoordsArgs, DetectCaptchaResult, EvalJsArgs, EvalJsInFrameArgs,
28            EvalJsResult, ExtractArgs, ExtractResult, InjectCaptchaTokenArgs, NetworkCaptureResult,
29            OkResult, SessionIdArgs as ActionSessionIdArgs, SolveCaptchaArgs, SolveCaptchaResult,
30            TeleportArgs, TitleResult, TypeTextArgs, WaitIdleArgs,
31        },
32        download::{
33            DownloadArgs, DownloadArmArgs, DownloadArmResult, DownloadResult, DownloadWaitArgs,
34        },
35        fetch::{FetchArgs, FetchManyArgs, FetchManyResult, FetchResult},
36        introspect::PoolStatus,
37        screenshot::ScreenshotArgs,
38        session::{
39            SessionCloseResult, SessionContentResult, SessionIdArgs, SessionNavigateArgs,
40            SessionNavigateResult, SessionOpenArgs, SessionOpenResult,
41        },
42    },
43};
44
45/// The MCP service struct. Cheap to `Arc`-share.
46#[derive(Debug)]
47pub struct VoidCrawlServer {
48    state:       Arc<AppState>,
49    #[allow(dead_code, reason = "read by the `#[tool_handler]` macro expansion")]
50    tool_router: ToolRouter<Self>,
51}
52
53impl VoidCrawlServer {
54    pub fn new(state: Arc<AppState>) -> Self {
55        Self { state, tool_router: Self::tool_router() }
56    }
57
58    pub fn state(&self) -> &AppState {
59        &self.state
60    }
61}
62
63#[tool_router]
64impl VoidCrawlServer {
65    #[tool(
66        name = "fetch",
67        description = "Fetch a URL with stealth headless Chrome and return HTML + metadata. \
68Use for single-shot scrapes; for bulk use fetch_many."
69    )]
70    pub async fn fetch(
71        &self,
72        Parameters(args): Parameters<FetchArgs>,
73    ) -> Result<Json<FetchResult>, ErrorData> {
74        tools::fetch::run(self, args).await.map(Json).map_err(map_err)
75    }
76
77    #[tool(
78        name = "fetch_many",
79        description = "Fetch many URLs in parallel over the shared browser pool. Returns \
80one entry per request in input order; per-request errors do not abort the batch. \
81Each result carries `waited_ms` (time queued for a tab), and the batch carries a \
82`pool` summary {max_tabs, submitted, queued, max_waited_ms, note} — if `queued > 0` \
83you oversubscribed the pool; cap batches at `max_tabs` (see pool_status) for full parallelism."
84    )]
85    pub async fn fetch_many(
86        &self,
87        Parameters(args): Parameters<FetchManyArgs>,
88    ) -> Result<Json<FetchManyResult>, ErrorData> {
89        Ok(Json(tools::fetch::run_many(self, args).await))
90    }
91
92    #[tool(
93        name = "download",
94        description = "Download a file (PDF, archive, image, …) through stealth Chrome and scan \
95it with a built-in Rust antivirus gate (magic-byte type check + yara-x signatures) BEFORE it is \
96trusted. The file is fetched into a quarantine dir and only moved into `output_dir` if it passes \
97every check; a flagged file is deleted and the result has `ok=false` with a `reason`. Returns \
98{ok, verdict, path?, reason?, detected_mime, size}. Use this instead of `fetch` when you need the \
99actual bytes of a downloadable resource rather than rendered HTML. OPT-IN: disabled unless the \
100server is run with VOIDCRAWL_ALLOW_DOWNLOADS=1. NOTE: a `clean` verdict means it passed the \
101size + content-type + bundled-signature checks, not that it is guaranteed malware-free."
102    )]
103    pub async fn download(
104        &self,
105        Parameters(args): Parameters<DownloadArgs>,
106    ) -> Result<Json<DownloadResult>, ErrorData> {
107        tools::download::run(self, args).await.map(Json).map_err(map_err)
108    }
109
110    #[tool(
111        name = "download_arm",
112        description = "Arm an open session to capture the file produced by the NEXT \
113download-triggering action — for downloads started by clicking a button (e.g. Google Drive's \
114'Download'), where there's no stable URL to pass to `download`. Flow: session_open → \
115session_navigate → download_arm → click_by_role(\"button\",\"Download\") (+ \"Download anyway\" if \
116an interstitial appears) → download_wait. OPT-IN: needs VOIDCRAWL_ALLOW_DOWNLOADS=1."
117    )]
118    pub async fn download_arm(
119        &self,
120        Parameters(args): Parameters<DownloadArmArgs>,
121    ) -> Result<Json<DownloadArmResult>, ErrorData> {
122        tools::download::arm(self, args).await.map(Json).map_err(map_err)
123    }
124
125    #[tool(
126        name = "download_wait",
127        description = "Wait for the download armed by `download_arm` to land, scan it with the \
128antivirus gate, and (if clean) move it into the output dir. Returns {ok, verdict, path?, reason?, \
129detected_mime, size}. Call after the click(s) that trigger the download. NOTE: a `clean` verdict \
130means it passed the size + bundled-signature checks; the content-type disguise check does NOT run \
131on action downloads (no Content-Type is observed), so `clean` is not a malware-free guarantee."
132    )]
133    pub async fn download_wait(
134        &self,
135        Parameters(args): Parameters<DownloadWaitArgs>,
136    ) -> Result<Json<DownloadResult>, ErrorData> {
137        tools::download::wait(self, args).await.map(Json).map_err(map_err)
138    }
139
140    #[tool(
141        name = "screenshot",
142        description = "Load a URL in stealth headless Chrome and return a full-page PNG."
143    )]
144    pub async fn screenshot(
145        &self,
146        Parameters(args): Parameters<ScreenshotArgs>,
147    ) -> Result<CallToolResult, ErrorData> {
148        tools::screenshot::run(self, args).await
149    }
150
151    #[tool(
152        name = "session_open",
153        description = "Open a new stateful browser session with a dedicated Chrome instance. \
154Returns a session_id used by session_navigate / session_content / session_close. \
155Pass `user_data_dir` to mount a persistent profile (e.g. one already logged into LinkedIn); \
156omit it for an ephemeral cookieless profile. Set `headful=true` to bring up a visible window \
157(useful for a one-time manual login into the persistent profile)."
158    )]
159    pub async fn session_open(
160        &self,
161        Parameters(args): Parameters<SessionOpenArgs>,
162    ) -> Result<Json<SessionOpenResult>, ErrorData> {
163        tools::session::open(self, args).await.map(Json)
164    }
165
166    #[tool(
167        name = "session_navigate",
168        description = "Navigate the given session to a URL and wait for it to settle. \
169wait_for accepts 'networkidle' (default) or 'selector:<css>' (event-driven, no polling)."
170    )]
171    pub async fn session_navigate(
172        &self,
173        Parameters(args): Parameters<SessionNavigateArgs>,
174    ) -> Result<Json<SessionNavigateResult>, ErrorData> {
175        tools::session::navigate(self, args).await.map(Json)
176    }
177
178    #[tool(
179        name = "session_content",
180        description = "Return the current HTML, title, and URL of the given session's page."
181    )]
182    pub async fn session_content(
183        &self,
184        Parameters(args): Parameters<SessionIdArgs>,
185    ) -> Result<Json<SessionContentResult>, ErrorData> {
186        tools::session::content(self, args).await.map(Json)
187    }
188
189    #[tool(
190        name = "session_close",
191        description = "Close the given session: shut down its Chrome instance and free resources. \
192Always call this when you're done — otherwise the browser stays alive until the server exits."
193    )]
194    pub async fn session_close(
195        &self,
196        Parameters(args): Parameters<SessionIdArgs>,
197    ) -> Result<Json<SessionCloseResult>, ErrorData> {
198        tools::session::close(self, args).await.map(Json)
199    }
200
201    #[tool(
202        name = "pool_status",
203        description = "Report the browser pool configuration plus a live snapshot of \
204concurrency: `max_tabs`, `available` (free slots right now), `in_flight`, and \
205`sessions_open`. Read `available` before a big fan-out to size the batch."
206    )]
207    pub async fn pool_status(&self) -> Result<Json<PoolStatus>, ErrorData> {
208        tools::introspect::pool_status(self).await.map(Json).map_err(map_err)
209    }
210
211    #[tool(
212        name = "click",
213        description = "Click the first element matching a CSS selector in an open session."
214    )]
215    pub async fn click(
216        &self,
217        Parameters(args): Parameters<ClickArgs>,
218    ) -> Result<Json<OkResult>, ErrorData> {
219        tools::actions::click(self, args).await.map(Json)
220    }
221
222    #[tool(
223        name = "teleport",
224        description = "Override the session's geolocation (and optionally timezone + locale) so \
225navigator.geolocation and location-aware sites resolve to the given lat/lon — 'teleport' the \
226browser. The geolocation permission is granted automatically. Call after session_open and \
227BEFORE navigating; the override persists across navigations. For Google Maps 'near me' queries: \
228use a FRESH session per location, and navigate to the search twice (prime + read) — Maps resolves \
229location on first load and applies it on the next request."
230    )]
231    pub async fn teleport(
232        &self,
233        Parameters(args): Parameters<TeleportArgs>,
234    ) -> Result<Json<OkResult>, ErrorData> {
235        tools::actions::teleport(self, args).await.map(Json)
236    }
237
238    #[tool(
239        name = "click_visual_coords",
240        description = "Click at pixel coordinates (x, y) in CSS pixels. Use when selector-based \
241clicks fail silently (React forms that ignore dispatchEvent clicks). Coords are pre-DPR: \
242divide screenshot pixels by devicePixelRatio on HiDPI."
243    )]
244    pub async fn click_visual_coords(
245        &self,
246        Parameters(args): Parameters<ClickVisualCoordsArgs>,
247    ) -> Result<Json<OkResult>, ErrorData> {
248        tools::actions::click_visual_coords(self, args).await.map(Json)
249    }
250
251    #[tool(
252        name = "type_text",
253        description = "Type text into an input. With `selector`, focuses + types. Without, \
254dispatches keys to whatever currently has focus (pair with click_visual_coords first)."
255    )]
256    pub async fn type_text(
257        &self,
258        Parameters(args): Parameters<TypeTextArgs>,
259    ) -> Result<Json<OkResult>, ErrorData> {
260        tools::actions::type_text(self, args).await.map(Json)
261    }
262
263    #[tool(
264        name = "eval_js",
265        description = "Evaluate a JS expression in the session's page. Returns the value as JSON."
266    )]
267    pub async fn eval_js(
268        &self,
269        Parameters(args): Parameters<EvalJsArgs>,
270    ) -> Result<Json<EvalJsResult>, ErrorData> {
271        tools::actions::eval_js(self, args).await.map(Json)
272    }
273
274    #[tool(
275        name = "eval_js_in_frame",
276        description = "Evaluate a JS expression inside a specific (possibly cross-origin) iframe, \
277                       selected by a substring of its URL. The expression runs in that frame's own \
278                       execution context (`document` is the frame's document) — the way to read or \
279                       drive an iframe whose `contentDocument` is null from the parent. Returns the \
280                       value as JSON."
281    )]
282    pub async fn eval_js_in_frame(
283        &self,
284        Parameters(args): Parameters<EvalJsInFrameArgs>,
285    ) -> Result<Json<EvalJsResult>, ErrorData> {
286        tools::actions::eval_js_in_frame(self, args).await.map(Json)
287    }
288
289    #[tool(name = "title", description = "Return the current document title of the session.")]
290    pub async fn title(
291        &self,
292        Parameters(args): Parameters<ActionSessionIdArgs>,
293    ) -> Result<Json<TitleResult>, ErrorData> {
294        tools::actions::title(self, args).await.map(Json)
295    }
296
297    #[tool(
298        name = "extract",
299        description = "Run document.querySelectorAll(selector) and return each element's text content."
300    )]
301    pub async fn extract(
302        &self,
303        Parameters(args): Parameters<ExtractArgs>,
304    ) -> Result<Json<ExtractResult>, ErrorData> {
305        tools::actions::extract(self, args).await.map(Json)
306    }
307
308    #[tool(
309        name = "session_ax_tree",
310        description = "Return the page's accessibility (AX) tree — the semantic view assistive \
311tech sees, with implicit roles resolved, accessible names computed, and hidden nodes pruned. \
312Default `mode=compact` gives a pruned, indented role/name outline for reading; `mode=raw` gives \
313full CDP nodes. `named_count` vs `node_count` signals AX richness: when low, fall back to HTML, \
314screenshot, or CSS selectors. Complements (does not replace) the DOM/visual tools."
315    )]
316    pub async fn session_ax_tree(
317        &self,
318        Parameters(args): Parameters<AxTreeArgs>,
319    ) -> Result<Json<AxTreeResult>, ErrorData> {
320        tools::actions::ax_tree(self, args).await.map(Json)
321    }
322
323    #[tool(
324        name = "click_by_role",
325        description = "Click an element by its accessibility role + accessible name (e.g. \
326role=\"button\", name=\"Load more\") instead of a CSS selector. More durable across redesigns, \
327but flakier when names are ambiguous, localized, or duplicated — pair with session_ax_tree to \
328see available roles/names, and fall back to `click` (CSS) or `click_visual_coords` when it fails."
329    )]
330    pub async fn click_by_role(
331        &self,
332        Parameters(args): Parameters<ClickByRoleArgs>,
333    ) -> Result<Json<OkResult>, ErrorData> {
334        tools::actions::click_by_role(self, args).await.map(Json)
335    }
336
337    #[tool(
338        name = "wait_for_network_idle",
339        description = "Wait for Chrome's network-idle lifecycle event. Event-driven, no polling."
340    )]
341    pub async fn wait_for_network_idle(
342        &self,
343        Parameters(args): Parameters<WaitIdleArgs>,
344    ) -> Result<Json<OkResult>, ErrorData> {
345        tools::actions::wait_for_network_idle(self, args).await.map(Json)
346    }
347
348    #[tool(
349        name = "network_capture",
350        description = "Return the Resource Timing entries (URL, initiator type, transfer size, duration) \
351observed since the session's most recent navigation. Backed by performance.getEntriesByType('resource')."
352    )]
353    pub async fn network_capture(
354        &self,
355        Parameters(args): Parameters<ActionSessionIdArgs>,
356    ) -> Result<Json<NetworkCaptureResult>, ErrorData> {
357        tools::actions::network_capture(self, args).await.map(Json)
358    }
359
360    #[tool(
361        name = "solve_captcha",
362        description = "Click the Turnstile / reCAPTCHA-v2 / hCaptcha checkbox in an open session \
363using real CDP mouse events (not JS click — widgets detect that) and wait for the response \
364token to appear. Returns the kind detected, the coordinates clicked, the token value (once \
365the widget writes it into its hidden input), and a `solved` flag. No-op (solved=true) when \
366the page has no captcha. Only handles widgets whose anchor frame is already visible — if \
367detect_captcha reports `turnstile` because the runtime loaded but no widget mounted, trigger \
368the form submit that mounts the widget first."
369    )]
370    pub async fn solve_captcha(
371        &self,
372        Parameters(args): Parameters<SolveCaptchaArgs>,
373    ) -> Result<Json<SolveCaptchaResult>, ErrorData> {
374        tools::actions::solve_captcha(self, args).await.map(Json)
375    }
376
377    #[tool(
378        name = "detect_captcha",
379        description = "Probe the DOM for captcha / bot-wall markers. Returns the kind tag \
380(recaptcha, hcaptcha, turnstile, cloudflare_challenge, datadome) or null."
381    )]
382    pub async fn detect_captcha(
383        &self,
384        Parameters(args): Parameters<ActionSessionIdArgs>,
385    ) -> Result<Json<DetectCaptchaResult>, ErrorData> {
386        tools::actions::detect_captcha_tool(self, args).await.map(Json)
387    }
388
389    #[tool(
390        name = "capture_captcha",
391        description = "Deep structured probe of a captcha challenge. Returns kind, sitekey, \
392widget rect + selector, response-field selector, existing token (if already solved), page URL, \
393and Turnstile action/cdata attrs. Use this to hand off to a third-party solver API \
394(2Captcha / CapSolver / Anti-Captcha) or a human-in-the-loop flow, then call \
395`inject_captcha_token` with the resulting token."
396    )]
397    pub async fn capture_captcha(
398        &self,
399        Parameters(args): Parameters<ActionSessionIdArgs>,
400    ) -> Result<Json<CaptureCaptchaResult>, ErrorData> {
401        tools::actions::capture_captcha_tool(self, args).await.map(Json)
402    }
403
404    #[tool(
405        name = "inject_captcha_token",
406        description = "Write a solved captcha token into the page's hidden response field and \
407fire input/change events so React-controlled forms pick it up. For Turnstile, invokes any \
408registered `data-callback` function. `kind` defaults to whatever is currently detected; pass \
409explicitly ('turnstile'/'recaptcha'/'hcaptcha') to skip re-detection."
410    )]
411    pub async fn inject_captcha_token(
412        &self,
413        Parameters(args): Parameters<InjectCaptchaTokenArgs>,
414    ) -> Result<Json<OkResult>, ErrorData> {
415        tools::actions::inject_captcha_token_tool(self, args).await.map(Json)
416    }
417}
418
419#[tool_handler]
420impl ServerHandler for VoidCrawlServer {
421    fn get_info(&self) -> ServerInfo {
422        let mut info = ServerInfo::default();
423        info.protocol_version = ProtocolVersion::default();
424        info.capabilities = ServerCapabilities::builder().enable_tools().build();
425        info.server_info = {
426            let mut imp = Implementation::default();
427            imp.name = "voidcrawl-mcp".into();
428            imp.version = env!("CARGO_PKG_VERSION").into();
429            imp
430        };
431        // Shipped to EVERY MCP client on connect (Claude, opencode, Codex,
432        // Cursor, Cline, Zed, …), so the AX-first workflow + gotchas reach
433        // hosts that have no skill-file mechanism. Keep this condensed; the
434        // full guide is .claude/skills/voidcrawl/SKILL.md.
435        info.instructions = Some(
436            "Stealthy headless Chrome over a shared, fingerprint-patched tab pool — a drop-in \
437replacement for Playwright / Chromium MCP.\n\n\
438WORKFLOW. Stateless scrape: `fetch` (one URL) or `fetch_many` (parallel; returns \
439{results:[{ok,result,error}]} in input order — per-item errors don't abort the batch, and \
440status_code is nested under each item's `result`). Stateful flows (login, pagination, clicking): \
441`session_open` → `session_navigate` → … → `session_close`. ALWAYS session_close; sessions are \
442cookie-isolated.\n\n\
443PERCEIVE → ACT → EXTRACT. To see a page, call `session_ax_tree` — a compact role/name outline, \
444far cheaper than HTML (don't dump raw HTML to reason over a page). If `named_count` is low vs \
445`node_count` the accessibility tree is thin; fall back to `screenshot`. To click: `click` (CSS \
446selector) or `click_by_role` (accessibility role + accessible name — durable across redesigns); \
447last resort `click_visual_coords` for React forms that ignore synthetic clicks. To extract data, \
448run `extract` / `eval_js` with a JS expression and return data, not markup.\n\n\
449GOTCHAS. `click_by_role` name matching is EXACT (case + whitespace) — read the exact name from \
450`session_ax_tree` first; use `nth` for duplicates. After an in-page (SPA) click, \
451`wait_for_network_idle` may run to its full timeout — pass a short `timeout_secs` or use \
452`wait_for:\"selector:<css>\"`. On a captcha error, surface it and rotate proxy/profile; don't \
453retry the same URL and don't try to solve."
454                .into(),
455        );
456        info
457    }
458}