Skip to main content

voidcrawl_mcp/tools/
session.rs

//! Stateful session tools. Each `session_open` launches a dedicated
//! `BrowserSession` — headless with an ephemeral profile by default,
//! or headful / backed by a persistent `user_data_dir` when requested.
//! Callers hold the returned `session_id` across tool calls until
//! `session_close`.
5
6use std::{env, sync::Arc, time::Duration};
7
8use rmcp::ErrorData;
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11use tokio::sync::Mutex;
12use uuid::Uuid;
13use void_crawl_core::{AntibotVerdict, BrowserSession, VoidCrawlError};
14
15use crate::{
16    errors::map_err,
17    server::VoidCrawlServer,
18    sessions::DedicatedSession,
19    tools::{fetch::AntibotInfo, wait},
20};
21
/// Timeout, in seconds, used by [`navigate`] when the caller does not
/// supply `timeout_secs`.
pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
23
/// Arguments accepted by [`open`]. Every field is optional; the defaults
/// give a headless browser with no proxy and no persistent profile.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct SessionOpenArgs {
    /// Run headful (visible) instead of headless. Default is headless.
    /// Set this to true if you want to log into a site manually in the
    /// spawned Chrome window (pair with `user_data_dir` to persist).
    #[serde(default)]
    pub headful:       bool,
    /// Optional proxy URL (e.g. "http://user:pass@host:port").
    #[serde(default)]
    pub proxy:         Option<String>,
    /// Persistent Chrome profile directory. Omit for an ephemeral,
    /// cookieless profile. Provide a path (e.g.
    /// `~/.config/voidcrawl-linkedin`) to mount a profile across
    /// sessions — log in once with `headful=true`, then subsequent
    /// sessions reuse the cookie. Pick a path DEDICATED to voidcrawl;
    /// Chrome locks a profile while running, so pointing at your
    /// daily-driver profile while normal Chrome is open will fail.
    #[serde(default)]
    pub user_data_dir: Option<String>,
}
44
/// Value returned by [`open`].
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionOpenResult {
    /// Freshly generated UUID (v4) string under which the session was
    /// registered; pass it to the other session tools.
    pub session_id: String,
}
49
/// Arguments accepted by [`navigate`].
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct SessionNavigateArgs {
    /// Identifier of a session previously returned by [`open`].
    pub session_id:   String,
    /// Address to load in the session's page.
    pub url:          String,
    /// "networkidle" (default) or "selector:<css>". Event-driven.
    #[serde(default)]
    pub wait_for:     Option<String>,
    /// Timeout in seconds; [`DEFAULT_TIMEOUT_SECS`] when omitted. The same
    /// value bounds the initial load and the follow-up `wait_for` step
    /// separately, not their sum.
    #[serde(default)]
    pub timeout_secs: Option<u64>,
}
60
/// Value returned by [`navigate`]; `url`, `status_code` and `redirected`
/// are copied straight from the navigation response.
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionNavigateResult {
    /// URL as reported by the navigation response.
    pub url:         String,
    /// HTTP status code of the response, when one was reported.
    pub status_code: Option<u16>,
    /// Redirect flag as reported by the navigation response.
    pub redirected:  bool,
    /// Anti-bot / CDN vendor fingerprint of the navigated response, or `null`
    /// when no vendor was detected. See [`crate::tools::fetch::AntibotInfo`].
    pub antibot:     Option<AntibotInfo>,
}
70
/// Arguments for tools that only need to name a session; used by
/// [`content`] and [`close`].
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct SessionIdArgs {
    /// Identifier of a session previously returned by [`open`].
    pub session_id: String,
}
75
/// Value returned by [`content`].
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionContentResult {
    /// Current page URL; `None` when the page reports none or the query fails.
    pub url:   Option<String>,
    /// Current page title; `None` when the page reports none or the query fails.
    pub title: Option<String>,
    /// Page content as returned by the page's `content()` call.
    pub html:  String,
}
82
/// Value returned by [`close`].
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionCloseResult {
    /// `true` when a session was found and closed; `false` when the given
    /// `session_id` was not registered.
    pub closed: bool,
}
87
88pub async fn open(
89    server: &VoidCrawlServer,
90    args: SessionOpenArgs,
91) -> Result<SessionOpenResult, ErrorData> {
92    let mut builder = BrowserSession::builder();
93    builder = if args.headful { builder.headful() } else { builder.headless() };
94    if let Some(proxy) = args.proxy {
95        builder = builder.proxy(proxy);
96    }
97    if let Some(path) = args.user_data_dir {
98        builder = builder.user_data_dir(expand_tilde(&path));
99    }
100    let session = builder.launch().await.map_err(map_err)?;
101    let page = session.new_blank_page().await.map_err(map_err)?;
102    let id = Uuid::new_v4().to_string();
103    let handle = Arc::new(DedicatedSession {
104        session:          Arc::new(session),
105        page:             Mutex::new(page),
106        pending_download: Mutex::new(None),
107    });
108    server.state().sessions.insert(id.clone(), handle).await;
109    Ok(SessionOpenResult { session_id: id })
110}
111
112pub async fn navigate(
113    server: &VoidCrawlServer,
114    args: SessionNavigateArgs,
115) -> Result<SessionNavigateResult, ErrorData> {
116    let handle = lookup(server, &args.session_id).await?;
117    let page = handle.page.lock().await;
118    let timeout = Duration::from_secs(args.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));
119    let resp = page.goto_and_wait_for_idle(&args.url, timeout).await.map_err(map_err)?;
120    wait::apply_post_navigate(&page, args.wait_for.as_deref(), timeout).await.map_err(map_err)?;
121    let antibot = resp.antibot.filter(AntibotVerdict::detected).map(AntibotInfo::from);
122    Ok(SessionNavigateResult {
123        url: resp.url,
124        status_code: resp.status_code,
125        redirected: resp.redirected,
126        antibot,
127    })
128}
129
130pub async fn content(
131    server: &VoidCrawlServer,
132    args: SessionIdArgs,
133) -> Result<SessionContentResult, ErrorData> {
134    let handle = lookup(server, &args.session_id).await?;
135    let page = handle.page.lock().await;
136    let html = page.content().await.map_err(map_err)?;
137    let title = page.title().await.ok().flatten();
138    let url = page.url().await.ok().flatten();
139    Ok(SessionContentResult { url, title, html })
140}
141
142pub async fn close(
143    server: &VoidCrawlServer,
144    args: SessionIdArgs,
145) -> Result<SessionCloseResult, ErrorData> {
146    let Some(handle) = server.state().sessions.remove(&args.session_id).await else {
147        return Ok(SessionCloseResult { closed: false });
148    };
149    close_handle(handle).await.map_err(map_err)?;
150    Ok(SessionCloseResult { closed: true })
151}
152
153async fn lookup(server: &VoidCrawlServer, id: &str) -> Result<Arc<DedicatedSession>, ErrorData> {
154    server
155        .state()
156        .sessions
157        .get(id)
158        .await
159        .ok_or_else(|| ErrorData::invalid_params(format!("unknown session_id: {id}"), None))
160}
161
162/// Shut down the browser backing a session.
163pub async fn close_handle(handle: Arc<DedicatedSession>) -> Result<(), VoidCrawlError> {
164    handle.session.close().await
165}
166
/// Expand a leading `~/` or bare `~` using the `HOME` env var.
///
/// Returns the input unchanged when it does not start with `~`, when it uses
/// the `~user` form, or when `HOME` is unset or not valid Unicode. A bare `~`
/// yields `HOME` verbatim. For `~/tail`, trailing slashes on `HOME` are
/// dropped before joining, so `HOME=/` or `HOME=/home/u/` does not produce a
/// doubled separator such as `//tail`.
fn expand_tilde(path: &str) -> String {
    let Some(rest) = path.strip_prefix('~') else { return path.to_owned() };
    let Ok(home) = env::var("HOME") else { return path.to_owned() };
    if rest.is_empty() {
        return home;
    }
    match rest.strip_prefix('/') {
        // Trim so that exactly one `/` separates the home directory from the tail.
        Some(tail) => format!("{}/{tail}", home.trim_end_matches('/')),
        // `~user/...` is not supported; hand the path back untouched.
        None => path.to_owned(),
    }
}