Skip to main content

voidcrawl_mcp/tools/
session.rs

//! Stateful session tools. Each `session_open` launches a dedicated
//! `BrowserSession` — headless with an ephemeral profile by default,
//! optionally headful and/or backed by a persistent `user_data_dir`.
//! Callers hold the returned `session_id` across tool calls until
//! `session_close`.
5
6use std::{env, sync::Arc, time::Duration};
7
8use rmcp::ErrorData;
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11use tokio::sync::Mutex;
12use uuid::Uuid;
13use void_crawl_core::{BrowserSession, VoidCrawlError};
14
15use crate::{errors::map_err, server::VoidCrawlServer, sessions::DedicatedSession, tools::wait};
16
/// Timeout, in seconds, that `navigate` falls back to when the caller
/// does not supply `timeout_secs`.
pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
18
// Arguments consumed by `open`. Every field is optional on the wire
// (`#[serde(default)]`), so an empty argument object requests a headless
// browser with no proxy and no explicit profile directory.
// NOTE(review): schemars typically publishes `///` field docs as schema
// descriptions, so the field docs below are presumably caller-facing
// text — confirm before rewording them.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct SessionOpenArgs {
    /// Run headful (visible) instead of headless. Default is headless.
    /// Set this to true if you want to log into a site manually in the
    /// spawned Chrome window (pair with `user_data_dir` to persist).
    #[serde(default)]
    pub headful:       bool,
    /// Optional proxy URL (e.g. "http://user:pass@host:port").
    #[serde(default)]
    pub proxy:         Option<String>,
    /// Persistent Chrome profile directory. Omit for an ephemeral,
    /// cookieless profile. Provide a path (e.g.
    /// `~/.config/voidcrawl-linkedin`) to mount a profile across
    /// sessions — log in once with `headful=true`, then subsequent
    /// sessions reuse the cookie. Pick a path DEDICATED to voidcrawl;
    /// Chrome locks a profile while running, so pointing at your
    /// daily-driver profile while normal Chrome is open will fail.
    // A leading `~` in this value is expanded by `open` via `expand_tilde`.
    #[serde(default)]
    pub user_data_dir: Option<String>,
}
39
// Value returned by `open`: the key under which the new session was
// registered. `open` generates it as a UUID v4 rendered as a string.
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionOpenResult {
    pub session_id: String,
}
44
45#[derive(Debug, Deserialize, JsonSchema, Default)]
46pub struct SessionNavigateArgs {
47    pub session_id:   String,
48    pub url:          String,
49    /// "networkidle" (default) or "selector:<css>". Event-driven.
50    #[serde(default)]
51    pub wait_for:     Option<String>,
52    #[serde(default)]
53    pub timeout_secs: Option<u64>,
54}
55
// Value returned by `navigate`. All three fields are copied unchanged
// from the response produced by the page's `goto_and_wait_for_idle` call.
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionNavigateResult {
    pub url:         String,
    pub status_code: Option<u16>,
    pub redirected:  bool,
}
62
// Arguments for tools that only need to name an existing session; in
// this file that is `content` and `close`.
#[derive(Debug, Deserialize, JsonSchema, Default)]
pub struct SessionIdArgs {
    pub session_id: String,
}
67
// Value returned by `content`. `html` is always present (a failure to
// read it fails the whole call); `url` and `title` are `None` when the
// corresponding page query errors or yields nothing.
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionContentResult {
    pub url:   Option<String>,
    pub title: Option<String>,
    pub html:  String,
}
74
// Value returned by `close`. `closed` is `false` when no session was
// registered under the given id, and `true` once a registered session
// has been removed and shut down without error.
#[derive(Debug, Serialize, JsonSchema)]
pub struct SessionCloseResult {
    pub closed: bool,
}
79
80pub async fn open(
81    server: &VoidCrawlServer,
82    args: SessionOpenArgs,
83) -> Result<SessionOpenResult, ErrorData> {
84    let mut builder = BrowserSession::builder();
85    builder = if args.headful { builder.headful() } else { builder.headless() };
86    if let Some(proxy) = args.proxy {
87        builder = builder.proxy(proxy);
88    }
89    if let Some(path) = args.user_data_dir {
90        builder = builder.user_data_dir(expand_tilde(&path));
91    }
92    let session = builder.launch().await.map_err(map_err)?;
93    let page = session.new_blank_page().await.map_err(map_err)?;
94    let id = Uuid::new_v4().to_string();
95    let handle = Arc::new(DedicatedSession {
96        session:          Arc::new(session),
97        page:             Mutex::new(page),
98        pending_download: Mutex::new(None),
99    });
100    server.state().sessions.insert(id.clone(), handle).await;
101    Ok(SessionOpenResult { session_id: id })
102}
103
104pub async fn navigate(
105    server: &VoidCrawlServer,
106    args: SessionNavigateArgs,
107) -> Result<SessionNavigateResult, ErrorData> {
108    let handle = lookup(server, &args.session_id).await?;
109    let page = handle.page.lock().await;
110    let timeout = Duration::from_secs(args.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));
111    let resp = page.goto_and_wait_for_idle(&args.url, timeout).await.map_err(map_err)?;
112    wait::apply_post_navigate(&page, args.wait_for.as_deref(), timeout).await.map_err(map_err)?;
113    Ok(SessionNavigateResult {
114        url:         resp.url,
115        status_code: resp.status_code,
116        redirected:  resp.redirected,
117    })
118}
119
120pub async fn content(
121    server: &VoidCrawlServer,
122    args: SessionIdArgs,
123) -> Result<SessionContentResult, ErrorData> {
124    let handle = lookup(server, &args.session_id).await?;
125    let page = handle.page.lock().await;
126    let html = page.content().await.map_err(map_err)?;
127    let title = page.title().await.ok().flatten();
128    let url = page.url().await.ok().flatten();
129    Ok(SessionContentResult { url, title, html })
130}
131
132pub async fn close(
133    server: &VoidCrawlServer,
134    args: SessionIdArgs,
135) -> Result<SessionCloseResult, ErrorData> {
136    let Some(handle) = server.state().sessions.remove(&args.session_id).await else {
137        return Ok(SessionCloseResult { closed: false });
138    };
139    close_handle(handle).await.map_err(map_err)?;
140    Ok(SessionCloseResult { closed: true })
141}
142
143async fn lookup(server: &VoidCrawlServer, id: &str) -> Result<Arc<DedicatedSession>, ErrorData> {
144    server
145        .state()
146        .sessions
147        .get(id)
148        .await
149        .ok_or_else(|| ErrorData::invalid_params(format!("unknown session_id: {id}"), None))
150}
151
152/// Shut down the browser backing a session.
153pub async fn close_handle(handle: Arc<DedicatedSession>) -> Result<(), VoidCrawlError> {
154    handle.session.close().await
155}
156
/// Expand a leading `~` or `~/` to the current user's home directory.
///
/// The home directory is read from the `HOME` environment variable. The
/// input is returned unchanged when:
/// - it does not start with `~`;
/// - it uses the `~user` form (anything other than `/` directly after
///   the `~`), which is not supported;
/// - `HOME` is unset, not valid Unicode, or empty.
///
/// An empty `HOME` is treated like an unset one so that `~/profile` is
/// never silently rewritten to the root-relative `/profile`. Trailing
/// slashes on `HOME` are dropped before joining, so the result never
/// contains a doubled separator at the seam.
fn expand_tilde(path: &str) -> String {
    let Some(rest) = path.strip_prefix('~') else { return path.to_owned() };

    // Only a bare `~` (no tail) or `~/…` refers to the current user.
    let tail = match rest.strip_prefix('/') {
        Some(tail) => Some(tail),
        None if rest.is_empty() => None,
        None => return path.to_owned(),
    };

    let home = match env::var("HOME") {
        Ok(home) if !home.is_empty() => home,
        _ => return path.to_owned(),
    };

    match tail {
        Some(tail) => format!("{}/{tail}", home.trim_end_matches('/')),
        None => home,
    }
}