use std::{
collections::HashMap,
sync::{Arc, LazyLock},
time::Duration,
};
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use tokio::{
io::{AsyncBufReadExt, AsyncWriteExt, BufReader},
sync::Mutex,
};
use tracing::{debug, info};
use ulid::Ulid;
use crate::{
BrowserHandle, BrowserPool,
config::StealthLevel,
error::{BrowserError, Result},
page::WaitUntil,
};
/// A single incoming JSON-RPC request, deserialized from one line of input.
///
/// `params` and `id` are optional on the wire: when absent they take
/// `Value`'s default because of `#[serde(default)]`.
#[derive(Debug, Deserialize)]
pub struct JsonRpcRequest {
/// Protocol version string sent by the client.
pub jsonrpc: String,
/// Name of the method to invoke (e.g. `tools/call`).
pub method: String,
/// Method parameters; defaulted when the client omits them.
#[serde(default)]
pub params: Value,
/// Request identifier echoed back in the response; defaulted when omitted.
#[serde(default)]
pub id: Value,
}
/// An outgoing JSON-RPC response.
///
/// `result` and `error` are skipped during serialization when they are
/// `None`; the constructors in this file populate exactly one of them.
#[derive(Debug, Serialize)]
pub struct JsonRpcResponse {
/// Protocol version tag written to every response.
jsonrpc: &'static str,
/// Success payload; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
result: Option<Value>,
/// Error payload; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<JsonRpcError>,
/// Identifier copied from the request this response answers.
id: Value,
}
/// The `error` member of a JSON-RPC response.
#[derive(Debug, Serialize)]
pub struct JsonRpcError {
/// Numeric error code (e.g. `-32601` for an unknown method).
code: i32,
/// Human-readable description of the failure.
message: String,
/// Optional additional error details; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
data: Option<Value>,
}
impl JsonRpcResponse {
const fn ok(id: Value, result: Value) -> Self {
Self {
jsonrpc: "2.0",
result: Some(result),
error: None,
id,
}
}
fn err(id: Value, code: i32, message: impl Into<String>) -> Self {
Self {
jsonrpc: "2.0",
result: None,
error: Some(JsonRpcError {
code,
message: message.into(),
data: None,
}),
id,
}
}
fn method_not_found(id: Value, method: &str) -> Self {
Self::err(id, -32601, format!("Method not found: {method}"))
}
}
/// Per-session state tracked by the MCP server, keyed by session id.
struct McpSession {
// Browser handle for this session; the inner `Option` is emptied with
// `take()` when the session is released.
handle: Arc<Mutex<Option<BrowserHandle>>>,
// Stealth level requested at acquire time; reported back in lowercase.
stealth_level: StealthLevel,
// The four labels below are stored exactly as passed to `browser_acquire`
// and echoed by `resources/read`.
tls_profile: Option<String>,
webrtc_policy: Option<String>,
cdp_fix_mode: Option<String>,
proxy: Option<String>,
// URL recorded by the last successful `browser_navigate`; the eval,
// screenshot and content tools require it to be set.
current_url: Option<String>,
}
/// Tool descriptors returned by `tools/list`, built once on first access.
///
/// Each entry carries the tool `name`, a `description`, and a JSON Schema
/// (`inputSchema`) for its arguments. `browser_find_similar` and
/// `browser_verify_stealth` are only listed when the `similarity` /
/// `stealth` features are compiled in.
///
/// Every tool whose handler reads `timeout_secs` advertises it here as a
/// `number`, matching how the handlers parse it. `browser_verify_stealth`
/// stays `integer` because its handler reads whole seconds.
static TOOL_DEFINITIONS: LazyLock<Vec<Value>> = LazyLock::new(|| {
    let mut tools = vec![
        json!({
            "name": "browser_acquire",
            "description": "Acquire a browser from the pool and open a session. The optional parameters are stored as session metadata labels and echoed back in the response; they do not reconfigure the pool-acquired browser at runtime. Use them to annotate sessions (e.g. for `browser_verify_stealth` attribution).",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "stealth_level": {
                        "type": "string",
                        "enum": ["none", "basic", "advanced"],
                        "description": "Anti-detection intensity. Defaults to 'advanced'."
                    },
                    "tls_profile": {
                        "type": "string",
                        "description": "TLS fingerprint profile label (free-form; requires stealth feature; browser-launch-level). Examples: chrome131, firefox133, safari18, edge131."
                    },
                    "webrtc_policy": {
                        "type": "string",
                        "description": "WebRTC IP-leak policy label (free-form; requires stealth feature; browser-launch-level). Examples: allow_all, disable_non_proxied, block_all."
                    },
                    "cdp_fix_mode": {
                        "type": "string",
                        "enum": ["addBinding", "isolatedWorld", "enableDisable", "none"],
                        "description": "CDP Runtime.enable leak-mitigation mode."
                    },
                    "proxy": {
                        "type": "string",
                        "description": "HTTP/SOCKS proxy URL, e.g. 'http://user:pass@host:port' (browser-launch-level)."
                    }
                },
                "required": []
            }
        }),
        json!({
            "name": "browser_navigate",
            "description": "Navigate to a URL within a session. Opens a new page if needed.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" },
                    "url": { "type": "string" },
                    "timeout_secs": { "type": "number", "default": 30 }
                },
                "required": ["session_id", "url"]
            }
        }),
        json!({
            "name": "browser_eval",
            "description": "Evaluate JavaScript in the current page of a session.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" },
                    "script": { "type": "string" },
                    "timeout_secs": { "type": "number", "default": 30, "description": "Timeout in seconds for loading the session's current URL." }
                },
                "required": ["session_id", "script"]
            }
        }),
        json!({
            "name": "browser_screenshot",
            "description": "Capture a full-page PNG screenshot. Returns base64-encoded PNG.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" },
                    "timeout_secs": { "type": "number", "default": 30, "description": "Timeout in seconds for loading the session's current URL." }
                },
                "required": ["session_id"]
            }
        }),
        json!({
            "name": "browser_content",
            "description": "Get the full HTML content of the current page.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" },
                    "timeout_secs": { "type": "number", "default": 30, "description": "Timeout in seconds for loading the session's current URL." }
                },
                "required": ["session_id"]
            }
        }),
        json!({
            "name": "browser_release",
            "description": "Release a browser session back to the pool.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" }
                },
                "required": ["session_id"]
            }
        }),
        json!({
            "name": "pool_stats",
            "description": "Return current browser pool statistics.",
            "inputSchema": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }),
    ];
    tools.push(json!({
        "name": "browser_query",
        "description": "Navigate to a URL, query all elements matching a CSS selector, and return their text content or specific attributes. If `fields` is omitted each result is a plain string (the text content). If `fields` is supplied each result is an object with one key per field.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "selector": { "type": "string", "description": "CSS selector passed to querySelectorAll." },
                "fields": {
                    "type": "object",
                    "description": "Map of output field name → { \"attr\": \"attribute-name\" }. Omit `attr` to get text content for that field.",
                    "additionalProperties": {
                        "type": "object",
                        "properties": { "attr": { "type": "string" } }
                    }
                },
                "limit": { "type": "integer", "default": 50, "description": "Maximum number of nodes to return." },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "selector"]
        }
    }));
    tools.push(json!({
        "name": "browser_extract",
        "description": "Navigate to a URL and perform schema-driven structured extraction. Each element matching `root_selector` becomes one result object; fields within each root are resolved by their own sub-selectors relative to the root. This is the runtime equivalent of the `#[derive(Extract)]` macro.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "root_selector": { "type": "string", "description": "CSS selector whose matches become the root of each result object." },
                "schema": {
                    "type": "object",
                    "description": "Map of field name → { \"selector\": \"...\", \"attr\": \"...\", \"required\": true/false }.",
                    "additionalProperties": {
                        "type": "object",
                        "properties": {
                            "selector": { "type": "string" },
                            "attr": { "type": "string" },
                            "required": { "type": "boolean", "default": false }
                        },
                        "required": ["selector"]
                    }
                },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "root_selector", "schema"]
        }
    }));
    #[cfg(feature = "similarity")]
    tools.push(json!({
        "name": "browser_find_similar",
        "description": "Navigate to a URL and find DOM elements that are structurally similar to a reference element (identified by a CSS selector). Useful when a site has been redesigned and stored selectors no longer match. Requires the `similarity` feature.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "reference_selector": { "type": "string", "description": "CSS selector identifying the reference node. The first match is used." },
                "threshold": { "type": "number", "default": 0.7, "description": "Minimum similarity score [0.0, 1.0]." },
                "max_results": { "type": "integer", "default": 10 },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "reference_selector"]
        }
    }));
    #[cfg(feature = "stealth")]
    tools.push(json!({
        "name": "browser_verify_stealth",
        "description": "Navigate to a URL and run built-in stealth checks with optional transport diagnostics (JA3/JA4/HTTP3). Returns a DiagnosticReport with pass/fail results, coverage percentage, and transport mismatch details when observation fields are provided.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string", "description": "URL to navigate to before running checks." },
                "timeout_secs": { "type": "integer", "default": 15, "description": "Navigation timeout in seconds." },
                "observed_ja3_hash": { "type": "string", "description": "Optional observed JA3 hash to compare against expected profile." },
                "observed_ja4": { "type": "string", "description": "Optional observed JA4 fingerprint to compare against expected profile." },
                "observed_http3_perk_text": { "type": "string", "description": "Optional observed HTTP/3 perk text (SETTINGS|PSEUDO_HEADERS)." },
                "observed_http3_perk_hash": { "type": "string", "description": "Optional observed HTTP/3 perk hash." }
            },
            "required": ["session_id", "url"]
        }
    }));
    tools
});
/// MCP server that exposes a browser pool as JSON-RPC tools and resources.
pub struct McpBrowserServer {
// Pool that browsers are acquired from and whose stats are reported.
pool: Arc<BrowserPool>,
// Live sessions keyed by the session id handed out by `browser_acquire`.
sessions: Arc<Mutex<HashMap<String, McpSession>>>,
}
/// One field of a `browser_extract` schema, parsed from the tool arguments.
struct ExtractFieldDef {
// Selector resolved against each root element; the first match is used.
selector: String,
// Attribute to read from the matched node; text content is read when `None`.
attr: Option<String>,
// When true, a root whose field cannot be matched is dropped from the results.
required: bool,
}
impl McpBrowserServer {
/// Creates a server backed by `pool` with an empty session table.
pub fn new(pool: Arc<BrowserPool>) -> Self {
    let sessions = Arc::new(Mutex::new(HashMap::new()));
    Self { pool, sessions }
}
pub async fn run(&self) -> Result<()> {
info!("MCP browser server starting (stdin/stdout mode)");
let stdin = tokio::io::stdin();
let stdout = tokio::io::stdout();
let mut reader = BufReader::new(stdin).lines();
let mut stdout = stdout;
while let Some(line) = reader.next_line().await.map_err(BrowserError::Io)? {
let line = line.trim().to_string();
if line.is_empty() {
continue;
}
debug!(?line, "MCP request");
let response = match serde_json::from_str::<Value>(&line) {
Ok(req) => {
let is_well_formed_notification = req.is_object()
&& req.get("jsonrpc").and_then(Value::as_str) == Some("2.0")
&& req.get("id").is_none()
&& req.get("method").and_then(Value::as_str).is_some();
let response = self.dispatch(&req).await;
if is_well_formed_notification {
continue;
}
response
}
Err(e) => serde_json::to_value(JsonRpcResponse::err(
Value::Null,
-32700,
format!("Parse error: {e}"),
))
.unwrap_or_else(|_| {
json!({"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}})
}),
};
let mut out = serde_json::to_string(&response).unwrap_or_default();
out.push('\n');
stdout
.write_all(out.as_bytes())
.await
.map_err(BrowserError::Io)?;
stdout.flush().await.map_err(BrowserError::Io)?;
}
info!("MCP browser server stopping (stdin closed)");
Ok(())
}
pub async fn dispatch(&self, req: &Value) -> Value {
let typed: JsonRpcRequest = match serde_json::from_value(req.clone()) {
Ok(r) => r,
Err(e) => {
return json!({
"jsonrpc": "2.0",
"id": req.get("id").cloned().unwrap_or(Value::Null),
"error": { "code": -32700, "message": format!("Parse error: {e}") }
});
}
};
let resp = self.handle_request(typed).await;
serde_json::to_value(resp).unwrap_or_else(|_| json!({"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}}))
}
async fn handle_request(&self, req: JsonRpcRequest) -> JsonRpcResponse {
let id = req.id.clone();
match req.method.as_str() {
"initialize" => Self::handle_initialize(id),
"tools/list" => Self::handle_tools_list(id),
"tools/call" => self.handle_tools_call(id, req.params).await,
"resources/list" => self.handle_resources_list(id).await,
"resources/read" => self.handle_resources_read(id, req.params).await,
"notifications/initialized" | "ping" => {
JsonRpcResponse::ok(id, json!({}))
}
other => JsonRpcResponse::method_not_found(id, other),
}
}
/// Answers `initialize` with the protocol version, the advertised
/// capabilities, and this crate's name and version.
fn handle_initialize(id: Value) -> JsonRpcResponse {
    let capabilities = json!({
        "tools": { "listChanged": false },
        "resources": { "listChanged": false, "subscribe": false }
    });
    let server_info = json!({
        "name": "stygian-browser",
        "version": env!("CARGO_PKG_VERSION")
    });
    let payload = json!({
        "protocolVersion": "2025-11-25",
        "capabilities": capabilities,
        "serverInfo": server_info
    });
    JsonRpcResponse::ok(id, payload)
}
/// Answers `tools/list` with every descriptor in `TOOL_DEFINITIONS`.
fn handle_tools_list(id: Value) -> JsonRpcResponse {
    let payload = json!({ "tools": TOOL_DEFINITIONS.as_slice() });
    JsonRpcResponse::ok(id, payload)
}
async fn handle_tools_call(&self, id: Value, params: Value) -> JsonRpcResponse {
let name = match params.get("name").and_then(|v| v.as_str()) {
Some(n) => n.to_string(),
None => return JsonRpcResponse::err(id, -32602, "Missing tool 'name'"),
};
let args = params
.get("arguments")
.cloned()
.unwrap_or_else(|| json!({}));
let result = match name.as_str() {
"browser_acquire" => self.tool_browser_acquire(&args).await,
"browser_navigate" => self.tool_browser_navigate(&args).await,
"browser_eval" => self.tool_browser_eval(&args).await,
"browser_screenshot" => self.tool_browser_screenshot(&args).await,
"browser_content" => self.tool_browser_content(&args).await,
#[cfg(feature = "stealth")]
"browser_verify_stealth" => self.tool_browser_verify_stealth(&args).await,
#[cfg(not(feature = "stealth"))]
"browser_verify_stealth" => Err(BrowserError::ConfigError(
"browser_verify_stealth requires the 'stealth' feature".to_string(),
)),
"browser_release" => self.tool_browser_release(&args).await,
"pool_stats" => Ok(self.tool_pool_stats()),
"browser_query" => self.tool_browser_query(&args).await,
"browser_extract" => self.tool_browser_extract(&args).await,
#[cfg(feature = "similarity")]
"browser_find_similar" => self.tool_browser_find_similar(&args).await,
other => Err(BrowserError::ConfigError(format!("Unknown tool: {other}"))),
};
match result {
Ok(content) => JsonRpcResponse::ok(
id,
json!({ "content": [{ "type": "text", "text": content.to_string() }], "isError": false }),
),
Err(e) => JsonRpcResponse::ok(
id,
json!({ "content": [{ "type": "text", "text": e.to_string() }], "isError": true }),
),
}
}
async fn tool_browser_acquire(&self, args: &Value) -> Result<Value> {
let stealth_level = args
.get("stealth_level")
.and_then(|v| v.as_str())
.map(|s| match s {
"none" => StealthLevel::None,
"basic" => StealthLevel::Basic,
_ => StealthLevel::Advanced,
})
.unwrap_or_default();
let tls_profile = args
.get("tls_profile")
.and_then(|v| v.as_str())
.map(ToString::to_string);
let webrtc_policy = args
.get("webrtc_policy")
.and_then(|v| v.as_str())
.map(ToString::to_string);
let cdp_fix_mode = args
.get("cdp_fix_mode")
.and_then(|v| v.as_str())
.map(ToString::to_string);
let proxy = args
.get("proxy")
.and_then(|v| v.as_str())
.map(ToString::to_string);
let handle = self.pool.acquire().await?;
let session_id = Ulid::new().to_string();
let effective_stealth = format!("{stealth_level:?}").to_lowercase();
self.sessions.lock().await.insert(
session_id.clone(),
McpSession {
handle: Arc::new(Mutex::new(Some(handle))),
stealth_level,
tls_profile: tls_profile.clone(),
webrtc_policy: webrtc_policy.clone(),
cdp_fix_mode: cdp_fix_mode.clone(),
proxy: proxy.clone(),
current_url: None,
},
);
info!(%session_id, %effective_stealth, "MCP session acquired");
Ok(json!({
"session_id": session_id,
"requested_metadata": {
"stealth_level": effective_stealth,
"tls_profile": tls_profile,
"webrtc_policy": webrtc_policy,
"cdp_fix_mode": cdp_fix_mode,
"proxy": proxy
}
}))
}
/// `browser_verify_stealth`: opens a page in the session, navigates to
/// `url`, runs the stealth diagnostic with any supplied transport
/// observations, and adds the session's `requested_stealth_level` to the
/// report when the report is a JSON object.
///
/// The page is closed on both the navigation-failure and normal paths.
///
/// # Errors
/// Fails on missing `session_id`/`url`, an unknown or released session,
/// navigation failure, a failing page close, or a failing diagnostic.
#[cfg(feature = "stealth")]
async fn tool_browser_verify_stealth(&self, args: &Value) -> Result<Value> {
    /// Reads an optional string observation from the tool arguments.
    fn observation(args: &Value, key: &str) -> Option<String> {
        args.get(key).and_then(Value::as_str).map(str::to_owned)
    }

    let session_id = Self::require_str(args, "session_id")?;
    let url = Self::require_str(args, "url")?;
    let timeout = Duration::from_secs(
        args.get("timeout_secs")
            .and_then(Value::as_u64)
            .unwrap_or(15),
    );
    let observed = crate::diagnostic::TransportObservations {
        ja3_hash: observation(args, "observed_ja3_hash"),
        ja4: observation(args, "observed_ja4"),
        http3_perk_text: observation(args, "observed_http3_perk_text"),
        http3_perk_hash: observation(args, "observed_http3_perk_hash"),
    };

    let (session_arc, requested_stealth) = self.session_handle_and_stealth(&session_id).await?;
    let mut page = {
        let guard = session_arc.lock().await;
        let handle = guard.as_ref().ok_or_else(|| {
            BrowserError::ConfigError(format!("Session already released: {session_id}"))
        })?;
        let browser = handle.browser().ok_or_else(|| {
            BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
        })?;
        browser.new_page().await?
    };

    let navigated = page
        .navigate(&url, WaitUntil::DomContentLoaded, timeout)
        .await;
    if let Err(e) = navigated {
        // Best-effort close: the navigation error is the one worth reporting.
        page.close().await.ok();
        return Err(e);
    }

    let report = Self::run_stealth_diagnostic(&page, observed).await;
    page.close().await?;

    report.map(|mut value| {
        if let Some(obj) = value.as_object_mut() {
            obj.insert(
                "requested_stealth_level".to_string(),
                Value::String(requested_stealth),
            );
        }
        value
    })
}
/// Runs the page's stealth verification with the given transport
/// observations and converts the resulting report to JSON.
///
/// # Errors
/// Propagates the verification error, or returns a `ConfigError` when the
/// report cannot be serialized.
#[cfg(feature = "stealth")]
async fn run_stealth_diagnostic(
    page: &crate::page::PageHandle,
    observed: crate::diagnostic::TransportObservations,
) -> Result<Value> {
    let report = page.verify_stealth_with_transport(Some(observed)).await?;
    match serde_json::to_value(&report) {
        Ok(value) => Ok(value),
        Err(e) => Err(BrowserError::ConfigError(format!(
            "failed to serialize report: {e}"
        ))),
    }
}
async fn tool_browser_navigate(&self, args: &Value) -> Result<Value> {
let session_id = Self::require_str(args, "session_id")?;
let url = Self::require_str(args, "url")?;
let timeout_secs = args
.get("timeout_secs")
.and_then(serde_json::Value::as_f64)
.unwrap_or(30.0);
let session_arc = self.session_handle(&session_id).await?;
let mut page = session_arc
.lock()
.await
.as_ref()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Session already released: {session_id}"))
})?
.browser()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
})?
.new_page()
.await?;
page.navigate(
&url,
WaitUntil::Selector("body".to_string()),
Duration::from_secs_f64(timeout_secs),
)
.await?;
let title = page.title().await.unwrap_or_default();
let current_url = url.clone();
page.close().await?;
if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
session.current_url = Some(current_url.clone());
}
Ok(json!({ "title": title, "url": current_url }))
}
async fn tool_browser_eval(&self, args: &Value) -> Result<Value> {
let session_id = Self::require_str(args, "session_id")?;
let script = Self::require_str(args, "script")?;
let timeout_secs = args
.get("timeout_secs")
.and_then(serde_json::Value::as_f64)
.unwrap_or(30.0);
let (session_arc, nav_url_opt) = self
.sessions
.lock()
.await
.get(&session_id)
.map(|s| (s.handle.clone(), s.current_url.clone()))
.ok_or_else(|| BrowserError::ConfigError(format!("Unknown session: {session_id}")))?;
let nav_url = nav_url_opt.ok_or_else(|| {
BrowserError::ConfigError(
"No page loaded — call browser_navigate before browser_eval".to_string(),
)
})?;
let mut page = session_arc
.lock()
.await
.as_ref()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Session already released: {session_id}"))
})?
.browser()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
})?
.new_page()
.await?;
page.navigate(
&nav_url,
WaitUntil::DomContentLoaded,
Duration::from_secs_f64(timeout_secs),
)
.await?;
let result: Value = page.eval(&script).await?;
page.close().await?;
Ok(json!({ "result": result }))
}
async fn tool_browser_screenshot(&self, args: &Value) -> Result<Value> {
use base64::Engine as _;
let session_id = Self::require_str(args, "session_id")?;
let timeout_secs = args
.get("timeout_secs")
.and_then(serde_json::Value::as_f64)
.unwrap_or(30.0);
let (session_arc, nav_url_opt) = self
.sessions
.lock()
.await
.get(&session_id)
.map(|s| (s.handle.clone(), s.current_url.clone()))
.ok_or_else(|| BrowserError::ConfigError(format!("Unknown session: {session_id}")))?;
let nav_url = nav_url_opt.ok_or_else(|| {
BrowserError::ConfigError(
"No page loaded — call browser_navigate before browser_screenshot".to_string(),
)
})?;
let mut page = session_arc
.lock()
.await
.as_ref()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Session already released: {session_id}"))
})?
.browser()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
})?
.new_page()
.await?;
page.navigate(
&nav_url,
WaitUntil::DomContentLoaded,
Duration::from_secs_f64(timeout_secs),
)
.await?;
let png_bytes = page.screenshot().await?;
page.close().await?;
let encoded = base64::engine::general_purpose::STANDARD.encode(&png_bytes);
Ok(json!({ "data": encoded, "mimeType": "image/png", "bytes": png_bytes.len() }))
}
async fn tool_browser_content(&self, args: &Value) -> Result<Value> {
let session_id = Self::require_str(args, "session_id")?;
let timeout_secs = args
.get("timeout_secs")
.and_then(serde_json::Value::as_f64)
.unwrap_or(30.0);
let (session_arc, nav_url_opt) = self
.sessions
.lock()
.await
.get(&session_id)
.map(|s| (s.handle.clone(), s.current_url.clone()))
.ok_or_else(|| BrowserError::ConfigError(format!("Unknown session: {session_id}")))?;
let nav_url = nav_url_opt.ok_or_else(|| {
BrowserError::ConfigError(
"No page loaded — call browser_navigate before browser_content".to_string(),
)
})?;
let mut page = session_arc
.lock()
.await
.as_ref()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Session already released: {session_id}"))
})?
.browser()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
})?
.new_page()
.await?;
page.navigate(
&nav_url,
WaitUntil::DomContentLoaded,
Duration::from_secs_f64(timeout_secs),
)
.await?;
let html = page.content().await?;
page.close().await?;
Ok(json!({ "html": html, "bytes": html.len() }))
}
async fn tool_browser_query(&self, args: &Value) -> Result<Value> {
let session_id = Self::require_str(args, "session_id")?;
let url = Self::require_str(args, "url")?;
let selector = Self::require_str(args, "selector")?;
let limit = usize::try_from(
args.get("limit")
.and_then(serde_json::Value::as_u64)
.unwrap_or(50),
)
.unwrap_or(50);
let timeout_secs = args
.get("timeout_secs")
.and_then(serde_json::Value::as_f64)
.unwrap_or(30.0);
let fields: Option<Vec<(String, Option<String>)>> =
args.get("fields").and_then(|v| v.as_object()).map(|obj| {
obj.iter()
.map(|(k, v)| {
let attr = v
.get("attr")
.and_then(serde_json::Value::as_str)
.map(ToString::to_string);
(k.clone(), attr)
})
.collect()
});
let session_arc = self.session_handle(&session_id).await?;
let mut page = session_arc
.lock()
.await
.as_ref()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Session already released: {session_id}"))
})?
.browser()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
})?
.new_page()
.await?;
page.navigate(
&url,
WaitUntil::DomContentLoaded,
Duration::from_secs_f64(timeout_secs),
)
.await?;
let all_nodes = page.query_selector_all(&selector).await?;
let nodes = all_nodes.get(..limit).unwrap_or(&all_nodes);
let mut results: Vec<Value> = Vec::with_capacity(nodes.len());
if let Some(ref field_defs) = fields {
for node in nodes {
let mut obj = serde_json::Map::new();
for (field_name, attr_name) in field_defs {
let val = if let Some(attr) = attr_name {
node.attr(attr)
.await
.map_or(Value::Null, |opt| opt.map_or(Value::Null, Value::String))
} else {
node.text_content().await.map_or(Value::Null, Value::String)
};
obj.insert(field_name.clone(), val);
}
results.push(Value::Object(obj));
}
} else {
for node in nodes {
let text = node.text_content().await.unwrap_or_default();
results.push(Value::String(text));
}
}
page.close().await?;
Ok(json!({
"url": url,
"selector": selector,
"count": results.len(),
"results": results
}))
}
async fn tool_browser_extract(&self, args: &Value) -> Result<Value> {
let session_id = Self::require_str(args, "session_id")?;
let url = Self::require_str(args, "url")?;
let root_selector = Self::require_str(args, "root_selector")?;
let timeout_secs = args
.get("timeout_secs")
.and_then(serde_json::Value::as_f64)
.unwrap_or(30.0);
let schema_obj = args
.get("schema")
.and_then(|v| v.as_object())
.ok_or_else(|| {
BrowserError::ConfigError("Missing or non-object 'schema' argument".to_string())
})?;
let schema: Vec<(String, ExtractFieldDef)> = schema_obj
.iter()
.filter_map(|(name, spec)| {
let selector = spec
.get("selector")
.and_then(serde_json::Value::as_str)
.map(ToString::to_string)?;
let attr = spec
.get("attr")
.and_then(serde_json::Value::as_str)
.map(ToString::to_string);
let required = spec
.get("required")
.and_then(serde_json::Value::as_bool)
.unwrap_or(false);
Some((
name.clone(),
ExtractFieldDef {
selector,
attr,
required,
},
))
})
.collect();
let session_arc = self.session_handle(&session_id).await?;
let mut page = session_arc
.lock()
.await
.as_ref()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Session already released: {session_id}"))
})?
.browser()
.ok_or_else(|| {
BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
})?
.new_page()
.await?;
page.navigate(
&url,
WaitUntil::DomContentLoaded,
Duration::from_secs_f64(timeout_secs),
)
.await?;
let roots = page.query_selector_all(&root_selector).await?;
let mut results: Vec<Value> = Vec::with_capacity(roots.len());
for root in &roots {
if let Some(obj) = Self::extract_record(root, &schema).await {
results.push(Value::Object(obj));
}
}
page.close().await?;
Ok(json!({
"url": url,
"root_selector": root_selector,
"count": results.len(),
"results": results
}))
}
/// `browser_find_similar`: opens a page, loads `url`, takes the first
/// element matching `reference_selector` as the reference, and returns the
/// reference fingerprint plus the similar elements (score, text, and an
/// HTML snippet truncated to 200 characters).
///
/// `threshold` defaults to `SimilarityConfig::DEFAULT_THRESHOLD`,
/// `max_results` to 10, and `timeout_secs` to 30 (may be fractional).
///
/// # Errors
/// Fails on missing `session_id`/`url`/`reference_selector`, a
/// `timeout_secs` that is negative or too large to represent, an unknown
/// or released session, navigation or query failure, a reference selector
/// that matches nothing, fingerprint or similarity failure, or a failing
/// page close.
#[cfg(feature = "similarity")]
async fn tool_browser_find_similar(&self, args: &Value) -> Result<Value> {
    use crate::similarity::SimilarityConfig;
    let session_id = Self::require_str(args, "session_id")?;
    let url = Self::require_str(args, "url")?;
    let reference_selector = Self::require_str(args, "reference_selector")?;
    // The threshold is a score in [0.0, 1.0]; narrowing to f32 only drops
    // precision that is irrelevant at that scale.
    #[allow(clippy::cast_possible_truncation)]
    let threshold = args
        .get("threshold")
        .and_then(Value::as_f64)
        .map_or(SimilarityConfig::DEFAULT_THRESHOLD, |v| v as f32);
    let max_results = usize::try_from(
        args.get("max_results")
            .and_then(Value::as_u64)
            .unwrap_or(10),
    )
    .unwrap_or(10);
    let timeout_secs = args
        .get("timeout_secs")
        .and_then(Value::as_f64)
        .unwrap_or(30.0);
    // `Duration::from_secs_f64` panics on negative or oversized input, and
    // this value comes straight from the client, so convert fallibly.
    let timeout = Duration::try_from_secs_f64(timeout_secs).map_err(|e| {
        BrowserError::ConfigError(format!("Invalid timeout_secs ({timeout_secs}): {e}"))
    })?;
    let config = SimilarityConfig {
        threshold,
        max_results,
    };

    let session_arc = self.session_handle(&session_id).await?;
    let mut page = session_arc
        .lock()
        .await
        .as_ref()
        .ok_or_else(|| {
            BrowserError::ConfigError(format!("Session already released: {session_id}"))
        })?
        .browser()
        .ok_or_else(|| {
            BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
        })?
        .new_page()
        .await?;

    // Run the fallible page work as one unit so the page is closed on
    // every exit path, not only on success.
    let outcome = async {
        page.navigate(&url, WaitUntil::DomContentLoaded, timeout)
            .await?;
        let refs = page.query_selector_all(&reference_selector).await?;
        let Some(reference) = refs.into_iter().next() else {
            // Return a real error so the caller's envelope carries
            // `isError: true`; an `Ok` payload with an embedded error flag
            // would be reported to the client as a success.
            return Err(BrowserError::ConfigError(format!(
                "Reference selector matched no elements: {reference_selector}"
            )));
        };
        let ref_fp = reference.fingerprint().await?;
        let matches = page.find_similar(&reference, config).await?;
        let mut match_results: Vec<Value> = Vec::with_capacity(matches.len());
        for m in &matches {
            let text = m.node.text_content().await.unwrap_or_default();
            // NOTE(review): the output key says "outer_html" but the value
            // comes from `inner_html()` — confirm which one is intended.
            let snippet = m.node.inner_html().await.unwrap_or_default();
            let snippet: String = snippet.chars().take(200).collect();
            match_results.push(json!({
                "score": m.score,
                "text": text,
                "outer_html_snippet": snippet
            }));
        }
        Ok::<Value, BrowserError>(json!({
            "url": url,
            "reference": {
                "tag": ref_fp.tag,
                "classes": ref_fp.classes,
                "attr_names": ref_fp.attr_names,
                "depth": ref_fp.depth
            },
            "count": match_results.len(),
            "matches": match_results
        }))
    }
    .await;

    match outcome {
        Ok(value) => {
            page.close().await?;
            Ok(value)
        }
        Err(e) => {
            // Best-effort close: the original failure is the one worth reporting.
            page.close().await.ok();
            Err(e)
        }
    }
}
/// `browser_release`: removes the session from the table and, if it still
/// holds a browser handle, releases that handle.
///
/// # Errors
/// Fails when `session_id` is missing or does not name a known session.
async fn tool_browser_release(&self, args: &Value) -> Result<Value> {
    let session_id = Self::require_str(args, "session_id")?;

    // The table lock is dropped at the end of this statement, before the
    // per-session lock below is taken.
    let removed = self.sessions.lock().await.remove(&session_id);
    let Some(session) = removed else {
        return Err(BrowserError::ConfigError(format!(
            "Unknown session: {session_id}"
        )));
    };

    let taken = session.handle.lock().await.take();
    if let Some(browser) = taken {
        browser.release().await;
    }

    info!(%session_id, "MCP session released");
    Ok(json!({ "released": true, "session_id": session_id }))
}
fn tool_pool_stats(&self) -> Value {
let stats = self.pool.stats();
json!({
"active": stats.active,
"max": stats.max,
"available": stats.available
})
}
/// Answers `resources/list` with one `browser://session/<id>` resource per
/// live session.
async fn handle_resources_list(&self, id: Value) -> JsonRpcResponse {
    let sessions = self.sessions.lock().await;
    let mut resources: Vec<Value> = Vec::with_capacity(sessions.len());
    for sid in sessions.keys() {
        resources.push(json!({
            "uri": format!("browser://session/{sid}"),
            "name": format!("Browser session {sid}"),
            "mimeType": "application/json"
        }));
    }
    drop(sessions);
    JsonRpcResponse::ok(id, json!({ "resources": resources }))
}
async fn handle_resources_read(&self, id: Value, params: Value) -> JsonRpcResponse {
let uri = match params.get("uri").and_then(|v| v.as_str()) {
Some(u) => u.to_string(),
None => return JsonRpcResponse::err(id, -32602, "Missing 'uri'"),
};
let session_id = uri
.strip_prefix("browser://session/")
.unwrap_or("")
.to_string();
let session_config: Option<Value> = {
let sessions = self.sessions.lock().await;
sessions.get(&session_id).map(|s| {
json!({
"stealth_level": format!("{:?}", s.stealth_level).to_lowercase(),
"tls_profile": s.tls_profile,
"webrtc_policy": s.webrtc_policy,
"cdp_fix_mode": s.cdp_fix_mode,
"proxy": s.proxy
})
})
};
if let Some(config) = session_config {
let pool_stats = self.pool.stats();
JsonRpcResponse::ok(
id,
json!({
"contents": [{
"uri": uri,
"mimeType": "application/json",
"text": serde_json::to_string_pretty(&json!({
"session_id": session_id,
"config": config,
"pool_active": pool_stats.active,
"pool_max": pool_stats.max
})).unwrap_or_default()
}]
}),
)
} else {
JsonRpcResponse::err(id, -32002, format!("Resource not found: {uri}"))
}
}
/// Returns a shared reference to the browser handle slot of `session_id`.
///
/// # Errors
/// Returns a `ConfigError` when no such session exists.
async fn session_handle(&self, session_id: &str) -> Result<Arc<Mutex<Option<BrowserHandle>>>> {
    let sessions = self.sessions.lock().await;
    let found = sessions
        .get(session_id)
        .map(|session| Arc::clone(&session.handle));
    drop(sessions);
    found.ok_or_else(|| BrowserError::ConfigError(format!("Unknown session: {session_id}")))
}
/// Returns the browser handle slot of `session_id` together with the
/// session's stealth level rendered in lowercase.
///
/// # Errors
/// Returns a `ConfigError` when no such session exists.
#[cfg(feature = "stealth")]
async fn session_handle_and_stealth(
    &self,
    session_id: &str,
) -> Result<(Arc<Mutex<Option<BrowserHandle>>>, String)> {
    let sessions = self.sessions.lock().await;
    let found = sessions.get(session_id).map(|session| {
        let stealth = format!("{:?}", session.stealth_level).to_lowercase();
        (Arc::clone(&session.handle), stealth)
    });
    drop(sessions);
    found.ok_or_else(|| BrowserError::ConfigError(format!("Unknown session: {session_id}")))
}
/// Builds one result object for `root` by resolving every schema field
/// against it.
///
/// For each field the first node matching the field's selector under
/// `root` supplies the value: the named attribute when `attr` is set,
/// otherwise the text content. Values that cannot be read become `null`.
/// When nothing matches (or the lookup fails), the field is `null` unless
/// it is `required`, in which case the whole record is dropped (`None`).
async fn extract_record(
    root: &crate::page::NodeHandle,
    schema: &[(String, ExtractFieldDef)],
) -> Option<serde_json::Map<String, Value>> {
    let mut record = serde_json::Map::new();
    for (field_name, def) in schema {
        // A failed lookup and an empty match list are handled identically.
        let first_match = root
            .children_matching(&def.selector)
            .await
            .ok()
            .and_then(|children| children.into_iter().next());

        let value = match first_match {
            None if def.required => return None,
            None => Value::Null,
            Some(node) => match &def.attr {
                Some(attr) => match node.attr(attr).await {
                    Ok(Some(text)) => Value::String(text),
                    _ => Value::Null,
                },
                None => match node.text_content().await {
                    Ok(text) => Value::String(text),
                    Err(_) => Value::Null,
                },
            },
        };
        record.insert(field_name.clone(), value);
    }
    Some(record)
}
fn require_str(args: &Value, key: &str) -> Result<String> {
args.get(key)
.and_then(|v| v.as_str())
.map(ToString::to_string)
.ok_or_else(|| BrowserError::ConfigError(format!("Missing required argument: {key}")))
}
}
/// Returns `true` when `value` is one of the accepted "enabled" spellings
/// (`true`, `1`, `yes`), compared case-insensitively and without trimming.
fn mcp_enabled_from(value: &str) -> bool {
    ["true", "1", "yes"]
        .iter()
        .any(|accepted| value.eq_ignore_ascii_case(accepted))
}
/// Reports whether the `STYGIAN_MCP_ENABLED` environment variable holds an
/// accepted "enabled" value; an unset or unreadable variable counts as
/// disabled.
pub fn is_mcp_enabled() -> bool {
    std::env::var("STYGIAN_MCP_ENABLED").is_ok_and(|raw| mcp_enabled_from(&raw))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Result type shared by the fallible tests below.
    type TestResult = std::result::Result<(), Box<dyn std::error::Error>>;

    /// Looks up a tool descriptor by name in `TOOL_DEFINITIONS`.
    fn tool_def(name: &str) -> Option<&'static Value> {
        TOOL_DEFINITIONS
            .iter()
            .find(|tool| tool.get("name").and_then(Value::as_str) == Some(name))
    }

    /// Returns the raw `inputSchema.required` value of a tool descriptor.
    fn required_of(def: &Value) -> Option<&Value> {
        def.get("inputSchema")
            .and_then(|schema| schema.get("required"))
    }

    /// True when `required` is an array that lists `arg`.
    fn lists(required: &Value, arg: &str) -> bool {
        required
            .as_array()
            .is_some_and(|entries| entries.iter().any(|entry| entry == arg))
    }

    /// `browser_query` is always advertised.
    #[test]
    fn tool_defs_include_browser_query() {
        assert!(
            tool_def("browser_query").is_some(),
            "TOOL_DEFINITIONS must contain browser_query"
        );
    }

    /// `browser_extract` is always advertised.
    #[test]
    fn tool_defs_include_browser_extract() {
        assert!(
            tool_def("browser_extract").is_some(),
            "TOOL_DEFINITIONS must contain browser_extract"
        );
    }

    /// `browser_query` requires `session_id`, `url` and `selector`.
    #[test]
    fn browser_query_required_args() -> TestResult {
        let def = tool_def("browser_query").ok_or("browser_query must be in TOOL_DEFINITIONS")?;
        let required = required_of(def).ok_or("browser_query inputSchema missing 'required'")?;
        assert!(lists(required, "session_id"));
        assert!(lists(required, "url"));
        assert!(lists(required, "selector"));
        Ok(())
    }

    /// `browser_extract` requires `root_selector` and `schema`.
    #[test]
    fn browser_extract_required_args() -> TestResult {
        let def =
            tool_def("browser_extract").ok_or("browser_extract must be in TOOL_DEFINITIONS")?;
        let required = required_of(def).ok_or("browser_extract inputSchema missing 'required'")?;
        assert!(lists(required, "root_selector"));
        assert!(lists(required, "schema"));
        Ok(())
    }

    /// A success response serializes `result` and omits `error`.
    #[test]
    fn jsonrpc_response_ok_serializes() -> TestResult {
        let response = JsonRpcResponse::ok(json!(1), json!({ "hello": "world" }));
        let text = serde_json::to_string(&response)?;
        assert!(text.contains("\"hello\""));
        assert!(text.contains("\"jsonrpc\":\"2.0\""));
        assert!(!text.contains("\"error\""));
        Ok(())
    }

    /// An error response serializes code and message and omits `result`.
    #[test]
    fn jsonrpc_response_err_serializes() -> TestResult {
        let response = JsonRpcResponse::err(json!(2), -32601, "Method not found");
        let text = serde_json::to_string(&response)?;
        assert!(text.contains("-32601"));
        assert!(text.contains("Method not found"));
        assert!(!text.contains("\"result\""));
        Ok(())
    }

    /// The `browser_extract` schema argument is required and typed as an object.
    #[test]
    fn browser_extract_schema_parse_empty_schema() -> TestResult {
        let def =
            tool_def("browser_extract").ok_or("browser_extract must be in TOOL_DEFINITIONS")?;
        let required = required_of(def)
            .and_then(Value::as_array)
            .ok_or("browser_extract inputSchema missing 'required' array")?;
        assert!(
            required.iter().any(|entry| entry == "schema"),
            "schema must be required in browser_extract"
        );
        let schema_type = def
            .get("inputSchema")
            .and_then(|schema| schema.get("properties"))
            .and_then(|props| props.get("schema"))
            .and_then(|schema| schema.get("type"))
            .and_then(Value::as_str)
            .ok_or("browser_extract inputSchema.properties.schema.type missing")?;
        assert_eq!(
            schema_type, "object",
            "schema property must have type object"
        );
        Ok(())
    }

    /// `browser_query` declares `session_id` as a required argument.
    #[test]
    fn browser_query_missing_session() -> TestResult {
        let def = tool_def("browser_query").ok_or("browser_query must be in TOOL_DEFINITIONS")?;
        let required = required_of(def)
            .and_then(Value::as_array)
            .ok_or("browser_query inputSchema missing 'required' array")?;
        assert!(
            required.iter().any(|entry| entry == "session_id"),
            "session_id must be required so missing-session is caught at validation"
        );
        Ok(())
    }

    /// Anything other than the accepted spellings leaves MCP disabled.
    #[test]
    fn mcp_env_disabled_by_default() {
        for val in ["false", "0", "no", "", "off"] {
            assert!(!mcp_enabled_from(val), "expected disabled for {val:?}");
        }
    }

    /// The accepted spellings enable MCP regardless of letter case.
    #[test]
    fn mcp_env_enabled_values() {
        for val in ["true", "True", "TRUE", "1", "yes", "YES"] {
            assert!(mcp_enabled_from(val), "expected enabled for {val:?}");
        }
    }
}