use clap::Parser;
use crw_core::mcp::{
JsonRpcRequest, JsonRpcResponse, ProtocolResult, handle_protocol_method, tool_result_response,
};
use serde_json::{Value, json};
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
#[cfg(feature = "embedded")]
mod browser;
/// Server name handed to the shared MCP protocol handler and printed in the startup log lines.
const SERVER_NAME: &str = "crw-mcp";
/// Version of this crate, captured from Cargo's `CARGO_PKG_VERSION` at compile time.
const SERVER_VERSION: &str = env!("CARGO_PKG_VERSION");
// Command-line options for the server. Every flag can also be supplied through
// the environment variable named in its `env = "..."` attribute.
//
// Plain `//` comments are used here on purpose: with clap's derive, `///` doc
// comments are turned into help text and would change the `--help` output.
#[derive(Parser)]
#[command(name = "crw-mcp", about = "MCP server for the CRW web scraper")]
struct Cli {
// Base URL of a remote CRW API. When present, `main` builds the proxy backend
// and forwards every tool call over HTTP.
#[arg(long, env = "CRW_API_URL")]
api_url: Option<String>,
// Optional API key; the proxy backend sends it as `Authorization: Bearer <key>`.
#[arg(long, env = "CRW_API_KEY")]
api_key: Option<String>,
// Optional configuration path. In embedded mode `main` exports it as the
// `CRW_CONFIG` environment variable before loading the application config.
#[arg(long, env = "CRW_CONFIG")]
config: Option<String>,
}
/// Where tool calls are executed.
enum Backend {
/// Forward each tool call over HTTP to a remote CRW API.
Proxy {
/// HTTP client reused for every request.
client: reqwest::Client,
/// Base URL of the remote API; endpoint paths such as `/v1/scrape` are appended to it.
base_url: String,
/// Optional key sent as a bearer token with every request.
api_key: Option<String>,
},
/// Run tools in-process through `crw_server` (only compiled with the `embedded` feature).
#[cfg(feature = "embedded")]
Embedded { state: crw_server::state::AppState },
}
impl Backend {
async fn call_tool(&self, tool_name: &str, args: Value) -> Result<Value, String> {
match self {
Backend::Proxy {
client,
base_url,
api_key,
} => proxy_call_tool(client, base_url, api_key, tool_name, args).await,
#[cfg(feature = "embedded")]
Backend::Embedded { state } => {
crw_server::routes::mcp::call_tool(state, tool_name, args).await
}
}
}
async fn handle_request(&self, req: JsonRpcRequest) -> Option<JsonRpcResponse> {
match handle_protocol_method(SERVER_NAME, SERVER_VERSION, &req) {
ProtocolResult::Response(resp) => return Some(resp),
ProtocolResult::Notification => return None,
ProtocolResult::NotHandled => {}
}
match req.method.as_str() {
"tools/call" => {
let id = req.id.unwrap_or(Value::Null);
let tool_name = req
.params
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("");
let arguments = req.params.get("arguments").cloned().unwrap_or(json!({}));
let result = self.call_tool(tool_name, arguments).await;
Some(tool_result_response(id, result))
}
_ => {
if let Some(id) = req.id {
Some(JsonRpcResponse::error(
id,
-32601,
format!("method not found: {}", req.method),
))
} else {
None
}
}
}
}
}
async fn proxy_call_tool(
client: &reqwest::Client,
base_url: &str,
api_key: &Option<String>,
tool_name: &str,
args: Value,
) -> Result<Value, String> {
let mut headers = reqwest::header::HeaderMap::new();
headers.insert("content-type", "application/json".parse().unwrap());
if let Some(key) = api_key {
headers.insert(
"authorization",
format!("Bearer {key}")
.parse()
.map_err(|e| format!("invalid api key: {e}"))?,
);
}
match tool_name {
"crw_scrape" => {
let resp = client
.post(format!("{base_url}/v1/scrape"))
.headers(headers)
.json(&args)
.send()
.await
.map_err(|e| format!("HTTP request failed: {e}"))?;
parse_response(resp).await
}
"crw_crawl" => {
let resp = client
.post(format!("{base_url}/v1/crawl"))
.headers(headers)
.json(&args)
.send()
.await
.map_err(|e| format!("HTTP request failed: {e}"))?;
parse_response(resp).await
}
"crw_check_crawl_status" => {
let id = args
.get("id")
.and_then(|v| v.as_str())
.ok_or("missing required parameter: id")?;
let resp = client
.get(format!("{base_url}/v1/crawl/{id}"))
.headers(headers)
.send()
.await
.map_err(|e| format!("HTTP request failed: {e}"))?;
parse_response(resp).await
}
"crw_map" => {
let resp = client
.post(format!("{base_url}/v1/map"))
.headers(headers)
.json(&args)
.send()
.await
.map_err(|e| format!("HTTP request failed: {e}"))?;
parse_response(resp).await
}
_ => Err(format!("unknown tool: {tool_name}")),
}
}
async fn parse_response(resp: reqwest::Response) -> Result<Value, String> {
let status = resp.status();
let body = resp
.text()
.await
.map_err(|e| format!("failed to read response: {e}"))?;
if !status.is_success() {
return Err(format!("API error ({}): {}", status, truncate(&body, 500)));
}
serde_json::from_str(&body).map_err(|e| format!("invalid JSON response: {e}"))
}
/// Returns the longest prefix of `s` that is at most `max` bytes long and ends
/// on a UTF-8 character boundary.
///
/// The input is returned unchanged when it already fits. When `max` falls in
/// the middle of a multi-byte character, the cut moves back to the start of
/// that character, so the result may be shorter than `max` bytes.
fn truncate(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Walk back to the nearest boundary with `is_char_boundary` rather than
    // `str::floor_char_boundary`, which needs a recent toolchain (Rust 1.91+).
    // Index 0 is always a boundary, so the loop terminates.
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
/// Entry point: sets up logging, picks a backend from the CLI options, and
/// serves JSON-RPC requests over stdin/stdout until stdin is closed.
///
/// With `--api-url` (or `CRW_API_URL`) the server proxies tool calls to a
/// remote CRW API. Otherwise it runs the embedded backend when compiled with
/// the `embedded` feature, or logs an error and exits with status 1 when not.
#[tokio::main]
async fn main() {
// Logs go to stderr: stdout carries the JSON-RPC responses.
tracing_subscriber::fmt()
.with_writer(std::io::stderr)
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "crw_mcp=info".parse().unwrap()),
)
.init();
let cli = Cli::parse();
let backend = if let Some(api_url) = cli.api_url {
// Proxy mode: every tool call becomes an HTTP request to `api_url`.
tracing::info!("Starting {SERVER_NAME} v{SERVER_VERSION} (proxy mode)");
tracing::info!("API URL: {api_url}");
let client = reqwest::Client::builder()
.redirect(crw_core::url_safety::safe_redirect_policy())
.timeout(std::time::Duration::from_secs(120))
.connect_timeout(std::time::Duration::from_secs(10))
.build()
.expect("reqwest client build failed");
Backend::Proxy {
client,
base_url: api_url,
api_key: cli.api_key,
}
} else {
// Embedded mode: this block runs the stdio loop itself and returns, so it
// never produces a value for `backend`.
#[cfg(feature = "embedded")]
{
tracing::info!("Starting {SERVER_NAME} v{SERVER_VERSION} (embedded mode)");
if let Some(ref config_path) = cli.config {
// Export the path before the config is loaded below.
// NOTE(review): presumably `AppConfig::load` reads `CRW_CONFIG` — confirm.
// SAFETY(review): `set_var` is only sound while no other thread accesses the
// process environment; the Tokio runtime is already running at this point —
// TODO confirm no worker thread touches the environment concurrently.
unsafe { std::env::set_var("CRW_CONFIG", config_path) };
}
// A config that fails to load is not fatal: warn and fall back to defaults.
let mut config = crw_core::config::AppConfig::load().unwrap_or_else(|e| {
tracing::warn!("Failed to load config, using defaults: {e}");
crw_core::config::AppConfig {
server: Default::default(),
renderer: Default::default(),
crawler: Default::default(),
extraction: Default::default(),
auth: Default::default(),
}
});
// Only these three environment variables are consulted; renderer settings
// that come from the loaded config are not checked here.
let user_configured_renderer = std::env::var("CRW_RENDERER__LIGHTPANDA__WS_URL")
.is_ok()
|| std::env::var("CRW_RENDERER__CHROME__WS_URL").is_ok()
|| std::env::var("CRW_RENDERER__PLAYWRIGHT__WS_URL").is_ok();
// Without a renderer variable, try to spawn a local headless browser and
// store its WebSocket URL as the `lightpanda` endpoint. The guard is kept
// until the stdio loop has finished.
let _browser_guard = if !user_configured_renderer {
match browser::spawn_headless().await {
Some((guard, ws_url)) => {
config.renderer.lightpanda = Some(crw_core::config::CdpEndpoint { ws_url });
Some(guard)
}
None => {
tracing::info!(
"No browser found — JS rendering disabled. \
Install LightPanda or Chrome for full SPA support."
);
None
}
}
} else {
tracing::info!("CDP renderer already configured — skipping auto-spawn");
None
};
let state = crw_server::state::AppState::new(config);
let backend = Backend::Embedded { state };
run_stdio_loop(backend).await;
// Release the browser guard only after the loop has ended.
drop(_browser_guard);
return;
}
// Built without the `embedded` feature: a remote API URL is mandatory.
#[cfg(not(feature = "embedded"))]
{
tracing::error!(
"Embedded mode not available (compiled without 'embedded' feature). \
Use --api-url to connect to a remote CRW server."
);
std::process::exit(1);
}
};
// Only the proxy branch reaches this point.
run_stdio_loop(backend).await;
}
async fn run_stdio_loop(backend: Backend) {
let mut stdout = tokio::io::stdout();
let stdin = tokio::io::stdin();
let mut reader = BufReader::new(stdin);
let mut line = String::new();
loop {
line.clear();
match reader.read_line(&mut line).await {
Ok(0) => break, Ok(_) => {}
Err(e) => {
tracing::error!("stdin read error: {e}");
break;
}
}
let trimmed = line.trim();
if trimmed.is_empty() {
continue;
}
tracing::debug!("← {trimmed}");
let req: JsonRpcRequest = match serde_json::from_str(trimmed) {
Ok(r) => r,
Err(e) => {
let err = JsonRpcResponse::error(Value::Null, -32700, format!("parse error: {e}"));
let out = serde_json::to_string(&err).unwrap();
tracing::debug!("→ {out}");
let _ = stdout.write_all(out.as_bytes()).await;
let _ = stdout.write_all(b"\n").await;
let _ = stdout.flush().await;
continue;
}
};
if let Some(resp) = backend.handle_request(req).await {
let out = serde_json::to_string(&resp).unwrap();
tracing::debug!("→ {out}");
let _ = stdout.write_all(out.as_bytes()).await;
let _ = stdout.write_all(b"\n").await;
let _ = stdout.flush().await;
}
}
}