1use actix_web::{web, HttpRequest, HttpResponse, Result};
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::env;
6use std::sync::Arc;
7use std::time::Duration;
8use tokio::sync::RwLock;
9use chrono::{DateTime, Utc};
10use reqwest::Client;
11use uuid::Uuid;
12
/// Runtime configuration for the server.
///
/// `Debug` is implemented by hand rather than derived so that the API token
/// is never written out when a `Config` (or a struct containing one) is
/// formatted with `{:?}`.
#[derive(Clone)]
pub struct Config {
    /// Bearer token sent in the `Authorization` header of upstream requests.
    pub api_token: String,
    /// Zone name sent as `zone` in the scrape and search request payloads.
    pub web_unlocker_zone: String,
    /// Zone name for browser access; not read by the code visible in this file.
    pub browser_zone: String,
    /// Raw rate-limit setting, stored as given; its format is not interpreted here.
    pub rate_limit: Option<String>,
    /// Timeout applied to the shared HTTP client.
    pub timeout: Duration,
    /// Configured retry count; not consulted by the code visible in this file.
    pub max_retries: u32,
}

impl std::fmt::Debug for Config {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only reveal whether a token is present, never its value.
        let token_state = if self.api_token.is_empty() {
            "<empty>"
        } else {
            "<redacted>"
        };

        f.debug_struct("Config")
            .field("api_token", &token_state)
            .field("web_unlocker_zone", &self.web_unlocker_zone)
            .field("browser_zone", &self.browser_zone)
            .field("rate_limit", &self.rate_limit)
            .field("timeout", &self.timeout)
            .field("max_retries", &self.max_retries)
            .finish()
    }
}
22
23impl Config {
24 pub fn from_env() -> Result<Self, std::io::Error> {
25 Ok(Self {
26 api_token: env::var("API_TOKEN").unwrap_or_default(),
27 web_unlocker_zone: env::var("WEB_UNLOCKER_ZONE").unwrap_or_else(|_| "default_zone".to_string()),
28 browser_zone: env::var("BROWSER_ZONE").unwrap_or_else(|_| "default_browser".to_string()),
29 rate_limit: env::var("RATE_LIMIT").ok(),
30 timeout: Duration::from_secs(env::var("REQUEST_TIMEOUT").unwrap_or_else(|_| "60".to_string()).parse().unwrap_or(60)),
31 max_retries: env::var("MAX_RETRIES").unwrap_or_else(|_| "3".to_string()).parse().unwrap_or(3),
32 })
33 }
34}
35
36#[derive(Debug)]
37pub struct AppState {
38 pub config: Config,
39 pub session_id: Uuid,
40 pub http_client: Client,
41 pub rate_limits: Arc<RwLock<HashMap<String, (u32, DateTime<Utc>)>>>,
42 pub start_time: DateTime<Utc>,
43}
44
45impl AppState {
46 pub fn new(config: Config) -> Self {
47 Self {
48 session_id: Uuid::new_v4(),
49 config: config.clone(),
50 http_client: Client::builder().timeout(config.timeout).build().unwrap(),
51 rate_limits: Arc::new(RwLock::new(HashMap::new())),
52 start_time: Utc::now(),
53 }
54 }
55}
56
57#[derive(Debug, Serialize, Deserialize)]
58pub struct McpRequest {
59 pub jsonrpc: String,
60 pub id: Option<serde_json::Value>,
61 pub method: String,
62 pub params: Option<serde_json::Value>,
63}
64
65#[derive(Debug, Serialize, Deserialize)]
66pub struct McpResponse {
67 pub jsonrpc: String,
68 pub id: Option<serde_json::Value>,
69 pub result: Option<serde_json::Value>,
70 pub error: Option<McpError>,
71}
72
73#[derive(Debug, Serialize, Deserialize)]
74pub struct McpError {
75 pub code: i32,
76 pub message: String,
77 pub data: Option<serde_json::Value>,
78}
79
/// Namespace for the upstream API endpoints used by this service.
pub struct BrightDataUrls;

impl BrightDataUrls {
    /// Endpoint that scrape and search requests are POSTed to.
    pub const REQUEST_API: &str = "https://api.brightdata.com/request";
}
85
86pub async fn handle_mcp_request(
87 _req: HttpRequest,
88 payload: web::Json<McpRequest>,
89 state: web::Data<AppState>,
90) -> Result<HttpResponse> {
91 let req = payload.into_inner();
92 let id = req.id.clone();
93
94 let mcp_result: Result<McpResponse, String> = match req.method.as_str() {
96 "tools/list" => Ok(McpResponse {
97 jsonrpc: "2.0".to_string(),
98 id,
99 result: Some(serde_json::json!({
100 "tools": [
101 { "name": "scrape_website", "description": "Scrape a web page" },
102 { "name": "search_web", "description": "Perform a web search" },
103 { "name": "extract_data", "description": "Extract structured data from a webpage (WIP)" }
104 ]
105 })),
106 error: None,
107 }),
108
109 "tools/call" => {
110 if let Some(params) = req.params {
111 let name = params.get("name").and_then(|v| v.as_str()).unwrap_or("");
112 let args = params.get("arguments").cloned().unwrap_or_default();
113
114 if !check_rate_limit(name, &state).await {
115 return Ok(HttpResponse::TooManyRequests().json(McpResponse {
116 jsonrpc: "2.0".to_string(),
117 id,
118 result: None,
119 error: Some(McpError {
120 code: -32000,
121 message: "Rate limit exceeded".to_string(),
122 data: None,
123 }),
124 }));
125 }
126
127 let result = match name {
128 "scrape_website" => handle_scrape_website(&args, &state).await,
129 "search_web" => handle_search_web(&args, &state).await,
130 "extract_data" => handle_extract_placeholder(&args).await,
131 _ => Err("Unknown tool".to_string()),
132 };
133
134 Ok(match result {
135 Ok(content) => McpResponse {
136 jsonrpc: "2.0".to_string(),
137 id,
138 result: Some(serde_json::json!({ "content": content })),
139 error: None,
140 },
141 Err(msg) => McpResponse {
142 jsonrpc: "2.0".to_string(),
143 id,
144 result: None,
145 error: Some(McpError {
146 code: -32603,
147 message: msg,
148 data: None,
149 }),
150 },
151 })
152 } else {
153 Ok(McpResponse {
154 jsonrpc: "2.0".to_string(),
155 id,
156 result: None,
157 error: Some(McpError {
158 code: -32602,
159 message: "Missing parameters".into(),
160 data: None,
161 }),
162 })
163 }
164 }
165
166 _ => Ok(McpResponse {
167 jsonrpc: "2.0".to_string(),
168 id,
169 result: None,
170 error: Some(McpError {
171 code: -32601,
172 message: "Method not found".to_string(),
173 data: None,
174 }),
175 }),
176 };
177
178 match mcp_result {
180 Ok(resp) => Ok(HttpResponse::Ok().json(resp)),
181 Err(e) => Ok(HttpResponse::InternalServerError().json(McpResponse {
182 jsonrpc: "2.0".to_string(),
183 id: req.id,
184 result: None,
185 error: Some(McpError {
186 code: -32603,
187 message: e,
188 data: None,
189 }),
190 })),
191 }
192}
193
194
195pub async fn health_check(state: web::Data<AppState>) -> Result<HttpResponse> {
196 Ok(HttpResponse::Ok().json(serde_json::json!({
197 "status": "healthy",
198 "session_id": state.session_id,
199 "uptime_seconds": (Utc::now() - state.start_time).num_seconds(),
200 })))
201}
202
203pub async fn cors_handler() -> HttpResponse {
204 HttpResponse::Ok()
205 .insert_header(("Access-Control-Allow-Origin", "*"))
206 .insert_header(("Access-Control-Allow-Methods", "POST, GET, OPTIONS"))
207 .insert_header(("Access-Control-Allow-Headers", "Content-Type, Authorization"))
208 .finish()
209}
210
211async fn check_rate_limit(tool: &str, state: &web::Data<AppState>) -> bool {
212 let mut limits = state.rate_limits.write().await;
213 let now = Utc::now();
214 let entry = limits.entry(tool.to_string()).or_insert((0, now));
215
216 let limit = 10;
217 let window = chrono::Duration::seconds(60);
218
219 if now - entry.1 > window {
220 entry.0 = 0;
221 entry.1 = now;
222 }
223
224 if entry.0 >= limit {
225 false
226 } else {
227 entry.0 += 1;
228 true
229 }
230}
231
232
233
234
235
236
237async fn handle_scrape_website(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
238 let url = args.get("url").and_then(|v| v.as_str()).ok_or("Missing 'url'")?;
239 let format = args.get("format").and_then(|v| v.as_str()).unwrap_or("markdown");
240
241 let mut payload = serde_json::json!({
242 "url": url,
243 "zone": state.config.web_unlocker_zone,
244 "format": "raw",
245 });
246
247 if format == "markdown" {
248 payload["data_format"] = serde_json::json!("markdown");
249 }
250
251 let res = state.http_client
252 .post(BrightDataUrls::REQUEST_API)
253 .header("Authorization", format!("Bearer {}", state.config.api_token))
254 .json(&payload)
255 .send()
256 .await
257 .map_err(|e| e.to_string())?;
258
259 let body = res.text().await.map_err(|e| e.to_string())?;
260 Ok(body)
261}
262
263async fn handle_search_web(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
264 let query = args.get("query").and_then(|v| v.as_str()).ok_or("Missing 'query'")?;
265 let engine = args.get("engine").and_then(|v| v.as_str()).unwrap_or("google");
266 let cursor = args.get("cursor").and_then(|v| v.as_str()).unwrap_or("0");
267
268 let search_url = build_search_url(engine, query, cursor);
269
270 let payload = serde_json::json!({
271 "url": search_url,
272 "zone": state.config.web_unlocker_zone,
273 "format": "raw",
274 "data_format": "markdown"
275 });
276
277 let res = state.http_client
278 .post(BrightDataUrls::REQUEST_API)
279 .header("Authorization", format!("Bearer {}", state.config.api_token))
280 .json(&payload)
281 .send()
282 .await
283 .map_err(|e| e.to_string())?;
284
285 let body = res.text().await.map_err(|e| e.to_string())?;
286 Ok(body)
287}
288
289async fn handle_extract_placeholder(_args: &serde_json::Value) -> Result<String, String> {
290 Ok("🧠Extract tool placeholder: AI-based structured data extraction coming soon.".to_string())
291}
292
293fn build_search_url(engine: &str, query: &str, cursor: &str) -> String {
294 let q = urlencoding::encode(query);
295 let page: usize = cursor.parse().unwrap_or(0);
296 let start = page * 10;
297
298 match engine {
299 "yandex" => format!("https://yandex.com/search/?text={q}&p={page}"),
300 "bing" => format!("https://www.bing.com/search?q={q}&first={}", start + 1),
301 "duckduckgo" => format!("https://duckduckgo.com/?q={q}&s={start}"),
302 _ => format!("https://www.google.com/search?q={q}&start={start}"),
303 }
304}