1use actix_web::{web, HttpRequest, HttpResponse, Result};
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::env;
6use std::sync::Arc;
7use std::time::Duration;
8use tokio::sync::RwLock;
9use chrono::{DateTime, Utc};
10use reqwest::Client;
11use uuid::Uuid;
12use crate::error::BrightDataError;
13
/// Runtime settings for the server, populated from environment variables.
#[derive(Debug, Clone)]
pub struct Config {
    /// Value of `API_TOKEN`; empty when the variable is not set.
    pub api_token: String,
    /// Value of `WEB_UNLOCKER_ZONE`, defaulting to `"default_zone"`.
    pub web_unlocker_zone: String,
    /// Value of `BROWSER_ZONE`, defaulting to `"default_browser"`.
    pub browser_zone: String,
    /// Raw value of `RATE_LIMIT`, or `None` when the variable is not set.
    pub rate_limit: Option<String>,
    /// Request timeout built from `REQUEST_TIMEOUT` (whole seconds, default 300).
    pub timeout: Duration,
    /// Value of `MAX_RETRIES` (default 3).
    pub max_retries: u32,
}

impl Config {
    /// Reads the configuration from the process environment.
    ///
    /// Every variable is optional: a missing value, or a numeric value that
    /// does not parse, is replaced by its default, so this currently always
    /// returns `Ok`.
    pub fn from_env() -> Result<Self, std::io::Error> {
        /// Returns the variable's value, or `fallback` when it is unavailable.
        fn text_or(key: &str, fallback: &str) -> String {
            match env::var(key) {
                Ok(value) => value,
                Err(_) => fallback.to_string(),
            }
        }

        /// Parses the variable as `T`, or returns `fallback` when it is
        /// unavailable or not parseable.
        fn parsed_or<T: std::str::FromStr>(key: &str, fallback: T) -> T {
            env::var(key)
                .ok()
                .and_then(|raw| raw.parse().ok())
                .unwrap_or(fallback)
        }

        let timeout_secs: u64 = parsed_or("REQUEST_TIMEOUT", 300);
        let max_retries: u32 = parsed_or("MAX_RETRIES", 3);

        Ok(Self {
            api_token: text_or("API_TOKEN", ""),
            web_unlocker_zone: text_or("WEB_UNLOCKER_ZONE", "default_zone"),
            browser_zone: text_or("BROWSER_ZONE", "default_browser"),
            rate_limit: env::var("RATE_LIMIT").ok(),
            timeout: Duration::from_secs(timeout_secs),
            max_retries,
        })
    }
}
36
37#[derive(Debug)]
38pub struct AppState {
39 pub config: Config,
40 pub session_id: Uuid,
41 pub http_client: Client,
42 pub rate_limits: Arc<RwLock<HashMap<String, (u32, DateTime<Utc>)>>>,
43 pub start_time: DateTime<Utc>,
44}
45
46impl AppState {
47 pub fn new(config: Config) -> Self {
48 Self {
49 session_id: Uuid::new_v4(),
50 config: config.clone(),
51 http_client: Client::builder().timeout(config.timeout).build().unwrap(),
52 rate_limits: Arc::new(RwLock::new(HashMap::new())),
53 start_time: Utc::now(),
54 }
55 }
56}
57
58#[derive(Debug, Serialize, Deserialize)]
59pub struct McpRequest {
60 pub jsonrpc: String,
61 pub id: Option<serde_json::Value>,
62 pub method: String,
63 pub params: Option<serde_json::Value>,
64}
65
66#[derive(Debug, Serialize, Deserialize)]
67pub struct McpResponse {
68 pub jsonrpc: String,
69 pub id: Option<serde_json::Value>,
70 pub result: Option<serde_json::Value>,
71 pub error: Option<McpError>,
72}
73
74#[derive(Debug, Serialize, Deserialize)]
75pub struct McpError {
76 pub code: i32,
77 pub message: String,
78 pub data: Option<serde_json::Value>,
79}
80
/// Namespace for the API endpoint URLs used by the tool handlers.
pub struct BrightDataUrls;

impl BrightDataUrls {
    /// Base URL used when `BRIGHTDATA_BASE_URL` is unset or blank.
    const DEFAULT_BASE_URL: &'static str = "https://api.brightdata.com";

    /// Returns the URL of the `/request` endpoint.
    ///
    /// The base is taken from the `BRIGHTDATA_BASE_URL` environment variable.
    /// Surrounding whitespace and trailing slashes are stripped so the result
    /// never contains `//request`, and an unset or blank value falls back to
    /// the default base URL.
    pub fn request_api() -> String {
        let configured = std::env::var("BRIGHTDATA_BASE_URL").unwrap_or_default();
        let base_url = match configured.trim().trim_end_matches('/') {
            "" => Self::DEFAULT_BASE_URL,
            trimmed => trimmed,
        };
        format!("{}/request", base_url)
    }
}
90
91pub async fn handle_mcp_request(
92 _req: HttpRequest,
93 payload: web::Json<McpRequest>,
94 state: web::Data<AppState>,
95) -> Result<HttpResponse> {
96 let req = payload.into_inner();
97 let id = req.id.clone();
98
99 let mcp_result: Result<McpResponse, String> = match req.method.as_str() {
101 "tools/list" => Ok(McpResponse {
102 jsonrpc: "2.0".to_string(),
103 id,
104 result: Some(serde_json::json!({
105 "tools": [
106 { "name": "scrape_website", "description": "Scrape a web page" },
107 { "name": "search_web", "description": "Perform a web search" },
108 { "name": "extract_data", "description": "Extract structured data from a webpage (WIP)" }
109 ]
110 })),
111 error: None,
112 }),
113
114 "tools/call" => {
115 if let Some(params) = req.params {
116 let name = params.get("name").and_then(|v| v.as_str()).unwrap_or("");
117 let args = params.get("arguments").cloned().unwrap_or_default();
118
119 if !check_rate_limit(name, &state).await {
120 return Ok(HttpResponse::TooManyRequests().json(McpResponse {
121 jsonrpc: "2.0".to_string(),
122 id,
123 result: None,
124 error: Some(McpError {
125 code: -32000,
126 message: "Rate limit exceeded".to_string(),
127 data: None,
128 }),
129 }));
130 }
131
132 let result = match name {
133 "scrape_website" => handle_scrape_website(&args, &state).await,
134 "search_web" => handle_search_web(&args, &state).await,
135 "extract_data" => handle_extract_placeholder(&args).await,
136 _ => Err("Unknown tool".to_string()),
137 };
138
139 Ok(match result {
140 Ok(content) => McpResponse {
141 jsonrpc: "2.0".to_string(),
142 id,
143 result: Some(serde_json::json!({ "content": content })),
144 error: None,
145 },
146 Err(msg) => McpResponse {
147 jsonrpc: "2.0".to_string(),
148 id,
149 result: None,
150 error: Some(McpError {
151 code: -32603,
152 message: msg,
153 data: None,
154 }),
155 },
156 })
157 } else {
158 Ok(McpResponse {
159 jsonrpc: "2.0".to_string(),
160 id,
161 result: None,
162 error: Some(McpError {
163 code: -32602,
164 message: "Missing parameters".into(),
165 data: None,
166 }),
167 })
168 }
169 }
170
171 _ => Ok(McpResponse {
172 jsonrpc: "2.0".to_string(),
173 id,
174 result: None,
175 error: Some(McpError {
176 code: -32601,
177 message: "Method not found".to_string(),
178 data: None,
179 }),
180 }),
181 };
182
183 match mcp_result {
185 Ok(resp) => Ok(HttpResponse::Ok().json(resp)),
186 Err(e) => Ok(HttpResponse::InternalServerError().json(McpResponse {
187 jsonrpc: "2.0".to_string(),
188 id: req.id,
189 result: None,
190 error: Some(McpError {
191 code: -32603,
192 message: e,
193 data: None,
194 }),
195 })),
196 }
197}
198
199
200pub async fn health_check(state: web::Data<AppState>) -> Result<HttpResponse> {
201 Ok(HttpResponse::Ok().json(serde_json::json!({
202 "status": "healthy",
203 "session_id": state.session_id,
204 "uptime_seconds": (Utc::now() - state.start_time).num_seconds(),
205 })))
206}
207
208pub async fn cors_handler() -> HttpResponse {
209 HttpResponse::Ok()
210 .insert_header(("Access-Control-Allow-Origin", "*"))
211 .insert_header(("Access-Control-Allow-Methods", "POST, GET, OPTIONS"))
212 .insert_header(("Access-Control-Allow-Headers", "Content-Type, Authorization"))
213 .finish()
214}
215
216async fn check_rate_limit(tool: &str, state: &web::Data<AppState>) -> bool {
217 let mut limits = state.rate_limits.write().await;
218 let now = Utc::now();
219 let entry = limits.entry(tool.to_string()).or_insert((0, now));
220
221 let limit = 10;
222 let window = chrono::Duration::seconds(60);
223
224 if now - entry.1 > window {
225 entry.0 = 0;
226 entry.1 = now;
227 }
228
229 if entry.0 >= limit {
230 false
231 } else {
232 entry.0 += 1;
233 true
234 }
235}
236
237
238
239pub async fn handle_scrape_website(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
240 let url = args.get("url").and_then(|v| v.as_str()).ok_or("Missing 'url'")?;
241 let format = args.get("format").and_then(|v| v.as_str()).unwrap_or("markdown");
242
243 let mut payload = serde_json::json!({
244 "url": url,
245 "zone": state.config.web_unlocker_zone,
246 "format": "raw",
247 });
248
249 if format == "markdown" {
250 payload["data_format"] = serde_json::json!("markdown");
251 }
252
253 let url = BrightDataUrls::request_api();
254
255 let res = state.http_client
256 .post(&url)
257 .header("Authorization", format!("Bearer {}", state.config.api_token))
258 .json(&payload)
259 .send()
260 .await
261 .map_err(|e| e.to_string())?;
262
263 let body = res.text().await.map_err(|e| e.to_string())?;
264 Ok(body)
265}
266
267pub async fn handle_search_web(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
268 let query = args.get("query").and_then(|v| v.as_str()).ok_or("Missing 'query'")?;
269 let engine = args.get("engine").and_then(|v| v.as_str()).unwrap_or("google");
270 let cursor = args.get("cursor").and_then(|v| v.as_str()).unwrap_or("0");
271
272 let search_url = build_search_url(engine, query, cursor);
273
274 let payload = serde_json::json!({
275 "url": search_url,
276 "zone": state.config.web_unlocker_zone,
277 "format": "raw",
278 "data_format": "markdown"
279 });
280
281 let url = BrightDataUrls::request_api();
282 let res = state.http_client
283 .post(&url)
284 .header("Authorization", format!("Bearer {}", state.config.api_token))
285 .json(&payload)
286 .send()
287 .await
288 .map_err(|e| e.to_string())?;
289
290 let body = res.text().await.map_err(|e| e.to_string())?;
291 Ok(body)
292}
293
294pub async fn handle_extract_placeholder(_args: &serde_json::Value) -> Result<String, String> {
295 Ok("🧠Extract tool placeholder: AI-based structured data extraction coming soon.".to_string())
296}
297
298fn build_search_url(engine: &str, query: &str, cursor: &str) -> String {
299 let q = urlencoding::encode(query);
300 let page: usize = cursor.parse().unwrap_or(0);
301 let start = page * 10;
302
303 match engine {
304 "yandex" => format!("https://yandex.com/search/?text={q}&p={page}"),
305 "bing" => format!("https://www.bing.com/search?q={q}&first={}", start + 1),
306 "duckduckgo" => format!("https://duckduckgo.com/?q={q}&s={start}"),
307 _ => format!("https://www.google.com/search?q={q}&start={start}"),
308 }
309}