snm_brightdata_client/
server.rs

1// src/server.rs
2use actix_web::{web, HttpRequest, HttpResponse, Result};
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::env;
6use std::sync::Arc;
7use std::time::Duration;
8use tokio::sync::RwLock;
9use chrono::{DateTime, Utc};
10use reqwest::Client;
11use uuid::Uuid;
12
/// Server configuration, populated from environment variables.
#[derive(Debug, Clone)]
pub struct Config {
    /// Value of `API_TOKEN`; the empty string when the variable is unset.
    pub api_token: String,
    /// Value of `WEB_UNLOCKER_ZONE`; `"default_zone"` when unset.
    pub web_unlocker_zone: String,
    /// Value of `BROWSER_ZONE`; `"default_browser"` when unset.
    pub browser_zone: String,
    /// Raw value of `RATE_LIMIT`, or `None` when the variable is unset.
    pub rate_limit: Option<String>,
    /// Request timeout built from `REQUEST_TIMEOUT` (whole seconds); 60 s by default.
    pub timeout: Duration,
    /// Value of `MAX_RETRIES`; 3 by default.
    pub max_retries: u32,
}

impl Config {
    /// Builds a [`Config`] from the process environment.
    ///
    /// Every setting has a fallback, so this currently always returns `Ok`:
    /// unset (or non-Unicode) variables and unparsable numbers silently
    /// resolve to their defaults.
    pub fn from_env() -> Result<Self, std::io::Error> {
        // String setting: take the variable, or the fallback on any lookup error.
        let text_or = |key: &str, fallback: &str| -> String {
            env::var(key).unwrap_or_else(|_| fallback.to_string())
        };

        // Numeric settings: a lookup failure and a parse failure both yield the default.
        let timeout_secs: u64 = env::var("REQUEST_TIMEOUT")
            .ok()
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(60);
        let max_retries: u32 = env::var("MAX_RETRIES")
            .ok()
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(3);

        Ok(Self {
            api_token: env::var("API_TOKEN").unwrap_or_default(),
            web_unlocker_zone: text_or("WEB_UNLOCKER_ZONE", "default_zone"),
            browser_zone: text_or("BROWSER_ZONE", "default_browser"),
            rate_limit: env::var("RATE_LIMIT").ok(),
            timeout: Duration::from_secs(timeout_secs),
            max_retries,
        })
    }
}
35
36#[derive(Debug)]
37pub struct AppState {
38    pub config: Config,
39    pub session_id: Uuid,
40    pub http_client: Client,
41    pub rate_limits: Arc<RwLock<HashMap<String, (u32, DateTime<Utc>)>>>,
42    pub start_time: DateTime<Utc>,
43}
44
45impl AppState {
46    pub fn new(config: Config) -> Self {
47        Self {
48            session_id: Uuid::new_v4(),
49            config: config.clone(),
50            http_client: Client::builder().timeout(config.timeout).build().unwrap(),
51            rate_limits: Arc::new(RwLock::new(HashMap::new())),
52            start_time: Utc::now(),
53        }
54    }
55}
56
57#[derive(Debug, Serialize, Deserialize)]
58pub struct McpRequest {
59    pub jsonrpc: String,
60    pub id: Option<serde_json::Value>,
61    pub method: String,
62    pub params: Option<serde_json::Value>,
63}
64
65#[derive(Debug, Serialize, Deserialize)]
66pub struct McpResponse {
67    pub jsonrpc: String,
68    pub id: Option<serde_json::Value>,
69    pub result: Option<serde_json::Value>,
70    pub error: Option<McpError>,
71}
72
73#[derive(Debug, Serialize, Deserialize)]
74pub struct McpError {
75    pub code: i32,
76    pub message: String,
77    pub data: Option<serde_json::Value>,
78}
79
/// Namespace type for the Bright Data endpoint URLs used by this server.
pub struct BrightDataUrls;

impl BrightDataUrls {
    /// Endpoint that the scrape and search handlers POST their payloads to.
    pub const REQUEST_API: &'static str = "https://api.brightdata.com/request";
}
85
86pub async fn handle_mcp_request(
87    _req: HttpRequest,
88    payload: web::Json<McpRequest>,
89    state: web::Data<AppState>,
90) -> Result<HttpResponse> {
91    let req = payload.into_inner();
92    let id = req.id.clone();
93
94    // Match returns Result<McpResponse, String>
95    let mcp_result: Result<McpResponse, String> = match req.method.as_str() {
96        "tools/list" => Ok(McpResponse {
97            jsonrpc: "2.0".to_string(),
98            id,
99            result: Some(serde_json::json!({
100                "tools": [
101                    { "name": "scrape_website", "description": "Scrape a web page" },
102                    { "name": "search_web", "description": "Perform a web search" },
103                    { "name": "extract_data", "description": "Extract structured data from a webpage (WIP)" }
104                ]
105            })),
106            error: None,
107        }),
108
109        "tools/call" => {
110            if let Some(params) = req.params {
111                let name = params.get("name").and_then(|v| v.as_str()).unwrap_or("");
112                let args = params.get("arguments").cloned().unwrap_or_default();
113
114                if !check_rate_limit(name, &state).await {
115                    return Ok(HttpResponse::TooManyRequests().json(McpResponse {
116                        jsonrpc: "2.0".to_string(),
117                        id,
118                        result: None,
119                        error: Some(McpError {
120                            code: -32000,
121                            message: "Rate limit exceeded".to_string(),
122                            data: None,
123                        }),
124                    }));
125                }
126
127                let result = match name {
128                    "scrape_website" => handle_scrape_website(&args, &state).await,
129                    "search_web" => handle_search_web(&args, &state).await,
130                    "extract_data" => handle_extract_placeholder(&args).await,
131                    _ => Err("Unknown tool".to_string()),
132                };
133
134                Ok(match result {
135                    Ok(content) => McpResponse {
136                        jsonrpc: "2.0".to_string(),
137                        id,
138                        result: Some(serde_json::json!({ "content": content })),
139                        error: None,
140                    },
141                    Err(msg) => McpResponse {
142                        jsonrpc: "2.0".to_string(),
143                        id,
144                        result: None,
145                        error: Some(McpError {
146                            code: -32603,
147                            message: msg,
148                            data: None,
149                        }),
150                    },
151                })
152            } else {
153                Ok(McpResponse {
154                    jsonrpc: "2.0".to_string(),
155                    id,
156                    result: None,
157                    error: Some(McpError {
158                        code: -32602,
159                        message: "Missing parameters".into(),
160                        data: None,
161                    }),
162                })
163            }
164        }
165
166        _ => Ok(McpResponse {
167            jsonrpc: "2.0".to_string(),
168            id,
169            result: None,
170            error: Some(McpError {
171                code: -32601,
172                message: "Method not found".to_string(),
173                data: None,
174            }),
175        }),
176    };
177
178    // Wrap the unified result into an HTTP response
179    match mcp_result {
180        Ok(resp) => Ok(HttpResponse::Ok().json(resp)),
181        Err(e) => Ok(HttpResponse::InternalServerError().json(McpResponse {
182            jsonrpc: "2.0".to_string(),
183            id: req.id,
184            result: None,
185            error: Some(McpError {
186                code: -32603,
187                message: e,
188                data: None,
189            }),
190        })),
191    }
192}
193
194
195pub async fn health_check(state: web::Data<AppState>) -> Result<HttpResponse> {
196    Ok(HttpResponse::Ok().json(serde_json::json!({
197        "status": "healthy",
198        "session_id": state.session_id,
199        "uptime_seconds": (Utc::now() - state.start_time).num_seconds(),
200    })))
201}
202
203pub async fn cors_handler() -> HttpResponse {
204    HttpResponse::Ok()
205        .insert_header(("Access-Control-Allow-Origin", "*"))
206        .insert_header(("Access-Control-Allow-Methods", "POST, GET, OPTIONS"))
207        .insert_header(("Access-Control-Allow-Headers", "Content-Type, Authorization"))
208        .finish()
209}
210
211async fn check_rate_limit(tool: &str, state: &web::Data<AppState>) -> bool {
212    let mut limits = state.rate_limits.write().await;
213    let now = Utc::now();
214    let entry = limits.entry(tool.to_string()).or_insert((0, now));
215
216    let limit = 10;
217    let window = chrono::Duration::seconds(60);
218
219    if now - entry.1 > window {
220        entry.0 = 0;
221        entry.1 = now;
222    }
223
224    if entry.0 >= limit {
225        false
226    } else {
227        entry.0 += 1;
228        true
229    }
230}
231
232
233
234
235
236
237async fn handle_scrape_website(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
238    let url = args.get("url").and_then(|v| v.as_str()).ok_or("Missing 'url'")?;
239    let format = args.get("format").and_then(|v| v.as_str()).unwrap_or("markdown");
240
241    let mut payload = serde_json::json!({
242        "url": url,
243        "zone": state.config.web_unlocker_zone,
244        "format": "raw",
245    });
246
247    if format == "markdown" {
248        payload["data_format"] = serde_json::json!("markdown");
249    }
250
251    let res = state.http_client
252        .post(BrightDataUrls::REQUEST_API)
253        .header("Authorization", format!("Bearer {}", state.config.api_token))
254        .json(&payload)
255        .send()
256        .await
257        .map_err(|e| e.to_string())?;
258
259    let body = res.text().await.map_err(|e| e.to_string())?;
260    Ok(body)
261}
262
263async fn handle_search_web(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
264    let query = args.get("query").and_then(|v| v.as_str()).ok_or("Missing 'query'")?;
265    let engine = args.get("engine").and_then(|v| v.as_str()).unwrap_or("google");
266    let cursor = args.get("cursor").and_then(|v| v.as_str()).unwrap_or("0");
267
268    let search_url = build_search_url(engine, query, cursor);
269
270    let payload = serde_json::json!({
271        "url": search_url,
272        "zone": state.config.web_unlocker_zone,
273        "format": "raw",
274        "data_format": "markdown"
275    });
276
277    let res = state.http_client
278        .post(BrightDataUrls::REQUEST_API)
279        .header("Authorization", format!("Bearer {}", state.config.api_token))
280        .json(&payload)
281        .send()
282        .await
283        .map_err(|e| e.to_string())?;
284
285    let body = res.text().await.map_err(|e| e.to_string())?;
286    Ok(body)
287}
288
289async fn handle_extract_placeholder(_args: &serde_json::Value) -> Result<String, String> {
290    Ok("🧠 Extract tool placeholder: AI-based structured data extraction coming soon.".to_string())
291}
292
293fn build_search_url(engine: &str, query: &str, cursor: &str) -> String {
294    let q = urlencoding::encode(query);
295    let page: usize = cursor.parse().unwrap_or(0);
296    let start = page * 10;
297
298    match engine {
299        "yandex" => format!("https://yandex.com/search/?text={q}&p={page}"),
300        "bing" => format!("https://www.bing.com/search?q={q}&first={}", start + 1),
301        "duckduckgo" => format!("https://duckduckgo.com/?q={q}&s={start}"),
302        _ => format!("https://www.google.com/search?q={q}&start={start}"),
303    }
304}