snm_brightdata_client/
server.rs

1// src/server.rs
2use actix_web::{web, HttpRequest, HttpResponse, Result};
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::env;
6use std::sync::Arc;
7use std::time::Duration;
8use tokio::sync::RwLock;
9use chrono::{DateTime, Utc};
10use reqwest::Client;
11use uuid::Uuid;
12use crate::error::BrightDataError;
13
/// Runtime configuration for the Bright Data MCP server, read from
/// environment variables.
#[derive(Debug, Clone)]
pub struct Config {
    /// Bearer token sent to the Bright Data API (`API_TOKEN`).
    pub api_token: String,
    /// Zone name sent with web-unlocker requests (`WEB_UNLOCKER_ZONE`).
    pub web_unlocker_zone: String,
    /// Zone name for browser requests (`BROWSER_ZONE`).
    pub browser_zone: String,
    /// Raw `RATE_LIMIT` value, if the variable is set.
    pub rate_limit: Option<String>,
    /// Timeout applied to outgoing HTTP requests (`REQUEST_TIMEOUT`, in seconds).
    pub timeout: Duration,
    /// Retry budget (`MAX_RETRIES`).
    pub max_retries: u32,
}

impl Config {
    /// Builds a [`Config`] from the process environment.
    ///
    /// Every variable is optional: a missing or unparsable value falls back
    /// to its default (empty token, `default_zone`, `default_browser`, no
    /// rate limit, 300 seconds, 3 retries).
    ///
    /// # Errors
    ///
    /// The current implementation never returns `Err`; the `Result` is kept
    /// for interface stability.
    pub fn from_env() -> Result<Self, std::io::Error> {
        // String setting with a literal fallback when the variable is unset.
        let text_or = |key: &str, fallback: &str| -> String {
            env::var(key).unwrap_or_else(|_| fallback.to_string())
        };

        let timeout_secs: u64 = env::var("REQUEST_TIMEOUT")
            .ok()
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(300);
        let max_retries: u32 = env::var("MAX_RETRIES")
            .ok()
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(3);

        let config = Self {
            api_token: env::var("API_TOKEN").unwrap_or_default(),
            web_unlocker_zone: text_or("WEB_UNLOCKER_ZONE", "default_zone"),
            browser_zone: text_or("BROWSER_ZONE", "default_browser"),
            rate_limit: env::var("RATE_LIMIT").ok(),
            timeout: Duration::from_secs(timeout_secs),
            max_retries,
        };
        Ok(config)
    }
}
36
37#[derive(Debug)]
38pub struct AppState {
39    pub config: Config,
40    pub session_id: Uuid,
41    pub http_client: Client,
42    pub rate_limits: Arc<RwLock<HashMap<String, (u32, DateTime<Utc>)>>>,
43    pub start_time: DateTime<Utc>,
44}
45
46impl AppState {
47    pub fn new(config: Config) -> Self {
48        Self {
49            session_id: Uuid::new_v4(),
50            config: config.clone(),
51            http_client: Client::builder().timeout(config.timeout).build().unwrap(),
52            rate_limits: Arc::new(RwLock::new(HashMap::new())),
53            start_time: Utc::now(),
54        }
55    }
56}
57
58#[derive(Debug, Serialize, Deserialize)]
59pub struct McpRequest {
60    pub jsonrpc: String,
61    pub id: Option<serde_json::Value>,
62    pub method: String,
63    pub params: Option<serde_json::Value>,
64}
65
66#[derive(Debug, Serialize, Deserialize)]
67pub struct McpResponse {
68    pub jsonrpc: String,
69    pub id: Option<serde_json::Value>,
70    pub result: Option<serde_json::Value>,
71    pub error: Option<McpError>,
72}
73
74#[derive(Debug, Serialize, Deserialize)]
75pub struct McpError {
76    pub code: i32,
77    pub message: String,
78    pub data: Option<serde_json::Value>,
79}
80
/// Namespace for the Bright Data API endpoint URLs used by this server.
pub struct BrightDataUrls;

impl BrightDataUrls {
    /// Returns the URL of the `/request` endpoint.
    ///
    /// The base URL comes from the `BRIGHTDATA_BASE_URL` environment variable
    /// and defaults to `https://api.brightdata.com`. Surrounding whitespace
    /// and trailing slashes are stripped so that a value such as
    /// `https://host/` does not produce `https://host//request`. An unset or
    /// blank value falls back to the default.
    pub fn request_api() -> String {
        const DEFAULT_BASE_URL: &str = "https://api.brightdata.com";

        let configured = std::env::var("BRIGHTDATA_BASE_URL").unwrap_or_default();
        let trimmed = configured.trim().trim_end_matches('/');
        let base_url = if trimmed.is_empty() {
            DEFAULT_BASE_URL
        } else {
            trimmed
        };

        format!("{}/request", base_url)
    }
}
90
91pub async fn handle_mcp_request(
92    _req: HttpRequest,
93    payload: web::Json<McpRequest>,
94    state: web::Data<AppState>,
95) -> Result<HttpResponse> {
96    let req = payload.into_inner();
97    let id = req.id.clone();
98
99    // Match returns Result<McpResponse, String>
100    let mcp_result: Result<McpResponse, String> = match req.method.as_str() {
101        "tools/list" => Ok(McpResponse {
102            jsonrpc: "2.0".to_string(),
103            id,
104            result: Some(serde_json::json!({
105                "tools": [
106                    { "name": "scrape_website", "description": "Scrape a web page" },
107                    { "name": "search_web", "description": "Perform a web search" },
108                    { "name": "extract_data", "description": "Extract structured data from a webpage (WIP)" }
109                ]
110            })),
111            error: None,
112        }),
113
114        "tools/call" => {
115            if let Some(params) = req.params {
116                let name = params.get("name").and_then(|v| v.as_str()).unwrap_or("");
117                let args = params.get("arguments").cloned().unwrap_or_default();
118
119                if !check_rate_limit(name, &state).await {
120                    return Ok(HttpResponse::TooManyRequests().json(McpResponse {
121                        jsonrpc: "2.0".to_string(),
122                        id,
123                        result: None,
124                        error: Some(McpError {
125                            code: -32000,
126                            message: "Rate limit exceeded".to_string(),
127                            data: None,
128                        }),
129                    }));
130                }
131
132                let result = match name {
133                    "scrape_website" => handle_scrape_website(&args, &state).await,
134                    "search_web" => handle_search_web(&args, &state).await,
135                    "extract_data" => handle_extract_placeholder(&args).await,
136                    _ => Err("Unknown tool".to_string()),
137                };
138
139                Ok(match result {
140                    Ok(content) => McpResponse {
141                        jsonrpc: "2.0".to_string(),
142                        id,
143                        result: Some(serde_json::json!({ "content": content })),
144                        error: None,
145                    },
146                    Err(msg) => McpResponse {
147                        jsonrpc: "2.0".to_string(),
148                        id,
149                        result: None,
150                        error: Some(McpError {
151                            code: -32603,
152                            message: msg,
153                            data: None,
154                        }),
155                    },
156                })
157            } else {
158                Ok(McpResponse {
159                    jsonrpc: "2.0".to_string(),
160                    id,
161                    result: None,
162                    error: Some(McpError {
163                        code: -32602,
164                        message: "Missing parameters".into(),
165                        data: None,
166                    }),
167                })
168            }
169        }
170
171        _ => Ok(McpResponse {
172            jsonrpc: "2.0".to_string(),
173            id,
174            result: None,
175            error: Some(McpError {
176                code: -32601,
177                message: "Method not found".to_string(),
178                data: None,
179            }),
180        }),
181    };
182
183    // Wrap the unified result into an HTTP response
184    match mcp_result {
185        Ok(resp) => Ok(HttpResponse::Ok().json(resp)),
186        Err(e) => Ok(HttpResponse::InternalServerError().json(McpResponse {
187            jsonrpc: "2.0".to_string(),
188            id: req.id,
189            result: None,
190            error: Some(McpError {
191                code: -32603,
192                message: e,
193                data: None,
194            }),
195        })),
196    }
197}
198
199
200pub async fn health_check(state: web::Data<AppState>) -> Result<HttpResponse> {
201    Ok(HttpResponse::Ok().json(serde_json::json!({
202        "status": "healthy",
203        "session_id": state.session_id,
204        "uptime_seconds": (Utc::now() - state.start_time).num_seconds(),
205    })))
206}
207
208pub async fn cors_handler() -> HttpResponse {
209    HttpResponse::Ok()
210        .insert_header(("Access-Control-Allow-Origin", "*"))
211        .insert_header(("Access-Control-Allow-Methods", "POST, GET, OPTIONS"))
212        .insert_header(("Access-Control-Allow-Headers", "Content-Type, Authorization"))
213        .finish()
214}
215
216async fn check_rate_limit(tool: &str, state: &web::Data<AppState>) -> bool {
217    let mut limits = state.rate_limits.write().await;
218    let now = Utc::now();
219    let entry = limits.entry(tool.to_string()).or_insert((0, now));
220
221    let limit = 10;
222    let window = chrono::Duration::seconds(60);
223
224    if now - entry.1 > window {
225        entry.0 = 0;
226        entry.1 = now;
227    }
228
229    if entry.0 >= limit {
230        false
231    } else {
232        entry.0 += 1;
233        true
234    }
235}
236
237
238
239pub async fn handle_scrape_website(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
240    let url = args.get("url").and_then(|v| v.as_str()).ok_or("Missing 'url'")?;
241    let format = args.get("format").and_then(|v| v.as_str()).unwrap_or("markdown");
242
243    let mut payload = serde_json::json!({
244        "url": url,
245        "zone": state.config.web_unlocker_zone,
246        "format": "raw",
247    });
248
249    if format == "markdown" {
250        payload["data_format"] = serde_json::json!("markdown");
251    }
252
253    let url = BrightDataUrls::request_api();
254
255    let res = state.http_client
256        .post(&url)
257        .header("Authorization", format!("Bearer {}", state.config.api_token))
258        .json(&payload)
259        .send()
260        .await
261        .map_err(|e| e.to_string())?;
262
263    let body = res.text().await.map_err(|e| e.to_string())?;
264    Ok(body)
265}
266
267pub async fn handle_search_web(args: &serde_json::Value, state: &web::Data<AppState>) -> Result<String, String> {
268    let query = args.get("query").and_then(|v| v.as_str()).ok_or("Missing 'query'")?;
269    let engine = args.get("engine").and_then(|v| v.as_str()).unwrap_or("google");
270    let cursor = args.get("cursor").and_then(|v| v.as_str()).unwrap_or("0");
271
272    let search_url = build_search_url(engine, query, cursor);
273
274    let payload = serde_json::json!({
275        "url": search_url,
276        "zone": state.config.web_unlocker_zone,
277        "format": "raw",
278        "data_format": "markdown"
279    });
280
281    let url = BrightDataUrls::request_api();
282    let res = state.http_client
283        .post(&url)
284        .header("Authorization", format!("Bearer {}", state.config.api_token))
285        .json(&payload)
286        .send()
287        .await
288        .map_err(|e| e.to_string())?;
289
290    let body = res.text().await.map_err(|e| e.to_string())?;
291    Ok(body)
292}
293
294pub async fn handle_extract_placeholder(_args: &serde_json::Value) -> Result<String, String> {
295    Ok("🧠 Extract tool placeholder: AI-based structured data extraction coming soon.".to_string())
296}
297
298fn build_search_url(engine: &str, query: &str, cursor: &str) -> String {
299    let q = urlencoding::encode(query);
300    let page: usize = cursor.parse().unwrap_or(0);
301    let start = page * 10;
302
303    match engine {
304        "yandex" => format!("https://yandex.com/search/?text={q}&p={page}"),
305        "bing" => format!("https://www.bing.com/search?q={q}&first={}", start + 1),
306        "duckduckgo" => format!("https://duckduckgo.com/?q={q}&s={start}"),
307        _ => format!("https://www.google.com/search?q={q}&start={start}"),
308    }
309}