snm_brightdata_client/tools/
extract.rs

1// src/tools/extract.rs
2use crate::tool::{Tool, ToolResult, McpContent};
3use crate::error::BrightDataError;
4use async_trait::async_trait;
5use reqwest::Client;
6use serde_json::{json, Value};
7use std::env;
8use std::time::Duration;
9
/// Tool that extracts the content of a web page through BrightData.
///
/// The type is a stateless unit struct; all configuration is read from the
/// environment at call time, so values are free to copy and default-construct.
#[derive(Debug, Clone, Copy, Default)]
pub struct Extractor;
11
12#[async_trait]
13impl Tool for Extractor {
14    fn name(&self) -> &str {
15        "extract_data"
16    }
17
18    fn description(&self) -> &str {
19        "Extract data from a webpage using BrightData"
20    }
21
22    fn input_schema(&self) -> Value {
23        json!({
24            "type": "object",
25            "properties": {
26                "url": {
27                    "type": "string",
28                    "description": "The URL to extract data from"
29                }
30            },
31            "required": ["url"]
32        })
33    }
34
35    async fn execute(&self, parameters: Value) -> Result<ToolResult, BrightDataError> {
36        let url = parameters
37            .get("url")
38            .and_then(|v| v.as_str())
39            .ok_or_else(|| BrightDataError::ToolError("Missing 'url' parameter".into()))?;
40
41        let result = self.extract_with_brightdata(url).await?;
42
43        let content_text = result.get("content").and_then(|c| c.as_str()).unwrap_or("No data");
44        let mcp_content = vec![McpContent::text(format!(
45            "📊 **Data extracted from {}**\n\n{}",
46            url,
47            content_text
48        ))];
49
50        Ok(ToolResult::success_with_raw(mcp_content, result))
51    }
52}
53
54impl Extractor {
55    async fn extract_with_brightdata(&self, url: &str) -> Result<Value, BrightDataError> {
56        let api_token = env::var("BRIGHTDATA_API_TOKEN")
57            .or_else(|_| env::var("API_TOKEN"))
58            .map_err(|_| BrightDataError::ToolError("Missing BRIGHTDATA_API_TOKEN".into()))?;
59
60        let base_url = env::var("BRIGHTDATA_BASE_URL")
61            .unwrap_or_else(|_| "https://api.brightdata.com".to_string());
62
63        let zone = env::var("WEB_UNLOCKER_ZONE")
64            .unwrap_or_else(|_| "default".to_string());
65
66        // Valid BrightData parameters only
67        let payload = json!({
68            "url": url,
69            "zone": zone,
70            "format": "raw",
71            "data_format": "markdown"
72        });
73
74        let client = Client::builder()
75            .timeout(Duration::from_secs(120))
76            .build()
77            .map_err(|e| BrightDataError::ToolError(e.to_string()))?;
78
79        let response = client
80            .post(&format!("{}/request", base_url))
81            .header("Authorization", format!("Bearer {}", api_token))
82            .header("Content-Type", "application/json")
83            .json(&payload)
84            .send()
85            .await
86            .map_err(|e| BrightDataError::ToolError(format!("Extraction request failed: {}", e)))?;
87
88        let status = response.status();
89        if !status.is_success() {
90            let error_text = response.text().await.unwrap_or_default();
91            return Err(BrightDataError::ToolError(format!(
92                "BrightData extraction error {}: {}",
93                status, error_text
94            )));
95        }
96
97        let content = response.text().await
98            .map_err(|e| BrightDataError::ToolError(e.to_string()))?;
99
100        Ok(json!({
101            "content": content,
102            "url": url,
103            "success": true
104        }))
105    }
106}