snm_brightdata_client/tools/
extract.rs1use crate::tool::{Tool, ToolResult, McpContent};
3use crate::error::BrightDataError;
4use async_trait::async_trait;
5use reqwest::Client;
6use serde_json::{json, Value};
7use std::env;
8use std::time::Duration;
9
/// Tool that extracts the content of a web page through the BrightData API.
///
/// The type holds no state: the API token, base URL and zone are read from the
/// environment each time an extraction is performed.
#[derive(Debug, Clone, Copy, Default)]
pub struct Extractor;
11
12#[async_trait]
13impl Tool for Extractor {
14 fn name(&self) -> &str {
15 "extract_data"
16 }
17
18 fn description(&self) -> &str {
19 "Extract data from a webpage using BrightData"
20 }
21
22 fn input_schema(&self) -> Value {
23 json!({
24 "type": "object",
25 "properties": {
26 "url": {
27 "type": "string",
28 "description": "The URL to extract data from"
29 }
30 },
31 "required": ["url"]
32 })
33 }
34
35 async fn execute(&self, parameters: Value) -> Result<ToolResult, BrightDataError> {
36 let url = parameters
37 .get("url")
38 .and_then(|v| v.as_str())
39 .ok_or_else(|| BrightDataError::ToolError("Missing 'url' parameter".into()))?;
40
41 let result = self.extract_with_brightdata(url).await?;
42
43 let content_text = result.get("content").and_then(|c| c.as_str()).unwrap_or("No data");
44 let mcp_content = vec![McpContent::text(format!(
45 "📊 **Data extracted from {}**\n\n{}",
46 url,
47 content_text
48 ))];
49
50 Ok(ToolResult::success_with_raw(mcp_content, result))
51 }
52}
53
54impl Extractor {
55 async fn extract_with_brightdata(&self, url: &str) -> Result<Value, BrightDataError> {
56 let api_token = env::var("BRIGHTDATA_API_TOKEN")
57 .or_else(|_| env::var("API_TOKEN"))
58 .map_err(|_| BrightDataError::ToolError("Missing BRIGHTDATA_API_TOKEN".into()))?;
59
60 let base_url = env::var("BRIGHTDATA_BASE_URL")
61 .unwrap_or_else(|_| "https://api.brightdata.com".to_string());
62
63 let zone = env::var("WEB_UNLOCKER_ZONE")
64 .unwrap_or_else(|_| "default".to_string());
65
66 let payload = json!({
68 "url": url,
69 "zone": zone,
70 "format": "raw",
71 "data_format": "markdown"
72 });
73
74 let client = Client::builder()
75 .timeout(Duration::from_secs(120))
76 .build()
77 .map_err(|e| BrightDataError::ToolError(e.to_string()))?;
78
79 let response = client
80 .post(&format!("{}/request", base_url))
81 .header("Authorization", format!("Bearer {}", api_token))
82 .header("Content-Type", "application/json")
83 .json(&payload)
84 .send()
85 .await
86 .map_err(|e| BrightDataError::ToolError(format!("Extraction request failed: {}", e)))?;
87
88 let status = response.status();
89 if !status.is_success() {
90 let error_text = response.text().await.unwrap_or_default();
91 return Err(BrightDataError::ToolError(format!(
92 "BrightData extraction error {}: {}",
93 status, error_text
94 )));
95 }
96
97 let content = response.text().await
98 .map_err(|e| BrightDataError::ToolError(e.to_string()))?;
99
100 Ok(json!({
101 "content": content,
102 "url": url,
103 "success": true
104 }))
105 }
106}