snm_brightdata_client/tools/
scrape.rs1use crate::tool::{Tool, ToolResult, McpContent};
3use crate::error::BrightDataError;
4use async_trait::async_trait;
5use serde_json::{Value, json};
6use reqwest::Client;
7use std::time::Duration;
8
/// Tool that scrapes a single web page through the BrightData request API.
///
/// The type carries no state: credentials and endpoint settings are read from
/// the environment on every call. Note that, despite the type name, the tool
/// is registered under the name `scrape_website` and returns the raw page
/// body unless the caller explicitly asks for `"markdown"`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ScrapeMarkdown;
10
11#[async_trait]
12impl Tool for ScrapeMarkdown {
13 fn name(&self) -> &str {
14 "scrape_website"
15 }
16
17 fn description(&self) -> &str {
18 "Scrape a webpage using BrightData Web Unlocker"
19 }
20
21 fn input_schema(&self) -> Value {
22 json!({
23 "type": "object",
24 "properties": {
25 "url": {
26 "type": "string",
27 "description": "The URL to scrape"
28 },
29 "format": {
30 "type": "string",
31 "enum": ["raw", "markdown"],
32 "description": "Output format",
33 "default": "raw"
34 }
35 },
36 "required": ["url"]
37 })
38 }
39
40 async fn execute(&self, parameters: Value) -> Result<ToolResult, BrightDataError> {
41 let url = parameters
42 .get("url")
43 .and_then(|v| v.as_str())
44 .ok_or_else(|| BrightDataError::ToolError("Missing 'url' parameter".into()))?;
45
46 let format = parameters
47 .get("format")
48 .and_then(|v| v.as_str())
49 .unwrap_or("raw");
50
51 let result = self.scrape_with_brightdata(url, format).await?;
52
53 let content_text = result.get("content").and_then(|c| c.as_str()).unwrap_or("No content");
54 let mcp_content = vec![McpContent::text(format!(
55 "🌐 **Scraped from {}**\n\n{}",
56 url,
57 content_text
58 ))];
59
60 Ok(ToolResult::success_with_raw(mcp_content, result))
61 }
62}
63
64impl ScrapeMarkdown {
65 async fn scrape_with_brightdata(&self, url: &str, format: &str) -> Result<Value, BrightDataError> {
66 let api_token = std::env::var("BRIGHTDATA_API_TOKEN")
67 .or_else(|_| std::env::var("API_TOKEN"))
68 .map_err(|_| BrightDataError::ToolError("Missing BRIGHTDATA_API_TOKEN".into()))?;
69
70 let base_url = std::env::var("BRIGHTDATA_BASE_URL")
71 .unwrap_or_else(|_| "https://api.brightdata.com".to_string());
72
73 let zone = std::env::var("WEB_UNLOCKER_ZONE")
74 .unwrap_or_else(|_| "default".to_string());
75
76 let mut payload = json!({
78 "url": url,
79 "zone": zone,
80 "format": "raw" });
82
83 if format == "markdown" {
85 payload["data_format"] = json!("markdown");
86 }
87
88 let client = Client::builder()
89 .timeout(Duration::from_secs(120))
90 .build()
91 .map_err(|e| BrightDataError::ToolError(e.to_string()))?;
92
93 let response = client
94 .post(&format!("{}/request", base_url))
95 .header("Authorization", format!("Bearer {}", api_token))
96 .header("Content-Type", "application/json")
97 .json(&payload)
98 .send()
99 .await
100 .map_err(|e| BrightDataError::ToolError(format!("Request failed: {}", e)))?;
101
102 let status = response.status();
103 if !status.is_success() {
104 let error_text = response.text().await.unwrap_or_default();
105 return Err(BrightDataError::ToolError(format!(
106 "BrightData API error {}: {}",
107 status, error_text
108 )));
109 }
110
111 let content = response.text().await
112 .map_err(|e| BrightDataError::ToolError(e.to_string()))?;
113
114 Ok(json!({
115 "content": content,
116 "url": url,
117 "format": format,
118 "success": true
119 }))
120 }
121}