Skip to main content

scrapfly_sdk/
cloud_browser.rs

1//! Cloud Browser API — port of `sdk/go/cloud_browser.go`.
2
3use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
4use reqwest::{Method, Url};
5use serde::{Deserialize, Serialize};
6
7use crate::client::Client;
8use crate::error::{from_response, ScrapflyError};
9
10/// Configuration for a Cloud Browser WebSocket session (passed to
11/// [`cloud_browser_url`]).
12#[derive(Debug, Clone, Default, Serialize)]
13pub struct BrowserConfig {
14    /// Proxy pool.
15    #[serde(skip_serializing_if = "Option::is_none")]
16    pub proxy_pool: Option<String>,
17    /// OS fingerprint.
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub os: Option<String>,
20    /// Proxy country.
21    #[serde(skip_serializing_if = "Option::is_none")]
22    pub country: Option<String>,
23    /// Session name.
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub session: Option<String>,
26    /// Session timeout (seconds).
27    #[serde(skip_serializing_if = "Option::is_none")]
28    pub timeout: Option<u32>,
29    /// Block images.
30    #[serde(skip_serializing_if = "is_false")]
31    pub block_images: bool,
32    /// Block stylesheets.
33    #[serde(skip_serializing_if = "is_false")]
34    pub block_styles: bool,
35    /// Block fonts.
36    #[serde(skip_serializing_if = "is_false")]
37    pub block_fonts: bool,
38    /// Block media.
39    #[serde(skip_serializing_if = "is_false")]
40    pub block_media: bool,
41    /// Enable screenshot capability.
42    #[serde(skip_serializing_if = "is_false")]
43    pub screenshot: bool,
44    /// Enable cache.
45    #[serde(skip_serializing_if = "is_false")]
46    pub cache: bool,
47    /// Enable blacklist.
48    #[serde(skip_serializing_if = "is_false")]
49    pub blacklist: bool,
50    /// Debug.
51    #[serde(skip_serializing_if = "is_false")]
52    pub debug: bool,
53    /// Resolution.
54    #[serde(skip_serializing_if = "Option::is_none")]
55    pub resolution: Option<String>,
56    /// Browser brand.
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub browser_brand: Option<String>,
59    /// BYOP proxy URL.
60    #[serde(skip_serializing_if = "Option::is_none")]
61    pub byop_proxy: Option<String>,
62    /// Enable MCP (Model Context Protocol) support.
63    #[serde(skip_serializing_if = "is_false")]
64    pub enable_mcp: bool,
65    /// Arm Scrapium's built-in captcha detector + solver on the first page attach.
66    /// Turnstile, DataDome slider, reCAPTCHA, GeeTest, PerimeterX hold, and
67    /// puzzle captchas are handled automatically. Billed per solve; failures
68    /// cost nothing. See <https://scrapfly.io/docs/cloud-browser-api/captcha-solver>.
69    #[serde(skip_serializing_if = "is_false")]
70    pub solve_captcha: bool,
71}
72
/// `skip_serializing_if` predicate: `true` when the flag is unset.
///
/// Takes `&bool` (not `bool`) because serde hands the predicate a reference
/// to the field.
fn is_false(v: &bool) -> bool {
    matches!(*v, false)
}
76
/// Turn a configured Cloud Browser host into its WebSocket form.
///
/// Scheme mapping (prefix match, case-sensitive):
///
/// * `wss://` and `ws://` are kept as they are;
/// * `https://` becomes `wss://`, `http://` becomes `ws://`;
/// * anything else is treated as a bare `host[:port]` and gets `wss://`.
///
/// Mirrors `sdk/go/cloud_browser.go::wsBase`.
fn ws_base(host: &str) -> String {
    // (input prefix, output scheme). `wss://` is listed before `ws://` and
    // `https://` before `http://`, matching the order the prefixes are tried.
    const SCHEMES: [(&str, &str); 4] = [
        ("wss://", "wss://"),
        ("ws://", "ws://"),
        ("https://", "wss://"),
        ("http://", "ws://"),
    ];

    SCHEMES
        .iter()
        .find_map(|&(prefix, scheme)| {
            host.strip_prefix(prefix)
                .map(|remainder| format!("{}{}", scheme, remainder))
        })
        .unwrap_or_else(|| format!("wss://{}", host))
}
94
/// Normalize an arbitrary Cloud Browser host to its REST origin (`https://`
/// or `http://`), without a trailing slash.
///
/// Callers typically configure a `wss://` / `ws://` host (the CDP entry
/// point); the REST endpoints (`/unblock`, `/session/.../stop`) live on the
/// HTTP-equivalent origin. Mirrors `sdk/go/cloud_browser.go::restBase`.
///
/// Scheme mapping (prefix match, case-sensitive):
///
/// * `wss://` becomes `https://`, `ws://` becomes `http://`;
/// * `https://` and `http://` are kept;
/// * anything else is treated as a bare `host[:port]` and gets `https://`.
///
/// Trailing `/` characters are removed so that appending `/path` to the
/// result never yields a double slash (`https://host//path`) when the host
/// was configured as `https://host/`.
fn rest_base(host: &str) -> String {
    let (scheme, remainder) = if let Some(rest) = host.strip_prefix("wss://") {
        ("https://", rest)
    } else if let Some(rest) = host.strip_prefix("ws://") {
        ("http://", rest)
    } else if let Some(rest) = host.strip_prefix("https://") {
        ("https://", rest)
    } else if let Some(rest) = host.strip_prefix("http://") {
        ("http://", rest)
    } else {
        ("https://", host)
    };
    // Trim after the scheme split so the `//` of the scheme itself is never
    // touched, even for a degenerate input such as `"https://"`.
    format!("{}{}", scheme, remainder.trim_end_matches('/'))
}
110
111/// Unblock request body.
112#[derive(Debug, Clone, Default, Serialize)]
113pub struct UnblockConfig {
114    /// Target URL.
115    pub url: String,
116    /// Proxy country.
117    #[serde(skip_serializing_if = "Option::is_none")]
118    pub country: Option<String>,
119    /// Navigation timeout.
120    #[serde(skip_serializing_if = "Option::is_none")]
121    pub timeout: Option<u32>,
122    /// Browser session timeout.
123    #[serde(skip_serializing_if = "Option::is_none")]
124    pub browser_timeout: Option<u32>,
125    /// Enable MCP support in the browser.
126    #[serde(skip_serializing_if = "is_false")]
127    pub enable_mcp: bool,
128    /// Arm the captcha solver on the post-unblock session.
129    #[serde(skip_serializing_if = "is_false")]
130    pub solve_captcha: bool,
131}
132
133/// Response from `POST /unblock`.
134#[derive(Debug, Clone, Deserialize, Default)]
135pub struct UnblockResult {
136    /// WebSocket URL to connect to.
137    #[serde(default)]
138    pub ws_url: String,
139    /// Session id.
140    #[serde(default)]
141    pub session_id: String,
142    /// Run id.
143    #[serde(default)]
144    pub run_id: String,
145    /// MCP endpoint (only when enable_mcp was set).
146    #[serde(default)]
147    pub mcp_endpoint: String,
148}
149
150impl Client {
151    /// Build the WebSocket URL for a new Cloud Browser session.
152    pub fn cloud_browser_url(&self, config: &BrowserConfig) -> String {
153        let ws_host = ws_base(self.cloud_browser_host());
154        let mut pairs: Vec<(String, String)> = vec![("api_key".into(), self.api_key().into())];
155        if let Some(v) = &config.proxy_pool {
156            pairs.push(("proxy_pool".into(), v.clone()));
157        }
158        if let Some(v) = &config.os {
159            pairs.push(("os".into(), v.clone()));
160        }
161        if let Some(v) = &config.country {
162            pairs.push(("country".into(), v.clone()));
163        }
164        if let Some(v) = &config.session {
165            pairs.push(("session".into(), v.clone()));
166        }
167        if let Some(v) = config.timeout {
168            pairs.push(("timeout".into(), v.to_string()));
169        }
170        if config.block_images {
171            pairs.push(("block_images".into(), "true".into()));
172        }
173        if config.block_styles {
174            pairs.push(("block_styles".into(), "true".into()));
175        }
176        if config.block_fonts {
177            pairs.push(("block_fonts".into(), "true".into()));
178        }
179        if config.block_media {
180            pairs.push(("block_media".into(), "true".into()));
181        }
182        if config.screenshot {
183            pairs.push(("screenshot".into(), "true".into()));
184        }
185        if config.cache {
186            pairs.push(("cache".into(), "true".into()));
187        }
188        if config.blacklist {
189            pairs.push(("blacklist".into(), "true".into()));
190        }
191        if config.debug {
192            pairs.push(("debug".into(), "true".into()));
193        }
194        if let Some(v) = &config.resolution {
195            pairs.push(("resolution".into(), v.clone()));
196        }
197        if let Some(v) = &config.browser_brand {
198            pairs.push(("browser_brand".into(), v.clone()));
199        }
200        if let Some(v) = &config.byop_proxy {
201            pairs.push(("byop_proxy".into(), v.clone()));
202        }
203        if config.enable_mcp {
204            pairs.push(("enable_mcp".into(), "true".into()));
205        }
206        if config.solve_captcha {
207            pairs.push(("solve_captcha".into(), "true".into()));
208        }
209        let qs = serde_urlencoded::to_string(&pairs).unwrap_or_default();
210        format!("{}?{}", ws_host, qs)
211    }
212
213    /// Call `POST /unblock` to bypass anti-bot protection.
214    pub async fn cloud_browser_unblock(
215        &self,
216        config: &UnblockConfig,
217    ) -> Result<UnblockResult, ScrapflyError> {
218        let url = format!(
219            "{}/unblock?key={}",
220            rest_base(self.cloud_browser_host()),
221            self.api_key()
222        );
223        let url = Url::parse(&url)
224            .map_err(|e| ScrapflyError::Config(format!("invalid unblock url: {}", e)))?;
225        let body = serde_json::to_vec(config)?;
226        let mut headers = HeaderMap::new();
227        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
228        let resp = self
229            .send_with_retry(Method::POST, url, Some(headers), Some(body))
230            .await?;
231        let status = resp.status().as_u16();
232        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
233        if status != 200 {
234            return Err(from_response(status, &body, 0, false));
235        }
236        Ok(serde_json::from_slice(&body)?)
237    }
238
239    /// List browser extensions for the account.
240    pub async fn cloud_browser_extension_list(&self) -> Result<serde_json::Value, ScrapflyError> {
241        let url = format!(
242            "{}/extension?key={}",
243            rest_base(self.cloud_browser_host()),
244            self.api_key()
245        );
246        let url = Url::parse(&url)
247            .map_err(|e| ScrapflyError::Config(format!("invalid extension url: {}", e)))?;
248        let resp = self.send_with_retry(Method::GET, url, None, None).await?;
249        let status = resp.status().as_u16();
250        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
251        if status != 200 {
252            return Err(from_response(status, &body, 0, false));
253        }
254        Ok(serde_json::from_slice(&body)?)
255    }
256
257    /// Get details of a specific browser extension.
258    pub async fn cloud_browser_extension_get(
259        &self,
260        extension_id: &str,
261    ) -> Result<serde_json::Value, ScrapflyError> {
262        let url = format!(
263            "{}/extension/{}?key={}",
264            rest_base(self.cloud_browser_host()),
265            extension_id,
266            self.api_key()
267        );
268        let url = Url::parse(&url)
269            .map_err(|e| ScrapflyError::Config(format!("invalid extension url: {}", e)))?;
270        let resp = self.send_with_retry(Method::GET, url, None, None).await?;
271        let status = resp.status().as_u16();
272        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
273        if status != 200 {
274            return Err(from_response(status, &body, 0, false));
275        }
276        Ok(serde_json::from_slice(&body)?)
277    }
278
279    /// Upload a browser extension from a local file (.zip or .crx).
280    pub async fn cloud_browser_extension_upload(
281        &self,
282        file_path: &std::path::Path,
283    ) -> Result<serde_json::Value, ScrapflyError> {
284        let url = format!(
285            "{}/extension?key={}",
286            rest_base(self.cloud_browser_host()),
287            self.api_key()
288        );
289        let url = Url::parse(&url)
290            .map_err(|e| ScrapflyError::Config(format!("invalid extension url: {}", e)))?;
291        let file_bytes = std::fs::read(file_path)
292            .map_err(|e| ScrapflyError::Config(format!("failed to read extension file: {}", e)))?;
293        let file_name = file_path
294            .file_name()
295            .and_then(|n| n.to_str())
296            .unwrap_or("extension.zip")
297            .to_string();
298        // Build multipart body manually (reqwest multipart feature not enabled)
299        let boundary = format!(
300            "----ScrapflyBoundary{}",
301            std::time::SystemTime::now()
302                .duration_since(std::time::UNIX_EPOCH)
303                .unwrap_or_default()
304                .as_millis()
305        );
306        let mut body = Vec::new();
307        body.extend_from_slice(format!("--{}\r\n", boundary).as_bytes());
308        body.extend_from_slice(
309            format!(
310                "Content-Disposition: form-data; name=\"file\"; filename=\"{}\"\r\n\
311                 Content-Type: application/octet-stream\r\n\r\n",
312                file_name
313            )
314            .as_bytes(),
315        );
316        body.extend_from_slice(&file_bytes);
317        body.extend_from_slice(format!("\r\n--{}--\r\n", boundary).as_bytes());
318        let mut headers = HeaderMap::new();
319        headers.insert(
320            CONTENT_TYPE,
321            HeaderValue::from_str(&format!("multipart/form-data; boundary={}", boundary))
322                .map_err(|e| ScrapflyError::Config(format!("invalid content-type: {}", e)))?,
323        );
324        let resp = self
325            .send_with_retry(Method::POST, url, Some(headers), Some(body))
326            .await?;
327        let status = resp.status().as_u16();
328        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
329        if status != 200 && status != 201 {
330            return Err(from_response(status, &body, 0, false));
331        }
332        Ok(serde_json::from_slice(&body)?)
333    }
334
335    /// Delete a browser extension by ID.
336    pub async fn cloud_browser_extension_delete(
337        &self,
338        extension_id: &str,
339    ) -> Result<serde_json::Value, ScrapflyError> {
340        let url = format!(
341            "{}/extension/{}?key={}",
342            rest_base(self.cloud_browser_host()),
343            extension_id,
344            self.api_key()
345        );
346        let url = Url::parse(&url)
347            .map_err(|e| ScrapflyError::Config(format!("invalid extension url: {}", e)))?;
348        let resp = self
349            .send_with_retry(Method::DELETE, url, None, None)
350            .await?;
351        let status = resp.status().as_u16();
352        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
353        if status != 200 {
354            return Err(from_response(status, &body, 0, false));
355        }
356        Ok(serde_json::from_slice(&body)?)
357    }
358
359    /// Get debug recording playback metadata for a run.
360    pub async fn cloud_browser_playback(
361        &self,
362        run_id: &str,
363    ) -> Result<serde_json::Value, ScrapflyError> {
364        let url = format!(
365            "{}/run/{}/playback?key={}",
366            rest_base(self.cloud_browser_host()),
367            run_id,
368            self.api_key()
369        );
370        let url = Url::parse(&url)
371            .map_err(|e| ScrapflyError::Config(format!("invalid playback url: {}", e)))?;
372        let resp = self.send_with_retry(Method::GET, url, None, None).await?;
373        let status = resp.status().as_u16();
374        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
375        if status != 200 {
376            return Err(from_response(status, &body, 0, false));
377        }
378        Ok(serde_json::from_slice(&body)?)
379    }
380
381    /// Terminate a Cloud Browser session.
382    pub async fn cloud_browser_session_stop(&self, session_id: &str) -> Result<(), ScrapflyError> {
383        if session_id.is_empty() {
384            return Err(ScrapflyError::Config("session_id is required".into()));
385        }
386        let url = format!(
387            "{}/session/{}/stop?key={}",
388            rest_base(self.cloud_browser_host()),
389            session_id,
390            self.api_key()
391        );
392        let url = Url::parse(&url)
393            .map_err(|e| ScrapflyError::Config(format!("invalid session url: {}", e)))?;
394        let resp = self.send_with_retry(Method::POST, url, None, None).await?;
395        let status = resp.status().as_u16();
396        if status != 200 {
397            let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
398            return Err(from_response(status, &body, 0, false));
399        }
400        Ok(())
401    }
402
403    /// List all running Cloud Browser sessions.
404    pub async fn cloud_browser_sessions(&self) -> Result<serde_json::Value, ScrapflyError> {
405        let url = format!(
406            "{}/sessions?key={}",
407            rest_base(self.cloud_browser_host()),
408            self.api_key()
409        );
410        let url = Url::parse(&url)
411            .map_err(|e| ScrapflyError::Config(format!("invalid sessions url: {}", e)))?;
412        let resp = self.send_with_retry(Method::GET, url, None, None).await?;
413        let status = resp.status().as_u16();
414        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
415        if status != 200 {
416            return Err(from_response(status, &body, 0, false));
417        }
418        Ok(serde_json::from_slice(&body)?)
419    }
420
421    /// Download a debug session recording video (raw bytes).
422    pub async fn cloud_browser_video(&self, run_id: &str) -> Result<Vec<u8>, ScrapflyError> {
423        let url = format!(
424            "{}/run/{}/video?key={}",
425            rest_base(self.cloud_browser_host()),
426            run_id,
427            self.api_key()
428        );
429        let url = Url::parse(&url)
430            .map_err(|e| ScrapflyError::Config(format!("invalid video url: {}", e)))?;
431        let resp = self.send_with_retry(Method::GET, url, None, None).await?;
432        let status = resp.status().as_u16();
433        let body = resp.bytes().await.map_err(ScrapflyError::Transport)?;
434        if status != 200 {
435            return Err(from_response(status, &body, 0, false));
436        }
437        Ok(body.to_vec())
438    }
439}