browsr_client/
lib.rs

1//! Browsr Client - HTTP client for browser automation
2//!
3//! This crate provides a client for interacting with Browsr servers for browser automation,
4//! web scraping, and structured content extraction.
5//!
6//! # Quick Start
7//!
8//! ```rust,ignore
9//! use browsr_client::{BrowsrClient, BrowsrClientConfig};
10//! use browsr_types::Commands;
11//!
12//! // From environment variables
13//! let client = BrowsrClient::from_env();
14//!
15//! // Navigate to a page
16//! let response = client.navigate("https://example.com", None).await?;
17//!
18//! // Extract structured content
//! let data = client.extract_structured(
//!     "Extract the main heading and first paragraph",
//!     None, // schema
//!     None, // max_chars
//!     None, // session_id
//! ).await?;
24//! ```
25//!
26//! # Configuration
27//!
28//! The client can be configured via environment variables or programmatically:
29//!
30//! - `BROWSR_BASE_URL`: Base URL (defaults to `https://api.browsr.dev`)
31//! - `BROWSR_API_KEY`: Optional API key for authentication
32
33mod config;
34
35pub use config::{BrowsrClientConfig, DEFAULT_BASE_URL, ENV_API_KEY, ENV_BASE_URL};
36
37// Re-export browser_step types for convenient access
38pub use browsr_types::{BrowserStepInput, BrowserStepRequest, BrowserStepResult};
39
40use browsr_types::{
41    AutomateResponse, BrowserContext, Commands, ObserveResponse, ScrapeOptions, SearchOptions,
42    SearchResponse,
43};
44use reqwest::StatusCode;
45use serde::{Deserialize, Serialize, de::DeserializeOwned};
46use serde_json::{Value, json};
47use thiserror::Error;
48use tokio::process::Command;
49
/// Transport selection accepted by [`BrowsrClient::from_config`].
#[derive(Clone, Debug)]
pub enum TransportConfig {
    /// Use the HTTP transport against `base_url`.
    Http { base_url: String },
    /// Use the stdout transport built from `command`.
    Stdout { command: String },
}
55
56/// Browsr HTTP client for browser automation.
57///
58/// # Example
59///
60/// ```rust,ignore
61/// use browsr_client::BrowsrClient;
62///
63/// // From environment variables (BROWSR_BASE_URL, BROWSR_API_KEY)
64/// let client = BrowsrClient::from_env();
65///
66/// // From explicit URL (for local development)
67/// let client = BrowsrClient::new("http://localhost:8082");
68///
69/// // With API key authentication
70/// let client = BrowsrClient::new("https://api.browsr.dev")
71///     .with_api_key("your-api-key");
72/// ```
73#[derive(Debug, Clone)]
74pub struct BrowsrClient {
75    transport: BrowsrTransport,
76    config: BrowsrClientConfig,
77}
78
79#[derive(Debug, Clone)]
80enum BrowsrTransport {
81    Http(HttpTransport),
82    Stdout(StdoutTransport),
83}
84
85impl BrowsrClient {
86    /// Create a new client with the specified base URL (no authentication).
87    /// For local development, use this method.
88    pub fn new(base_url: impl Into<String>) -> Self {
89        let config = BrowsrClientConfig::new(base_url);
90        Self::from_client_config(config)
91    }
92
93    /// Create a new client from environment variables.
94    ///
95    /// - `BROWSR_BASE_URL`: Base URL (defaults to `https://api.browsr.dev`)
96    /// - `BROWSR_API_KEY`: Optional API key for authentication
97    pub fn from_env() -> Self {
98        let config = BrowsrClientConfig::from_env();
99        Self::from_client_config(config)
100    }
101
102    /// Create a new client from explicit configuration.
103    pub fn from_client_config(config: BrowsrClientConfig) -> Self {
104        let http = config
105            .build_http_client()
106            .expect("Failed to build HTTP client");
107
108        Self {
109            transport: BrowsrTransport::Http(HttpTransport::new_with_client(
110                &config.base_url,
111                http,
112            )),
113            config,
114        }
115    }
116
117    /// Set the API key for authentication.
118    /// This rebuilds the HTTP client with the new authentication header.
119    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
120        self.config = self.config.with_api_key(api_key);
121        let http = self
122            .config
123            .build_http_client()
124            .expect("Failed to build HTTP client");
125        self.transport =
126            BrowsrTransport::Http(HttpTransport::new_with_client(&self.config.base_url, http));
127        self
128    }
129
130    /// Create HTTP transport client (legacy method).
131    pub fn new_http(base_url: impl Into<String>) -> Self {
132        Self::new(base_url)
133    }
134
135    /// Create stdout transport client.
136    pub fn new_stdout(command: impl Into<String>) -> Self {
137        Self {
138            transport: BrowsrTransport::Stdout(StdoutTransport::new(command)),
139            config: BrowsrClientConfig::default(),
140        }
141    }
142
143    /// Create client from transport config (legacy method).
144    pub fn from_config(cfg: TransportConfig) -> Self {
145        match cfg {
146            TransportConfig::Http { base_url } => Self::new_http(base_url),
147            TransportConfig::Stdout { command } => Self::new_stdout(command),
148        }
149    }
150
151    /// Get the base URL.
152    pub fn base_url(&self) -> &str {
153        &self.config.base_url
154    }
155
156    /// Get the current configuration.
157    pub fn config(&self) -> &BrowsrClientConfig {
158        &self.config
159    }
160
161    /// Check if the client has authentication configured.
162    pub fn has_auth(&self) -> bool {
163        self.config.has_auth()
164    }
165
166    /// Check if this is a local development client.
167    pub fn is_local(&self) -> bool {
168        self.config.is_local()
169    }
170
171    // ============================================================
172    // Session Management
173    // ============================================================
174
175    /// List all active browser sessions.
176    pub async fn list_sessions(&self) -> Result<Vec<String>, ClientError> {
177        match &self.transport {
178            BrowsrTransport::Http(inner) => {
179                let response: SessionList = inner.get("/sessions").await?;
180                Ok(response.sessions)
181            }
182            BrowsrTransport::Stdout(inner) => inner.list_sessions().await,
183        }
184    }
185
186    /// Create a new browser session.
187    /// Returns the full session info including viewer_url, sse_url, and frame_url.
188    pub async fn create_session(&self) -> Result<SessionCreated, ClientError> {
189        match &self.transport {
190            BrowsrTransport::Http(inner) => {
191                inner.post("/sessions", &Value::Null).await
192            }
193            BrowsrTransport::Stdout(inner) => {
194                let session_id = inner.create_session().await?;
195                Ok(SessionCreated {
196                    session_id,
197                    sse_url: None,
198                    frame_url: None,
199                    frame_token: None,
200                })
201            }
202        }
203    }
204
205    /// Destroy a browser session.
206    pub async fn destroy_session(&self, session_id: &str) -> Result<(), ClientError> {
207        match &self.transport {
208            BrowsrTransport::Http(inner) => inner
209                .delete(&format!("/sessions/{}", session_id))
210                .await
211                .map(|_: Value| ()),
212            BrowsrTransport::Stdout(inner) => inner.destroy_session(session_id).await,
213        }
214    }
215
216    // ============================================================
217    // Command Execution
218    // ============================================================
219
220    /// Execute a list of browser commands.
221    pub async fn execute_commands(
222        &self,
223        commands: Vec<Commands>,
224        session_id: Option<String>,
225        headless: Option<bool>,
226        context: Option<BrowserContext>,
227    ) -> Result<AutomateResponse, ClientError> {
228        let payload = CommandsPayload {
229            commands,
230            session_id,
231            headless: headless.or(self.config.headless),
232            context,
233        };
234
235        match &self.transport {
236            BrowsrTransport::Http(inner) => inner.post("/commands", &payload).await,
237            BrowsrTransport::Stdout(inner) => inner.execute_commands(&payload).await,
238        }
239    }
240
241    /// Execute a single browser command.
242    pub async fn execute_command(
243        &self,
244        command: Commands,
245        session_id: Option<String>,
246        headless: Option<bool>,
247    ) -> Result<AutomateResponse, ClientError> {
248        self.execute_commands(vec![command], session_id, headless, None)
249            .await
250    }
251
252    // ============================================================
253    // Convenience Methods for Common Commands
254    // ============================================================
255
256    /// Navigate to a URL.
257    pub async fn navigate(
258        &self,
259        url: &str,
260        session_id: Option<String>,
261    ) -> Result<AutomateResponse, ClientError> {
262        self.execute_command(
263            Commands::NavigateTo {
264                url: url.to_string(),
265            },
266            session_id,
267            None,
268        )
269        .await
270    }
271
272    /// Click an element by selector.
273    pub async fn click(
274        &self,
275        selector: &str,
276        session_id: Option<String>,
277    ) -> Result<AutomateResponse, ClientError> {
278        self.execute_command(
279            Commands::Click {
280                selector: selector.to_string(),
281            },
282            session_id,
283            None,
284        )
285        .await
286    }
287
288    /// Type text into an element.
289    pub async fn type_text(
290        &self,
291        selector: &str,
292        text: &str,
293        clear: Option<bool>,
294        session_id: Option<String>,
295    ) -> Result<AutomateResponse, ClientError> {
296        self.execute_command(
297            Commands::TypeText {
298                selector: selector.to_string(),
299                text: text.to_string(),
300                clear,
301            },
302            session_id,
303            None,
304        )
305        .await
306    }
307
308    /// Wait for an element to appear.
309    pub async fn wait_for_element(
310        &self,
311        selector: &str,
312        timeout_ms: Option<u64>,
313        session_id: Option<String>,
314    ) -> Result<AutomateResponse, ClientError> {
315        self.execute_command(
316            Commands::WaitForElement {
317                selector: selector.to_string(),
318                timeout_ms,
319                visible_only: None,
320            },
321            session_id,
322            None,
323        )
324        .await
325    }
326
327    /// Take a screenshot.
328    pub async fn screenshot(
329        &self,
330        full_page: bool,
331        session_id: Option<String>,
332    ) -> Result<AutomateResponse, ClientError> {
333        self.execute_command(
334            Commands::Screenshot {
335                full_page: Some(full_page),
336                path: None,
337            },
338            session_id,
339            None,
340        )
341        .await
342    }
343
344    /// Get page title.
345    pub async fn get_title(
346        &self,
347        session_id: Option<String>,
348    ) -> Result<AutomateResponse, ClientError> {
349        self.execute_command(Commands::GetTitle, session_id, None)
350            .await
351    }
352
353    /// Get text content of an element.
354    pub async fn get_text(
355        &self,
356        selector: &str,
357        session_id: Option<String>,
358    ) -> Result<AutomateResponse, ClientError> {
359        self.execute_command(
360            Commands::GetText {
361                selector: selector.to_string(),
362            },
363            session_id,
364            None,
365        )
366        .await
367    }
368
369    /// Get HTML content of an element or page.
370    pub async fn get_content(
371        &self,
372        selector: Option<String>,
373        session_id: Option<String>,
374    ) -> Result<AutomateResponse, ClientError> {
375        self.execute_command(
376            Commands::GetContent {
377                selector,
378                kind: None,
379            },
380            session_id,
381            None,
382        )
383        .await
384    }
385
386    /// Evaluate JavaScript expression.
387    pub async fn evaluate(
388        &self,
389        expression: &str,
390        session_id: Option<String>,
391    ) -> Result<AutomateResponse, ClientError> {
392        self.execute_command(
393            Commands::Evaluate {
394                expression: expression.to_string(),
395            },
396            session_id,
397            None,
398        )
399        .await
400    }
401
402    // ============================================================
403    // Structured Extraction
404    // ============================================================
405
406    /// Extract structured content from the current page using AI.
407    ///
408    /// # Arguments
409    /// * `query` - Natural language description of what to extract
410    /// * `schema` - Optional JSON schema for the output
411    /// * `max_chars` - Optional maximum characters to process
412    /// * `session_id` - Optional session ID
413    ///
414    /// # Example
415    /// ```rust,ignore
416    /// let data = client.extract_structured(
417    ///     "Extract all product names and prices",
418    ///     None,
419    ///     None,
420    /// ).await?;
421    /// ```
422    pub async fn extract_structured(
423        &self,
424        query: &str,
425        schema: Option<serde_json::Value>,
426        max_chars: Option<usize>,
427        session_id: Option<String>,
428    ) -> Result<AutomateResponse, ClientError> {
429        self.execute_command(
430            Commands::ExtractStructuredContent {
431                query: query.to_string(),
432                schema,
433                max_chars,
434            },
435            session_id,
436            None,
437        )
438        .await
439    }
440
441    // ============================================================
442    // Observation
443    // ============================================================
444
445    /// Observe the current browser state (screenshot + DOM snapshot).
446    pub async fn observe(
447        &self,
448        session_id: Option<String>,
449        headless: Option<bool>,
450        opts: ObserveOptions,
451    ) -> Result<ObserveResponse, ClientError> {
452        let payload = ObservePayload {
453            session_id,
454            headless: headless.or(self.config.headless),
455            use_image: opts.use_image,
456            full_page: opts.full_page,
457            wait_ms: opts.wait_ms,
458            include_content: opts.include_content,
459        };
460
461        match &self.transport {
462            BrowsrTransport::Http(inner) => {
463                let envelope: ObserveEnvelope = inner.post("/observe", &payload).await?;
464                Ok(envelope.observation)
465            }
466            BrowsrTransport::Stdout(inner) => inner.observe(&payload).await,
467        }
468    }
469
470    // ============================================================
471    // Scraping (v1 API)
472    // ============================================================
473
474    /// Scrape a URL with full format options (v1 API).
475    pub async fn scrape_v1(&self, request: ScrapeApiRequest) -> Result<ScrapeApiResponse, ClientError> {
476        match &self.transport {
477            BrowsrTransport::Http(inner) => inner.post("/v1/scrape", &request).await,
478            BrowsrTransport::Stdout(inner) => inner.request("scrape", &request).await,
479        }
480    }
481
482    /// Scrape a URL with default options (markdown output).
483    pub async fn scrape_url(&self, url: &str) -> Result<ScrapeApiResponse, ClientError> {
484        self.scrape_v1(ScrapeApiRequest::new(url)).await
485    }
486
487    /// Crawl a website starting from a URL.
488    pub async fn crawl(&self, request: CrawlApiRequest) -> Result<CrawlApiResponse, ClientError> {
489        match &self.transport {
490            BrowsrTransport::Http(inner) => inner.post("/v1/crawl", &request).await,
491            BrowsrTransport::Stdout(inner) => inner.request("crawl", &request).await,
492        }
493    }
494
495    /// Crawl a URL with default options (markdown, 10 pages, depth 2).
496    pub async fn crawl_url(&self, url: &str) -> Result<CrawlApiResponse, ClientError> {
497        self.crawl(CrawlApiRequest::new(url)).await
498    }
499
500    /// Scrape content from a URL (legacy API, returns raw JSON).
501    pub async fn scrape_legacy(&self, options: ScrapeOptions) -> Result<Value, ClientError> {
502        match &self.transport {
503            BrowsrTransport::Http(inner) => inner.post("/scrape", &options).await,
504            BrowsrTransport::Stdout(inner) => inner.scrape(&options).await,
505        }
506    }
507
508    // ============================================================
509    // Search
510    // ============================================================
511
512    /// Perform a web search.
513    pub async fn search(&self, options: SearchOptions) -> Result<SearchResponse, ClientError> {
514        match &self.transport {
515            BrowsrTransport::Http(inner) => inner.post("/search", &options).await,
516            BrowsrTransport::Stdout(inner) => inner.search(&options).await,
517        }
518    }
519
520    /// Search with a query string.
521    pub async fn search_query(&self, query: &str) -> Result<SearchResponse, ClientError> {
522        let options = SearchOptions {
523            query: query.to_string(),
524            limit: None,
525        };
526        self.search(options).await
527    }
528
529    // ============================================================
530    // Browser Step (Agent Integration)
531    // ============================================================
532
533    /// Execute a browser step with commands and optional context.
534    ///
535    /// This is the primary method for agent integration, providing full control
536    /// over browser automation with context for sequence persistence.
537    ///
538    /// # Arguments
539    /// * `request` - Full browser step request with commands and context
540    ///
541    /// # Example
542    /// ```rust,ignore
543    /// use browsr_client::{BrowsrClient, BrowserStepRequest, BrowserStepInput};
544    /// use browsr_types::Commands;
545    ///
546    /// let client = BrowsrClient::from_env();
547    ///
548    /// let input = BrowserStepInput::new(vec![
549    ///     Commands::NavigateTo { url: "https://example.com".to_string() },
550    ///     Commands::Screenshot { full_page: Some(false), path: None },
551    /// ]);
552    ///
553    /// let request = BrowserStepRequest::new(input)
554    ///     .with_session_id("my-session")
555    ///     .with_thread_id("thread-123");
556    ///
557    /// let result = client.step(request).await?;
558    /// println!("Success: {}, URL: {:?}", result.success, result.url);
559    /// ```
560    pub async fn step(&self, request: BrowserStepRequest) -> Result<BrowserStepResult, ClientError> {
561        match &self.transport {
562            BrowsrTransport::Http(inner) => inner.post("/browser_step", &request).await,
563            BrowsrTransport::Stdout(inner) => inner.browser_step(&request).await,
564        }
565    }
566
567    /// Execute a browser step with just commands (simple usage).
568    ///
569    /// Use this for quick automation tasks without context tracking.
570    ///
571    /// # Example
572    /// ```rust,ignore
573    /// use browsr_client::BrowsrClient;
574    /// use browsr_types::Commands;
575    ///
576    /// let client = BrowsrClient::from_env();
577    ///
578    /// let result = client.step_commands(vec![
579    ///     Commands::NavigateTo { url: "https://example.com".to_string() },
580    /// ]).await?;
581    /// ```
582    pub async fn step_commands(
583        &self,
584        commands: Vec<Commands>,
585    ) -> Result<BrowserStepResult, ClientError> {
586        let input = BrowserStepInput::new(commands);
587        let request = BrowserStepRequest::new(input);
588        self.step(request).await
589    }
590}
591
592// ============================================================
593// Scrape & Crawl Types (v1 API)
594// ============================================================
595
596/// Output format for scrape/crawl results.
597#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
598#[serde(rename_all = "camelCase")]
599pub enum ScrapeFormat {
600    Markdown,
601    Summary,
602    Html,
603    RawHtml,
604    Screenshot,
605    Links,
606    Json,
607    Images,
608    Branding,
609}
610
611/// Options for JSON structured extraction.
612#[derive(Debug, Clone, Serialize, Deserialize)]
613#[serde(rename_all = "camelCase")]
614pub struct JsonExtractionOptions {
615    #[serde(default, skip_serializing_if = "Option::is_none")]
616    pub prompt: Option<String>,
617    #[serde(default, skip_serializing_if = "Option::is_none")]
618    pub schema: Option<Value>,
619}
620
621/// A browser action to perform before scraping.
622#[derive(Debug, Clone, Serialize, Deserialize)]
623#[serde(rename_all = "camelCase")]
624pub struct ScrapeAction {
625    #[serde(rename = "type")]
626    pub action_type: String,
627    #[serde(default, skip_serializing_if = "Option::is_none")]
628    pub selector: Option<String>,
629    #[serde(default, skip_serializing_if = "Option::is_none")]
630    pub text: Option<String>,
631    #[serde(default, skip_serializing_if = "Option::is_none")]
632    pub milliseconds: Option<u64>,
633    #[serde(default, skip_serializing_if = "Option::is_none")]
634    pub expression: Option<String>,
635}
636
637/// Request body for scraping (v1 API).
638#[derive(Debug, Clone, Serialize, Deserialize)]
639#[serde(rename_all = "camelCase")]
640pub struct ScrapeApiRequest {
641    pub url: String,
642    #[serde(default = "default_scrape_formats")]
643    pub formats: Vec<ScrapeFormat>,
644    #[serde(default, skip_serializing_if = "Option::is_none")]
645    pub wait_for: Option<u64>,
646    #[serde(default, skip_serializing_if = "Option::is_none")]
647    pub actions: Option<Vec<ScrapeAction>>,
648    #[serde(default, skip_serializing_if = "Option::is_none")]
649    pub json_options: Option<JsonExtractionOptions>,
650    #[serde(default = "default_true")]
651    pub only_main_content: bool,
652    #[serde(default = "default_true")]
653    pub remove_base64_images: bool,
654}
655
656fn default_scrape_formats() -> Vec<ScrapeFormat> {
657    vec![ScrapeFormat::Markdown]
658}
659
/// Serde default for boolean options that are enabled unless stated otherwise.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
663
664impl ScrapeApiRequest {
665    /// Create a simple scrape request for a URL with markdown output.
666    pub fn new(url: impl Into<String>) -> Self {
667        Self {
668            url: url.into(),
669            formats: vec![ScrapeFormat::Markdown],
670            wait_for: None,
671            actions: None,
672            json_options: None,
673            only_main_content: true,
674            remove_base64_images: true,
675        }
676    }
677
678    /// Set the output formats.
679    pub fn with_formats(mut self, formats: Vec<ScrapeFormat>) -> Self {
680        self.formats = formats;
681        self
682    }
683
684    /// Set wait time in milliseconds.
685    pub fn with_wait(mut self, ms: u64) -> Self {
686        self.wait_for = Some(ms);
687        self
688    }
689}
690
691/// Page metadata.
692#[derive(Debug, Clone, Serialize, Deserialize)]
693#[serde(rename_all = "camelCase")]
694pub struct PageMetadata {
695    pub title: Option<String>,
696    pub description: Option<String>,
697    #[serde(rename = "sourceURL")]
698    pub source_url: String,
699    pub status_code: Option<u16>,
700}
701
702/// A link extracted from a page.
703#[derive(Debug, Clone, Serialize, Deserialize)]
704pub struct ExtractedLink {
705    pub href: String,
706    #[serde(default)]
707    pub text: String,
708}
709
710/// An image extracted from a page.
711#[derive(Debug, Clone, Serialize, Deserialize)]
712pub struct ExtractedImage {
713    pub src: String,
714    #[serde(default)]
715    pub alt: Option<String>,
716}
717
718/// Brand identity.
719#[derive(Debug, Clone, Serialize, Deserialize)]
720pub struct BrandingInfo {
721    #[serde(default)]
722    pub colors: Option<Vec<String>>,
723    #[serde(default)]
724    pub fonts: Option<Vec<String>>,
725    #[serde(default)]
726    pub logo: Option<String>,
727    #[serde(default)]
728    pub favicon: Option<String>,
729    #[serde(default)]
730    pub name: Option<String>,
731}
732
733/// Scraped data for a single page.
734#[derive(Debug, Clone, Serialize, Deserialize)]
735#[serde(rename_all = "camelCase")]
736pub struct ScrapeData {
737    #[serde(default)]
738    pub markdown: Option<String>,
739    #[serde(default)]
740    pub summary: Option<String>,
741    #[serde(default)]
742    pub html: Option<String>,
743    #[serde(default)]
744    pub raw_html: Option<String>,
745    #[serde(default)]
746    pub screenshot: Option<String>,
747    #[serde(default)]
748    pub links: Option<Vec<ExtractedLink>>,
749    #[serde(default)]
750    pub json: Option<Value>,
751    #[serde(default)]
752    pub images: Option<Vec<ExtractedImage>>,
753    #[serde(default)]
754    pub branding: Option<BrandingInfo>,
755    pub metadata: PageMetadata,
756    #[serde(default)]
757    pub warning: Option<String>,
758}
759
760/// Response from scraping (v1 API).
761#[derive(Debug, Clone, Serialize, Deserialize)]
762pub struct ScrapeApiResponse {
763    pub success: bool,
764    pub data: ScrapeData,
765}
766
767/// Request body for crawling (v1 API).
768#[derive(Debug, Clone, Serialize, Deserialize)]
769#[serde(rename_all = "camelCase")]
770pub struct CrawlApiRequest {
771    pub url: String,
772    #[serde(default = "default_crawl_limit")]
773    pub limit: usize,
774    #[serde(default = "default_crawl_depth")]
775    pub max_depth: usize,
776    #[serde(default = "default_scrape_formats")]
777    pub formats: Vec<ScrapeFormat>,
778    #[serde(default, skip_serializing_if = "Option::is_none")]
779    pub wait_for: Option<u64>,
780    #[serde(default, skip_serializing_if = "Option::is_none")]
781    pub include_paths: Option<Vec<String>>,
782    #[serde(default, skip_serializing_if = "Option::is_none")]
783    pub exclude_paths: Option<Vec<String>>,
784    #[serde(default = "default_true")]
785    pub only_main_content: bool,
786    #[serde(default, skip_serializing_if = "Option::is_none")]
787    pub json_options: Option<JsonExtractionOptions>,
788}
789
/// Serde default for `CrawlApiRequest::limit`.
fn default_crawl_limit() -> usize {
    const DEFAULT_LIMIT: usize = 10;
    DEFAULT_LIMIT
}
793
/// Serde default for `CrawlApiRequest::max_depth`.
fn default_crawl_depth() -> usize {
    const DEFAULT_DEPTH: usize = 2;
    DEFAULT_DEPTH
}
797
798impl CrawlApiRequest {
799    /// Create a crawl request for a URL with default options.
800    pub fn new(url: impl Into<String>) -> Self {
801        Self {
802            url: url.into(),
803            limit: 10,
804            max_depth: 2,
805            formats: vec![ScrapeFormat::Markdown],
806            wait_for: None,
807            include_paths: None,
808            exclude_paths: None,
809            only_main_content: true,
810            json_options: None,
811        }
812    }
813}
814
815/// Response from crawling (v1 API).
816#[derive(Debug, Clone, Serialize, Deserialize)]
817pub struct CrawlApiResponse {
818    pub success: bool,
819    pub total: usize,
820    pub completed: usize,
821    pub data: Vec<ScrapeData>,
822}
823
824// ============================================================
825// Internal Types
826// ============================================================
827
828#[derive(Debug, Clone, Serialize, Deserialize)]
829struct SessionList {
830    sessions: Vec<String>,
831}
832
833/// Response from creating a browser session
834#[derive(Debug, Clone, Serialize, Deserialize)]
835pub struct SessionCreated {
836    pub session_id: String,
837    /// SSE stream URL (backend API) for event streaming
838    #[serde(default)]
839    pub sse_url: Option<String>,
840    /// Frame URL for embedding (frontend app with token)
841    #[serde(default)]
842    pub frame_url: Option<String>,
843    /// Frame token for SSE authentication
844    #[serde(default)]
845    pub frame_token: Option<String>,
846}
847
848impl SessionCreated {
849    /// Build SSE stream URL with authentication token and optional dimensions
850    pub fn build_sse_url(&self, base_url: &str, width: Option<u32>, height: Option<u32>) -> String {
851        let mut url = self.sse_url.clone().unwrap_or_else(|| {
852            format!("{}/stream/sse?session_id={}", base_url, self.session_id)
853        });
854
855        // Add token if available
856        if let Some(ref token) = self.frame_token {
857            let sep = if url.contains('?') { "&" } else { "?" };
858            url = format!("{}{}token={}", url, sep, token);
859        }
860
861        // Add dimensions if provided
862        if let Some(w) = width {
863            let sep = if url.contains('?') { "&" } else { "?" };
864            url = format!("{}{}width={}", url, sep, w);
865        }
866        if let Some(h) = height {
867            url = format!("{}&height={}", url, h);
868        }
869
870        url
871    }
872}
873
874#[derive(Debug, Clone, Serialize, Deserialize)]
875struct CommandsPayload {
876    commands: Vec<Commands>,
877    #[serde(skip_serializing_if = "Option::is_none")]
878    session_id: Option<String>,
879    #[serde(skip_serializing_if = "Option::is_none")]
880    headless: Option<bool>,
881    #[serde(skip_serializing_if = "Option::is_none")]
882    context: Option<BrowserContext>,
883}
884
885/// Options for observing the browser state.
886#[derive(Debug, Clone, Serialize, Deserialize)]
887pub struct ObserveOptions {
888    pub use_image: Option<bool>,
889    pub full_page: Option<bool>,
890    pub wait_ms: Option<u64>,
891    pub include_content: Option<bool>,
892}
893
894impl Default for ObserveOptions {
895    fn default() -> Self {
896        Self {
897            use_image: Some(true),
898            full_page: None,
899            wait_ms: None,
900            include_content: Some(true),
901        }
902    }
903}
904
905#[derive(Debug, Clone, Serialize, Deserialize)]
906struct ObservePayload {
907    #[serde(default, skip_serializing_if = "Option::is_none")]
908    pub session_id: Option<String>,
909    #[serde(default, skip_serializing_if = "Option::is_none")]
910    pub headless: Option<bool>,
911    #[serde(default, skip_serializing_if = "Option::is_none")]
912    pub use_image: Option<bool>,
913    #[serde(default, skip_serializing_if = "Option::is_none")]
914    pub full_page: Option<bool>,
915    #[serde(default, skip_serializing_if = "Option::is_none")]
916    pub wait_ms: Option<u64>,
917    #[serde(default, skip_serializing_if = "Option::is_none")]
918    pub include_content: Option<bool>,
919}
920
921#[derive(Debug, Clone, Serialize, Deserialize)]
922struct ObserveEnvelope {
923    pub session_id: String,
924    pub observation: ObserveResponse,
925}
926
927// ============================================================
928// Error Types
929// ============================================================
930
931#[derive(Debug, Error)]
932pub enum ClientError {
933    #[error("http request failed: {0}")]
934    Http(#[from] reqwest::Error),
935    #[error("stdout transport failed: {0}")]
936    Stdout(String),
937    #[error("invalid response: {0}")]
938    InvalidResponse(String),
939    #[error("serialization error: {0}")]
940    Serialization(#[from] serde_json::Error),
941    #[error("io error: {0}")]
942    Io(#[from] std::io::Error),
943}
944
945// ============================================================
946// HTTP Transport
947// ============================================================
948
949#[derive(Debug, Clone)]
950struct HttpTransport {
951    base_url: String,
952    client: reqwest::Client,
953}
954
955impl HttpTransport {
956    fn new_with_client(base_url: impl Into<String>, client: reqwest::Client) -> Self {
957        Self {
958            base_url: base_url.into(),
959            client,
960        }
961    }
962
963    fn url(&self, path: &str) -> String {
964        let base = self.base_url.trim_end_matches('/');
965        let suffix = path.trim_start_matches('/');
966        format!("{}/{}", base, suffix)
967    }
968
969    async fn get<T: DeserializeOwned>(&self, path: &str) -> Result<T, ClientError> {
970        let resp = self.client.get(self.url(path)).send().await?;
971        Self::handle_response(resp).await
972    }
973
974    async fn delete<T: DeserializeOwned>(&self, path: &str) -> Result<T, ClientError> {
975        let resp = self.client.delete(self.url(path)).send().await?;
976        Self::handle_response(resp).await
977    }
978
979    async fn post<T: DeserializeOwned, B: Serialize + ?Sized>(
980        &self,
981        path: &str,
982        body: &B,
983    ) -> Result<T, ClientError> {
984        let resp = self.client.post(self.url(path)).json(body).send().await?;
985        Self::handle_response(resp).await
986    }
987
988    async fn handle_response<T: DeserializeOwned>(
989        resp: reqwest::Response,
990    ) -> Result<T, ClientError> {
991        let status = resp.status();
992        if status == StatusCode::NO_CONTENT {
993            let empty: Value = Value::Null;
994            let value: T = serde_json::from_value(empty).map_err(ClientError::Serialization)?;
995            return Ok(value);
996        }
997
998        let text = resp.text().await?;
999        if !status.is_success() {
1000            return Err(ClientError::InvalidResponse(format!(
1001                "{}: {}",
1002                status, text
1003            )));
1004        }
1005
1006        serde_json::from_str(&text).map_err(ClientError::Serialization)
1007    }
1008}
1009
1010// ============================================================
1011// Stdout Transport
1012// ============================================================
1013
/// Transport that talks to browsr by running a command and reading the JSON
/// it prints on stdout.
#[derive(Clone, Debug)]
struct StdoutTransport {
    /// Program to execute for every operation.
    command: String,
}
1018
1019impl StdoutTransport {
1020    fn new(command: impl Into<String>) -> Self {
1021        Self {
1022            command: command.into(),
1023        }
1024    }
1025
1026    async fn list_sessions(&self) -> Result<Vec<String>, ClientError> {
1027        let envelope: SessionList = self.request("sessions", &Value::Null).await?;
1028        Ok(envelope.sessions)
1029    }
1030
1031    async fn create_session(&self) -> Result<String, ClientError> {
1032        let created: SessionCreated = self.request("create_session", &Value::Null).await?;
1033        Ok(created.session_id)
1034    }
1035
1036    async fn destroy_session(&self, session_id: &str) -> Result<(), ClientError> {
1037        let payload = json!({ "session_id": session_id });
1038        let _: Value = self.request("destroy_session", &payload).await?;
1039        Ok(())
1040    }
1041
1042    async fn execute_commands(
1043        &self,
1044        payload: &CommandsPayload,
1045    ) -> Result<AutomateResponse, ClientError> {
1046        self.request("execute", payload).await
1047    }
1048
1049    async fn observe(&self, payload: &ObservePayload) -> Result<ObserveResponse, ClientError> {
1050        let envelope: ObserveEnvelope = self.request("observe", payload).await?;
1051        Ok(envelope.observation)
1052    }
1053
1054    async fn scrape(&self, options: &ScrapeOptions) -> Result<Value, ClientError> {
1055        self.request("scrape", options).await
1056    }
1057
1058    async fn search(&self, options: &SearchOptions) -> Result<SearchResponse, ClientError> {
1059        self.request("search", options).await
1060    }
1061
1062    async fn browser_step(
1063        &self,
1064        request: &BrowserStepRequest,
1065    ) -> Result<BrowserStepResult, ClientError> {
1066        self.request("browser_step", request).await
1067    }
1068
1069    async fn request<T: DeserializeOwned, B: Serialize>(
1070        &self,
1071        operation: &str,
1072        payload: &B,
1073    ) -> Result<T, ClientError> {
1074        let mut cmd = Command::new(&self.command);
1075        cmd.arg("client").arg(operation);
1076
1077        let payload_str = serde_json::to_string(&payload).map_err(ClientError::Serialization)?;
1078        cmd.env("BROWSR_CLIENT_PAYLOAD", &payload_str);
1079
1080        let output = cmd.output().await?;
1081        if !output.status.success() {
1082            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
1083            return Err(ClientError::Stdout(format!(
1084                "browsr command failed ({}): {}",
1085                output.status, stderr
1086            )));
1087        }
1088
1089        let stdout = String::from_utf8_lossy(&output.stdout);
1090        if stdout.trim().is_empty() {
1091            return Err(ClientError::InvalidResponse(
1092                "empty stdout from browsr".to_string(),
1093            ));
1094        }
1095
1096        serde_json::from_str(stdout.trim()).map_err(ClientError::Serialization)
1097    }
1098}
1099
1100// ============================================================
1101// Legacy Helper Functions
1102// ============================================================
1103
1104/// Derive a default base URL for HTTP transport from standard env vars.
1105pub fn default_base_url() -> Option<String> {
1106    if let Ok(url) = std::env::var("BROWSR_API_URL") {
1107        return Some(url);
1108    }
1109    if let Ok(url) = std::env::var("BROWSR_BASE_URL") {
1110        return Some(url);
1111    }
1112    if let Ok(port) = std::env::var("BROWSR_PORT") {
1113        let host = std::env::var("BROWSR_HOST").unwrap_or_else(|_| "127.0.0.1".to_string());
1114        return Some(format!("http://{}:{}", host, port));
1115    }
1116    // Return cloud default
1117    Some(DEFAULT_BASE_URL.to_string())
1118}
1119
1120pub fn default_transport() -> TransportConfig {
1121    TransportConfig::Http {
1122        base_url: default_base_url().unwrap_or_else(|| DEFAULT_BASE_URL.to_string()),
1123    }
1124}
browsr_client/lib.rs

browsr_client/
lib.rs