browsr_client/
lib.rs

1//! Browsr Client - HTTP client for browser automation
2//!
3//! This crate provides a client for interacting with Browsr servers for browser automation,
4//! web scraping, and structured content extraction.
5//!
6//! # Quick Start
7//!
8//! ```rust,ignore
9//! use browsr_client::{BrowsrClient, BrowsrClientConfig};
10//! use browsr_types::Commands;
11//!
12//! // From environment variables
13//! let client = BrowsrClient::from_env();
14//!
15//! // Navigate to a page
16//! let response = client.navigate("https://example.com", None).await?;
17//!
18//! // Extract structured content
//! let data = client.extract_structured(
//!     "Extract the main heading and first paragraph",
//!     None, // schema
//!     None, // max_chars
//!     None, // session_id
//! ).await?;
24//! ```
25//!
26//! # Configuration
27//!
28//! The client can be configured via environment variables or programmatically:
29//!
30//! - `BROWSR_BASE_URL`: Base URL (defaults to `https://api.browsr.dev`)
31//! - `BROWSR_API_KEY`: Optional API key for authentication
32
33mod config;
34
35pub use config::{BrowsrClientConfig, DEFAULT_BASE_URL, ENV_API_KEY, ENV_BASE_URL};
36
37use browsr_types::{
38    AutomateResponse, BrowserContext, Commands, ObserveResponse, ScrapeOptions, SearchOptions,
39    SearchResponse,
40};
41use reqwest::StatusCode;
42use serde::{Deserialize, Serialize, de::DeserializeOwned};
43use serde_json::{Value, json};
44use thiserror::Error;
45use tokio::process::Command;
46
/// Selects how a [`BrowsrClient`] talks to Browsr (legacy configuration type).
///
/// Consumed by [`BrowsrClient::from_config`]; produced by [`default_transport`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransportConfig {
    /// Send requests over HTTP to the server at `base_url`.
    Http { base_url: String },
    /// Spawn `command` for every request and read its JSON reply from stdout.
    Stdout { command: String },
}
52
/// Browsr client for browser automation.
///
/// Requests are sent either over HTTP or, for clients created with
/// `new_stdout`, by spawning a local command per request.
///
/// # Example
///
/// ```rust,ignore
/// use browsr_client::BrowsrClient;
///
/// // From environment variables (BROWSR_BASE_URL, BROWSR_API_KEY)
/// let client = BrowsrClient::from_env();
///
/// // From explicit URL (for local development)
/// let client = BrowsrClient::new("http://localhost:8082");
///
/// // With API key authentication
/// let client = BrowsrClient::new("https://api.browsr.dev")
///     .with_api_key("your-api-key");
/// ```
#[derive(Debug, Clone)]
pub struct BrowsrClient {
    // Channel every request is dispatched through (HTTP or stdout).
    transport: BrowsrTransport,
    // Configuration the client was built from; also supplies the default
    // `headless` value for command and observe requests.
    config: BrowsrClientConfig,
}
75
/// Internal transport selected at construction time; every public
/// `BrowsrClient` request method matches on this to pick the code path.
#[derive(Debug, Clone)]
enum BrowsrTransport {
    /// Requests are sent as HTTP calls relative to a base URL.
    Http(HttpTransport),
    /// Requests are sent by spawning a command and parsing its stdout.
    Stdout(StdoutTransport),
}
81
82impl BrowsrClient {
83    /// Create a new client with the specified base URL (no authentication).
84    /// For local development, use this method.
85    pub fn new(base_url: impl Into<String>) -> Self {
86        let config = BrowsrClientConfig::new(base_url);
87        Self::from_client_config(config)
88    }
89
90    /// Create a new client from environment variables.
91    ///
92    /// - `BROWSR_BASE_URL`: Base URL (defaults to `https://api.browsr.dev`)
93    /// - `BROWSR_API_KEY`: Optional API key for authentication
94    pub fn from_env() -> Self {
95        let config = BrowsrClientConfig::from_env();
96        Self::from_client_config(config)
97    }
98
99    /// Create a new client from explicit configuration.
100    pub fn from_client_config(config: BrowsrClientConfig) -> Self {
101        let http = config
102            .build_http_client()
103            .expect("Failed to build HTTP client");
104
105        Self {
106            transport: BrowsrTransport::Http(HttpTransport::new_with_client(
107                &config.base_url,
108                http,
109            )),
110            config,
111        }
112    }
113
114    /// Set the API key for authentication.
115    /// This rebuilds the HTTP client with the new authentication header.
116    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
117        self.config = self.config.with_api_key(api_key);
118        let http = self
119            .config
120            .build_http_client()
121            .expect("Failed to build HTTP client");
122        self.transport =
123            BrowsrTransport::Http(HttpTransport::new_with_client(&self.config.base_url, http));
124        self
125    }
126
127    /// Create HTTP transport client (legacy method).
128    pub fn new_http(base_url: impl Into<String>) -> Self {
129        Self::new(base_url)
130    }
131
132    /// Create stdout transport client.
133    pub fn new_stdout(command: impl Into<String>) -> Self {
134        Self {
135            transport: BrowsrTransport::Stdout(StdoutTransport::new(command)),
136            config: BrowsrClientConfig::default(),
137        }
138    }
139
140    /// Create client from transport config (legacy method).
141    pub fn from_config(cfg: TransportConfig) -> Self {
142        match cfg {
143            TransportConfig::Http { base_url } => Self::new_http(base_url),
144            TransportConfig::Stdout { command } => Self::new_stdout(command),
145        }
146    }
147
148    /// Get the base URL.
149    pub fn base_url(&self) -> &str {
150        &self.config.base_url
151    }
152
153    /// Get the current configuration.
154    pub fn config(&self) -> &BrowsrClientConfig {
155        &self.config
156    }
157
158    /// Check if the client has authentication configured.
159    pub fn has_auth(&self) -> bool {
160        self.config.has_auth()
161    }
162
163    /// Check if this is a local development client.
164    pub fn is_local(&self) -> bool {
165        self.config.is_local()
166    }
167
168    // ============================================================
169    // Session Management
170    // ============================================================
171
172    /// List all active browser sessions.
173    pub async fn list_sessions(&self) -> Result<Vec<String>, ClientError> {
174        match &self.transport {
175            BrowsrTransport::Http(inner) => {
176                let response: SessionList = inner.get("/sessions").await?;
177                Ok(response.sessions)
178            }
179            BrowsrTransport::Stdout(inner) => inner.list_sessions().await,
180        }
181    }
182
183    /// Create a new browser session.
184    pub async fn create_session(&self) -> Result<String, ClientError> {
185        match &self.transport {
186            BrowsrTransport::Http(inner) => {
187                let response: SessionCreated = inner.post("/sessions", &Value::Null).await?;
188                Ok(response.session_id)
189            }
190            BrowsrTransport::Stdout(inner) => inner.create_session().await,
191        }
192    }
193
194    /// Destroy a browser session.
195    pub async fn destroy_session(&self, session_id: &str) -> Result<(), ClientError> {
196        match &self.transport {
197            BrowsrTransport::Http(inner) => inner
198                .delete(&format!("/sessions/{}", session_id))
199                .await
200                .map(|_: Value| ()),
201            BrowsrTransport::Stdout(inner) => inner.destroy_session(session_id).await,
202        }
203    }
204
205    // ============================================================
206    // Command Execution
207    // ============================================================
208
209    /// Execute a list of browser commands.
210    pub async fn execute_commands(
211        &self,
212        commands: Vec<Commands>,
213        session_id: Option<String>,
214        headless: Option<bool>,
215        context: Option<BrowserContext>,
216    ) -> Result<AutomateResponse, ClientError> {
217        let payload = CommandsPayload {
218            commands,
219            session_id,
220            headless: headless.or(self.config.headless),
221            context,
222        };
223
224        match &self.transport {
225            BrowsrTransport::Http(inner) => inner.post("/commands", &payload).await,
226            BrowsrTransport::Stdout(inner) => inner.execute_commands(&payload).await,
227        }
228    }
229
230    /// Execute a single browser command.
231    pub async fn execute_command(
232        &self,
233        command: Commands,
234        session_id: Option<String>,
235        headless: Option<bool>,
236    ) -> Result<AutomateResponse, ClientError> {
237        self.execute_commands(vec![command], session_id, headless, None)
238            .await
239    }
240
241    // ============================================================
242    // Convenience Methods for Common Commands
243    // ============================================================
244
245    /// Navigate to a URL.
246    pub async fn navigate(
247        &self,
248        url: &str,
249        session_id: Option<String>,
250    ) -> Result<AutomateResponse, ClientError> {
251        self.execute_command(
252            Commands::NavigateTo {
253                url: url.to_string(),
254            },
255            session_id,
256            None,
257        )
258        .await
259    }
260
261    /// Click an element by selector.
262    pub async fn click(
263        &self,
264        selector: &str,
265        session_id: Option<String>,
266    ) -> Result<AutomateResponse, ClientError> {
267        self.execute_command(
268            Commands::Click {
269                selector: selector.to_string(),
270            },
271            session_id,
272            None,
273        )
274        .await
275    }
276
277    /// Type text into an element.
278    pub async fn type_text(
279        &self,
280        selector: &str,
281        text: &str,
282        clear: Option<bool>,
283        session_id: Option<String>,
284    ) -> Result<AutomateResponse, ClientError> {
285        self.execute_command(
286            Commands::TypeText {
287                selector: selector.to_string(),
288                text: text.to_string(),
289                clear,
290            },
291            session_id,
292            None,
293        )
294        .await
295    }
296
297    /// Wait for an element to appear.
298    pub async fn wait_for_element(
299        &self,
300        selector: &str,
301        timeout_ms: Option<u64>,
302        session_id: Option<String>,
303    ) -> Result<AutomateResponse, ClientError> {
304        self.execute_command(
305            Commands::WaitForElement {
306                selector: selector.to_string(),
307                timeout_ms,
308                visible_only: None,
309            },
310            session_id,
311            None,
312        )
313        .await
314    }
315
316    /// Take a screenshot.
317    pub async fn screenshot(
318        &self,
319        full_page: bool,
320        session_id: Option<String>,
321    ) -> Result<AutomateResponse, ClientError> {
322        self.execute_command(
323            Commands::Screenshot {
324                full_page: Some(full_page),
325                path: None,
326            },
327            session_id,
328            None,
329        )
330        .await
331    }
332
333    /// Get page title.
334    pub async fn get_title(
335        &self,
336        session_id: Option<String>,
337    ) -> Result<AutomateResponse, ClientError> {
338        self.execute_command(Commands::GetTitle, session_id, None)
339            .await
340    }
341
342    /// Get text content of an element.
343    pub async fn get_text(
344        &self,
345        selector: &str,
346        session_id: Option<String>,
347    ) -> Result<AutomateResponse, ClientError> {
348        self.execute_command(
349            Commands::GetText {
350                selector: selector.to_string(),
351            },
352            session_id,
353            None,
354        )
355        .await
356    }
357
358    /// Get HTML content of an element or page.
359    pub async fn get_content(
360        &self,
361        selector: Option<String>,
362        session_id: Option<String>,
363    ) -> Result<AutomateResponse, ClientError> {
364        self.execute_command(
365            Commands::GetContent {
366                selector,
367                kind: None,
368            },
369            session_id,
370            None,
371        )
372        .await
373    }
374
375    /// Evaluate JavaScript expression.
376    pub async fn evaluate(
377        &self,
378        expression: &str,
379        session_id: Option<String>,
380    ) -> Result<AutomateResponse, ClientError> {
381        self.execute_command(
382            Commands::Evaluate {
383                expression: expression.to_string(),
384            },
385            session_id,
386            None,
387        )
388        .await
389    }
390
391    // ============================================================
392    // Structured Extraction
393    // ============================================================
394
395    /// Extract structured content from the current page using AI.
396    ///
397    /// # Arguments
398    /// * `query` - Natural language description of what to extract
399    /// * `schema` - Optional JSON schema for the output
400    /// * `max_chars` - Optional maximum characters to process
401    /// * `session_id` - Optional session ID
402    ///
403    /// # Example
404    /// ```rust,ignore
405    /// let data = client.extract_structured(
406    ///     "Extract all product names and prices",
407    ///     None,
408    ///     None,
409    /// ).await?;
410    /// ```
411    pub async fn extract_structured(
412        &self,
413        query: &str,
414        schema: Option<serde_json::Value>,
415        max_chars: Option<usize>,
416        session_id: Option<String>,
417    ) -> Result<AutomateResponse, ClientError> {
418        self.execute_command(
419            Commands::ExtractStructuredContent {
420                query: query.to_string(),
421                schema,
422                max_chars,
423            },
424            session_id,
425            None,
426        )
427        .await
428    }
429
430    // ============================================================
431    // Observation
432    // ============================================================
433
434    /// Observe the current browser state (screenshot + DOM snapshot).
435    pub async fn observe(
436        &self,
437        session_id: Option<String>,
438        headless: Option<bool>,
439        opts: ObserveOptions,
440    ) -> Result<ObserveResponse, ClientError> {
441        let payload = ObservePayload {
442            session_id,
443            headless: headless.or(self.config.headless),
444            use_image: opts.use_image,
445            full_page: opts.full_page,
446            wait_ms: opts.wait_ms,
447            include_content: opts.include_content,
448        };
449
450        match &self.transport {
451            BrowsrTransport::Http(inner) => {
452                let envelope: ObserveEnvelope = inner.post("/observe", &payload).await?;
453                Ok(envelope.observation)
454            }
455            BrowsrTransport::Stdout(inner) => inner.observe(&payload).await,
456        }
457    }
458
459    // ============================================================
460    // Scraping
461    // ============================================================
462
463    /// Scrape content from a URL or the current page.
464    pub async fn scrape(&self, options: ScrapeOptions) -> Result<Value, ClientError> {
465        match &self.transport {
466            BrowsrTransport::Http(inner) => inner.post("/scrape", &options).await,
467            BrowsrTransport::Stdout(inner) => inner.scrape(&options).await,
468        }
469    }
470
471    /// Scrape a URL with default options.
472    pub async fn scrape_url(&self, url: &str) -> Result<Value, ClientError> {
473        let options = ScrapeOptions {
474            url: url.to_string(),
475            use_js: None,
476            wait_for: None,
477            extract_links: None,
478            selector: None,
479            extract_images: None,
480            extract_metadata: None,
481            extract_tables: None,
482            extract_forms: None,
483            extract_structured_data: None,
484            readable_content: None,
485            remove_base64_images: None,
486        };
487        self.scrape(options).await
488    }
489
490    // ============================================================
491    // Search
492    // ============================================================
493
494    /// Perform a web search.
495    pub async fn search(&self, options: SearchOptions) -> Result<SearchResponse, ClientError> {
496        match &self.transport {
497            BrowsrTransport::Http(inner) => inner.post("/search", &options).await,
498            BrowsrTransport::Stdout(inner) => inner.search(&options).await,
499        }
500    }
501
502    /// Search with a query string.
503    pub async fn search_query(&self, query: &str) -> Result<SearchResponse, ClientError> {
504        let options = SearchOptions {
505            query: query.to_string(),
506            limit: None,
507        };
508        self.search(options).await
509    }
510}
511
512// ============================================================
513// Internal Types
514// ============================================================
515
/// Reply shape for listing sessions (HTTP `GET /sessions` and the stdout
/// `sessions` operation).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SessionList {
    // Session identifiers, returned to callers as-is.
    sessions: Vec<String>,
}
520
/// Reply shape for creating a session (HTTP `POST /sessions` and the stdout
/// `create_session` operation).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SessionCreated {
    // Identifier of the newly created session.
    session_id: String,
}
525
/// Request body for command execution (HTTP `POST /commands` and the stdout
/// `execute` operation).
///
/// Optional fields that are `None` are omitted from the serialized JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CommandsPayload {
    // Commands to execute.
    commands: Vec<Commands>,
    #[serde(skip_serializing_if = "Option::is_none")]
    session_id: Option<String>,
    // Explicit caller value, or the client configuration's default.
    #[serde(skip_serializing_if = "Option::is_none")]
    headless: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    context: Option<BrowserContext>,
}
536
/// Options for observing the browser state.
///
/// Each field is copied unchanged into the observe request; a `None` field
/// is omitted from the request body.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObserveOptions {
    /// Forwarded as `use_image`; `Some(true)` by default.
    pub use_image: Option<bool>,
    /// Forwarded as `full_page`; unset by default.
    pub full_page: Option<bool>,
    /// Forwarded as `wait_ms` (presumably a delay in milliseconds; confirm
    /// against the server); unset by default.
    pub wait_ms: Option<u64>,
    /// Forwarded as `include_content`; `Some(true)` by default.
    pub include_content: Option<bool>,
}
545
546impl Default for ObserveOptions {
547    fn default() -> Self {
548        Self {
549            use_image: Some(true),
550            full_page: None,
551            wait_ms: None,
552            include_content: Some(true),
553        }
554    }
555}
556
/// Request body for observation (HTTP `POST /observe` and the stdout
/// `observe` operation).
///
/// Every field is optional: `None` values are omitted when serializing and
/// missing fields default to `None` when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ObservePayload {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    // Explicit caller value, or the client configuration's default.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub headless: Option<bool>,
    // The remaining fields mirror `ObserveOptions` one-to-one.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub use_image: Option<bool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub full_page: Option<bool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub wait_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub include_content: Option<bool>,
}
572
/// Reply wrapper for observation requests.
///
/// Only `observation` is handed back to callers. `session_id` has no
/// default, so it must be present for the reply to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ObserveEnvelope {
    pub session_id: String,
    pub observation: ObserveResponse,
}
578
579// ============================================================
580// Error Types
581// ============================================================
582
/// Errors returned by [`BrowsrClient`] operations.
#[derive(Debug, Error)]
pub enum ClientError {
    /// The HTTP request could not be sent or its body could not be read.
    #[error("http request failed: {0}")]
    Http(#[from] reqwest::Error),
    /// The spawned stdout-transport command exited unsuccessfully; the
    /// message contains the exit status and captured stderr.
    #[error("stdout transport failed: {0}")]
    Stdout(String),
    /// The peer answered, but not usefully: a non-success HTTP status, or an
    /// empty stdout from the spawned command.
    #[error("invalid response: {0}")]
    InvalidResponse(String),
    /// A payload could not be serialized or a reply could not be parsed as
    /// the expected JSON shape.
    #[error("serialization error: {0}")]
    Serialization(#[from] serde_json::Error),
    /// An I/O error, e.g. while running the stdout-transport command.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
596
597// ============================================================
598// HTTP Transport
599// ============================================================
600
/// Transport that sends each request as an HTTP call with a JSON body/reply.
#[derive(Debug, Clone)]
struct HttpTransport {
    // Prefix joined with every request path (see `url`).
    base_url: String,
    // Pre-built reqwest client supplied by the caller.
    client: reqwest::Client,
}
606
607impl HttpTransport {
608    fn new_with_client(base_url: impl Into<String>, client: reqwest::Client) -> Self {
609        Self {
610            base_url: base_url.into(),
611            client,
612        }
613    }
614
615    fn url(&self, path: &str) -> String {
616        let base = self.base_url.trim_end_matches('/');
617        let suffix = path.trim_start_matches('/');
618        format!("{}/{}", base, suffix)
619    }
620
621    async fn get<T: DeserializeOwned>(&self, path: &str) -> Result<T, ClientError> {
622        let resp = self.client.get(self.url(path)).send().await?;
623        Self::handle_response(resp).await
624    }
625
626    async fn delete<T: DeserializeOwned>(&self, path: &str) -> Result<T, ClientError> {
627        let resp = self.client.delete(self.url(path)).send().await?;
628        Self::handle_response(resp).await
629    }
630
631    async fn post<T: DeserializeOwned, B: Serialize + ?Sized>(
632        &self,
633        path: &str,
634        body: &B,
635    ) -> Result<T, ClientError> {
636        let resp = self.client.post(self.url(path)).json(body).send().await?;
637        Self::handle_response(resp).await
638    }
639
640    async fn handle_response<T: DeserializeOwned>(
641        resp: reqwest::Response,
642    ) -> Result<T, ClientError> {
643        let status = resp.status();
644        if status == StatusCode::NO_CONTENT {
645            let empty: Value = Value::Null;
646            let value: T = serde_json::from_value(empty).map_err(ClientError::Serialization)?;
647            return Ok(value);
648        }
649
650        let text = resp.text().await?;
651        if !status.is_success() {
652            return Err(ClientError::InvalidResponse(format!(
653                "{}: {}",
654                status, text
655            )));
656        }
657
658        serde_json::from_str(&text).map_err(ClientError::Serialization)
659    }
660}
661
662// ============================================================
663// Stdout Transport
664// ============================================================
665
/// Transport that spawns a command for every request and parses the JSON it
/// prints to stdout.
#[derive(Debug, Clone)]
struct StdoutTransport {
    // Program to spawn; invoked as `<command> client <operation>`.
    command: String,
}
670
671impl StdoutTransport {
672    fn new(command: impl Into<String>) -> Self {
673        Self {
674            command: command.into(),
675        }
676    }
677
678    async fn list_sessions(&self) -> Result<Vec<String>, ClientError> {
679        let envelope: SessionList = self.request("sessions", &Value::Null).await?;
680        Ok(envelope.sessions)
681    }
682
683    async fn create_session(&self) -> Result<String, ClientError> {
684        let created: SessionCreated = self.request("create_session", &Value::Null).await?;
685        Ok(created.session_id)
686    }
687
688    async fn destroy_session(&self, session_id: &str) -> Result<(), ClientError> {
689        let payload = json!({ "session_id": session_id });
690        let _: Value = self.request("destroy_session", &payload).await?;
691        Ok(())
692    }
693
694    async fn execute_commands(
695        &self,
696        payload: &CommandsPayload,
697    ) -> Result<AutomateResponse, ClientError> {
698        self.request("execute", payload).await
699    }
700
701    async fn observe(&self, payload: &ObservePayload) -> Result<ObserveResponse, ClientError> {
702        let envelope: ObserveEnvelope = self.request("observe", payload).await?;
703        Ok(envelope.observation)
704    }
705
706    async fn scrape(&self, options: &ScrapeOptions) -> Result<Value, ClientError> {
707        self.request("scrape", options).await
708    }
709
710    async fn search(&self, options: &SearchOptions) -> Result<SearchResponse, ClientError> {
711        self.request("search", options).await
712    }
713
714    async fn request<T: DeserializeOwned, B: Serialize>(
715        &self,
716        operation: &str,
717        payload: &B,
718    ) -> Result<T, ClientError> {
719        let mut cmd = Command::new(&self.command);
720        cmd.arg("client").arg(operation);
721
722        let payload_str = serde_json::to_string(&payload).map_err(ClientError::Serialization)?;
723        cmd.env("BROWSR_CLIENT_PAYLOAD", &payload_str);
724
725        let output = cmd.output().await?;
726        if !output.status.success() {
727            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
728            return Err(ClientError::Stdout(format!(
729                "browsr command failed ({}): {}",
730                output.status, stderr
731            )));
732        }
733
734        let stdout = String::from_utf8_lossy(&output.stdout);
735        if stdout.trim().is_empty() {
736            return Err(ClientError::InvalidResponse(
737                "empty stdout from browsr".to_string(),
738            ));
739        }
740
741        serde_json::from_str(stdout.trim()).map_err(ClientError::Serialization)
742    }
743}
744
745// ============================================================
746// Legacy Helper Functions
747// ============================================================
748
749/// Derive a default base URL for HTTP transport from standard env vars.
750pub fn default_base_url() -> Option<String> {
751    if let Ok(url) = std::env::var("BROWSR_API_URL") {
752        return Some(url);
753    }
754    if let Ok(url) = std::env::var("BROWSR_BASE_URL") {
755        return Some(url);
756    }
757    if let Ok(port) = std::env::var("BROWSR_PORT") {
758        let host = std::env::var("BROWSR_HOST").unwrap_or_else(|_| "127.0.0.1".to_string());
759        return Some(format!("http://{}:{}", host, port));
760    }
761    // Return cloud default
762    Some(DEFAULT_BASE_URL.to_string())
763}
764
765pub fn default_transport() -> TransportConfig {
766    TransportConfig::Http {
767        base_url: default_base_url().unwrap_or_else(|| DEFAULT_BASE_URL.to_string()),
768    }
769}