Skip to main content

starpod_browser/
lib.rs

1//! Lightweight browser automation for Starpod via Chrome DevTools Protocol.
2//!
3//! This crate provides [`BrowserSession`], a high-level async interface for
4//! controlling a CDP-speaking browser (Lightpanda or headless Chromium). It
5//! uses direct CDP over WebSocket and handles process lifecycle, connection
6//! management, and common browser operations.
7//!
8//! # Architecture
9//!
10//! ```text
11//! ┌────────────────────┐     CDP/WebSocket     ┌──────────────────────┐
12//! │  BrowserSession    │ ◄──────────────────── │  lightpanda serve    │
13//! │  (async-tungstenite)│                       │  (auto-spawned)      │
14//! └────────────────────┘                       └──────────────────────┘
15//! ```
16//!
17//! # Usage modes
18//!
19//! - **Auto-spawn** (recommended): [`BrowserSession::launch()`] finds a free
20//!   port, spawns `lightpanda serve`, waits for CDP readiness, and connects.
21//!   The process is killed on [`close()`](BrowserSession::close) or [`Drop`].
22//!
23//! - **External**: [`BrowserSession::connect()`] attaches to a pre-existing
24//!   CDP endpoint (e.g. headless Chromium started by the user or systemd).
25//!
26//! # Requirements
27//!
28//! For auto-spawn mode, `lightpanda` is automatically downloaded and installed
29//! to `~/.local/bin/` if not already on `PATH`. No manual setup is needed.
30//!
31//! # Example
32//!
33//! ```rust,no_run
34//! # async fn example() -> starpod_browser::Result<()> {
35//! use starpod_browser::BrowserSession;
36//!
37//! // Auto-spawn Lightpanda and navigate
38//! let session = BrowserSession::launch().await?;
39//! let title = session.navigate("https://example.com").await?;
40//! println!("Page title: {title}");
41//!
42//! // Extract page text
43//! let text = session.extract(None).await?;
44//! println!("Page text: {text}");
45//!
46//! // Clean up
47//! session.close().await?;
48//! # Ok(())
49//! # }
50//! ```
51
52use std::collections::HashMap;
53use std::path::PathBuf;
54use std::process::Stdio;
55use std::sync::atomic::{AtomicU64, Ordering};
56use std::sync::Arc;
57use std::time::Duration;
58
59use async_tungstenite::tungstenite::Message as WsMessage;
60use futures_util::{SinkExt, StreamExt};
61use tokio::process::{Child, Command};
62use tokio::sync::{broadcast, oneshot, Mutex};
63use tracing::{debug, info};
64
/// Timeout for individual CDP commands.
///
/// `CdpClient::send` waits at most this long for the response to a command,
/// and `BrowserSession::navigate` reuses the same duration as its deadline
/// for the `Page.loadEventFired` event.
const CDP_TIMEOUT: Duration = Duration::from_secs(30);
67
68// ---------------------------------------------------------------------------
69// Errors
70// ---------------------------------------------------------------------------
71
/// Errors from browser operations.
///
/// Most variants carry a human-readable message describing the underlying
/// failure (typically the `Display` output of the source error).
#[derive(Debug, thiserror::Error)]
pub enum BrowserError {
    /// No browser session is active.
    #[error("browser not connected")]
    NotConnected,

    /// Failed to spawn the Lightpanda child process.
    #[error("failed to spawn lightpanda: {0}")]
    SpawnFailed(String),

    /// CDP WebSocket connection failed.
    ///
    /// Also returned when a command cannot be written to the socket, when
    /// its response channel is closed, or when a handshake response lacks an
    /// expected field (`targetId`, `sessionId`).
    #[error("CDP connection failed: {0}")]
    ConnectionFailed(String),

    /// Page navigation failed (invalid URL, network error, etc.).
    #[error("navigation failed: {0}")]
    NavigationFailed(String),

    /// CSS selector matched no elements.
    #[error("element not found: {0}")]
    ElementNotFound(String),

    /// JavaScript evaluation failed.
    ///
    /// The internal CDP client also uses this variant for any CDP response
    /// that carries an `error` object, regardless of which command was sent.
    #[error("JS evaluation failed: {0}")]
    EvalFailed(String),

    /// Timed out waiting for the browser process to accept CDP connections.
    ///
    /// The internal CDP client returns the same variant when a command
    /// receives no response within `CDP_TIMEOUT`.
    #[error("timeout waiting for browser to start")]
    Timeout,

    /// Auto-installation of Lightpanda failed.
    #[error("failed to install lightpanda: {0}")]
    InstallFailed(String),
}
107
/// Convenience alias for `Result<T, BrowserError>`.
///
/// Shadows the prelude `Result` within this file, so the two-parameter form
/// must be spelled `std::result::Result<T, E>`.
pub type Result<T> = std::result::Result<T, BrowserError>;
110
111// ---------------------------------------------------------------------------
112// CdpClient — lightweight CDP-over-WebSocket client
113// ---------------------------------------------------------------------------
114
/// Write half of the CDP WebSocket after it has been split into sink and
/// stream; outgoing CDP commands are sent through this sink.
type WsWriter = futures_util::stream::SplitSink<
    async_tungstenite::WebSocketStream<async_tungstenite::tokio::ConnectStream>,
    WsMessage,
>;
119
120/// A lightweight CDP client that sends commands and routes responses by `id`.
121struct CdpClient {
122    writer: Mutex<WsWriter>,
123    next_id: AtomicU64,
124    pending: Arc<Mutex<HashMap<u64, oneshot::Sender<serde_json::Value>>>>,
125    events: broadcast::Sender<serde_json::Value>,
126    _reader_task: tokio::task::JoinHandle<()>,
127}
128
129impl CdpClient {
130    /// Connect to a CDP WebSocket endpoint.
131    async fn connect(ws_url: &str) -> Result<Self> {
132        let (ws, _) = async_tungstenite::tokio::connect_async(ws_url)
133            .await
134            .map_err(|e| BrowserError::ConnectionFailed(e.to_string()))?;
135
136        let (writer, reader) = ws.split();
137        let pending: Arc<Mutex<HashMap<u64, oneshot::Sender<serde_json::Value>>>> =
138            Arc::new(Mutex::new(HashMap::new()));
139        let (events_tx, _) = broadcast::channel(64);
140
141        let pending_clone = Arc::clone(&pending);
142        let events_clone = events_tx.clone();
143
144        let reader_task = tokio::spawn(async move {
145            let mut reader = reader;
146            while let Some(msg) = reader.next().await {
147                let text = match msg {
148                    Ok(WsMessage::Text(t)) => t.to_string(),
149                    Ok(WsMessage::Close(_)) => break,
150                    Ok(_) => continue,
151                    Err(e) => {
152                        debug!("CDP WebSocket error: {e}");
153                        break;
154                    }
155                };
156
157                let json: serde_json::Value = match serde_json::from_str(&text) {
158                    Ok(v) => v,
159                    Err(e) => {
160                        debug!("CDP parse error: {e}");
161                        continue;
162                    }
163                };
164
165                // Response (has "id" field) → route to pending sender
166                if let Some(id) = json.get("id").and_then(|v| v.as_u64()) {
167                    let mut map = pending_clone.lock().await;
168                    if let Some(tx) = map.remove(&id) {
169                        let _ = tx.send(json);
170                    }
171                } else {
172                    // Event (has "method" field) → broadcast
173                    let _ = events_clone.send(json);
174                }
175            }
176        });
177
178        Ok(Self {
179            writer: Mutex::new(writer),
180            next_id: AtomicU64::new(1),
181            pending,
182            events: events_tx,
183            _reader_task: reader_task,
184        })
185    }
186
187    /// Send a CDP command and wait for its response.
188    async fn send(
189        &self,
190        method: &str,
191        params: serde_json::Value,
192        session_id: Option<&str>,
193    ) -> Result<serde_json::Value> {
194        let id = self.next_id.fetch_add(1, Ordering::Relaxed);
195
196        let mut msg = serde_json::json!({
197            "id": id,
198            "method": method,
199            "params": params,
200        });
201        if let Some(sid) = session_id {
202            msg["sessionId"] = serde_json::Value::String(sid.to_string());
203        }
204
205        let (tx, rx) = oneshot::channel();
206        self.pending.lock().await.insert(id, tx);
207
208        let text = serde_json::to_string(&msg)
209            .map_err(|e| BrowserError::ConnectionFailed(e.to_string()))?;
210
211        self.writer
212            .lock()
213            .await
214            .send(WsMessage::Text(text))
215            .await
216            .map_err(|e| BrowserError::ConnectionFailed(e.to_string()))?;
217
218        let resp = tokio::time::timeout(CDP_TIMEOUT, rx)
219            .await
220            .map_err(|_| BrowserError::Timeout)?
221            .map_err(|_| BrowserError::ConnectionFailed("response channel closed".into()))?;
222
223        // Check for CDP error
224        if let Some(err) = resp.get("error") {
225            let message = err
226                .get("message")
227                .and_then(|m| m.as_str())
228                .unwrap_or("unknown CDP error");
229            return Err(BrowserError::EvalFailed(message.to_string()));
230        }
231
232        // Return the "result" field, or the whole response if no "result"
233        Ok(resp
234            .get("result")
235            .cloned()
236            .unwrap_or(serde_json::Value::Object(serde_json::Map::new())))
237    }
238
    /// Subscribe to CDP events.
    ///
    /// Returns a new receiver on the broadcast channel that the reader task
    /// publishes to; it only observes messages sent after this call.
    fn subscribe(&self) -> broadcast::Receiver<serde_json::Value> {
        self.events.subscribe()
    }
243}
244
245// ---------------------------------------------------------------------------
246// BrowserSession
247// ---------------------------------------------------------------------------
248
/// A browser automation session backed by CDP.
///
/// Manages an optional child process (auto-spawned Lightpanda) and a direct
/// CDP WebSocket connection for interaction.
///
/// The session is designed to be held behind `Arc<tokio::sync::Mutex<Option<BrowserSession>>>`
/// for shared access across async tool calls. All public methods take `&self`
/// (except [`close()`](Self::close), which consumes the session).
///
/// # Process lifecycle
///
/// When created via [`launch()`](Self::launch), the Lightpanda process is
/// spawned with `kill_on_drop(true)` and additionally killed in the [`Drop`]
/// impl. This ensures cleanup even if [`close()`](Self::close) is not called.
pub struct BrowserSession {
    /// Auto-spawned browser process (`None` if connected to external endpoint).
    process: Option<Child>,
    /// The CDP WebSocket client.
    cdp: CdpClient,
    /// The CDP session ID for the attached target; passed as `sessionId` on
    /// every page-level command.
    session_id: String,
}
270
271impl std::fmt::Debug for BrowserSession {
272    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273        f.debug_struct("BrowserSession")
274            .field("auto_spawned", &self.process.is_some())
275            .finish_non_exhaustive()
276    }
277}
278
279impl BrowserSession {
280    /// Spawn a Lightpanda process and connect to it via CDP.
281    ///
282    /// 1. Finds a free TCP port by binding to `127.0.0.1:0`
283    /// 2. Starts `lightpanda serve --host 127.0.0.1 --port <port>`
284    /// 3. Polls the port until CDP accepts connections (up to 10 seconds)
285    /// 4. Connects via WebSocket and opens a blank page
286    ///
287    /// If `lightpanda` is not found on `PATH`, it is automatically downloaded
288    /// from GitHub releases and installed to `~/.local/bin/`.
289    ///
290    /// # Errors
291    ///
292    /// - [`BrowserError::InstallFailed`] if auto-installation fails
293    /// - [`BrowserError::SpawnFailed`] if `lightpanda` fails to start after installation
294    /// - [`BrowserError::Timeout`] if CDP doesn't become available within 10 seconds
295    /// - [`BrowserError::ConnectionFailed`] if WebSocket handshake fails
296    pub async fn launch() -> Result<Self> {
297        let port = find_free_port().await?;
298        let addr = format!("127.0.0.1:{port}");
299
300        // Resolve the lightpanda binary, auto-installing if needed.
301        let binary = resolve_lightpanda_binary().await?;
302
303        info!(port, binary = %binary.display(), "Spawning lightpanda");
304
305        let child = Command::new(&binary)
306            .args([
307                "serve",
308                "--host",
309                "127.0.0.1",
310                "--port",
311                &port.to_string(),
312                // Keep alive for up to 1 hour — the agent can take minutes
313                // between tool calls, and the default 10s timeout kills the
314                // session mid-conversation.
315                "--timeout",
316                "3600",
317            ])
318            .stdout(Stdio::null())
319            .stderr(Stdio::null())
320            .kill_on_drop(true)
321            .spawn()
322            .map_err(|e| BrowserError::SpawnFailed(e.to_string()))?;
323
324        // Wait for CDP to become available, then discover the WebSocket URL
325        wait_for_cdp(&addr).await?;
326        let ws_url = discover_ws_url(&addr).await?;
327        Self::connect_internal(Some(child), &ws_url).await
328    }
329
    /// Connect to an existing CDP endpoint.
    ///
    /// Use this when the browser is managed externally (e.g. headless Chromium
    /// started by systemd, or a shared Lightpanda instance). No child process
    /// is associated with the returned session, so closing or dropping it
    /// does not stop the browser.
    ///
    /// # Arguments
    ///
    /// * `cdp_url` — WebSocket URL (e.g. `ws://127.0.0.1:9222/`).
    ///
    /// # Errors
    ///
    /// - [`BrowserError::ConnectionFailed`] if the endpoint is unreachable
    pub async fn connect(cdp_url: &str) -> Result<Self> {
        debug!(url = cdp_url, "Connecting to existing CDP endpoint");
        Self::connect_internal(None, cdp_url).await
    }
346
347    /// Shared connection logic for both `launch()` and `connect()`.
348    async fn connect_internal(process: Option<Child>, ws_url: &str) -> Result<Self> {
349        let cdp = CdpClient::connect(ws_url).await?;
350
351        // 1. Enable target discovery
352        cdp.send(
353            "Target.setDiscoverTargets",
354            serde_json::json!({"discover": true}),
355            None,
356        )
357        .await?;
358
359        // 2. Create a new target (page)
360        let result = cdp
361            .send(
362                "Target.createTarget",
363                serde_json::json!({"url": "about:blank"}),
364                None,
365            )
366            .await?;
367        let target_id = result["targetId"]
368            .as_str()
369            .ok_or_else(|| BrowserError::ConnectionFailed("no targetId in response".into()))?
370            .to_string();
371
372        // 3. Attach to the target to get a session ID
373        let result = cdp
374            .send(
375                "Target.attachToTarget",
376                serde_json::json!({"targetId": target_id, "flatten": true}),
377                None,
378            )
379            .await?;
380        let session_id = result["sessionId"]
381            .as_str()
382            .ok_or_else(|| BrowserError::ConnectionFailed("no sessionId in response".into()))?
383            .to_string();
384
385        // 4. Enable Page domain (needed for navigation events)
386        cdp.send("Page.enable", serde_json::json!({}), Some(&session_id))
387            .await?;
388
389        debug!(session_id = %session_id, "CDP session established");
390
391        Ok(Self {
392            process,
393            cdp,
394            session_id,
395        })
396    }
397
398    /// Navigate to a URL. Returns the page title after load.
399    ///
400    /// # Errors
401    ///
402    /// - [`BrowserError::NavigationFailed`] on invalid URL or network error
403    pub async fn navigate(&self, url: &str) -> Result<String> {
404        // Subscribe to events BEFORE sending the navigate command
405        let mut events = self.cdp.subscribe();
406
407        self.cdp
408            .send(
409                "Page.navigate",
410                serde_json::json!({"url": url}),
411                Some(&self.session_id),
412            )
413            .await
414            .map_err(|e| BrowserError::NavigationFailed(e.to_string()))?;
415
416        // Wait for Page.loadEventFired
417        let deadline = tokio::time::Instant::now() + CDP_TIMEOUT;
418        loop {
419            match tokio::time::timeout_at(deadline, events.recv()).await {
420                Ok(Ok(event)) => {
421                    if event.get("method").and_then(|m| m.as_str()) == Some("Page.loadEventFired") {
422                        break;
423                    }
424                }
425                Ok(Err(broadcast::error::RecvError::Lagged(_))) => continue,
426                Ok(Err(_)) => break,
427                Err(_) => {
428                    return Err(BrowserError::NavigationFailed("page load timeout".into()));
429                }
430            }
431        }
432
433        // Get the page title
434        self.evaluate("document.title").await
435    }
436
437    /// Extract text content from the page or a specific element.
438    ///
439    /// - `selector = None` → returns `document.body.innerText` (full page text)
440    /// - `selector = Some("h1")` → returns text of the first matching element
441    ///
442    /// # Errors
443    ///
444    /// - [`BrowserError::ElementNotFound`] if the selector matches nothing
445    /// - [`BrowserError::EvalFailed`] if text extraction fails
446    pub async fn extract(&self, selector: Option<&str>) -> Result<String> {
447        match selector {
448            None => self.evaluate("document.body.textContent").await,
449            Some(sel) => {
450                let sel_json = serde_json::to_string(sel)
451                    .map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
452                let js = format!(
453                    r#"(function(){{ var el = document.querySelector({sel}); if (!el) return null; return el.textContent; }})()"#,
454                    sel = sel_json,
455                );
456                let result = self.evaluate(&js).await?;
457                if result == "null" || result.is_empty() {
458                    Err(BrowserError::ElementNotFound(sel.to_string()))
459                } else {
460                    Ok(result)
461                }
462            }
463        }
464    }
465
466    /// Click an element by CSS selector.
467    ///
468    /// Dispatches a full `MouseEvent` (not just `el.click()`) so that
469    /// framework event listeners (React, Vue, etc.) see the event.
470    /// For submit buttons, also calls `form.requestSubmit()` to ensure
471    /// form submission fires correctly.
472    ///
473    /// # Errors
474    ///
475    /// - [`BrowserError::ElementNotFound`] if the selector matches nothing
476    pub async fn click(&self, selector: &str) -> Result<()> {
477        let sel_json =
478            serde_json::to_string(selector).map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
479        let js = format!(
480            r#"(function(){{
481  var el = document.querySelector({sel});
482  if (!el) throw new Error('element not found');
483  el.dispatchEvent(new MouseEvent('click', {{bubbles: true, cancelable: true, view: window}}));
484  if ((el.type === 'submit' || el.tagName === 'BUTTON') && el.form) {{
485    try {{ el.form.requestSubmit(el); }} catch(e) {{ el.form.submit(); }}
486  }}
487  return true;
488}})()"#,
489            sel = sel_json,
490        );
491        self.evaluate(&js).await.map_err(|e| {
492            if e.to_string().contains("element not found") {
493                BrowserError::ElementNotFound(selector.to_string())
494            } else {
495                e
496            }
497        })?;
498        Ok(())
499    }
500
501    /// Type text into an element identified by CSS selector.
502    ///
503    /// Uses the native HTMLInputElement value setter to bypass React's
504    /// internal value tracker, then dispatches `input` and `change` events
505    /// so both React controlled inputs and plain HTML inputs update correctly.
506    ///
507    /// # Errors
508    ///
509    /// - [`BrowserError::ElementNotFound`] if the selector matches nothing
510    pub async fn type_text(&self, selector: &str, text: &str) -> Result<()> {
511        let sel_json =
512            serde_json::to_string(selector).map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
513        let val_json =
514            serde_json::to_string(text).map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
515        let js = format!(
516            r#"(function(){{
517  var el = document.querySelector({sel});
518  if (!el) throw new Error('element not found');
519  el.focus();
520  var nativeSetter = Object.getOwnPropertyDescriptor(
521    HTMLInputElement.prototype, 'value'
522  );
523  if (nativeSetter && nativeSetter.set) {{
524    nativeSetter.set.call(el, {val});
525  }} else {{
526    el.value = {val};
527  }}
528  el.dispatchEvent(new Event('input', {{bubbles: true}}));
529  el.dispatchEvent(new Event('change', {{bubbles: true}}));
530  return true;
531}})()"#,
532            sel = sel_json,
533            val = val_json,
534        );
535        self.evaluate(&js).await.map_err(|e| {
536            if e.to_string().contains("element not found") {
537                BrowserError::ElementNotFound(selector.to_string())
538            } else {
539                e
540            }
541        })?;
542        Ok(())
543    }
544
545    /// Execute JavaScript on the page and return the result as a string.
546    ///
547    /// String values are returned as-is. Numbers, booleans, objects, and arrays
548    /// are serialized via `serde_json::Value::to_string()`.
549    ///
550    /// # Errors
551    ///
552    /// - [`BrowserError::EvalFailed`] on syntax errors or runtime exceptions
553    pub async fn evaluate(&self, js: &str) -> Result<String> {
554        // Wrap in an IIFE to isolate variable declarations between calls.
555        // Without this, consecutive evaluate() calls that declare `const` or
556        // `let` variables with the same name would fail with
557        // "Identifier has already been declared".
558        //
559        // Code that is already an IIFE `(function(){...})()` is left as-is.
560        // Simple expressions get `return` prepended so they return a value.
561        // Multi-statement code is wrapped as-is (caller must use `return`).
562        let trimmed = js.trim();
563        let wrapped = if trimmed.starts_with("(function") {
564            // Already an IIFE — don't double-wrap
565            trimmed.to_string()
566        } else if trimmed.contains(';') || trimmed.contains('\n') {
567            format!("(function(){{{trimmed}}})()")
568        } else {
569            format!("(function(){{ return {trimmed} }})()")
570        };
571        let result = self
572            .cdp
573            .send(
574                "Runtime.evaluate",
575                serde_json::json!({
576                    "expression": wrapped,
577                    "returnByValue": true,
578                }),
579                Some(&self.session_id),
580            )
581            .await
582            .map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
583
584        // Check for exception
585        if let Some(exc) = result.get("exceptionDetails") {
586            // Try exception.description first (has the real message),
587            // fall back to exceptionDetails.text
588            let text = exc
589                .get("exception")
590                .and_then(|e| e.get("description"))
591                .and_then(|d| d.as_str())
592                .or_else(|| exc.get("text").and_then(|t| t.as_str()))
593                .unwrap_or("unknown error");
594            return Err(BrowserError::EvalFailed(text.to_string()));
595        }
596
597        let value = &result["result"]["value"];
598        match value {
599            serde_json::Value::String(s) => Ok(s.clone()),
600            serde_json::Value::Null => Ok(String::new()),
601            other => Ok(other.to_string()),
602        }
603    }
604
    /// Get the current page URL.
    ///
    /// Evaluates `window.location.href` in the page.
    ///
    /// # Errors
    ///
    /// - [`BrowserError::EvalFailed`] if the evaluation fails
    pub async fn url(&self) -> Result<String> {
        self.evaluate("window.location.href").await
    }
609
    /// Returns `true` if this session was auto-spawned (vs. connected to an
    /// external endpoint), i.e. if it holds a child process handle.
    pub fn is_auto_spawned(&self) -> bool {
        self.process.is_some()
    }
615
616    /// Close the browser session and kill the process if auto-spawned.
617    ///
618    /// This is the preferred way to end a session. If not called, the [`Drop`]
619    /// impl will still attempt to kill the child process, but cannot await
620    /// its termination.
621    pub async fn close(mut self) -> Result<()> {
622        if let Some(ref mut child) = self.process {
623            debug!("Killing auto-spawned browser process");
624            let _ = child.kill().await;
625        }
626        Ok(())
627    }
628}
629
630impl Drop for BrowserSession {
631    fn drop(&mut self) {
632        if let Some(ref mut child) = self.process {
633            // Best-effort kill on drop. This is non-async so we can only
634            // *start* the kill signal; the OS will reap the process.
635            let _ = child.start_kill();
636        }
637    }
638}
639
640// ---------------------------------------------------------------------------
641// Auto-install
642// ---------------------------------------------------------------------------
643
644/// Resolve the `lightpanda` binary path.
645///
646/// 1. Check if `lightpanda` is on `PATH` (via `which`).
647/// 2. Check the default install location (`~/.local/bin/lightpanda`).
648/// 3. If not found, download from GitHub releases and install to `~/.local/bin/`.
649async fn resolve_lightpanda_binary() -> Result<PathBuf> {
650    // 1. Already on PATH?
651    if let Ok(path) = which_lightpanda().await {
652        debug!(path = %path.display(), "Found lightpanda on PATH");
653        return Ok(path);
654    }
655
656    // 2. Check default install location
657    let install_dir = default_install_dir()?;
658    let binary_path = install_dir.join("lightpanda");
659    if binary_path.is_file() {
660        debug!(path = %binary_path.display(), "Found lightpanda in ~/.local/bin");
661        return Ok(binary_path);
662    }
663
664    // 3. Auto-install
665    info!("lightpanda not found — downloading automatically");
666    install_lightpanda(&install_dir).await?;
667    Ok(binary_path)
668}
669
670/// Try to find `lightpanda` on PATH using `which`.
671async fn which_lightpanda() -> Result<PathBuf> {
672    let output = tokio::process::Command::new("which")
673        .arg("lightpanda")
674        .output()
675        .await
676        .map_err(|e| BrowserError::SpawnFailed(e.to_string()))?;
677
678    if output.status.success() {
679        let path = String::from_utf8_lossy(&output.stdout).trim().to_string();
680        if !path.is_empty() {
681            return Ok(PathBuf::from(path));
682        }
683    }
684    Err(BrowserError::SpawnFailed("not on PATH".into()))
685}
686
687/// Returns `~/.local/bin`, creating it if it doesn't exist.
688fn default_install_dir() -> Result<PathBuf> {
689    let home =
690        std::env::var("HOME").map_err(|_| BrowserError::InstallFailed("HOME not set".into()))?;
691    let dir = PathBuf::from(home).join(".local").join("bin");
692    Ok(dir)
693}
694
695/// Returns the platform-specific asset name for lightpanda GitHub releases.
696fn lightpanda_asset_name() -> Result<&'static str> {
697    let os = std::env::consts::OS;
698    let arch = std::env::consts::ARCH;
699
700    match (os, arch) {
701        ("macos", "aarch64") => Ok("lightpanda-aarch64-macos"),
702        ("macos", "x86_64") => Ok("lightpanda-x86_64-macos"),
703        ("linux", "aarch64") => Ok("lightpanda-aarch64-linux"),
704        ("linux", "x86_64") => Ok("lightpanda-x86_64-linux"),
705        _ => Err(BrowserError::InstallFailed(format!(
706            "unsupported platform: {os}/{arch}"
707        ))),
708    }
709}
710
711/// Download and install the lightpanda binary to `install_dir`.
712async fn install_lightpanda(install_dir: &std::path::Path) -> Result<()> {
713    let asset = lightpanda_asset_name()?;
714    let url = format!("https://github.com/lightpanda-io/browser/releases/download/nightly/{asset}");
715
716    info!(url = %url, "Downloading lightpanda");
717
718    // Download with curl (follows redirects, which GitHub requires)
719    let output = tokio::process::Command::new("curl")
720        .args(["-fsSL", "--output", "-", &url])
721        .output()
722        .await
723        .map_err(|e| BrowserError::InstallFailed(format!("curl failed: {e}")))?;
724
725    if !output.status.success() {
726        let stderr = String::from_utf8_lossy(&output.stderr);
727        return Err(BrowserError::InstallFailed(format!(
728            "download failed ({}): {stderr}",
729            output.status
730        )));
731    }
732
733    if output.stdout.is_empty() {
734        return Err(BrowserError::InstallFailed(
735            "downloaded file is empty".into(),
736        ));
737    }
738
739    // Create install directory
740    tokio::fs::create_dir_all(install_dir).await.map_err(|e| {
741        BrowserError::InstallFailed(format!("cannot create {}: {e}", install_dir.display()))
742    })?;
743
744    let binary_path = install_dir.join("lightpanda");
745
746    // Write binary
747    tokio::fs::write(&binary_path, &output.stdout)
748        .await
749        .map_err(|e| BrowserError::InstallFailed(format!("cannot write binary: {e}")))?;
750
751    // Make executable
752    #[cfg(unix)]
753    {
754        use std::os::unix::fs::PermissionsExt;
755        let perms = std::fs::Permissions::from_mode(0o755);
756        tokio::fs::set_permissions(&binary_path, perms)
757            .await
758            .map_err(|e| BrowserError::InstallFailed(format!("chmod failed: {e}")))?;
759    }
760
761    info!(path = %binary_path.display(), "lightpanda installed successfully");
762    Ok(())
763}
764
765// ---------------------------------------------------------------------------
766// Helpers
767// ---------------------------------------------------------------------------
768
769/// Find a free TCP port by binding to port 0 and immediately releasing.
770async fn find_free_port() -> Result<u16> {
771    let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
772        .await
773        .map_err(|e| BrowserError::SpawnFailed(format!("cannot bind: {e}")))?;
774    let port = listener
775        .local_addr()
776        .map_err(|e| BrowserError::SpawnFailed(format!("cannot get addr: {e}")))?
777        .port();
778    drop(listener);
779    Ok(port)
780}
781
782/// Fetch the WebSocket debugger URL from the CDP `/json/version` endpoint.
783///
784/// Falls back to `ws://{addr}/` if the endpoint is unavailable.
785async fn discover_ws_url(addr: &str) -> Result<String> {
786    let url = format!("http://{addr}/json/version");
787
788    let output = tokio::process::Command::new("curl")
789        .args(["-sf", "--max-time", "5", &url])
790        .output()
791        .await
792        .map_err(|e| BrowserError::ConnectionFailed(format!("curl /json/version: {e}")))?;
793
794    if output.status.success() {
795        if let Ok(json) = serde_json::from_slice::<serde_json::Value>(&output.stdout) {
796            if let Some(ws) = json.get("webSocketDebuggerUrl").and_then(|v| v.as_str()) {
797                debug!(ws_url = %ws, "Discovered WebSocket URL");
798                return Ok(ws.to_string());
799            }
800        }
801    }
802
803    // Fallback — append trailing slash (lightpanda requires it)
804    let fallback = format!("ws://{addr}/");
805    debug!(ws_url = %fallback, "Using fallback WebSocket URL");
806    Ok(fallback)
807}
808
809/// Wait for a TCP endpoint to accept connections, polling every 100ms.
810///
811/// Times out after 10 seconds with [`BrowserError::Timeout`].
812async fn wait_for_cdp(addr: &str) -> Result<()> {
813    let deadline = tokio::time::Instant::now() + Duration::from_secs(10);
814
815    loop {
816        if tokio::time::Instant::now() > deadline {
817            return Err(BrowserError::Timeout);
818        }
819
820        match tokio::net::TcpStream::connect(addr).await {
821            Ok(_) => {
822                debug!(addr, "CDP endpoint is ready");
823                return Ok(());
824            }
825            Err(_) => {
826                tokio::time::sleep(Duration::from_millis(100)).await;
827            }
828        }
829    }
830}
831
832// ---------------------------------------------------------------------------
833// Tests
834// ---------------------------------------------------------------------------
835
#[cfg(test)]
mod tests {
    use super::*;

    // -- Unit tests (no browser required) --

    /// `find_free_port` must hand back a usable (non-zero) port number.
    #[tokio::test]
    async fn find_free_port_returns_nonzero() {
        let port = find_free_port().await.unwrap();
        assert!(port > 0, "port should be nonzero, got {port}");
    }

    /// Two consecutive calls must both succeed.
    ///
    /// The ports are usually different, but nothing guarantees it, so only
    /// validity is asserted.
    #[tokio::test]
    async fn find_free_port_returns_different_ports() {
        let p1 = find_free_port().await.unwrap();
        let p2 = find_free_port().await.unwrap();
        assert!(p1 > 0);
        assert!(p2 > 0);
    }

    /// `wait_for_cdp` returns `Ok` when something is already listening.
    #[tokio::test]
    async fn wait_for_cdp_succeeds_when_listener_exists() {
        // The listener stays bound until the end of the test.
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap().to_string();

        // Should succeed immediately since the port is already listening
        let result = wait_for_cdp(&addr).await;
        assert!(result.is_ok(), "should connect to existing listener");
    }

    /// `wait_for_cdp` must not report success for a port nobody listens on.
    #[tokio::test]
    async fn wait_for_cdp_times_out_on_closed_port() {
        // `find_free_port` releases the port before returning, so nothing
        // should be listening on it.
        let port = find_free_port().await.unwrap();
        let addr = format!("127.0.0.1:{port}");

        // `wait_for_cdp` uses a 10s deadline internally; an outer 500ms
        // timeout keeps the test fast.
        let started = tokio::time::Instant::now();
        let result = tokio::time::timeout(Duration::from_millis(500), wait_for_cdp(&addr)).await;

        // Either the outer timeout fired or `wait_for_cdp` returned an error.
        assert!(
            !matches!(result, Ok(Ok(()))),
            "should fail on closed port"
        );

        // The outer timeout must have cut the wait short: the elapsed time
        // has to stay well below the internal 10s deadline.
        let elapsed = started.elapsed();
        assert!(
            elapsed < Duration::from_secs(5),
            "should not hang, took {elapsed:?}"
        );
    }

    /// `wait_for_cdp` keeps polling until a late listener comes up.
    #[tokio::test]
    async fn wait_for_cdp_succeeds_when_listener_starts_late() {
        let port = find_free_port().await.unwrap();
        let addr_str = format!("127.0.0.1:{port}");
        let addr_clone = addr_str.clone();

        // Start a listener after a 200ms delay
        let _listener_handle = tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(200)).await;
            tokio::net::TcpListener::bind(&addr_clone).await.unwrap()
            // Keep the listener alive by returning it (held by JoinHandle)
        });

        // wait_for_cdp should poll until it connects
        let result = wait_for_cdp(&addr_str).await;
        assert!(result.is_ok(), "should connect after delayed start");
    }

    /// Pins the `Display` text of each error variant.
    #[test]
    fn error_display_messages() {
        let err = BrowserError::SpawnFailed("not found".into());
        assert_eq!(err.to_string(), "failed to spawn lightpanda: not found");

        let err = BrowserError::ElementNotFound("div.missing".into());
        assert_eq!(err.to_string(), "element not found: div.missing");

        let err = BrowserError::Timeout;
        assert_eq!(err.to_string(), "timeout waiting for browser to start");

        let err = BrowserError::NotConnected;
        assert_eq!(err.to_string(), "browser not connected");

        let err = BrowserError::ConnectionFailed("refused".into());
        assert_eq!(err.to_string(), "CDP connection failed: refused");

        let err = BrowserError::NavigationFailed("404".into());
        assert_eq!(err.to_string(), "navigation failed: 404");

        let err = BrowserError::EvalFailed("syntax error".into());
        assert_eq!(err.to_string(), "JS evaluation failed: syntax error");
    }

    /// The release asset name is expected to carry the `lightpanda-` prefix.
    #[test]
    fn lightpanda_asset_name_returns_valid_name() {
        // Should succeed on any supported CI/dev platform
        let name = lightpanda_asset_name().unwrap();
        assert!(
            name.starts_with("lightpanda-"),
            "asset name should start with 'lightpanda-', got: {name}"
        );
    }

    /// Pins the `Display` text of `BrowserError::InstallFailed`.
    #[test]
    fn install_failed_error_display() {
        let err = BrowserError::InstallFailed("no curl".into());
        assert_eq!(err.to_string(), "failed to install lightpanda: no curl");
    }

    /// The default install directory must end in the `.local/bin` components.
    #[test]
    fn default_install_dir_is_under_home() {
        let dir = default_install_dir().unwrap();
        assert!(
            dir.ends_with(".local/bin"),
            "install dir should end with .local/bin, got: {}",
            dir.display()
        );
    }

    /// Connecting to a port nobody listens on yields `ConnectionFailed`.
    #[tokio::test]
    async fn connect_fails_on_bad_endpoint() {
        let port = find_free_port().await.unwrap();
        let result = BrowserSession::connect(&format!("ws://127.0.0.1:{port}")).await;

        assert!(result.is_err(), "should fail on unreachable endpoint");
        let err = result.unwrap_err();
        assert!(
            matches!(err, BrowserError::ConnectionFailed(_)),
            "expected ConnectionFailed, got: {err}"
        );
    }

    // -- Integration tests (require a running CDP browser) --
    //
    // These tests are marked `#[ignore]` and read the endpoint from the
    // BROWSER_CDP_URL env var.
    // To run them:
    //
    //   # Start Lightpanda
    //   lightpanda serve &
    //
    //   # Run integration tests
    //   BROWSER_CDP_URL=ws://127.0.0.1:9222/ cargo test -p starpod-browser -- --ignored
    //

    /// Read the CDP endpoint for integration tests from `BROWSER_CDP_URL`.
    fn cdp_url() -> Option<String> {
        std::env::var("BROWSER_CDP_URL").ok()
    }

    /// Connect to an external browser and load a page with a non-empty title.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_connect_and_navigate() {
        let url = cdp_url().expect("BROWSER_CDP_URL not set");
        let session = BrowserSession::connect(&url).await.unwrap();
        assert!(!session.is_auto_spawned());

        let title = session.navigate("https://example.com").await.unwrap();
        assert!(
            !title.is_empty(),
            "title should not be empty after navigating to example.com"
        );

        session.close().await.unwrap();
    }

    /// Extract text from the whole page and from a single selector.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_extract_page_text() {
        let url = cdp_url().expect("BROWSER_CDP_URL not set");
        let session = BrowserSession::connect(&url).await.unwrap();

        session.navigate("https://example.com").await.unwrap();

        // Full page text
        let text = session.extract(None).await.unwrap();
        assert!(
            text.contains("Example Domain"),
            "page text should contain 'Example Domain', got: {text}"
        );

        // Specific element
        let h1 = session.extract(Some("h1")).await.unwrap();
        assert_eq!(h1.trim(), "Example Domain");

        session.close().await.unwrap();
    }

    /// Evaluate JavaScript and check the results come back as strings.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_evaluate_javascript() {
        let url = cdp_url().expect("BROWSER_CDP_URL not set");
        let session = BrowserSession::connect(&url).await.unwrap();

        session.navigate("https://example.com").await.unwrap();

        // String result
        let title = session.evaluate("document.title").await.unwrap();
        assert!(!title.is_empty());

        // Numeric result (serialized as string)
        let sum = session.evaluate("1 + 2").await.unwrap();
        assert_eq!(sum, "3");

        // Boolean result
        let t = session.evaluate("true").await.unwrap();
        assert_eq!(t, "true");

        session.close().await.unwrap();
    }

    /// Clicking an element that exists must not return an error.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_click_element() {
        let url = cdp_url().expect("BROWSER_CDP_URL not set");
        let session = BrowserSession::connect(&url).await.unwrap();

        session.navigate("https://example.com").await.unwrap();

        // example.com has a link — clicking it should work (or at least not error)
        let result = session.click("a").await;
        assert!(result.is_ok(), "clicking a link should succeed");

        session.close().await.unwrap();
    }

    /// Clicking a selector that matches nothing yields `ElementNotFound`.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_element_not_found() {
        let url = cdp_url().expect("BROWSER_CDP_URL not set");
        let session = BrowserSession::connect(&url).await.unwrap();

        session.navigate("https://example.com").await.unwrap();

        let result = session.click("div.nonexistent-class-12345").await;
        assert!(result.is_err());
        assert!(
            matches!(result.unwrap_err(), BrowserError::ElementNotFound(_)),
            "should return ElementNotFound for missing selector"
        );

        session.close().await.unwrap();
    }

    /// After navigation, `url()` reports the page that was loaded.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_get_url() {
        let url = cdp_url().expect("BROWSER_CDP_URL not set");
        let session = BrowserSession::connect(&url).await.unwrap();

        session.navigate("https://example.com").await.unwrap();
        let page_url = session.url().await.unwrap();
        assert!(
            page_url.contains("example.com"),
            "URL should contain example.com, got: {page_url}"
        );

        session.close().await.unwrap();
    }

    /// Auto-spawn a browser, navigate once, and shut it down again.
    #[tokio::test]
    #[ignore = "requires lightpanda binary on PATH"]
    async fn integration_launch_and_close() {
        let session = BrowserSession::launch().await.unwrap();
        assert!(session.is_auto_spawned());

        let title = session.navigate("https://example.com").await.unwrap();
        assert!(!title.is_empty());

        session.close().await.unwrap();
    }
}