Skip to main content

starpod_browser/
lib.rs

1//! Lightweight browser automation for Starpod via Chrome DevTools Protocol.
2//!
3//! This crate provides [`BrowserSession`], a high-level async interface for
4//! controlling a CDP-speaking browser (Lightpanda or headless Chromium). It
5//! uses direct CDP over WebSocket and handles process lifecycle, connection
6//! management, and common browser operations.
7//!
8//! # Architecture
9//!
10//! ```text
11//! ┌────────────────────┐     CDP/WebSocket     ┌──────────────────────┐
12//! │  BrowserSession    │ ◄──────────────────── │  lightpanda serve    │
13//! │  (async-tungstenite)│                       │  (auto-spawned)      │
14//! └────────────────────┘                       └──────────────────────┘
15//! ```
16//!
17//! # Usage modes
18//!
19//! - **Auto-spawn** (recommended): [`BrowserSession::launch()`] finds a free
20//!   port, spawns `lightpanda serve`, waits for CDP readiness, and connects.
21//!   The process is killed on [`close()`](BrowserSession::close) or [`Drop`].
22//!
23//! - **External**: [`BrowserSession::connect()`] attaches to a pre-existing
24//!   CDP endpoint (e.g. headless Chromium started by the user or systemd).
25//!
26//! # Requirements
27//!
28//! For auto-spawn mode, `lightpanda` is automatically downloaded and installed
29//! to `~/.local/bin/` if not already on `PATH`. No manual setup is needed.
30//!
31//! # Example
32//!
33//! ```rust,no_run
34//! # async fn example() -> starpod_browser::Result<()> {
35//! use starpod_browser::BrowserSession;
36//!
37//! // Auto-spawn Lightpanda and navigate
38//! let session = BrowserSession::launch().await?;
39//! let title = session.navigate("https://example.com").await?;
40//! println!("Page title: {title}");
41//!
42//! // Extract page text
43//! let text = session.extract(None).await?;
44//! println!("Page text: {text}");
45//!
46//! // Clean up
47//! session.close().await?;
48//! # Ok(())
49//! # }
50//! ```
51
52use std::collections::HashMap;
53use std::path::PathBuf;
54use std::process::Stdio;
55use std::sync::atomic::{AtomicU64, Ordering};
56use std::sync::Arc;
57use std::time::Duration;
58
59use async_tungstenite::tungstenite::Message as WsMessage;
60use futures_util::{SinkExt, StreamExt};
61use tokio::process::{Child, Command};
62use tokio::sync::{broadcast, oneshot, Mutex};
63use tracing::{debug, info};
64
65/// Timeout for individual CDP commands.
66const CDP_TIMEOUT: Duration = Duration::from_secs(30);
67
68// ---------------------------------------------------------------------------
69// Errors
70// ---------------------------------------------------------------------------
71
/// Errors from browser operations.
///
/// Variants that carry a `String` append that detail to the fixed message
/// prefix declared in their `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum BrowserError {
    /// No browser session is active.
    #[error("browser not connected")]
    NotConnected,

    /// Failed to spawn the Lightpanda child process.
    ///
    /// Also used by the port-probing and `which` helpers in this file when
    /// they cannot bind a socket or run the lookup command.
    #[error("failed to spawn lightpanda: {0}")]
    SpawnFailed(String),

    /// CDP WebSocket connection failed.
    ///
    /// Covers handshake errors, write errors on an established socket, and
    /// responses that are missing an expected field during session setup.
    #[error("CDP connection failed: {0}")]
    ConnectionFailed(String),

    /// Page navigation failed (invalid URL, network error, etc.).
    #[error("navigation failed: {0}")]
    NavigationFailed(String),

    /// CSS selector matched no elements.
    #[error("element not found: {0}")]
    ElementNotFound(String),

    /// JavaScript evaluation failed.
    ///
    /// The internal CDP client also uses this variant for protocol-level
    /// `error` responses, regardless of which CDP method was called.
    #[error("JS evaluation failed: {0}")]
    EvalFailed(String),

    /// Timed out waiting for the browser process to accept CDP connections.
    ///
    /// The internal CDP client returns this variant as well when a command
    /// receives no response within `CDP_TIMEOUT`, so the fixed message can
    /// appear for an already-running browser.
    #[error("timeout waiting for browser to start")]
    Timeout,

    /// Auto-installation of Lightpanda failed.
    #[error("failed to install lightpanda: {0}")]
    InstallFailed(String),
}
107
/// Convenience alias for `Result<T, BrowserError>`.
///
/// Used as the return type of every fallible function in this crate.
pub type Result<T> = std::result::Result<T, BrowserError>;
110
111// ---------------------------------------------------------------------------
112// CdpClient — lightweight CDP-over-WebSocket client
113// ---------------------------------------------------------------------------
114
/// Write half of the CDP WebSocket, as produced by splitting the connected
/// stream into a sink and a stream.
type WsWriter = futures_util::stream::SplitSink<
    async_tungstenite::WebSocketStream<async_tungstenite::tokio::ConnectStream>,
    WsMessage,
>;
119
120/// A lightweight CDP client that sends commands and routes responses by `id`.
121struct CdpClient {
122    writer: Mutex<WsWriter>,
123    next_id: AtomicU64,
124    pending: Arc<Mutex<HashMap<u64, oneshot::Sender<serde_json::Value>>>>,
125    events: broadcast::Sender<serde_json::Value>,
126    _reader_task: tokio::task::JoinHandle<()>,
127}
128
129impl CdpClient {
130    /// Connect to a CDP WebSocket endpoint.
131    async fn connect(ws_url: &str) -> Result<Self> {
132        let (ws, _) = async_tungstenite::tokio::connect_async(ws_url)
133            .await
134            .map_err(|e| BrowserError::ConnectionFailed(e.to_string()))?;
135
136        let (writer, reader) = ws.split();
137        let pending: Arc<Mutex<HashMap<u64, oneshot::Sender<serde_json::Value>>>> =
138            Arc::new(Mutex::new(HashMap::new()));
139        let (events_tx, _) = broadcast::channel(64);
140
141        let pending_clone = Arc::clone(&pending);
142        let events_clone = events_tx.clone();
143
144        let reader_task = tokio::spawn(async move {
145            let mut reader = reader;
146            while let Some(msg) = reader.next().await {
147                let text = match msg {
148                    Ok(WsMessage::Text(t)) => t.to_string(),
149                    Ok(WsMessage::Close(_)) => break,
150                    Ok(_) => continue,
151                    Err(e) => {
152                        debug!("CDP WebSocket error: {e}");
153                        break;
154                    }
155                };
156
157                let json: serde_json::Value = match serde_json::from_str(&text) {
158                    Ok(v) => v,
159                    Err(e) => {
160                        debug!("CDP parse error: {e}");
161                        continue;
162                    }
163                };
164
165                // Response (has "id" field) → route to pending sender
166                if let Some(id) = json.get("id").and_then(|v| v.as_u64()) {
167                    let mut map = pending_clone.lock().await;
168                    if let Some(tx) = map.remove(&id) {
169                        let _ = tx.send(json);
170                    }
171                } else {
172                    // Event (has "method" field) → broadcast
173                    let _ = events_clone.send(json);
174                }
175            }
176        });
177
178        Ok(Self {
179            writer: Mutex::new(writer),
180            next_id: AtomicU64::new(1),
181            pending,
182            events: events_tx,
183            _reader_task: reader_task,
184        })
185    }
186
187    /// Send a CDP command and wait for its response.
188    async fn send(
189        &self,
190        method: &str,
191        params: serde_json::Value,
192        session_id: Option<&str>,
193    ) -> Result<serde_json::Value> {
194        let id = self.next_id.fetch_add(1, Ordering::Relaxed);
195
196        let mut msg = serde_json::json!({
197            "id": id,
198            "method": method,
199            "params": params,
200        });
201        if let Some(sid) = session_id {
202            msg["sessionId"] = serde_json::Value::String(sid.to_string());
203        }
204
205        let (tx, rx) = oneshot::channel();
206        self.pending.lock().await.insert(id, tx);
207
208        let text = serde_json::to_string(&msg)
209            .map_err(|e| BrowserError::ConnectionFailed(e.to_string()))?;
210
211        self.writer
212            .lock()
213            .await
214            .send(WsMessage::Text(text.into()))
215            .await
216            .map_err(|e| BrowserError::ConnectionFailed(e.to_string()))?;
217
218        let resp = tokio::time::timeout(CDP_TIMEOUT, rx)
219            .await
220            .map_err(|_| BrowserError::Timeout)?
221            .map_err(|_| BrowserError::ConnectionFailed("response channel closed".into()))?;
222
223        // Check for CDP error
224        if let Some(err) = resp.get("error") {
225            let message = err
226                .get("message")
227                .and_then(|m| m.as_str())
228                .unwrap_or("unknown CDP error");
229            return Err(BrowserError::EvalFailed(message.to_string()));
230        }
231
232        // Return the "result" field, or the whole response if no "result"
233        Ok(resp
234            .get("result")
235            .cloned()
236            .unwrap_or(serde_json::Value::Object(serde_json::Map::new())))
237    }
238
    /// Subscribe to CDP events.
    ///
    /// Returns a new receiver on the broadcast channel fed by the reader
    /// task. Only messages published after this call are delivered, so
    /// callers must subscribe before issuing the command whose events they
    /// want to observe.
    fn subscribe(&self) -> broadcast::Receiver<serde_json::Value> {
        self.events.subscribe()
    }
243}
244
245// ---------------------------------------------------------------------------
246// BrowserSession
247// ---------------------------------------------------------------------------
248
/// A browser automation session backed by CDP.
///
/// Manages an optional child process (auto-spawned Lightpanda) and a direct
/// CDP WebSocket connection for interaction.
///
/// The session is designed to be held behind `Arc<tokio::sync::Mutex<Option<BrowserSession>>>`
/// for shared access across async tool calls. All public methods take `&self`
/// (except [`close()`](Self::close), which consumes the session).
///
/// # Process lifecycle
///
/// When created via [`launch()`](Self::launch), the Lightpanda process is
/// spawned with `kill_on_drop(true)` and additionally killed in the [`Drop`]
/// impl. This ensures cleanup even if [`close()`](Self::close) is not called.
pub struct BrowserSession {
    /// Auto-spawned browser process (`None` if connected to external endpoint).
    process: Option<Child>,
    /// The CDP WebSocket client.
    cdp: CdpClient,
    /// The CDP session ID for the attached target, as returned by
    /// `Target.attachToTarget`; sent with every page-level command.
    session_id: String,
}
270
271impl std::fmt::Debug for BrowserSession {
272    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273        f.debug_struct("BrowserSession")
274            .field("auto_spawned", &self.process.is_some())
275            .finish_non_exhaustive()
276    }
277}
278
279impl BrowserSession {
280    /// Spawn a Lightpanda process and connect to it via CDP.
281    ///
282    /// 1. Finds a free TCP port by binding to `127.0.0.1:0`
283    /// 2. Starts `lightpanda serve --host 127.0.0.1 --port <port>`
284    /// 3. Polls the port until CDP accepts connections (up to 10 seconds)
285    /// 4. Connects via WebSocket and opens a blank page
286    ///
287    /// If `lightpanda` is not found on `PATH`, it is automatically downloaded
288    /// from GitHub releases and installed to `~/.local/bin/`.
289    ///
290    /// # Errors
291    ///
292    /// - [`BrowserError::InstallFailed`] if auto-installation fails
293    /// - [`BrowserError::SpawnFailed`] if `lightpanda` fails to start after installation
294    /// - [`BrowserError::Timeout`] if CDP doesn't become available within 10 seconds
295    /// - [`BrowserError::ConnectionFailed`] if WebSocket handshake fails
296    pub async fn launch() -> Result<Self> {
297        let port = find_free_port().await?;
298        let addr = format!("127.0.0.1:{port}");
299
300        // Resolve the lightpanda binary, auto-installing if needed.
301        let binary = resolve_lightpanda_binary().await?;
302
303        info!(port, binary = %binary.display(), "Spawning lightpanda");
304
305        let child = Command::new(&binary)
306            .args([
307                "serve",
308                "--host", "127.0.0.1",
309                "--port", &port.to_string(),
310                // Keep alive for up to 1 hour — the agent can take minutes
311                // between tool calls, and the default 10s timeout kills the
312                // session mid-conversation.
313                "--timeout", "3600",
314            ])
315            .stdout(Stdio::null())
316            .stderr(Stdio::null())
317            .kill_on_drop(true)
318            .spawn()
319            .map_err(|e| BrowserError::SpawnFailed(e.to_string()))?;
320
321        // Wait for CDP to become available, then discover the WebSocket URL
322        wait_for_cdp(&addr).await?;
323        let ws_url = discover_ws_url(&addr).await?;
324        Self::connect_internal(Some(child), &ws_url).await
325    }
326
    /// Connect to an existing CDP endpoint.
    ///
    /// Use this when the browser is managed externally (e.g. headless Chromium
    /// started by systemd, or a shared Lightpanda instance). No child process
    /// is associated with the returned session, so closing or dropping it
    /// does not terminate the browser.
    ///
    /// # Arguments
    ///
    /// * `cdp_url` — WebSocket URL (e.g. `ws://127.0.0.1:9222/`).
    ///
    /// # Errors
    ///
    /// - [`BrowserError::ConnectionFailed`] if the endpoint is unreachable
    pub async fn connect(cdp_url: &str) -> Result<Self> {
        debug!(url = cdp_url, "Connecting to existing CDP endpoint");
        // `None`: there is no spawned process for this session to own.
        Self::connect_internal(None, cdp_url).await
    }
343
344    /// Shared connection logic for both `launch()` and `connect()`.
345    async fn connect_internal(process: Option<Child>, ws_url: &str) -> Result<Self> {
346        let cdp = CdpClient::connect(ws_url).await?;
347
348        // 1. Enable target discovery
349        cdp.send("Target.setDiscoverTargets", serde_json::json!({"discover": true}), None)
350            .await?;
351
352        // 2. Create a new target (page)
353        let result = cdp
354            .send("Target.createTarget", serde_json::json!({"url": "about:blank"}), None)
355            .await?;
356        let target_id = result["targetId"]
357            .as_str()
358            .ok_or_else(|| BrowserError::ConnectionFailed("no targetId in response".into()))?
359            .to_string();
360
361        // 3. Attach to the target to get a session ID
362        let result = cdp
363            .send(
364                "Target.attachToTarget",
365                serde_json::json!({"targetId": target_id, "flatten": true}),
366                None,
367            )
368            .await?;
369        let session_id = result["sessionId"]
370            .as_str()
371            .ok_or_else(|| BrowserError::ConnectionFailed("no sessionId in response".into()))?
372            .to_string();
373
374        // 4. Enable Page domain (needed for navigation events)
375        cdp.send("Page.enable", serde_json::json!({}), Some(&session_id))
376            .await?;
377
378        debug!(session_id = %session_id, "CDP session established");
379
380        Ok(Self {
381            process,
382            cdp,
383            session_id,
384        })
385    }
386
387    /// Navigate to a URL. Returns the page title after load.
388    ///
389    /// # Errors
390    ///
391    /// - [`BrowserError::NavigationFailed`] on invalid URL or network error
392    pub async fn navigate(&self, url: &str) -> Result<String> {
393        // Subscribe to events BEFORE sending the navigate command
394        let mut events = self.cdp.subscribe();
395
396        self.cdp
397            .send(
398                "Page.navigate",
399                serde_json::json!({"url": url}),
400                Some(&self.session_id),
401            )
402            .await
403            .map_err(|e| BrowserError::NavigationFailed(e.to_string()))?;
404
405        // Wait for Page.loadEventFired
406        let deadline = tokio::time::Instant::now() + CDP_TIMEOUT;
407        loop {
408            match tokio::time::timeout_at(deadline, events.recv()).await {
409                Ok(Ok(event)) => {
410                    if event.get("method").and_then(|m| m.as_str())
411                        == Some("Page.loadEventFired")
412                    {
413                        break;
414                    }
415                }
416                Ok(Err(broadcast::error::RecvError::Lagged(_))) => continue,
417                Ok(Err(_)) => break,
418                Err(_) => {
419                    return Err(BrowserError::NavigationFailed("page load timeout".into()));
420                }
421            }
422        }
423
424        // Get the page title
425        self.evaluate("document.title").await
426    }
427
428    /// Extract text content from the page or a specific element.
429    ///
430    /// - `selector = None` → returns `document.body.innerText` (full page text)
431    /// - `selector = Some("h1")` → returns text of the first matching element
432    ///
433    /// # Errors
434    ///
435    /// - [`BrowserError::ElementNotFound`] if the selector matches nothing
436    /// - [`BrowserError::EvalFailed`] if text extraction fails
437    pub async fn extract(&self, selector: Option<&str>) -> Result<String> {
438        match selector {
439            None => self.evaluate("document.body.textContent").await,
440            Some(sel) => {
441                let sel_json = serde_json::to_string(sel)
442                    .map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
443                let js = format!(
444                    r#"(function(){{ var el = document.querySelector({sel}); if (!el) return null; return el.textContent; }})()"#,
445                    sel = sel_json,
446                );
447                let result = self.evaluate(&js).await?;
448                if result == "null" || result.is_empty() {
449                    Err(BrowserError::ElementNotFound(sel.to_string()))
450                } else {
451                    Ok(result)
452                }
453            }
454        }
455    }
456
457    /// Click an element by CSS selector.
458    ///
459    /// Dispatches a full `MouseEvent` (not just `el.click()`) so that
460    /// framework event listeners (React, Vue, etc.) see the event.
461    /// For submit buttons, also calls `form.requestSubmit()` to ensure
462    /// form submission fires correctly.
463    ///
464    /// # Errors
465    ///
466    /// - [`BrowserError::ElementNotFound`] if the selector matches nothing
467    pub async fn click(&self, selector: &str) -> Result<()> {
468        let sel_json = serde_json::to_string(selector)
469            .map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
470        let js = format!(
471            r#"(function(){{
472  var el = document.querySelector({sel});
473  if (!el) throw new Error('element not found');
474  el.dispatchEvent(new MouseEvent('click', {{bubbles: true, cancelable: true, view: window}}));
475  if ((el.type === 'submit' || el.tagName === 'BUTTON') && el.form) {{
476    try {{ el.form.requestSubmit(el); }} catch(e) {{ el.form.submit(); }}
477  }}
478  return true;
479}})()"#,
480            sel = sel_json,
481        );
482        self.evaluate(&js).await.map_err(|e| {
483            if e.to_string().contains("element not found") {
484                BrowserError::ElementNotFound(selector.to_string())
485            } else {
486                e
487            }
488        })?;
489        Ok(())
490    }
491
492    /// Type text into an element identified by CSS selector.
493    ///
494    /// Uses the native HTMLInputElement value setter to bypass React's
495    /// internal value tracker, then dispatches `input` and `change` events
496    /// so both React controlled inputs and plain HTML inputs update correctly.
497    ///
498    /// # Errors
499    ///
500    /// - [`BrowserError::ElementNotFound`] if the selector matches nothing
501    pub async fn type_text(&self, selector: &str, text: &str) -> Result<()> {
502        let sel_json = serde_json::to_string(selector)
503            .map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
504        let val_json = serde_json::to_string(text)
505            .map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
506        let js = format!(
507            r#"(function(){{
508  var el = document.querySelector({sel});
509  if (!el) throw new Error('element not found');
510  el.focus();
511  var nativeSetter = Object.getOwnPropertyDescriptor(
512    HTMLInputElement.prototype, 'value'
513  );
514  if (nativeSetter && nativeSetter.set) {{
515    nativeSetter.set.call(el, {val});
516  }} else {{
517    el.value = {val};
518  }}
519  el.dispatchEvent(new Event('input', {{bubbles: true}}));
520  el.dispatchEvent(new Event('change', {{bubbles: true}}));
521  return true;
522}})()"#,
523            sel = sel_json,
524            val = val_json,
525        );
526        self.evaluate(&js).await.map_err(|e| {
527            if e.to_string().contains("element not found") {
528                BrowserError::ElementNotFound(selector.to_string())
529            } else {
530                e
531            }
532        })?;
533        Ok(())
534    }
535
536    /// Execute JavaScript on the page and return the result as a string.
537    ///
538    /// String values are returned as-is. Numbers, booleans, objects, and arrays
539    /// are serialized via `serde_json::Value::to_string()`.
540    ///
541    /// # Errors
542    ///
543    /// - [`BrowserError::EvalFailed`] on syntax errors or runtime exceptions
544    pub async fn evaluate(&self, js: &str) -> Result<String> {
545        // Wrap in an IIFE to isolate variable declarations between calls.
546        // Without this, consecutive evaluate() calls that declare `const` or
547        // `let` variables with the same name would fail with
548        // "Identifier has already been declared".
549        //
550        // Code that is already an IIFE `(function(){...})()` is left as-is.
551        // Simple expressions get `return` prepended so they return a value.
552        // Multi-statement code is wrapped as-is (caller must use `return`).
553        let trimmed = js.trim();
554        let wrapped = if trimmed.starts_with("(function") {
555            // Already an IIFE — don't double-wrap
556            trimmed.to_string()
557        } else if trimmed.contains(';') || trimmed.contains('\n') {
558            format!("(function(){{{trimmed}}})()")
559        } else {
560            format!("(function(){{ return {trimmed} }})()")
561        };
562        let result = self
563            .cdp
564            .send(
565                "Runtime.evaluate",
566                serde_json::json!({
567                    "expression": wrapped,
568                    "returnByValue": true,
569                }),
570                Some(&self.session_id),
571            )
572            .await
573            .map_err(|e| BrowserError::EvalFailed(e.to_string()))?;
574
575        // Check for exception
576        if let Some(exc) = result.get("exceptionDetails") {
577            // Try exception.description first (has the real message),
578            // fall back to exceptionDetails.text
579            let text = exc
580                .get("exception")
581                .and_then(|e| e.get("description"))
582                .and_then(|d| d.as_str())
583                .or_else(|| exc.get("text").and_then(|t| t.as_str()))
584                .unwrap_or("unknown error");
585            return Err(BrowserError::EvalFailed(text.to_string()));
586        }
587
588        let value = &result["result"]["value"];
589        match value {
590            serde_json::Value::String(s) => Ok(s.clone()),
591            serde_json::Value::Null => Ok(String::new()),
592            other => Ok(other.to_string()),
593        }
594    }
595
    /// Get the current page URL.
    ///
    /// Reads `window.location.href` through [`evaluate()`](Self::evaluate).
    ///
    /// # Errors
    ///
    /// - [`BrowserError::EvalFailed`] if the expression cannot be evaluated
    pub async fn url(&self) -> Result<String> {
        self.evaluate("window.location.href").await
    }
600
    /// Returns `true` if this session was auto-spawned (vs. connected to an
    /// external endpoint).
    ///
    /// Equivalent to checking whether the session owns a child process.
    pub fn is_auto_spawned(&self) -> bool {
        self.process.is_some()
    }
606
607    /// Close the browser session and kill the process if auto-spawned.
608    ///
609    /// This is the preferred way to end a session. If not called, the [`Drop`]
610    /// impl will still attempt to kill the child process, but cannot await
611    /// its termination.
612    pub async fn close(mut self) -> Result<()> {
613        if let Some(ref mut child) = self.process {
614            debug!("Killing auto-spawned browser process");
615            let _ = child.kill().await;
616        }
617        Ok(())
618    }
619}
620
621impl Drop for BrowserSession {
622    fn drop(&mut self) {
623        if let Some(ref mut child) = self.process {
624            // Best-effort kill on drop. This is non-async so we can only
625            // *start* the kill signal; the OS will reap the process.
626            let _ = child.start_kill();
627        }
628    }
629}
630
631// ---------------------------------------------------------------------------
632// Auto-install
633// ---------------------------------------------------------------------------
634
635/// Resolve the `lightpanda` binary path.
636///
637/// 1. Check if `lightpanda` is on `PATH` (via `which`).
638/// 2. Check the default install location (`~/.local/bin/lightpanda`).
639/// 3. If not found, download from GitHub releases and install to `~/.local/bin/`.
640async fn resolve_lightpanda_binary() -> Result<PathBuf> {
641    // 1. Already on PATH?
642    if let Ok(path) = which_lightpanda().await {
643        debug!(path = %path.display(), "Found lightpanda on PATH");
644        return Ok(path);
645    }
646
647    // 2. Check default install location
648    let install_dir = default_install_dir()?;
649    let binary_path = install_dir.join("lightpanda");
650    if binary_path.is_file() {
651        debug!(path = %binary_path.display(), "Found lightpanda in ~/.local/bin");
652        return Ok(binary_path);
653    }
654
655    // 3. Auto-install
656    info!("lightpanda not found — downloading automatically");
657    install_lightpanda(&install_dir).await?;
658    Ok(binary_path)
659}
660
661/// Try to find `lightpanda` on PATH using `which`.
662async fn which_lightpanda() -> Result<PathBuf> {
663    let output = tokio::process::Command::new("which")
664        .arg("lightpanda")
665        .output()
666        .await
667        .map_err(|e| BrowserError::SpawnFailed(e.to_string()))?;
668
669    if output.status.success() {
670        let path = String::from_utf8_lossy(&output.stdout).trim().to_string();
671        if !path.is_empty() {
672            return Ok(PathBuf::from(path));
673        }
674    }
675    Err(BrowserError::SpawnFailed("not on PATH".into()))
676}
677
678/// Returns `~/.local/bin`, creating it if it doesn't exist.
679fn default_install_dir() -> Result<PathBuf> {
680    let home = std::env::var("HOME")
681        .map_err(|_| BrowserError::InstallFailed("HOME not set".into()))?;
682    let dir = PathBuf::from(home).join(".local").join("bin");
683    Ok(dir)
684}
685
686/// Returns the platform-specific asset name for lightpanda GitHub releases.
687fn lightpanda_asset_name() -> Result<&'static str> {
688    let os = std::env::consts::OS;
689    let arch = std::env::consts::ARCH;
690
691    match (os, arch) {
692        ("macos", "aarch64") => Ok("lightpanda-aarch64-macos"),
693        ("macos", "x86_64") => Ok("lightpanda-x86_64-macos"),
694        ("linux", "aarch64") => Ok("lightpanda-aarch64-linux"),
695        ("linux", "x86_64") => Ok("lightpanda-x86_64-linux"),
696        _ => Err(BrowserError::InstallFailed(format!(
697            "unsupported platform: {os}/{arch}"
698        ))),
699    }
700}
701
702/// Download and install the lightpanda binary to `install_dir`.
703async fn install_lightpanda(install_dir: &std::path::Path) -> Result<()> {
704    let asset = lightpanda_asset_name()?;
705    let url = format!(
706        "https://github.com/lightpanda-io/browser/releases/download/nightly/{asset}"
707    );
708
709    info!(url = %url, "Downloading lightpanda");
710
711    // Download with curl (follows redirects, which GitHub requires)
712    let output = tokio::process::Command::new("curl")
713        .args(["-fsSL", "--output", "-", &url])
714        .output()
715        .await
716        .map_err(|e| BrowserError::InstallFailed(format!("curl failed: {e}")))?;
717
718    if !output.status.success() {
719        let stderr = String::from_utf8_lossy(&output.stderr);
720        return Err(BrowserError::InstallFailed(format!(
721            "download failed ({}): {stderr}",
722            output.status
723        )));
724    }
725
726    if output.stdout.is_empty() {
727        return Err(BrowserError::InstallFailed(
728            "downloaded file is empty".into(),
729        ));
730    }
731
732    // Create install directory
733    tokio::fs::create_dir_all(install_dir)
734        .await
735        .map_err(|e| {
736            BrowserError::InstallFailed(format!(
737                "cannot create {}: {e}",
738                install_dir.display()
739            ))
740        })?;
741
742    let binary_path = install_dir.join("lightpanda");
743
744    // Write binary
745    tokio::fs::write(&binary_path, &output.stdout)
746        .await
747        .map_err(|e| BrowserError::InstallFailed(format!("cannot write binary: {e}")))?;
748
749    // Make executable
750    #[cfg(unix)]
751    {
752        use std::os::unix::fs::PermissionsExt;
753        let perms = std::fs::Permissions::from_mode(0o755);
754        tokio::fs::set_permissions(&binary_path, perms)
755            .await
756            .map_err(|e| BrowserError::InstallFailed(format!("chmod failed: {e}")))?;
757    }
758
759    info!(path = %binary_path.display(), "lightpanda installed successfully");
760    Ok(())
761}
762
763// ---------------------------------------------------------------------------
764// Helpers
765// ---------------------------------------------------------------------------
766
767/// Find a free TCP port by binding to port 0 and immediately releasing.
768async fn find_free_port() -> Result<u16> {
769    let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
770        .await
771        .map_err(|e| BrowserError::SpawnFailed(format!("cannot bind: {e}")))?;
772    let port = listener
773        .local_addr()
774        .map_err(|e| BrowserError::SpawnFailed(format!("cannot get addr: {e}")))?
775        .port();
776    drop(listener);
777    Ok(port)
778}
779
780/// Fetch the WebSocket debugger URL from the CDP `/json/version` endpoint.
781///
782/// Falls back to `ws://{addr}/` if the endpoint is unavailable.
783async fn discover_ws_url(addr: &str) -> Result<String> {
784    let url = format!("http://{addr}/json/version");
785
786    let output = tokio::process::Command::new("curl")
787        .args(["-sf", "--max-time", "5", &url])
788        .output()
789        .await
790        .map_err(|e| BrowserError::ConnectionFailed(format!("curl /json/version: {e}")))?;
791
792    if output.status.success() {
793        if let Ok(json) = serde_json::from_slice::<serde_json::Value>(&output.stdout) {
794            if let Some(ws) = json.get("webSocketDebuggerUrl").and_then(|v| v.as_str()) {
795                debug!(ws_url = %ws, "Discovered WebSocket URL");
796                return Ok(ws.to_string());
797            }
798        }
799    }
800
801    // Fallback — append trailing slash (lightpanda requires it)
802    let fallback = format!("ws://{addr}/");
803    debug!(ws_url = %fallback, "Using fallback WebSocket URL");
804    Ok(fallback)
805}
806
807/// Wait for a TCP endpoint to accept connections, polling every 100ms.
808///
809/// Times out after 10 seconds with [`BrowserError::Timeout`].
810async fn wait_for_cdp(addr: &str) -> Result<()> {
811    let deadline = tokio::time::Instant::now() + Duration::from_secs(10);
812
813    loop {
814        if tokio::time::Instant::now() > deadline {
815            return Err(BrowserError::Timeout);
816        }
817
818        match tokio::net::TcpStream::connect(addr).await {
819            Ok(_) => {
820                debug!(addr, "CDP endpoint is ready");
821                return Ok(());
822            }
823            Err(_) => {
824                tokio::time::sleep(Duration::from_millis(100)).await;
825            }
826        }
827    }
828}
829
830// ---------------------------------------------------------------------------
831// Tests
832// ---------------------------------------------------------------------------
833
#[cfg(test)]
mod tests {
    use super::*;

    // -- Unit tests (no browser required) --

    /// The OS-assigned port must be a real (nonzero) port number.
    #[tokio::test]
    async fn find_free_port_returns_nonzero() {
        let port = find_free_port().await.unwrap();
        assert!(port > 0, "port should be nonzero, got {port}");
    }

    /// Two consecutive calls both yield usable ports.
    ///
    /// The ports are not guaranteed to differ (though they practically always
    /// will), so only validity is asserted.
    #[tokio::test]
    async fn find_free_port_returns_different_ports() {
        let p1 = find_free_port().await.unwrap();
        let p2 = find_free_port().await.unwrap();
        assert!(p1 > 0, "first port should be nonzero, got {p1}");
        assert!(p2 > 0, "second port should be nonzero, got {p2}");
    }

    /// `wait_for_cdp` returns `Ok` when something is already listening.
    #[tokio::test]
    async fn wait_for_cdp_succeeds_when_listener_exists() {
        // Keep the listener bound for the duration of the test.
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
            .await
            .unwrap();
        let addr = listener.local_addr().unwrap().to_string();

        // Should succeed immediately since the port is already listening.
        let result = wait_for_cdp(&addr).await;
        assert!(result.is_ok(), "should connect to existing listener");
    }

    /// `wait_for_cdp` must not report success when nothing is listening.
    #[tokio::test]
    async fn wait_for_cdp_times_out_on_closed_port() {
        // Find a port and immediately close it.
        let port = find_free_port().await.unwrap();
        let addr = format!("127.0.0.1:{port}");

        // `wait_for_cdp` uses a fixed 10s limit internally, so cap the wait
        // from the outside to keep the test fast.
        let cap = Duration::from_millis(500);
        let start = tokio::time::Instant::now();
        let result = tokio::time::timeout(cap, wait_for_cdp(&addr)).await;
        let elapsed = start.elapsed();

        // Either our cap fired (outer `Err`) or `wait_for_cdp` itself
        // reported a failure (inner `Err`); only `Ok(Ok(()))` is wrong.
        assert!(!matches!(result, Ok(Ok(()))), "should fail on closed port");

        // The outer cap must end the wait far sooner than the internal 10s
        // limit; the slack absorbs scheduler jitter on busy machines.
        assert!(
            elapsed < cap + Duration::from_secs(2),
            "should not hang, took {elapsed:?}"
        );
    }

    /// `wait_for_cdp` keeps polling until a late-starting listener appears.
    #[tokio::test]
    async fn wait_for_cdp_succeeds_when_listener_starts_late() {
        let port = find_free_port().await.unwrap();
        let addr_str = format!("127.0.0.1:{port}");
        let addr_clone = addr_str.clone();

        // Start a listener after a 200ms delay. The task returns the
        // listener so it stays alive (held by the JoinHandle) until the end
        // of the test.
        let _listener_handle = tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(200)).await;
            tokio::net::TcpListener::bind(&addr_clone).await.unwrap()
        });

        // wait_for_cdp should poll until it connects.
        let result = wait_for_cdp(&addr_str).await;
        assert!(result.is_ok(), "should connect after delayed start");
    }

    /// Every error variant renders its expected `Display` message.
    #[test]
    fn error_display_messages() {
        let cases = [
            (
                BrowserError::SpawnFailed("not found".into()),
                "failed to spawn lightpanda: not found",
            ),
            (
                BrowserError::ElementNotFound("div.missing".into()),
                "element not found: div.missing",
            ),
            (
                BrowserError::Timeout,
                "timeout waiting for browser to start",
            ),
            (BrowserError::NotConnected, "browser not connected"),
            (
                BrowserError::ConnectionFailed("refused".into()),
                "CDP connection failed: refused",
            ),
            (
                BrowserError::NavigationFailed("404".into()),
                "navigation failed: 404",
            ),
            (
                BrowserError::EvalFailed("syntax error".into()),
                "JS evaluation failed: syntax error",
            ),
        ];

        for (err, expected) in &cases {
            assert_eq!(err.to_string(), *expected);
        }
    }

    /// The release asset name is resolvable and carries the expected prefix.
    #[test]
    fn lightpanda_asset_name_returns_valid_name() {
        // Should succeed on any supported CI/dev platform.
        let name = lightpanda_asset_name().unwrap();
        assert!(
            name.starts_with("lightpanda-"),
            "asset name should start with 'lightpanda-', got: {name}"
        );
    }

    /// `InstallFailed` renders its expected `Display` message.
    #[test]
    fn install_failed_error_display() {
        let err = BrowserError::InstallFailed("no curl".into());
        assert_eq!(err.to_string(), "failed to install lightpanda: no curl");
    }

    /// The default install directory ends in `.local/bin`.
    #[test]
    fn default_install_dir_is_under_home() {
        let dir = default_install_dir().unwrap();
        assert!(
            dir.ends_with(".local/bin"),
            "install dir should end with .local/bin, got: {}",
            dir.display()
        );
    }

    /// Connecting to a port nobody listens on yields `ConnectionFailed`.
    #[tokio::test]
    async fn connect_fails_on_bad_endpoint() {
        let port = find_free_port().await.unwrap();
        let result = BrowserSession::connect(&format!("ws://127.0.0.1:{port}")).await;

        match result {
            Ok(_) => panic!("should fail on unreachable endpoint"),
            Err(err) => assert!(
                matches!(err, BrowserError::ConnectionFailed(_)),
                "expected ConnectionFailed, got: {err}"
            ),
        }
    }

    // -- Integration tests (require a running CDP browser) --
    //
    // These tests are marked `#[ignore]`. All but `integration_launch_and_close`
    // read the endpoint from the BROWSER_CDP_URL env var; that last test
    // needs the lightpanda binary on PATH instead.
    // To run them:
    //
    //   # Start Lightpanda
    //   lightpanda serve &
    //
    //   # Run integration tests
    //   BROWSER_CDP_URL=ws://127.0.0.1:9222/ cargo test -p starpod-browser -- --ignored
    //

    /// Read the CDP endpoint for integration tests from `BROWSER_CDP_URL`.
    fn cdp_url() -> Option<String> {
        std::env::var("BROWSER_CDP_URL").ok()
    }

    /// Connect to the browser named by `BROWSER_CDP_URL`.
    ///
    /// Panics if the variable is unset or the connection fails, which is the
    /// desired outcome inside an integration test.
    async fn connect_to_test_browser() -> BrowserSession {
        let url = cdp_url().expect("BROWSER_CDP_URL not set");
        BrowserSession::connect(&url).await.unwrap()
    }

    /// Connecting to an external browser and navigating yields a page title.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_connect_and_navigate() {
        let session = connect_to_test_browser().await;
        assert!(!session.is_auto_spawned());

        let title = session.navigate("https://example.com").await.unwrap();
        assert!(
            !title.is_empty(),
            "title should not be empty after navigating to example.com"
        );

        session.close().await.unwrap();
    }

    /// `extract` returns text for the whole page and for a single selector.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_extract_page_text() {
        let session = connect_to_test_browser().await;

        session.navigate("https://example.com").await.unwrap();

        // Full page text
        let text = session.extract(None).await.unwrap();
        assert!(
            text.contains("Example Domain"),
            "page text should contain 'Example Domain', got: {text}"
        );

        // Specific element
        let h1 = session.extract(Some("h1")).await.unwrap();
        assert_eq!(h1.trim(), "Example Domain");

        session.close().await.unwrap();
    }

    /// `evaluate` returns string, numeric and boolean results as strings.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_evaluate_javascript() {
        let session = connect_to_test_browser().await;

        session.navigate("https://example.com").await.unwrap();

        // String result
        let title = session.evaluate("document.title").await.unwrap();
        assert!(!title.is_empty());

        // Numeric result (serialized as string)
        let sum = session.evaluate("1 + 2").await.unwrap();
        assert_eq!(sum, "3");

        // Boolean result
        let t = session.evaluate("true").await.unwrap();
        assert_eq!(t, "true");

        session.close().await.unwrap();
    }

    /// Clicking an existing element succeeds.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_click_element() {
        let session = connect_to_test_browser().await;

        session.navigate("https://example.com").await.unwrap();

        // example.com has a link — clicking it should work (or at least not error)
        if let Err(err) = session.click("a").await {
            panic!("clicking a link should succeed, got: {err}");
        }

        session.close().await.unwrap();
    }

    /// Clicking a selector that matches nothing yields `ElementNotFound`.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_element_not_found() {
        let session = connect_to_test_browser().await;

        session.navigate("https://example.com").await.unwrap();

        let result = session.click("div.nonexistent-class-12345").await;
        assert!(
            matches!(result, Err(BrowserError::ElementNotFound(_))),
            "should return ElementNotFound for missing selector"
        );

        session.close().await.unwrap();
    }

    /// `url` reports the address of the page that was navigated to.
    #[tokio::test]
    #[ignore = "requires running CDP browser (set BROWSER_CDP_URL)"]
    async fn integration_get_url() {
        let session = connect_to_test_browser().await;

        session.navigate("https://example.com").await.unwrap();
        let page_url = session.url().await.unwrap();
        assert!(
            page_url.contains("example.com"),
            "URL should contain example.com, got: {page_url}"
        );

        session.close().await.unwrap();
    }

    /// An auto-spawned session can navigate and shut down cleanly.
    #[tokio::test]
    #[ignore = "requires lightpanda binary on PATH"]
    async fn integration_launch_and_close() {
        let session = BrowserSession::launch().await.unwrap();
        assert!(session.is_auto_spawned());

        let title = session.navigate("https://example.com").await.unwrap();
        assert!(!title.is_empty());

        session.close().await.unwrap();
    }
}
1111}