Skip to main content

punch_types/
browser.rs

1//! Browser automation types — ring-side view into the web.
2//!
3//! This module defines the types, traits, and pool management for CDP-based
4//! browser automation. Agents use these scouting moves to navigate web pages,
5//! take screenshots, click elements, and extract content. The actual CDP
6//! WebSocket driver is plugged in separately — this module provides the
7//! contract and the session arena.
8
9use async_trait::async_trait;
10use chrono::{DateTime, Utc};
11use dashmap::DashMap;
12use serde::{Deserialize, Serialize};
13use uuid::Uuid;
14
15use crate::{PunchError, PunchResult};
16
17// ---------------------------------------------------------------------------
18// Configuration
19// ---------------------------------------------------------------------------
20
21/// Configuration for launching a browser instance.
22///
23/// Sensible defaults let a fighter step into the ring without fuss —
24/// headless Chrome on port 9222, 30-second timeout, standard viewport.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct BrowserConfig {
27    /// Path to the Chrome/Chromium binary. `None` means auto-detect.
28    pub chrome_path: Option<String>,
29    /// Run headless (no visible window). Default: `true`.
30    pub headless: bool,
31    /// Remote debugging port for CDP. Default: `9222`.
32    pub remote_debugging_port: u16,
33    /// Custom user-data directory. `None` uses a temp directory.
34    pub user_data_dir: Option<String>,
35    /// Per-action timeout in seconds. Default: `30`.
36    pub timeout_secs: u64,
37    /// Viewport width in pixels. Default: `1280`.
38    pub viewport_width: u32,
39    /// Viewport height in pixels. Default: `720`.
40    pub viewport_height: u32,
41}
42
43impl Default for BrowserConfig {
44    fn default() -> Self {
45        Self {
46            chrome_path: None,
47            headless: true,
48            remote_debugging_port: 9222,
49            user_data_dir: None,
50            timeout_secs: 30,
51            viewport_width: 1280,
52            viewport_height: 720,
53        }
54    }
55}
56
57// ---------------------------------------------------------------------------
58// Session & State
59// ---------------------------------------------------------------------------
60
61/// The current state of a browser session in the ring.
62#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
63#[serde(rename_all = "snake_case", tag = "status", content = "detail")]
64pub enum BrowserState {
65    /// Browser process is starting up — warming up before the bout.
66    Starting,
67    /// Connected to CDP endpoint — ready to receive orders.
68    Connected,
69    /// A navigation is in progress — fighter is on the move.
70    Navigating,
71    /// Page loaded and ready for interaction — stance is set.
72    Ready,
73    /// An error occurred — fighter took a hit.
74    Error(String),
75    /// Session has been closed — bout is over.
76    Closed,
77}
78
79/// A live browser session — one fighter's ring-side view into the web.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct BrowserSession {
82    /// Unique session identifier.
83    pub id: Uuid,
84    /// When this session was created.
85    pub created_at: DateTime<Utc>,
86    /// The URL currently loaded, if any.
87    pub current_url: Option<String>,
88    /// The page title, if available.
89    pub page_title: Option<String>,
90    /// Current session state.
91    pub state: BrowserState,
92}
93
94impl BrowserSession {
95    /// Create a new session in the `Starting` state.
96    pub fn new() -> Self {
97        Self {
98            id: Uuid::new_v4(),
99            created_at: Utc::now(),
100            current_url: None,
101            page_title: None,
102            state: BrowserState::Starting,
103        }
104    }
105}
106
107impl Default for BrowserSession {
108    fn default() -> Self {
109        Self::new()
110    }
111}
112
113// ---------------------------------------------------------------------------
114// Actions & Results
115// ---------------------------------------------------------------------------
116
117/// An action the agent wants to perform in the browser — a scouting move.
118#[derive(Debug, Clone, Serialize, Deserialize)]
119#[serde(rename_all = "snake_case", tag = "action")]
120pub enum BrowserAction {
121    /// Navigate to a URL.
122    Navigate { url: String },
123    /// Click an element matching the CSS selector.
124    Click { selector: String },
125    /// Type text into an element matching the CSS selector.
126    Type { selector: String, text: String },
127    /// Take a screenshot (full page or viewport only).
128    Screenshot { full_page: bool },
129    /// Get the text content of an element (or the whole page).
130    GetContent { selector: Option<String> },
131    /// Get the HTML of an element (or the whole page).
132    GetHtml { selector: Option<String> },
133    /// Wait for an element matching the selector to appear.
134    WaitForSelector { selector: String, timeout_ms: u64 },
135    /// Execute arbitrary JavaScript in the page context.
136    Evaluate { javascript: String },
137    /// Navigate back in history.
138    GoBack,
139    /// Navigate forward in history.
140    GoForward,
141    /// Reload the current page.
142    Reload,
143    /// Close the browser session.
144    Close,
145}
146
147/// The result of executing a browser action.
148#[derive(Debug, Clone, Serialize, Deserialize)]
149pub struct BrowserResult {
150    /// Whether the action succeeded.
151    pub success: bool,
152    /// Result data — screenshot base64, extracted text, JS return value, etc.
153    pub data: serde_json::Value,
154    /// The page URL after the action completed.
155    pub page_url: Option<String>,
156    /// The page title after the action completed.
157    pub page_title: Option<String>,
158    /// How long the action took in milliseconds.
159    pub duration_ms: u64,
160    /// Error message if the action failed.
161    pub error: Option<String>,
162}
163
164impl BrowserResult {
165    /// Construct a successful result with the given data.
166    pub fn ok(data: serde_json::Value) -> Self {
167        Self {
168            success: true,
169            data,
170            page_url: None,
171            page_title: None,
172            duration_ms: 0,
173            error: None,
174        }
175    }
176
177    /// Construct a failed result with an error message.
178    pub fn fail(message: impl Into<String>) -> Self {
179        Self {
180            success: false,
181            data: serde_json::Value::Null,
182            page_url: None,
183            page_title: None,
184            duration_ms: 0,
185            error: Some(message.into()),
186        }
187    }
188}
189
190// ---------------------------------------------------------------------------
191// Driver trait
192// ---------------------------------------------------------------------------
193
194/// Trait for a browser automation driver — the engine behind the punches.
195///
196/// Implementations handle the actual CDP WebSocket communication. This trait
197/// defines the contract so drivers can be swapped or mocked in tests.
198#[async_trait]
199pub trait BrowserDriver: Send + Sync {
200    /// Launch a browser instance and return a connected session.
201    async fn launch(&self, config: &BrowserConfig) -> PunchResult<BrowserSession>;
202
203    /// Execute a browser action within the given session.
204    async fn execute(
205        &self,
206        session: &mut BrowserSession,
207        action: BrowserAction,
208    ) -> PunchResult<BrowserResult>;
209
210    /// Close the browser session and clean up resources.
211    async fn close(&self, session: &mut BrowserSession) -> PunchResult<()>;
212}
213
214// ---------------------------------------------------------------------------
215// Session Pool
216// ---------------------------------------------------------------------------
217
218/// A pool of browser sessions — the roster of active ring-side scouts.
219///
220/// Manages concurrent browser sessions with an upper bound, backed by
221/// `DashMap` for lock-free concurrent access.
222pub struct BrowserPool {
223    /// Active sessions keyed by their UUID.
224    sessions: DashMap<Uuid, BrowserSession>,
225    /// Configuration applied to new sessions.
226    config: BrowserConfig,
227    /// Maximum number of concurrent sessions.
228    max_sessions: usize,
229}
230
231impl BrowserPool {
232    /// Create a new browser pool with the given config and session limit.
233    pub fn new(config: BrowserConfig, max_sessions: usize) -> Self {
234        Self {
235            sessions: DashMap::new(),
236            config,
237            max_sessions,
238        }
239    }
240
241    /// Retrieve a clone of a session by its ID.
242    pub fn get_session(&self, id: &Uuid) -> Option<BrowserSession> {
243        self.sessions.get(id).map(|entry| entry.value().clone())
244    }
245
246    /// List all active sessions.
247    pub fn active_sessions(&self) -> Vec<BrowserSession> {
248        self.sessions
249            .iter()
250            .map(|entry| entry.value().clone())
251            .collect()
252    }
253
254    /// Return the number of active sessions.
255    pub fn session_count(&self) -> usize {
256        self.sessions.len()
257    }
258
259    /// Create a new session and add it to the pool.
260    ///
261    /// Returns an error if the pool is at capacity — no room in the ring.
262    pub fn create_session(&self) -> PunchResult<BrowserSession> {
263        if self.sessions.len() >= self.max_sessions {
264            return Err(PunchError::Tool {
265                tool: "browser".into(),
266                message: format!(
267                    "browser pool at capacity ({}/{})",
268                    self.sessions.len(),
269                    self.max_sessions
270                ),
271            });
272        }
273
274        let session = BrowserSession::new();
275        self.sessions.insert(session.id, session.clone());
276        Ok(session)
277    }
278
279    /// Close and remove a session from the pool.
280    pub fn close_session(&self, id: &Uuid) -> PunchResult<()> {
281        self.sessions.remove(id).ok_or_else(|| PunchError::Tool {
282            tool: "browser".into(),
283            message: format!("session {} not found in pool", id),
284        })?;
285        Ok(())
286    }
287
288    /// Close all sessions — clear the ring.
289    pub fn close_all(&self) {
290        self.sessions.clear();
291    }
292
293    /// Get a reference to the pool's browser configuration.
294    pub fn config(&self) -> &BrowserConfig {
295        &self.config
296    }
297}
298
299// ---------------------------------------------------------------------------
300// Tests
301// ---------------------------------------------------------------------------
302
#[cfg(test)]
mod tests {
    use super::*;

    /// The default config carries the documented values.
    #[test]
    fn test_browser_config_defaults() {
        let BrowserConfig {
            chrome_path,
            headless,
            remote_debugging_port,
            user_data_dir,
            timeout_secs,
            viewport_width,
            viewport_height,
        } = BrowserConfig::default();

        assert!(headless);
        assert_eq!(remote_debugging_port, 9222);
        assert!(chrome_path.is_none());
        assert!(user_data_dir.is_none());
        assert_eq!(timeout_secs, 30);
        assert_eq!(viewport_width, 1280);
        assert_eq!(viewport_height, 720);
    }

    /// A new session starts in `Starting` with no URL or title.
    #[test]
    fn test_browser_session_creation() {
        let fresh = BrowserSession::new();
        assert_eq!(fresh.state, BrowserState::Starting);
        assert_eq!(fresh.current_url, None);
        assert_eq!(fresh.page_title, None);
    }

    /// Creating a session registers it and makes it retrievable by ID.
    #[test]
    fn test_browser_pool_create_session() {
        let pool = BrowserPool::new(BrowserConfig::default(), 5);
        assert_eq!(pool.session_count(), 0);

        let created = pool.create_session().expect("should create session");
        assert_eq!(pool.session_count(), 1);

        match pool.get_session(&created.id) {
            Some(found) => assert_eq!(found.id, created.id),
            None => panic!("should exist"),
        }
    }

    /// A full pool refuses further sessions with an "at capacity" error.
    #[test]
    fn test_browser_pool_max_sessions_enforced() {
        let pool = BrowserPool::new(BrowserConfig::default(), 2);

        for label in ["session 1", "session 2"] {
            pool.create_session().expect(label);
        }

        let overflow = pool.create_session();
        assert!(overflow.is_err());
        let text = overflow.unwrap_err().to_string();
        assert!(text.contains("at capacity"), "error: {}", text);
    }

    /// Closing removes the session; closing it a second time is an error.
    #[test]
    fn test_browser_pool_close_session() {
        let pool = BrowserPool::new(BrowserConfig::default(), 5);
        let id = pool.create_session().expect("should create session").id;
        assert_eq!(pool.session_count(), 1);

        pool.close_session(&id).expect("should close session");
        assert_eq!(pool.session_count(), 0);

        // Closing again should fail — fighter already left the ring.
        assert!(pool.close_session(&id).is_err());
    }

    /// `close_all` empties the pool.
    #[test]
    fn test_browser_pool_close_all() {
        let pool = BrowserPool::new(BrowserConfig::default(), 10);
        (0..5).for_each(|_| {
            pool.create_session().expect("should create session");
        });
        assert_eq!(pool.session_count(), 5);

        pool.close_all();
        assert_eq!(pool.session_count(), 0);
    }

    /// A `Navigate` action survives a JSON round trip.
    #[test]
    fn test_browser_action_serialization() {
        let target = "https://example.com";
        let action = BrowserAction::Navigate { url: target.into() };

        let json = serde_json::to_string(&action).expect("should serialize");
        assert!(json.contains("navigate"));
        assert!(json.contains(target));

        let parsed: BrowserAction = serde_json::from_str(&json).expect("should deserialize");
        if let BrowserAction::Navigate { url } = parsed {
            assert_eq!(url, target);
        } else {
            panic!("expected Navigate variant");
        }
    }

    /// `ok` and `fail` populate the success flag, data and error as expected.
    #[test]
    fn test_browser_result_construction() {
        let success = BrowserResult::ok(serde_json::json!({"html": "<h1>Hello</h1>"}));
        assert!(success.success);
        assert!(success.error.is_none());
        assert_eq!(success.data["html"], "<h1>Hello</h1>");

        let failure = BrowserResult::fail("element not found");
        assert!(!failure.success);
        assert_eq!(failure.error, Some("element not found".to_string()));
        assert!(failure.data.is_null());
    }

    /// Every state variant equals itself and differs from all the others.
    #[test]
    fn test_browser_state_transitions() {
        // Verify state variants can be constructed and compared.
        let states = [
            BrowserState::Starting,
            BrowserState::Connected,
            BrowserState::Navigating,
            BrowserState::Ready,
            BrowserState::Error("timeout".into()),
            BrowserState::Closed,
        ];

        // Two entries compare equal exactly when they are the same entry.
        for (i, left) in states.iter().enumerate() {
            for (j, right) in states.iter().enumerate() {
                assert_eq!(left == right, i == j, "states[{}] vs states[{}]", i, j);
            }
        }

        // Error states with different messages are distinct.
        assert_ne!(
            BrowserState::Error("timeout".into()),
            BrowserState::Error("crash".into())
        );
    }

    /// A fully customised config survives a JSON round trip field by field.
    #[test]
    fn test_browser_config_serialization_roundtrip() {
        let original = BrowserConfig {
            chrome_path: Some("/usr/bin/chromium".into()),
            headless: false,
            remote_debugging_port: 9333,
            user_data_dir: Some("/tmp/chrome-data".into()),
            timeout_secs: 60,
            viewport_width: 1920,
            viewport_height: 1080,
        };

        let json = serde_json::to_string(&original).expect("should serialize config");
        let restored: BrowserConfig =
            serde_json::from_str(&json).expect("should deserialize config");

        assert_eq!(restored.chrome_path, original.chrome_path);
        assert_eq!(restored.headless, original.headless);
        assert_eq!(
            restored.remote_debugging_port,
            original.remote_debugging_port
        );
        assert_eq!(restored.user_data_dir, original.user_data_dir);
        assert_eq!(restored.timeout_secs, original.timeout_secs);
        assert_eq!(restored.viewport_width, original.viewport_width);
        assert_eq!(restored.viewport_height, original.viewport_height);
    }

    /// `active_sessions` lists every session that was created.
    #[test]
    fn test_browser_pool_active_sessions() {
        let pool = BrowserPool::new(BrowserConfig::default(), 5);
        let first = pool.create_session().expect("session 1");
        let second = pool.create_session().expect("session 2");

        let active = pool.active_sessions();
        assert_eq!(active.len(), 2);

        for expected in [first.id, second.id] {
            assert!(active.iter().any(|session| session.id == expected));
        }
    }

    /// `Default` yields a session in the `Starting` state.
    #[test]
    fn test_browser_session_default() {
        assert_eq!(BrowserSession::default().state, BrowserState::Starting);
    }
}