halldyll_core/render/
browser.rs

1//! Browser - Headless browser abstraction for JS rendering
2
3use std::time::Duration;
4use url::Url;
5
6use crate::types::error::{Error, Result};
7
8/// Browser response after rendering
9#[derive(Debug, Clone)]
10pub struct BrowserResponse {
11    /// Final URL after redirects
12    pub final_url: Url,
13    /// Rendered HTML content
14    pub html: String,
15    /// Page title
16    pub title: Option<String>,
17    /// Console logs from the page
18    pub console_logs: Vec<ConsoleMessage>,
19    /// Network requests made during rendering
20    pub network_requests: Vec<NetworkRequest>,
21    /// Render duration
22    pub render_time_ms: u64,
23    /// Screenshot (PNG bytes) if requested
24    pub screenshot: Option<Vec<u8>>,
25}
26
27/// Console message from the browser
28#[derive(Debug, Clone)]
29pub struct ConsoleMessage {
30    /// Message level (log, warn, error)
31    pub level: ConsoleLevel,
32    /// Message text
33    pub text: String,
34}
35
/// Severity of a console message emitted by a rendered page.
///
/// The enum is a plain tag without payload, so it is `Copy` and `Hash` in
/// addition to being comparable: levels can be passed by value and used as
/// keys in hash-based collections (for example to count messages per level).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ConsoleLevel {
    /// Regular log output.
    Log,
    /// Warning.
    Warn,
    /// Error.
    Error,
    /// Debug output.
    Debug,
}
48
49/// Network request captured during rendering
50#[derive(Debug, Clone)]
51pub struct NetworkRequest {
52    /// Request URL
53    pub url: String,
54    /// HTTP method
55    pub method: String,
56    /// Resource type (document, script, image, etc.)
57    pub resource_type: ResourceType,
58    /// Response status code (if completed)
59    pub status: Option<u16>,
60}
61
/// Category of resource fetched by a network request.
///
/// The enum is a plain tag without payload, so it is `Copy` and `Hash` in
/// addition to being comparable. That allows cheap pass-by-value and lets
/// callers build a `HashSet<ResourceType>` for fast membership checks, e.g.
/// when deciding whether a request's type is on a block list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResourceType {
    /// HTML document.
    Document,
    /// JavaScript.
    Script,
    /// Stylesheet.
    Stylesheet,
    /// Image.
    Image,
    /// Font.
    Font,
    /// XHR/Fetch request.
    Xhr,
    /// WebSocket.
    WebSocket,
    /// Any other resource.
    Other,
}
82
83/// Browser render options
84#[derive(Debug, Clone)]
85pub struct RenderOptions {
86    /// Timeout for page load
87    pub timeout: Duration,
88    /// Wait for network idle
89    pub wait_for_network_idle: bool,
90    /// Network idle timeout (ms with no requests)
91    pub network_idle_timeout_ms: u64,
92    /// Wait for a specific selector
93    pub wait_for_selector: Option<String>,
94    /// Execute JavaScript before extraction
95    pub execute_script: Option<String>,
96    /// Capture screenshot
97    pub capture_screenshot: bool,
98    /// Viewport width
99    pub viewport_width: u32,
100    /// Viewport height
101    pub viewport_height: u32,
102    /// User agent override
103    pub user_agent: Option<String>,
104    /// Block resource types
105    pub block_resources: Vec<ResourceType>,
106    /// Extra HTTP headers
107    pub extra_headers: Vec<(String, String)>,
108}
109
110impl Default for RenderOptions {
111    fn default() -> Self {
112        Self {
113            timeout: Duration::from_secs(30),
114            wait_for_network_idle: true,
115            network_idle_timeout_ms: 500,
116            wait_for_selector: None,
117            execute_script: None,
118            capture_screenshot: false,
119            viewport_width: 1920,
120            viewport_height: 1080,
121            user_agent: None,
122            block_resources: vec![ResourceType::Image, ResourceType::Font],
123            extra_headers: Vec::new(),
124        }
125    }
126}
127
128/// Browser backend trait for abstraction
129#[allow(async_fn_in_trait)]
130pub trait BrowserBackend: Send + Sync {
131    /// Render a URL and return the result
132    async fn render(&self, url: &Url, options: &RenderOptions) -> Result<BrowserResponse>;
133    
134    /// Check if the browser is healthy
135    async fn health_check(&self) -> Result<()>;
136    
137    /// Close the browser
138    async fn close(&self) -> Result<()>;
139}
140
141/// Headless browser pool for concurrent rendering
142pub struct BrowserPool {
143    /// Backend type
144    backend_type: BrowserBackendType,
145    /// Maximum concurrent browsers
146    max_concurrent: usize,
147    /// Default render options
148    default_options: RenderOptions,
149    /// Current active count
150    active_count: std::sync::atomic::AtomicUsize,
151}
152
/// Browser backends a pool can be configured with.
///
/// The enum is a plain tag without payload, so it is `Copy` and `Hash` in
/// addition to being comparable: it can be passed by value and used as a key
/// in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BrowserBackendType {
    /// Chrome DevTools Protocol (headless Chrome).
    ChromeCdp,
    /// Playwright.
    Playwright,
    /// Puppeteer-like (via external process).
    Puppeteer,
    /// No browser (stub for testing).
    None,
}
165
166impl Default for BrowserPool {
167    fn default() -> Self {
168        Self::new(BrowserBackendType::None, 4)
169    }
170}
171
172impl BrowserPool {
173    /// Create a new browser pool
174    pub fn new(backend_type: BrowserBackendType, max_concurrent: usize) -> Self {
175        Self {
176            backend_type,
177            max_concurrent,
178            default_options: RenderOptions::default(),
179            active_count: std::sync::atomic::AtomicUsize::new(0),
180        }
181    }
182
183    /// Configure default options
184    pub fn with_options(mut self, options: RenderOptions) -> Self {
185        self.default_options = options;
186        self
187    }
188
189    /// Get the backend type
190    pub fn backend_type(&self) -> &BrowserBackendType {
191        &self.backend_type
192    }
193
194    /// Get max concurrent browsers
195    pub fn max_concurrent(&self) -> usize {
196        self.max_concurrent
197    }
198
199    /// Check if a browser slot is available
200    pub fn has_available_slot(&self) -> bool {
201        self.active_count.load(std::sync::atomic::Ordering::Relaxed) < self.max_concurrent
202    }
203
204    /// Acquire a browser slot
205    pub fn acquire(&self) -> Option<BrowserSlot<'_>> {
206        let current = self.active_count.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
207        if current >= self.max_concurrent {
208            self.active_count.fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
209            None
210        } else {
211            Some(BrowserSlot { pool: self })
212        }
213    }
214
215    /// Render a URL using the pool
216    pub async fn render(&self, url: &Url, options: Option<&RenderOptions>) -> Result<BrowserResponse> {
217        let _slot = self.acquire().ok_or_else(|| {
218            Error::Config("No browser slots available".to_string())
219        })?;
220
221        let opts = options.unwrap_or(&self.default_options);
222
223        match self.backend_type {
224            BrowserBackendType::None => {
225                // Stub implementation - return error indicating browser needed
226                Err(Error::Config("No browser backend configured".to_string()))
227            }
228            BrowserBackendType::ChromeCdp => {
229                self.render_chrome_cdp(url, opts).await
230            }
231            BrowserBackendType::Playwright => {
232                self.render_playwright(url, opts).await
233            }
234            BrowserBackendType::Puppeteer => {
235                self.render_puppeteer(url, opts).await
236            }
237        }
238    }
239
240    /// Chrome CDP implementation
241    async fn render_chrome_cdp(&self, url: &Url, options: &RenderOptions) -> Result<BrowserResponse> {
242        // TODO: Implement Chrome DevTools Protocol
243        // This would connect to a running Chrome instance via WebSocket
244        // and use CDP commands to navigate and extract content
245        let _ = (url, options);
246        Err(Error::Config("Chrome CDP backend not yet implemented".to_string()))
247    }
248
249    /// Playwright implementation
250    async fn render_playwright(&self, url: &Url, options: &RenderOptions) -> Result<BrowserResponse> {
251        // TODO: Implement Playwright integration
252        // This would spawn a Playwright process or use a Playwright server
253        let _ = (url, options);
254        Err(Error::Config("Playwright backend not yet implemented".to_string()))
255    }
256
257    /// Puppeteer implementation
258    async fn render_puppeteer(&self, url: &Url, options: &RenderOptions) -> Result<BrowserResponse> {
259        // TODO: Implement Puppeteer integration
260        let _ = (url, options);
261        Err(Error::Config("Puppeteer backend not yet implemented".to_string()))
262    }
263
264    /// Active browser count
265    pub fn active_count(&self) -> usize {
266        self.active_count.load(std::sync::atomic::Ordering::Relaxed)
267    }
268}
269
270/// RAII guard for browser slot
271pub struct BrowserSlot<'a> {
272    pool: &'a BrowserPool,
273}
274
275impl Drop for BrowserSlot<'_> {
276    fn drop(&mut self) {
277        self.pool.active_count.fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
278    }
279}
280
/// Stub browser for testing (returns static HTML).
///
/// Derives `Debug`, `Clone` and `Default` so it can be printed in test
/// failures, duplicated across test cases, and created with empty HTML.
#[derive(Debug, Clone, Default)]
pub struct StubBrowser {
    /// HTML to return.
    html: String,
}
286
287impl StubBrowser {
288    /// Create a new stub browser
289    pub fn new(html: impl Into<String>) -> Self {
290        Self { html: html.into() }
291    }
292}
293
294impl BrowserBackend for StubBrowser {
295    async fn render(&self, url: &Url, _options: &RenderOptions) -> Result<BrowserResponse> {
296        Ok(BrowserResponse {
297            final_url: url.clone(),
298            html: self.html.clone(),
299            title: None,
300            console_logs: Vec::new(),
301            network_requests: Vec::new(),
302            render_time_ms: 0,
303            screenshot: None,
304        })
305    }
306
307    async fn health_check(&self) -> Result<()> {
308        Ok(())
309    }
310
311    async fn close(&self) -> Result<()> {
312        Ok(())
313    }
314}