1use crate::{Result, WebCaptureError};
10use std::path::{Path, PathBuf};
11use std::sync::atomic::{AtomicU64, Ordering};
12use std::time::{Duration, SystemTime, UNIX_EPOCH};
13use tokio::process::Command;
14use tracing::{debug, info};
15
/// Process-wide counter, starting at zero, whose successive values are
/// embedded in temporary Chrome user-data directory names so that names
/// generated by this process differ even when the timestamp does not.
static USER_DATA_DIR_COUNTER: AtomicU64 = AtomicU64::new(0);
17
/// Identifies which browser engine is selected.
///
/// There is currently a single variant, which is also the `Default`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum BrowserEngine {
    /// The only available engine; chosen by `BrowserEngine::default()`.
    #[default]
    Chromiumoxide,
}
25
26impl std::fmt::Display for BrowserEngine {
27 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28 match self {
29 Self::Chromiumoxide => write!(f, "chromiumoxide"),
30 }
31 }
32}
33
34impl std::str::FromStr for BrowserEngine {
35 type Err = WebCaptureError;
36
37 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
38 match s.to_lowercase().as_str() {
39 "chromiumoxide" | "chromium" | "chrome" => Ok(Self::Chromiumoxide),
40 _ => Err(WebCaptureError::BrowserError(format!(
41 "Unknown browser engine: {s}"
42 ))),
43 }
44 }
45}
46
47pub async fn render_html(url: &str) -> Result<String> {
66 render_html_with_timeout(url, Duration::from_secs(60)).await
67}
68
69pub async fn render_html_with_timeout(url: &str, timeout: Duration) -> Result<String> {
75 info!("Rendering HTML for URL: {}", url);
76
77 let chrome = find_chrome_executable().ok_or_else(|| {
78 WebCaptureError::BrowserError(
79 "Chrome/Chromium executable was not found. Set WEB_CAPTURE_CHROME, CHROME_PATH, or GOOGLE_CHROME_BIN.".to_string(),
80 )
81 })?;
82 let user_data_dir = temporary_user_data_dir();
83 std::fs::create_dir_all(&user_data_dir)?;
84 let args = chrome_render_args(&user_data_dir, url);
85 debug!(
86 chrome = %chrome.display(),
87 user_data_dir = %user_data_dir.display(),
88 args = ?args,
89 "launching headless Chrome for DOM capture"
90 );
91
92 let mut command = Command::new(&chrome);
93 command.args(&args).kill_on_drop(true);
94 let output_result = tokio::time::timeout(timeout, command.output()).await;
95 let _ = std::fs::remove_dir_all(&user_data_dir);
96
97 let output = output_result
98 .map_err(|_| {
99 WebCaptureError::BrowserError(format!(
100 "Timed out waiting for headless Chrome to render {url}"
101 ))
102 })?
103 .map_err(|e| WebCaptureError::BrowserError(format!("Failed to launch Chrome: {e}")))?;
104 debug!(
105 status = %output.status,
106 stdout_bytes = output.stdout.len(),
107 stderr_bytes = output.stderr.len(),
108 stderr = %String::from_utf8_lossy(&output.stderr),
109 "headless Chrome DOM capture finished"
110 );
111
112 if !output.status.success() {
113 return Err(WebCaptureError::BrowserError(format!(
114 "Headless Chrome failed with status {}: {}",
115 output.status,
116 String::from_utf8_lossy(&output.stderr)
117 )));
118 }
119
120 let html = String::from_utf8(output.stdout)
121 .map_err(|e| WebCaptureError::BrowserError(format!("Chrome output was not UTF-8: {e}")))?;
122
123 info!("Successfully rendered HTML ({} bytes)", html.len());
124 Ok(html)
125}
126
127fn chrome_render_args(user_data_dir: &Path, url: &str) -> Vec<String> {
128 let mut args = common_chrome_args(user_data_dir);
129 args.extend([
130 "--dump-dom".to_string(),
131 "--timeout=30000".to_string(),
132 "--virtual-time-budget=8000".to_string(),
133 "--run-all-compositor-stages-before-draw".to_string(),
134 "--window-size=1280,800".to_string(),
135 url.to_string(),
136 ]);
137 args
138}
139
/// Returns the Chrome flags shared by every invocation in this module,
/// ending with a `--user-data-dir=` flag pointing at `user_data_dir`.
fn common_chrome_args(user_data_dir: &Path) -> Vec<String> {
    const FIXED_FLAGS: &[&str] = &[
        "--headless=new",
        "--disable-gpu",
        "--disable-extensions",
        "--disable-dev-shm-usage",
        "--disable-background-networking",
        "--disable-component-update",
        "--disable-default-apps",
        "--disable-sync",
        "--metrics-recording-only",
        "--no-default-browser-check",
        "--no-first-run",
        "--no-sandbox",
    ];

    let mut args: Vec<String> = FIXED_FLAGS
        .iter()
        .map(|flag| (*flag).to_string())
        .collect();
    args.push(format!("--user-data-dir={}", user_data_dir.display()));
    args
}
157
158pub(crate) fn find_chrome_executable() -> Option<PathBuf> {
159 for env_var in [
160 "WEB_CAPTURE_CHROME",
161 "CHROME_PATH",
162 "GOOGLE_CHROME_BIN",
163 "CHROMIUM_PATH",
164 ] {
165 if let Ok(path) = std::env::var(env_var) {
166 let candidate = PathBuf::from(path);
167 if candidate.exists() {
168 return Some(candidate);
169 }
170 }
171 }
172
173 for name in [
174 "google-chrome",
175 "google-chrome-stable",
176 "chromium",
177 "chromium-browser",
178 "chrome",
179 ] {
180 if let Some(path) = find_on_path(name) {
181 return Some(path);
182 }
183 }
184
185 for path in [
186 "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
187 "/Applications/Chromium.app/Contents/MacOS/Chromium",
188 r"C:\Program Files\Google\Chrome\Application\chrome.exe",
189 r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
190 r"C:\Program Files\Chromium\Application\chrome.exe",
191 ] {
192 let candidate = PathBuf::from(path);
193 if candidate.exists() {
194 return Some(candidate);
195 }
196 }
197
198 None
199}
200
/// Searches every directory listed in the `PATH` environment variable for a
/// file called `name` (and, on Windows, `name.exe`).
///
/// Only regular files (or symlinks resolving to one) are accepted: a
/// directory that happens to share the binary's name is skipped instead of
/// being returned as if it were an executable.
///
/// Returns `None` when `PATH` is unset or no matching file is found.
fn find_on_path(name: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    std::env::split_paths(&search_path).find_map(|dir| {
        let candidate = dir.join(name);
        // `is_file` rather than `exists`: `exists` is also true for
        // directories, which cannot be launched.
        if candidate.is_file() {
            return Some(candidate);
        }
        #[cfg(windows)]
        {
            let candidate = dir.join(format!("{name}.exe"));
            if candidate.is_file() {
                return Some(candidate);
            }
        }
        None
    })
}
218
219pub(crate) fn temporary_user_data_dir() -> PathBuf {
220 let nonce = SystemTime::now()
221 .duration_since(UNIX_EPOCH)
222 .map_or(0, |duration| duration.as_nanos());
223 let seq = USER_DATA_DIR_COUNTER.fetch_add(1, Ordering::Relaxed);
224 std::env::temp_dir().join(format!(
225 "web-capture-chrome-{}-{nonce}-{seq}",
226 std::process::id()
227 ))
228}
229
230pub async fn capture_screenshot(url: &str) -> Result<Vec<u8>> {
247 info!("Capturing screenshot for URL: {}", url);
248
249 let chrome = find_chrome_executable().ok_or_else(|| {
250 WebCaptureError::ScreenshotError(
251 "Chrome/Chromium executable was not found. Set WEB_CAPTURE_CHROME, CHROME_PATH, or GOOGLE_CHROME_BIN.".to_string(),
252 )
253 })?;
254
255 let user_data_dir = temporary_user_data_dir();
256 std::fs::create_dir_all(&user_data_dir).map_err(|e| {
257 WebCaptureError::ScreenshotError(format!("Failed to create temp user data dir: {e}"))
258 })?;
259
260 let screenshot_path = temporary_screenshot_path();
261 let args = chrome_screenshot_args(&user_data_dir, &screenshot_path, url);
262 debug!(
263 chrome = %chrome.display(),
264 user_data_dir = %user_data_dir.display(),
265 screenshot_path = %screenshot_path.display(),
266 args = ?args,
267 "launching headless Chrome for screenshot capture"
268 );
269
270 let output_result = tokio::time::timeout(
271 Duration::from_secs(60),
272 Command::new(&chrome).args(&args).output(),
273 )
274 .await;
275 let _ = std::fs::remove_dir_all(&user_data_dir);
276
277 let output = output_result
278 .map_err(|_| {
279 WebCaptureError::ScreenshotError(format!(
280 "Timed out waiting for headless Chrome to capture {url}"
281 ))
282 })?
283 .map_err(|e| WebCaptureError::ScreenshotError(format!("Failed to launch Chrome: {e}")))?;
284 debug!(
285 status = %output.status,
286 stdout_bytes = output.stdout.len(),
287 stderr_bytes = output.stderr.len(),
288 stderr = %String::from_utf8_lossy(&output.stderr),
289 "headless Chrome screenshot capture finished"
290 );
291
292 if !output.status.success() {
293 let _ = std::fs::remove_file(&screenshot_path);
294 return Err(WebCaptureError::ScreenshotError(format!(
295 "Headless Chrome failed with status {}: {}",
296 output.status,
297 String::from_utf8_lossy(&output.stderr)
298 )));
299 }
300
301 let bytes = read_screenshot_bytes(&screenshot_path)?;
302 let _ = std::fs::remove_file(&screenshot_path);
303
304 if bytes.len() < 8 || &bytes[..8] != b"\x89PNG\r\n\x1a\n" {
305 return Err(WebCaptureError::ScreenshotError(
306 "Chrome screenshot output was not a valid PNG".to_string(),
307 ));
308 }
309
310 info!("Successfully captured screenshot ({} bytes)", bytes.len());
311 Ok(bytes)
312}
313
/// Produces a path under the system temp directory for a screenshot file.
///
/// The file name combines the process id, the current time in nanoseconds
/// since the Unix epoch (0 if the clock reads earlier than the epoch) and a
/// per-process sequence number. The sequence number keeps names distinct
/// when two calls observe the same clock value, so concurrent captures
/// cannot be handed the same file. The file itself is not created.
fn temporary_screenshot_path() -> PathBuf {
    // Function-local so the counter is private to screenshot naming.
    static SCREENSHOT_PATH_COUNTER: AtomicU64 = AtomicU64::new(0);

    let nonce = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |duration| duration.as_nanos());
    let seq = SCREENSHOT_PATH_COUNTER.fetch_add(1, Ordering::Relaxed);
    std::env::temp_dir().join(format!(
        "web-capture-screenshot-{}-{nonce}-{seq}.png",
        std::process::id()
    ))
}
323
324fn chrome_screenshot_args(user_data_dir: &Path, screenshot_path: &Path, url: &str) -> Vec<String> {
325 let mut args = common_chrome_args(user_data_dir);
326 args.extend([
327 "--hide-scrollbars".to_string(),
328 "--window-size=1280,800".to_string(),
329 "--timeout=30000".to_string(),
330 format!("--screenshot={}", screenshot_path.display()),
331 url.to_string(),
332 ]);
333 args
334}
335
336fn read_screenshot_bytes(path: &Path) -> Result<Vec<u8>> {
337 std::fs::read(path).map_err(|e| {
338 WebCaptureError::ScreenshotError(format!("Failed to read screenshot file: {e}"))
339 })
340}