1use crate::{Result, WebCaptureError};
10use std::path::{Path, PathBuf};
11use std::sync::atomic::{AtomicU64, Ordering};
12use std::time::{Duration, SystemTime, UNIX_EPOCH};
13use tokio::process::Command;
14use tracing::info;
15
/// Process-wide sequence number mixed into temporary Chrome profile directory
/// names so that two directories generated by this process never share a name,
/// even if they are created at the same timestamp.
static USER_DATA_DIR_COUNTER: AtomicU64 = AtomicU64::new(0);
17
/// Names a browser engine.
///
/// Values can be parsed from text through `FromStr` and printed through
/// `Display`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BrowserEngine {
    /// The only engine currently defined; also the `Default` value.
    #[default]
    Chromiumoxide,
}
25
26impl std::fmt::Display for BrowserEngine {
27 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28 match self {
29 Self::Chromiumoxide => write!(f, "chromiumoxide"),
30 }
31 }
32}
33
34impl std::str::FromStr for BrowserEngine {
35 type Err = WebCaptureError;
36
37 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
38 match s.to_lowercase().as_str() {
39 "chromiumoxide" | "chromium" | "chrome" => Ok(Self::Chromiumoxide),
40 _ => Err(WebCaptureError::BrowserError(format!(
41 "Unknown browser engine: {s}"
42 ))),
43 }
44 }
45}
46
47pub async fn render_html(url: &str) -> Result<String> {
66 info!("Rendering HTML for URL: {}", url);
67
68 let chrome = find_chrome_executable().ok_or_else(|| {
69 WebCaptureError::BrowserError(
70 "Chrome/Chromium executable was not found. Set WEB_CAPTURE_CHROME, CHROME_PATH, or GOOGLE_CHROME_BIN.".to_string(),
71 )
72 })?;
73 let user_data_dir = temporary_user_data_dir();
74 std::fs::create_dir_all(&user_data_dir)?;
75
76 let output = tokio::time::timeout(
77 Duration::from_secs(60),
78 Command::new(&chrome)
79 .arg("--headless=new")
80 .arg("--disable-gpu")
81 .arg("--disable-extensions")
82 .arg("--disable-dev-shm-usage")
83 .arg("--no-sandbox")
84 .arg("--dump-dom")
85 .arg(format!("--user-data-dir={}", user_data_dir.display()))
86 .arg(url)
87 .output(),
88 )
89 .await
90 .map_err(|_| {
91 WebCaptureError::BrowserError(format!(
92 "Timed out waiting for headless Chrome to render {url}"
93 ))
94 })?
95 .map_err(|e| WebCaptureError::BrowserError(format!("Failed to launch Chrome: {e}")))?;
96
97 let _ = std::fs::remove_dir_all(&user_data_dir);
98
99 if !output.status.success() {
100 return Err(WebCaptureError::BrowserError(format!(
101 "Headless Chrome failed with status {}: {}",
102 output.status,
103 String::from_utf8_lossy(&output.stderr)
104 )));
105 }
106
107 let html = String::from_utf8(output.stdout)
108 .map_err(|e| WebCaptureError::BrowserError(format!("Chrome output was not UTF-8: {e}")))?;
109
110 info!("Successfully rendered HTML ({} bytes)", html.len());
111 Ok(html)
112}
113
114fn find_chrome_executable() -> Option<PathBuf> {
115 for env_var in [
116 "WEB_CAPTURE_CHROME",
117 "CHROME_PATH",
118 "GOOGLE_CHROME_BIN",
119 "CHROMIUM_PATH",
120 ] {
121 if let Ok(path) = std::env::var(env_var) {
122 let candidate = PathBuf::from(path);
123 if candidate.exists() {
124 return Some(candidate);
125 }
126 }
127 }
128
129 for name in [
130 "google-chrome",
131 "google-chrome-stable",
132 "chromium",
133 "chromium-browser",
134 "chrome",
135 ] {
136 if let Some(path) = find_on_path(name) {
137 return Some(path);
138 }
139 }
140
141 for path in [
142 "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
143 "/Applications/Chromium.app/Contents/MacOS/Chromium",
144 r"C:\Program Files\Google\Chrome\Application\chrome.exe",
145 r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
146 r"C:\Program Files\Chromium\Application\chrome.exe",
147 ] {
148 let candidate = PathBuf::from(path);
149 if candidate.exists() {
150 return Some(candidate);
151 }
152 }
153
154 None
155}
156
/// Searches every directory listed in `PATH` for a file called `name`.
///
/// Directories are visited in `PATH` order and the first hit is returned. On
/// Windows, `name` with an `.exe` suffix is also tried in each directory.
/// Only regular files (after following symlinks) count as a match, so a
/// directory that happens to share the name is ignored.
///
/// Returns `None` when `PATH` is unset or no directory contains a match.
fn find_on_path(name: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;

    std::env::split_paths(&search_path).find_map(|dir| {
        let plain = dir.join(name);
        if plain.is_file() {
            return Some(plain);
        }

        #[cfg(windows)]
        {
            let with_exe = dir.join(format!("{name}.exe"));
            if with_exe.is_file() {
                return Some(with_exe);
            }
        }

        None
    })
}
174
175fn temporary_user_data_dir() -> PathBuf {
176 let nonce = SystemTime::now()
177 .duration_since(UNIX_EPOCH)
178 .map_or(0, |duration| duration.as_nanos());
179 let seq = USER_DATA_DIR_COUNTER.fetch_add(1, Ordering::Relaxed);
180 std::env::temp_dir().join(format!(
181 "web-capture-chrome-{}-{nonce}-{seq}",
182 std::process::id()
183 ))
184}
185
186pub async fn capture_screenshot(url: &str) -> Result<Vec<u8>> {
203 info!("Capturing screenshot for URL: {}", url);
204
205 let chrome = find_chrome_executable().ok_or_else(|| {
206 WebCaptureError::ScreenshotError(
207 "Chrome/Chromium executable was not found. Set WEB_CAPTURE_CHROME, CHROME_PATH, or GOOGLE_CHROME_BIN.".to_string(),
208 )
209 })?;
210
211 let user_data_dir = temporary_user_data_dir();
212 std::fs::create_dir_all(&user_data_dir).map_err(|e| {
213 WebCaptureError::ScreenshotError(format!("Failed to create temp user data dir: {e}"))
214 })?;
215
216 let screenshot_path = temporary_screenshot_path();
217 let screenshot_arg = format!("--screenshot={}", screenshot_path.display());
218
219 let output = tokio::time::timeout(
220 Duration::from_secs(60),
221 Command::new(&chrome)
222 .arg("--headless=new")
223 .arg("--disable-gpu")
224 .arg("--disable-extensions")
225 .arg("--disable-dev-shm-usage")
226 .arg("--no-sandbox")
227 .arg("--hide-scrollbars")
228 .arg("--window-size=1280,800")
229 .arg(&screenshot_arg)
230 .arg(format!("--user-data-dir={}", user_data_dir.display()))
231 .arg(url)
232 .output(),
233 )
234 .await
235 .map_err(|_| {
236 WebCaptureError::ScreenshotError(format!(
237 "Timed out waiting for headless Chrome to capture {url}"
238 ))
239 })?
240 .map_err(|e| WebCaptureError::ScreenshotError(format!("Failed to launch Chrome: {e}")))?;
241
242 let _ = std::fs::remove_dir_all(&user_data_dir);
243
244 if !output.status.success() {
245 let _ = std::fs::remove_file(&screenshot_path);
246 return Err(WebCaptureError::ScreenshotError(format!(
247 "Headless Chrome failed with status {}: {}",
248 output.status,
249 String::from_utf8_lossy(&output.stderr)
250 )));
251 }
252
253 let bytes = read_screenshot_bytes(&screenshot_path)?;
254 let _ = std::fs::remove_file(&screenshot_path);
255
256 if bytes.len() < 8 || &bytes[..8] != b"\x89PNG\r\n\x1a\n" {
257 return Err(WebCaptureError::ScreenshotError(
258 "Chrome screenshot output was not a valid PNG".to_string(),
259 ));
260 }
261
262 info!("Successfully captured screenshot ({} bytes)", bytes.len());
263 Ok(bytes)
264}
265
/// Builds a fresh path for a temporary screenshot file.
///
/// The file name combines the process id, the current time in nanoseconds
/// since the Unix epoch (0 if the clock reads earlier than the epoch) and a
/// per-process sequence number, and ends in `.png`. The path lives under the
/// system temp directory; the file itself is not created here.
fn temporary_screenshot_path() -> PathBuf {
    // The timestamp alone is not unique: clocks can be coarse, and the
    // fallback value is a constant 0. The counter guarantees that two calls
    // in the same process never produce the same name.
    static SCREENSHOT_SEQ: AtomicU64 = AtomicU64::new(0);

    let nonce = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |duration| duration.as_nanos());
    let seq = SCREENSHOT_SEQ.fetch_add(1, Ordering::Relaxed);
    std::env::temp_dir().join(format!(
        "web-capture-screenshot-{}-{nonce}-{seq}.png",
        std::process::id()
    ))
}
275
276fn read_screenshot_bytes(path: &Path) -> Result<Vec<u8>> {
277 std::fs::read(path).map_err(|e| {
278 WebCaptureError::ScreenshotError(format!("Failed to read screenshot file: {e}"))
279 })
280}