1use anyhow::{Context, Result, bail};
7use async_trait::async_trait;
8use futures_util::{SinkExt, StreamExt};
9use serde_json::{Value, json};
10use std::sync::Arc;
11use std::time::Duration;
12use tokio::sync::Mutex;
13use tokio_tungstenite::tungstenite::Message;
14use tracing::debug;
15
16use localgpt_core::agent::providers::ToolSchema;
17use localgpt_core::agent::tools::Tool;
18
/// Upper bound used when opening a CDP WebSocket and when waiting for a CDP reply.
const CDP_TIMEOUT: Duration = Duration::from_secs(30);
21
22pub struct BrowserTool {
24 port: u16,
25 state: Arc<Mutex<BrowserState>>,
26}
27
28enum BrowserState {
29 NotStarted,
30 Running {
31 _process: tokio::process::Child, ws_url: String,
33 },
34}
35
36impl BrowserTool {
37 pub fn new(port: u16) -> Self {
38 Self {
39 port,
40 state: Arc::new(Mutex::new(BrowserState::NotStarted)),
41 }
42 }
43
44 async fn ensure_browser(&self) -> Result<String> {
46 let mut state = self.state.lock().await;
47 match &*state {
48 BrowserState::Running { ws_url, .. } => Ok(ws_url.clone()),
49 BrowserState::NotStarted => {
50 let chrome_path = detect_chrome()?;
51 debug!("Launching headless Chrome: {}", chrome_path);
52
53 let user_data = std::env::temp_dir().join("localgpt-chrome");
54 let process = tokio::process::Command::new(&chrome_path)
55 .args([
56 "--headless=new",
57 "--disable-gpu",
58 "--no-sandbox",
59 "--disable-dev-shm-usage",
60 &format!("--remote-debugging-port={}", self.port),
61 &format!("--user-data-dir={}", user_data.display()),
62 "--no-first-run",
63 "--no-default-browser-check",
64 "about:blank",
65 ])
66 .stdout(std::process::Stdio::null())
67 .stderr(std::process::Stdio::null())
68 .kill_on_drop(true)
69 .spawn()
70 .context("Failed to launch Chrome")?;
71
72 let ws_url = wait_for_chrome(self.port).await?;
74 debug!("Chrome ready at {}", ws_url);
75
76 *state = BrowserState::Running {
77 _process: process,
78 ws_url: ws_url.clone(),
79 };
80 Ok(ws_url)
81 }
82 }
83 }
84
85 async fn do_navigate(&self, url: &str) -> Result<String> {
86 let ws_url = self.ensure_browser().await?;
87 let mut cdp = CdpConnection::connect(&ws_url).await?;
88
89 cdp.send("Page.enable", json!({})).await?;
90 cdp.send("Page.navigate", json!({"url": url})).await?;
91
92 tokio::time::sleep(Duration::from_secs(2)).await;
94
95 let result = cdp
97 .send(
98 "Runtime.evaluate",
99 json!({"expression": "document.title + '\\n\\n' + document.body?.innerText?.substring(0, 10000) || ''"}),
100 )
101 .await?;
102
103 let text = result["result"]["value"]
104 .as_str()
105 .unwrap_or("(empty page)")
106 .to_string();
107
108 Ok(format!("URL: {}\n\n{}", url, text))
109 }
110
111 async fn do_screenshot(&self, url: Option<&str>) -> Result<String> {
112 let ws_url = self.ensure_browser().await?;
113 let mut cdp = CdpConnection::connect(&ws_url).await?;
114
115 if let Some(url) = url {
116 cdp.send("Page.enable", json!({})).await?;
117 cdp.send("Page.navigate", json!({"url": url})).await?;
118 tokio::time::sleep(Duration::from_secs(2)).await;
119 }
120
121 let result = cdp
122 .send("Page.captureScreenshot", json!({"format": "png"}))
123 .await?;
124
125 let data = result["data"]
126 .as_str()
127 .ok_or_else(|| anyhow::anyhow!("No screenshot data returned"))?;
128
129 Ok(format!("Screenshot captured ({} bytes base64)", data.len()))
130 }
131
132 async fn do_text(&self, selector: Option<&str>) -> Result<String> {
133 let ws_url = self.ensure_browser().await?;
134 let mut cdp = CdpConnection::connect(&ws_url).await?;
135
136 let expr = if let Some(sel) = selector {
137 format!(
138 "document.querySelector('{}')?.innerText || '(element not found)'",
139 sel.replace('\'', "\\'")
140 )
141 } else {
142 "document.body?.innerText || '(empty)'".to_string()
143 };
144
145 let result = cdp
146 .send("Runtime.evaluate", json!({"expression": expr}))
147 .await?;
148
149 Ok(result["result"]["value"]
150 .as_str()
151 .unwrap_or("(no text)")
152 .to_string())
153 }
154
155 async fn do_click(&self, selector: &str) -> Result<String> {
156 let ws_url = self.ensure_browser().await?;
157 let mut cdp = CdpConnection::connect(&ws_url).await?;
158
159 let expr = format!(
160 "(() => {{ const el = document.querySelector('{}'); if (el) {{ el.click(); return 'clicked'; }} return 'element not found'; }})()",
161 selector.replace('\'', "\\'")
162 );
163
164 let result = cdp
165 .send("Runtime.evaluate", json!({"expression": expr}))
166 .await?;
167
168 Ok(result["result"]["value"]
169 .as_str()
170 .unwrap_or("unknown")
171 .to_string())
172 }
173}
174
175#[async_trait]
176impl Tool for BrowserTool {
177 fn name(&self) -> &str {
178 "browser"
179 }
180
181 fn schema(&self) -> ToolSchema {
182 ToolSchema {
183 name: "browser".to_string(),
184 description: "Interact with web pages via headless Chrome. Actions: navigate (load URL and get text), screenshot (capture page as PNG), text (extract text by CSS selector), click (click element by selector).".to_string(),
185 parameters: json!({
186 "type": "object",
187 "properties": {
188 "action": {
189 "type": "string",
190 "enum": ["navigate", "screenshot", "text", "click"],
191 "description": "Action to perform"
192 },
193 "url": {
194 "type": "string",
195 "description": "URL to navigate to (required for navigate/screenshot)"
196 },
197 "selector": {
198 "type": "string",
199 "description": "CSS selector (for text/click actions)"
200 }
201 },
202 "required": ["action"]
203 }),
204 }
205 }
206
207 async fn execute(&self, arguments: &str) -> Result<String> {
208 let args: Value = serde_json::from_str(arguments)?;
209 let action = args["action"]
210 .as_str()
211 .ok_or_else(|| anyhow::anyhow!("Missing action"))?;
212
213 match action {
214 "navigate" => {
215 let url = args["url"]
216 .as_str()
217 .ok_or_else(|| anyhow::anyhow!("navigate requires 'url'"))?;
218 self.do_navigate(url).await
219 }
220 "screenshot" => {
221 let url = args["url"].as_str();
222 self.do_screenshot(url).await
223 }
224 "text" => {
225 let selector = args["selector"].as_str();
226 self.do_text(selector).await
227 }
228 "click" => {
229 let selector = args["selector"]
230 .as_str()
231 .ok_or_else(|| anyhow::anyhow!("click requires 'selector'"))?;
232 self.do_click(selector).await
233 }
234 other => bail!(
235 "Unknown browser action: '{}'. Use: navigate, screenshot, text, click",
236 other
237 ),
238 }
239 }
240}
241
242impl Drop for BrowserTool {
243 fn drop(&mut self) {
244 }
246}
247
248struct CdpConnection {
251 ws: tokio_tungstenite::WebSocketStream<
252 tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>,
253 >,
254 next_id: u32,
255}
256
257impl CdpConnection {
258 async fn connect(ws_url: &str) -> Result<Self> {
259 let (ws, _) = tokio::time::timeout(CDP_TIMEOUT, tokio_tungstenite::connect_async(ws_url))
260 .await
261 .map_err(|_| anyhow::anyhow!("CDP WebSocket connection timed out"))??;
262
263 Ok(Self { ws, next_id: 1 })
264 }
265
266 async fn send(&mut self, method: &str, params: Value) -> Result<Value> {
267 let id = self.next_id;
268 self.next_id += 1;
269
270 let msg = json!({
271 "id": id,
272 "method": method,
273 "params": params
274 });
275
276 self.ws
277 .send(Message::Text(msg.to_string().into()))
278 .await
279 .context("Failed to send CDP command")?;
280
281 loop {
283 let response = tokio::time::timeout(CDP_TIMEOUT, self.ws.next())
284 .await
285 .map_err(|_| anyhow::anyhow!("CDP response timed out for {}", method))?
286 .ok_or_else(|| anyhow::anyhow!("CDP WebSocket closed"))??;
287
288 if let Message::Text(text) = response {
289 let parsed: Value = serde_json::from_str(text.as_ref())?;
290 if parsed["id"].as_u64() == Some(id as u64) {
291 if let Some(error) = parsed.get("error") {
292 bail!("CDP error for {}: {}", method, error);
293 }
294 return Ok(parsed["result"].clone());
295 }
296 }
298 }
299 }
300}
301
302pub fn detect_chrome() -> Result<String> {
306 let candidates = if cfg!(target_os = "macos") {
307 vec![
308 "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
309 "/Applications/Chromium.app/Contents/MacOS/Chromium",
310 ]
311 } else {
312 vec![]
313 };
314
315 for path in &candidates {
317 if std::path::Path::new(path).exists() {
318 return Ok(path.to_string());
319 }
320 }
321
322 for name in &["google-chrome", "chromium", "chromium-browser", "chrome"] {
324 if which::which(name).is_ok() {
325 return Ok(name.to_string());
326 }
327 }
328
329 bail!(
330 "Chrome/Chromium not found. Install Google Chrome or Chromium.\n\
331 Searched: {:?} + PATH lookup for google-chrome, chromium, chrome",
332 candidates
333 )
334}
335
336async fn wait_for_chrome(port: u16) -> Result<String> {
338 let url = format!("http://127.0.0.1:{}/json/version", port);
339 let client = reqwest::Client::new();
340
341 for attempt in 0..20 {
342 tokio::time::sleep(Duration::from_millis(250)).await;
343
344 if let Ok(resp) = client.get(&url).send().await
345 && let Ok(info) = resp.json::<Value>().await
346 && let Some(ws_url) = info["webSocketDebuggerUrl"].as_str()
347 {
348 return Ok(ws_url.to_string());
349 }
350
351 if attempt > 0 && attempt % 5 == 0 {
352 debug!("Waiting for Chrome to start (attempt {})", attempt);
353 }
354 }
355
356 bail!("Chrome failed to start within 5 seconds on port {}", port)
357}
358
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `execute` on a fresh tool and asserts that it fails with a message
    /// containing `needle`. None of the inputs used here reach the point where Chrome
    /// would be launched.
    async fn assert_execute_fails_with(arguments: &str, needle: &str) {
        let tool = BrowserTool::new(19222);
        let outcome = tool.execute(arguments).await;
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains(needle));
    }

    #[test]
    fn test_browser_tool_schema() {
        let tool = BrowserTool::new(9222);
        assert_eq!(tool.name(), "browser");

        let schema = tool.schema();
        assert_eq!(schema.name, "browser");

        let properties = &schema.parameters["properties"];
        for key in ["action", "url", "selector"] {
            assert!(properties[key].is_object());
        }
        assert_eq!(schema.parameters["required"][0], "action");
    }

    #[test]
    fn test_detect_chrome_path() {
        // Chrome may legitimately be absent on the test machine, so only a successful
        // detection is checked.
        if let Ok(path) = detect_chrome() {
            assert!(!path.is_empty());
        }
    }

    #[tokio::test]
    async fn test_browser_missing_action() {
        assert_execute_fails_with(r#"{"url": "https://example.com"}"#, "Missing action").await;
    }

    #[tokio::test]
    async fn test_browser_unknown_action() {
        assert_execute_fails_with(r#"{"action": "fly"}"#, "Unknown browser action").await;
    }

    #[tokio::test]
    async fn test_browser_navigate_missing_url() {
        assert_execute_fails_with(r#"{"action": "navigate"}"#, "requires 'url'").await;
    }

    #[tokio::test]
    async fn test_browser_click_missing_selector() {
        assert_execute_fails_with(r#"{"action": "click"}"#, "requires 'selector'").await;
    }
}