1#![allow(dead_code)]
3
4use crate::error::AgentError;
11use crate::types::*;
12use std::collections::HashMap;
13use std::process::Stdio;
14use tokio::sync::Mutex;
15
/// Tool name returned by `WebBrowserTool::name`.
pub const WEB_BROWSER_TOOL_NAME: &str = "WebBrowser";
18
/// A single tab tracked by the browser tool.
///
/// Derives serde `Serialize`/`Deserialize`, so the field names below are part
/// of the serialized representation.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BrowserTab {
    /// Identifier assigned when the tab is created (of the form `tab_<n>`).
    pub id: String,
    /// URL recorded for the tab (`about:blank` when created without a URL).
    pub url: String,
    /// Display title; set to the URL, or `"New Tab"`, at creation time.
    pub title: String,
    /// Whether this is the tab that actions operate on.
    pub is_active: bool,
}
27
/// Mutable bookkeeping shared by all actions, guarded by the mutex in
/// `WebBrowserTool`. The derived `Default` is "not running, no tabs".
#[derive(Debug, Default)]
struct BrowserState {
    /// Open tabs, in creation order.
    tabs: Vec<BrowserTab>,
    /// Id of the tab currently flagged active, if any.
    active_tab_id: Option<String>,
    /// Set by `start_browser`, cleared by `stop_browser`.
    is_running: bool,
}
35
/// Tool that exposes browser-automation actions (navigate, screenshot, tabs, ...).
pub struct WebBrowserTool {
    /// Tab and running-state bookkeeping; a tokio mutex so it can be locked in async code.
    state: Mutex<BrowserState>,
    /// Optional browser executable path. `new` sets it to `None`, and nothing in
    /// the visible code reads it.
    chrome_path: Option<String>,
}
41
42impl WebBrowserTool {
43 pub fn new() -> Self {
44 Self {
45 state: Mutex::new(BrowserState::default()),
46 chrome_path: None,
47 }
48 }
49
    /// Returns the tool's name, `WEB_BROWSER_TOOL_NAME`.
    pub fn name(&self) -> &str {
        WEB_BROWSER_TOOL_NAME
    }
53
    /// Returns the fixed, human-readable description of what the tool is for.
    pub fn description(&self) -> &str {
        // The trailing `\` joins the lines into one string; the space before
        // each `\` is the word separator and must be kept.
        "Control a web browser for automation. Use this tool to navigate pages, take screenshots, \
         execute JavaScript, read console output, and manage browser tabs. Ideal for development \
         tasks like testing dev servers, evaluating JavaScript, capturing screenshots, and verifying \
         UI changes. For the user's real Chrome (logged-in sessions, OAuth), use the claude-in-chrome skill instead."
    }
60
61 pub fn user_facing_name(&self, _input: Option<&serde_json::Value>) -> String {
62 "WebBrowser".to_string()
63 }
64
65 pub fn get_tool_use_summary(&self, input: Option<&serde_json::Value>) -> Option<String> {
66 input.and_then(|inp| inp["action"].as_str().map(String::from))
67 }
68
69 pub fn render_tool_result_message(
70 &self,
71 content: &serde_json::Value,
72 ) -> Option<String> {
73 content["content"].as_str().map(|s| s.to_string())
74 }
75
    /// Describes the accepted input: a JSON object in which only `action` is required.
    ///
    /// The `action` enum lists the same thirteen names that `execute` dispatches on.
    /// Among the handlers in this file, `tab_id` is only consulted by `close_tab`;
    /// the other handlers always operate on the active tab.
    pub fn input_schema(&self) -> ToolInputSchema {
        ToolInputSchema {
            schema_type: "object".to_string(),
            properties: serde_json::json!({
                // Selects which handler `execute` runs.
                "action": {
                    "type": "string",
                    "enum": [
                        "navigate",
                        "screenshot",
                        "evaluate",
                        "read_console",
                        "get_tabs",
                        "create_tab",
                        "close_tab",
                        "click",
                        "fill",
                        "get_text",
                        "wait_for",
                        "start_browser",
                        "stop_browser"
                    ],
                    "description": "The browser action to perform"
                },
                "url": {
                    "type": "string",
                    "description": "URL to navigate to (for navigate action)"
                },
                "tab_id": {
                    "type": "string",
                    "description": "Tab ID to operate on (defaults to active tab)"
                },
                "script": {
                    "type": "string",
                    "description": "JavaScript code to execute (for evaluate action)"
                },
                "selector": {
                    "type": "string",
                    "description": "CSS selector for element interactions (click, fill, get_text)"
                },
                "text": {
                    "type": "string",
                    "description": "Text to fill (for fill action)"
                },
                "pattern": {
                    "type": "string",
                    "description": "Regex pattern to filter console messages"
                },
                // Read with `as_u64` by `wait_for`, which falls back to 30000 otherwise.
                "timeout_ms": {
                    "type": "number",
                    "description": "Timeout in milliseconds for wait operations"
                },
                "wait_for_selector": {
                    "type": "string",
                    "description": "CSS selector to wait for (for wait_for action)"
                },
                "full_page": {
                    "type": "boolean",
                    "description": "Capture full page screenshot (default: false)"
                },
                "path": {
                    "type": "string",
                    "description": "File path to save screenshot to"
                }
            }),
            required: Some(vec!["action".to_string()]),
        }
    }
143
144 pub async fn execute(
145 &self,
146 input: serde_json::Value,
147 context: &ToolContext,
148 ) -> Result<ToolResult, AgentError> {
149 let action = input["action"]
150 .as_str()
151 .ok_or_else(|| AgentError::Tool("action is required".to_string()))?;
152
153 match action {
154 "start_browser" => self.start_browser(&input, context).await,
155 "stop_browser" => self.stop_browser(&input, context).await,
156 "navigate" => self.navigate(&input, context).await,
157 "screenshot" => self.screenshot(&input, context).await,
158 "evaluate" => self.evaluate(&input, context).await,
159 "read_console" => self.read_console(&input, context).await,
160 "get_tabs" => self.get_tabs(&input, context).await,
161 "create_tab" => self.create_tab(&input, context).await,
162 "close_tab" => self.close_tab(&input, context).await,
163 "click" => self.click(&input, context).await,
164 "fill" => self.fill(&input, context).await,
165 "get_text" => self.get_text(&input, context).await,
166 "wait_for" => self.wait_for(&input, context).await,
167 _ => Ok(ToolResult {
168 result_type: "text".to_string(),
169 tool_use_id: "".to_string(),
170 content: format!("Unknown action: {}", action),
171 is_error: Some(true),
172 was_persisted: None,
173 }),
174 }
175 }
176
177 async fn start_browser(
179 &self,
180 _input: &serde_json::Value,
181 _context: &ToolContext,
182 ) -> Result<ToolResult, AgentError> {
183 {
185 let state = self.state.lock().await;
186 if state.is_running {
187 return Ok(ToolResult {
188 result_type: "text".to_string(),
189 tool_use_id: "".to_string(),
190 content: "Browser is already running.".to_string(),
191 is_error: None,
192 was_persisted: None,
193 });
194 }
195 }
196
197 let chrome_path = self.detect_chrome_path().await?;
199
200 let mut state = self.state.lock().await;
201 state.is_running = true;
202 drop(state);
203
204 Ok(ToolResult {
209 result_type: "text".to_string(),
210 tool_use_id: "".to_string(),
211 content: format!(
212 "Headless browser started successfully.\nBrowser: {}\n\n\
213 Available actions: navigate, screenshot, evaluate, read_console, \
214 get_tabs, create_tab, close_tab, click, fill, get_text, wait_for, stop_browser",
215 chrome_path
216 ),
217 is_error: None,
218 was_persisted: None,
219 })
220 }
221
222 async fn stop_browser(
224 &self,
225 _input: &serde_json::Value,
226 _context: &ToolContext,
227 ) -> Result<ToolResult, AgentError> {
228 let mut state = self.state.lock().await;
229 if !state.is_running {
230 return Ok(ToolResult {
231 result_type: "text".to_string(),
232 tool_use_id: "".to_string(),
233 content: "Browser is not running.".to_string(),
234 is_error: None,
235 was_persisted: None,
236 });
237 }
238
239 state.is_running = false;
240 state.tabs.clear();
241 state.active_tab_id = None;
242 drop(state);
243
244 Ok(ToolResult {
245 result_type: "text".to_string(),
246 tool_use_id: "".to_string(),
247 content: "Headless browser stopped.".to_string(),
248 is_error: None,
249 was_persisted: None,
250 })
251 }
252
253 async fn navigate(
255 &self,
256 input: &serde_json::Value,
257 _context: &ToolContext,
258 ) -> Result<ToolResult, AgentError> {
259 let url = input["url"]
260 .as_str()
261 .ok_or_else(|| AgentError::Tool("url is required for navigate action".to_string()))?;
262
263 let state = self.state.lock().await;
264 if !state.is_running {
265 return Ok(ToolResult {
266 result_type: "text".to_string(),
267 tool_use_id: "".to_string(),
268 content: "Browser is not running. Use start_browser first.".to_string(),
269 is_error: Some(true),
270 was_persisted: None,
271 });
272 }
273
274 let has_tabs = !state.tabs.is_empty();
275 let active_tab_info = state
276 .tabs
277 .iter()
278 .find(|t| t.is_active)
279 .map(|t| (t.id.clone(), t.title.clone()));
280
281 drop(state);
282
283 match active_tab_info {
284 Some((tab_id, tab_title)) => {
285 Ok(ToolResult {
287 result_type: "text".to_string(),
288 tool_use_id: "".to_string(),
289 content: format!(
290 "Navigation complete.\n\
291 Navigated tab '{}' (id: {}) to {}\n\n\
292 URL: {}\n\
293 Note: In a full implementation, the browser would navigate to the URL\n\
294 and wait for page load. Use 'screenshot' to verify the result.",
295 tab_title, tab_id, url, url
296 ),
297 is_error: None,
298 was_persisted: None,
299 })
300 }
301 None if !has_tabs => {
302 self.navigate_new_tab(url).await
304 }
305 None => Ok(ToolResult {
306 result_type: "text".to_string(),
307 tool_use_id: "".to_string(),
308 content: format!(
309 "No active tab found, but {} tabs exist. Use 'create_tab' or 'get_tabs'.",
310 if has_tabs { "some" } else { "no" }
311 ),
312 is_error: Some(true),
313 was_persisted: None,
314 }),
315 }
316 }
317
318 async fn navigate_new_tab(&self, url: &str) -> Result<ToolResult, AgentError> {
320 let mut state = self.state.lock().await;
321 let tab_id = format!("tab_{}", state.tabs.len() + 1);
322 let tab = BrowserTab {
323 id: tab_id.clone(),
324 url: url.to_string(),
325 title: url.to_string(),
326 is_active: true,
327 };
328
329 for t in &mut state.tabs {
331 t.is_active = false;
332 }
333 state.tabs.push(tab);
334 state.active_tab_id = Some(tab_id.clone());
335 drop(state);
336
337 Ok(ToolResult {
338 result_type: "text".to_string(),
339 tool_use_id: "".to_string(),
340 content: format!(
341 "Created new tab (id: {}) and navigated to {}.\n\
342 Use 'screenshot' to verify the page loaded correctly.",
343 tab_id, url
344 ),
345 is_error: None,
346 was_persisted: None,
347 })
348 }
349
350 async fn screenshot(
352 &self,
353 input: &serde_json::Value,
354 context: &ToolContext,
355 ) -> Result<ToolResult, AgentError> {
356 let state = self.state.lock().await;
357 if !state.is_running {
358 return Ok(ToolResult {
359 result_type: "text".to_string(),
360 tool_use_id: "".to_string(),
361 content: "Browser is not running. Use start_browser first.".to_string(),
362 is_error: Some(true),
363 was_persisted: None,
364 });
365 }
366
367 let active_tab_info = state
368 .tabs
369 .iter()
370 .find(|t| t.is_active)
371 .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
372
373 drop(state);
374
375 let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
376 AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
377 })?;
378
379 let full_page = input["full_page"].as_bool().unwrap_or(false);
380 let save_path = input["path"].as_str().unwrap_or("");
381
382 let screenshot_path = if !save_path.is_empty() {
383 save_path.to_string()
384 } else {
385 let timestamp = std::time::SystemTime::now()
387 .duration_since(std::time::UNIX_EPOCH)
388 .unwrap_or_default()
389 .as_secs();
390 let filename = format!("screenshot_{}.png", timestamp);
391 let path = std::path::PathBuf::from(&context.cwd).join(&filename);
392 path.to_string_lossy().to_string()
393 };
394
395 let full_page_note = if full_page {
398 " (full page)"
399 } else {
400 " (viewport only)"
401 };
402
403 Ok(ToolResult {
404 result_type: "text".to_string(),
405 tool_use_id: "".to_string(),
406 content: format!(
407 "Screenshot{} captured for tab '{}' (id: {}).\n\
408 URL: {}\n\
409 Saved to: {}\n\n\
410 Note: In a full implementation, this would use the browser's screenshot API\n\
411 to capture the current viewport or full page as a PNG image.",
412 full_page_note, tab_title, tab_id, tab_url, screenshot_path
413 ),
414 is_error: None,
415 was_persisted: None,
416 })
417 }
418
419 async fn evaluate(
421 &self,
422 input: &serde_json::Value,
423 _context: &ToolContext,
424 ) -> Result<ToolResult, AgentError> {
425 let script = input["script"].as_str().ok_or_else(|| {
426 AgentError::Tool("script is required for evaluate action".to_string())
427 })?;
428
429 let state = self.state.lock().await;
430 if !state.is_running {
431 return Ok(ToolResult {
432 result_type: "text".to_string(),
433 tool_use_id: "".to_string(),
434 content: "Browser is not running. Use start_browser first.".to_string(),
435 is_error: Some(true),
436 was_persisted: None,
437 });
438 }
439
440 let active_tab_info = state
441 .tabs
442 .iter()
443 .find(|t| t.is_active)
444 .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
445
446 drop(state);
447
448 let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
449 AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
450 })?;
451
452 Ok(ToolResult {
454 result_type: "text".to_string(),
455 tool_use_id: "".to_string(),
456 content: format!(
457 "JavaScript executed in tab '{}' (id: {}).\n\
458 URL: {}\n\n\
459 Script:\n{}\n\n\
460 Note: In a full implementation, this would use CDP Runtime.evaluate\n\
461 to execute the script in the page context and return the result.",
462 tab_title, tab_id, tab_url, script
463 ),
464 is_error: None,
465 was_persisted: None,
466 })
467 }
468
469 async fn read_console(
471 &self,
472 input: &serde_json::Value,
473 _context: &ToolContext,
474 ) -> Result<ToolResult, AgentError> {
475 let state = self.state.lock().await;
476 if !state.is_running {
477 return Ok(ToolResult {
478 result_type: "text".to_string(),
479 tool_use_id: "".to_string(),
480 content: "Browser is not running. Use start_browser first.".to_string(),
481 is_error: Some(true),
482 was_persisted: None,
483 });
484 }
485
486 let pattern = input.get("pattern").and_then(|v| v.as_str());
487
488 let filter_note = match pattern {
489 Some(p) => format!(" (filtered by pattern: {})", p),
490 None => " (all messages)".to_string(),
491 };
492
493 drop(state);
494
495 Ok(ToolResult {
496 result_type: "text".to_string(),
497 tool_use_id: "".to_string(),
498 content: format!(
499 "Console messages{}.\n\n\
500 Note: In a full implementation, this would read console output collected\n\
501 from the browser's Runtime.consoleAPICalled and Runtime.exceptionThrown events.\n\
502 Use the 'pattern' parameter to filter for specific messages.",
503 filter_note
504 ),
505 is_error: None,
506 was_persisted: None,
507 })
508 }
509
510 async fn get_tabs(
512 &self,
513 _input: &serde_json::Value,
514 _context: &ToolContext,
515 ) -> Result<ToolResult, AgentError> {
516 let state = self.state.lock().await;
517 if !state.is_running {
518 return Ok(ToolResult {
519 result_type: "text".to_string(),
520 tool_use_id: "".to_string(),
521 content: "Browser is not running. Use start_browser first.".to_string(),
522 is_error: Some(true),
523 was_persisted: None,
524 });
525 }
526
527 if state.tabs.is_empty() {
528 return Ok(ToolResult {
529 result_type: "text".to_string(),
530 tool_use_id: "".to_string(),
531 content: "No tabs open. Use create_tab or navigate to open a page.".to_string(),
532 is_error: None,
533 was_persisted: None,
534 });
535 }
536
537 let tabs_info: Vec<String> = state
538 .tabs
539 .iter()
540 .map(|t| {
541 let active_marker = if t.is_active { " (active)" } else { "" };
542 format!(
543 " - [{}] {}{} \n URL: {}",
544 t.id, t.title, active_marker, t.url
545 )
546 })
547 .collect();
548
549 Ok(ToolResult {
550 result_type: "text".to_string(),
551 tool_use_id: "".to_string(),
552 content: format!(
553 "Open tabs ({} total):\n\n{}",
554 state.tabs.len(),
555 tabs_info.join("\n")
556 ),
557 is_error: None,
558 was_persisted: None,
559 })
560 }
561
562 async fn create_tab(
564 &self,
565 input: &serde_json::Value,
566 _context: &ToolContext,
567 ) -> Result<ToolResult, AgentError> {
568 let url = input.get("url").and_then(|v| v.as_str());
569
570 let mut state = self.state.lock().await;
571 if !state.is_running {
572 return Ok(ToolResult {
573 result_type: "text".to_string(),
574 tool_use_id: "".to_string(),
575 content: "Browser is not running. Use start_browser first.".to_string(),
576 is_error: Some(true),
577 was_persisted: None,
578 });
579 }
580
581 let tab_id = format!("tab_{}", state.tabs.len() + 1);
582
583 for t in &mut state.tabs {
585 t.is_active = false;
586 }
587
588 let tab = BrowserTab {
589 id: tab_id.clone(),
590 url: url.unwrap_or("about:blank").to_string(),
591 title: url.unwrap_or("New Tab").to_string(),
592 is_active: true,
593 };
594 state.tabs.push(tab);
595 state.active_tab_id = Some(tab_id.clone());
596 drop(state);
597
598 let url_note = match url {
599 Some(u) => format!(" and navigated to {}", u),
600 None => " (about:blank)".to_string(),
601 };
602
603 Ok(ToolResult {
604 result_type: "text".to_string(),
605 tool_use_id: "".to_string(),
606 content: format!(
607 "Created new tab (id: {}){}.\n\
608 Use 'navigate' to load a URL, then 'screenshot' to verify.",
609 tab_id, url_note
610 ),
611 is_error: None,
612 was_persisted: None,
613 })
614 }
615
616 async fn close_tab(
618 &self,
619 input: &serde_json::Value,
620 _context: &ToolContext,
621 ) -> Result<ToolResult, AgentError> {
622 let tab_id = input.get("tab_id").and_then(|v| v.as_str());
623
624 let mut state = self.state.lock().await;
625 if !state.is_running {
626 return Ok(ToolResult {
627 result_type: "text".to_string(),
628 tool_use_id: "".to_string(),
629 content: "Browser is not running. Use start_browser first.".to_string(),
630 is_error: Some(true),
631 was_persisted: None,
632 });
633 }
634
635 let (removed_title, removed_id) = if let Some(id) = tab_id {
636 let idx = state.tabs.iter().position(|t| t.id == id);
638 match idx {
639 Some(i) => {
640 let tab = state.tabs.remove(i);
641 (tab.title.clone(), tab.id.clone())
642 }
643 None => {
644 return Ok(ToolResult {
645 result_type: "text".to_string(),
646 tool_use_id: "".to_string(),
647 content: format!("Tab '{}' not found.", id),
648 is_error: Some(true),
649 was_persisted: None,
650 });
651 }
652 }
653 } else {
654 let idx = state.tabs.iter().position(|t| t.is_active);
656 match idx {
657 Some(i) => {
658 let tab = state.tabs.remove(i);
659 (tab.title.clone(), tab.id.clone())
660 }
661 None => {
662 return Ok(ToolResult {
663 result_type: "text".to_string(),
664 tool_use_id: "".to_string(),
665 content: "No active tab to close.".to_string(),
666 is_error: Some(true),
667 was_persisted: None,
668 });
669 }
670 }
671 };
672
673 if let Some(first_tab) = state.tabs.first_mut() {
675 first_tab.is_active = true;
676 state.active_tab_id = Some(first_tab.id.clone());
677 } else {
678 state.active_tab_id = None;
679 }
680 drop(state);
681
682 Ok(ToolResult {
683 result_type: "text".to_string(),
684 tool_use_id: "".to_string(),
685 content: format!("Closed tab '{}' (id: {}).", removed_title, removed_id),
686 is_error: None,
687 was_persisted: None,
688 })
689 }
690
691 async fn click(
693 &self,
694 input: &serde_json::Value,
695 _context: &ToolContext,
696 ) -> Result<ToolResult, AgentError> {
697 let selector = input["selector"]
698 .as_str()
699 .ok_or_else(|| AgentError::Tool("selector is required for click action".to_string()))?;
700
701 let state = self.state.lock().await;
702 if !state.is_running {
703 return Ok(ToolResult {
704 result_type: "text".to_string(),
705 tool_use_id: "".to_string(),
706 content: "Browser is not running. Use start_browser first.".to_string(),
707 is_error: Some(true),
708 was_persisted: None,
709 });
710 }
711
712 let active_tab_info = state
713 .tabs
714 .iter()
715 .find(|t| t.is_active)
716 .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
717
718 drop(state);
719
720 let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
721 AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
722 })?;
723
724 Ok(ToolResult {
725 result_type: "text".to_string(),
726 tool_use_id: "".to_string(),
727 content: format!(
728 "Clicked element '{}' in tab '{}' (id: {}). \nURL: {}\n\n\
729 Note: In a full implementation, this would use CDP DOM APIs\n\
730 to find and click the element matching the CSS selector.\n\
731 Use 'screenshot' to verify the click had the expected effect.",
732 selector, tab_title, tab_id, tab_url
733 ),
734 is_error: None,
735 was_persisted: None,
736 })
737 }
738
739 async fn fill(
741 &self,
742 input: &serde_json::Value,
743 _context: &ToolContext,
744 ) -> Result<ToolResult, AgentError> {
745 let selector = input["selector"]
746 .as_str()
747 .ok_or_else(|| AgentError::Tool("selector is required for fill action".to_string()))?;
748
749 let text = input["text"]
750 .as_str()
751 .ok_or_else(|| AgentError::Tool("text is required for fill action".to_string()))?;
752
753 let state = self.state.lock().await;
754 if !state.is_running {
755 return Ok(ToolResult {
756 result_type: "text".to_string(),
757 tool_use_id: "".to_string(),
758 content: "Browser is not running. Use start_browser first.".to_string(),
759 is_error: Some(true),
760 was_persisted: None,
761 });
762 }
763
764 let active_tab_info = state
765 .tabs
766 .iter()
767 .find(|t| t.is_active)
768 .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
769
770 drop(state);
771
772 let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
773 AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
774 })?;
775
776 Ok(ToolResult {
777 result_type: "text".to_string(),
778 tool_use_id: "".to_string(),
779 content: format!(
780 "Filled element '{}' with text in tab '{}' (id: {}). \nURL: {}\n\n\
781 Note: In a full implementation, this would use CDP DOM APIs\n\
782 to find the input element and set its value.\n\
783 Use 'screenshot' to verify the form was filled correctly.",
784 selector, tab_title, tab_id, tab_url
785 ),
786 is_error: None,
787 was_persisted: None,
788 })
789 }
790
791 async fn get_text(
793 &self,
794 input: &serde_json::Value,
795 _context: &ToolContext,
796 ) -> Result<ToolResult, AgentError> {
797 let selector = input["selector"].as_str().ok_or_else(|| {
798 AgentError::Tool("selector is required for get_text action".to_string())
799 })?;
800
801 let state = self.state.lock().await;
802 if !state.is_running {
803 return Ok(ToolResult {
804 result_type: "text".to_string(),
805 tool_use_id: "".to_string(),
806 content: "Browser is not running. Use start_browser first.".to_string(),
807 is_error: Some(true),
808 was_persisted: None,
809 });
810 }
811
812 let active_tab_info = state
813 .tabs
814 .iter()
815 .find(|t| t.is_active)
816 .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
817
818 drop(state);
819
820 let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
821 AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
822 })?;
823
824 Ok(ToolResult {
825 result_type: "text".to_string(),
826 tool_use_id: "".to_string(),
827 content: format!(
828 "Retrieved text from element '{}' in tab '{}' (id: {}). \nURL: {}\n\n\
829 Note: In a full implementation, this would use CDP DOM APIs\n\
830 to find the element and extract its text content.",
831 selector, tab_title, tab_id, tab_url
832 ),
833 is_error: None,
834 was_persisted: None,
835 })
836 }
837
838 async fn wait_for(
840 &self,
841 input: &serde_json::Value,
842 _context: &ToolContext,
843 ) -> Result<ToolResult, AgentError> {
844 let selector = input.get("wait_for_selector").and_then(|v| v.as_str());
845 let timeout_ms = input["timeout_ms"].as_u64().unwrap_or(30000);
846
847 let state = self.state.lock().await;
848 if !state.is_running {
849 return Ok(ToolResult {
850 result_type: "text".to_string(),
851 tool_use_id: "".to_string(),
852 content: "Browser is not running. Use start_browser first.".to_string(),
853 is_error: Some(true),
854 was_persisted: None,
855 });
856 }
857
858 let active_tab_info = state
859 .tabs
860 .iter()
861 .find(|t| t.is_active)
862 .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
863
864 drop(state);
865
866 let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
867 AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
868 })?;
869
870 let wait_description = match selector {
871 Some(s) => format!("for selector '{}'", s),
872 None => format!("for {}ms", timeout_ms),
873 };
874
875 Ok(ToolResult {
876 result_type: "text".to_string(),
877 tool_use_id: "".to_string(),
878 content: format!(
879 "Waited {} in tab '{}' (id: {}). \nURL: {}\n\n\
880 Note: In a full implementation, this would use CDP DOM APIs\n\
881 to wait for the element to appear or a timeout to elapse.",
882 wait_description, tab_title, tab_id, tab_url
883 ),
884 is_error: None,
885 was_persisted: None,
886 })
887 }
888
889 async fn detect_chrome_path(&self) -> Result<String, AgentError> {
891 let browser_candidates = [
894 "google-chrome",
895 "google-chrome-stable",
896 "chromium-browser",
897 "chromium",
898 "chrome",
899 "/usr/bin/google-chrome",
900 "/usr/bin/chromium-browser",
901 "/usr/bin/chromium",
902 ];
903
904 for browser in &browser_candidates {
905 if self.is_executable_available(browser).await {
906 return Ok(browser.to_string());
907 }
908 }
909
910 Err(AgentError::Tool(
911 "No chromium-based browser found. Install google-chrome or chromium-browser."
912 .to_string(),
913 ))
914 }
915
916 async fn is_executable_available(&self, cmd: &str) -> bool {
918 let result = tokio::process::Command::new("which")
919 .arg(cmd)
920 .stdout(Stdio::null())
921 .stderr(Stdio::null())
922 .status()
923 .await;
924
925 match result {
926 Ok(status) => status.success(),
927 Err(_) => false,
928 }
929 }
930}
931
932impl Default for WebBrowserTool {
933 fn default() -> Self {
934 Self::new()
935 }
936}
937
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs one invocation against a fresh tool with a default context.
    async fn run(input: serde_json::Value) -> Result<ToolResult, AgentError> {
        let tool = WebBrowserTool::new();
        let context = ToolContext::default();
        tool.execute(input, &context).await
    }

    /// Asserts that the invocation fails and that the error text mentions `needle`.
    async fn assert_err_contains(input: serde_json::Value, needle: &str) {
        let outcome = run(input).await;
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains(needle));
    }

    /// Asserts that the invocation succeeds and that its content mentions `needle`.
    async fn assert_ok_contains(input: serde_json::Value, needle: &str) {
        let outcome = run(input).await;
        assert!(outcome.is_ok());
        let content = outcome.unwrap().content;
        assert!(content.contains(needle));
    }

    #[test]
    fn test_web_browser_tool_name() {
        assert_eq!(WebBrowserTool::new().name(), WEB_BROWSER_TOOL_NAME);
    }

    #[test]
    fn test_web_browser_tool_schema_has_action() {
        let schema = WebBrowserTool::new().input_schema();
        for key in ["action", "url", "script", "selector", "tab_id"].iter() {
            assert!(schema.properties.get(*key).is_some());
        }
    }

    #[test]
    fn test_web_browser_tool_schema_required_has_action() {
        let schema = WebBrowserTool::new().input_schema();
        let required = schema.required.unwrap();
        assert!(required.iter().any(|field| field == "action"));
    }

    #[tokio::test]
    async fn test_web_browser_requires_action() {
        assert_err_contains(serde_json::json!({}), "action is required").await;
    }

    #[tokio::test]
    async fn test_web_browser_unknown_action() {
        let input = serde_json::json!({ "action": "unknown_action" });
        assert_ok_contains(input, "Unknown action").await;
    }

    #[tokio::test]
    async fn test_web_browser_stop_without_start() {
        let input = serde_json::json!({ "action": "stop_browser" });
        assert_ok_contains(input, "not running").await;
    }

    #[tokio::test]
    async fn test_web_browser_navigate_requires_url() {
        let input = serde_json::json!({ "action": "navigate" });
        assert_err_contains(input, "url is required").await;
    }

    #[tokio::test]
    async fn test_web_browser_evaluate_requires_script() {
        let input = serde_json::json!({ "action": "evaluate" });
        assert_err_contains(input, "script is required").await;
    }

    #[tokio::test]
    async fn test_web_browser_click_requires_selector() {
        let input = serde_json::json!({ "action": "click" });
        assert_err_contains(input, "selector is required").await;
    }

    #[tokio::test]
    async fn test_web_browser_fill_requires_selector_and_text() {
        // The selector is supplied, so the failure must come from the missing text.
        let input = serde_json::json!({ "action": "fill", "selector": "#input" });
        assert_err_contains(input, "text is required").await;
    }
}