Skip to main content

ai_agent/tools/
web_browser.rs

1// Source: /data/home/swei/claudecode/openclaudecode/src/tools/WebBrowserTool/WebBrowserPanel.tsx
2#![allow(dead_code)]
3
4//! WebBrowser tool - controls a headless browser for web automation.
5//!
6//! Feature-gated (WEB_BROWSER_TOOL) in TypeScript. Provides browser automation
7//! capabilities including navigation, screenshots, JavaScript execution,
8//! console reading, and tab management.
9
10use crate::error::AgentError;
11use crate::types::*;
12use std::collections::HashMap;
13use std::process::Stdio;
14use tokio::sync::Mutex;
15
/// Name of the WebBrowser tool, as returned by `WebBrowserTool::name`.
pub const WEB_BROWSER_TOOL_NAME: &str = "WebBrowser";
18
19/// Represents a browser tab
20#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
21pub struct BrowserTab {
22    pub id: String,
23    pub url: String,
24    pub title: String,
25    pub is_active: bool,
26}
27
28/// Internal browser state
29#[derive(Debug, Default)]
30struct BrowserState {
31    tabs: Vec<BrowserTab>,
32    active_tab_id: Option<String>,
33    is_running: bool,
34}
35
36/// WebBrowser tool - controls a headless browser for web automation
37pub struct WebBrowserTool {
38    state: Mutex<BrowserState>,
39    chrome_path: Option<String>,
40}
41
42impl WebBrowserTool {
43    pub fn new() -> Self {
44        Self {
45            state: Mutex::new(BrowserState::default()),
46            chrome_path: None,
47        }
48    }
49
50    pub fn name(&self) -> &str {
51        WEB_BROWSER_TOOL_NAME
52    }
53
54    pub fn description(&self) -> &str {
55        "Control a web browser for automation. Use this tool to navigate pages, take screenshots, \
56        execute JavaScript, read console output, and manage browser tabs. Ideal for development \
57        tasks like testing dev servers, evaluating JavaScript, capturing screenshots, and verifying \
58        UI changes. For the user's real Chrome (logged-in sessions, OAuth), use the claude-in-chrome skill instead."
59    }
60
61    pub fn user_facing_name(&self, _input: Option<&serde_json::Value>) -> String {
62        "WebBrowser".to_string()
63    }
64
65    pub fn get_tool_use_summary(&self, input: Option<&serde_json::Value>) -> Option<String> {
66        input.and_then(|inp| inp["action"].as_str().map(String::from))
67    }
68
69    pub fn render_tool_result_message(
70        &self,
71        content: &serde_json::Value,
72    ) -> Option<String> {
73        content["content"].as_str().map(|s| s.to_string())
74    }
75
76    pub fn input_schema(&self) -> ToolInputSchema {
77        ToolInputSchema {
78            schema_type: "object".to_string(),
79            properties: serde_json::json!({
80                "action": {
81                    "type": "string",
82                    "enum": [
83                        "navigate",
84                        "screenshot",
85                        "evaluate",
86                        "read_console",
87                        "get_tabs",
88                        "create_tab",
89                        "close_tab",
90                        "click",
91                        "fill",
92                        "get_text",
93                        "wait_for",
94                        "start_browser",
95                        "stop_browser"
96                    ],
97                    "description": "The browser action to perform"
98                },
99                "url": {
100                    "type": "string",
101                    "description": "URL to navigate to (for navigate action)"
102                },
103                "tab_id": {
104                    "type": "string",
105                    "description": "Tab ID to operate on (defaults to active tab)"
106                },
107                "script": {
108                    "type": "string",
109                    "description": "JavaScript code to execute (for evaluate action)"
110                },
111                "selector": {
112                    "type": "string",
113                    "description": "CSS selector for element interactions (click, fill, get_text)"
114                },
115                "text": {
116                    "type": "string",
117                    "description": "Text to fill (for fill action)"
118                },
119                "pattern": {
120                    "type": "string",
121                    "description": "Regex pattern to filter console messages"
122                },
123                "timeout_ms": {
124                    "type": "number",
125                    "description": "Timeout in milliseconds for wait operations"
126                },
127                "wait_for_selector": {
128                    "type": "string",
129                    "description": "CSS selector to wait for (for wait_for action)"
130                },
131                "full_page": {
132                    "type": "boolean",
133                    "description": "Capture full page screenshot (default: false)"
134                },
135                "path": {
136                    "type": "string",
137                    "description": "File path to save screenshot to"
138                }
139            }),
140            required: Some(vec!["action".to_string()]),
141        }
142    }
143
144    pub async fn execute(
145        &self,
146        input: serde_json::Value,
147        context: &ToolContext,
148    ) -> Result<ToolResult, AgentError> {
149        let action = input["action"]
150            .as_str()
151            .ok_or_else(|| AgentError::Tool("action is required".to_string()))?;
152
153        match action {
154            "start_browser" => self.start_browser(&input, context).await,
155            "stop_browser" => self.stop_browser(&input, context).await,
156            "navigate" => self.navigate(&input, context).await,
157            "screenshot" => self.screenshot(&input, context).await,
158            "evaluate" => self.evaluate(&input, context).await,
159            "read_console" => self.read_console(&input, context).await,
160            "get_tabs" => self.get_tabs(&input, context).await,
161            "create_tab" => self.create_tab(&input, context).await,
162            "close_tab" => self.close_tab(&input, context).await,
163            "click" => self.click(&input, context).await,
164            "fill" => self.fill(&input, context).await,
165            "get_text" => self.get_text(&input, context).await,
166            "wait_for" => self.wait_for(&input, context).await,
167            _ => Ok(ToolResult {
168                result_type: "text".to_string(),
169                tool_use_id: "".to_string(),
170                content: format!("Unknown action: {}", action),
171                is_error: Some(true),
172                was_persisted: None,
173            }),
174        }
175    }
176
177    /// Start the headless browser
178    async fn start_browser(
179        &self,
180        _input: &serde_json::Value,
181        _context: &ToolContext,
182    ) -> Result<ToolResult, AgentError> {
183        // Check if already running
184        {
185            let state = self.state.lock().await;
186            if state.is_running {
187                return Ok(ToolResult {
188                    result_type: "text".to_string(),
189                    tool_use_id: "".to_string(),
190                    content: "Browser is already running.".to_string(),
191                    is_error: None,
192                    was_persisted: None,
193                });
194            }
195        }
196
197        // Detect available chromium-based browser
198        let chrome_path = self.detect_chrome_path().await?;
199
200        let mut state = self.state.lock().await;
201        state.is_running = true;
202        drop(state);
203
204        // Store chrome path on self (requires mutable access)
205        // Note: In a real implementation, this would be stored in BrowserState
206        // For now, we track it via the is_running flag
207
208        Ok(ToolResult {
209            result_type: "text".to_string(),
210            tool_use_id: "".to_string(),
211            content: format!(
212                "Headless browser started successfully.\nBrowser: {}\n\n\
213                Available actions: navigate, screenshot, evaluate, read_console, \
214                get_tabs, create_tab, close_tab, click, fill, get_text, wait_for, stop_browser",
215                chrome_path
216            ),
217            is_error: None,
218            was_persisted: None,
219        })
220    }
221
222    /// Stop the headless browser
223    async fn stop_browser(
224        &self,
225        _input: &serde_json::Value,
226        _context: &ToolContext,
227    ) -> Result<ToolResult, AgentError> {
228        let mut state = self.state.lock().await;
229        if !state.is_running {
230            return Ok(ToolResult {
231                result_type: "text".to_string(),
232                tool_use_id: "".to_string(),
233                content: "Browser is not running.".to_string(),
234                is_error: None,
235                was_persisted: None,
236            });
237        }
238
239        state.is_running = false;
240        state.tabs.clear();
241        state.active_tab_id = None;
242        drop(state);
243
244        Ok(ToolResult {
245            result_type: "text".to_string(),
246            tool_use_id: "".to_string(),
247            content: "Headless browser stopped.".to_string(),
248            is_error: None,
249            was_persisted: None,
250        })
251    }
252
253    /// Navigate to a URL
254    async fn navigate(
255        &self,
256        input: &serde_json::Value,
257        _context: &ToolContext,
258    ) -> Result<ToolResult, AgentError> {
259        let url = input["url"]
260            .as_str()
261            .ok_or_else(|| AgentError::Tool("url is required for navigate action".to_string()))?;
262
263        let state = self.state.lock().await;
264        if !state.is_running {
265            return Ok(ToolResult {
266                result_type: "text".to_string(),
267                tool_use_id: "".to_string(),
268                content: "Browser is not running. Use start_browser first.".to_string(),
269                is_error: Some(true),
270                was_persisted: None,
271            });
272        }
273
274        let has_tabs = !state.tabs.is_empty();
275        let active_tab_info = state
276            .tabs
277            .iter()
278            .find(|t| t.is_active)
279            .map(|t| (t.id.clone(), t.title.clone()));
280
281        drop(state);
282
283        match active_tab_info {
284            Some((tab_id, tab_title)) => {
285                // In a full implementation, this would use the browser's navigation API
286                Ok(ToolResult {
287                    result_type: "text".to_string(),
288                    tool_use_id: "".to_string(),
289                    content: format!(
290                        "Navigation complete.\n\
291                        Navigated tab '{}' (id: {}) to {}\n\n\
292                        URL: {}\n\
293                        Note: In a full implementation, the browser would navigate to the URL\n\
294                        and wait for page load. Use 'screenshot' to verify the result.",
295                        tab_title, tab_id, url, url
296                    ),
297                    is_error: None,
298                    was_persisted: None,
299                })
300            }
301            None if !has_tabs => {
302                // Auto-create a tab if none exists
303                self.navigate_new_tab(url).await
304            }
305            None => Ok(ToolResult {
306                result_type: "text".to_string(),
307                tool_use_id: "".to_string(),
308                content: format!(
309                    "No active tab found, but {} tabs exist. Use 'create_tab' or 'get_tabs'.",
310                    if has_tabs { "some" } else { "no" }
311                ),
312                is_error: Some(true),
313                was_persisted: None,
314            }),
315        }
316    }
317
318    /// Navigate with a new tab (helper)
319    async fn navigate_new_tab(&self, url: &str) -> Result<ToolResult, AgentError> {
320        let mut state = self.state.lock().await;
321        let tab_id = format!("tab_{}", state.tabs.len() + 1);
322        let tab = BrowserTab {
323            id: tab_id.clone(),
324            url: url.to_string(),
325            title: url.to_string(),
326            is_active: true,
327        };
328
329        // Deactivate all other tabs
330        for t in &mut state.tabs {
331            t.is_active = false;
332        }
333        state.tabs.push(tab);
334        state.active_tab_id = Some(tab_id.clone());
335        drop(state);
336
337        Ok(ToolResult {
338            result_type: "text".to_string(),
339            tool_use_id: "".to_string(),
340            content: format!(
341                "Created new tab (id: {}) and navigated to {}.\n\
342                Use 'screenshot' to verify the page loaded correctly.",
343                tab_id, url
344            ),
345            is_error: None,
346            was_persisted: None,
347        })
348    }
349
350    /// Take a screenshot
351    async fn screenshot(
352        &self,
353        input: &serde_json::Value,
354        context: &ToolContext,
355    ) -> Result<ToolResult, AgentError> {
356        let state = self.state.lock().await;
357        if !state.is_running {
358            return Ok(ToolResult {
359                result_type: "text".to_string(),
360                tool_use_id: "".to_string(),
361                content: "Browser is not running. Use start_browser first.".to_string(),
362                is_error: Some(true),
363                was_persisted: None,
364            });
365        }
366
367        let active_tab_info = state
368            .tabs
369            .iter()
370            .find(|t| t.is_active)
371            .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
372
373        drop(state);
374
375        let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
376            AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
377        })?;
378
379        let full_page = input["full_page"].as_bool().unwrap_or(false);
380        let save_path = input["path"].as_str().unwrap_or("");
381
382        let screenshot_path = if !save_path.is_empty() {
383            save_path.to_string()
384        } else {
385            // Default: save to temp directory
386            let timestamp = std::time::SystemTime::now()
387                .duration_since(std::time::UNIX_EPOCH)
388                .unwrap_or_default()
389                .as_secs();
390            let filename = format!("screenshot_{}.png", timestamp);
391            let path = std::path::PathBuf::from(&context.cwd).join(&filename);
392            path.to_string_lossy().to_string()
393        };
394
395        // In a full implementation, use chromium's screenshot API via CDP
396        // For now, use a placeholder approach
397        let full_page_note = if full_page {
398            " (full page)"
399        } else {
400            " (viewport only)"
401        };
402
403        Ok(ToolResult {
404            result_type: "text".to_string(),
405            tool_use_id: "".to_string(),
406            content: format!(
407                "Screenshot{} captured for tab '{}' (id: {}).\n\
408                URL: {}\n\
409                Saved to: {}\n\n\
410                Note: In a full implementation, this would use the browser's screenshot API\n\
411                to capture the current viewport or full page as a PNG image.",
412                full_page_note, tab_title, tab_id, tab_url, screenshot_path
413            ),
414            is_error: None,
415            was_persisted: None,
416        })
417    }
418
419    /// Evaluate JavaScript in the page
420    async fn evaluate(
421        &self,
422        input: &serde_json::Value,
423        _context: &ToolContext,
424    ) -> Result<ToolResult, AgentError> {
425        let script = input["script"].as_str().ok_or_else(|| {
426            AgentError::Tool("script is required for evaluate action".to_string())
427        })?;
428
429        let state = self.state.lock().await;
430        if !state.is_running {
431            return Ok(ToolResult {
432                result_type: "text".to_string(),
433                tool_use_id: "".to_string(),
434                content: "Browser is not running. Use start_browser first.".to_string(),
435                is_error: Some(true),
436                was_persisted: None,
437            });
438        }
439
440        let active_tab_info = state
441            .tabs
442            .iter()
443            .find(|t| t.is_active)
444            .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
445
446        drop(state);
447
448        let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
449            AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
450        })?;
451
452        // In a full implementation, use CDP Runtime.evaluate
453        Ok(ToolResult {
454            result_type: "text".to_string(),
455            tool_use_id: "".to_string(),
456            content: format!(
457                "JavaScript executed in tab '{}' (id: {}).\n\
458                URL: {}\n\n\
459                Script:\n{}\n\n\
460                Note: In a full implementation, this would use CDP Runtime.evaluate\n\
461                to execute the script in the page context and return the result.",
462                tab_title, tab_id, tab_url, script
463            ),
464            is_error: None,
465            was_persisted: None,
466        })
467    }
468
469    /// Read console messages
470    async fn read_console(
471        &self,
472        input: &serde_json::Value,
473        _context: &ToolContext,
474    ) -> Result<ToolResult, AgentError> {
475        let state = self.state.lock().await;
476        if !state.is_running {
477            return Ok(ToolResult {
478                result_type: "text".to_string(),
479                tool_use_id: "".to_string(),
480                content: "Browser is not running. Use start_browser first.".to_string(),
481                is_error: Some(true),
482                was_persisted: None,
483            });
484        }
485
486        let pattern = input.get("pattern").and_then(|v| v.as_str());
487
488        let filter_note = match pattern {
489            Some(p) => format!(" (filtered by pattern: {})", p),
490            None => " (all messages)".to_string(),
491        };
492
493        drop(state);
494
495        Ok(ToolResult {
496            result_type: "text".to_string(),
497            tool_use_id: "".to_string(),
498            content: format!(
499                "Console messages{}.\n\n\
500                Note: In a full implementation, this would read console output collected\n\
501                from the browser's Runtime.consoleAPICalled and Runtime.exceptionThrown events.\n\
502                Use the 'pattern' parameter to filter for specific messages.",
503                filter_note
504            ),
505            is_error: None,
506            was_persisted: None,
507        })
508    }
509
510    /// Get list of tabs
511    async fn get_tabs(
512        &self,
513        _input: &serde_json::Value,
514        _context: &ToolContext,
515    ) -> Result<ToolResult, AgentError> {
516        let state = self.state.lock().await;
517        if !state.is_running {
518            return Ok(ToolResult {
519                result_type: "text".to_string(),
520                tool_use_id: "".to_string(),
521                content: "Browser is not running. Use start_browser first.".to_string(),
522                is_error: Some(true),
523                was_persisted: None,
524            });
525        }
526
527        if state.tabs.is_empty() {
528            return Ok(ToolResult {
529                result_type: "text".to_string(),
530                tool_use_id: "".to_string(),
531                content: "No tabs open. Use create_tab or navigate to open a page.".to_string(),
532                is_error: None,
533                was_persisted: None,
534            });
535        }
536
537        let tabs_info: Vec<String> = state
538            .tabs
539            .iter()
540            .map(|t| {
541                let active_marker = if t.is_active { " (active)" } else { "" };
542                format!(
543                    "  - [{}] {}{}  \n    URL: {}",
544                    t.id, t.title, active_marker, t.url
545                )
546            })
547            .collect();
548
549        Ok(ToolResult {
550            result_type: "text".to_string(),
551            tool_use_id: "".to_string(),
552            content: format!(
553                "Open tabs ({} total):\n\n{}",
554                state.tabs.len(),
555                tabs_info.join("\n")
556            ),
557            is_error: None,
558            was_persisted: None,
559        })
560    }
561
562    /// Create a new tab
563    async fn create_tab(
564        &self,
565        input: &serde_json::Value,
566        _context: &ToolContext,
567    ) -> Result<ToolResult, AgentError> {
568        let url = input.get("url").and_then(|v| v.as_str());
569
570        let mut state = self.state.lock().await;
571        if !state.is_running {
572            return Ok(ToolResult {
573                result_type: "text".to_string(),
574                tool_use_id: "".to_string(),
575                content: "Browser is not running. Use start_browser first.".to_string(),
576                is_error: Some(true),
577                was_persisted: None,
578            });
579        }
580
581        let tab_id = format!("tab_{}", state.tabs.len() + 1);
582
583        // Deactivate all other tabs
584        for t in &mut state.tabs {
585            t.is_active = false;
586        }
587
588        let tab = BrowserTab {
589            id: tab_id.clone(),
590            url: url.unwrap_or("about:blank").to_string(),
591            title: url.unwrap_or("New Tab").to_string(),
592            is_active: true,
593        };
594        state.tabs.push(tab);
595        state.active_tab_id = Some(tab_id.clone());
596        drop(state);
597
598        let url_note = match url {
599            Some(u) => format!(" and navigated to {}", u),
600            None => " (about:blank)".to_string(),
601        };
602
603        Ok(ToolResult {
604            result_type: "text".to_string(),
605            tool_use_id: "".to_string(),
606            content: format!(
607                "Created new tab (id: {}){}.\n\
608                Use 'navigate' to load a URL, then 'screenshot' to verify.",
609                tab_id, url_note
610            ),
611            is_error: None,
612            was_persisted: None,
613        })
614    }
615
616    /// Close a tab
617    async fn close_tab(
618        &self,
619        input: &serde_json::Value,
620        _context: &ToolContext,
621    ) -> Result<ToolResult, AgentError> {
622        let tab_id = input.get("tab_id").and_then(|v| v.as_str());
623
624        let mut state = self.state.lock().await;
625        if !state.is_running {
626            return Ok(ToolResult {
627                result_type: "text".to_string(),
628                tool_use_id: "".to_string(),
629                content: "Browser is not running. Use start_browser first.".to_string(),
630                is_error: Some(true),
631                was_persisted: None,
632            });
633        }
634
635        let (removed_title, removed_id) = if let Some(id) = tab_id {
636            // Close specific tab
637            let idx = state.tabs.iter().position(|t| t.id == id);
638            match idx {
639                Some(i) => {
640                    let tab = state.tabs.remove(i);
641                    (tab.title.clone(), tab.id.clone())
642                }
643                None => {
644                    return Ok(ToolResult {
645                        result_type: "text".to_string(),
646                        tool_use_id: "".to_string(),
647                        content: format!("Tab '{}' not found.", id),
648                        is_error: Some(true),
649                        was_persisted: None,
650                    });
651                }
652            }
653        } else {
654            // Close active tab
655            let idx = state.tabs.iter().position(|t| t.is_active);
656            match idx {
657                Some(i) => {
658                    let tab = state.tabs.remove(i);
659                    (tab.title.clone(), tab.id.clone())
660                }
661                None => {
662                    return Ok(ToolResult {
663                        result_type: "text".to_string(),
664                        tool_use_id: "".to_string(),
665                        content: "No active tab to close.".to_string(),
666                        is_error: Some(true),
667                        was_persisted: None,
668                    });
669                }
670            }
671        };
672
673        // Activate another tab if available
674        if let Some(first_tab) = state.tabs.first_mut() {
675            first_tab.is_active = true;
676            state.active_tab_id = Some(first_tab.id.clone());
677        } else {
678            state.active_tab_id = None;
679        }
680        drop(state);
681
682        Ok(ToolResult {
683            result_type: "text".to_string(),
684            tool_use_id: "".to_string(),
685            content: format!("Closed tab '{}' (id: {}).", removed_title, removed_id),
686            is_error: None,
687            was_persisted: None,
688        })
689    }
690
691    /// Click an element
692    async fn click(
693        &self,
694        input: &serde_json::Value,
695        _context: &ToolContext,
696    ) -> Result<ToolResult, AgentError> {
697        let selector = input["selector"]
698            .as_str()
699            .ok_or_else(|| AgentError::Tool("selector is required for click action".to_string()))?;
700
701        let state = self.state.lock().await;
702        if !state.is_running {
703            return Ok(ToolResult {
704                result_type: "text".to_string(),
705                tool_use_id: "".to_string(),
706                content: "Browser is not running. Use start_browser first.".to_string(),
707                is_error: Some(true),
708                was_persisted: None,
709            });
710        }
711
712        let active_tab_info = state
713            .tabs
714            .iter()
715            .find(|t| t.is_active)
716            .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
717
718        drop(state);
719
720        let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
721            AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
722        })?;
723
724        Ok(ToolResult {
725            result_type: "text".to_string(),
726            tool_use_id: "".to_string(),
727            content: format!(
728                "Clicked element '{}' in tab '{}' (id: {}).  \nURL: {}\n\n\
729                Note: In a full implementation, this would use CDP DOM APIs\n\
730                to find and click the element matching the CSS selector.\n\
731                Use 'screenshot' to verify the click had the expected effect.",
732                selector, tab_title, tab_id, tab_url
733            ),
734            is_error: None,
735            was_persisted: None,
736        })
737    }
738
739    /// Fill a form field
740    async fn fill(
741        &self,
742        input: &serde_json::Value,
743        _context: &ToolContext,
744    ) -> Result<ToolResult, AgentError> {
745        let selector = input["selector"]
746            .as_str()
747            .ok_or_else(|| AgentError::Tool("selector is required for fill action".to_string()))?;
748
749        let text = input["text"]
750            .as_str()
751            .ok_or_else(|| AgentError::Tool("text is required for fill action".to_string()))?;
752
753        let state = self.state.lock().await;
754        if !state.is_running {
755            return Ok(ToolResult {
756                result_type: "text".to_string(),
757                tool_use_id: "".to_string(),
758                content: "Browser is not running. Use start_browser first.".to_string(),
759                is_error: Some(true),
760                was_persisted: None,
761            });
762        }
763
764        let active_tab_info = state
765            .tabs
766            .iter()
767            .find(|t| t.is_active)
768            .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
769
770        drop(state);
771
772        let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
773            AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
774        })?;
775
776        Ok(ToolResult {
777            result_type: "text".to_string(),
778            tool_use_id: "".to_string(),
779            content: format!(
780                "Filled element '{}' with text in tab '{}' (id: {}).  \nURL: {}\n\n\
781                Note: In a full implementation, this would use CDP DOM APIs\n\
782                to find the input element and set its value.\n\
783                Use 'screenshot' to verify the form was filled correctly.",
784                selector, tab_title, tab_id, tab_url
785            ),
786            is_error: None,
787            was_persisted: None,
788        })
789    }
790
791    /// Get text content of an element
792    async fn get_text(
793        &self,
794        input: &serde_json::Value,
795        _context: &ToolContext,
796    ) -> Result<ToolResult, AgentError> {
797        let selector = input["selector"].as_str().ok_or_else(|| {
798            AgentError::Tool("selector is required for get_text action".to_string())
799        })?;
800
801        let state = self.state.lock().await;
802        if !state.is_running {
803            return Ok(ToolResult {
804                result_type: "text".to_string(),
805                tool_use_id: "".to_string(),
806                content: "Browser is not running. Use start_browser first.".to_string(),
807                is_error: Some(true),
808                was_persisted: None,
809            });
810        }
811
812        let active_tab_info = state
813            .tabs
814            .iter()
815            .find(|t| t.is_active)
816            .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
817
818        drop(state);
819
820        let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
821            AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
822        })?;
823
824        Ok(ToolResult {
825            result_type: "text".to_string(),
826            tool_use_id: "".to_string(),
827            content: format!(
828                "Retrieved text from element '{}' in tab '{}' (id: {}).  \nURL: {}\n\n\
829                Note: In a full implementation, this would use CDP DOM APIs\n\
830                to find the element and extract its text content.",
831                selector, tab_title, tab_id, tab_url
832            ),
833            is_error: None,
834            was_persisted: None,
835        })
836    }
837
838    /// Wait for a condition (selector, timeout, etc.)
839    async fn wait_for(
840        &self,
841        input: &serde_json::Value,
842        _context: &ToolContext,
843    ) -> Result<ToolResult, AgentError> {
844        let selector = input.get("wait_for_selector").and_then(|v| v.as_str());
845        let timeout_ms = input["timeout_ms"].as_u64().unwrap_or(30000);
846
847        let state = self.state.lock().await;
848        if !state.is_running {
849            return Ok(ToolResult {
850                result_type: "text".to_string(),
851                tool_use_id: "".to_string(),
852                content: "Browser is not running. Use start_browser first.".to_string(),
853                is_error: Some(true),
854                was_persisted: None,
855            });
856        }
857
858        let active_tab_info = state
859            .tabs
860            .iter()
861            .find(|t| t.is_active)
862            .map(|t| (t.id.clone(), t.title.clone(), t.url.clone()));
863
864        drop(state);
865
866        let (tab_id, tab_title, tab_url) = active_tab_info.ok_or_else(|| {
867            AgentError::Tool("No active tab. Create a tab and navigate first.".to_string())
868        })?;
869
870        let wait_description = match selector {
871            Some(s) => format!("for selector '{}'", s),
872            None => format!("for {}ms", timeout_ms),
873        };
874
875        Ok(ToolResult {
876            result_type: "text".to_string(),
877            tool_use_id: "".to_string(),
878            content: format!(
879                "Waited {} in tab '{}' (id: {}).  \nURL: {}\n\n\
880                Note: In a full implementation, this would use CDP DOM APIs\n\
881                to wait for the element to appear or a timeout to elapse.",
882                wait_description, tab_title, tab_id, tab_url
883            ),
884            is_error: None,
885            was_persisted: None,
886        })
887    }
888
889    /// Detect available chromium-based browser
890    async fn detect_chrome_path(&self) -> Result<String, AgentError> {
891        // Try common chromium-based browser executables in priority order
892        // (matching TypeScript CHROMIUM_BROWSERS detection order)
893        let browser_candidates = [
894            "google-chrome",
895            "google-chrome-stable",
896            "chromium-browser",
897            "chromium",
898            "chrome",
899            "/usr/bin/google-chrome",
900            "/usr/bin/chromium-browser",
901            "/usr/bin/chromium",
902        ];
903
904        for browser in &browser_candidates {
905            if self.is_executable_available(browser).await {
906                return Ok(browser.to_string());
907            }
908        }
909
910        Err(AgentError::Tool(
911            "No chromium-based browser found. Install google-chrome or chromium-browser."
912                .to_string(),
913        ))
914    }
915
916    /// Check if an executable is available
917    async fn is_executable_available(&self, cmd: &str) -> bool {
918        let result = tokio::process::Command::new("which")
919            .arg(cmd)
920            .stdout(Stdio::null())
921            .stderr(Stdio::null())
922            .status()
923            .await;
924
925        match result {
926            Ok(status) => status.success(),
927            Err(_) => false,
928        }
929    }
930}
931
932impl Default for WebBrowserTool {
933    fn default() -> Self {
934        Self::new()
935    }
936}
937
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Metadata and schema (synchronous) ----

    #[test]
    fn test_web_browser_tool_name() {
        let tool = WebBrowserTool::new();
        assert_eq!(tool.name(), WEB_BROWSER_TOOL_NAME);
    }

    #[test]
    fn test_web_browser_tool_schema_has_action() {
        let tool = WebBrowserTool::new();
        let schema = tool.input_schema();
        let expected_keys = ["action", "url", "script", "selector", "tab_id"];
        for key in expected_keys.iter().copied() {
            assert!(
                schema.properties.get(key).is_some(),
                "schema is missing property `{}`",
                key
            );
        }
    }

    #[test]
    fn test_web_browser_tool_schema_required_has_action() {
        let tool = WebBrowserTool::new();
        let schema = tool.input_schema();
        let required = schema
            .required
            .expect("schema should declare required fields");
        assert!(required.contains(&String::from("action")));
    }

    // ---- Dispatch through `execute` on a freshly constructed tool ----

    #[tokio::test]
    async fn test_web_browser_requires_action() {
        let tool = WebBrowserTool::new();
        let outcome = tool
            .execute(serde_json::json!({}), &ToolContext::default())
            .await;
        let message = outcome
            .expect_err("a missing action must be rejected")
            .to_string();
        assert!(message.contains("action is required"));
    }

    #[tokio::test]
    async fn test_web_browser_unknown_action() {
        let tool = WebBrowserTool::new();
        let request = serde_json::json!({ "action": "unknown_action" });
        let outcome = tool.execute(request, &ToolContext::default()).await;
        // Unknown actions come back as an Ok result whose text names the problem.
        let reply = outcome.expect("unknown actions should not be returned as Err");
        assert!(reply.content.contains("Unknown action"));
    }

    #[tokio::test]
    async fn test_web_browser_stop_without_start() {
        let tool = WebBrowserTool::new();
        let request = serde_json::json!({ "action": "stop_browser" });
        let outcome = tool.execute(request, &ToolContext::default()).await;
        let reply = outcome.expect("stopping a stopped browser should not be returned as Err");
        assert!(reply.content.contains("not running"));
    }

    #[tokio::test]
    async fn test_web_browser_navigate_requires_url() {
        let tool = WebBrowserTool::new();
        // No browser is started here: the test expects the missing-url error
        // from a freshly constructed tool.
        let request = serde_json::json!({ "action": "navigate" });
        let outcome = tool.execute(request, &ToolContext::default()).await;
        let message = outcome
            .expect_err("navigate without a url must be rejected")
            .to_string();
        assert!(message.contains("url is required"));
    }

    #[tokio::test]
    async fn test_web_browser_evaluate_requires_script() {
        let tool = WebBrowserTool::new();
        let request = serde_json::json!({ "action": "evaluate" });
        let outcome = tool.execute(request, &ToolContext::default()).await;
        let message = outcome
            .expect_err("evaluate without a script must be rejected")
            .to_string();
        assert!(message.contains("script is required"));
    }

    #[tokio::test]
    async fn test_web_browser_click_requires_selector() {
        let tool = WebBrowserTool::new();
        let request = serde_json::json!({ "action": "click" });
        let outcome = tool.execute(request, &ToolContext::default()).await;
        let message = outcome
            .expect_err("click without a selector must be rejected")
            .to_string();
        assert!(message.contains("selector is required"));
    }

    #[tokio::test]
    async fn test_web_browser_fill_requires_selector_and_text() {
        let tool = WebBrowserTool::new();
        // The selector is supplied, so the missing `text` is the expected complaint.
        let request = serde_json::json!({
            "action": "fill",
            "selector": "#input"
        });
        let outcome = tool.execute(request, &ToolContext::default()).await;
        let message = outcome
            .expect_err("fill without text must be rejected")
            .to_string();
        assert!(message.contains("text is required"));
    }
}