use std::borrow::Cow;
use crate::tools::{PlanDecision, Tool, ToolResult, schema_to_tool_params};
use schemars::JsonSchema;
use serde::Deserialize;
use serde_json::Value;
use std::sync::{Arc, atomic::AtomicBool};
// Submodules of the browser tool. Exactly one of `cdp` / `lite` is compiled,
// selected by the `browser_cdp` cargo feature.
// `cdp` is only built when the `browser_cdp` feature is enabled.
#[cfg(feature = "browser_cdp")]
mod cdp;
// `dispatch` is always built; it provides `exec_browser_cdp` / `exec_browser_stub`,
// which `BrowserTool::execute` calls under the matching feature gate.
mod dispatch;
// `lite` is only built when the `browser_cdp` feature is disabled.
#[cfg(not(feature = "browser_cdp"))]
mod lite;
// Argument payload accepted by the Browser tool.
//
// The struct serves two purposes in this file: `parameters_schema` derives the
// tool's JSON schema from it, and `execute` deserializes the incoming argument
// string into it as a validation step.
//
// NOTE: plain `//` comments are used here on purpose. schemars copies `///`
// doc comments into the generated schema as descriptions, so switching to doc
// comments would change what `parameters_schema` returns.
//
// Field meanings below are taken from the tool's description text.
#[derive(Deserialize, JsonSchema)]
// Only `action` is read in this file; the remaining fields exist for schema
// generation and type validation, hence the dead-code allowance.
#[allow(dead_code)]
struct BrowserParams {
// Name of the action to run (status, start, stop, tabs, open, navigate,
// screenshot, snapshot, content, close, click, type, press, evaluate).
// The only mandatory field.
action: String,
// Target URL; required by `open` and `navigate`. Absent key => None.
#[serde(default)]
url: Option<String>,
// Tab identifier as returned by `open`; required by `close`, optional for `navigate`.
#[serde(default)]
tab_id: Option<String>,
// CSS selector of the element to act on; required by `click` and `type`.
#[serde(default)]
selector: Option<String>,
// Text to enter; required by `type`.
#[serde(default)]
text: Option<String>,
// Key name to press (e.g. Enter, Tab, Escape); required by `press`.
#[serde(default)]
key: Option<String>,
// JavaScript source to run in the page; required by `evaluate`.
#[serde(default)]
script: Option<String>,
// Directory that receives the PNG; required by `screenshot`.
#[serde(default)]
output_dir: Option<String>,
// Optional `screenshot` flag (presumably captures the whole page rather than
// the viewport — confirm against the dispatcher).
#[serde(default)]
full_page: Option<bool>,
// Optional `start` flag controlling window visibility.
#[serde(default)]
headless: Option<bool>,
}
/// Stateless handle for the browser automation tool.
///
/// The type carries no fields; all behaviour lives in its trait implementation.
#[derive(Debug)]
pub struct BrowserTool;

impl BrowserTool {
    /// Identifier of this tool; the same value is returned by the tool's `name` method.
    pub const NAME: &'static str = "Browser";
}
/// Wires [`BrowserTool`] into the generic tool interface: name, usage text,
/// parameter schema and execution entry point.
impl Tool for BrowserTool {
    /// Returns the tool identifier, [`BrowserTool::NAME`].
    fn name(&self) -> &str {
        Self::NAME
    }

    /// Returns the usage text: one line per supported action, followed by the
    /// recommended interaction flow and a note about snapshot-provided selectors.
    ///
    /// The text is a static literal, so the returned `Cow` is always borrowed.
    fn description(&self) -> Cow<'_, str> {
        // A trailing `\` joins the next source line and drops its leading whitespace,
        // so every line that should end with a line break spells out `\n` before it.
        Cow::Borrowed(
            "Browser automation tool for web browsing, interaction, and content extraction. Available actions:\n\
            - status: Check browser running status and number of open tabs\n\
            - start: Launch a browser instance (use headless param to control window visibility)\n\
            - stop: Stop the browser and close all tabs\n\
            - tabs: List all open tabs with their IDs and URLs\n\
            - open: Open a new tab and navigate to the specified URL (requires url), returns tab_id\n\
            - navigate: Navigate an existing tab to a new URL (requires url, optional tab_id)\n\
            - screenshot: Capture a page screenshot as PNG (requires output_dir, optional full_page)\n\
            - snapshot: Get a page snapshot with title, URL, and interactive element list (buttons, inputs, links, etc.) for understanding page structure\n\
            - content: Extract page body text (intelligently removes navbars, scripts, and noise)\n\
            - close: Close a specific tab (requires tab_id)\n\
            - click: Click a page element (requires selector, CSS selector)\n\
            - type: Type text into an input field (requires selector and text, supports Unicode)\n\
            - press: Simulate a key press (requires key, e.g. Enter, Tab, Escape)\n\
            - evaluate: Execute JavaScript in the page context (requires script)\n\
            Typical flow: open a page → use snapshot to discover elements → use the selector field from snapshot (e.g. [data-jref=\"e3\"]) with click/type/press to interact → use content to get results.\n\
            Note: snapshot injects a data-jref attribute on each element and returns the corresponding selector; always use that selector for click/type instead of constructing your own.",
        )
    }

    /// Returns the JSON schema of the accepted arguments, derived from `BrowserParams`.
    fn parameters_schema(&self) -> Value {
        schema_to_tool_params::<BrowserParams>()
    }

    /// Validates `arguments` and forwards the call to the feature-selected dispatcher.
    ///
    /// * `arguments` - JSON text that must deserialize into `BrowserParams`.
    /// * `_cancelled` - cancellation flag; not consulted by this method.
    ///
    /// Returns the dispatcher's result, or an error `ToolResult` (with
    /// `is_error: true`, no images and `PlanDecision::None`) when `arguments`
    /// does not deserialize.
    fn execute(&self, arguments: &str, _cancelled: &Arc<AtomicBool>) -> ToolResult {
        // The typed parse acts as validation; only `action` is read from it here.
        let params: BrowserParams = match serde_json::from_str(arguments) {
            Ok(parsed) => parsed,
            Err(e) => {
                return ToolResult {
                    output: format!("参数解析失败: {}", e),
                    is_error: true,
                    images: vec![],
                    plan_decision: PlanDecision::None,
                };
            }
        };

        // The dispatchers take the untyped JSON value. The same text has just
        // parsed successfully above, so the `Value::Null` fallback from
        // `unwrap_or_default` is not expected to be hit.
        let args: Value = serde_json::from_str(arguments).unwrap_or_default();

        // Exactly one of the two blocks below is compiled and becomes the
        // function's tail expression.
        #[cfg(feature = "browser_cdp")]
        {
            dispatch::exec_browser_cdp(&args, &params.action)
        }
        #[cfg(not(feature = "browser_cdp"))]
        {
            dispatch::exec_browser_stub(&args, &params.action)
        }
    }

    /// Always returns `false` (presumably: the runtime need not ask the user
    /// before running this tool — confirm against the `Tool` trait docs).
    fn requires_confirmation(&self) -> bool {
        false
    }
}