use std::sync::Arc;
use async_trait::async_trait;
use car_engine::ToolExecutor;
use car_ir::ToolSchema;
use serde_json::{json, Value};
use tokio::sync::RwLock;
use crate::backend::BrowserBackend;
use crate::perception::pipeline::PerceptionPipeline;
use crate::perception::ui_map::UiMap;
/// Tool executor that exposes the `browse_*` tools on top of a
/// [`BrowserBackend`] and a [`PerceptionPipeline`].
pub struct BrowserToolExecutor {
/// Backend that carries out the browser operations (navigation, clicks,
/// typing, scrolling, screenshots, accessibility-tree extraction).
backend: Arc<dyn BrowserBackend>,
/// Pipeline that turns a screenshot plus accessibility nodes into a `UiMap`.
pipeline: Arc<dyn PerceptionPipeline>,
/// `UiMap` stored by the most recent `browse_observe`; `None` until the first
/// observation. Consulted when resolving element IDs for click/type.
last_ui_map: Arc<RwLock<Option<UiMap>>>,
}
impl BrowserToolExecutor {
/// Creates an executor that drives `backend` and uses `pipeline` for
/// perception.
///
/// The cached UI map starts out empty (`None`).
pub fn new(
    backend: Arc<dyn BrowserBackend>,
    pipeline: Arc<dyn PerceptionPipeline>,
) -> Self {
    let last_ui_map = Arc::new(RwLock::new(None));
    Self {
        backend,
        pipeline,
        last_ui_map,
    }
}
async fn resolve_element_id(&self, element_id: &str) -> String {
let guard = self.last_ui_map.read().await;
if let Some(ui_map) = guard.as_ref() {
if let Some(element) = ui_map.get_element(element_id) {
if let Some(ref ax_ref) = element.ax_ref {
return ax_ref.clone();
}
}
}
element_id.to_string()
}
pub fn tool_schemas() -> Vec<ToolSchema> {
vec![
ToolSchema {
name: "browse_navigate".to_string(),
description: "Navigate the browser to a URL".to_string(),
parameters: json!({
"type": "object",
"properties": {
"url": { "type": "string", "description": "URL to navigate to" }
},
"required": ["url"]
}),
returns: Some(json!({"type": "object", "properties": {"url": {"type": "string"}}})),
idempotent: false,
cache_ttl_secs: None,
rate_limit: None,
},
ToolSchema {
name: "browse_click".to_string(),
description: "Click on a UI element by accessibility node ID".to_string(),
parameters: json!({
"type": "object",
"properties": {
"element_id": { "type": "string", "description": "Accessibility node ID (e.g. 'el_5')" }
},
"required": ["element_id"]
}),
returns: Some(json!({"type": "object"})),
idempotent: false,
cache_ttl_secs: None,
rate_limit: None,
},
ToolSchema {
name: "browse_type".to_string(),
description: "Type text into a UI element by accessibility node ID".to_string(),
parameters: json!({
"type": "object",
"properties": {
"element_id": { "type": "string", "description": "Accessibility node ID of a text field" },
"text": { "type": "string", "description": "Text to enter" }
},
"required": ["element_id", "text"]
}),
returns: Some(json!({"type": "object"})),
idempotent: false,
cache_ttl_secs: None,
rate_limit: None,
},
ToolSchema {
name: "browse_scroll".to_string(),
description: "Scroll the browser page".to_string(),
parameters: json!({
"type": "object",
"properties": {
"delta_y": { "type": "integer", "description": "Scroll amount (positive = down, negative = up)" }
},
"required": ["delta_y"]
}),
returns: Some(json!({"type": "object"})),
idempotent: false,
cache_ttl_secs: None,
rate_limit: None,
},
ToolSchema {
name: "browse_observe".to_string(),
description: "Observe the current browser state: take screenshot, extract accessibility tree, produce UiMap".to_string(),
parameters: json!({
"type": "object",
"properties": {}
}),
returns: Some(json!({
"type": "object",
"properties": {
"url": {"type": "string"},
"title": {"type": "string"},
"ui_map": {"type": "string"},
"screenshot_base64": {"type": "string"}
}
})),
idempotent: true,
cache_ttl_secs: None,
rate_limit: None,
},
]
}
async fn handle_navigate(&self, params: &Value) -> Result<Value, String> {
let url = params
.get("url")
.and_then(|v| v.as_str())
.ok_or("Missing required parameter: url")?;
self.backend
.navigate(url)
.await
.map_err(|e| e.to_string())?;
Ok(json!({"url": url, "status": "navigated"}))
}
async fn handle_click(&self, params: &Value) -> Result<Value, String> {
let element_id = params
.get("element_id")
.and_then(|v| v.as_str())
.ok_or("Missing required parameter: element_id")?;
let resolved_id = self.resolve_element_id(element_id).await;
self.backend
.click_element(&resolved_id)
.await
.map_err(|e| e.to_string())?;
Ok(json!({"element_id": element_id, "resolved_id": resolved_id, "status": "clicked"}))
}
async fn handle_type(&self, params: &Value) -> Result<Value, String> {
let element_id = params
.get("element_id")
.and_then(|v| v.as_str())
.ok_or("Missing required parameter: element_id")?;
let text = params
.get("text")
.and_then(|v| v.as_str())
.ok_or("Missing required parameter: text")?;
let resolved_id = self.resolve_element_id(element_id).await;
self.backend
.type_into_element(&resolved_id, text)
.await
.map_err(|e| e.to_string())?;
Ok(json!({"element_id": element_id, "resolved_id": resolved_id, "text": text, "status": "typed"}))
}
/// Handles `browse_scroll`: scrolls the page by `delta_y`.
///
/// On success returns `{"delta_y": <delta_y>, "status": "scrolled"}`.
///
/// # Errors
/// Returns an error string when:
/// - `delta_y` is absent or is not a JSON integer that fits in an `i64`;
/// - `delta_y` does not fit in an `i32`, the width passed to the backend;
/// - the backend's `inject_scroll` call fails.
async fn handle_scroll(&self, params: &Value) -> Result<Value, String> {
    let requested = params
        .get("delta_y")
        .and_then(|v| v.as_i64())
        .ok_or("Missing required parameter: delta_y")?;

    // Checked narrowing: an `as i32` cast would silently wrap out-of-range
    // values (e.g. 4294967296 would scroll by 0), so reject them instead.
    // The fully qualified path keeps this independent of the crate edition's
    // prelude.
    let delta_y = <i32 as std::convert::TryFrom<i64>>::try_from(requested).map_err(|_| {
        format!("Parameter delta_y is out of range for a 32-bit integer: {requested}")
    })?;

    self.backend
        .inject_scroll(delta_y)
        .await
        .map_err(|e| e.to_string())?;

    Ok(json!({"delta_y": delta_y, "status": "scrolled"}))
}
/// Handles `browse_observe`: captures the current page state and produces a
/// fresh `UiMap`.
///
/// Steps, in order: capture a screenshot, fetch the accessibility tree, read
/// the current URL, page title and viewport, then run the perception
/// pipeline over them. The resulting map replaces the cached one that
/// `resolve_element_id` consults.
///
/// The response contains `url`, `title`, `ui_map` (compact text form),
/// `screenshot_base64` (standard base64 alphabet), `element_count` and
/// `viewport` (`width`/`height`). `_params` is ignored.
///
/// # Errors
/// Returns the stringified error of the first backend or pipeline call that
/// fails; in that case the cached map is left untouched.
async fn handle_observe(&self, _params: &Value) -> Result<Value, String> {
    let screenshot = self
        .backend
        .capture_screenshot()
        .await
        .map_err(|e| e.to_string())?;
    let a11y_nodes = self
        .backend
        .get_accessibility_tree()
        .await
        .map_err(|e| e.to_string())?;
    let url = self.backend.get_current_url().map_err(|e| e.to_string())?;
    let title = self
        .backend
        .get_page_title()
        .await
        .map_err(|e| e.to_string())?;
    let viewport = self.backend.get_viewport().map_err(|e| e.to_string())?;

    let ui_map = self
        .pipeline
        .perceive(&screenshot, &a11y_nodes, &url, viewport)
        .await
        .map_err(|e| e.to_string())?;

    let screenshot_b64 = base64::Engine::encode(
        &base64::engine::general_purpose::STANDARD,
        &screenshot,
    );

    // Build the response while `ui_map` is still owned here; `json!` copies
    // what it needs, so the map can then be moved into the cache instead of
    // being deep-cloned on every observation.
    let response = json!({
        "url": url,
        "title": title,
        "ui_map": ui_map.format_compact(),
        "screenshot_base64": screenshot_b64,
        "element_count": ui_map.elements.len(),
        "viewport": {
            "width": viewport.width,
            "height": viewport.height,
        }
    });

    {
        // Scoped so the write lock is held only for the assignment.
        let mut guard = self.last_ui_map.write().await;
        *guard = Some(ui_map);
    }

    Ok(response)
}
}
#[async_trait]
impl ToolExecutor for BrowserToolExecutor {
    /// Dispatches `tool` to the matching `browse_*` handler and forwards its
    /// result unchanged.
    ///
    /// # Errors
    /// Returns `Unknown browser tool: <name>` when `tool` is not one of
    /// `browse_navigate`, `browse_click`, `browse_type`, `browse_scroll` or
    /// `browse_observe`; otherwise any error comes from the selected handler.
    async fn execute(&self, tool: &str, params: &Value) -> Result<Value, String> {
        match tool {
            "browse_observe" => self.handle_observe(params).await,
            "browse_navigate" => self.handle_navigate(params).await,
            "browse_click" => self.handle_click(params).await,
            "browse_type" => self.handle_type(params).await,
            "browse_scroll" => self.handle_scroll(params).await,
            unknown => Err(format!("Unknown browser tool: {unknown}")),
        }
    }
}