1use std::sync::Arc;
7
8use async_trait::async_trait;
9use car_engine::ToolExecutor;
10use car_ir::ToolSchema;
11use serde_json::{json, Value};
12use tokio::sync::RwLock;
13
14use crate::backend::BrowserBackend;
15use crate::perception::pipeline::PerceptionPipeline;
16use crate::perception::ui_map::UiMap;
17
/// Tool executor that exposes browser automation as `browse_*` tools.
///
/// Tool calls are forwarded to a [`BrowserBackend`]; observations are turned
/// into a [`UiMap`] by a [`PerceptionPipeline`] and the latest map is cached so
/// later click/type calls can resolve element ids against it.
pub struct BrowserToolExecutor {
    /// Browser backend that navigation, click, type, scroll and capture calls go to.
    backend: Arc<dyn BrowserBackend>,
    /// Perception pipeline that builds a [`UiMap`] from a screenshot and accessibility nodes.
    pipeline: Arc<dyn PerceptionPipeline>,
    /// UI map from the most recent `browse_observe`; `None` until the first observation.
    last_ui_map: Arc<RwLock<Option<UiMap>>>,
}
28
29impl BrowserToolExecutor {
30 pub fn new(
32 backend: Arc<dyn BrowserBackend>,
33 pipeline: Arc<dyn PerceptionPipeline>,
34 ) -> Self {
35 Self {
36 backend,
37 pipeline,
38 last_ui_map: Arc::new(RwLock::new(None)),
39 }
40 }
41
42 async fn resolve_element_id(&self, element_id: &str) -> String {
46 let guard = self.last_ui_map.read().await;
47 if let Some(ui_map) = guard.as_ref() {
48 if let Some(element) = ui_map.get_element(element_id) {
49 if let Some(ref ax_ref) = element.ax_ref {
50 return ax_ref.clone();
51 }
52 }
53 }
54 element_id.to_string()
56 }
57
58 pub fn tool_schemas() -> Vec<ToolSchema> {
60 vec![
61 ToolSchema {
62 name: "browse_navigate".to_string(),
63 description: "Navigate the browser to a URL".to_string(),
64 parameters: json!({
65 "type": "object",
66 "properties": {
67 "url": { "type": "string", "description": "URL to navigate to" }
68 },
69 "required": ["url"]
70 }),
71 returns: Some(json!({"type": "object", "properties": {"url": {"type": "string"}}})),
72 idempotent: false,
73 cache_ttl_secs: None,
74 rate_limit: None,
75 },
76 ToolSchema {
77 name: "browse_click".to_string(),
78 description: "Click on a UI element by accessibility node ID".to_string(),
79 parameters: json!({
80 "type": "object",
81 "properties": {
82 "element_id": { "type": "string", "description": "Accessibility node ID (e.g. 'el_5')" }
83 },
84 "required": ["element_id"]
85 }),
86 returns: Some(json!({"type": "object"})),
87 idempotent: false,
88 cache_ttl_secs: None,
89 rate_limit: None,
90 },
91 ToolSchema {
92 name: "browse_type".to_string(),
93 description: "Type text into a UI element by accessibility node ID".to_string(),
94 parameters: json!({
95 "type": "object",
96 "properties": {
97 "element_id": { "type": "string", "description": "Accessibility node ID of a text field" },
98 "text": { "type": "string", "description": "Text to enter" }
99 },
100 "required": ["element_id", "text"]
101 }),
102 returns: Some(json!({"type": "object"})),
103 idempotent: false,
104 cache_ttl_secs: None,
105 rate_limit: None,
106 },
107 ToolSchema {
108 name: "browse_scroll".to_string(),
109 description: "Scroll the browser page".to_string(),
110 parameters: json!({
111 "type": "object",
112 "properties": {
113 "delta_y": { "type": "integer", "description": "Scroll amount (positive = down, negative = up)" }
114 },
115 "required": ["delta_y"]
116 }),
117 returns: Some(json!({"type": "object"})),
118 idempotent: false,
119 cache_ttl_secs: None,
120 rate_limit: None,
121 },
122 ToolSchema {
123 name: "browse_observe".to_string(),
124 description: "Observe the current browser state: take screenshot, extract accessibility tree, produce UiMap".to_string(),
125 parameters: json!({
126 "type": "object",
127 "properties": {}
128 }),
129 returns: Some(json!({
130 "type": "object",
131 "properties": {
132 "url": {"type": "string"},
133 "title": {"type": "string"},
134 "ui_map": {"type": "string"},
135 "screenshot_base64": {"type": "string"}
136 }
137 })),
138 idempotent: true,
139 cache_ttl_secs: None,
140 rate_limit: None,
141 },
142 ]
143 }
144
145 async fn handle_navigate(&self, params: &Value) -> Result<Value, String> {
146 let url = params
147 .get("url")
148 .and_then(|v| v.as_str())
149 .ok_or("Missing required parameter: url")?;
150 self.backend
151 .navigate(url)
152 .await
153 .map_err(|e| e.to_string())?;
154 Ok(json!({"url": url, "status": "navigated"}))
155 }
156
157 async fn handle_click(&self, params: &Value) -> Result<Value, String> {
158 let element_id = params
159 .get("element_id")
160 .and_then(|v| v.as_str())
161 .ok_or("Missing required parameter: element_id")?;
162 let resolved_id = self.resolve_element_id(element_id).await;
163 self.backend
164 .click_element(&resolved_id)
165 .await
166 .map_err(|e| e.to_string())?;
167 Ok(json!({"element_id": element_id, "resolved_id": resolved_id, "status": "clicked"}))
168 }
169
170 async fn handle_type(&self, params: &Value) -> Result<Value, String> {
171 let element_id = params
172 .get("element_id")
173 .and_then(|v| v.as_str())
174 .ok_or("Missing required parameter: element_id")?;
175 let text = params
176 .get("text")
177 .and_then(|v| v.as_str())
178 .ok_or("Missing required parameter: text")?;
179 let resolved_id = self.resolve_element_id(element_id).await;
180 self.backend
181 .type_into_element(&resolved_id, text)
182 .await
183 .map_err(|e| e.to_string())?;
184 Ok(json!({"element_id": element_id, "resolved_id": resolved_id, "text": text, "status": "typed"}))
185 }
186
187 async fn handle_scroll(&self, params: &Value) -> Result<Value, String> {
188 let delta_y = params
189 .get("delta_y")
190 .and_then(|v| v.as_i64())
191 .ok_or("Missing required parameter: delta_y")? as i32;
192 self.backend
193 .inject_scroll(delta_y)
194 .await
195 .map_err(|e| e.to_string())?;
196 Ok(json!({"delta_y": delta_y, "status": "scrolled"}))
197 }
198
199 async fn handle_observe(&self, _params: &Value) -> Result<Value, String> {
200 let screenshot = self
201 .backend
202 .capture_screenshot()
203 .await
204 .map_err(|e| e.to_string())?;
205 let a11y_nodes = self
206 .backend
207 .get_accessibility_tree()
208 .await
209 .map_err(|e| e.to_string())?;
210 let url = self.backend.get_current_url().map_err(|e| e.to_string())?;
211 let title = self
212 .backend
213 .get_page_title()
214 .await
215 .map_err(|e| e.to_string())?;
216 let viewport = self.backend.get_viewport().map_err(|e| e.to_string())?;
217
218 let ui_map = self
219 .pipeline
220 .perceive(&screenshot, &a11y_nodes, &url, viewport)
221 .await
222 .map_err(|e| e.to_string())?;
223
224 {
226 let mut guard = self.last_ui_map.write().await;
227 *guard = Some(ui_map.clone());
228 }
229
230 let screenshot_b64 = base64::Engine::encode(
231 &base64::engine::general_purpose::STANDARD,
232 &screenshot,
233 );
234
235 let ui_map_text = ui_map.format_compact();
236
237 Ok(json!({
238 "url": url,
239 "title": title,
240 "ui_map": ui_map_text,
241 "screenshot_base64": screenshot_b64,
242 "element_count": ui_map.elements.len(),
243 "viewport": {
244 "width": viewport.width,
245 "height": viewport.height,
246 }
247 }))
248 }
249}
250
251#[async_trait]
252impl ToolExecutor for BrowserToolExecutor {
253 async fn execute(&self, tool: &str, params: &Value) -> Result<Value, String> {
254 match tool {
255 "browse_navigate" => self.handle_navigate(params).await,
256 "browse_click" => self.handle_click(params).await,
257 "browse_type" => self.handle_type(params).await,
258 "browse_scroll" => self.handle_scroll(params).await,
259 "browse_observe" => self.handle_observe(params).await,
260 _ => Err(format!("Unknown browser tool: {tool}")),
261 }
262 }
263}