1use std::sync::Arc;
7
8use async_trait::async_trait;
9use car_engine::ToolExecutor;
10use car_ir::ToolSchema;
11use serde_json::{json, Value};
12use tokio::sync::RwLock;
13
14use crate::backend::BrowserBackend;
15use crate::models::{Modifier, WaitCondition};
16use crate::perception::pipeline::PerceptionPipeline;
17use crate::perception::ui_map::UiMap;
18
19pub struct BrowserToolExecutor {
24 backend: Arc<dyn BrowserBackend>,
25 pipeline: Arc<dyn PerceptionPipeline>,
26 last_ui_map: Arc<RwLock<Option<UiMap>>>,
28}
29
30impl BrowserToolExecutor {
31 pub fn new(
33 backend: Arc<dyn BrowserBackend>,
34 pipeline: Arc<dyn PerceptionPipeline>,
35 ) -> Self {
36 Self {
37 backend,
38 pipeline,
39 last_ui_map: Arc::new(RwLock::new(None)),
40 }
41 }
42
43 async fn resolve_element_id(&self, element_id: &str) -> String {
47 let guard = self.last_ui_map.read().await;
48 if let Some(ui_map) = guard.as_ref() {
49 if let Some(element) = ui_map.get_element(element_id) {
50 if let Some(ref ax_ref) = element.ax_ref {
51 return ax_ref.clone();
52 }
53 }
54 }
55 element_id.to_string()
57 }
58
59 pub fn tool_schemas() -> Vec<ToolSchema> {
61 vec![
62 ToolSchema {
63 name: "browse_navigate".to_string(),
64 description: "Navigate the browser to a URL".to_string(),
65 parameters: json!({
66 "type": "object",
67 "properties": {
68 "url": { "type": "string", "description": "URL to navigate to" }
69 },
70 "required": ["url"]
71 }),
72 returns: Some(json!({"type": "object", "properties": {"url": {"type": "string"}}})),
73 idempotent: false,
74 cache_ttl_secs: None,
75 rate_limit: None,
76 },
77 ToolSchema {
78 name: "browse_click".to_string(),
79 description: "Click on a UI element by accessibility node ID".to_string(),
80 parameters: json!({
81 "type": "object",
82 "properties": {
83 "element_id": { "type": "string", "description": "Accessibility node ID (e.g. 'el_5')" }
84 },
85 "required": ["element_id"]
86 }),
87 returns: Some(json!({"type": "object"})),
88 idempotent: false,
89 cache_ttl_secs: None,
90 rate_limit: None,
91 },
92 ToolSchema {
93 name: "browse_type".to_string(),
94 description: "Type text into a UI element by accessibility node ID".to_string(),
95 parameters: json!({
96 "type": "object",
97 "properties": {
98 "element_id": { "type": "string", "description": "Accessibility node ID of a text field" },
99 "text": { "type": "string", "description": "Text to enter" }
100 },
101 "required": ["element_id", "text"]
102 }),
103 returns: Some(json!({"type": "object"})),
104 idempotent: false,
105 cache_ttl_secs: None,
106 rate_limit: None,
107 },
108 ToolSchema {
109 name: "browse_scroll".to_string(),
110 description: "Scroll the browser page".to_string(),
111 parameters: json!({
112 "type": "object",
113 "properties": {
114 "delta_y": { "type": "integer", "description": "Scroll amount (positive = down, negative = up)" }
115 },
116 "required": ["delta_y"]
117 }),
118 returns: Some(json!({"type": "object"})),
119 idempotent: false,
120 cache_ttl_secs: None,
121 rate_limit: None,
122 },
123 ToolSchema {
124 name: "browse_keypress".to_string(),
125 description: "Press a key with optional modifiers".to_string(),
126 parameters: json!({
127 "type": "object",
128 "properties": {
129 "key": { "type": "string", "description": "Key to press (e.g. 'Enter', 'a', 'Tab')" },
130 "modifiers": {
131 "type": "array",
132 "items": { "type": "string", "enum": ["shift", "control", "alt", "meta"] },
133 "description": "Optional modifier keys to hold during keypress"
134 }
135 },
136 "required": ["key"]
137 }),
138 returns: Some(json!({"type": "object", "properties": {"key": {"type": "string"}, "status": {"type": "string"}}})),
139 idempotent: false,
140 cache_ttl_secs: None,
141 rate_limit: None,
142 },
143 ToolSchema {
144 name: "browse_wait".to_string(),
145 description: "Wait for a browser condition to be met".to_string(),
146 parameters: json!({
147 "type": "object",
148 "properties": {
149 "condition": { "type": "string", "description": "Condition to wait for: 'page_loaded' or 'url_changed'" },
150 "timeout_ms": { "type": "number", "description": "Timeout in milliseconds (default: 5000)" }
151 },
152 "required": ["condition"]
153 }),
154 returns: Some(json!({"type": "object", "properties": {"condition": {"type": "string"}, "met": {"type": "boolean"}}})),
155 idempotent: true,
156 cache_ttl_secs: None,
157 rate_limit: None,
158 },
159 ToolSchema {
160 name: "browse_observe".to_string(),
161 description: "Observe the current browser state: take screenshot, extract accessibility tree, produce UiMap".to_string(),
162 parameters: json!({
163 "type": "object",
164 "properties": {
165 "include_screenshot": { "type": "boolean", "description": "Include base64 screenshot inline (default: false, returns file path instead)" }
166 }
167 }),
168 returns: Some(json!({
169 "type": "object",
170 "properties": {
171 "url": {"type": "string"},
172 "title": {"type": "string"},
173 "ui_map": {"type": "string"},
174 "screenshot_path": {"type": "string"},
175 "screenshot_base64": {"type": "string", "description": "Only present if include_screenshot=true"}
176 }
177 })),
178 idempotent: true,
179 cache_ttl_secs: None,
180 rate_limit: None,
181 },
182 ]
183 }
184
185 async fn handle_navigate(&self, params: &Value) -> Result<Value, String> {
186 let url = params
187 .get("url")
188 .and_then(|v| v.as_str())
189 .ok_or("Missing required parameter: url")?;
190 self.backend
191 .navigate(url)
192 .await
193 .map_err(|e| e.to_string())?;
194 Ok(json!({"url": url, "status": "navigated"}))
195 }
196
197 async fn handle_click(&self, params: &Value) -> Result<Value, String> {
198 let element_id = params
199 .get("element_id")
200 .and_then(|v| v.as_str())
201 .ok_or("Missing required parameter: element_id")?;
202 let resolved_id = self.resolve_element_id(element_id).await;
203 self.backend
204 .click_element(&resolved_id)
205 .await
206 .map_err(|e| e.to_string())?;
207 Ok(json!({"element_id": element_id, "resolved_id": resolved_id, "status": "clicked"}))
208 }
209
210 async fn handle_type(&self, params: &Value) -> Result<Value, String> {
211 let element_id = params
212 .get("element_id")
213 .and_then(|v| v.as_str())
214 .ok_or("Missing required parameter: element_id")?;
215 let text = params
216 .get("text")
217 .and_then(|v| v.as_str())
218 .ok_or("Missing required parameter: text")?;
219 let resolved_id = self.resolve_element_id(element_id).await;
220 self.backend
221 .type_into_element(&resolved_id, text)
222 .await
223 .map_err(|e| e.to_string())?;
224 Ok(json!({"element_id": element_id, "resolved_id": resolved_id, "text": text, "status": "typed"}))
225 }
226
227 async fn handle_scroll(&self, params: &Value) -> Result<Value, String> {
228 let delta_y = params
229 .get("delta_y")
230 .and_then(|v| v.as_i64())
231 .ok_or("Missing required parameter: delta_y")? as i32;
232 self.backend
233 .inject_scroll(delta_y)
234 .await
235 .map_err(|e| e.to_string())?;
236 Ok(json!({"delta_y": delta_y, "status": "scrolled"}))
237 }
238
239 async fn handle_keypress(&self, params: &Value) -> Result<Value, String> {
240 let key = params
241 .get("key")
242 .and_then(|v| v.as_str())
243 .ok_or("Missing required parameter: key")?;
244 let modifiers: Vec<Modifier> = params
245 .get("modifiers")
246 .and_then(|v| v.as_array())
247 .map(|arr| {
248 arr.iter()
249 .filter_map(|m| match m.as_str()? {
250 "shift" => Some(Modifier::Shift),
251 "control" => Some(Modifier::Control),
252 "alt" => Some(Modifier::Alt),
253 "meta" => Some(Modifier::Meta),
254 _ => None,
255 })
256 .collect()
257 })
258 .unwrap_or_default();
259 self.backend
260 .inject_keypress(key, &modifiers)
261 .await
262 .map_err(|e| e.to_string())?;
263 Ok(json!({"key": key, "status": "pressed"}))
264 }
265
266 async fn handle_wait(&self, params: &Value) -> Result<Value, String> {
267 let condition_str = params
268 .get("condition")
269 .and_then(|v| v.as_str())
270 .ok_or("Missing required parameter: condition")?;
271 let timeout_ms = params
272 .get("timeout_ms")
273 .and_then(|v| v.as_u64())
274 .unwrap_or(5000);
275 let condition = match condition_str {
281 "page_loaded" => WaitCondition::PageLoaded,
282 "url_changed" => WaitCondition::UrlChanged,
283 s if s.starts_with("a11y_contains_text:") => WaitCondition::A11yContainsText {
284 text: s["a11y_contains_text:".len()..].to_string(),
285 },
286 s if s.starts_with("element_with_name:") => {
287 let rest = &s["element_with_name:".len()..];
288 let (name_contains, role) = match rest.split_once('@') {
289 Some((n, r)) => (n.to_string(), Some(r.to_string())),
290 None => (rest.to_string(), None),
291 };
292 WaitCondition::ElementWithName {
293 name_contains,
294 role,
295 }
296 }
297 other => return Err(format!("Unknown wait condition: {other}")),
298 };
299 let met = self
300 .backend
301 .wait_until(&condition, timeout_ms)
302 .await
303 .map_err(|e| e.to_string())?;
304 Ok(json!({"condition": condition_str, "met": met}))
305 }
306
307 async fn handle_observe(&self, _params: &Value) -> Result<Value, String> {
308 let screenshot = self
309 .backend
310 .capture_screenshot()
311 .await
312 .map_err(|e| e.to_string())?;
313 let a11y_nodes = self
314 .backend
315 .get_accessibility_tree()
316 .await
317 .map_err(|e| e.to_string())?;
318 let url = self.backend.get_current_url().map_err(|e| e.to_string())?;
319 let title = self
320 .backend
321 .get_page_title()
322 .await
323 .map_err(|e| e.to_string())?;
324 let viewport = self.backend.get_viewport().map_err(|e| e.to_string())?;
325
326 let ui_map = self
327 .pipeline
328 .perceive(&screenshot, &a11y_nodes, &url, viewport)
329 .await
330 .map_err(|e| e.to_string())?;
331
332 {
334 let mut guard = self.last_ui_map.write().await;
335 *guard = Some(ui_map.clone());
336 }
337
338 let ui_map_text = ui_map.format_summary();
339
340 let screenshot_path = {
342 let dir = std::env::temp_dir().join("car-browser-screenshots");
343 let _ = std::fs::create_dir_all(&dir);
344 let path = dir.join(format!("{}.png", uuid::Uuid::new_v4()));
345 std::fs::write(&path, &screenshot).map_err(|e| e.to_string())?;
346 path.to_string_lossy().to_string()
347 };
348
349 let result = json!({
352 "url": url,
353 "title": title,
354 "ui_map": ui_map_text,
355 "screenshot_path": screenshot_path,
356 "element_count": ui_map.elements.len(),
357 "viewport": {
358 "width": viewport.width,
359 "height": viewport.height,
360 }
361 });
362
363 Ok(result)
364 }
365}
366
367#[async_trait]
368impl ToolExecutor for BrowserToolExecutor {
369 async fn execute(&self, tool: &str, params: &Value) -> Result<Value, String> {
370 match tool {
371 "browse_navigate" => self.handle_navigate(params).await,
372 "browse_click" => self.handle_click(params).await,
373 "browse_type" => self.handle_type(params).await,
374 "browse_scroll" => self.handle_scroll(params).await,
375 "browse_keypress" => self.handle_keypress(params).await,
376 "browse_wait" => self.handle_wait(params).await,
377 "browse_observe" => self.handle_observe(params).await,
378 _ => Err(format!("Unknown browser tool: {tool}")),
379 }
380 }
381}