1use std::sync::Arc;
12
13use async_trait::async_trait;
14use oxi_sdk::{AgentTool, AgentToolResult, ToolContext};
15use serde_json::{json, Value};
16use tokio::sync::{oneshot, Mutex};
17
18pub struct BrowserTool {
22 browser: Arc<oxibrowser_core::Browser>,
23 tab: Arc<Mutex<Option<oxibrowser_core::Tab>>>,
24}
25
26impl BrowserTool {
27 pub fn new(browser: Arc<oxibrowser_core::Browser>) -> Self {
29 Self {
30 browser,
31 tab: Arc::new(Mutex::new(None)),
32 }
33 }
34
35 #[cfg(feature = "browser")]
37 pub fn from_kernel(kernel: &crate::kernel_handle::KernelHandle) -> Self {
38 Self::new(kernel.browser.browser().clone())
39 }
40
41 async fn get_or_create_tab(&self) -> anyhow::Result<oxibrowser_core::Tab> {
43 let mut guard = self.tab.lock().await;
44 let needs_new = match guard.as_ref() {
45 None => true,
46 Some(t) => t.is_closed(),
47 };
48 if needs_new {
49 let tab = self.browser.new_tab().await?;
50 *guard = Some(tab.clone());
51 }
52 Ok(guard.as_ref().unwrap().clone())
53 }
54}
55
56impl std::fmt::Debug for BrowserTool {
57 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58 f.debug_struct("BrowserTool").finish()
59 }
60}
61
62#[async_trait]
63impl AgentTool for BrowserTool {
64 fn name(&self) -> &str {
65 "browser"
66 }
67
68 fn label(&self) -> &str {
69 "Browser"
70 }
71
72 fn description(&self) -> &'static str {
73 "Browse the web using a headless browser. Actions: browse(url), goto(url), back(), forward(), reload(), post(url, body, content_type), click(selector), type(selector, text), press_key(key), evaluate(js), evaluate_await(js), content(), query_all(selector), wait_for(selector, timeout_ms), load_resources(), screenshot(), run_script(yaml), close()"
74 }
75
76 fn parameters_schema(&self) -> Value {
77 json!({
78 "type": "object",
79 "properties": {
80 "action": {
81 "type": "string",
82 "enum": [
83 "browse",
84 "goto",
85 "back",
86 "forward",
87 "reload",
88 "post",
89 "click",
90 "type",
91 "press_key",
92 "evaluate",
93 "evaluate_await",
94 "content",
95 "query_all",
96 "wait_for",
97 "load_resources",
98 "screenshot",
99 "run_script",
100 "close"
101 ],
102 "description": "Browser action to perform"
103 },
104 "url": {
105 "type": "string",
106 "description": "URL (browse, goto, post actions)"
107 },
108 "selector": {
109 "type": "string",
110 "description": "CSS selector (click, type, query_all, wait_for actions)"
111 },
112 "text": {
113 "type": "string",
114 "description": "Text to type (type action)"
115 },
116 "key": {
117 "type": "string",
118 "description": "Key to press (press_key action, e.g. 'Enter', 'Tab')"
119 },
120 "javascript": {
121 "type": "string",
122 "description": "JavaScript code (evaluate, evaluate_await actions)"
123 },
124 "body": {
125 "type": "string",
126 "description": "Request body (post action)"
127 },
128 "content_type": {
129 "type": "string",
130 "description": "Content-Type header (post action)"
131 },
132 "timeout_ms": {
133 "type": "integer",
134 "description": "Timeout in milliseconds (wait_for action)"
135 },
136 "width": {
137 "type": "integer",
138 "description": "Viewport width for screenshot (default 1280)"
139 },
140 "script": {
141 "type": "string",
142 "description": "YAML script for run_script action. Supports: goto, click, fill, type, wait, evaluate, extract, screenshot, if, retry, set, echo, sleep, and more."
143 }
144 },
145 "required": ["action"]
146 })
147 }
148
149 async fn execute(
150 &self,
151 _tool_call_id: &str,
152 params: Value,
153 _signal: Option<oneshot::Receiver<()>>,
154 _ctx: &ToolContext,
155 ) -> Result<AgentToolResult, String> {
156 let action = params
157 .get("action")
158 .and_then(|v| v.as_str())
159 .ok_or_else(|| "Missing required parameter: action".to_string())?;
160
161 match action {
162 "browse" => {
164 let url = param_str(¶ms, "url", "browse requires 'url'")?;
165 match self.browser.browse(url).await {
166 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
167 Err(e) => Ok(AgentToolResult::error(format!("Browse failed: {}", e))),
168 }
169 }
170
171 "goto" => {
173 let url = param_str(¶ms, "url", "goto requires 'url'")?;
174 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
175 match tab.goto(url).await {
176 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
177 Err(e) => Ok(AgentToolResult::error(format!("Navigation failed: {}", e))),
178 }
179 }
180 "back" => {
181 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
182 match tab.back().await {
183 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
184 Err(e) => Ok(AgentToolResult::error(format!("Back failed: {}", e))),
185 }
186 }
187 "forward" => {
188 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
189 match tab.forward().await {
190 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
191 Err(e) => Ok(AgentToolResult::error(format!("Forward failed: {}", e))),
192 }
193 }
194 "reload" => {
195 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
196 match tab.reload().await {
197 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
198 Err(e) => Ok(AgentToolResult::error(format!("Reload failed: {}", e))),
199 }
200 }
201 "post" => {
202 let url = param_str(¶ms, "url", "post requires 'url'")?;
203 let body = param_str(¶ms, "body", "post requires 'body'")?;
204 let ct = params
205 .get("content_type")
206 .and_then(|v| v.as_str())
207 .unwrap_or("application/json");
208 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
209 match tab.post(url, body, ct).await {
210 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
211 Err(e) => Ok(AgentToolResult::error(format!("POST failed: {}", e))),
212 }
213 }
214
215 "click" => {
217 let selector = param_str(¶ms, "selector", "click requires 'selector'")?;
218 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
219 match tab.click(selector).await {
220 Ok(()) => Ok(AgentToolResult::success(format!("Clicked '{}'", selector))),
221 Err(e) => Ok(AgentToolResult::error(format!("Click failed: {}", e))),
222 }
223 }
224 "type" => {
225 let selector = param_str(¶ms, "selector", "type requires 'selector'")?;
226 let text = param_str(¶ms, "text", "type requires 'text'")?;
227 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
228 match tab.r#type(selector, text).await {
229 Ok(()) => Ok(AgentToolResult::success(format!(
230 "Typed {} chars into '{}'",
231 text.len(),
232 selector
233 ))),
234 Err(e) => Ok(AgentToolResult::error(format!("Type failed: {}", e))),
235 }
236 }
237 "press_key" => {
238 let key = param_str(¶ms, "key", "press_key requires 'key'")?;
239 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
240 match tab.press_key(key).await {
241 Ok(()) => Ok(AgentToolResult::success(format!("Pressed '{}'", key))),
242 Err(e) => Ok(AgentToolResult::error(format!("Press key failed: {}", e))),
243 }
244 }
245
246 "content" => {
248 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
249 match tab.content().await {
250 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
251 Err(e) => Ok(AgentToolResult::error(format!("Content failed: {}", e))),
252 }
253 }
254 "query_all" => {
255 let selector = param_str(¶ms, "selector", "query_all requires 'selector'")?;
256 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
257 match tab.query_all(selector).await {
258 Ok(texts) => {
259 let output = if texts.is_empty() {
260 format!("No elements found matching '{}'", selector)
261 } else {
262 texts
263 .iter()
264 .enumerate()
265 .map(|(i, t)| format!("{}. {}", i + 1, t))
266 .collect::<Vec<_>>()
267 .join("\n")
268 };
269 Ok(AgentToolResult::success(output))
270 }
271 Err(e) => Ok(AgentToolResult::error(format!("Query failed: {}", e))),
272 }
273 }
274 "evaluate" => {
275 let js = param_str(¶ms, "javascript", "evaluate requires 'javascript'")?;
276 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
277 match tab.evaluate(js).await {
278 Ok(value) => {
279 let output =
280 serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
281 Ok(AgentToolResult::success(output))
282 }
283 Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
284 }
285 }
286 "evaluate_await" => {
287 let js = param_str(¶ms, "javascript", "evaluate_await requires 'javascript'")?;
288 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
289 match tab.evaluate_await(js).await {
290 Ok(value) => {
291 let output =
292 serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
293 Ok(AgentToolResult::success(output))
294 }
295 Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
296 }
297 }
298
299 "wait_for" => {
301 let selector = param_str(¶ms, "selector", "wait_for requires 'selector'")?;
302 let timeout_ms = params.get("timeout_ms").and_then(|v| v.as_u64()).unwrap_or(30_000);
303 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
304 match tab.wait_for(selector, timeout_ms).await {
305 Ok(()) => Ok(AgentToolResult::success(format!(
306 "Element '{}' found within {}ms",
307 selector, timeout_ms
308 ))),
309 Err(e) => Ok(AgentToolResult::error(format!("wait_for failed: {}", e))),
310 }
311 }
312
313 "load_resources" => {
315 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
316 match tab.load_resources().await {
317 Ok(count) => {
318 Ok(AgentToolResult::success(format!("Loaded {} resources", count)))
319 }
320 Err(e) => {
321 Ok(AgentToolResult::error(format!("load_resources failed: {}", e)))
322 }
323 }
324 }
325
326 "screenshot" => {
328 let width = params.get("width").and_then(|v| v.as_u64()).unwrap_or(1280) as u32;
329 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
330 match tab.screenshot(width).await {
331 Ok(png) => Ok(AgentToolResult::success(format!(
332 "Screenshot: {} bytes (PNG, {}px wide)",
333 png.len(),
334 width
335 ))),
336 Err(e) => Ok(AgentToolResult::error(format!("Screenshot failed: {}", e))),
337 }
338 }
339
340 "run_script" => {
342 let yaml =
343 param_str(¶ms, "script", "run_script requires 'script' (YAML string)")?;
344 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
345 let mut runner = oxibrowser_core::script::ScriptRunner::new(&tab);
346 match runner.run(yaml).await {
347 Ok(result) => {
348 let output = serde_json::to_string_pretty(&result)
349 .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e));
350 Ok(AgentToolResult::success(output))
351 }
352 Err(e) => Ok(AgentToolResult::error(format!(
353 "Script failed: {}",
354 e
355 ))),
356 }
357 }
358
359 "close" => {
361 let mut guard = self.tab.lock().await;
362 if let Some(t) = guard.take() {
363 let _ = t.close().await;
364 }
365 Ok(AgentToolResult::success("Tab closed"))
366 }
367
368 other => Err(format!(
369 "Unknown browser action '{}'. Valid: browse, goto, back, forward, reload, post, click, type, press_key, evaluate, evaluate_await, content, query_all, wait_for, load_resources, screenshot, run_script, close",
370 other
371 )),
372 }
373 }
374}
375
376fn format_browse(r: &oxibrowser_core::BrowseResult) -> String {
382 let md = &r.markdown;
383 if md.len() > 50_000 {
384 let cut = md.floor_char_boundary(50_000);
386 format!(
387 "URL: {} (status {})\nTitle: {}\n\n{}\n\n... (truncated, {} total chars)",
388 r.url,
389 r.status,
390 r.title,
391 &md[..cut],
392 md.len()
393 )
394 } else if md.is_empty() {
395 format!(
396 "URL: {} (status {})\nTitle: {}\n(no content)",
397 r.url, r.status, r.title
398 )
399 } else {
400 format!(
401 "URL: {} (status {})\nTitle: {}\n\n{}",
402 r.url, r.status, r.title, md
403 )
404 }
405}
406
407fn param_str<'a>(params: &'a Value, key: &str, error_msg: &str) -> Result<&'a str, String> {
409 params
410 .get(key)
411 .and_then(|v| v.as_str())
412 .ok_or_else(|| error_msg.to_string())
413}
414
415#[cfg(test)]
416mod tests {
417 use super::*;
418
419 #[test]
420 fn test_schema_covers_all_actions() {
421 let actions = vec![
422 "browse",
423 "goto",
424 "back",
425 "forward",
426 "reload",
427 "post",
428 "click",
429 "type",
430 "press_key",
431 "evaluate",
432 "evaluate_await",
433 "content",
434 "query_all",
435 "wait_for",
436 "load_resources",
437 "screenshot",
438 "run_script",
439 "close",
440 ];
441 assert!(actions.len() >= 16);
442 assert!(actions.contains(&"browse"));
443 assert!(actions.contains(&"goto"));
444 assert!(actions.contains(&"run_script"));
445 }
446}