1use std::sync::Arc;
10
11use async_trait::async_trait;
12use oxi_sdk::{AgentTool, AgentToolResult, ToolContext};
13use serde_json::{json, Value};
14use tokio::sync::{oneshot, Mutex, OnceCell};
15
16pub struct BrowserTool {
21 browser: OnceCell<Arc<oxibrowser_core::Browser>>,
23 init: BrowserInit,
25 tab: Arc<Mutex<Option<oxibrowser_core::Tab>>>,
26}
27
28enum BrowserInit {
30 Ready(Arc<oxibrowser_core::Browser>),
32 #[cfg(feature = "browser")]
34 Lazy(std::sync::Arc<crate::kernel_handle::BrowserApi>),
35}
36
37impl BrowserTool {
38 pub fn new(browser: Arc<oxibrowser_core::Browser>) -> Self {
40 let cell = OnceCell::new();
41 Self {
43 browser: cell,
44 init: BrowserInit::Ready(browser),
45 tab: Arc::new(Mutex::new(None)),
46 }
47 }
48
49 #[cfg(feature = "browser")]
54 pub fn from_kernel(kernel: &crate::kernel_handle::KernelHandle) -> Self {
55 Self {
56 browser: OnceCell::new(),
57 init: BrowserInit::Lazy(Arc::new(kernel.browser.clone())),
58 tab: Arc::new(Mutex::new(None)),
59 }
60 }
61
62 async fn get_browser(&self) -> Result<Arc<oxibrowser_core::Browser>, String> {
64 let browser = self
65 .browser
66 .get_or_try_init(|| async {
67 match &self.init {
68 BrowserInit::Ready(b) => Ok::<_, String>(b.clone()),
69 #[cfg(feature = "browser")]
70 BrowserInit::Lazy(api) => api
71 .browser()
72 .await
73 .map(Arc::clone)
74 .map_err(|e| e.to_string()),
75 }
76 })
77 .await?;
78 Ok(browser.clone())
79 }
80
81 async fn get_or_create_tab(&self) -> anyhow::Result<oxibrowser_core::Tab> {
83 let browser = self.get_browser().await.map_err(anyhow::Error::msg)?;
84 let mut guard = self.tab.lock().await;
85 let needs_new = match guard.as_ref() {
86 None => true,
87 Some(t) => t.is_closed(),
88 };
89 if needs_new {
90 let tab = browser.new_tab().await?;
91 *guard = Some(tab.clone());
92 }
93 Ok(guard.as_ref().unwrap().clone())
94 }
95}
96
97impl std::fmt::Debug for BrowserTool {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 f.debug_struct("BrowserTool").finish()
100 }
101}
102
103#[async_trait]
104impl AgentTool for BrowserTool {
105 fn name(&self) -> &str {
106 "browser"
107 }
108
109 fn label(&self) -> &str {
110 "Browser"
111 }
112
113 fn description(&self) -> &'static str {
114 "Browse the web using a headless browser. Actions: browse(url), goto(url), back(), forward(), reload(), post(url, body, content_type), click(selector), type(selector, text), press_key(key), evaluate(js), evaluate_await(js), content(), query_all(selector), wait_for(selector, timeout_ms), load_resources(), screenshot(), run_script(yaml), close()"
115 }
116
117 fn parameters_schema(&self) -> Value {
118 json!({
119 "type": "object",
120 "properties": {
121 "action": {
122 "type": "string",
123 "enum": [
124 "browse",
125 "goto",
126 "back",
127 "forward",
128 "reload",
129 "post",
130 "click",
131 "type",
132 "press_key",
133 "evaluate",
134 "evaluate_await",
135 "content",
136 "query_all",
137 "wait_for",
138 "load_resources",
139 "screenshot",
140 "run_script",
141 "close"
142 ],
143 "description": "Browser action to perform"
144 },
145 "url": {
146 "type": "string",
147 "description": "URL (browse, goto, post actions)"
148 },
149 "selector": {
150 "type": "string",
151 "description": "CSS selector (click, type, query_all, wait_for actions)"
152 },
153 "text": {
154 "type": "string",
155 "description": "Text to type (type action)"
156 },
157 "key": {
158 "type": "string",
159 "description": "Key to press (press_key action, e.g. 'Enter', 'Tab')"
160 },
161 "javascript": {
162 "type": "string",
163 "description": "JavaScript code (evaluate, evaluate_await actions)"
164 },
165 "body": {
166 "type": "string",
167 "description": "Request body (post action)"
168 },
169 "content_type": {
170 "type": "string",
171 "description": "Content-Type header (post action)"
172 },
173 "timeout_ms": {
174 "type": "integer",
175 "description": "Timeout in milliseconds (wait_for action)"
176 },
177 "width": {
178 "type": "integer",
179 "description": "Viewport width for screenshot (default 1280)"
180 },
181 "script": {
182 "type": "string",
183 "description": "YAML script for run_script action. Supports: goto, click, fill, type, wait, evaluate, extract, screenshot, if, retry, set, echo, sleep, and more."
184 }
185 },
186 "required": ["action"]
187 })
188 }
189
190 async fn execute(
191 &self,
192 _tool_call_id: &str,
193 params: Value,
194 _signal: Option<oneshot::Receiver<()>>,
195 _ctx: &ToolContext,
196 ) -> Result<AgentToolResult, String> {
197 let action = params
198 .get("action")
199 .and_then(|v| v.as_str())
200 .ok_or_else(|| "Missing required parameter: action".to_string())?;
201
202 let browser = self.get_browser().await?;
204
205 match action {
206 "browse" => {
208 let url = param_str(¶ms, "url", "browse requires 'url'")?;
209 match browser.browse(url).await {
210 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
211 Err(e) => Ok(AgentToolResult::error(format!("Browse failed: {}", e))),
212 }
213 }
214
215 "goto" => {
217 let url = param_str(¶ms, "url", "goto requires 'url'")?;
218 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
219 match tab.goto(url).await {
220 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
221 Err(e) => Ok(AgentToolResult::error(format!("Navigation failed: {}", e))),
222 }
223 }
224 "back" => {
225 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
226 match tab.back().await {
227 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
228 Err(e) => Ok(AgentToolResult::error(format!("Back failed: {}", e))),
229 }
230 }
231 "forward" => {
232 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
233 match tab.forward().await {
234 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
235 Err(e) => Ok(AgentToolResult::error(format!("Forward failed: {}", e))),
236 }
237 }
238 "reload" => {
239 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
240 match tab.reload().await {
241 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
242 Err(e) => Ok(AgentToolResult::error(format!("Reload failed: {}", e))),
243 }
244 }
245 "post" => {
246 let url = param_str(¶ms, "url", "post requires 'url'")?;
247 let body = param_str(¶ms, "body", "post requires 'body'")?;
248 let ct = params
249 .get("content_type")
250 .and_then(|v| v.as_str())
251 .unwrap_or("application/json");
252 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
253 match tab.post(url, body, ct).await {
254 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
255 Err(e) => Ok(AgentToolResult::error(format!("POST failed: {}", e))),
256 }
257 }
258
259 "click" => {
261 let selector = param_str(¶ms, "selector", "click requires 'selector'")?;
262 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
263 match tab.click(selector).await {
264 Ok(()) => Ok(AgentToolResult::success(format!("Clicked '{}'", selector))),
265 Err(e) => Ok(AgentToolResult::error(format!("Click failed: {}", e))),
266 }
267 }
268 "type" => {
269 let selector = param_str(¶ms, "selector", "type requires 'selector'")?;
270 let text = param_str(¶ms, "text", "type requires 'text'")?;
271 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
272 match tab.r#type(selector, text).await {
273 Ok(()) => Ok(AgentToolResult::success(format!(
274 "Typed {} chars into '{}'",
275 text.len(),
276 selector
277 ))),
278 Err(e) => Ok(AgentToolResult::error(format!("Type failed: {}", e))),
279 }
280 }
281 "press_key" => {
282 let key = param_str(¶ms, "key", "press_key requires 'key'")?;
283 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
284 match tab.press_key(key).await {
285 Ok(()) => Ok(AgentToolResult::success(format!("Pressed '{}'", key))),
286 Err(e) => Ok(AgentToolResult::error(format!("Press key failed: {}", e))),
287 }
288 }
289
290 "content" => {
292 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
293 match tab.content().await {
294 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
295 Err(e) => Ok(AgentToolResult::error(format!("Content failed: {}", e))),
296 }
297 }
298 "query_all" => {
299 let selector = param_str(¶ms, "selector", "query_all requires 'selector'")?;
300 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
301 match tab.query_all(selector).await {
302 Ok(texts) => {
303 let output = if texts.is_empty() {
304 format!("No elements found matching '{}'", selector)
305 } else {
306 texts
307 .iter()
308 .enumerate()
309 .map(|(i, t)| format!("{}. {}", i + 1, t))
310 .collect::<Vec<_>>()
311 .join("\n")
312 };
313 Ok(AgentToolResult::success(output))
314 }
315 Err(e) => Ok(AgentToolResult::error(format!("Query failed: {}", e))),
316 }
317 }
318 "evaluate" => {
319 let js = param_str(¶ms, "javascript", "evaluate requires 'javascript'")?;
320 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
321 match tab.evaluate(js).await {
322 Ok(value) => {
323 let output =
324 serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
325 Ok(AgentToolResult::success(output))
326 }
327 Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
328 }
329 }
330 "evaluate_await" => {
331 let js = param_str(¶ms, "javascript", "evaluate_await requires 'javascript'")?;
332 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
333 match tab.evaluate_await(js).await {
334 Ok(value) => {
335 let output =
336 serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
337 Ok(AgentToolResult::success(output))
338 }
339 Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
340 }
341 }
342
343 "wait_for" => {
345 let selector = param_str(¶ms, "selector", "wait_for requires 'selector'")?;
346 let timeout_ms = params.get("timeout_ms").and_then(|v| v.as_u64()).unwrap_or(30_000);
347 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
348 match tab.wait_for(selector, timeout_ms).await {
349 Ok(()) => Ok(AgentToolResult::success(format!(
350 "Element '{}' found within {}ms",
351 selector, timeout_ms
352 ))),
353 Err(e) => Ok(AgentToolResult::error(format!("wait_for failed: {}", e))),
354 }
355 }
356
357 "load_resources" => {
359 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
360 match tab.load_resources().await {
361 Ok(count) => {
362 Ok(AgentToolResult::success(format!("Loaded {} resources", count)))
363 }
364 Err(e) => {
365 Ok(AgentToolResult::error(format!("load_resources failed: {}", e)))
366 }
367 }
368 }
369
370 "screenshot" => {
372 let width = params.get("width").and_then(|v| v.as_u64()).unwrap_or(1280) as u32;
373 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
374 match tab.screenshot(width).await {
375 Ok(png) => Ok(AgentToolResult::success(format!(
376 "Screenshot: {} bytes (PNG, {}px wide)",
377 png.len(),
378 width
379 ))),
380 Err(e) => Ok(AgentToolResult::error(format!("Screenshot failed: {}", e))),
381 }
382 }
383
384 "run_script" => {
386 let yaml =
387 param_str(¶ms, "script", "run_script requires 'script' (YAML string)")?;
388 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
389 let mut runner = oxibrowser_core::script::ScriptRunner::new(&tab);
390 match runner.run(yaml).await {
391 Ok(result) => {
392 let output = serde_json::to_string_pretty(&result)
393 .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e));
394 Ok(AgentToolResult::success(output))
395 }
396 Err(e) => Ok(AgentToolResult::error(format!(
397 "Script failed: {}",
398 e
399 ))),
400 }
401 }
402
403 "close" => {
405 let mut guard = self.tab.lock().await;
406 if let Some(t) = guard.take() {
407 let _ = t.close().await;
408 }
409 Ok(AgentToolResult::success("Tab closed"))
410 }
411
412 other => Err(format!(
413 "Unknown browser action '{}'. Valid: browse, goto, back, forward, reload, post, click, type, press_key, evaluate, evaluate_await, content, query_all, wait_for, load_resources, screenshot, run_script, close",
414 other
415 )),
416 }
417 }
418}
419
420fn format_browse(r: &oxibrowser_core::BrowseResult) -> String {
426 let md = &r.markdown;
427 if md.len() > 50_000 {
428 let cut = md.floor_char_boundary(50_000);
429 format!(
430 "URL: {} (status {})\nTitle: {}\n\n{}\n\n... (truncated, {} total chars)",
431 r.url,
432 r.status,
433 r.title,
434 &md[..cut],
435 md.len()
436 )
437 } else if md.is_empty() {
438 format!(
439 "URL: {} (status {})\nTitle: {}\n(no content)",
440 r.url, r.status, r.title
441 )
442 } else {
443 format!(
444 "URL: {} (status {})\nTitle: {}\n\n{}",
445 r.url, r.status, r.title, md
446 )
447 }
448}
449
450fn param_str<'a>(params: &'a Value, key: &str, error_msg: &str) -> Result<&'a str, String> {
452 params
453 .get(key)
454 .and_then(|v| v.as_str())
455 .ok_or_else(|| error_msg.to_string())
456}
457
#[cfg(test)]
mod tests {
    /// Sanity check on the expected list of action names.
    ///
    /// NOTE(review): this only inspects the local list below; it does not
    /// read `parameters_schema()` or the dispatch in `execute`, so it cannot
    /// detect drift between them.
    #[test]
    fn test_schema_covers_all_actions() {
        let actions = [
            "browse",
            "goto",
            "back",
            "forward",
            "reload",
            "post",
            "click",
            "type",
            "press_key",
            "evaluate",
            "evaluate_await",
            "content",
            "query_all",
            "wait_for",
            "load_resources",
            "screenshot",
            "run_script",
            "close",
        ];
        assert!(actions.len() >= 16);
        for expected in ["browse", "goto", "run_script"] {
            assert!(actions.contains(&expected));
        }
    }
}