1use std::sync::Arc;
10
11use async_trait::async_trait;
12use oxi_sdk::{AgentTool, AgentToolResult, ToolContext};
13use serde_json::{json, Value};
14use tokio::sync::{oneshot, Mutex, OnceCell};
15
/// Agent tool that drives a headless browser.
///
/// The browser handle is resolved at most once (see `get_browser`) and a
/// single tab is kept and reused across tab-based actions until it is closed.
pub struct BrowserTool {
    /// Cache for the resolved browser handle; filled on first use from `init`.
    browser: OnceCell<Arc<oxibrowser_core::Browser>>,
    /// Where the browser handle comes from when the cache is still empty.
    init: BrowserInit,
    /// The tab shared by tab-based actions; `None` until one is created and
    /// again after the `close` action takes it.
    tab: Arc<Mutex<Option<oxibrowser_core::Tab>>>,
}
27
28enum BrowserInit {
30 Ready(Arc<oxibrowser_core::Browser>),
32 #[cfg(feature = "browser")]
34 Lazy(std::sync::Arc<crate::kernel_handle::BrowserApi>),
35}
36
37impl BrowserTool {
38 pub fn new(browser: Arc<oxibrowser_core::Browser>) -> Self {
40 let cell = OnceCell::new();
41 Self {
43 browser: cell,
44 init: BrowserInit::Ready(browser),
45 tab: Arc::new(Mutex::new(None)),
46 }
47 }
48
49 #[cfg(feature = "browser")]
54 pub fn from_kernel(kernel: &crate::kernel_handle::KernelHandle) -> Self {
55 Self {
56 browser: OnceCell::new(),
57 init: BrowserInit::Lazy(Arc::new(kernel.browser.clone())),
58 tab: Arc::new(Mutex::new(None)),
59 }
60 }
61
62 async fn get_browser(&self) -> Result<Arc<oxibrowser_core::Browser>, String> {
64 let browser = self
65 .browser
66 .get_or_try_init(|| async {
67 match &self.init {
68 BrowserInit::Ready(b) => Ok::<_, String>(b.clone()),
69 #[cfg(feature = "browser")]
70 BrowserInit::Lazy(api) => {
71 api.browser().await.map(|b| b.clone()).map_err(|e| e.to_string())
72 }
73 }
74 })
75 .await?;
76 Ok(browser.clone())
77 }
78
79 async fn get_or_create_tab(&self) -> anyhow::Result<oxibrowser_core::Tab> {
81 let browser = self.get_browser().await.map_err(anyhow::Error::msg)?;
82 let mut guard = self.tab.lock().await;
83 let needs_new = match guard.as_ref() {
84 None => true,
85 Some(t) => t.is_closed(),
86 };
87 if needs_new {
88 let tab = browser.new_tab().await?;
89 *guard = Some(tab.clone());
90 }
91 Ok(guard.as_ref().unwrap().clone())
92 }
93}
94
95impl std::fmt::Debug for BrowserTool {
96 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 f.debug_struct("BrowserTool").finish()
98 }
99}
100
101#[async_trait]
102impl AgentTool for BrowserTool {
103 fn name(&self) -> &str {
104 "browser"
105 }
106
107 fn label(&self) -> &str {
108 "Browser"
109 }
110
111 fn description(&self) -> &'static str {
112 "Browse the web using a headless browser. Actions: browse(url), goto(url), back(), forward(), reload(), post(url, body, content_type), click(selector), type(selector, text), press_key(key), evaluate(js), evaluate_await(js), content(), query_all(selector), wait_for(selector, timeout_ms), load_resources(), screenshot(), run_script(yaml), close()"
113 }
114
115 fn parameters_schema(&self) -> Value {
116 json!({
117 "type": "object",
118 "properties": {
119 "action": {
120 "type": "string",
121 "enum": [
122 "browse",
123 "goto",
124 "back",
125 "forward",
126 "reload",
127 "post",
128 "click",
129 "type",
130 "press_key",
131 "evaluate",
132 "evaluate_await",
133 "content",
134 "query_all",
135 "wait_for",
136 "load_resources",
137 "screenshot",
138 "run_script",
139 "close"
140 ],
141 "description": "Browser action to perform"
142 },
143 "url": {
144 "type": "string",
145 "description": "URL (browse, goto, post actions)"
146 },
147 "selector": {
148 "type": "string",
149 "description": "CSS selector (click, type, query_all, wait_for actions)"
150 },
151 "text": {
152 "type": "string",
153 "description": "Text to type (type action)"
154 },
155 "key": {
156 "type": "string",
157 "description": "Key to press (press_key action, e.g. 'Enter', 'Tab')"
158 },
159 "javascript": {
160 "type": "string",
161 "description": "JavaScript code (evaluate, evaluate_await actions)"
162 },
163 "body": {
164 "type": "string",
165 "description": "Request body (post action)"
166 },
167 "content_type": {
168 "type": "string",
169 "description": "Content-Type header (post action)"
170 },
171 "timeout_ms": {
172 "type": "integer",
173 "description": "Timeout in milliseconds (wait_for action)"
174 },
175 "width": {
176 "type": "integer",
177 "description": "Viewport width for screenshot (default 1280)"
178 },
179 "script": {
180 "type": "string",
181 "description": "YAML script for run_script action. Supports: goto, click, fill, type, wait, evaluate, extract, screenshot, if, retry, set, echo, sleep, and more."
182 }
183 },
184 "required": ["action"]
185 })
186 }
187
188 async fn execute(
189 &self,
190 _tool_call_id: &str,
191 params: Value,
192 _signal: Option<oneshot::Receiver<()>>,
193 _ctx: &ToolContext,
194 ) -> Result<AgentToolResult, String> {
195 let action = params
196 .get("action")
197 .and_then(|v| v.as_str())
198 .ok_or_else(|| "Missing required parameter: action".to_string())?;
199
200 let browser = self.get_browser().await?;
202
203 match action {
204 "browse" => {
206 let url = param_str(¶ms, "url", "browse requires 'url'")?;
207 match browser.browse(url).await {
208 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
209 Err(e) => Ok(AgentToolResult::error(format!("Browse failed: {}", e))),
210 }
211 }
212
213 "goto" => {
215 let url = param_str(¶ms, "url", "goto requires 'url'")?;
216 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
217 match tab.goto(url).await {
218 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
219 Err(e) => Ok(AgentToolResult::error(format!("Navigation failed: {}", e))),
220 }
221 }
222 "back" => {
223 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
224 match tab.back().await {
225 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
226 Err(e) => Ok(AgentToolResult::error(format!("Back failed: {}", e))),
227 }
228 }
229 "forward" => {
230 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
231 match tab.forward().await {
232 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
233 Err(e) => Ok(AgentToolResult::error(format!("Forward failed: {}", e))),
234 }
235 }
236 "reload" => {
237 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
238 match tab.reload().await {
239 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
240 Err(e) => Ok(AgentToolResult::error(format!("Reload failed: {}", e))),
241 }
242 }
243 "post" => {
244 let url = param_str(¶ms, "url", "post requires 'url'")?;
245 let body = param_str(¶ms, "body", "post requires 'body'")?;
246 let ct = params
247 .get("content_type")
248 .and_then(|v| v.as_str())
249 .unwrap_or("application/json");
250 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
251 match tab.post(url, body, ct).await {
252 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
253 Err(e) => Ok(AgentToolResult::error(format!("POST failed: {}", e))),
254 }
255 }
256
257 "click" => {
259 let selector = param_str(¶ms, "selector", "click requires 'selector'")?;
260 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
261 match tab.click(selector).await {
262 Ok(()) => Ok(AgentToolResult::success(format!("Clicked '{}'", selector))),
263 Err(e) => Ok(AgentToolResult::error(format!("Click failed: {}", e))),
264 }
265 }
266 "type" => {
267 let selector = param_str(¶ms, "selector", "type requires 'selector'")?;
268 let text = param_str(¶ms, "text", "type requires 'text'")?;
269 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
270 match tab.r#type(selector, text).await {
271 Ok(()) => Ok(AgentToolResult::success(format!(
272 "Typed {} chars into '{}'",
273 text.len(),
274 selector
275 ))),
276 Err(e) => Ok(AgentToolResult::error(format!("Type failed: {}", e))),
277 }
278 }
279 "press_key" => {
280 let key = param_str(¶ms, "key", "press_key requires 'key'")?;
281 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
282 match tab.press_key(key).await {
283 Ok(()) => Ok(AgentToolResult::success(format!("Pressed '{}'", key))),
284 Err(e) => Ok(AgentToolResult::error(format!("Press key failed: {}", e))),
285 }
286 }
287
288 "content" => {
290 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
291 match tab.content().await {
292 Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
293 Err(e) => Ok(AgentToolResult::error(format!("Content failed: {}", e))),
294 }
295 }
296 "query_all" => {
297 let selector = param_str(¶ms, "selector", "query_all requires 'selector'")?;
298 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
299 match tab.query_all(selector).await {
300 Ok(texts) => {
301 let output = if texts.is_empty() {
302 format!("No elements found matching '{}'", selector)
303 } else {
304 texts
305 .iter()
306 .enumerate()
307 .map(|(i, t)| format!("{}. {}", i + 1, t))
308 .collect::<Vec<_>>()
309 .join("\n")
310 };
311 Ok(AgentToolResult::success(output))
312 }
313 Err(e) => Ok(AgentToolResult::error(format!("Query failed: {}", e))),
314 }
315 }
316 "evaluate" => {
317 let js = param_str(¶ms, "javascript", "evaluate requires 'javascript'")?;
318 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
319 match tab.evaluate(js).await {
320 Ok(value) => {
321 let output =
322 serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
323 Ok(AgentToolResult::success(output))
324 }
325 Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
326 }
327 }
328 "evaluate_await" => {
329 let js = param_str(¶ms, "javascript", "evaluate_await requires 'javascript'")?;
330 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
331 match tab.evaluate_await(js).await {
332 Ok(value) => {
333 let output =
334 serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
335 Ok(AgentToolResult::success(output))
336 }
337 Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
338 }
339 }
340
341 "wait_for" => {
343 let selector = param_str(¶ms, "selector", "wait_for requires 'selector'")?;
344 let timeout_ms = params.get("timeout_ms").and_then(|v| v.as_u64()).unwrap_or(30_000);
345 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
346 match tab.wait_for(selector, timeout_ms).await {
347 Ok(()) => Ok(AgentToolResult::success(format!(
348 "Element '{}' found within {}ms",
349 selector, timeout_ms
350 ))),
351 Err(e) => Ok(AgentToolResult::error(format!("wait_for failed: {}", e))),
352 }
353 }
354
355 "load_resources" => {
357 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
358 match tab.load_resources().await {
359 Ok(count) => {
360 Ok(AgentToolResult::success(format!("Loaded {} resources", count)))
361 }
362 Err(e) => {
363 Ok(AgentToolResult::error(format!("load_resources failed: {}", e)))
364 }
365 }
366 }
367
368 "screenshot" => {
370 let width = params.get("width").and_then(|v| v.as_u64()).unwrap_or(1280) as u32;
371 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
372 match tab.screenshot(width).await {
373 Ok(png) => Ok(AgentToolResult::success(format!(
374 "Screenshot: {} bytes (PNG, {}px wide)",
375 png.len(),
376 width
377 ))),
378 Err(e) => Ok(AgentToolResult::error(format!("Screenshot failed: {}", e))),
379 }
380 }
381
382 "run_script" => {
384 let yaml =
385 param_str(¶ms, "script", "run_script requires 'script' (YAML string)")?;
386 let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
387 let mut runner = oxibrowser_core::script::ScriptRunner::new(&tab);
388 match runner.run(yaml).await {
389 Ok(result) => {
390 let output = serde_json::to_string_pretty(&result)
391 .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e));
392 Ok(AgentToolResult::success(output))
393 }
394 Err(e) => Ok(AgentToolResult::error(format!(
395 "Script failed: {}",
396 e
397 ))),
398 }
399 }
400
401 "close" => {
403 let mut guard = self.tab.lock().await;
404 if let Some(t) = guard.take() {
405 let _ = t.close().await;
406 }
407 Ok(AgentToolResult::success("Tab closed"))
408 }
409
410 other => Err(format!(
411 "Unknown browser action '{}'. Valid: browse, goto, back, forward, reload, post, click, type, press_key, evaluate, evaluate_await, content, query_all, wait_for, load_resources, screenshot, run_script, close",
412 other
413 )),
414 }
415 }
416}
417
418fn format_browse(r: &oxibrowser_core::BrowseResult) -> String {
424 let md = &r.markdown;
425 if md.len() > 50_000 {
426 let cut = md.floor_char_boundary(50_000);
427 format!(
428 "URL: {} (status {})\nTitle: {}\n\n{}\n\n... (truncated, {} total chars)",
429 r.url,
430 r.status,
431 r.title,
432 &md[..cut],
433 md.len()
434 )
435 } else if md.is_empty() {
436 format!(
437 "URL: {} (status {})\nTitle: {}\n(no content)",
438 r.url, r.status, r.title
439 )
440 } else {
441 format!(
442 "URL: {} (status {})\nTitle: {}\n\n{}",
443 r.url, r.status, r.title, md
444 )
445 }
446}
447
448fn param_str<'a>(params: &'a Value, key: &str, error_msg: &str) -> Result<&'a str, String> {
450 params
451 .get(key)
452 .and_then(|v| v.as_str())
453 .ok_or_else(|| error_msg.to_string())
454}
455
#[cfg(test)]
mod tests {
    /// Sanity check on the list of supported action names.
    ///
    /// NOTE(review): this asserts on a locally defined list only; it does not
    /// read the schema produced by `parameters_schema`, so it cannot detect
    /// drift between the two.
    #[test]
    fn test_schema_covers_all_actions() {
        let actions = [
            "browse",
            "goto",
            "back",
            "forward",
            "reload",
            "post",
            "click",
            "type",
            "press_key",
            "evaluate",
            "evaluate_await",
            "content",
            "query_all",
            "wait_for",
            "load_resources",
            "screenshot",
            "run_script",
            "close",
        ];
        assert!(actions.len() >= 16);
        for required in ["browse", "goto", "run_script"] {
            assert!(actions.contains(&required));
        }
    }
}