Skip to main content

oxios_kernel/tools/browser/
browser_tool.rs

//! Agent-facing browser tool — the single gateway to all browser capabilities.
//!
//! Wraps `oxibrowser_core::Browser` behind the `AgentTool` interface so agents
//! can browse the web in their tool-calling loop.
//!
//! Three usage patterns:
//! - **`browse`** — one-shot URL → Markdown (90% of agent requests)
//! - **`goto`/`click`/`type`/...** — interactive session via `Tab`
//! - **`run_script`** — YAML scenario execution via `ScriptRunner`
10
11use std::sync::Arc;
12
13use async_trait::async_trait;
14use oxi_sdk::{AgentTool, AgentToolResult, ToolContext};
15use serde_json::{json, Value};
16use tokio::sync::{oneshot, Mutex};
17
18/// Agent tool for web browsing via the embedded OxiBrowser engine.
19///
20/// Owns a `Browser` instance and lazily creates a `Tab` for interactive use.
21pub struct BrowserTool {
22    browser: Arc<oxibrowser_core::Browser>,
23    tab: Arc<Mutex<Option<oxibrowser_core::Tab>>>,
24}
25
26impl BrowserTool {
27    /// Create a new browser tool with an already-initialized browser.
28    pub fn new(browser: Arc<oxibrowser_core::Browser>) -> Self {
29        Self {
30            browser,
31            tab: Arc::new(Mutex::new(None)),
32        }
33    }
34
35    /// Create a `BrowserTool` from a [`KernelHandle`].
36    #[cfg(feature = "browser")]
37    pub fn from_kernel(kernel: &crate::kernel_handle::KernelHandle) -> Self {
38        Self::new(kernel.browser.browser().clone())
39    }
40
41    /// Get or create an interactive tab.
42    async fn get_or_create_tab(&self) -> anyhow::Result<oxibrowser_core::Tab> {
43        let mut guard = self.tab.lock().await;
44        let needs_new = match guard.as_ref() {
45            None => true,
46            Some(t) => t.is_closed(),
47        };
48        if needs_new {
49            let tab = self.browser.new_tab().await?;
50            *guard = Some(tab.clone());
51        }
52        Ok(guard.as_ref().unwrap().clone())
53    }
54}
55
56impl std::fmt::Debug for BrowserTool {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        f.debug_struct("BrowserTool").finish()
59    }
60}
61
62#[async_trait]
63impl AgentTool for BrowserTool {
64    fn name(&self) -> &str {
65        "browser"
66    }
67
68    fn label(&self) -> &str {
69        "Browser"
70    }
71
72    fn description(&self) -> &'static str {
73        "Browse the web using a headless browser. Actions: browse(url), goto(url), back(), forward(), reload(), post(url, body, content_type), click(selector), type(selector, text), press_key(key), evaluate(js), evaluate_await(js), content(), query_all(selector), wait_for(selector, timeout_ms), load_resources(), screenshot(), run_script(yaml), close()"
74    }
75
76    fn parameters_schema(&self) -> Value {
77        json!({
78            "type": "object",
79            "properties": {
80                "action": {
81                    "type": "string",
82                    "enum": [
83                        "browse",
84                        "goto",
85                        "back",
86                        "forward",
87                        "reload",
88                        "post",
89                        "click",
90                        "type",
91                        "press_key",
92                        "evaluate",
93                        "evaluate_await",
94                        "content",
95                        "query_all",
96                        "wait_for",
97                        "load_resources",
98                        "screenshot",
99                        "run_script",
100                        "close"
101                    ],
102                    "description": "Browser action to perform"
103                },
104                "url": {
105                    "type": "string",
106                    "description": "URL (browse, goto, post actions)"
107                },
108                "selector": {
109                    "type": "string",
110                    "description": "CSS selector (click, type, query_all, wait_for actions)"
111                },
112                "text": {
113                    "type": "string",
114                    "description": "Text to type (type action)"
115                },
116                "key": {
117                    "type": "string",
118                    "description": "Key to press (press_key action, e.g. 'Enter', 'Tab')"
119                },
120                "javascript": {
121                    "type": "string",
122                    "description": "JavaScript code (evaluate, evaluate_await actions)"
123                },
124                "body": {
125                    "type": "string",
126                    "description": "Request body (post action)"
127                },
128                "content_type": {
129                    "type": "string",
130                    "description": "Content-Type header (post action)"
131                },
132                "timeout_ms": {
133                    "type": "integer",
134                    "description": "Timeout in milliseconds (wait_for action)"
135                },
136                "width": {
137                    "type": "integer",
138                    "description": "Viewport width for screenshot (default 1280)"
139                },
140                "script": {
141                    "type": "string",
142                    "description": "YAML script for run_script action. Supports: goto, click, fill, type, wait, evaluate, extract, screenshot, if, retry, set, echo, sleep, and more."
143                }
144            },
145            "required": ["action"]
146        })
147    }
148
149    async fn execute(
150        &self,
151        _tool_call_id: &str,
152        params: Value,
153        _signal: Option<oneshot::Receiver<()>>,
154        _ctx: &ToolContext,
155    ) -> Result<AgentToolResult, String> {
156        let action = params
157            .get("action")
158            .and_then(|v| v.as_str())
159            .ok_or_else(|| "Missing required parameter: action".to_string())?;
160
161        match action {
162            // ── One-shot browse ────────────────────────────────────
163            "browse" => {
164                let url = param_str(&params, "url", "browse requires 'url'")?;
165                match self.browser.browse(url).await {
166                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
167                    Err(e) => Ok(AgentToolResult::error(format!("Browse failed: {}", e))),
168                }
169            }
170
171            // ── Interactive navigation ─────────────────────────────
172            "goto" => {
173                let url = param_str(&params, "url", "goto requires 'url'")?;
174                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
175                match tab.goto(url).await {
176                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
177                    Err(e) => Ok(AgentToolResult::error(format!("Navigation failed: {}", e))),
178                }
179            }
180            "back" => {
181                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
182                match tab.back().await {
183                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
184                    Err(e) => Ok(AgentToolResult::error(format!("Back failed: {}", e))),
185                }
186            }
187            "forward" => {
188                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
189                match tab.forward().await {
190                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
191                    Err(e) => Ok(AgentToolResult::error(format!("Forward failed: {}", e))),
192                }
193            }
194            "reload" => {
195                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
196                match tab.reload().await {
197                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
198                    Err(e) => Ok(AgentToolResult::error(format!("Reload failed: {}", e))),
199                }
200            }
201            "post" => {
202                let url = param_str(&params, "url", "post requires 'url'")?;
203                let body = param_str(&params, "body", "post requires 'body'")?;
204                let ct = params
205                    .get("content_type")
206                    .and_then(|v| v.as_str())
207                    .unwrap_or("application/json");
208                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
209                match tab.post(url, body, ct).await {
210                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
211                    Err(e) => Ok(AgentToolResult::error(format!("POST failed: {}", e))),
212                }
213            }
214
215            // ── Interaction ────────────────────────────────────────
216            "click" => {
217                let selector = param_str(&params, "selector", "click requires 'selector'")?;
218                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
219                match tab.click(selector).await {
220                    Ok(()) => Ok(AgentToolResult::success(format!("Clicked '{}'", selector))),
221                    Err(e) => Ok(AgentToolResult::error(format!("Click failed: {}", e))),
222                }
223            }
224            "type" => {
225                let selector = param_str(&params, "selector", "type requires 'selector'")?;
226                let text = param_str(&params, "text", "type requires 'text'")?;
227                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
228                match tab.r#type(selector, text).await {
229                    Ok(()) => Ok(AgentToolResult::success(format!(
230                        "Typed {} chars into '{}'",
231                        text.len(),
232                        selector
233                    ))),
234                    Err(e) => Ok(AgentToolResult::error(format!("Type failed: {}", e))),
235                }
236            }
237            "press_key" => {
238                let key = param_str(&params, "key", "press_key requires 'key'")?;
239                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
240                match tab.press_key(key).await {
241                    Ok(()) => Ok(AgentToolResult::success(format!("Pressed '{}'", key))),
242                    Err(e) => Ok(AgentToolResult::error(format!("Press key failed: {}", e))),
243                }
244            }
245
246            // ── Content extraction ─────────────────────────────────
247            "content" => {
248                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
249                match tab.content().await {
250                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
251                    Err(e) => Ok(AgentToolResult::error(format!("Content failed: {}", e))),
252                }
253            }
254            "query_all" => {
255                let selector = param_str(&params, "selector", "query_all requires 'selector'")?;
256                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
257                match tab.query_all(selector).await {
258                    Ok(texts) => {
259                        let output = if texts.is_empty() {
260                            format!("No elements found matching '{}'", selector)
261                        } else {
262                            texts
263                                .iter()
264                                .enumerate()
265                                .map(|(i, t)| format!("{}. {}", i + 1, t))
266                                .collect::<Vec<_>>()
267                                .join("\n")
268                        };
269                        Ok(AgentToolResult::success(output))
270                    }
271                    Err(e) => Ok(AgentToolResult::error(format!("Query failed: {}", e))),
272                }
273            }
274            "evaluate" => {
275                let js = param_str(&params, "javascript", "evaluate requires 'javascript'")?;
276                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
277                match tab.evaluate(js).await {
278                    Ok(value) => {
279                        let output =
280                            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
281                        Ok(AgentToolResult::success(output))
282                    }
283                    Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
284                }
285            }
286            "evaluate_await" => {
287                let js = param_str(&params, "javascript", "evaluate_await requires 'javascript'")?;
288                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
289                match tab.evaluate_await(js).await {
290                    Ok(value) => {
291                        let output =
292                            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
293                        Ok(AgentToolResult::success(output))
294                    }
295                    Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
296                }
297            }
298
299            // ── Waiting ────────────────────────────────────────────
300            "wait_for" => {
301                let selector = param_str(&params, "selector", "wait_for requires 'selector'")?;
302                let timeout_ms = params.get("timeout_ms").and_then(|v| v.as_u64()).unwrap_or(30_000);
303                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
304                match tab.wait_for(selector, timeout_ms).await {
305                    Ok(()) => Ok(AgentToolResult::success(format!(
306                        "Element '{}' found within {}ms",
307                        selector, timeout_ms
308                    ))),
309                    Err(e) => Ok(AgentToolResult::error(format!("wait_for failed: {}", e))),
310                }
311            }
312
313            // ── Sub-resources ──────────────────────────────────────
314            "load_resources" => {
315                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
316                match tab.load_resources().await {
317                    Ok(count) => {
318                        Ok(AgentToolResult::success(format!("Loaded {} resources", count)))
319                    }
320                    Err(e) => {
321                        Ok(AgentToolResult::error(format!("load_resources failed: {}", e)))
322                    }
323                }
324            }
325
326            // ── Screenshot ─────────────────────────────────────────
327            "screenshot" => {
328                let width = params.get("width").and_then(|v| v.as_u64()).unwrap_or(1280) as u32;
329                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
330                match tab.screenshot(width).await {
331                    Ok(png) => Ok(AgentToolResult::success(format!(
332                        "Screenshot: {} bytes (PNG, {}px wide)",
333                        png.len(),
334                        width
335                    ))),
336                    Err(e) => Ok(AgentToolResult::error(format!("Screenshot failed: {}", e))),
337                }
338            }
339
340            // ── Script execution ────────────────────────────────────
341            "run_script" => {
342                let yaml =
343                    param_str(&params, "script", "run_script requires 'script' (YAML string)")?;
344                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
345                let mut runner = oxibrowser_core::script::ScriptRunner::new(&tab);
346                match runner.run(yaml).await {
347                    Ok(result) => {
348                        let output = serde_json::to_string_pretty(&result)
349                            .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e));
350                        Ok(AgentToolResult::success(output))
351                    }
352                    Err(e) => Ok(AgentToolResult::error(format!(
353                        "Script failed: {}",
354                        e
355                    ))),
356                }
357            }
358
359            // ── Lifecycle ──────────────────────────────────────────
360            "close" => {
361                let mut guard = self.tab.lock().await;
362                if let Some(t) = guard.take() {
363                    let _ = t.close().await;
364                }
365                Ok(AgentToolResult::success("Tab closed"))
366            }
367
368            other => Err(format!(
369                "Unknown browser action '{}'. Valid: browse, goto, back, forward, reload, post, click, type, press_key, evaluate, evaluate_await, content, query_all, wait_for, load_resources, screenshot, run_script, close",
370                other
371            )),
372        }
373    }
374}
375
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
379
380/// Format a `BrowseResult` for agent consumption.
381fn format_browse(r: &oxibrowser_core::BrowseResult) -> String {
382    let md = &r.markdown;
383    if md.len() > 50_000 {
384        // Find a safe UTF-8 boundary near 50_000 to avoid panic on multi-byte chars.
385        let cut = md.floor_char_boundary(50_000);
386        format!(
387            "URL: {} (status {})\nTitle: {}\n\n{}\n\n... (truncated, {} total chars)",
388            r.url,
389            r.status,
390            r.title,
391            &md[..cut],
392            md.len()
393        )
394    } else if md.is_empty() {
395        format!(
396            "URL: {} (status {})\nTitle: {}\n(no content)",
397            r.url, r.status, r.title
398        )
399    } else {
400        format!(
401            "URL: {} (status {})\nTitle: {}\n\n{}",
402            r.url, r.status, r.title, md
403        )
404    }
405}
406
407/// Extract a required string parameter (borrowed).
408fn param_str<'a>(params: &'a Value, key: &str, error_msg: &str) -> Result<&'a str, String> {
409    params
410        .get(key)
411        .and_then(|v| v.as_str())
412        .ok_or_else(|| error_msg.to_string())
413}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// Sanity-checks a hand-maintained list of action names.
    ///
    /// NOTE: the list lives in this test only; `parameters_schema` is never called here, so
    /// the test does not detect drift between this list and the real schema.
    #[test]
    fn test_schema_covers_all_actions() {
        let actions = [
            "browse",
            "goto",
            "back",
            "forward",
            "reload",
            "post",
            "click",
            "type",
            "press_key",
            "evaluate",
            "evaluate_await",
            "content",
            "query_all",
            "wait_for",
            "load_resources",
            "screenshot",
            "run_script",
            "close",
        ];

        assert!(actions.len() >= 16);
        for expected in ["browse", "goto", "run_script"] {
            assert!(actions.contains(&expected));
        }
    }
}