Skip to main content

oxios_kernel/tools/browser/
browser_tool.rs

//! Agent-facing browser tool — the single gateway to all browser capabilities.
//!
//! Wraps `oxibrowser_core::Browser` behind the `AgentTool` interface so agents
//! can browse the web in their tool-calling loop.
//!
//! The browser engine is initialized **lazily** on first tool invocation.
//! This avoids panics from `block_on` inside an existing tokio runtime.
8
9use std::sync::Arc;
10
11use async_trait::async_trait;
12use oxi_sdk::{AgentTool, AgentToolResult, ToolContext};
13use serde_json::{json, Value};
14use tokio::sync::{oneshot, Mutex, OnceCell};
15
16/// Agent tool for web browsing via the embedded OxiBrowser engine.
17///
18/// Lazily initializes the browser on first `execute()` call.
19/// `from_kernel()` is safe to call from sync context.
20pub struct BrowserTool {
21    /// Lazily-initialized browser engine.
22    browser: OnceCell<Arc<oxibrowser_core::Browser>>,
23    /// Config source for lazy initialization.
24    init: BrowserInit,
25    tab: Arc<Mutex<Option<oxibrowser_core::Tab>>>,
26}
27
28/// How to obtain a Browser instance.
29enum BrowserInit {
30    /// Already have one — use directly.
31    Ready(Arc<oxibrowser_core::Browser>),
32    /// Initialize lazily from BrowserApi on first use.
33    #[cfg(feature = "browser")]
34    Lazy(std::sync::Arc<crate::kernel_handle::BrowserApi>),
35}
36
37impl BrowserTool {
38    /// Create a new browser tool with an already-initialized browser.
39    pub fn new(browser: Arc<oxibrowser_core::Browser>) -> Self {
40        let cell = OnceCell::new();
41        // We can't set it here because browser is moved, so we use Ready variant
42        Self {
43            browser: cell,
44            init: BrowserInit::Ready(browser),
45            tab: Arc::new(Mutex::new(None)),
46        }
47    }
48
49    /// Create a `BrowserTool` from a [`KernelHandle`].
50    ///
51    /// Does **not** initialize the browser yet — that happens on first use.
52    /// This is safe to call from a synchronous context.
53    #[cfg(feature = "browser")]
54    pub fn from_kernel(kernel: &crate::kernel_handle::KernelHandle) -> Self {
55        Self {
56            browser: OnceCell::new(),
57            init: BrowserInit::Lazy(Arc::new(kernel.browser.clone())),
58            tab: Arc::new(Mutex::new(None)),
59        }
60    }
61
62    /// Get or lazily initialize the browser engine.
63    async fn get_browser(&self) -> Result<Arc<oxibrowser_core::Browser>, String> {
64        let browser = self
65            .browser
66            .get_or_try_init(|| async {
67                match &self.init {
68                    BrowserInit::Ready(b) => Ok::<_, String>(b.clone()),
69                    #[cfg(feature = "browser")]
70                    BrowserInit::Lazy(api) => api
71                        .browser()
72                        .await
73                        .map(Arc::clone)
74                        .map_err(|e| e.to_string()),
75                }
76            })
77            .await?;
78        Ok(browser.clone())
79    }
80
81    /// Get or create an interactive tab.
82    async fn get_or_create_tab(&self) -> anyhow::Result<oxibrowser_core::Tab> {
83        let browser = self.get_browser().await.map_err(anyhow::Error::msg)?;
84        let mut guard = self.tab.lock().await;
85        let needs_new = match guard.as_ref() {
86            None => true,
87            Some(t) => t.is_closed(),
88        };
89        if needs_new {
90            let tab = browser.new_tab().await?;
91            *guard = Some(tab.clone());
92        }
93        Ok(guard.as_ref().unwrap().clone())
94    }
95}
96
97impl std::fmt::Debug for BrowserTool {
98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99        f.debug_struct("BrowserTool").finish()
100    }
101}
102
103#[async_trait]
104impl AgentTool for BrowserTool {
105    fn name(&self) -> &str {
106        "browser"
107    }
108
109    fn label(&self) -> &str {
110        "Browser"
111    }
112
113    fn description(&self) -> &'static str {
114        "Browse the web using a headless browser. Actions: browse(url), goto(url), back(), forward(), reload(), post(url, body, content_type), click(selector), type(selector, text), press_key(key), evaluate(js), evaluate_await(js), content(), query_all(selector), wait_for(selector, timeout_ms), load_resources(), screenshot(), run_script(yaml), close()"
115    }
116
117    fn parameters_schema(&self) -> Value {
118        json!({
119            "type": "object",
120            "properties": {
121                "action": {
122                    "type": "string",
123                    "enum": [
124                        "browse",
125                        "goto",
126                        "back",
127                        "forward",
128                        "reload",
129                        "post",
130                        "click",
131                        "type",
132                        "press_key",
133                        "evaluate",
134                        "evaluate_await",
135                        "content",
136                        "query_all",
137                        "wait_for",
138                        "load_resources",
139                        "screenshot",
140                        "run_script",
141                        "close"
142                    ],
143                    "description": "Browser action to perform"
144                },
145                "url": {
146                    "type": "string",
147                    "description": "URL (browse, goto, post actions)"
148                },
149                "selector": {
150                    "type": "string",
151                    "description": "CSS selector (click, type, query_all, wait_for actions)"
152                },
153                "text": {
154                    "type": "string",
155                    "description": "Text to type (type action)"
156                },
157                "key": {
158                    "type": "string",
159                    "description": "Key to press (press_key action, e.g. 'Enter', 'Tab')"
160                },
161                "javascript": {
162                    "type": "string",
163                    "description": "JavaScript code (evaluate, evaluate_await actions)"
164                },
165                "body": {
166                    "type": "string",
167                    "description": "Request body (post action)"
168                },
169                "content_type": {
170                    "type": "string",
171                    "description": "Content-Type header (post action)"
172                },
173                "timeout_ms": {
174                    "type": "integer",
175                    "description": "Timeout in milliseconds (wait_for action)"
176                },
177                "width": {
178                    "type": "integer",
179                    "description": "Viewport width for screenshot (default 1280)"
180                },
181                "script": {
182                    "type": "string",
183                    "description": "YAML script for run_script action. Supports: goto, click, fill, type, wait, evaluate, extract, screenshot, if, retry, set, echo, sleep, and more."
184                }
185            },
186            "required": ["action"]
187        })
188    }
189
190    async fn execute(
191        &self,
192        _tool_call_id: &str,
193        params: Value,
194        _signal: Option<oneshot::Receiver<()>>,
195        _ctx: &ToolContext,
196    ) -> Result<AgentToolResult, String> {
197        let action = params
198            .get("action")
199            .and_then(|v| v.as_str())
200            .ok_or_else(|| "Missing required parameter: action".to_string())?;
201
202        // Grab the browser reference (lazy init happens here if needed).
203        let browser = self.get_browser().await?;
204
205        match action {
206            // ── One-shot browse ────────────────────────────────────
207            "browse" => {
208                let url = param_str(&params, "url", "browse requires 'url'")?;
209                match browser.browse(url).await {
210                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
211                    Err(e) => Ok(AgentToolResult::error(format!("Browse failed: {}", e))),
212                }
213            }
214
215            // ── Interactive navigation ─────────────────────────────
216            "goto" => {
217                let url = param_str(&params, "url", "goto requires 'url'")?;
218                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
219                match tab.goto(url).await {
220                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
221                    Err(e) => Ok(AgentToolResult::error(format!("Navigation failed: {}", e))),
222                }
223            }
224            "back" => {
225                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
226                match tab.back().await {
227                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
228                    Err(e) => Ok(AgentToolResult::error(format!("Back failed: {}", e))),
229                }
230            }
231            "forward" => {
232                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
233                match tab.forward().await {
234                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
235                    Err(e) => Ok(AgentToolResult::error(format!("Forward failed: {}", e))),
236                }
237            }
238            "reload" => {
239                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
240                match tab.reload().await {
241                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
242                    Err(e) => Ok(AgentToolResult::error(format!("Reload failed: {}", e))),
243                }
244            }
245            "post" => {
246                let url = param_str(&params, "url", "post requires 'url'")?;
247                let body = param_str(&params, "body", "post requires 'body'")?;
248                let ct = params
249                    .get("content_type")
250                    .and_then(|v| v.as_str())
251                    .unwrap_or("application/json");
252                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
253                match tab.post(url, body, ct).await {
254                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
255                    Err(e) => Ok(AgentToolResult::error(format!("POST failed: {}", e))),
256                }
257            }
258
259            // ── Interaction ────────────────────────────────────────
260            "click" => {
261                let selector = param_str(&params, "selector", "click requires 'selector'")?;
262                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
263                match tab.click(selector).await {
264                    Ok(()) => Ok(AgentToolResult::success(format!("Clicked '{}'", selector))),
265                    Err(e) => Ok(AgentToolResult::error(format!("Click failed: {}", e))),
266                }
267            }
268            "type" => {
269                let selector = param_str(&params, "selector", "type requires 'selector'")?;
270                let text = param_str(&params, "text", "type requires 'text'")?;
271                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
272                match tab.r#type(selector, text).await {
273                    Ok(()) => Ok(AgentToolResult::success(format!(
274                        "Typed {} chars into '{}'",
275                        text.len(),
276                        selector
277                    ))),
278                    Err(e) => Ok(AgentToolResult::error(format!("Type failed: {}", e))),
279                }
280            }
281            "press_key" => {
282                let key = param_str(&params, "key", "press_key requires 'key'")?;
283                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
284                match tab.press_key(key).await {
285                    Ok(()) => Ok(AgentToolResult::success(format!("Pressed '{}'", key))),
286                    Err(e) => Ok(AgentToolResult::error(format!("Press key failed: {}", e))),
287                }
288            }
289
290            // ── Content extraction ─────────────────────────────────
291            "content" => {
292                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
293                match tab.content().await {
294                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
295                    Err(e) => Ok(AgentToolResult::error(format!("Content failed: {}", e))),
296                }
297            }
298            "query_all" => {
299                let selector = param_str(&params, "selector", "query_all requires 'selector'")?;
300                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
301                match tab.query_all(selector).await {
302                    Ok(texts) => {
303                        let output = if texts.is_empty() {
304                            format!("No elements found matching '{}'", selector)
305                        } else {
306                            texts
307                                .iter()
308                                .enumerate()
309                                .map(|(i, t)| format!("{}. {}", i + 1, t))
310                                .collect::<Vec<_>>()
311                                .join("\n")
312                        };
313                        Ok(AgentToolResult::success(output))
314                    }
315                    Err(e) => Ok(AgentToolResult::error(format!("Query failed: {}", e))),
316                }
317            }
318            "evaluate" => {
319                let js = param_str(&params, "javascript", "evaluate requires 'javascript'")?;
320                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
321                match tab.evaluate(js).await {
322                    Ok(value) => {
323                        let output =
324                            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
325                        Ok(AgentToolResult::success(output))
326                    }
327                    Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
328                }
329            }
330            "evaluate_await" => {
331                let js = param_str(&params, "javascript", "evaluate_await requires 'javascript'")?;
332                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
333                match tab.evaluate_await(js).await {
334                    Ok(value) => {
335                        let output =
336                            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
337                        Ok(AgentToolResult::success(output))
338                    }
339                    Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
340                }
341            }
342
343            // ── Waiting ────────────────────────────────────────────
344            "wait_for" => {
345                let selector = param_str(&params, "selector", "wait_for requires 'selector'")?;
346                let timeout_ms = params.get("timeout_ms").and_then(|v| v.as_u64()).unwrap_or(30_000);
347                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
348                match tab.wait_for(selector, timeout_ms).await {
349                    Ok(()) => Ok(AgentToolResult::success(format!(
350                        "Element '{}' found within {}ms",
351                        selector, timeout_ms
352                    ))),
353                    Err(e) => Ok(AgentToolResult::error(format!("wait_for failed: {}", e))),
354                }
355            }
356
357            // ── Sub-resources ──────────────────────────────────────
358            "load_resources" => {
359                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
360                match tab.load_resources().await {
361                    Ok(count) => {
362                        Ok(AgentToolResult::success(format!("Loaded {} resources", count)))
363                    }
364                    Err(e) => {
365                        Ok(AgentToolResult::error(format!("load_resources failed: {}", e)))
366                    }
367                }
368            }
369
370            // ── Screenshot ─────────────────────────────────────────
371            "screenshot" => {
372                let width = params.get("width").and_then(|v| v.as_u64()).unwrap_or(1280) as u32;
373                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
374                match tab.screenshot(width).await {
375                    Ok(png) => Ok(AgentToolResult::success(format!(
376                        "Screenshot: {} bytes (PNG, {}px wide)",
377                        png.len(),
378                        width
379                    ))),
380                    Err(e) => Ok(AgentToolResult::error(format!("Screenshot failed: {}", e))),
381                }
382            }
383
384            // ── Script execution ────────────────────────────────────
385            "run_script" => {
386                let yaml =
387                    param_str(&params, "script", "run_script requires 'script' (YAML string)")?;
388                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
389                let mut runner = oxibrowser_core::script::ScriptRunner::new(&tab);
390                match runner.run(yaml).await {
391                    Ok(result) => {
392                        let output = serde_json::to_string_pretty(&result)
393                            .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e));
394                        Ok(AgentToolResult::success(output))
395                    }
396                    Err(e) => Ok(AgentToolResult::error(format!(
397                        "Script failed: {}",
398                        e
399                    ))),
400                }
401            }
402
403            // ── Lifecycle ──────────────────────────────────────────
404            "close" => {
405                let mut guard = self.tab.lock().await;
406                if let Some(t) = guard.take() {
407                    let _ = t.close().await;
408                }
409                Ok(AgentToolResult::success("Tab closed"))
410            }
411
412            other => Err(format!(
413                "Unknown browser action '{}'. Valid: browse, goto, back, forward, reload, post, click, type, press_key, evaluate, evaluate_await, content, query_all, wait_for, load_resources, screenshot, run_script, close",
414                other
415            )),
416        }
417    }
418}
419
420// ---------------------------------------------------------------------------
421// Helpers
422// ---------------------------------------------------------------------------
423
424/// Format a `BrowseResult` for agent consumption.
425fn format_browse(r: &oxibrowser_core::BrowseResult) -> String {
426    let md = &r.markdown;
427    if md.len() > 50_000 {
428        let cut = md.floor_char_boundary(50_000);
429        format!(
430            "URL: {} (status {})\nTitle: {}\n\n{}\n\n... (truncated, {} total chars)",
431            r.url,
432            r.status,
433            r.title,
434            &md[..cut],
435            md.len()
436        )
437    } else if md.is_empty() {
438        format!(
439            "URL: {} (status {})\nTitle: {}\n(no content)",
440            r.url, r.status, r.title
441        )
442    } else {
443        format!(
444            "URL: {} (status {})\nTitle: {}\n\n{}",
445            r.url, r.status, r.title, md
446        )
447    }
448}
449
450/// Extract a required string parameter (borrowed).
451fn param_str<'a>(params: &'a Value, key: &str, error_msg: &str) -> Result<&'a str, String> {
452    params
453        .get(key)
454        .and_then(|v| v.as_str())
455        .ok_or_else(|| error_msg.to_string())
456}
457
#[cfg(test)]
mod tests {
    /// Sanity-checks the list of action names the tool is expected to expose.
    ///
    /// NOTE(review): this only inspects a list defined inside the test. It
    /// does not read `parameters_schema()` or the dispatch in `execute`, so
    /// it cannot detect drift between them — consider asserting against the
    /// real schema once a `BrowserTool` can be constructed in tests.
    #[test]
    fn test_schema_covers_all_actions() {
        let actions = [
            "browse",
            "goto",
            "back",
            "forward",
            "reload",
            "post",
            "click",
            "type",
            "press_key",
            "evaluate",
            "evaluate_await",
            "content",
            "query_all",
            "wait_for",
            "load_resources",
            "screenshot",
            "run_script",
            "close",
        ];
        assert!(actions.len() >= 16);
        for required in ["browse", "goto", "run_script"] {
            assert!(actions.contains(&required));
        }
    }
}