Skip to main content

oxios_kernel/tools/browser/
browser_tool.rs

1//! Agent-facing browser tool — the single gateway to all browser capabilities.
2//!
3//! Wraps `oxibrowser_core::Browser` behind the `AgentTool` interface so agents
4//! can browse the web in their tool-calling loop.
5//!
6//! The browser engine is initialized **lazily** on first tool invocation.
7//! This avoids panics from `block_on` inside an existing tokio runtime.
8
9use std::sync::Arc;
10
11use async_trait::async_trait;
12use oxi_sdk::{AgentTool, AgentToolResult, ToolContext};
13use serde_json::{json, Value};
14use tokio::sync::{oneshot, Mutex, OnceCell};
15
16/// Agent tool for web browsing via the embedded OxiBrowser engine.
17///
18/// Lazily initializes the browser on first `execute()` call.
19/// `from_kernel()` is safe to call from sync context.
20pub struct BrowserTool {
21    /// Lazily-initialized browser engine.
22    browser: OnceCell<Arc<oxibrowser_core::Browser>>,
23    /// Config source for lazy initialization.
24    init: BrowserInit,
25    tab: Arc<Mutex<Option<oxibrowser_core::Tab>>>,
26}
27
28/// How to obtain a Browser instance.
29enum BrowserInit {
30    /// Already have one — use directly.
31    Ready(Arc<oxibrowser_core::Browser>),
32    /// Initialize lazily from BrowserApi on first use.
33    #[cfg(feature = "browser")]
34    Lazy(std::sync::Arc<crate::kernel_handle::BrowserApi>),
35}
36
37impl BrowserTool {
38    /// Create a new browser tool with an already-initialized browser.
39    pub fn new(browser: Arc<oxibrowser_core::Browser>) -> Self {
40        let cell = OnceCell::new();
41        // We can't set it here because browser is moved, so we use Ready variant
42        Self {
43            browser: cell,
44            init: BrowserInit::Ready(browser),
45            tab: Arc::new(Mutex::new(None)),
46        }
47    }
48
49    /// Create a `BrowserTool` from a [`KernelHandle`].
50    ///
51    /// Does **not** initialize the browser yet — that happens on first use.
52    /// This is safe to call from a synchronous context.
53    #[cfg(feature = "browser")]
54    pub fn from_kernel(kernel: &crate::kernel_handle::KernelHandle) -> Self {
55        Self {
56            browser: OnceCell::new(),
57            init: BrowserInit::Lazy(Arc::new(kernel.browser.clone())),
58            tab: Arc::new(Mutex::new(None)),
59        }
60    }
61
62    /// Get or lazily initialize the browser engine.
63    async fn get_browser(&self) -> Result<Arc<oxibrowser_core::Browser>, String> {
64        let browser = self
65            .browser
66            .get_or_try_init(|| async {
67                match &self.init {
68                    BrowserInit::Ready(b) => Ok::<_, String>(b.clone()),
69                    #[cfg(feature = "browser")]
70                    BrowserInit::Lazy(api) => {
71                        api.browser().await.map(|b| b.clone()).map_err(|e| e.to_string())
72                    }
73                }
74            })
75            .await?;
76        Ok(browser.clone())
77    }
78
79    /// Get or create an interactive tab.
80    async fn get_or_create_tab(&self) -> anyhow::Result<oxibrowser_core::Tab> {
81        let browser = self.get_browser().await.map_err(anyhow::Error::msg)?;
82        let mut guard = self.tab.lock().await;
83        let needs_new = match guard.as_ref() {
84            None => true,
85            Some(t) => t.is_closed(),
86        };
87        if needs_new {
88            let tab = browser.new_tab().await?;
89            *guard = Some(tab.clone());
90        }
91        Ok(guard.as_ref().unwrap().clone())
92    }
93}
94
95impl std::fmt::Debug for BrowserTool {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        f.debug_struct("BrowserTool").finish()
98    }
99}
100
101#[async_trait]
102impl AgentTool for BrowserTool {
103    fn name(&self) -> &str {
104        "browser"
105    }
106
107    fn label(&self) -> &str {
108        "Browser"
109    }
110
111    fn description(&self) -> &'static str {
112        "Browse the web using a headless browser. Actions: browse(url), goto(url), back(), forward(), reload(), post(url, body, content_type), click(selector), type(selector, text), press_key(key), evaluate(js), evaluate_await(js), content(), query_all(selector), wait_for(selector, timeout_ms), load_resources(), screenshot(), run_script(yaml), close()"
113    }
114
115    fn parameters_schema(&self) -> Value {
116        json!({
117            "type": "object",
118            "properties": {
119                "action": {
120                    "type": "string",
121                    "enum": [
122                        "browse",
123                        "goto",
124                        "back",
125                        "forward",
126                        "reload",
127                        "post",
128                        "click",
129                        "type",
130                        "press_key",
131                        "evaluate",
132                        "evaluate_await",
133                        "content",
134                        "query_all",
135                        "wait_for",
136                        "load_resources",
137                        "screenshot",
138                        "run_script",
139                        "close"
140                    ],
141                    "description": "Browser action to perform"
142                },
143                "url": {
144                    "type": "string",
145                    "description": "URL (browse, goto, post actions)"
146                },
147                "selector": {
148                    "type": "string",
149                    "description": "CSS selector (click, type, query_all, wait_for actions)"
150                },
151                "text": {
152                    "type": "string",
153                    "description": "Text to type (type action)"
154                },
155                "key": {
156                    "type": "string",
157                    "description": "Key to press (press_key action, e.g. 'Enter', 'Tab')"
158                },
159                "javascript": {
160                    "type": "string",
161                    "description": "JavaScript code (evaluate, evaluate_await actions)"
162                },
163                "body": {
164                    "type": "string",
165                    "description": "Request body (post action)"
166                },
167                "content_type": {
168                    "type": "string",
169                    "description": "Content-Type header (post action)"
170                },
171                "timeout_ms": {
172                    "type": "integer",
173                    "description": "Timeout in milliseconds (wait_for action)"
174                },
175                "width": {
176                    "type": "integer",
177                    "description": "Viewport width for screenshot (default 1280)"
178                },
179                "script": {
180                    "type": "string",
181                    "description": "YAML script for run_script action. Supports: goto, click, fill, type, wait, evaluate, extract, screenshot, if, retry, set, echo, sleep, and more."
182                }
183            },
184            "required": ["action"]
185        })
186    }
187
188    async fn execute(
189        &self,
190        _tool_call_id: &str,
191        params: Value,
192        _signal: Option<oneshot::Receiver<()>>,
193        _ctx: &ToolContext,
194    ) -> Result<AgentToolResult, String> {
195        let action = params
196            .get("action")
197            .and_then(|v| v.as_str())
198            .ok_or_else(|| "Missing required parameter: action".to_string())?;
199
200        // Grab the browser reference (lazy init happens here if needed).
201        let browser = self.get_browser().await?;
202
203        match action {
204            // ── One-shot browse ────────────────────────────────────
205            "browse" => {
206                let url = param_str(&params, "url", "browse requires 'url'")?;
207                match browser.browse(url).await {
208                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
209                    Err(e) => Ok(AgentToolResult::error(format!("Browse failed: {}", e))),
210                }
211            }
212
213            // ── Interactive navigation ─────────────────────────────
214            "goto" => {
215                let url = param_str(&params, "url", "goto requires 'url'")?;
216                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
217                match tab.goto(url).await {
218                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
219                    Err(e) => Ok(AgentToolResult::error(format!("Navigation failed: {}", e))),
220                }
221            }
222            "back" => {
223                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
224                match tab.back().await {
225                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
226                    Err(e) => Ok(AgentToolResult::error(format!("Back failed: {}", e))),
227                }
228            }
229            "forward" => {
230                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
231                match tab.forward().await {
232                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
233                    Err(e) => Ok(AgentToolResult::error(format!("Forward failed: {}", e))),
234                }
235            }
236            "reload" => {
237                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
238                match tab.reload().await {
239                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
240                    Err(e) => Ok(AgentToolResult::error(format!("Reload failed: {}", e))),
241                }
242            }
243            "post" => {
244                let url = param_str(&params, "url", "post requires 'url'")?;
245                let body = param_str(&params, "body", "post requires 'body'")?;
246                let ct = params
247                    .get("content_type")
248                    .and_then(|v| v.as_str())
249                    .unwrap_or("application/json");
250                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
251                match tab.post(url, body, ct).await {
252                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
253                    Err(e) => Ok(AgentToolResult::error(format!("POST failed: {}", e))),
254                }
255            }
256
257            // ── Interaction ────────────────────────────────────────
258            "click" => {
259                let selector = param_str(&params, "selector", "click requires 'selector'")?;
260                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
261                match tab.click(selector).await {
262                    Ok(()) => Ok(AgentToolResult::success(format!("Clicked '{}'", selector))),
263                    Err(e) => Ok(AgentToolResult::error(format!("Click failed: {}", e))),
264                }
265            }
266            "type" => {
267                let selector = param_str(&params, "selector", "type requires 'selector'")?;
268                let text = param_str(&params, "text", "type requires 'text'")?;
269                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
270                match tab.r#type(selector, text).await {
271                    Ok(()) => Ok(AgentToolResult::success(format!(
272                        "Typed {} chars into '{}'",
273                        text.len(),
274                        selector
275                    ))),
276                    Err(e) => Ok(AgentToolResult::error(format!("Type failed: {}", e))),
277                }
278            }
279            "press_key" => {
280                let key = param_str(&params, "key", "press_key requires 'key'")?;
281                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
282                match tab.press_key(key).await {
283                    Ok(()) => Ok(AgentToolResult::success(format!("Pressed '{}'", key))),
284                    Err(e) => Ok(AgentToolResult::error(format!("Press key failed: {}", e))),
285                }
286            }
287
288            // ── Content extraction ─────────────────────────────────
289            "content" => {
290                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
291                match tab.content().await {
292                    Ok(r) => Ok(AgentToolResult::success(format_browse(&r))),
293                    Err(e) => Ok(AgentToolResult::error(format!("Content failed: {}", e))),
294                }
295            }
296            "query_all" => {
297                let selector = param_str(&params, "selector", "query_all requires 'selector'")?;
298                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
299                match tab.query_all(selector).await {
300                    Ok(texts) => {
301                        let output = if texts.is_empty() {
302                            format!("No elements found matching '{}'", selector)
303                        } else {
304                            texts
305                                .iter()
306                                .enumerate()
307                                .map(|(i, t)| format!("{}. {}", i + 1, t))
308                                .collect::<Vec<_>>()
309                                .join("\n")
310                        };
311                        Ok(AgentToolResult::success(output))
312                    }
313                    Err(e) => Ok(AgentToolResult::error(format!("Query failed: {}", e))),
314                }
315            }
316            "evaluate" => {
317                let js = param_str(&params, "javascript", "evaluate requires 'javascript'")?;
318                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
319                match tab.evaluate(js).await {
320                    Ok(value) => {
321                        let output =
322                            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
323                        Ok(AgentToolResult::success(output))
324                    }
325                    Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
326                }
327            }
328            "evaluate_await" => {
329                let js = param_str(&params, "javascript", "evaluate_await requires 'javascript'")?;
330                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
331                match tab.evaluate_await(js).await {
332                    Ok(value) => {
333                        let output =
334                            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string());
335                        Ok(AgentToolResult::success(output))
336                    }
337                    Err(e) => Ok(AgentToolResult::error(format!("JS evaluation failed: {}", e))),
338                }
339            }
340
341            // ── Waiting ────────────────────────────────────────────
342            "wait_for" => {
343                let selector = param_str(&params, "selector", "wait_for requires 'selector'")?;
344                let timeout_ms = params.get("timeout_ms").and_then(|v| v.as_u64()).unwrap_or(30_000);
345                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
346                match tab.wait_for(selector, timeout_ms).await {
347                    Ok(()) => Ok(AgentToolResult::success(format!(
348                        "Element '{}' found within {}ms",
349                        selector, timeout_ms
350                    ))),
351                    Err(e) => Ok(AgentToolResult::error(format!("wait_for failed: {}", e))),
352                }
353            }
354
355            // ── Sub-resources ──────────────────────────────────────
356            "load_resources" => {
357                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
358                match tab.load_resources().await {
359                    Ok(count) => {
360                        Ok(AgentToolResult::success(format!("Loaded {} resources", count)))
361                    }
362                    Err(e) => {
363                        Ok(AgentToolResult::error(format!("load_resources failed: {}", e)))
364                    }
365                }
366            }
367
368            // ── Screenshot ─────────────────────────────────────────
369            "screenshot" => {
370                let width = params.get("width").and_then(|v| v.as_u64()).unwrap_or(1280) as u32;
371                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
372                match tab.screenshot(width).await {
373                    Ok(png) => Ok(AgentToolResult::success(format!(
374                        "Screenshot: {} bytes (PNG, {}px wide)",
375                        png.len(),
376                        width
377                    ))),
378                    Err(e) => Ok(AgentToolResult::error(format!("Screenshot failed: {}", e))),
379                }
380            }
381
382            // ── Script execution ────────────────────────────────────
383            "run_script" => {
384                let yaml =
385                    param_str(&params, "script", "run_script requires 'script' (YAML string)")?;
386                let tab = self.get_or_create_tab().await.map_err(|e| e.to_string())?;
387                let mut runner = oxibrowser_core::script::ScriptRunner::new(&tab);
388                match runner.run(yaml).await {
389                    Ok(result) => {
390                        let output = serde_json::to_string_pretty(&result)
391                            .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e));
392                        Ok(AgentToolResult::success(output))
393                    }
394                    Err(e) => Ok(AgentToolResult::error(format!(
395                        "Script failed: {}",
396                        e
397                    ))),
398                }
399            }
400
401            // ── Lifecycle ──────────────────────────────────────────
402            "close" => {
403                let mut guard = self.tab.lock().await;
404                if let Some(t) = guard.take() {
405                    let _ = t.close().await;
406                }
407                Ok(AgentToolResult::success("Tab closed"))
408            }
409
410            other => Err(format!(
411                "Unknown browser action '{}'. Valid: browse, goto, back, forward, reload, post, click, type, press_key, evaluate, evaluate_await, content, query_all, wait_for, load_resources, screenshot, run_script, close",
412                other
413            )),
414        }
415    }
416}
417
418// ---------------------------------------------------------------------------
419// Helpers
420// ---------------------------------------------------------------------------
421
422/// Format a `BrowseResult` for agent consumption.
423fn format_browse(r: &oxibrowser_core::BrowseResult) -> String {
424    let md = &r.markdown;
425    if md.len() > 50_000 {
426        let cut = md.floor_char_boundary(50_000);
427        format!(
428            "URL: {} (status {})\nTitle: {}\n\n{}\n\n... (truncated, {} total chars)",
429            r.url,
430            r.status,
431            r.title,
432            &md[..cut],
433            md.len()
434        )
435    } else if md.is_empty() {
436        format!(
437            "URL: {} (status {})\nTitle: {}\n(no content)",
438            r.url, r.status, r.title
439        )
440    } else {
441        format!(
442            "URL: {} (status {})\nTitle: {}\n\n{}",
443            r.url, r.status, r.title, md
444        )
445    }
446}
447
448/// Extract a required string parameter (borrowed).
449fn param_str<'a>(params: &'a Value, key: &str, error_msg: &str) -> Result<&'a str, String> {
450    params
451        .get(key)
452        .and_then(|v| v.as_str())
453        .ok_or_else(|| error_msg.to_string())
454}
455
456#[cfg(test)]
457mod tests {
458    #[test]
459    fn test_schema_covers_all_actions() {
460        let actions = vec![
461            "browse", "goto", "back", "forward", "reload", "post",
462            "click", "type", "press_key", "evaluate", "evaluate_await",
463            "content", "query_all", "wait_for", "load_resources",
464            "screenshot", "run_script", "close",
465        ];
466        assert!(actions.len() >= 16);
467        assert!(actions.contains(&"browse"));
468        assert!(actions.contains(&"goto"));
469        assert!(actions.contains(&"run_script"));
470    }
471}