Skip to main content

oxi_agent/tools/browse/
browse_tool.rs

1//! Browse tool — render a web page and return its content.
2//!
3//! Opens exactly **one** tab per request and extracts all content from it.
4//! Never calls engine-level methods that would open additional tabs.
5
6use super::config::BrowseConfig;
7use super::engine::BrowserEngine;
8use super::helpers;
9use super::tab_guard::TabGuard;
10use crate::tools::{AgentTool, AgentToolResult, ToolContext, ToolError, ToolExecutionMode};
11use async_trait::async_trait;
12use serde_json::{json, Value};
13use std::sync::Arc;
14use tokio::sync::oneshot;
15
16/// Render a web page using the built-in headless browser.
17///
18/// Returns page content as markdown, html, text, or a list of links.
19pub struct BrowseTool {
20    engine: Arc<dyn BrowserEngine>,
21    config: BrowseConfig,
22}
23
24impl BrowseTool {
25    /// Create with the given engine and default config.
26    pub fn new(engine: Arc<dyn BrowserEngine>) -> Self {
27        Self {
28            engine,
29            config: BrowseConfig::default(),
30        }
31    }
32
33    /// Create with custom configuration.
34    pub fn with_config(engine: Arc<dyn BrowserEngine>, config: BrowseConfig) -> Self {
35        Self { engine, config }
36    }
37}
38
39#[async_trait]
40impl AgentTool for BrowseTool {
41    fn name(&self) -> &str {
42        "browse"
43    }
44
45    fn label(&self) -> &str {
46        "Browse"
47    }
48
49    fn description(&self) -> &str {
50        "Browse a web page with a built-in headless browser. Renders JavaScript-powered \
51         pages and returns content as markdown (default), html, or links. Use when \
52         web_search results are insufficient and you need to read the actual page content. \
53         Supports waiting for dynamic content via CSS selectors."
54    }
55
56    fn parameters_schema(&self) -> Value {
57        json!({
58            "type": "object",
59            "properties": {
60                "url": {
61                    "type": "string",
62                    "description": "URL to browse"
63                },
64                "format": {
65                    "type": "string",
66                    "enum": ["markdown", "html", "text", "links"],
67                    "default": "markdown",
68                    "description": "Output format: markdown (default), html, plain text, or list of links"
69                },
70                "selector": {
71                    "type": "string",
72                    "description": "CSS selector to extract only matching elements"
73                },
74                "wait_for": {
75                    "type": "string",
76                    "description": "CSS selector to wait for before extracting (for JS-rendered content)"
77                },
78                "screenshot": {
79                    "type": "boolean",
80                    "default": false,
81                    "description": "Include a PNG screenshot as an image block"
82                }
83            },
84            "required": ["url"]
85        })
86    }
87
88    fn on_progress(&self, callback: crate::tools::ProgressCallback) {
89        // The agent loop calls this *before* `execute`. The engine's
90        // background task (spawned by `OxiBrowserEngine::with_config`) will
91        // invoke `callback` with each browser event's `short_label()` for
92        // the duration of this tool call. The next tool call's `on_progress`
93        // will replace this one — there is no fan-out.
94        self.engine.progress_forwarder().set(callback);
95    }
96
97    /// Run sequentially — never in parallel with other tool calls.
98    ///
99    /// The `OxiBrowserEngine`'s `ProgressForwarder` is single-tenant: a
100    /// single `Mutex<Option<ProgressCallback>>` shared by all callers. If
101    /// two `BrowseTool::execute` calls overlapped, the second's
102    /// `on_progress` would overwrite the first's callback in the forwarder,
103    /// and progress events would be delivered to the wrong `tool_call_id`
104    /// (events for tool A would surface on tool B's UI). Sequential mode
105    /// is the simplest, safest fix: the agent loop serializes BrowseTool
106    /// calls so only one is in flight at a time.
107    ///
108    /// Future work: a per-`tool_call_id` forwarder (or per-tab routing via
109    /// a `tab_id` field on `oxibrowser_core::BrowserEvent`) is the proper
110    /// long-term fix and would let BrowseTool run in parallel again.
111    fn execution_mode(&self) -> ToolExecutionMode {
112        ToolExecutionMode::SequentialOnly
113    }
114
115    async fn execute(
116        &self,
117        _tool_call_id: &str,
118        params: Value,
119        _signal: Option<oneshot::Receiver<()>>,
120        _ctx: &ToolContext,
121    ) -> Result<AgentToolResult, ToolError> {
122        let url = params["url"]
123            .as_str()
124            .ok_or_else(|| "Missing required parameter: url".to_string())?;
125
126        let format = params["format"].as_str().unwrap_or("markdown");
127        let selector = params["selector"].as_str();
128        let wait_for = params["wait_for"].as_str();
129        let want_screenshot = params["screenshot"].as_bool().unwrap_or(false);
130
131        tracing::info!(url = %url, format = %format, "browsing page");
132
133        // Open exactly one tab for this request
134        let raw_tab = self
135            .engine
136            .new_tab()
137            .await
138            .map_err(|e| format!("Failed to open browser tab: {}", e))?;
139        let guard = TabGuard::new(raw_tab);
140        let tab = guard.tab();
141
142        // Navigate
143        let page = tab
144            .goto(url)
145            .await
146            .map_err(|e| format!("Navigation failed: {}", e))?;
147
148        // Wait for dynamic content if requested
149        if let Some(sel) = wait_for {
150            tab.wait_for(sel, self.config.default_wait_timeout_ms)
151                .await
152                .map_err(|e| format!("wait_for '{}' failed: {}", sel, e))?;
153        }
154
155        // Build output — all from the same tab
156        let output = match format {
157            "html" => {
158                if let Some(sel) = selector {
159                    tab.query_all(sel)
160                        .await
161                        .map_err(|e| e.to_string())?
162                        .join("\n\n")
163                } else {
164                    page.html.clone()
165                }
166            }
167            "links" => {
168                let links = helpers::extract_links(tab).await?;
169                helpers::format_links(&links)
170            }
171            "text" => {
172                if let Some(sel) = selector {
173                    tab.query_all(sel)
174                        .await
175                        .map_err(|e| e.to_string())?
176                        .join("\n")
177                } else {
178                    page.markdown.clone()
179                }
180            }
181            _ => {
182                // "markdown" (default)
183                if let Some(sel) = selector {
184                    tab.query_all(sel)
185                        .await
186                        .map_err(|e| e.to_string())?
187                        .join("\n\n")
188                } else {
189                    page.markdown.clone()
190                }
191            }
192        };
193
194        let title = page.title.clone();
195        let final_url = page.url.clone();
196        let status = page.status;
197
198        // Screenshot from the same tab (no re-render)
199        let screenshot_blocks = if want_screenshot {
200            match tab.screenshot(self.config.screenshot_width).await {
201                Ok(png) => {
202                    let b64 =
203                        base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &png);
204                    let img =
205                        oxi_ai::ContentBlock::Image(oxi_ai::ImageContent::new(b64, "image/png"));
206                    Some(vec![img])
207                }
208                Err(e) => {
209                    tracing::warn!("screenshot failed for {}: {}", final_url, e);
210                    None
211                }
212            }
213        } else {
214            None
215        };
216
217        // Explicitly close the tab
218        guard.close().await;
219
220        let mut result = AgentToolResult::success(output).with_metadata(json!({
221            "url": final_url,
222            "title": title,
223            "status": status,
224        }));
225
226        if let Some(blocks) = screenshot_blocks {
227            result = result.with_content_blocks(blocks);
228        }
229
230        Ok(result)
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::browse::engine::{BrowserError, BrowserTab};
    use async_trait::async_trait;

    /// Inert `BrowserEngine` used only so a `BrowseTool` can be constructed.
    ///
    /// The test below never opens a tab, so `new_tab` simply reports an
    /// error and the engine always reports itself as not alive.
    struct MockEngine;

    #[async_trait]
    impl BrowserEngine for MockEngine {
        async fn new_tab(&self) -> Result<Box<dyn BrowserTab>, BrowserError> {
            Err(BrowserError::Backend("MockEngine: no real browser".into()))
        }

        async fn close(&self) -> Result<(), BrowserError> {
            Ok(())
        }

        async fn is_alive(&self) -> bool {
            false
        }
    }

    /// Guards the sequential-execution contract of `BrowseTool`.
    #[test]
    fn browse_tool_is_sequential_only() {
        // The OxiBrowserEngine's progress forwarder is single-tenant: were
        // two BrowseTool executions to overlap, the second `on_progress`
        // would replace the first callback and progress events would be
        // routed to the wrong tool_call_id. Hence the tool must declare
        // itself sequential-only.
        let engine: Arc<dyn BrowserEngine> = Arc::new(MockEngine);
        let tool = BrowseTool::new(engine);
        let mode = tool.execution_mode();
        assert!(matches!(mode, ToolExecutionMode::SequentialOnly));
    }
}