Skip to main content

oxi_agent/tools/browse/
browse_tool.rs

1//! Browse tool — render a web page and return its content.
2//!
3//! Opens exactly **one** tab per request and extracts all content from it.
4//! Never calls engine-level methods that would open additional tabs.
5
6use super::config::BrowseConfig;
7use super::engine::BrowserEngine;
8use super::helpers;
9use super::tab_guard::TabGuard;
10use crate::tools::{AgentTool, AgentToolResult, ToolContext, ToolError, ToolExecutionMode};
11use async_trait::async_trait;
12use parking_lot::Mutex;
13use serde_json::{json, Value};
14use std::sync::Arc;
15use tokio::sync::oneshot;
16
17/// Render a web page using the built-in headless browser.
18///
19/// Returns page content as markdown, html, text, or a list of links.
20pub struct BrowseTool {
21    engine: Arc<dyn BrowserEngine>,
22    config: BrowseConfig,
23    /// Callback stored by `on_progress`, consumed in `execute` when the tab
24    /// is opened.
25    pending_callback: Mutex<Option<crate::tools::ProgressCallback>>,
26    /// Shared slot for the current tab's ID. The agent loop creates the slot
27    /// and passes it via `set_tab_id_slot`; BrowseTool writes `Some(tab_id)`
28    /// when it opens a tab and `None` on close.
29    tab_id_slot: Mutex<Arc<parking_lot::Mutex<Option<uuid::Uuid>>>>,
30}
31
32impl BrowseTool {
33    /// Create with the given engine and default config.
34    pub fn new(engine: Arc<dyn BrowserEngine>) -> Self {
35        Self {
36            engine,
37            config: BrowseConfig::default(),
38            pending_callback: Mutex::new(None),
39            tab_id_slot: Mutex::new(Arc::new(parking_lot::Mutex::new(None))),
40        }
41    }
42
43    /// Create with custom configuration.
44    pub fn with_config(engine: Arc<dyn BrowserEngine>, config: BrowseConfig) -> Self {
45        Self {
46            engine,
47            config,
48            pending_callback: Mutex::new(None),
49            tab_id_slot: Mutex::new(Arc::new(parking_lot::Mutex::new(None))),
50        }
51    }
52}
53
54#[async_trait]
55impl AgentTool for BrowseTool {
56    fn name(&self) -> &str {
57        "browse"
58    }
59
60    fn label(&self) -> &str {
61        "Browse"
62    }
63
64    fn description(&self) -> &str {
65        "Browse a web page with a built-in headless browser. Renders JavaScript-powered \
66         pages and returns content as markdown (default), html, or links. Use when \
67         web_search results are insufficient and you need to read the actual page content. \
68         Supports waiting for dynamic content via CSS selectors."
69    }
70
71    fn parameters_schema(&self) -> Value {
72        json!({
73            "type": "object",
74            "properties": {
75                "url": {
76                    "type": "string",
77                    "description": "URL to browse"
78                },
79                "format": {
80                    "type": "string",
81                    "enum": ["markdown", "html", "text", "links"],
82                    "default": "markdown",
83                    "description": "Output format: markdown (default), html, plain text, or list of links"
84                },
85                "selector": {
86                    "type": "string",
87                    "description": "CSS selector to extract only matching elements"
88                },
89                "wait_for": {
90                    "type": "string",
91                    "description": "CSS selector to wait for before extracting (for JS-rendered content)"
92                },
93                "screenshot": {
94                    "type": "boolean",
95                    "default": false,
96                    "description": "Include a PNG screenshot as an image block"
97                }
98            },
99            "required": ["url"]
100        })
101    }
102
103    fn on_progress(&self, callback: crate::tools::ProgressCallback) {
104        // The agent loop calls this *before* `execute`. We store the
105        // callback and register it on the actual tab once it's opened
106        // inside `execute`. This bridges the gap: `tab_id` is not known
107        // until the tab is created.
108        *self.pending_callback.lock() = Some(callback);
109    }
110
111    /// Sequential execution preserved for stability.
112    ///
113    /// Per-tab routing via `TabCallbackRegistry` now correctly routes
114    /// progress events by `tab_id`, making parallel execution safe.
115    /// However, `SequentialOnly` is kept for now unless a concrete
116    /// multi-tab use case requires parallel browse calls.
117    fn execution_mode(&self) -> ToolExecutionMode {
118        ToolExecutionMode::SequentialOnly
119    }
120
121    fn current_tab_id(&self) -> Option<uuid::Uuid> {
122        *self.tab_id_slot.lock().lock()
123    }
124
125    fn set_tab_id_slot(&self, slot: Arc<parking_lot::Mutex<Option<uuid::Uuid>>>) {
126        *self.tab_id_slot.lock() = slot;
127    }
128
129    async fn execute(
130        &self,
131        _tool_call_id: &str,
132        params: Value,
133        _signal: Option<oneshot::Receiver<()>>,
134        _ctx: &ToolContext,
135    ) -> Result<AgentToolResult, ToolError> {
136        let url = params["url"]
137            .as_str()
138            .ok_or_else(|| "Missing required parameter: url".to_string())?;
139
140        let format = params["format"].as_str().unwrap_or("markdown");
141        let selector = params["selector"].as_str();
142        let wait_for = params["wait_for"].as_str();
143        let want_screenshot = params["screenshot"].as_bool().unwrap_or(false);
144
145        tracing::info!(url = %url, format = %format, "browsing page");
146
147        // Open exactly one tab for this request
148        let raw_tab = self
149            .engine
150            .new_tab()
151            .await
152            .map_err(|e| format!("Failed to open browser tab: {}", e))?;
153
154        // Store the tab_id so the agent loop's progress callback can
155        // include it in `ToolExecutionUpdate` events.
156        let tab_id = raw_tab.tab_id();
157        *self.tab_id_slot.lock().lock() = Some(tab_id);
158
159        // Register the pending progress callback on this tab (keyed by tab_id).
160        if let Some(cb) = self.pending_callback.lock().take() {
161            #[cfg(feature = "native-browser")]
162            {
163                use super::oxibrowser_backend::OxiTab;
164                if let Some(oxi_tab) = raw_tab.as_any().downcast_ref::<OxiTab>() {
165                    oxi_tab.set_progress_callback(cb);
166                }
167            }
168            #[cfg(not(feature = "native-browser"))]
169            {
170                let _ = cb; // no-op without native browser
171            }
172        }
173
174        let guard = TabGuard::new(raw_tab);
175        let tab = guard.tab();
176
177        // Navigate
178        let page = tab
179            .goto(url)
180            .await
181            .map_err(|e| format!("Navigation failed: {}", e))?;
182
183        // Wait for dynamic content if requested
184        if let Some(sel) = wait_for {
185            tab.wait_for(sel, self.config.default_wait_timeout_ms)
186                .await
187                .map_err(|e| format!("wait_for '{}' failed: {}", sel, e))?;
188        }
189
190        // Build output — all from the same tab
191        let output = match format {
192            "html" => {
193                if let Some(sel) = selector {
194                    tab.query_all(sel)
195                        .await
196                        .map_err(|e| e.to_string())?
197                        .join("\n\n")
198                } else {
199                    page.html.clone()
200                }
201            }
202            "links" => {
203                let links = helpers::extract_links(tab).await?;
204                helpers::format_links(&links)
205            }
206            "text" => {
207                if let Some(sel) = selector {
208                    tab.query_all(sel)
209                        .await
210                        .map_err(|e| e.to_string())?
211                        .join("\n")
212                } else {
213                    page.markdown.clone()
214                }
215            }
216            _ => {
217                // "markdown" (default)
218                if let Some(sel) = selector {
219                    tab.query_all(sel)
220                        .await
221                        .map_err(|e| e.to_string())?
222                        .join("\n\n")
223                } else {
224                    page.markdown.clone()
225                }
226            }
227        };
228
229        let title = page.title.clone();
230        let final_url = page.url.clone();
231        let status = page.status;
232
233        // Screenshot from the same tab (no re-render)
234        let screenshot_blocks = if want_screenshot {
235            match tab.screenshot(self.config.screenshot_width).await {
236                Ok(png) => {
237                    let b64 =
238                        base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &png);
239                    let img =
240                        oxi_ai::ContentBlock::Image(oxi_ai::ImageContent::new(b64, "image/png"));
241                    Some(vec![img])
242                }
243                Err(e) => {
244                    tracing::warn!("screenshot failed for {}: {}", final_url, e);
245                    None
246                }
247            }
248        } else {
249            None
250        };
251
252        // Explicitly close the tab and clear the tab_id slot
253        guard.close().await;
254        *self.tab_id_slot.lock().lock() = None;
255
256        let mut result = AgentToolResult::success(output).with_metadata(json!({
257            "url": final_url,
258            "title": title,
259            "status": status,
260        }));
261
262        if let Some(blocks) = screenshot_blocks {
263            result = result.with_content_blocks(blocks);
264        }
265
266        Ok(result)
267    }
268}
269
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::browse::engine::{BrowserError, BrowserTab};
    use async_trait::async_trait;
    use std::sync::atomic::{AtomicBool, Ordering};

    /// Minimal `BrowserEngine` stub. These tests never open a tab, so
    /// `new_tab` simply returns `Err` — the stub only exists so that a
    /// `BrowseTool` can be constructed.
    struct MockEngine;

    #[async_trait]
    impl BrowserEngine for MockEngine {
        async fn new_tab(&self) -> Result<Box<dyn BrowserTab>, BrowserError> {
            Err(BrowserError::Backend("MockEngine: no real browser".into()))
        }

        async fn close(&self) -> Result<(), BrowserError> {
            Ok(())
        }

        async fn is_alive(&self) -> bool {
            false
        }
    }

    /// A `BrowseTool` backed by `MockEngine` with the default config.
    fn mock_tool() -> BrowseTool {
        BrowseTool::new(Arc::new(MockEngine))
    }

    #[test]
    fn browse_tool_is_sequential_only() {
        let tool = mock_tool();
        assert!(matches!(
            tool.execution_mode(),
            crate::tools::ToolExecutionMode::SequentialOnly
        ));
    }

    #[test]
    fn browse_tool_tab_id_slot_receives_id_from_agent_loop() {
        // Simulate the agent loop's flow: set_tab_id_slot → write tab_id →
        // read current_tab_id → clear.
        let tool = mock_tool();

        // Initially no tab_id
        assert_eq!(tool.current_tab_id(), None);

        // Agent loop creates a slot and passes it
        let slot: Arc<parking_lot::Mutex<Option<uuid::Uuid>>> =
            Arc::new(parking_lot::Mutex::new(None));
        tool.set_tab_id_slot(Arc::clone(&slot));

        // Simulate BrowseTool::execute opening a tab
        let tab_id = uuid::Uuid::new_v4();
        *slot.lock() = Some(tab_id);

        // Agent loop's progress callback reads the slot
        assert_eq!(tool.current_tab_id(), Some(tab_id));

        // BrowseTool::execute closes the tab
        *slot.lock() = None;
        assert_eq!(tool.current_tab_id(), None);
    }

    #[test]
    fn browse_tool_on_progress_stores_pending_callback() {
        let tool = mock_tool();

        let called = Arc::new(AtomicBool::new(false));
        let called_clone = Arc::clone(&called);
        tool.on_progress(oxi_ai::progress_callback(move |_: String| {
            called_clone.store(true, Ordering::SeqCst);
        }));

        // The pending callback should be stored (not yet registered on any tab)
        assert!(
            tool.pending_callback.lock().is_some(),
            "pending_callback should be set after on_progress"
        );

        // Storing must not invoke the callback; it only runs once a tab
        // reports progress.
        assert!(
            !called.load(Ordering::SeqCst),
            "on_progress must store the callback without calling it"
        );
    }
}