Skip to main content

oxi_agent/tools/browse/
browse_tool.rs

1//! Browse tool — render a web page and return its content.
2//!
3//! Opens exactly **one** tab per request and extracts all content from it.
4//! Never calls engine-level methods that would open additional tabs.
5
6use super::config::BrowseConfig;
7use super::engine::BrowserEngine;
8use super::helpers;
9use super::tab_guard::TabGuard;
10use crate::tools::{AgentTool, AgentToolResult, ToolContext, ToolError, ToolExecutionMode};
11use async_trait::async_trait;
12use parking_lot::Mutex;
13use serde_json::{json, Value};
14use std::sync::Arc;
15use tokio::sync::oneshot;
16
17/// Render a web page using the built-in headless browser.
18///
19/// Returns page content as markdown, html, text, or a list of links.
20pub struct BrowseTool {
21    engine: Arc<dyn BrowserEngine>,
22    config: BrowseConfig,
23    /// Shared callback management (progress + browse progress).
24    callbacks: super::callback_mixin::BrowseCallbacks,
25    /// Shared slot for the current tab's ID. The agent loop creates the slot
26    /// and passes it via `set_tab_id_slot`; BrowseTool writes `Some(tab_id)`
27    /// when it opens a tab and `None` on close.
28    tab_id_slot: Mutex<Arc<parking_lot::Mutex<Option<uuid::Uuid>>>>,
29}
30
31impl BrowseTool {
32    /// Create with the given engine and default config.
33    pub fn new(engine: Arc<dyn BrowserEngine>) -> Self {
34        Self {
35            engine,
36            config: BrowseConfig::default(),
37            callbacks: super::callback_mixin::BrowseCallbacks::new(),
38            tab_id_slot: Mutex::new(Arc::new(parking_lot::Mutex::new(None))),
39        }
40    }
41
42    /// Create with custom configuration.
43    pub fn with_config(engine: Arc<dyn BrowserEngine>, config: BrowseConfig) -> Self {
44        Self {
45            engine,
46            config,
47            callbacks: super::callback_mixin::BrowseCallbacks::new(),
48            tab_id_slot: Mutex::new(Arc::new(parking_lot::Mutex::new(None))),
49        }
50    }
51}
52
53#[async_trait]
54impl AgentTool for BrowseTool {
55    fn name(&self) -> &str {
56        "browse"
57    }
58
59    fn label(&self) -> &str {
60        "Browse"
61    }
62
63    fn description(&self) -> &str {
64        "Browse a web page with a built-in headless browser. Renders JavaScript-powered \
65         pages and returns content as markdown (default), html, or links. Use when \
66         web_search results are insufficient and you need to read the actual page content. \
67         Supports waiting for dynamic content via CSS selectors."
68    }
69
70    fn parameters_schema(&self) -> Value {
71        json!({
72            "type": "object",
73            "properties": {
74                "url": {
75                    "type": "string",
76                    "description": "URL to browse"
77                },
78                "format": {
79                    "type": "string",
80                    "enum": ["markdown", "html", "text", "links"],
81                    "default": "markdown",
82                    "description": "Output format: markdown (default), html, plain text, or list of links"
83                },
84                "selector": {
85                    "type": "string",
86                    "description": "CSS selector to extract only matching elements"
87                },
88                "wait_for": {
89                    "type": "string",
90                    "description": "CSS selector to wait for before extracting (for JS-rendered content)"
91                },
92                "screenshot": {
93                    "type": "boolean",
94                    "default": false,
95                    "description": "Include a PNG screenshot as an image block"
96                }
97            },
98            "required": ["url"]
99        })
100    }
101
102    fn on_progress(&self, callback: crate::tools::ProgressCallback) {
103        self.callbacks.store_progress(callback);
104    }
105
106    fn on_browse_progress(
107        &self,
108        callback: Arc<dyn Fn(super::BrowseProgress) + Send + Sync>,
109    ) {
110        self.callbacks.store_browse(callback);
111    }
112
113    /// Sequential execution preserved for stability.
114    ///
115    /// Per-tab routing via `TabCallbackRegistry` now correctly routes
116    /// progress events by `tab_id`, making parallel execution safe.
117    /// However, `SequentialOnly` is kept for now unless a concrete
118    /// multi-tab use case requires parallel browse calls.
119    fn execution_mode(&self) -> ToolExecutionMode {
120        ToolExecutionMode::SequentialOnly
121    }
122
123    fn current_tab_id(&self) -> Option<uuid::Uuid> {
124        *self.tab_id_slot.lock().lock()
125    }
126
127    fn set_tab_id_slot(&self, slot: Arc<parking_lot::Mutex<Option<uuid::Uuid>>>) {
128        *self.tab_id_slot.lock() = slot;
129    }
130
131    async fn execute(
132        &self,
133        _tool_call_id: &str,
134        params: Value,
135        _signal: Option<oneshot::Receiver<()>>,
136        _ctx: &ToolContext,
137    ) -> Result<AgentToolResult, ToolError> {
138        let url = params["url"]
139            .as_str()
140            .ok_or_else(|| "Missing required parameter: url".to_string())?;
141
142        let format = params["format"].as_str().unwrap_or("markdown");
143        let selector = params["selector"].as_str();
144        let wait_for = params["wait_for"].as_str();
145        let want_screenshot = params["screenshot"].as_bool().unwrap_or(false);
146
147        tracing::info!(url = %url, format = %format, "browsing page");
148
149        // Open exactly one tab for this request
150        let raw_tab = self
151            .engine
152            .new_tab()
153            .await
154            .map_err(|e| format!("Failed to open browser tab: {}", e))?;
155
156        // Store the tab_id so the agent loop's progress callback can
157        // include it in `ToolExecutionUpdate` events.
158        let tab_id = raw_tab.tab_id();
159        *self.tab_id_slot.lock().lock() = Some(tab_id);
160
161        // Register the pending callbacks on this tab.
162        self.callbacks.register_on_tab(raw_tab.as_ref());
163
164        let guard = TabGuard::new(raw_tab);
165        let tab = guard.tab();
166
167        // Navigate
168        let page = tab
169            .goto(url)
170            .await
171            .map_err(|e| format!("Navigation failed: {}", e))?;
172
173        // Wait for dynamic content if requested
174        if let Some(sel) = wait_for {
175            tab.wait_for(sel, self.config.default_wait_timeout_ms)
176                .await
177                .map_err(|e| format!("wait_for '{}' failed: {}", sel, e))?;
178        }
179
180        // Build output — all from the same tab
181        let output = match format {
182            "html" => {
183                if let Some(sel) = selector {
184                    tab.query_all(sel)
185                        .await
186                        .map_err(|e| e.to_string())?
187                        .join("\n\n")
188                } else {
189                    page.html.clone()
190                }
191            }
192            "links" => {
193                let links = helpers::extract_links(tab).await?;
194                helpers::format_links(&links)
195            }
196            "text" => {
197                if let Some(sel) = selector {
198                    tab.query_all(sel)
199                        .await
200                        .map_err(|e| e.to_string())?
201                        .join("\n")
202                } else {
203                    page.markdown.clone()
204                }
205            }
206            _ => {
207                // "markdown" (default)
208                if let Some(sel) = selector {
209                    tab.query_all(sel)
210                        .await
211                        .map_err(|e| e.to_string())?
212                        .join("\n\n")
213                } else {
214                    page.markdown.clone()
215                }
216            }
217        };
218
219        let title = page.title.clone();
220        let final_url = page.url.clone();
221        let status = page.status;
222
223        // Screenshot from the same tab (no re-render)
224        let screenshot_blocks = if want_screenshot {
225            match tab.screenshot(self.config.screenshot_width).await {
226                Ok(png) => {
227                    let b64 =
228                        base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &png);
229                    let img =
230                        oxi_ai::ContentBlock::Image(oxi_ai::ImageContent::new(b64, "image/png"));
231                    Some(vec![img])
232                }
233                Err(e) => {
234                    tracing::warn!("screenshot failed for {}: {}", final_url, e);
235                    None
236                }
237            }
238        } else {
239            None
240        };
241
242        // Explicitly close the tab and clear the tab_id slot
243        guard.close().await;
244        *self.tab_id_slot.lock().lock() = None;
245
246        let mut result = AgentToolResult::success(output).with_metadata(json!({
247            "url": final_url,
248            "title": title,
249            "status": status,
250        }));
251
252        if let Some(blocks) = screenshot_blocks {
253            result = result.with_content_blocks(blocks);
254        }
255
256        Ok(result)
257    }
258}
259
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::browse::engine::{BrowserError, BrowserTab};
    use async_trait::async_trait;

    /// Bare-bones `BrowserEngine` that has no browser behind it. The tests
    /// below never open a tab, so `new_tab` simply reports an error; the stub
    /// only exists so a `BrowseTool` can be constructed.
    struct MockEngine;

    #[async_trait]
    impl BrowserEngine for MockEngine {
        async fn new_tab(&self) -> Result<Box<dyn BrowserTab>, BrowserError> {
            Err(BrowserError::Backend("MockEngine: no real browser".into()))
        }

        async fn close(&self) -> Result<(), BrowserError> {
            Ok(())
        }

        async fn is_alive(&self) -> bool {
            false
        }
    }

    /// Construct a `BrowseTool` backed by the stub engine.
    fn stub_tool() -> BrowseTool {
        BrowseTool::new(Arc::new(MockEngine))
    }

    #[test]
    fn browse_tool_is_sequential_only() {
        let mode = stub_tool().execution_mode();
        assert!(matches!(mode, ToolExecutionMode::SequentialOnly));
    }

    #[test]
    fn browse_tool_tab_id_slot_receives_id_from_agent_loop() {
        // Mirrors the agent loop: install a slot, write a tab ID into it,
        // read it back through the tool, then clear it.
        let tool = stub_tool();

        // Nothing recorded before a slot is installed.
        assert_eq!(tool.current_tab_id(), None);

        // The agent loop owns the slot and hands the tool a shared handle.
        let shared = Arc::new(parking_lot::Mutex::new(None::<uuid::Uuid>));
        tool.set_tab_id_slot(Arc::clone(&shared));

        // Stand-in for `BrowseTool::execute` opening a tab.
        let opened = uuid::Uuid::new_v4();
        *shared.lock() = Some(opened);

        // The progress callback sees the ID through the tool.
        assert_eq!(tool.current_tab_id(), Some(opened));

        // Stand-in for `BrowseTool::execute` closing the tab.
        *shared.lock() = None;
        assert_eq!(tool.current_tab_id(), None);
    }
}