Skip to main content

oxi_agent/tools/browse/
engine.rs

1//! Browser engine abstraction layer.
2
3#![allow(missing_docs)]
4//!
5//! Defines the core traits (`BrowserEngine`, `BrowserTab`) and shared
6//! types that all browser tools depend on. These traits are always compiled
7//! (no feature gates) so tools can use them regardless of the backend.
8//!
9//! Actual backend implementations (e.g. oxibrowser-core) are behind
10//! `#[cfg(feature = "native-browser")]` in `oxibrowser_backend.rs`.
11
12use async_trait::async_trait;
13use parking_lot::Mutex;
14use serde::{Deserialize, Serialize};
15use serde_json::Value;
16use std::collections::HashMap;
17use std::sync::Arc;
18
19/// Errors that can occur during browser operations.
20#[derive(Debug, thiserror::Error)]
21pub enum BrowserError {
22    #[error("navigation failed: {0}")]
23    Navigation(String),
24    #[error("element not found: {0}")]
25    ElementNotFound(String),
26    #[error("timeout: {0}")]
27    Timeout(String),
28    #[error("evaluation error: {0}")]
29    Evaluation(String),
30    #[error("screenshot failed: {0}")]
31    Screenshot(String),
32    #[error("tab closed: {0}")]
33    TabClosed(String),
34    #[error("browser error: {0}")]
35    Backend(String),
36    #[error("no active session — call 'open' first")]
37    NoActiveSession,
38}
39
40impl From<BrowserError> for crate::tools::ToolError {
41    fn from(e: BrowserError) -> Self {
42        e.to_string()
43    }
44}
45
46/// Shared page content returned by `goto` and `content` methods.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct PageContent {
49    /// Final URL after redirects.
50    pub url: String,
51    /// Page title.
52    pub title: String,
53    /// HTTP status code.
54    pub status: u16,
55    /// Rendered page content as markdown.
56    pub markdown: String,
57    /// Raw HTML body.
58    #[serde(default)]
59    pub html: String,
60}
61
62impl PageContent {
63    /// Create an empty page (for mock / fallback).
64    pub fn empty() -> Self {
65        Self {
66            url: String::new(),
67            title: String::new(),
68            status: 0,
69            markdown: String::new(),
70            html: String::new(),
71        }
72    }
73}
74
75/// A single link on a page.
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct LinkInfo {
78    #[allow(missing_docs)]
79    pub text: String,
80    #[allow(missing_docs)]
81    pub href: String,
82}
83
84/// A single element matched by a CSS selector.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct ElementInfo {
87    #[allow(missing_docs)]
88    pub tag: String,
89    #[allow(missing_docs)]
90    pub text: String,
91    #[serde(default)]
92    #[allow(missing_docs)]
93    pub attributes: HashMap<String, String>,
94}
95
96// ── BrowserTab trait ──────────────────────────────────────────────────────────
97
98/// Operations available on a single browser tab.
99///
100/// Implementors handle their own async runtime; this trait only
101/// defines the interface contract.
102#[async_trait]
103pub trait BrowserTab: Send + Sync {
104    /// Navigate to `url` and return page content.
105    async fn goto(&self, url: &str) -> Result<PageContent, BrowserError>;
106
107    /// Click an element matching `selector`.
108    async fn click(&self, selector: &str) -> Result<(), BrowserError>;
109
110    /// Type text into an element matching `selector`.
111    async fn type_(&self, selector: &str, text: &str) -> Result<(), BrowserError>;
112
113    /// Fill (set value of) an element matching `selector`.
114    async fn fill(&self, selector: &str, value: &str) -> Result<(), BrowserError>;
115
116    /// Press a keyboard combo (e.g. `"Enter"`, `"Control+c"`).
117    async fn press(&self, combo: &str) -> Result<(), BrowserError>;
118
119    /// Wait for an element matching `selector` to appear.
120    async fn wait_for(&self, selector: &str, timeout_ms: u64) -> Result<(), BrowserError>;
121
122    /// Get the current page content (markdown + html).
123    async fn content(&self) -> Result<PageContent, BrowserError>;
124
125    /// Get text content of all elements matching `selector`.
126    async fn query_all(&self, selector: &str) -> Result<Vec<String>, BrowserError>;
127
128    /// Evaluate a JavaScript expression and return the JSON result.
129    async fn evaluate(&self, js: &str) -> Result<Value, BrowserError>;
130
131    /// Capture a screenshot and return PNG bytes.
132    async fn screenshot(&self, width: u32) -> Result<Vec<u8>, BrowserError>;
133
134    /// Close this tab.
135    async fn close(&self) -> Result<(), BrowserError>;
136
137    /// Navigate back in history. Returns the rendered page content.
138    async fn back(&self) -> Result<PageContent, BrowserError>;
139
140    /// Navigate forward in history. Returns the rendered page content.
141    async fn forward(&self) -> Result<PageContent, BrowserError>;
142
143    /// Reload the current page. Returns the rendered page content.
144    async fn reload(&self) -> Result<PageContent, BrowserError>;
145
146    /// Select an option in a `<select>` element.
147    async fn select_option(&self, selector: &str, value: &str) -> Result<(), BrowserError>;
148
149    /// Check a checkbox or radio input.
150    async fn check(&self, selector: &str) -> Result<(), BrowserError>;
151
152    /// Uncheck a checkbox or radio input.
153    async fn uncheck(&self, selector: &str) -> Result<(), BrowserError>;
154
155    // ── Advanced interaction ───────────────────────────────────
156
157    /// Clear the value of an input element.
158    async fn clear(&self, selector: &str) -> Result<(), BrowserError> {
159        self.fill(selector, "").await
160    }
161
162    /// Hover over an element.
163    async fn hover(&self, selector: &str) -> Result<(), BrowserError> {
164        let sel = serde_json::to_string(selector).unwrap_or_default();
165        let js = format!(
166            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('mouseover', {{bubbles:true}})); return el.tagName; }})()"#
167        );
168        self.evaluate(&js).await.map(|_| ())
169    }
170
171    /// Double-click an element.
172    async fn double_click(&self, selector: &str) -> Result<(), BrowserError> {
173        let sel = serde_json::to_string(selector).unwrap_or_default();
174        let js = format!(
175            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('dblclick', {{bubbles:true}})); return el.tagName; }})()"#
176        );
177        self.evaluate(&js).await.map(|_| ())
178    }
179
180    /// Right-click (context menu) an element.
181    async fn right_click(&self, selector: &str) -> Result<(), BrowserError> {
182        let sel = serde_json::to_string(selector).unwrap_or_default();
183        let js = format!(
184            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('contextmenu', {{bubbles:true, button:2}})); return el.tagName; }})()"#
185        );
186        self.evaluate(&js).await.map(|_| ())
187    }
188
189    /// Scroll the page by delta pixels.
190    async fn scroll(&self, delta_x: f64, delta_y: f64) -> Result<(), BrowserError> {
191        let js = format!("window.scrollBy({}, {})", delta_x, delta_y);
192        self.evaluate(&js).await.map(|_| ())
193    }
194
195    /// Scroll an element into view.
196    async fn scroll_into_view(&self, selector: &str) -> Result<(), BrowserError> {
197        let sel = serde_json::to_string(selector).unwrap_or_default();
198        let js = format!(
199            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.scrollIntoView(); return el.tagName; }})()"#
200        );
201        self.evaluate(&js).await.map(|_| ())
202    }
203
204    /// Drag from one element to another.
205    async fn drag(&self, from_selector: &str, to_selector: &str) -> Result<(), BrowserError> {
206        let from_sel = serde_json::to_string(from_selector).unwrap_or_default();
207        let to_sel = serde_json::to_string(to_selector).unwrap_or_default();
208        let js = format!(
209            r#"(function() {{ var src = document.querySelector({from_sel}); var dst = document.querySelector({to_sel}); if (!src || !dst) return null; src.dispatchEvent(new DragEvent('dragstart', {{bubbles:true}})); dst.dispatchEvent(new DragEvent('drop', {{bubbles:true}})); src.dispatchEvent(new DragEvent('dragend', {{bubbles:true}})); return 'ok'; }})()"#
210        );
211        self.evaluate(&js).await.map(|_| ())
212    }
213
214    /// Upload a file to a file input element.
215    async fn upload_file(&self, selector: &str, path: &str) -> Result<(), BrowserError> {
216        let sel = serde_json::to_string(selector).unwrap_or_default();
217        let p = serde_json::to_string(path).unwrap_or_default();
218        let js = format!(
219            r#"(function() {{ var el = document.querySelector({sel}); if (!el || el.type !== 'file') return null; if (typeof DataTransfer === 'undefined') return null; var dt = new DataTransfer(); var f = new File([], {p}.split('/').pop()); dt.items.add(f); el.files = dt.files; el.dispatchEvent(new Event('change', {{bubbles:true}})); return el.tagName; }})()"#
220        );
221        self.evaluate(&js).await.map(|_| ())
222    }
223
224    /// Get the value or text content of an element.
225    async fn get_value(&self, selector: &str) -> Result<String, BrowserError> {
226        let sel = serde_json::to_string(selector).unwrap_or_default();
227        let js = format!(
228            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; return (el.value !== undefined ? el.value : el.textContent) || ''; }})()"#
229        );
230        let val = self.evaluate(&js).await?;
231        Ok(val.as_str().unwrap_or("").to_string())
232    }
233
234    /// Evaluate JS that may return a promise; awaits by default.
235    async fn evaluate_await(&self, js: &str) -> Result<Value, BrowserError> {
236        self.evaluate(js).await
237    }
238
239    /// Returns `true` if this tab has been closed.
240    fn is_closed(&self) -> bool {
241        false
242    }
243}
244
245// ── BrowserEngine trait ───────────────────────────────────────────────────────
246
247/// Factory for opening and managing browser tabs.
248///
249/// This trait is implemented by backends (e.g. oxibrowser-core) and
250/// consumed by the tool layer via `Arc<dyn BrowserEngine>`.
251#[async_trait]
252pub trait BrowserEngine: Send + Sync {
253    /// Fetch a URL and return page content (no tab management).
254    async fn fetch(&self, url: &str) -> Result<PageContent, BrowserError> {
255        let tab = self.new_tab().await?;
256        let content = tab.goto(url).await;
257        let _ = tab.close().await;
258        content
259    }
260
261    /// Open a new browser tab and return it.
262    async fn new_tab(&self) -> Result<Box<dyn BrowserTab>, BrowserError>;
263
264    /// Close all open tabs and shut down the browser instance.
265    async fn close(&self) -> Result<(), BrowserError>;
266
267    /// Returns `true` if the browser is still alive.
268    async fn is_alive(&self) -> bool;
269
270    /// Access the engine's progress forwarder.
271    ///
272    /// Tools (e.g. `BrowseTool`) call `engine.progress_forwarder().set(cb)`
273    /// from their `on_progress` implementation. Backends that stream
274    /// lifecycle events (oxibrowser-core) spawn a background task in their
275    /// constructor that drains the browser's event channel and invokes
276    /// the currently-installed callback. Backends without event streaming
277    /// return an empty forwarder — `set` becomes a no-op, no events fire.
278    ///
279    /// Default implementation returns a fresh empty forwarder.
280    fn progress_forwarder(&self) -> Arc<ProgressForwarder> {
281        Arc::new(ProgressForwarder::new())
282    }
283}
284
285// ── ProgressForwarder ─────────────────────────────────────────────────────
286
287/// Shared slot for a single progress callback.
288///
289/// The engine is single-tenant: only one tool call may be in flight at a
290/// time. `BrowseTool` enforces this via `ToolExecutionMode::SequentialOnly`.
291///
292/// Holds at most one `ProgressCallback` at a time. Tools set the callback
293/// before `execute`; a backend's background task reads it and invokes it
294/// with each event's `short_label()`. Replacing the callback (the next
295/// tool call) drops the old one — there is no fan-out.
296///
297/// The default forwarder (returned by the trait's default `progress_forwarder`)
298/// is a no-op: `invoke` silently drops messages. This is the right
299/// behaviour for backends that don't stream events.
300pub struct ProgressForwarder {
301    callback: Mutex<Option<crate::tools::ProgressCallback>>,
302}
303
304impl Default for ProgressForwarder {
305    fn default() -> Self {
306        Self::new()
307    }
308}
309
310impl ProgressForwarder {
311    /// Create an empty forwarder.
312    pub fn new() -> Self {
313        Self {
314            callback: Mutex::new(None),
315        }
316    }
317
318    /// Install (or replace) the callback. Called from `AgentTool::on_progress`.
319    pub fn set(&self, cb: crate::tools::ProgressCallback) {
320        *self.callback.lock() = Some(cb);
321    }
322
323    /// Remove the callback. Subsequent `invoke` calls become no-ops until
324    /// the next `set`.
325    pub fn clear(&self) {
326        *self.callback.lock() = None;
327    }
328
329    /// Invoke the currently-installed callback, if any. Never panics; never
330    /// blocks. If the callback itself panics, the panic propagates (we do
331    /// not silently swallow — that would hide bugs in user code).
332    pub fn invoke(&self, msg: String) {
333        if let Some(cb) = self.callback.lock().as_ref() {
334            cb(msg);
335        }
336    }
337
338    /// Whether a callback is currently installed. Useful for tests.
339    pub fn is_set(&self) -> bool {
340        self.callback.lock().is_some()
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// A new forwarder holds no callback and tolerates `invoke`.
    #[test]
    fn progress_forwarder_default_is_empty() {
        let forwarder = ProgressForwarder::new();
        assert!(!forwarder.is_set());
        // invoke on empty forwarder is a silent no-op
        forwarder.invoke(String::from("should be dropped"));
    }

    /// An installed callback receives every invoked message.
    #[test]
    fn progress_forwarder_set_and_invoke() {
        let forwarder = ProgressForwarder::new();
        let hits = Arc::new(AtomicUsize::new(0));
        let hits_in_cb = Arc::clone(&hits);
        forwarder.set(oxi_ai::progress_callback(move |msg: String| {
            assert_eq!(msg, "hello");
            hits_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        assert!(forwarder.is_set());

        for _ in 0..2 {
            forwarder.invoke(String::from("hello"));
        }
        assert_eq!(hits.load(Ordering::SeqCst), 2);
    }

    /// Installing a second callback replaces the first one.
    #[test]
    fn progress_forwarder_set_replaces_previous() {
        let forwarder = ProgressForwarder::new();
        let first_hits = Arc::new(AtomicUsize::new(0));
        let second_hits = Arc::new(AtomicUsize::new(0));

        let first_in_cb = Arc::clone(&first_hits);
        forwarder.set(oxi_ai::progress_callback(move |_| {
            first_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        forwarder.invoke(String::from("first"));
        assert_eq!(first_hits.load(Ordering::SeqCst), 1);
        assert_eq!(second_hits.load(Ordering::SeqCst), 0);

        // Replace with a new callback — the old one is dropped.
        let second_in_cb = Arc::clone(&second_hits);
        forwarder.set(oxi_ai::progress_callback(move |_| {
            second_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        forwarder.invoke(String::from("second"));
        assert_eq!(
            first_hits.load(Ordering::SeqCst),
            1,
            "old callback should not fire"
        );
        assert_eq!(second_hits.load(Ordering::SeqCst), 1);
    }

    /// After `clear`, messages are dropped again.
    #[test]
    fn progress_forwarder_clear() {
        let forwarder = ProgressForwarder::new();
        let hits = Arc::new(AtomicUsize::new(0));
        let hits_in_cb = Arc::clone(&hits);
        forwarder.set(oxi_ai::progress_callback(move |_| {
            hits_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        forwarder.invoke(String::from("x"));
        assert_eq!(hits.load(Ordering::SeqCst), 1);

        forwarder.clear();
        assert!(!forwarder.is_set());
        forwarder.invoke(String::from("y"));
        assert_eq!(
            hits.load(Ordering::SeqCst),
            1,
            "invoke after clear is no-op"
        );
    }

    /// The placeholder page has no URL and a zero status.
    #[test]
    fn page_content_empty() {
        let page = PageContent::empty();
        assert!(page.url.is_empty());
        assert_eq!(page.status, 0);
    }

    /// The display text carries the variant's message prefix.
    #[test]
    fn browser_error_display() {
        let err = BrowserError::Navigation(String::from("connection refused"));
        let rendered = err.to_string();
        assert!(rendered.contains("navigation failed"));
    }

    /// `LinkInfo` survives a JSON round trip.
    #[test]
    fn link_info_serde() {
        let original = LinkInfo {
            text: String::from("Example"),
            href: String::from("https://example.com"),
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LinkInfo = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.text, "Example");
        assert_eq!(decoded.href, "https://example.com");
    }

    /// Serialized `ElementInfo` contains its tag and text.
    #[test]
    fn element_info_serde() {
        let attributes = HashMap::from([(String::from("class"), String::from("item"))]);
        let element = ElementInfo {
            tag: String::from("DIV"),
            text: String::from("Hello"),
            attributes,
        };
        let encoded = serde_json::to_string(&element).unwrap();
        assert!(encoded.contains("DIV"));
        assert!(encoded.contains("Hello"));
    }

    /// The session-less error mentions the missing session.
    #[test]
    fn browser_error_no_active_session() {
        let rendered = BrowserError::NoActiveSession.to_string();
        assert!(rendered.contains("no active session"));
    }
}