Skip to main content

oxi_agent/tools/browse/
engine.rs

1//! Browser engine abstraction layer.
2
3#![allow(missing_docs)]
4//!
5//! Defines the core traits (`BrowserEngine`, `BrowserTab`) and shared
6//! types that all browser tools depend on. These traits are always compiled
7//! (no feature gates) so tools can use them regardless of the backend.
8//!
9//! Actual backend implementations (e.g. oxibrowser-core) are behind
10//! `#[cfg(feature = "native-browser")]` in `oxibrowser_backend.rs`.
11
12use async_trait::async_trait;
13use parking_lot::Mutex;
14use serde::{Deserialize, Serialize};
15use serde_json::Value;
16use std::collections::HashMap;
17use std::sync::Arc;
18
/// Errors that can occur during browser operations.
///
/// Every variant except `NoActiveSession` carries a `String` that is
/// interpolated after the fixed prefix in the `Display` output generated by
/// `thiserror`.
#[derive(Debug, thiserror::Error)]
pub enum BrowserError {
    /// Displayed as `navigation failed: <detail>`.
    #[error("navigation failed: {0}")]
    Navigation(String),
    /// Displayed as `element not found: <detail>`.
    #[error("element not found: {0}")]
    ElementNotFound(String),
    /// Displayed as `timeout: <detail>`.
    #[error("timeout: {0}")]
    Timeout(String),
    /// Displayed as `evaluation error: <detail>`.
    #[error("evaluation error: {0}")]
    Evaluation(String),
    /// Displayed as `screenshot failed: <detail>`.
    #[error("screenshot failed: {0}")]
    Screenshot(String),
    /// Displayed as `tab closed: <detail>`.
    #[error("tab closed: {0}")]
    TabClosed(String),
    /// Displayed as `browser error: <detail>`; presumably the catch-all for
    /// backend-specific failures — confirm against the backend implementation.
    #[error("browser error: {0}")]
    Backend(String),
    /// No payload; the message tells the caller to call 'open' first.
    #[error("no active session — call 'open' first")]
    NoActiveSession,
}
39
40impl From<BrowserError> for crate::tools::ToolError {
41    fn from(e: BrowserError) -> Self {
42        e.to_string()
43    }
44}
45
/// Shared page content returned by `goto` and `content` methods.
///
/// Serializable with serde; only `html` is optional on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageContent {
    /// Final URL after redirects.
    pub url: String,
    /// Page title.
    pub title: String,
    /// HTTP status code.
    pub status: u16,
    /// Rendered page content as markdown.
    pub markdown: String,
    /// Raw HTML body.
    ///
    /// `#[serde(default)]`: deserializes to an empty string when the field
    /// is absent from the input.
    #[serde(default)]
    pub html: String,
}
61
62impl PageContent {
63    /// Create an empty page (for mock / fallback).
64    pub fn empty() -> Self {
65        Self {
66            url: String::new(),
67            title: String::new(),
68            status: 0,
69            markdown: String::new(),
70            html: String::new(),
71        }
72    }
73}
74
/// A single link on a page.
///
/// Round-trips through serde (see the `link_info_serde` test in this file).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LinkInfo {
    // Presumably the visible text of the link — confirm against the backend.
    #[allow(missing_docs)]
    pub text: String,
    // Presumably the link target as found on the page (e.g. an absolute URL
    // such as the one used in the tests) — confirm whether it is resolved.
    #[allow(missing_docs)]
    pub href: String,
}
83
/// A single element matched by a CSS selector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementInfo {
    // Presumably the element's tag name (the tests use "DIV") — confirm casing
    // against the backend.
    #[allow(missing_docs)]
    pub tag: String,
    // Presumably the element's text content — confirm against the backend.
    #[allow(missing_docs)]
    pub text: String,
    // Attribute name → value pairs. `#[serde(default)]` makes this an empty
    // map when the field is missing from the deserialized input.
    #[serde(default)]
    #[allow(missing_docs)]
    pub attributes: HashMap<String, String>,
}
95
96// ── BrowserTab trait ──────────────────────────────────────────────────────────
97
98/// Operations available on a single browser tab.
99///
100/// Implementors handle their own async runtime; this trait only
101/// defines the interface contract.
102#[async_trait]
103pub trait BrowserTab: Send + Sync {
104    /// Navigate to `url` and return page content.
105    async fn goto(&self, url: &str) -> Result<PageContent, BrowserError>;
106
107    /// Click an element matching `selector`.
108    async fn click(&self, selector: &str) -> Result<(), BrowserError>;
109
110    /// Type text into an element matching `selector`.
111    async fn type_(&self, selector: &str, text: &str) -> Result<(), BrowserError>;
112
113    /// Fill (set value of) an element matching `selector`.
114    async fn fill(&self, selector: &str, value: &str) -> Result<(), BrowserError>;
115
116    /// Press a keyboard combo (e.g. `"Enter"`, `"Control+c"`).
117    async fn press(&self, combo: &str) -> Result<(), BrowserError>;
118
119    /// Wait for an element matching `selector` to appear.
120    async fn wait_for(&self, selector: &str, timeout_ms: u64) -> Result<(), BrowserError>;
121
122    /// Get the current page content (markdown + html).
123    async fn content(&self) -> Result<PageContent, BrowserError>;
124
125    /// Get text content of all elements matching `selector`.
126    async fn query_all(&self, selector: &str) -> Result<Vec<String>, BrowserError>;
127
128    /// Evaluate a JavaScript expression and return the JSON result.
129    async fn evaluate(&self, js: &str) -> Result<Value, BrowserError>;
130
131    /// Capture a screenshot and return PNG bytes.
132    async fn screenshot(&self, width: u32) -> Result<Vec<u8>, BrowserError>;
133
134    /// Close this tab.
135    async fn close(&self) -> Result<(), BrowserError>;
136
137    /// Navigate back in history. Returns the rendered page content.
138    async fn back(&self) -> Result<PageContent, BrowserError>;
139
140    /// Navigate forward in history. Returns the rendered page content.
141    async fn forward(&self) -> Result<PageContent, BrowserError>;
142
143    /// Reload the current page. Returns the rendered page content.
144    async fn reload(&self) -> Result<PageContent, BrowserError>;
145
146    /// Select an option in a `<select>` element.
147    async fn select_option(&self, selector: &str, value: &str) -> Result<(), BrowserError>;
148
149    /// Check a checkbox or radio input.
150    async fn check(&self, selector: &str) -> Result<(), BrowserError>;
151
152    /// Uncheck a checkbox or radio input.
153    async fn uncheck(&self, selector: &str) -> Result<(), BrowserError>;
154
155    // ── Advanced interaction ───────────────────────────────────
156
157    /// Clear the value of an input element.
158    async fn clear(&self, selector: &str) -> Result<(), BrowserError> {
159        self.fill(selector, "").await
160    }
161
162    /// Hover over an element.
163    async fn hover(&self, selector: &str) -> Result<(), BrowserError> {
164        let sel = serde_json::to_string(selector).unwrap_or_default();
165        let js = format!(
166            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('mouseover', {{bubbles:true}})); return el.tagName; }})()"#
167        );
168        self.evaluate(&js).await.map(|_| ())
169    }
170
171    /// Double-click an element.
172    async fn double_click(&self, selector: &str) -> Result<(), BrowserError> {
173        let sel = serde_json::to_string(selector).unwrap_or_default();
174        let js = format!(
175            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('dblclick', {{bubbles:true}})); return el.tagName; }})()"#
176        );
177        self.evaluate(&js).await.map(|_| ())
178    }
179
180    /// Right-click (context menu) an element.
181    async fn right_click(&self, selector: &str) -> Result<(), BrowserError> {
182        let sel = serde_json::to_string(selector).unwrap_or_default();
183        let js = format!(
184            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('contextmenu', {{bubbles:true, button:2}})); return el.tagName; }})()"#
185        );
186        self.evaluate(&js).await.map(|_| ())
187    }
188
189    /// Scroll the page by delta pixels.
190    async fn scroll(&self, delta_x: f64, delta_y: f64) -> Result<(), BrowserError> {
191        let js = format!("window.scrollBy({}, {})", delta_x, delta_y);
192        self.evaluate(&js).await.map(|_| ())
193    }
194
195    /// Scroll an element into view.
196    async fn scroll_into_view(&self, selector: &str) -> Result<(), BrowserError> {
197        let sel = serde_json::to_string(selector).unwrap_or_default();
198        let js = format!(
199            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.scrollIntoView(); return el.tagName; }})()"#
200        );
201        self.evaluate(&js).await.map(|_| ())
202    }
203
204    /// Drag from one element to another.
205    async fn drag(&self, from_selector: &str, to_selector: &str) -> Result<(), BrowserError> {
206        let from_sel = serde_json::to_string(from_selector).unwrap_or_default();
207        let to_sel = serde_json::to_string(to_selector).unwrap_or_default();
208        let js = format!(
209            r#"(function() {{ var src = document.querySelector({from_sel}); var dst = document.querySelector({to_sel}); if (!src || !dst) return null; src.dispatchEvent(new DragEvent('dragstart', {{bubbles:true}})); dst.dispatchEvent(new DragEvent('drop', {{bubbles:true}})); src.dispatchEvent(new DragEvent('dragend', {{bubbles:true}})); return 'ok'; }})()"#
210        );
211        self.evaluate(&js).await.map(|_| ())
212    }
213
214    /// Upload a file to a file input element.
215    async fn upload_file(&self, selector: &str, path: &str) -> Result<(), BrowserError> {
216        let sel = serde_json::to_string(selector).unwrap_or_default();
217        let p = serde_json::to_string(path).unwrap_or_default();
218        let js = format!(
219            r#"(function() {{ var el = document.querySelector({sel}); if (!el || el.type !== 'file') return null; if (typeof DataTransfer === 'undefined') return null; var dt = new DataTransfer(); var f = new File([], {p}.split('/').pop()); dt.items.add(f); el.files = dt.files; el.dispatchEvent(new Event('change', {{bubbles:true}})); return el.tagName; }})()"#
220        );
221        self.evaluate(&js).await.map(|_| ())
222    }
223
224    /// Get the value or text content of an element.
225    async fn get_value(&self, selector: &str) -> Result<String, BrowserError> {
226        let sel = serde_json::to_string(selector).unwrap_or_default();
227        let js = format!(
228            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; return (el.value !== undefined ? el.value : el.textContent) || ''; }})()"#
229        );
230        let val = self.evaluate(&js).await?;
231        Ok(val.as_str().unwrap_or("").to_string())
232    }
233
234    /// Evaluate JS that may return a promise; awaits by default.
235    async fn evaluate_await(&self, js: &str) -> Result<Value, BrowserError> {
236        self.evaluate(js).await
237    }
238
239    /// Returns `true` if this tab has been closed.
240    fn is_closed(&self) -> bool {
241        false
242    }
243}
244
245// ── BrowserEngine trait ───────────────────────────────────────────────────────
246
247/// Factory for opening and managing browser tabs.
248///
249/// This trait is implemented by backends (e.g. oxibrowser-core) and
250/// consumed by the tool layer via `Arc<dyn BrowserEngine>`.
251#[async_trait]
252pub trait BrowserEngine: Send + Sync {
253    /// Fetch a URL and return page content (no tab management).
254    async fn fetch(&self, url: &str) -> Result<PageContent, BrowserError> {
255        let tab = self.new_tab().await?;
256        let content = tab.goto(url).await;
257        let _ = tab.close().await;
258        content
259    }
260
261    /// Open a new browser tab and return it.
262    async fn new_tab(&self) -> Result<Box<dyn BrowserTab>, BrowserError>;
263
264    /// Close all open tabs and shut down the browser instance.
265    async fn close(&self) -> Result<(), BrowserError>;
266
267    /// Returns `true` if the browser is still alive.
268    async fn is_alive(&self) -> bool;
269
270    /// Access the engine's progress forwarder.
271    ///
272    /// Tools (e.g. `BrowseTool`) call `engine.progress_forwarder().set(cb)`
273    /// from their `on_progress` implementation. Backends that stream
274    /// lifecycle events (oxibrowser-core) spawn a background task in their
275    /// constructor that drains the browser's event channel and invokes
276    /// the currently-installed callback. Backends without event streaming
277    /// return an empty forwarder — `set` becomes a no-op, no events fire.
278    ///
279    /// Default implementation returns a fresh empty forwarder.
280    fn progress_forwarder(&self) -> Arc<ProgressForwarder> {
281        Arc::new(ProgressForwarder::new())
282    }
283}
284
285// ── ProgressForwarder ─────────────────────────────────────────────────────
286
/// Shared slot for a single progress callback.
///
/// Holds at most one `ProgressCallback` at a time. Tools set the callback
/// before `execute`; a backend's background task reads it and invokes it
/// with each event's `short_label()`. Replacing the callback (the next
/// tool call) drops the old one — there is no fan-out.
///
/// The default forwarder (returned by the trait's default `progress_forwarder`)
/// is a no-op: `invoke` silently drops messages. This is the right
/// behaviour for backends that don't stream events.
pub struct ProgressForwarder {
    // `None` means no callback is installed. Guarded by a `parking_lot`
    // mutex so the slot can be read and replaced through `&self`.
    callback: Mutex<Option<crate::tools::ProgressCallback>>,
}
300
301impl Default for ProgressForwarder {
302    fn default() -> Self {
303        Self::new()
304    }
305}
306
307impl ProgressForwarder {
308    /// Create an empty forwarder.
309    pub fn new() -> Self {
310        Self {
311            callback: Mutex::new(None),
312        }
313    }
314
315    /// Install (or replace) the callback. Called from `AgentTool::on_progress`.
316    pub fn set(&self, cb: crate::tools::ProgressCallback) {
317        *self.callback.lock() = Some(cb);
318    }
319
320    /// Remove the callback. Subsequent `invoke` calls become no-ops until
321    /// the next `set`.
322    pub fn clear(&self) {
323        *self.callback.lock() = None;
324    }
325
326    /// Invoke the currently-installed callback, if any. Never panics; never
327    /// blocks. If the callback itself panics, the panic propagates (we do
328    /// not silently swallow — that would hide bugs in user code).
329    pub fn invoke(&self, msg: String) {
330        if let Some(cb) = self.callback.lock().as_ref() {
331            cb(msg);
332        }
333    }
334
335    /// Whether a callback is currently installed. Useful for tests.
336    pub fn is_set(&self) -> bool {
337        self.callback.lock().is_some()
338    }
339}
340
341
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// A new forwarder has no callback and tolerates `invoke`.
    #[test]
    fn progress_forwarder_default_is_empty() {
        let forwarder = ProgressForwarder::new();
        assert!(!forwarder.is_set());
        // With nothing installed the message is simply discarded.
        forwarder.invoke(String::from("should be dropped"));
    }

    /// An installed callback receives every message passed to `invoke`.
    #[test]
    fn progress_forwarder_set_and_invoke() {
        let forwarder = ProgressForwarder::new();
        let hits = Arc::new(AtomicUsize::new(0));
        let hits_in_cb = Arc::clone(&hits);
        forwarder.set(oxi_ai::progress_callback(move |msg: String| {
            assert_eq!(msg, "hello");
            hits_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        assert!(forwarder.is_set());

        for _ in 0..2 {
            forwarder.invoke(String::from("hello"));
        }
        assert_eq!(hits.load(Ordering::SeqCst), 2);
    }

    /// A second `set` replaces the first callback; only the newest one fires.
    #[test]
    fn progress_forwarder_set_replaces_previous() {
        let forwarder = ProgressForwarder::new();
        let first_hits = Arc::new(AtomicUsize::new(0));
        let second_hits = Arc::new(AtomicUsize::new(0));

        let first_in_cb = Arc::clone(&first_hits);
        forwarder.set(oxi_ai::progress_callback(move |_| {
            first_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        forwarder.invoke(String::from("first"));
        assert_eq!(first_hits.load(Ordering::SeqCst), 1);
        assert_eq!(second_hits.load(Ordering::SeqCst), 0);

        // Installing a new callback drops the previous one.
        let second_in_cb = Arc::clone(&second_hits);
        forwarder.set(oxi_ai::progress_callback(move |_| {
            second_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        forwarder.invoke(String::from("second"));
        assert_eq!(first_hits.load(Ordering::SeqCst), 1, "old callback should not fire");
        assert_eq!(second_hits.load(Ordering::SeqCst), 1);
    }

    /// After `clear`, the slot is empty and `invoke` does nothing.
    #[test]
    fn progress_forwarder_clear() {
        let forwarder = ProgressForwarder::new();
        let hits = Arc::new(AtomicUsize::new(0));
        let hits_in_cb = Arc::clone(&hits);
        forwarder.set(oxi_ai::progress_callback(move |_| {
            hits_in_cb.fetch_add(1, Ordering::SeqCst);
        }));
        forwarder.invoke(String::from("x"));
        assert_eq!(hits.load(Ordering::SeqCst), 1);

        forwarder.clear();
        assert!(!forwarder.is_set());
        forwarder.invoke(String::from("y"));
        assert_eq!(hits.load(Ordering::SeqCst), 1, "invoke after clear is no-op");
    }

    /// `PageContent::empty` yields a blank URL and status 0.
    #[test]
    fn page_content_empty() {
        let page = PageContent::empty();
        assert!(page.url.is_empty());
        assert_eq!(page.status, 0);
    }

    /// The `Navigation` variant renders with its fixed prefix.
    #[test]
    fn browser_error_display() {
        let rendered = BrowserError::Navigation(String::from("connection refused")).to_string();
        assert!(rendered.contains("navigation failed"));
    }

    /// `LinkInfo` survives a JSON round trip.
    #[test]
    fn link_info_serde() {
        let original = LinkInfo {
            text: String::from("Example"),
            href: String::from("https://example.com"),
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LinkInfo = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.text, "Example");
        assert_eq!(decoded.href, "https://example.com");
    }

    /// `ElementInfo` serializes its tag and text into the JSON output.
    #[test]
    fn element_info_serde() {
        let element = ElementInfo {
            tag: String::from("DIV"),
            text: String::from("Hello"),
            attributes: HashMap::from([(String::from("class"), String::from("item"))]),
        };
        let encoded = serde_json::to_string(&element).unwrap();
        assert!(encoded.contains("DIV"));
        assert!(encoded.contains("Hello"));
    }

    /// The payload-free variant renders its fixed message.
    #[test]
    fn browser_error_no_active_session() {
        let rendered = BrowserError::NoActiveSession.to_string();
        assert!(rendered.contains("no active session"));
    }
}