Skip to main content

oxi_agent/tools/browse/
engine.rs

1//! Browser engine abstraction layer.
2
3#![allow(missing_docs)]
4//!
5//! Defines the core traits (`BrowserEngine`, `BrowserTab`) and shared
6//! types that all browser tools depend on. These traits are always compiled
7//! (no feature gates) so tools can use them regardless of the backend.
8//!
9//! Actual backend implementations (e.g. oxibrowser-core) are behind
10//! `#[cfg(feature = "native-browser")]` in `oxibrowser_backend.rs`.
11
12use async_trait::async_trait;
13use parking_lot::Mutex;
14use serde::{Deserialize, Serialize};
15use serde_json::Value;
16use std::collections::HashMap;
17use std::sync::Arc;
18
19/// Errors that can occur during browser operations.
20#[derive(Debug, thiserror::Error)]
21pub enum BrowserError {
22    #[error("navigation failed: {0}")]
23    Navigation(String),
24    #[error("element not found: {0}")]
25    ElementNotFound(String),
26    #[error("timeout: {0}")]
27    Timeout(String),
28    #[error("evaluation error: {0}")]
29    Evaluation(String),
30    #[error("screenshot failed: {0}")]
31    Screenshot(String),
32    #[error("tab closed: {0}")]
33    TabClosed(String),
34    #[error("browser error: {0}")]
35    Backend(String),
36    #[error("no active session — call 'open' first")]
37    NoActiveSession,
38}
39
40impl From<BrowserError> for crate::tools::ToolError {
41    fn from(e: BrowserError) -> Self {
42        e.to_string()
43    }
44}
45
46/// Shared page content returned by `goto` and `content` methods.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct PageContent {
49    /// Final URL after redirects.
50    pub url: String,
51    /// Page title.
52    pub title: String,
53    /// HTTP status code.
54    pub status: u16,
55    /// Rendered page content as markdown.
56    pub markdown: String,
57    /// Raw HTML body.
58    #[serde(default)]
59    pub html: String,
60}
61
62impl PageContent {
63    /// Create an empty page (for mock / fallback).
64    pub fn empty() -> Self {
65        Self {
66            url: String::new(),
67            title: String::new(),
68            status: 0,
69            markdown: String::new(),
70            html: String::new(),
71        }
72    }
73}
74
75/// A single link on a page.
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct LinkInfo {
78    #[allow(missing_docs)]
79    pub text: String,
80    #[allow(missing_docs)]
81    pub href: String,
82}
83
84/// A single element matched by a CSS selector.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct ElementInfo {
87    #[allow(missing_docs)]
88    pub tag: String,
89    #[allow(missing_docs)]
90    pub text: String,
91    #[serde(default)]
92    #[allow(missing_docs)]
93    pub attributes: HashMap<String, String>,
94}
95
96// ── BrowserTab trait ──────────────────────────────────────────────────────────
97
98/// Operations available on a single browser tab.
99///
100/// Implementors handle their own async runtime; this trait only
101/// defines the interface contract.
102#[async_trait]
103pub trait BrowserTab: Send + Sync {
104    /// Navigate to `url` and return page content.
105    async fn goto(&self, url: &str) -> Result<PageContent, BrowserError>;
106
107    /// Click an element matching `selector`.
108    async fn click(&self, selector: &str) -> Result<(), BrowserError>;
109
110    /// Type text into an element matching `selector`.
111    async fn type_(&self, selector: &str, text: &str) -> Result<(), BrowserError>;
112
113    /// Fill (set value of) an element matching `selector`.
114    async fn fill(&self, selector: &str, value: &str) -> Result<(), BrowserError>;
115
116    /// Press a keyboard combo (e.g. `"Enter"`, `"Control+c"`).
117    async fn press(&self, combo: &str) -> Result<(), BrowserError>;
118
119    /// Wait for an element matching `selector` to appear.
120    async fn wait_for(&self, selector: &str, timeout_ms: u64) -> Result<(), BrowserError>;
121
122    /// Get the current page content (markdown + html).
123    async fn content(&self) -> Result<PageContent, BrowserError>;
124
125    /// Get text content of all elements matching `selector`.
126    async fn query_all(&self, selector: &str) -> Result<Vec<String>, BrowserError>;
127
128    /// Evaluate a JavaScript expression and return the JSON result.
129    async fn evaluate(&self, js: &str) -> Result<Value, BrowserError>;
130
131    /// Capture a screenshot and return PNG bytes.
132    async fn screenshot(&self, width: u32) -> Result<Vec<u8>, BrowserError>;
133
134    /// Close this tab.
135    async fn close(&self) -> Result<(), BrowserError>;
136
137    /// Navigate back in history. Returns the rendered page content.
138    async fn back(&self) -> Result<PageContent, BrowserError>;
139
140    /// Navigate forward in history. Returns the rendered page content.
141    async fn forward(&self) -> Result<PageContent, BrowserError>;
142
143    /// Reload the current page. Returns the rendered page content.
144    async fn reload(&self) -> Result<PageContent, BrowserError>;
145
146    /// Select an option in a `<select>` element.
147    async fn select_option(&self, selector: &str, value: &str) -> Result<(), BrowserError>;
148
149    /// Check a checkbox or radio input.
150    async fn check(&self, selector: &str) -> Result<(), BrowserError>;
151
152    /// Uncheck a checkbox or radio input.
153    async fn uncheck(&self, selector: &str) -> Result<(), BrowserError>;
154
155    // ── Advanced interaction ───────────────────────────────────
156
157    /// Clear the value of an input element.
158    async fn clear(&self, selector: &str) -> Result<(), BrowserError> {
159        self.fill(selector, "").await
160    }
161
162    /// Hover over an element.
163    async fn hover(&self, selector: &str) -> Result<(), BrowserError> {
164        let sel = serde_json::to_string(selector).unwrap_or_default();
165        let js = format!(
166            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('mouseover', {{bubbles:true}})); return el.tagName; }})()"#
167        );
168        self.evaluate(&js).await.map(|_| ())
169    }
170
171    /// Double-click an element.
172    async fn double_click(&self, selector: &str) -> Result<(), BrowserError> {
173        let sel = serde_json::to_string(selector).unwrap_or_default();
174        let js = format!(
175            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('dblclick', {{bubbles:true}})); return el.tagName; }})()"#
176        );
177        self.evaluate(&js).await.map(|_| ())
178    }
179
180    /// Right-click (context menu) an element.
181    async fn right_click(&self, selector: &str) -> Result<(), BrowserError> {
182        let sel = serde_json::to_string(selector).unwrap_or_default();
183        let js = format!(
184            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.dispatchEvent(new MouseEvent('contextmenu', {{bubbles:true, button:2}})); return el.tagName; }})()"#
185        );
186        self.evaluate(&js).await.map(|_| ())
187    }
188
189    /// Scroll the page by delta pixels.
190    async fn scroll(&self, delta_x: f64, delta_y: f64) -> Result<(), BrowserError> {
191        let js = format!("window.scrollBy({}, {})", delta_x, delta_y);
192        self.evaluate(&js).await.map(|_| ())
193    }
194
195    /// Scroll an element into view.
196    async fn scroll_into_view(&self, selector: &str) -> Result<(), BrowserError> {
197        let sel = serde_json::to_string(selector).unwrap_or_default();
198        let js = format!(
199            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; el.scrollIntoView(); return el.tagName; }})()"#
200        );
201        self.evaluate(&js).await.map(|_| ())
202    }
203
204    /// Drag from one element to another.
205    async fn drag(&self, from_selector: &str, to_selector: &str) -> Result<(), BrowserError> {
206        let from_sel = serde_json::to_string(from_selector).unwrap_or_default();
207        let to_sel = serde_json::to_string(to_selector).unwrap_or_default();
208        let js = format!(
209            r#"(function() {{ var src = document.querySelector({from_sel}); var dst = document.querySelector({to_sel}); if (!src || !dst) return null; src.dispatchEvent(new DragEvent('dragstart', {{bubbles:true}})); dst.dispatchEvent(new DragEvent('drop', {{bubbles:true}})); src.dispatchEvent(new DragEvent('dragend', {{bubbles:true}})); return 'ok'; }})()"#
210        );
211        self.evaluate(&js).await.map(|_| ())
212    }
213
214    /// Upload a file to a file input element.
215    async fn upload_file(&self, selector: &str, path: &str) -> Result<(), BrowserError> {
216        let sel = serde_json::to_string(selector).unwrap_or_default();
217        let p = serde_json::to_string(path).unwrap_or_default();
218        let js = format!(
219            r#"(function() {{ var el = document.querySelector({sel}); if (!el || el.type !== 'file') return null; if (typeof DataTransfer === 'undefined') return null; var dt = new DataTransfer(); var f = new File([], {p}.split('/').pop()); dt.items.add(f); el.files = dt.files; el.dispatchEvent(new Event('change', {{bubbles:true}})); return el.tagName; }})()"#
220        );
221        self.evaluate(&js).await.map(|_| ())
222    }
223
224    /// Get the value or text content of an element.
225    async fn get_value(&self, selector: &str) -> Result<String, BrowserError> {
226        let sel = serde_json::to_string(selector).unwrap_or_default();
227        let js = format!(
228            r#"(function() {{ var el = document.querySelector({sel}); if (!el) return null; return (el.value !== undefined ? el.value : el.textContent) || ''; }})()"#
229        );
230        let val = self.evaluate(&js).await?;
231        Ok(val.as_str().unwrap_or("").to_string())
232    }
233
234    /// Evaluate JS that may return a promise; awaits by default.
235    async fn evaluate_await(&self, js: &str) -> Result<Value, BrowserError> {
236        self.evaluate(js).await
237    }
238
239    /// Returns `true` if this tab has been closed.
240    fn is_closed(&self) -> bool {
241        false
242    }
243
244    /// Return this tab's unique ID, if the backend supports it.
245    /// Defaults to `Uuid::nil()` for backends that don't track tab identity.
246    fn tab_id(&self) -> uuid::Uuid {
247        uuid::Uuid::nil()
248    }
249
250    /// Support downcasting for backend-specific access.
251    fn as_any(&self) -> &dyn std::any::Any {
252        // Default: no concrete type info.
253        &std::marker::PhantomData::<()>
254    }
255
256    /// Clear any registered progress callback for this tab.
257    /// Defaults to no-op — only backends with callback registries override.
258    fn clear_progress_callback(&self) {}
259}
260
261// ── BrowserEngine trait ───────────────────────────────────────────────────────
262
263/// Factory for opening and managing browser tabs.
264///
265/// This trait is implemented by backends (e.g. oxibrowser-core) and
266/// consumed by the tool layer via `Arc<dyn BrowserEngine>`.
267#[async_trait]
268pub trait BrowserEngine: Send + Sync {
269    /// Fetch a URL and return page content (no tab management).
270    async fn fetch(&self, url: &str) -> Result<PageContent, BrowserError> {
271        let tab = self.new_tab().await?;
272        let content = tab.goto(url).await;
273        let _ = tab.close().await;
274        content
275    }
276
277    /// Open a new browser tab and return it.
278    async fn new_tab(&self) -> Result<Box<dyn BrowserTab>, BrowserError>;
279
280    /// Close all open tabs and shut down the browser instance.
281    async fn close(&self) -> Result<(), BrowserError>;
282
283    /// Returns `true` if the browser is still alive.
284    async fn is_alive(&self) -> bool;
285
286    /// Access the engine's per-tab callback registry.
287    ///
288    /// Tools (e.g. `BrowseTool`) register per-tab callbacks keyed by
289    /// `tab_id`. The backend's background event-drain task extracts
290    /// `tab_id` from each `BrowserEvent` and routes it to the correct
291    /// callback. Backends without event streaming return an empty
292    /// registry — `set`/`invoke` become no-ops.
293    ///
294    /// Default implementation returns a fresh empty registry.
295    fn callback_registry(&self) -> Arc<TabCallbackRegistry> {
296        Arc::new(TabCallbackRegistry::new())
297    }
298}
299
300// ── TabCallbackRegistry ──────────────────────────────────────────────────
301
302/// Per-`tab_id` callback registry for browser event routing.
303///
304/// Each `BrowseTool` invocation opens its own tab and registers a callback
305/// keyed by the tab's `tab_id`. The engine's background event-drain task
306/// extracts `tab_id` from each `BrowserEvent` and routes it to the correct
307/// callback. Multiple tabs can be active concurrently — each receives only
308/// its own events.
309///
310/// Tabs that have no registered callback (e.g. opened outside of a tool
311/// call) are silently ignored — `invoke` is a no-op for unknown tab IDs.
312pub struct TabCallbackRegistry {
313    callbacks: Mutex<HashMap<uuid::Uuid, crate::tools::ProgressCallback>>,
314}
315
316impl Default for TabCallbackRegistry {
317    fn default() -> Self {
318        Self::new()
319    }
320}
321
322impl TabCallbackRegistry {
323    /// Create an empty registry.
324    pub fn new() -> Self {
325        Self {
326            callbacks: Mutex::new(HashMap::new()),
327        }
328    }
329
330    /// Register a callback for the given `tab_id`.
331    pub fn set(&self, tab_id: uuid::Uuid, cb: crate::tools::ProgressCallback) {
332        self.callbacks.lock().insert(tab_id, cb);
333    }
334
335    /// Remove the callback for `tab_id`. Called when the tab is closed.
336    pub fn clear(&self, tab_id: &uuid::Uuid) {
337        self.callbacks.lock().remove(tab_id);
338    }
339
340    /// Invoke the callback for `tab_id`, if one is registered.
341    /// Never panics; never blocks. If the callback itself panics, the
342    /// panic propagates.
343    pub fn invoke(&self, tab_id: &uuid::Uuid, msg: String) {
344        if let Some(cb) = self.callbacks.lock().get(tab_id).cloned() {
345            cb(msg);
346        }
347    }
348
349    /// Whether a callback is registered for the given `tab_id`.
350    pub fn is_set(&self, tab_id: &uuid::Uuid) -> bool {
351        self.callbacks.lock().contains_key(tab_id)
352    }
353
354    /// Number of currently registered callbacks.
355    pub fn len(&self) -> usize {
356        self.callbacks.lock().len()
357    }
358
359    /// Returns `true` if no callbacks are registered.
360    pub fn is_empty(&self) -> bool {
361        self.callbacks.lock().is_empty()
362    }
363}
364
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// A fresh registry holds nothing, and invoking on it is harmless.
    #[test]
    fn tab_callback_registry_default_is_empty() {
        let registry = TabCallbackRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        // Nothing is registered, so this message is silently dropped.
        registry.invoke(&uuid::Uuid::nil(), String::from("should be dropped"));
    }

    /// A registered callback fires once per invoke; unknown tabs are ignored.
    #[test]
    fn tab_callback_registry_set_and_invoke() {
        let registry = TabCallbackRegistry::new();
        let registered_tab = uuid::Uuid::new_v4();
        let unregistered_tab = uuid::Uuid::new_v4();
        let hits = Arc::new(AtomicUsize::new(0));

        let hits_in_cb = Arc::clone(&hits);
        registry.set(
            registered_tab,
            oxi_ai::progress_callback(move |msg: String| {
                assert_eq!(msg, "hello");
                hits_in_cb.fetch_add(1, Ordering::SeqCst);
            }),
        );
        assert!(registry.is_set(&registered_tab));
        assert!(!registry.is_set(&unregistered_tab));

        for _ in 0..2 {
            registry.invoke(&registered_tab, "hello".to_string());
        }
        // No callback exists for this tab, so the counter must not move.
        registry.invoke(&unregistered_tab, "hello".to_string());
        assert_eq!(hits.load(Ordering::SeqCst), 2);
    }

    /// Events for one tab never reach another tab's callback.
    #[test]
    fn tab_callback_registry_set_per_tab_isolation() {
        let registry = TabCallbackRegistry::new();
        let first_tab = uuid::Uuid::new_v4();
        let second_tab = uuid::Uuid::new_v4();
        let first_hits = Arc::new(AtomicUsize::new(0));
        let second_hits = Arc::new(AtomicUsize::new(0));

        let first_counter = Arc::clone(&first_hits);
        registry.set(
            first_tab,
            oxi_ai::progress_callback(move |_| {
                first_counter.fetch_add(1, Ordering::SeqCst);
            }),
        );
        let second_counter = Arc::clone(&second_hits);
        registry.set(
            second_tab,
            oxi_ai::progress_callback(move |_| {
                second_counter.fetch_add(1, Ordering::SeqCst);
            }),
        );

        registry.invoke(&first_tab, "event".to_string());
        assert_eq!(first_hits.load(Ordering::SeqCst), 1);
        assert_eq!(second_hits.load(Ordering::SeqCst), 0);

        registry.invoke(&second_tab, "event".to_string());
        assert_eq!(first_hits.load(Ordering::SeqCst), 1);
        assert_eq!(second_hits.load(Ordering::SeqCst), 1);
    }

    /// After `clear`, the tab is no longer registered and invoke does nothing.
    #[test]
    fn tab_callback_registry_clear() {
        let registry = TabCallbackRegistry::new();
        let tab = uuid::Uuid::new_v4();
        let hits = Arc::new(AtomicUsize::new(0));

        let hits_in_cb = Arc::clone(&hits);
        registry.set(
            tab,
            oxi_ai::progress_callback(move |_| {
                hits_in_cb.fetch_add(1, Ordering::SeqCst);
            }),
        );
        registry.invoke(&tab, "x".to_string());
        assert_eq!(hits.load(Ordering::SeqCst), 1);

        registry.clear(&tab);
        assert!(!registry.is_set(&tab));
        registry.invoke(&tab, "y".to_string());
        assert_eq!(
            hits.load(Ordering::SeqCst),
            1,
            "invoke after clear is no-op"
        );
    }

    /// `PageContent::empty` yields an empty URL and a zero status.
    #[test]
    fn page_content_empty() {
        let page = PageContent::empty();
        assert!(page.url.is_empty());
        assert_eq!(page.status, 0);
    }

    /// The navigation variant renders with its fixed prefix.
    #[test]
    fn browser_error_display() {
        let rendered = BrowserError::Navigation("connection refused".to_string()).to_string();
        assert!(rendered.contains("navigation failed"));
    }

    /// `LinkInfo` survives a JSON round trip unchanged.
    #[test]
    fn link_info_serde() {
        let original = LinkInfo {
            text: String::from("Example"),
            href: String::from("https://example.com"),
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LinkInfo = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.text, "Example");
        assert_eq!(decoded.href, "https://example.com");
    }

    /// Serialized `ElementInfo` contains its tag and text.
    #[test]
    fn element_info_serde() {
        let element = ElementInfo {
            tag: String::from("DIV"),
            text: String::from("Hello"),
            attributes: HashMap::from([(String::from("class"), String::from("item"))]),
        };
        let encoded = serde_json::to_string(&element).unwrap();
        for needle in ["DIV", "Hello"] {
            assert!(encoded.contains(needle));
        }
    }

    /// The unit variant renders its fixed message.
    #[test]
    fn browser_error_no_active_session() {
        let rendered = BrowserError::NoActiveSession.to_string();
        assert!(rendered.contains("no active session"));
    }
}