Skip to main content

car_browser/
backend.rs

1//! Browser backend trait — the abstract interface that all browser implementations must satisfy.
2//!
3//! This maps 1:1 to human-equivalent perception and input (Manifesto Principles 3-5).
4
5use async_trait::async_trait;
6use thiserror::Error;
7
8use crate::models::{A11yNode, Modifier, Viewport, WaitCondition};
9
10/// Errors that can occur in browser operations.
///
/// This is the error type returned by every method of `BrowserBackend`.
/// The `Display` text of each variant comes from its `#[error(...)]` attribute
/// (generated by `thiserror`); for variants that carry a `String`, that payload
/// is interpolated at `{0}` as the free-form detail of the failure.
11#[derive(Error, Debug)]
12pub enum BrowserError {
    /// A screenshot could not be captured; the payload describes why.
13    #[error("Screenshot capture failed: {0}")]
14    ScreenshotFailed(String),
15
    /// The accessibility tree could not be extracted; the payload describes why.
16    #[error("Accessibility tree extraction failed: {0}")]
17    AccessibilityFailed(String),
18
    /// A navigation request failed; the payload describes why.
19    #[error("Navigation failed: {0}")]
20    NavigationFailed(String),
21
    /// Injecting input failed; the payload describes why.
    // NOTE(review): presumably covers click, text, keypress and scroll injection
    // alike — confirm against the implementations.
22    #[error("Input injection failed: {0}")]
23    InputFailed(String),
24
    /// A requested element could not be found; the payload identifies the element.
    // NOTE(review): presumably the payload is the accessibility node ID or the
    // search description that failed to match — confirm against callers.
25    #[error("Element not found: {0}")]
26    ElementNotFound(String),
27
    /// An internal failure of the underlying platform; the payload carries the detail.
28    #[error("Platform internal error: {0}")]
29    PlatformInternal(String),
30
    /// A wait condition was not satisfied in time. Carries no payload.
31    #[error("Wait condition timed out")]
32    Timeout,
33
    /// No browser is available to serve the request. Carries no payload.
34    #[error("Browser not available")]
35    NotAvailable,
36
    /// The requested operation is not supported; the payload names what is unsupported.
37    #[error("Not supported: {0}")]
38    Unsupported(String),
39}
40
41/// Abstract browser backend trait.
42///
43/// Implementations drive a real browser (Tauri WebView, headless Chromium, etc.)
44/// through human-equivalent perception and input only.
45///
46/// # Perception (what the AI can see)
47/// - Screenshots: rendered pixels
48/// - Accessibility tree: semantic structure exposed to assistive technologies
49///
50/// # Actions (what the AI can do)
51/// - Click, type, scroll, keypress — all map 1:1 to human input
52/// - Navigation — equivalent to typing a URL
53///
54/// # Disallowed
55/// - DOM traversal, JS execution for data extraction, hidden attributes
56/// - Network traffic inspection, cookie/storage introspection
///
/// # Implementor notes
/// - The trait is bounded by `Send + Sync` and every method takes `&self`, so an
///   implementation that keeps mutable state needs interior mutability.
/// - Every method reports failure through [`BrowserError`].
/// - `shutdown` is the only method with a default body; all others are required.
/// - `get_viewport` and `get_current_url` are synchronous; every other method is
///   `async` (made usable in a trait via `#[async_trait]`).
57#[async_trait]
58pub trait BrowserBackend: Send + Sync {
59    // =========================================================================
60    // Perception
61    // =========================================================================
62
63    /// Capture a screenshot of the current page as PNG data.
    ///
    /// Returns the encoded image bytes on success.
    // NOTE(review): failures are presumably reported as
    // `BrowserError::ScreenshotFailed` — confirm per implementation.
64    async fn capture_screenshot(&self) -> Result<Vec<u8>, BrowserError>;
65
66    /// Extract the accessibility tree from the current page.
    ///
    /// Returns the tree as a list of `A11yNode` values.
    // NOTE(review): whether the `Vec` holds only root nodes or a flattened tree
    // depends on `A11yNode`, which is defined in `crate::models` — confirm there.
67    async fn get_accessibility_tree(&self) -> Result<Vec<A11yNode>, BrowserError>;
68
69    /// Get the current viewport dimensions.
    ///
    /// Synchronous, unlike most methods of this trait.
70    fn get_viewport(&self) -> Result<Viewport, BrowserError>;
71
72    /// Get the current page URL.
    ///
    /// Synchronous, unlike most methods of this trait.
73    fn get_current_url(&self) -> Result<String, BrowserError>;
74
75    /// Get the current page title.
76    async fn get_page_title(&self) -> Result<String, BrowserError>;
77
78    // =========================================================================
79    // Navigation
80    // =========================================================================
81
82    /// Navigate to a URL.
    ///
    /// `url` is the destination address as a string.
    // NOTE(review): it is not visible here whether the returned future resolves
    // when navigation starts or when the page has finished loading; callers that
    // need the latter should presumably pair this with `is_page_loaded` or
    // `wait_until` — confirm per implementation.
83    async fn navigate(&self, url: &str) -> Result<(), BrowserError>;
84
85    // =========================================================================
86    // Human-equivalent input (Manifesto Principle 5)
87    // =========================================================================
88
89    /// Click at viewport coordinates.
    ///
    /// `x` and `y` locate the click within the viewport.
    // NOTE(review): units (CSS pixels vs. device pixels) and origin are not
    // specified here — presumably top-left origin, confirm against `Viewport`.
90    async fn inject_click(&self, x: f64, y: f64) -> Result<(), BrowserError>;
91
92    /// Type text into the focused element.
    ///
    /// `text` is the text to enter; the target is whichever element currently
    /// holds focus (see `focus_element` to choose one by node ID).
93    async fn inject_text(&self, text: &str) -> Result<(), BrowserError>;
94
95    /// Press a key with optional modifiers.
    ///
    /// `key` names the key to press; `modifiers` lists the modifier keys held
    /// during the press and may be empty.
    // NOTE(review): the accepted vocabulary for `key` is not defined in this
    // file — confirm against the implementations.
96    async fn inject_keypress(
97        &self,
98        key: &str,
99        modifiers: &[Modifier],
100    ) -> Result<(), BrowserError>;
101
102    /// Scroll the page.
    ///
    /// `delta_y` is the vertical scroll amount as a signed integer.
    // NOTE(review): sign convention and units are not specified here —
    // presumably positive scrolls down, in pixels; confirm per implementation.
103    async fn inject_scroll(&self, delta_y: i32) -> Result<(), BrowserError>;
104
105    // =========================================================================
106    // Accessibility actions (VoiceOver-equivalent)
107    // =========================================================================
108
109    /// Click an element by accessibility node ID (AXPress).
    ///
    /// `node_id` identifies the target node.
    // NOTE(review): presumably an ID taken from the nodes returned by
    // `get_accessibility_tree`, with `BrowserError::ElementNotFound` for an
    // unknown ID — confirm per implementation.
110    async fn click_element(&self, node_id: &str) -> Result<(), BrowserError>;
111
112    /// Type text into an element by accessibility node ID.
    ///
    /// `node_id` identifies the target node and `text` is the text to enter.
113    async fn type_into_element(&self, node_id: &str, text: &str) -> Result<(), BrowserError>;
114
115    /// Focus an element by accessibility node ID.
    ///
    /// `node_id` identifies the node that should receive focus.
116    async fn focus_element(&self, node_id: &str) -> Result<(), BrowserError>;
117
118    // =========================================================================
119    // Wait conditions
120    // =========================================================================
121
122    /// Check if page is fully loaded.
    ///
    /// Returns `Ok(true)` when the page is loaded and `Ok(false)` otherwise.
123    async fn is_page_loaded(&self) -> Result<bool, BrowserError>;
124
125    /// Wait for a condition to be met.
    ///
    /// `condition` is the condition to wait for and `timeout_ms` bounds the wait.
    // NOTE(review): `timeout_ms` is presumably in milliseconds. Both an
    // `Ok(false)` result and `BrowserError::Timeout` exist, so it is unclear from
    // this file which one signals an expired timeout — confirm per implementation
    // and handle both at call sites until then.
126    async fn wait_until(
127        &self,
128        condition: &WaitCondition,
129        timeout_ms: u64,
130    ) -> Result<bool, BrowserError>;
131
132    /// Check if an element matching a description exists in the accessibility tree.
    ///
    /// `name_contains` is text the element's name must contain; `role`, when
    /// `Some`, additionally restricts the match to that role, and `None` applies
    /// no role restriction.
    // NOTE(review): case sensitivity of the name match and the format of `role`
    // strings are not specified here — confirm per implementation.
133    async fn element_exists_a11y(
134        &self,
135        name_contains: &str,
136        role: Option<&str>,
137    ) -> Result<bool, BrowserError>;
138
139    // =========================================================================
140    // Lifecycle
141    // =========================================================================
142
143    /// Shut down the browser backend and release resources.
144    ///
145    /// For headless Chromium: terminates the browser process.
146    /// For Tauri: WebView cleanup.
147    /// Default: no-op.
    ///
    /// The provided default body returns `Ok(())` without doing anything, so
    /// backends that own external resources must override it.
148    async fn shutdown(&self) -> Result<(), BrowserError> {
149        Ok(())
150    }
151}
151}