car_browser/backend.rs
1//! Browser backend trait — the abstract interface that all browser implementations must satisfy.
2//!
3//! This maps 1:1 to human-equivalent perception and input (Manifesto Principles 3-5).
4
5use async_trait::async_trait;
6use thiserror::Error;
7
8use crate::models::{A11yNode, CookieParam, Modifier, Viewport, WaitCondition};
9
10/// Errors that can occur in browser operations.
11#[derive(Error, Debug)]
12pub enum BrowserError {
13 #[error("Screenshot capture failed: {0}")]
14 ScreenshotFailed(String),
15
16 #[error("Accessibility tree extraction failed: {0}")]
17 AccessibilityFailed(String),
18
19 #[error("Navigation failed: {0}")]
20 NavigationFailed(String),
21
22 #[error("Input injection failed: {0}")]
23 InputFailed(String),
24
25 #[error("Element not found: {0}")]
26 ElementNotFound(String),
27
28 #[error("Platform internal error: {0}")]
29 PlatformInternal(String),
30
31 #[error("Wait condition timed out")]
32 Timeout,
33
34 #[error("Browser not available: {0}")]
35 NotAvailable(String),
36
37 #[error("Not supported: {0}")]
38 Unsupported(String),
39}
40
41/// Abstract browser backend trait.
42///
43/// Implementations drive a real browser (Tauri WebView, headless Chromium, etc.)
44/// through human-equivalent perception and input only.
45///
46/// # Perception (what the AI can see)
47/// - Screenshots: rendered pixels
48/// - Accessibility tree: semantic structure exposed to assistive technologies
49///
50/// # Actions (what the AI can do)
51/// - Click, type, scroll, keypress — all map 1:1 to human input
52/// - Navigation — equivalent to typing a URL
53///
54/// # Disallowed
55/// - DOM traversal, JS execution for data extraction, hidden attributes
56/// - Network traffic inspection, cookie/storage introspection
57#[async_trait]
58pub trait BrowserBackend: Send + Sync {
59 // =========================================================================
60 // Perception
61 // =========================================================================
62
63 /// Capture a screenshot of the current page as PNG data.
64 async fn capture_screenshot(&self) -> Result<Vec<u8>, BrowserError>;
65
66 /// Extract the accessibility tree from the current page.
67 async fn get_accessibility_tree(&self) -> Result<Vec<A11yNode>, BrowserError>;
68
69 /// Get the current viewport dimensions.
70 fn get_viewport(&self) -> Result<Viewport, BrowserError>;
71
72 /// Get the current page URL.
73 fn get_current_url(&self) -> Result<String, BrowserError>;
74
75 /// Get the current page title.
76 async fn get_page_title(&self) -> Result<String, BrowserError>;
77
78 // =========================================================================
79 // Navigation
80 // =========================================================================
81
82 /// Navigate to a URL.
83 async fn navigate(&self, url: &str) -> Result<(), BrowserError>;
84
85 // =========================================================================
86 // Human-equivalent input (Manifesto Principle 5)
87 // =========================================================================
88
89 /// Click at viewport coordinates.
90 async fn inject_click(&self, x: f64, y: f64) -> Result<(), BrowserError>;
91
92 /// Type text into the focused element.
93 async fn inject_text(&self, text: &str) -> Result<(), BrowserError>;
94
95 /// Press a key with optional modifiers.
96 async fn inject_keypress(&self, key: &str, modifiers: &[Modifier]) -> Result<(), BrowserError>;
97
98 /// Scroll the page.
99 async fn inject_scroll(&self, delta_y: i32) -> Result<(), BrowserError>;
100
101 // =========================================================================
102 // Accessibility actions (VoiceOver-equivalent)
103 // =========================================================================
104
105 /// Click an element by accessibility node ID (AXPress).
106 async fn click_element(&self, node_id: &str) -> Result<(), BrowserError>;
107
108 /// Type text into an element by accessibility node ID.
109 async fn type_into_element(&self, node_id: &str, text: &str) -> Result<(), BrowserError>;
110
111 /// Focus an element by accessibility node ID.
112 async fn focus_element(&self, node_id: &str) -> Result<(), BrowserError>;
113
114 // =========================================================================
115 // Wait conditions
116 // =========================================================================
117
118 /// Check if page is fully loaded.
119 async fn is_page_loaded(&self) -> Result<bool, BrowserError>;
120
121 /// Wait for a condition to be met.
122 async fn wait_until(
123 &self,
124 condition: &WaitCondition,
125 timeout_ms: u64,
126 ) -> Result<bool, BrowserError>;
127
128 /// Check if an element matching a description exists in the accessibility tree.
129 async fn element_exists_a11y(
130 &self,
131 name_contains: &str,
132 role: Option<&str>,
133 ) -> Result<bool, BrowserError>;
134
135 // =========================================================================
136 // Auth state injection (pre-navigation)
137 // =========================================================================
138
139 /// Inject cookies into the browser. Must be called before navigation
140 /// for the cookies to be sent with the first request.
141 async fn set_cookies(&self, _cookies: &[CookieParam]) -> Result<(), BrowserError> {
142 Err(BrowserError::Unsupported(
143 "set_cookies not implemented".into(),
144 ))
145 }
146
147 /// Set localStorage items for a given origin.
148 /// The browser will briefly navigate to the origin to set the items.
149 async fn set_local_storage(
150 &self,
151 _origin: &str,
152 _items: &[(String, String)],
153 ) -> Result<(), BrowserError> {
154 Err(BrowserError::Unsupported(
155 "set_local_storage not implemented".into(),
156 ))
157 }
158
159 /// Set extra HTTP headers to include on every request.
160 async fn set_extra_headers(&self, _headers: &[(String, String)]) -> Result<(), BrowserError> {
161 Err(BrowserError::Unsupported(
162 "set_extra_headers not implemented".into(),
163 ))
164 }
165
166 // =========================================================================
167 // Lifecycle
168 // =========================================================================
169
170 /// Shut down the browser backend and release resources.
171 ///
172 /// For headless Chromium: terminates the browser process.
173 /// For Tauri: WebView cleanup.
174 /// Default: no-op.
175 async fn shutdown(&self) -> Result<(), BrowserError> {
176 Ok(())
177 }
178}