car_browser/backend.rs
1//! Browser backend trait — the abstract interface that all browser implementations must satisfy.
2//!
3//! This maps 1:1 to human-equivalent perception and input (Manifesto Principles 3-5).
4
5use async_trait::async_trait;
6use thiserror::Error;
7
8use crate::models::{A11yNode, Modifier, Viewport, WaitCondition};
9
10/// Errors that can occur in browser operations.
11#[derive(Error, Debug)]
12pub enum BrowserError {
13 #[error("Screenshot capture failed: {0}")]
14 ScreenshotFailed(String),
15
16 #[error("Accessibility tree extraction failed: {0}")]
17 AccessibilityFailed(String),
18
19 #[error("Navigation failed: {0}")]
20 NavigationFailed(String),
21
22 #[error("Input injection failed: {0}")]
23 InputFailed(String),
24
25 #[error("Element not found: {0}")]
26 ElementNotFound(String),
27
28 #[error("Platform internal error: {0}")]
29 PlatformInternal(String),
30
31 #[error("Wait condition timed out")]
32 Timeout,
33
34 #[error("Browser not available")]
35 NotAvailable,
36
37 #[error("Not supported: {0}")]
38 Unsupported(String),
39}
40
41/// Abstract browser backend trait.
42///
43/// Implementations drive a real browser (Tauri WebView, headless Chromium, etc.)
44/// through human-equivalent perception and input only.
45///
46/// # Perception (what the AI can see)
47/// - Screenshots: rendered pixels
48/// - Accessibility tree: semantic structure exposed to assistive technologies
49///
50/// # Actions (what the AI can do)
51/// - Click, type, scroll, keypress — all map 1:1 to human input
52/// - Navigation — equivalent to typing a URL
53///
54/// # Disallowed
55/// - DOM traversal, JS execution for data extraction, hidden attributes
56/// - Network traffic inspection, cookie/storage introspection
57#[async_trait]
58pub trait BrowserBackend: Send + Sync {
59 // =========================================================================
60 // Perception
61 // =========================================================================
62
63 /// Capture a screenshot of the current page as PNG data.
64 async fn capture_screenshot(&self) -> Result<Vec<u8>, BrowserError>;
65
66 /// Extract the accessibility tree from the current page.
67 async fn get_accessibility_tree(&self) -> Result<Vec<A11yNode>, BrowserError>;
68
69 /// Get the current viewport dimensions.
70 fn get_viewport(&self) -> Result<Viewport, BrowserError>;
71
72 /// Get the current page URL.
73 fn get_current_url(&self) -> Result<String, BrowserError>;
74
75 /// Get the current page title.
76 async fn get_page_title(&self) -> Result<String, BrowserError>;
77
78 // =========================================================================
79 // Navigation
80 // =========================================================================
81
82 /// Navigate to a URL.
83 async fn navigate(&self, url: &str) -> Result<(), BrowserError>;
84
85 // =========================================================================
86 // Human-equivalent input (Manifesto Principle 5)
87 // =========================================================================
88
89 /// Click at viewport coordinates.
90 async fn inject_click(&self, x: f64, y: f64) -> Result<(), BrowserError>;
91
92 /// Type text into the focused element.
93 async fn inject_text(&self, text: &str) -> Result<(), BrowserError>;
94
95 /// Press a key with optional modifiers.
96 async fn inject_keypress(
97 &self,
98 key: &str,
99 modifiers: &[Modifier],
100 ) -> Result<(), BrowserError>;
101
102 /// Scroll the page.
103 async fn inject_scroll(&self, delta_y: i32) -> Result<(), BrowserError>;
104
105 // =========================================================================
106 // Accessibility actions (VoiceOver-equivalent)
107 // =========================================================================
108
109 /// Click an element by accessibility node ID (AXPress).
110 async fn click_element(&self, node_id: &str) -> Result<(), BrowserError>;
111
112 /// Type text into an element by accessibility node ID.
113 async fn type_into_element(&self, node_id: &str, text: &str) -> Result<(), BrowserError>;
114
115 /// Focus an element by accessibility node ID.
116 async fn focus_element(&self, node_id: &str) -> Result<(), BrowserError>;
117
118 // =========================================================================
119 // Wait conditions
120 // =========================================================================
121
122 /// Check if page is fully loaded.
123 async fn is_page_loaded(&self) -> Result<bool, BrowserError>;
124
125 /// Wait for a condition to be met.
126 async fn wait_until(
127 &self,
128 condition: &WaitCondition,
129 timeout_ms: u64,
130 ) -> Result<bool, BrowserError>;
131
132 /// Check if an element matching a description exists in the accessibility tree.
133 async fn element_exists_a11y(
134 &self,
135 name_contains: &str,
136 role: Option<&str>,
137 ) -> Result<bool, BrowserError>;
138
139 // =========================================================================
140 // Lifecycle
141 // =========================================================================
142
143 /// Shut down the browser backend and release resources.
144 ///
145 /// For headless Chromium: terminates the browser process.
146 /// For Tauri: WebView cleanup.
147 /// Default: no-op.
148 async fn shutdown(&self) -> Result<(), BrowserError> {
149 Ok(())
150 }
151}