1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
use schemars::JsonSchema;
use serde::Deserialize;
use crate::AutomataError;
// ── Browser / TabInfo ─────────────────────────────────────────────────────────
/// Basic info about a browser tab.
///
/// Derives `PartialEq`/`Eq` so two snapshots of a tab can be compared
/// directly (for example with `assert_eq!`, or to detect that the title or
/// URL changed between two reads).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TabInfo {
    /// Title of the tab.
    pub title: String,
    /// URL of the tab.
    pub url: String,
}
/// Control surface for a CDP browser: session start-up plus tab management.
///
/// Implementors must be `Send + Sync + 'static`. Every operation reports
/// failure through [`AutomataError`].
pub trait Browser: Send + Sync + 'static {
    /// Makes sure the browser is running with CDP enabled.
    fn ensure(&self) -> Result<(), AutomataError>;

    /// Opens a new tab and returns its CDP tab ID.
    ///
    /// The tab is opened at `url` when one is given, otherwise at `about:blank`.
    fn open_tab(&self, url: Option<&str>) -> Result<String, AutomataError>;

    /// Closes the tab identified by the CDP target ID `tab_id`.
    fn close_tab(&self, tab_id: &str) -> Result<(), AutomataError>;

    /// Switches to the given tab, bringing it to the foreground.
    fn activate_tab(&self, tab_id: &str) -> Result<(), AutomataError>;

    /// Navigates the given tab to `url`.
    fn navigate(&self, tab_id: &str, url: &str) -> Result<(), AutomataError>;

    /// Evaluates the JS expression `expr` in the given tab and returns the
    /// result as a string.
    fn eval(&self, tab_id: &str, expr: &str) -> Result<String, AutomataError>;

    /// Returns the title and URL of the given tab.
    fn tab_info(&self, tab_id: &str) -> Result<TabInfo, AutomataError>;

    /// Lists every open tab as `(tab_id, TabInfo)` pairs.
    fn tabs(&self) -> Result<Vec<(String, TabInfo)>, AutomataError>;
}
/// Type of mouse click to perform.
// NOTE: this enum derives `JsonSchema`, and `schemars` is documented to turn
// `///` doc comments into schema titles/descriptions. Treat the doc line above
// as part of the generated schema, and check the schema output before adding
// `///` docs to the variants. Plain `//` comments like this one are not picked up.
//
// `rename_all = "snake_case"` makes serde expect the variant names in
// snake_case: "left", "double", "triple", "right", "middle".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ClickType {
Left,
Double,
Triple,
Right,
Middle,
}
/// Handle to a single UI element; implementations wrap platform-specific
/// COM/UIA objects.
///
/// Staleness contract: property methods yield `None` / empty values on a stale
/// handle, while interaction methods report staleness or failure as
/// `Err(AutomataError::Platform(...))`.
///
/// NOTE(review): several property accessors (`text`, `is_enabled`, `bounds`, ...)
/// return `Result` rather than `Option` — confirm how those report a stale handle.
pub trait Element: Clone + 'static {
    // ── Properties ───────────────────────────────────────────────────────────

    /// Localized name / label of the element, or `None` when it is empty or
    /// unavailable.
    fn name(&self) -> Option<String>;

    /// Localized role string, e.g. "Button", "Pane", "ToolBar".
    fn role(&self) -> String;

    /// Value text (ValuePattern), falling back to the name.
    fn text(&self) -> Result<String, AutomataError>;

    /// Names of the direct children joined by newlines; the element's own name
    /// is not included.
    fn inner_text(&self) -> Result<String, AutomataError>;

    /// Whether the element is enabled.
    fn is_enabled(&self) -> Result<bool, AutomataError>;

    /// Whether the element is visible.
    fn is_visible(&self) -> Result<bool, AutomataError>;

    /// ID of the process that owns this element.
    fn process_id(&self) -> Result<u32, AutomataError>;

    /// Name of the owning process, without the `.exe` suffix.
    ///
    /// `None` on non-Windows or when the lookup fails; the default
    /// implementation always returns `None`.
    fn process_name(&self) -> Option<String> {
        None
    }

    /// Native handle (HWND as `u64`) of the window that owns this element.
    ///
    /// `None` on non-Windows or when the lookup fails; the default
    /// implementation always returns `None`.
    fn hwnd(&self) -> Option<u64> {
        None
    }

    /// UIA `AutomationId` property, or `None` when it is empty or unavailable.
    ///
    /// The default implementation always returns `None`.
    fn automation_id(&self) -> Option<String> {
        None
    }

    /// Bounding box as `(x, y, width, height)`.
    fn bounds(&self) -> Result<(i32, i32, i32, i32), AutomataError>;

    /// Direct children of this element. A stale handle yields `Err`.
    fn children(&self) -> Result<Vec<Self>, AutomataError>;

    /// `false` once the element has been detached from the accessibility tree
    /// (e.g. a dismissed dialog).
    ///
    /// Root windows have the desktop as their parent, so they report `true`.
    /// The default implementation returns `true` for platforms that do not
    /// implement this check.
    fn has_parent(&self) -> bool {
        true
    }

    /// Parent of this element, or `None` at the root or on error.
    ///
    /// The default implementation always returns `None`.
    fn parent(&self) -> Option<Self> {
        None
    }

    // ── Interactions ─────────────────────────────────────────────────────────

    /// Clicks the element.
    fn click(&self) -> Result<(), AutomataError>;

    /// Double-clicks the element.
    fn double_click(&self) -> Result<(), AutomataError>;

    /// Moves the mouse cursor to the centre of the element without clicking.
    fn hover(&self) -> Result<(), AutomataError>;

    /// Clicks at a point given as fractions of the element's bounding box
    /// (`x_pct` horizontally, `y_pct` vertically); `kind` selects the click type.
    fn click_at(&self, x_pct: f64, y_pct: f64, kind: ClickType) -> Result<(), AutomataError>;

    /// Types `text` into the element.
    fn type_text(&self, text: &str) -> Result<(), AutomataError>;

    /// Presses `key` on the element (key syntax is defined by the implementation).
    fn press_key(&self, key: &str) -> Result<(), AutomataError>;

    /// Sets the field's value directly through `IValuePattern`, so no
    /// select-all + type sequence is needed. Preferred over `type_text` for
    /// pre-filled fields.
    fn set_value(&self, value: &str) -> Result<(), AutomataError>;

    /// Gives the element focus.
    fn focus(&self) -> Result<(), AutomataError>;

    /// Activates the element through UIA's `IInvokePattern::Invoke()`.
    ///
    /// In contrast to `click()`, this needs no valid bounding rect: it works on
    /// off-screen elements whose bounds are `(0,0,1,1)` because they are
    /// scrolled out of view. Prefer it over `Click` + `ScrollIntoView` for items
    /// in virtualised or scrollable lists (e.g. Settings nav items, WinUI
    /// ListView rows), where mouse-wheel scrolling causes elastic snap-back.
    ///
    /// Falls back to `click()` when the element does not support
    /// `InvokePattern`.
    fn invoke(&self) -> Result<(), AutomataError> {
        // Platforms that do not override `invoke` simply perform a click.
        Self::click(self)
    }

    /// Scrolls ancestor containers until this element lies inside their
    /// visible viewport. Uses `ScrollItemPattern` when supported, otherwise a
    /// geometric ancestor walk with `ScrollPattern`.
    fn scroll_into_view(&self) -> Result<(), AutomataError>;

    /// Activates the window (presumably the one owning this element — confirm
    /// with the platform implementation).
    fn activate_window(&self) -> Result<(), AutomataError>;

    /// Minimizes the window (presumably the one owning this element — confirm
    /// with the platform implementation).
    fn minimize_window(&self) -> Result<(), AutomataError>;

    /// Closes the element (presumably its window — confirm with the platform
    /// implementation).
    fn close(&self) -> Result<(), AutomataError>;
}
/// Platform desktop abstraction: window discovery plus foreground state.
pub trait Desktop: Send + 'static {
    /// Element handle type produced by this desktop.
    type Elem: Element;
    /// CDP browser implementation exposed by this desktop.
    type Browser: Browser;

    /// CDP browser handle; the workflow engine uses it for browser automation.
    fn browser(&self) -> &Self::Browser;

    /// Every top-level application window that is currently visible.
    fn application_windows(&self) -> Result<Vec<Self::Elem>, AutomataError>;

    /// Launches the executable `exe` (a name or a full path) and returns the
    /// process ID.
    fn open_application(&self, exe: &str) -> Result<u32, AutomataError>;

    /// Element that currently holds keyboard focus (the topmost modal when a
    /// dialog is present). `None` when the foreground window is unknown.
    fn foreground_window(&self) -> Option<Self::Elem>;

    /// Raw HWND as `u64`, for process-ownership checks that do not need a full
    /// element query. `None` on non-Windows or when nothing is focused.
    fn foreground_hwnd(&self) -> Option<u64>;

    /// All tooltip windows currently visible on the desktop.
    ///
    /// Empty on non-Windows or when none are present; the default
    /// implementation always returns an empty vec.
    fn tooltip_windows(&self) -> Vec<Self::Elem> {
        Vec::new()
    }

    /// Top-level window handles in Z-order, topmost first.
    ///
    /// The anchor resolver uses this to prefer the topmost window of a process
    /// when several windows match the same filter. Empty on non-Windows or
    /// when enumeration fails; the default implementation always returns an
    /// empty vec.
    fn hwnd_z_order(&self) -> Vec<u64> {
        Vec::new()
    }
}