1use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9
10use crate::models::{Bounds, Viewport};
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct UiMap {
15 pub id: String,
16 pub timestamp: DateTime<Utc>,
17 pub url: String,
18 pub elements: Vec<UiElement>,
19 pub text_blocks: Vec<TextBlock>,
20 pub page_signals: PageSignals,
21 pub viewport: Viewport,
22 pub content_hash: String,
23 pub screenshot_path: String,
24}
25
26impl UiMap {
27 pub fn new(
28 url: String,
29 elements: Vec<UiElement>,
30 text_blocks: Vec<TextBlock>,
31 page_signals: PageSignals,
32 viewport: Viewport,
33 screenshot_path: String,
34 ) -> Self {
35 let id = uuid::Uuid::new_v4().to_string();
36 let timestamp = Utc::now();
37 let mut map = Self {
38 id,
39 timestamp,
40 url,
41 elements,
42 text_blocks,
43 page_signals,
44 viewport,
45 content_hash: String::new(),
46 screenshot_path,
47 };
48 map.content_hash = map.compute_content_hash();
49 map
50 }
51
52 pub fn compute_content_hash(&self) -> String {
53 let mut hasher = Sha256::new();
54 for element in &self.elements {
55 hasher.update(element.id.as_bytes());
56 hasher.update(element.role.to_hash_string().as_bytes());
57 if let Some(name) = &element.name {
58 hasher.update(name.as_bytes());
59 }
60 hasher.update(element.states.to_hash_string().as_bytes());
61 }
62 for block in &self.text_blocks {
63 hasher.update(block.text.as_bytes());
64 }
65 hasher.update(self.page_signals.to_hash_string().as_bytes());
66 hex::encode(hasher.finalize())
67 }
68
69 pub fn get_element(&self, element_id: &str) -> Option<&UiElement> {
70 self.elements.iter().find(|e| e.id == element_id)
71 }
72
73 pub fn get_elements_by_role(&self, role: UiRole) -> Vec<&UiElement> {
74 self.elements
75 .iter()
76 .filter(|e| std::mem::discriminant(&e.role) == std::mem::discriminant(&role))
77 .collect()
78 }
79
80 pub fn interactive_elements(&self) -> Vec<&UiElement> {
81 self.elements
82 .iter()
83 .filter(|e| e.role.is_interactable() && e.is_interactable())
84 .collect()
85 }
86
87 pub fn estimate_tokens(&self, interactive_only: bool) -> usize {
88 let count = if interactive_only {
89 self.interactive_elements().len()
90 } else {
91 self.elements.len()
92 };
93 count * 20 + 30
94 }
95
96 pub fn average_confidence(&self) -> f32 {
97 if self.elements.is_empty() {
98 return 0.0;
99 }
100 let sum: f32 = self.elements.iter().map(|e| e.confidence).sum();
101 sum / self.elements.len() as f32
102 }
103
104 pub fn format_summary(&self) -> String {
108 use std::fmt::Write;
109 let mut output = String::new();
110
111 if self.page_signals.has_blocking_element() {
113 if self.page_signals.modal_present {
114 let _ = writeln!(output, "⚠ Modal dialog present");
115 }
116 if self.page_signals.cookie_banner {
117 let _ = writeln!(output, "⚠ Cookie banner present");
118 }
119 }
120 if self.page_signals.loading_indicator {
121 let _ = writeln!(output, "⏳ Page loading...");
122 }
123
124 let _ = writeln!(output, "\n## Visible Text");
126 let mut seen_texts: std::collections::HashSet<String> = std::collections::HashSet::new();
127 for el in &self.elements {
128 if let Some(ref name) = el.name {
129 let text = name.trim().to_string();
130 if !text.is_empty() && text.len() > 1 && seen_texts.insert(text.clone()) {
131 let role = el.role.to_hash_string();
132 let truncated = if text.len() > 80 {
133 let end = text.floor_char_boundary(77);
134 format!("{}...", &text[..end])
135 } else {
136 text
137 };
138 let _ = writeln!(output, " ({}) {}", role, truncated);
139 }
140 }
141 }
142
143 let _ = writeln!(output, "\n## Interactive Elements");
145 let interactive = self.interactive_elements();
146 for (i, el) in interactive.iter().enumerate().take(50) {
147 let role_str = el.role.to_hash_string();
148 let name_str = el.name.as_deref()
149 .map(|n| {
150 if n.len() > 40 {
151 let end = n.floor_char_boundary(37);
152 format!(" \"{}...\"", &n[..end])
153 } else {
154 format!(" \"{}\"", n)
155 }
156 })
157 .unwrap_or_default();
158 let mut state_parts = Vec::new();
159 if element_states_for_summary(&el.states, &mut state_parts) {
160 let _ = writeln!(output, "[{}] {}{} {}", el.id, role_str, name_str, state_parts.join(" "));
161 } else {
162 let _ = writeln!(output, "[{}] {}{}", el.id, role_str, name_str);
163 }
164 let _ = i; }
166 if interactive.len() > 50 {
167 let _ = writeln!(output, " ... and {} more interactive elements", interactive.len() - 50);
168 }
169
170 output
171 }
172
173 pub fn format_compact(&self) -> String {
177 use std::fmt::Write;
178 let mut output = String::new();
179
180 if self.page_signals.has_blocking_element() {
182 if self.page_signals.modal_present {
183 let _ = writeln!(output, "⚠ Modal dialog present");
184 }
185 if self.page_signals.cookie_banner {
186 let _ = writeln!(output, "⚠ Cookie banner present");
187 }
188 }
189 if self.page_signals.loading_indicator {
190 let _ = writeln!(output, "⏳ Page loading...");
191 }
192
193 let elements: Vec<&UiElement> = if self.elements.len() > 40 {
195 self.interactive_elements()
196 } else {
197 self.elements.iter().collect()
198 };
199
200 for element in &elements {
201 let role_str = element.role.to_hash_string();
202 let name_str = element
203 .name
204 .as_deref()
205 .map(|n| {
206 if n.len() > 50 {
207 let truncated: String = n.chars().take(47).collect();
208 format!(" \"{}...\"", truncated)
209 } else {
210 format!(" \"{}\"", n)
211 }
212 })
213 .unwrap_or_default();
214
215 let (cx, cy) = element.bounds.center();
216 let pos_str = format!(" ({:.0},{:.0})", cx, cy);
217
218 let mut state_parts = Vec::new();
219 if element.states.focused {
220 state_parts.push("focused");
221 }
222 if !element.states.enabled {
223 state_parts.push("disabled");
224 }
225 if element.states.checked == Some(true) {
226 state_parts.push("checked");
227 }
228 if element.states.expanded == Some(true) {
229 state_parts.push("expanded");
230 }
231 let state_str = if state_parts.is_empty() {
232 String::new()
233 } else {
234 format!(" {}", state_parts.join(" "))
235 };
236
237 let _ = writeln!(
238 output,
239 "[{}] {}{}{}{}",
240 element.id, role_str, name_str, pos_str, state_str
241 );
242 }
243
244 output
245 }
246}
247
248fn element_states_for_summary(states: &UiState, parts: &mut Vec<&'static str>) -> bool {
249 if states.focused { parts.push("focused"); }
250 if !states.enabled { parts.push("disabled"); }
251 if states.checked == Some(true) { parts.push("checked"); }
252 if states.expanded == Some(true) { parts.push("expanded"); }
253 !parts.is_empty()
254}
255
256#[derive(Debug, Clone, Serialize, Deserialize)]
258pub struct UiElement {
259 pub id: String,
260 pub role: UiRole,
261 pub name: Option<String>,
262 pub value: Option<String>,
264 pub bounds: Bounds,
265 pub states: UiState,
266 pub confidence: f32,
267 pub source: ElementSource,
268 pub icon_type: Option<IconType>,
269 pub children: Vec<String>,
270 pub ax_ref: Option<String>,
272}
273
274impl UiElement {
275 pub fn is_interactable(&self) -> bool {
276 self.states.enabled && self.bounds.width > 0.0 && self.bounds.height > 0.0
277 }
278
279 pub fn center(&self) -> (f64, f64) {
280 self.bounds.center()
281 }
282
283 pub fn accepts_text(&self) -> bool {
284 matches!(self.role, UiRole::TextInput)
285 }
286
287 pub fn is_clickable(&self) -> bool {
288 matches!(self.role, UiRole::Button | UiRole::Link)
289 }
290}
291
292#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
294#[serde(rename_all = "snake_case")]
295pub enum UiRole {
296 Button,
297 Link,
298 TextInput,
299 Checkbox,
300 Radio,
301 Dropdown,
302 Menu,
303 MenuItem,
304 Tab,
305 Dialog,
306 Image,
307 Text,
308 Container,
309 List,
310 ListItem,
311 Table,
312 TableRow,
313 TableCell,
314 Toolbar,
315 Other(String),
316}
317
318impl UiRole {
319 pub fn from_ax_role(ax_role: &str) -> Self {
320 match ax_role.to_lowercase().as_str() {
321 "button" | "pushbutton" => UiRole::Button,
322 "link" | "weblink" => UiRole::Link,
323 "textbox" | "textfield" | "textarea" | "combobox" | "searchfield" => UiRole::TextInput,
324 "checkbox" => UiRole::Checkbox,
325 "radio" | "radiobutton" => UiRole::Radio,
326 "select" | "listbox" | "popupbutton" => UiRole::Dropdown,
327 "menu" | "menubar" => UiRole::Menu,
328 "menuitem" | "menuitemcheckbox" | "menuitemradio" => UiRole::MenuItem,
329 "tab" | "tabitem" => UiRole::Tab,
330 "dialog" | "alertdialog" | "sheet" => UiRole::Dialog,
331 "image" | "img" => UiRole::Image,
332 "statictext" | "label" | "heading" => UiRole::Text,
333 "group" | "generic" | "section" | "div" | "webarea" => UiRole::Container,
334 "list" => UiRole::List,
335 "listitem" => UiRole::ListItem,
336 "table" | "grid" => UiRole::Table,
337 "row" | "tablerow" => UiRole::TableRow,
338 "cell" | "tablecell" | "gridcell" => UiRole::TableCell,
339 "toolbar" => UiRole::Toolbar,
340 other => UiRole::Other(other.to_string()),
341 }
342 }
343
344 pub fn is_interactable(&self) -> bool {
345 matches!(
346 self,
347 UiRole::Button
348 | UiRole::Link
349 | UiRole::TextInput
350 | UiRole::Checkbox
351 | UiRole::Radio
352 | UiRole::Dropdown
353 | UiRole::MenuItem
354 | UiRole::Tab
355 )
356 }
357
358 pub fn to_hash_string(&self) -> String {
359 match self {
360 UiRole::Button => "button".to_string(),
361 UiRole::Link => "link".to_string(),
362 UiRole::TextInput => "text_input".to_string(),
363 UiRole::Checkbox => "checkbox".to_string(),
364 UiRole::Radio => "radio".to_string(),
365 UiRole::Dropdown => "dropdown".to_string(),
366 UiRole::Menu => "menu".to_string(),
367 UiRole::MenuItem => "menu_item".to_string(),
368 UiRole::Tab => "tab".to_string(),
369 UiRole::Dialog => "dialog".to_string(),
370 UiRole::Image => "image".to_string(),
371 UiRole::Text => "text".to_string(),
372 UiRole::Container => "container".to_string(),
373 UiRole::List => "list".to_string(),
374 UiRole::ListItem => "list_item".to_string(),
375 UiRole::Table => "table".to_string(),
376 UiRole::TableRow => "table_row".to_string(),
377 UiRole::TableCell => "table_cell".to_string(),
378 UiRole::Toolbar => "toolbar".to_string(),
379 UiRole::Other(s) => format!("other:{}", s),
380 }
381 }
382}
383
384#[derive(Debug, Clone, Default, Serialize, Deserialize)]
386pub struct UiState {
387 pub enabled: bool,
388 pub focused: bool,
389 pub selected: bool,
390 pub checked: Option<bool>,
391 pub expanded: Option<bool>,
392 pub readonly: bool,
393 pub required: bool,
394}
395
396impl UiState {
397 pub fn enabled() -> Self {
398 Self {
399 enabled: true,
400 ..Default::default()
401 }
402 }
403
404 pub fn disabled() -> Self {
405 Self {
406 enabled: false,
407 ..Default::default()
408 }
409 }
410
411 pub fn from_ax_states(
412 disabled: bool,
413 focused: bool,
414 selected: Option<bool>,
415 checked: Option<bool>,
416 expanded: Option<bool>,
417 ) -> Self {
418 Self {
419 enabled: !disabled,
420 focused,
421 selected: selected.unwrap_or(false),
422 checked,
423 expanded,
424 readonly: false,
425 required: false,
426 }
427 }
428
429 pub fn to_hash_string(&self) -> String {
430 fn opt_bool_str(opt: Option<bool>) -> &'static str {
431 match opt {
432 None => "none",
433 Some(true) => "true",
434 Some(false) => "false",
435 }
436 }
437 format!(
438 "en:{},fo:{},se:{},ch:{},ex:{},ro:{},rq:{}",
439 self.enabled,
440 self.focused,
441 self.selected,
442 opt_bool_str(self.checked),
443 opt_bool_str(self.expanded),
444 self.readonly,
445 self.required
446 )
447 }
448}
449
450#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
451#[serde(rename_all = "snake_case")]
452pub enum ElementSource {
453 AccessibilityTree,
454 VisualDetector,
455 Ocr,
456 Merged { sources: Vec<ElementSource> },
457}
458
459impl ElementSource {
460 pub fn base_confidence(&self) -> f32 {
461 match self {
462 ElementSource::AccessibilityTree => 0.90,
463 ElementSource::VisualDetector => 0.75,
464 ElementSource::Ocr => 0.70,
465 ElementSource::Merged { .. } => 0.98,
466 }
467 }
468}
469
470#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
471#[serde(rename_all = "snake_case")]
472pub enum IconType {
473 Close, Menu, Search, Back, Forward, Refresh, Settings, Share,
474 Download, Upload, Edit, Delete, Add, Remove, Expand, Collapse,
475 Play, Pause, Stop, Mute, Unmute, Fullscreen, ExitFullscreen,
476 Info, Help, Warning, Error, Success, Unknown,
477}
478
479#[derive(Debug, Clone, Serialize, Deserialize)]
480pub struct TextBlock {
481 pub text: String,
482 pub bounds: Bounds,
483 pub source: TextSource,
484 pub confidence: f32,
485}
486
487impl TextBlock {
488 pub fn from_ax(text: String, bounds: Bounds) -> Self {
489 Self {
490 text,
491 bounds,
492 source: TextSource::AccessibilityTree,
493 confidence: 1.0,
494 }
495 }
496}
497
498#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
499#[serde(rename_all = "snake_case")]
500pub enum TextSource {
501 AccessibilityTree,
502 Ocr,
503}
504
505#[derive(Debug, Clone, Default, Serialize, Deserialize)]
506pub struct PageSignals {
507 pub modal_present: bool,
508 pub cookie_banner: bool,
509 pub error_banner: bool,
510 pub loading_indicator: bool,
511 pub scroll_position: f32,
512 pub page_type_hint: Option<String>,
513}
514
515impl PageSignals {
516 pub fn has_blocking_element(&self) -> bool {
517 self.modal_present || self.cookie_banner
518 }
519
520 pub fn needs_special_handling(&self) -> bool {
521 matches!(
522 self.page_type_hint.as_deref(),
523 Some("login") | Some("checkout") | Some("payment")
524 )
525 }
526
527 pub fn to_hash_string(&self) -> String {
528 format!(
529 "mo:{},co:{},er:{},lo:{},sc:{:.2},ty:{}",
530 self.modal_present,
531 self.cookie_banner,
532 self.error_banner,
533 self.loading_indicator,
534 self.scroll_position,
535 self.page_type_hint.as_deref().unwrap_or("none")
536 )
537 }
538}
539
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the single focused, enabled "Submit" button used below.
    fn submit_button() -> UiElement {
        UiElement {
            id: "el_0".to_string(),
            role: UiRole::Button,
            name: Some("Submit".to_string()),
            value: None,
            bounds: Bounds::new(100.0, 100.0, 80.0, 30.0),
            states: UiState { focused: true, ..UiState::enabled() },
            confidence: 0.95,
            source: ElementSource::AccessibilityTree,
            icon_type: None,
            children: vec![],
            ax_ref: None,
        }
    }

    #[test]
    fn test_ui_role_from_ax() {
        assert_eq!(UiRole::from_ax_role("button"), UiRole::Button);
        assert_eq!(UiRole::from_ax_role("textfield"), UiRole::TextInput);
        match UiRole::from_ax_role("custom") {
            UiRole::Other(_) => {}
            unexpected => panic!("expected UiRole::Other, got {:?}", unexpected),
        }
    }

    #[test]
    fn test_format_compact() {
        let map = UiMap::new(
            "https://example.com".to_string(),
            vec![submit_button()],
            vec![],
            PageSignals::default(),
            Viewport { width: 1280, height: 720, device_pixel_ratio: 2.0 },
            String::new(),
        );

        let compact = map.format_compact();
        for fragment in ["[el_0]", "button", "Submit", "focused"].iter() {
            assert!(
                compact.contains(fragment),
                "compact output is missing {:?}: {}",
                fragment,
                compact
            );
        }
    }
}
580}