1use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9
10use crate::models::{Bounds, Viewport};
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct UiMap {
15 pub id: String,
16 pub timestamp: DateTime<Utc>,
17 pub url: String,
18 pub elements: Vec<UiElement>,
19 pub text_blocks: Vec<TextBlock>,
20 pub page_signals: PageSignals,
21 pub viewport: Viewport,
22 pub content_hash: String,
23 pub screenshot_path: String,
24}
25
26impl UiMap {
27 pub fn new(
28 url: String,
29 elements: Vec<UiElement>,
30 text_blocks: Vec<TextBlock>,
31 page_signals: PageSignals,
32 viewport: Viewport,
33 screenshot_path: String,
34 ) -> Self {
35 let id = uuid::Uuid::new_v4().to_string();
36 let timestamp = Utc::now();
37 let mut map = Self {
38 id,
39 timestamp,
40 url,
41 elements,
42 text_blocks,
43 page_signals,
44 viewport,
45 content_hash: String::new(),
46 screenshot_path,
47 };
48 map.content_hash = map.compute_content_hash();
49 map
50 }
51
52 pub fn compute_content_hash(&self) -> String {
53 let mut hasher = Sha256::new();
54 for element in &self.elements {
55 hasher.update(element.id.as_bytes());
56 hasher.update(element.role.to_hash_string().as_bytes());
57 if let Some(name) = &element.name {
58 hasher.update(name.as_bytes());
59 }
60 hasher.update(element.states.to_hash_string().as_bytes());
61 }
62 for block in &self.text_blocks {
63 hasher.update(block.text.as_bytes());
64 }
65 hasher.update(self.page_signals.to_hash_string().as_bytes());
66 hex::encode(hasher.finalize())
67 }
68
69 pub fn get_element(&self, element_id: &str) -> Option<&UiElement> {
70 self.elements.iter().find(|e| e.id == element_id)
71 }
72
73 pub fn get_elements_by_role(&self, role: UiRole) -> Vec<&UiElement> {
74 self.elements
75 .iter()
76 .filter(|e| std::mem::discriminant(&e.role) == std::mem::discriminant(&role))
77 .collect()
78 }
79
80 pub fn interactive_elements(&self) -> Vec<&UiElement> {
81 self.elements
82 .iter()
83 .filter(|e| e.role.is_interactable() && e.is_interactable())
84 .collect()
85 }
86
87 pub fn estimate_tokens(&self, interactive_only: bool) -> usize {
88 let count = if interactive_only {
89 self.interactive_elements().len()
90 } else {
91 self.elements.len()
92 };
93 count * 20 + 30
94 }
95
96 pub fn average_confidence(&self) -> f32 {
97 if self.elements.is_empty() {
98 return 0.0;
99 }
100 let sum: f32 = self.elements.iter().map(|e| e.confidence).sum();
101 sum / self.elements.len() as f32
102 }
103
104 pub fn format_summary(&self) -> String {
108 use std::fmt::Write;
109 let mut output = String::new();
110
111 if self.page_signals.has_blocking_element() {
113 if self.page_signals.modal_present {
114 let _ = writeln!(output, "⚠ Modal dialog present");
115 }
116 if self.page_signals.cookie_banner {
117 let _ = writeln!(output, "⚠ Cookie banner present");
118 }
119 }
120 if self.page_signals.loading_indicator {
121 let _ = writeln!(output, "⏳ Page loading...");
122 }
123
124 let _ = writeln!(output, "\n## Visible Text");
126 let mut seen_texts: std::collections::HashSet<String> = std::collections::HashSet::new();
127 for el in &self.elements {
128 if let Some(ref name) = el.name {
129 let text = name.trim().to_string();
130 if !text.is_empty() && text.len() > 1 && seen_texts.insert(text.clone()) {
131 let role = el.role.to_hash_string();
132 let truncated = if text.len() > 80 {
133 let end = text.floor_char_boundary(77);
134 format!("{}...", &text[..end])
135 } else {
136 text
137 };
138 let _ = writeln!(output, " ({}) {}", role, truncated);
139 }
140 }
141 }
142
143 let _ = writeln!(output, "\n## Interactive Elements");
145 let interactive = self.interactive_elements();
146 for (i, el) in interactive.iter().enumerate().take(50) {
147 let role_str = el.role.to_hash_string();
148 let name_str = el
149 .name
150 .as_deref()
151 .map(|n| {
152 if n.len() > 40 {
153 let end = n.floor_char_boundary(37);
154 format!(" \"{}...\"", &n[..end])
155 } else {
156 format!(" \"{}\"", n)
157 }
158 })
159 .unwrap_or_default();
160 let mut state_parts = Vec::new();
161 if element_states_for_summary(&el.states, &mut state_parts) {
162 let _ = writeln!(
163 output,
164 "[{}] {}{} {}",
165 el.id,
166 role_str,
167 name_str,
168 state_parts.join(" ")
169 );
170 } else {
171 let _ = writeln!(output, "[{}] {}{}", el.id, role_str, name_str);
172 }
173 let _ = i; }
175 if interactive.len() > 50 {
176 let _ = writeln!(
177 output,
178 " ... and {} more interactive elements",
179 interactive.len() - 50
180 );
181 }
182
183 output
184 }
185
186 pub fn format_compact(&self) -> String {
190 use std::fmt::Write;
191 let mut output = String::new();
192
193 if self.page_signals.has_blocking_element() {
195 if self.page_signals.modal_present {
196 let _ = writeln!(output, "⚠ Modal dialog present");
197 }
198 if self.page_signals.cookie_banner {
199 let _ = writeln!(output, "⚠ Cookie banner present");
200 }
201 }
202 if self.page_signals.loading_indicator {
203 let _ = writeln!(output, "⏳ Page loading...");
204 }
205
206 let elements: Vec<&UiElement> = if self.elements.len() > 40 {
208 self.interactive_elements()
209 } else {
210 self.elements.iter().collect()
211 };
212
213 for element in &elements {
214 let role_str = element.role.to_hash_string();
215 let name_str = element
216 .name
217 .as_deref()
218 .map(|n| {
219 if n.len() > 50 {
220 let truncated: String = n.chars().take(47).collect();
221 format!(" \"{}...\"", truncated)
222 } else {
223 format!(" \"{}\"", n)
224 }
225 })
226 .unwrap_or_default();
227
228 let (cx, cy) = element.bounds.center();
229 let pos_str = format!(" ({:.0},{:.0})", cx, cy);
230
231 let mut state_parts = Vec::new();
232 if element.states.focused {
233 state_parts.push("focused");
234 }
235 if !element.states.enabled {
236 state_parts.push("disabled");
237 }
238 if element.states.checked == Some(true) {
239 state_parts.push("checked");
240 }
241 if element.states.expanded == Some(true) {
242 state_parts.push("expanded");
243 }
244 let state_str = if state_parts.is_empty() {
245 String::new()
246 } else {
247 format!(" {}", state_parts.join(" "))
248 };
249
250 let _ = writeln!(
251 output,
252 "[{}] {}{}{}{}",
253 element.id, role_str, name_str, pos_str, state_str
254 );
255 }
256
257 output
258 }
259}
260
261fn element_states_for_summary(states: &UiState, parts: &mut Vec<&'static str>) -> bool {
262 if states.focused {
263 parts.push("focused");
264 }
265 if !states.enabled {
266 parts.push("disabled");
267 }
268 if states.checked == Some(true) {
269 parts.push("checked");
270 }
271 if states.expanded == Some(true) {
272 parts.push("expanded");
273 }
274 !parts.is_empty()
275}
276
277#[derive(Debug, Clone, Serialize, Deserialize)]
279pub struct UiElement {
280 pub id: String,
281 pub role: UiRole,
282 pub name: Option<String>,
283 pub value: Option<String>,
285 pub bounds: Bounds,
286 pub states: UiState,
287 pub confidence: f32,
288 pub source: ElementSource,
289 pub icon_type: Option<IconType>,
290 pub children: Vec<String>,
291 pub ax_ref: Option<String>,
293}
294
295impl UiElement {
296 pub fn is_interactable(&self) -> bool {
297 self.states.enabled && self.bounds.width > 0.0 && self.bounds.height > 0.0
298 }
299
300 pub fn center(&self) -> (f64, f64) {
301 self.bounds.center()
302 }
303
304 pub fn accepts_text(&self) -> bool {
305 matches!(self.role, UiRole::TextInput)
306 }
307
308 pub fn is_clickable(&self) -> bool {
309 matches!(self.role, UiRole::Button | UiRole::Link)
310 }
311}
312
313#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
315#[serde(rename_all = "snake_case")]
316pub enum UiRole {
317 Button,
318 Link,
319 TextInput,
320 Checkbox,
321 Radio,
322 Dropdown,
323 Menu,
324 MenuItem,
325 Tab,
326 Dialog,
327 Image,
328 Text,
329 Container,
330 List,
331 ListItem,
332 Table,
333 TableRow,
334 TableCell,
335 Toolbar,
336 Other(String),
337}
338
339impl UiRole {
340 pub fn from_ax_role(ax_role: &str) -> Self {
341 match ax_role.to_lowercase().as_str() {
342 "button" | "pushbutton" => UiRole::Button,
343 "link" | "weblink" => UiRole::Link,
344 "textbox" | "textfield" | "textarea" | "combobox" | "searchfield" => UiRole::TextInput,
345 "checkbox" => UiRole::Checkbox,
346 "radio" | "radiobutton" => UiRole::Radio,
347 "select" | "listbox" | "popupbutton" => UiRole::Dropdown,
348 "menu" | "menubar" => UiRole::Menu,
349 "menuitem" | "menuitemcheckbox" | "menuitemradio" => UiRole::MenuItem,
350 "tab" | "tabitem" => UiRole::Tab,
351 "dialog" | "alertdialog" | "sheet" => UiRole::Dialog,
352 "image" | "img" => UiRole::Image,
353 "statictext" | "label" | "heading" => UiRole::Text,
354 "group" | "generic" | "section" | "div" | "webarea" => UiRole::Container,
355 "list" => UiRole::List,
356 "listitem" => UiRole::ListItem,
357 "table" | "grid" => UiRole::Table,
358 "row" | "tablerow" => UiRole::TableRow,
359 "cell" | "tablecell" | "gridcell" => UiRole::TableCell,
360 "toolbar" => UiRole::Toolbar,
361 other => UiRole::Other(other.to_string()),
362 }
363 }
364
365 pub fn is_interactable(&self) -> bool {
366 matches!(
367 self,
368 UiRole::Button
369 | UiRole::Link
370 | UiRole::TextInput
371 | UiRole::Checkbox
372 | UiRole::Radio
373 | UiRole::Dropdown
374 | UiRole::MenuItem
375 | UiRole::Tab
376 )
377 }
378
379 pub fn to_hash_string(&self) -> String {
380 match self {
381 UiRole::Button => "button".to_string(),
382 UiRole::Link => "link".to_string(),
383 UiRole::TextInput => "text_input".to_string(),
384 UiRole::Checkbox => "checkbox".to_string(),
385 UiRole::Radio => "radio".to_string(),
386 UiRole::Dropdown => "dropdown".to_string(),
387 UiRole::Menu => "menu".to_string(),
388 UiRole::MenuItem => "menu_item".to_string(),
389 UiRole::Tab => "tab".to_string(),
390 UiRole::Dialog => "dialog".to_string(),
391 UiRole::Image => "image".to_string(),
392 UiRole::Text => "text".to_string(),
393 UiRole::Container => "container".to_string(),
394 UiRole::List => "list".to_string(),
395 UiRole::ListItem => "list_item".to_string(),
396 UiRole::Table => "table".to_string(),
397 UiRole::TableRow => "table_row".to_string(),
398 UiRole::TableCell => "table_cell".to_string(),
399 UiRole::Toolbar => "toolbar".to_string(),
400 UiRole::Other(s) => format!("other:{}", s),
401 }
402 }
403}
404
405#[derive(Debug, Clone, Default, Serialize, Deserialize)]
407pub struct UiState {
408 pub enabled: bool,
409 pub focused: bool,
410 pub selected: bool,
411 pub checked: Option<bool>,
412 pub expanded: Option<bool>,
413 pub readonly: bool,
414 pub required: bool,
415}
416
417impl UiState {
418 pub fn enabled() -> Self {
419 Self {
420 enabled: true,
421 ..Default::default()
422 }
423 }
424
425 pub fn disabled() -> Self {
426 Self {
427 enabled: false,
428 ..Default::default()
429 }
430 }
431
432 pub fn from_ax_states(
433 disabled: bool,
434 focused: bool,
435 selected: Option<bool>,
436 checked: Option<bool>,
437 expanded: Option<bool>,
438 ) -> Self {
439 Self {
440 enabled: !disabled,
441 focused,
442 selected: selected.unwrap_or(false),
443 checked,
444 expanded,
445 readonly: false,
446 required: false,
447 }
448 }
449
450 pub fn to_hash_string(&self) -> String {
451 fn opt_bool_str(opt: Option<bool>) -> &'static str {
452 match opt {
453 None => "none",
454 Some(true) => "true",
455 Some(false) => "false",
456 }
457 }
458 format!(
459 "en:{},fo:{},se:{},ch:{},ex:{},ro:{},rq:{}",
460 self.enabled,
461 self.focused,
462 self.selected,
463 opt_bool_str(self.checked),
464 opt_bool_str(self.expanded),
465 self.readonly,
466 self.required
467 )
468 }
469}
470
471#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
472#[serde(rename_all = "snake_case")]
473pub enum ElementSource {
474 AccessibilityTree,
475 VisualDetector,
476 Ocr,
477 Merged { sources: Vec<ElementSource> },
478}
479
480impl ElementSource {
481 pub fn base_confidence(&self) -> f32 {
482 match self {
483 ElementSource::AccessibilityTree => 0.90,
484 ElementSource::VisualDetector => 0.75,
485 ElementSource::Ocr => 0.70,
486 ElementSource::Merged { .. } => 0.98,
487 }
488 }
489}
490
491#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
492#[serde(rename_all = "snake_case")]
493pub enum IconType {
494 Close,
495 Menu,
496 Search,
497 Back,
498 Forward,
499 Refresh,
500 Settings,
501 Share,
502 Download,
503 Upload,
504 Edit,
505 Delete,
506 Add,
507 Remove,
508 Expand,
509 Collapse,
510 Play,
511 Pause,
512 Stop,
513 Mute,
514 Unmute,
515 Fullscreen,
516 ExitFullscreen,
517 Info,
518 Help,
519 Warning,
520 Error,
521 Success,
522 Unknown,
523}
524
525#[derive(Debug, Clone, Serialize, Deserialize)]
526pub struct TextBlock {
527 pub text: String,
528 pub bounds: Bounds,
529 pub source: TextSource,
530 pub confidence: f32,
531}
532
533impl TextBlock {
534 pub fn from_ax(text: String, bounds: Bounds) -> Self {
535 Self {
536 text,
537 bounds,
538 source: TextSource::AccessibilityTree,
539 confidence: 1.0,
540 }
541 }
542}
543
544#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
545#[serde(rename_all = "snake_case")]
546pub enum TextSource {
547 AccessibilityTree,
548 Ocr,
549}
550
551#[derive(Debug, Clone, Default, Serialize, Deserialize)]
552pub struct PageSignals {
553 pub modal_present: bool,
554 pub cookie_banner: bool,
555 pub error_banner: bool,
556 pub loading_indicator: bool,
557 pub scroll_position: f32,
558 pub page_type_hint: Option<String>,
559}
560
561impl PageSignals {
562 pub fn has_blocking_element(&self) -> bool {
563 self.modal_present || self.cookie_banner
564 }
565
566 pub fn needs_special_handling(&self) -> bool {
567 matches!(
568 self.page_type_hint.as_deref(),
569 Some("login") | Some("checkout") | Some("payment")
570 )
571 }
572
573 pub fn to_hash_string(&self) -> String {
574 format!(
575 "mo:{},co:{},er:{},lo:{},sc:{:.2},ty:{}",
576 self.modal_present,
577 self.cookie_banner,
578 self.error_banner,
579 self.loading_indicator,
580 self.scroll_position,
581 self.page_type_hint.as_deref().unwrap_or("none")
582 )
583 }
584}
585
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the enabled, focused "Submit" button used by the formatting test.
    fn focused_submit_button() -> UiElement {
        let states = UiState {
            focused: true,
            ..UiState::enabled()
        };
        UiElement {
            id: "el_0".to_string(),
            role: UiRole::Button,
            name: Some("Submit".to_string()),
            value: None,
            bounds: Bounds::new(100.0, 100.0, 80.0, 30.0),
            states,
            confidence: 0.95,
            source: ElementSource::AccessibilityTree,
            icon_type: None,
            children: vec![],
            ax_ref: None,
        }
    }

    #[test]
    fn test_ui_role_from_ax() {
        assert_eq!(UiRole::from_ax_role("button"), UiRole::Button);
        assert_eq!(UiRole::from_ax_role("textfield"), UiRole::TextInput);
        // Unknown names fall back to `Other`.
        assert!(matches!(UiRole::from_ax_role("custom"), UiRole::Other(_)));
    }

    #[test]
    fn test_format_compact() {
        let viewport = Viewport {
            width: 1280,
            height: 720,
            device_pixel_ratio: 2.0,
        };
        let map = UiMap::new(
            "https://example.com".to_string(),
            vec![focused_submit_button()],
            vec![],
            PageSignals::default(),
            viewport,
            String::new(),
        );

        let compact = map.format_compact();
        for expected in ["[el_0]", "button", "Submit", "focused"].iter() {
            assert!(compact.contains(expected));
        }
    }
}