use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::models::{Bounds, Viewport};
/// Snapshot of a page's UI: its elements, text blocks and page-level signals.
///
/// Maps built through `UiMap::new` receive a freshly generated `id`, the
/// current UTC time as `timestamp`, and a `content_hash` computed from the
/// elements, text blocks and page signals.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UiMap {
/// Identifier of this map; `UiMap::new` fills it with a v4 UUID string.
pub id: String,
/// Creation time; `UiMap::new` sets it to `Utc::now()`.
pub timestamp: DateTime<Utc>,
/// URL associated with this map (presumably the page it was captured from — confirm with callers).
pub url: String,
/// Elements contained in the map.
pub elements: Vec<UiElement>,
/// Text blocks contained in the map; their text feeds the content hash.
pub text_blocks: Vec<TextBlock>,
/// Page-level flags such as modal / cookie banner / loading indicator.
pub page_signals: PageSignals,
/// Viewport stored alongside the map (not part of the content hash).
pub viewport: Viewport,
/// Hex-encoded SHA-256 digest produced by `compute_content_hash`.
pub content_hash: String,
/// Screenshot location as supplied by the caller (format not validated here).
pub screenshot_path: String,
}
impl UiMap {
/// Creates a map from the supplied parts.
///
/// The `id` is a newly generated v4 UUID string, `timestamp` is the current
/// UTC time, and `content_hash` is computed from the populated map via
/// [`UiMap::compute_content_hash`].
pub fn new(
    url: String,
    elements: Vec<UiElement>,
    text_blocks: Vec<TextBlock>,
    page_signals: PageSignals,
    viewport: Viewport,
    screenshot_path: String,
) -> Self {
    let mut snapshot = Self {
        id: uuid::Uuid::new_v4().to_string(),
        timestamp: Utc::now(),
        url,
        elements,
        text_blocks,
        page_signals,
        viewport,
        // Placeholder: the digest can only be computed once the other
        // fields are in place.
        content_hash: String::new(),
        screenshot_path,
    };
    let digest = snapshot.compute_content_hash();
    snapshot.content_hash = digest;
    snapshot
}
/// Returns the hex-encoded SHA-256 digest of the map's content.
///
/// The digest covers, in order: for every element its id, role string,
/// name (only when present) and state string; then the text of every text
/// block; then the page-signal string. Bounds, values, viewport, URL and
/// timestamp are not part of the digest.
///
/// The pieces are fed to the hasher back to back without separators.
pub fn compute_content_hash(&self) -> String {
    let mut digest = Sha256::new();

    for el in self.elements.iter() {
        let role_repr = el.role.to_hash_string();
        let state_repr = el.states.to_hash_string();

        digest.update(el.id.as_bytes());
        digest.update(role_repr.as_bytes());
        // An absent name contributes no bytes at all.
        if let Some(label) = el.name.as_deref() {
            digest.update(label.as_bytes());
        }
        digest.update(state_repr.as_bytes());
    }

    for text_block in self.text_blocks.iter() {
        digest.update(text_block.text.as_bytes());
    }

    let signals_repr = self.page_signals.to_hash_string();
    digest.update(signals_repr.as_bytes());

    hex::encode(digest.finalize())
}
/// Looks up the first element whose `id` equals `element_id`.
///
/// Returns `None` when no element carries that id.
pub fn get_element(&self, element_id: &str) -> Option<&UiElement> {
    for candidate in &self.elements {
        if candidate.id == element_id {
            return Some(candidate);
        }
    }
    None
}
/// Collects every element whose role is the same variant as `role`.
///
/// Only the enum variant is compared, so any `UiRole::Other(_)` argument
/// matches all `Other` elements regardless of the string they carry.
pub fn get_elements_by_role(&self, role: UiRole) -> Vec<&UiElement> {
    let wanted = std::mem::discriminant(&role);
    let mut found = Vec::new();
    for el in &self.elements {
        if std::mem::discriminant(&el.role) == wanted {
            found.push(el);
        }
    }
    found
}
/// Returns the elements a user could act on, in their original order.
///
/// An element qualifies when its role is an interactable role and the
/// element itself reports being interactable (enabled, non-empty bounds).
pub fn interactive_elements(&self) -> Vec<&UiElement> {
    let mut actionable = Vec::new();
    for el in &self.elements {
        if !el.role.is_interactable() {
            continue;
        }
        if el.is_interactable() {
            actionable.push(el);
        }
    }
    actionable
}
/// Rough size estimate: 20 per counted element plus a fixed 30.
///
/// With `interactive_only` set, only the elements returned by
/// `interactive_elements` are counted; otherwise every element is.
pub fn estimate_tokens(&self, interactive_only: bool) -> usize {
    const PER_ELEMENT: usize = 20;
    const FIXED_OVERHEAD: usize = 30;

    let counted = match interactive_only {
        true => self.interactive_elements().len(),
        false => self.elements.len(),
    };
    counted * PER_ELEMENT + FIXED_OVERHEAD
}
/// Arithmetic mean of the elements' `confidence` values.
///
/// Returns `0.0` for a map without elements so the division is never by zero.
pub fn average_confidence(&self) -> f32 {
    let element_count = self.elements.len();
    match element_count {
        0 => 0.0,
        _ => {
            let total: f32 = self.elements.iter().map(|el| el.confidence).sum();
            total / element_count as f32
        }
    }
}
pub fn format_summary(&self) -> String {
use std::fmt::Write;
let mut output = String::new();
if self.page_signals.has_blocking_element() {
if self.page_signals.modal_present {
let _ = writeln!(output, "⚠ Modal dialog present");
}
if self.page_signals.cookie_banner {
let _ = writeln!(output, "⚠ Cookie banner present");
}
}
if self.page_signals.loading_indicator {
let _ = writeln!(output, "⏳ Page loading...");
}
let _ = writeln!(output, "\n## Visible Text");
let mut seen_texts: std::collections::HashSet<String> = std::collections::HashSet::new();
for el in &self.elements {
if let Some(ref name) = el.name {
let text = name.trim().to_string();
if !text.is_empty() && text.len() > 1 && seen_texts.insert(text.clone()) {
let role = el.role.to_hash_string();
let truncated = if text.len() > 80 {
let end = text.floor_char_boundary(77);
format!("{}...", &text[..end])
} else {
text
};
let _ = writeln!(output, " ({}) {}", role, truncated);
}
}
}
let _ = writeln!(output, "\n## Interactive Elements");
let interactive = self.interactive_elements();
for (i, el) in interactive.iter().enumerate().take(50) {
let role_str = el.role.to_hash_string();
let name_str = el.name.as_deref()
.map(|n| {
if n.len() > 40 {
let end = n.floor_char_boundary(37);
format!(" \"{}...\"", &n[..end])
} else {
format!(" \"{}\"", n)
}
})
.unwrap_or_default();
let mut state_parts = Vec::new();
if element_states_for_summary(&el.states, &mut state_parts) {
let _ = writeln!(output, "[{}] {}{} {}", el.id, role_str, name_str, state_parts.join(" "));
} else {
let _ = writeln!(output, "[{}] {}{}", el.id, role_str, name_str);
}
let _ = i; }
if interactive.len() > 50 {
let _ = writeln!(output, " ... and {} more interactive elements", interactive.len() - 50);
}
output
}
pub fn format_compact(&self) -> String {
use std::fmt::Write;
let mut output = String::new();
if self.page_signals.has_blocking_element() {
if self.page_signals.modal_present {
let _ = writeln!(output, "⚠ Modal dialog present");
}
if self.page_signals.cookie_banner {
let _ = writeln!(output, "⚠ Cookie banner present");
}
}
if self.page_signals.loading_indicator {
let _ = writeln!(output, "⏳ Page loading...");
}
let elements: Vec<&UiElement> = if self.elements.len() > 40 {
self.interactive_elements()
} else {
self.elements.iter().collect()
};
for element in &elements {
let role_str = element.role.to_hash_string();
let name_str = element
.name
.as_deref()
.map(|n| {
if n.len() > 50 {
let truncated: String = n.chars().take(47).collect();
format!(" \"{}...\"", truncated)
} else {
format!(" \"{}\"", n)
}
})
.unwrap_or_default();
let (cx, cy) = element.bounds.center();
let pos_str = format!(" ({:.0},{:.0})", cx, cy);
let mut state_parts = Vec::new();
if element.states.focused {
state_parts.push("focused");
}
if !element.states.enabled {
state_parts.push("disabled");
}
if element.states.checked == Some(true) {
state_parts.push("checked");
}
if element.states.expanded == Some(true) {
state_parts.push("expanded");
}
let state_str = if state_parts.is_empty() {
String::new()
} else {
format!(" {}", state_parts.join(" "))
};
let _ = writeln!(
output,
"[{}] {}{}{}{}",
element.id, role_str, name_str, pos_str, state_str
);
}
output
}
}
/// Appends the display labels for the notable states in `states` to `parts`.
///
/// Labels are pushed in a fixed order: `focused`, `disabled` (when the
/// element is not enabled), `checked` and `expanded` (each only for
/// `Some(true)`). Returns `true` when `parts` is non-empty afterwards, which
/// includes entries that were already present before the call.
fn element_states_for_summary(states: &UiState, parts: &mut Vec<&'static str>) -> bool {
    let labelled_flags: [(bool, &'static str); 4] = [
        (states.focused, "focused"),
        (!states.enabled, "disabled"),
        (states.checked == Some(true), "checked"),
        (states.expanded == Some(true), "expanded"),
    ];
    parts.extend(
        labelled_flags
            .iter()
            .filter(|flag| flag.0)
            .map(|flag| flag.1),
    );
    !parts.is_empty()
}
/// A single UI element inside a `UiMap`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UiElement {
/// Identifier used for lookups (`UiMap::get_element`) and printed as `[id]` in listings.
pub id: String,
/// Role of the element (button, link, ...).
pub role: UiRole,
/// Optional label; shown quoted in the formatted listings and included in the content hash.
pub name: Option<String>,
/// Optional value (presumably the current input/field value — confirm with producers).
pub value: Option<String>,
/// Geometry of the element; width and height must be positive for it to be interactable.
pub bounds: Bounds,
/// Enabled / focused / checked / ... flags.
pub states: UiState,
/// Confidence score, averaged by `UiMap::average_confidence` (presumably in 0.0..=1.0 — confirm).
pub confidence: f32,
/// Which detector produced this element.
pub source: ElementSource,
/// Optional icon classification.
pub icon_type: Option<IconType>,
/// Child references (presumably ids of child elements — confirm with producers).
pub children: Vec<String>,
/// Optional accessibility reference (presumably a handle into the accessibility tree — confirm).
pub ax_ref: Option<String>,
}
impl UiElement {
    /// Whether the element is enabled and occupies a non-empty area
    /// (both width and height strictly positive).
    pub fn is_interactable(&self) -> bool {
        if !self.states.enabled {
            return false;
        }
        let area = &self.bounds;
        area.width > 0.0 && area.height > 0.0
    }

    /// Centre point of the element, as reported by its bounds.
    pub fn center(&self) -> (f64, f64) {
        let midpoint = self.bounds.center();
        midpoint
    }

    /// Whether the element is a text input.
    pub fn accepts_text(&self) -> bool {
        self.role == UiRole::TextInput
    }

    /// Whether the element is a button or a link.
    pub fn is_clickable(&self) -> bool {
        self.role == UiRole::Button || self.role == UiRole::Link
    }
}
/// Role of a UI element.
///
/// Serialized with snake_case variant names. Roles that
/// `UiRole::from_ax_role` does not recognise are kept in `Other` together
/// with their lower-cased role string.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UiRole {
Button,
Link,
TextInput,
Checkbox,
Radio,
Dropdown,
Menu,
MenuItem,
Tab,
Dialog,
Image,
Text,
Container,
List,
ListItem,
Table,
TableRow,
TableCell,
Toolbar,
/// Any role without a dedicated variant; carries the role string.
Other(String),
}
impl UiRole {
pub fn from_ax_role(ax_role: &str) -> Self {
match ax_role.to_lowercase().as_str() {
"button" | "pushbutton" => UiRole::Button,
"link" | "weblink" => UiRole::Link,
"textbox" | "textfield" | "textarea" | "combobox" | "searchfield" => UiRole::TextInput,
"checkbox" => UiRole::Checkbox,
"radio" | "radiobutton" => UiRole::Radio,
"select" | "listbox" | "popupbutton" => UiRole::Dropdown,
"menu" | "menubar" => UiRole::Menu,
"menuitem" | "menuitemcheckbox" | "menuitemradio" => UiRole::MenuItem,
"tab" | "tabitem" => UiRole::Tab,
"dialog" | "alertdialog" | "sheet" => UiRole::Dialog,
"image" | "img" => UiRole::Image,
"statictext" | "label" | "heading" => UiRole::Text,
"group" | "generic" | "section" | "div" | "webarea" => UiRole::Container,
"list" => UiRole::List,
"listitem" => UiRole::ListItem,
"table" | "grid" => UiRole::Table,
"row" | "tablerow" => UiRole::TableRow,
"cell" | "tablecell" | "gridcell" => UiRole::TableCell,
"toolbar" => UiRole::Toolbar,
other => UiRole::Other(other.to_string()),
}
}
pub fn is_interactable(&self) -> bool {
matches!(
self,
UiRole::Button
| UiRole::Link
| UiRole::TextInput
| UiRole::Checkbox
| UiRole::Radio
| UiRole::Dropdown
| UiRole::MenuItem
| UiRole::Tab
)
}
pub fn to_hash_string(&self) -> String {
match self {
UiRole::Button => "button".to_string(),
UiRole::Link => "link".to_string(),
UiRole::TextInput => "text_input".to_string(),
UiRole::Checkbox => "checkbox".to_string(),
UiRole::Radio => "radio".to_string(),
UiRole::Dropdown => "dropdown".to_string(),
UiRole::Menu => "menu".to_string(),
UiRole::MenuItem => "menu_item".to_string(),
UiRole::Tab => "tab".to_string(),
UiRole::Dialog => "dialog".to_string(),
UiRole::Image => "image".to_string(),
UiRole::Text => "text".to_string(),
UiRole::Container => "container".to_string(),
UiRole::List => "list".to_string(),
UiRole::ListItem => "list_item".to_string(),
UiRole::Table => "table".to_string(),
UiRole::TableRow => "table_row".to_string(),
UiRole::TableCell => "table_cell".to_string(),
UiRole::Toolbar => "toolbar".to_string(),
UiRole::Other(s) => format!("other:{}", s),
}
}
}
/// State flags of a UI element.
///
/// The derived `Default` yields all flags `false` and both optional flags
/// `None`, i.e. a default state is *not* enabled.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct UiState {
/// Whether the element is enabled; listings print `disabled` when this is false.
pub enabled: bool,
/// Whether the element currently has focus.
pub focused: bool,
/// Whether the element is selected.
pub selected: bool,
/// Checked state; `None` presumably means the notion does not apply — confirm with producers.
pub checked: Option<bool>,
/// Expanded state; `None` presumably means the notion does not apply — confirm with producers.
pub expanded: Option<bool>,
/// Whether the element is read-only (never set by `from_ax_states`).
pub readonly: bool,
/// Whether the element is marked required (never set by `from_ax_states`).
pub required: bool,
}
impl UiState {
    /// Default state with `enabled` switched on.
    pub fn enabled() -> Self {
        let mut state = Self::default();
        state.enabled = true;
        state
    }

    /// Default state with `enabled` explicitly off.
    pub fn disabled() -> Self {
        let mut state = Self::default();
        state.enabled = false;
        state
    }

    /// Builds a state from accessibility-style flags.
    ///
    /// `disabled` is inverted into `enabled`, a missing `selected` counts as
    /// not selected, and `readonly` / `required` are always `false`.
    pub fn from_ax_states(
        disabled: bool,
        focused: bool,
        selected: Option<bool>,
        checked: Option<bool>,
        expanded: Option<bool>,
    ) -> Self {
        let is_selected = match selected {
            Some(flag) => flag,
            None => false,
        };
        Self {
            enabled: !disabled,
            focused,
            selected: is_selected,
            checked,
            expanded,
            readonly: false,
            required: false,
        }
    }

    /// Stable textual form of the state, used for hashing.
    ///
    /// Format: `en:..,fo:..,se:..,ch:..,ex:..,ro:..,rq:..` where optional
    /// flags print as `none`, `true` or `false`.
    pub fn to_hash_string(&self) -> String {
        let tri_state = |flag: Option<bool>| -> &'static str {
            flag.map_or("none", |set| if set { "true" } else { "false" })
        };
        let checked_repr = tri_state(self.checked);
        let expanded_repr = tri_state(self.expanded);
        format!(
            "en:{},fo:{},se:{},ch:{},ex:{},ro:{},rq:{}",
            self.enabled,
            self.focused,
            self.selected,
            checked_repr,
            expanded_repr,
            self.readonly,
            self.required
        )
    }
}
/// Origin of a detected UI element.
///
/// Serialized with snake_case variant names. Each variant has a baseline
/// confidence, see `ElementSource::base_confidence`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ElementSource {
/// Element taken from the accessibility tree.
AccessibilityTree,
/// Element found by the visual detector.
VisualDetector,
/// Element found through OCR.
Ocr,
/// Element combined from several sources, which are listed in `sources`.
Merged { sources: Vec<ElementSource> },
}
impl ElementSource {
pub fn base_confidence(&self) -> f32 {
match self {
ElementSource::AccessibilityTree => 0.90,
ElementSource::VisualDetector => 0.75,
ElementSource::Ocr => 0.70,
ElementSource::Merged { .. } => 0.98,
}
}
}
/// Icon classifications that can be attached to an element through
/// `UiElement::icon_type`. Serialized with snake_case variant names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IconType {
Close, Menu, Search, Back, Forward, Refresh, Settings, Share,
Download, Upload, Edit, Delete, Add, Remove, Expand, Collapse,
Play, Pause, Stop, Mute, Unmute, Fullscreen, ExitFullscreen,
Info, Help, Warning, Error, Success, Unknown,
}
/// A piece of text found on the page together with where it came from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextBlock {
/// The text itself; included in the map's content hash.
pub text: String,
/// Geometry of the block.
pub bounds: Bounds,
/// Which source produced the text.
pub source: TextSource,
/// Confidence score; `TextBlock::from_ax` sets it to 1.0.
pub confidence: f32,
}
impl TextBlock {
pub fn from_ax(text: String, bounds: Bounds) -> Self {
Self {
text,
bounds,
source: TextSource::AccessibilityTree,
confidence: 1.0,
}
}
}
/// Origin of a `TextBlock`. Serialized with snake_case variant names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TextSource {
/// Text taken from the accessibility tree.
AccessibilityTree,
/// Text recognised through OCR.
Ocr,
}
/// Page-level flags describing the overall state of the page.
///
/// The derived `Default` yields all flags `false`, a scroll position of
/// `0.0` and no page-type hint.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PageSignals {
/// A modal dialog is present; counts as a blocking element.
pub modal_present: bool,
/// A cookie banner is present; counts as a blocking element.
pub cookie_banner: bool,
/// An error banner is present.
pub error_banner: bool,
/// A loading indicator is visible.
pub loading_indicator: bool,
/// Scroll position (unit/range not established here — confirm with producers).
pub scroll_position: f32,
/// Optional page-type hint; `login`, `checkout` and `payment` trigger special handling.
pub page_type_hint: Option<String>,
}
impl PageSignals {
    /// Whether something is covering the page: a modal dialog or a cookie
    /// banner.
    pub fn has_blocking_element(&self) -> bool {
        if self.modal_present {
            return true;
        }
        self.cookie_banner
    }

    /// Whether the page-type hint names a sensitive page kind
    /// (`login`, `checkout` or `payment`).
    pub fn needs_special_handling(&self) -> bool {
        match self.page_type_hint.as_deref() {
            Some(hint) => hint == "login" || hint == "checkout" || hint == "payment",
            None => false,
        }
    }

    /// Stable textual form of the signals, used for hashing.
    ///
    /// Format: `mo:..,co:..,er:..,lo:..,sc:..,ty:..` with the scroll
    /// position printed to two decimals and a missing page type as `none`.
    pub fn to_hash_string(&self) -> String {
        let page_type: &str = match &self.page_type_hint {
            Some(hint) => hint.as_str(),
            None => "none",
        };
        format!(
            "mo:{},co:{},er:{},lo:{},sc:{:.2},ty:{}",
            self.modal_present,
            self.cookie_banner,
            self.error_banner,
            self.loading_indicator,
            self.scroll_position,
            page_type
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the focused, enabled "Submit" button used by the formatting test.
    fn submit_button() -> UiElement {
        let mut states = UiState::enabled();
        states.focused = true;
        UiElement {
            id: "el_0".to_string(),
            role: UiRole::Button,
            name: Some("Submit".to_string()),
            value: None,
            bounds: Bounds::new(100.0, 100.0, 80.0, 30.0),
            states,
            confidence: 0.95,
            source: ElementSource::AccessibilityTree,
            icon_type: None,
            children: vec![],
            ax_ref: None,
        }
    }

    /// Known accessibility roles map to their variants; unknown ones fall
    /// back to `Other`.
    #[test]
    fn test_ui_role_from_ax() {
        let button = UiRole::from_ax_role("button");
        let text_field = UiRole::from_ax_role("textfield");
        let custom = UiRole::from_ax_role("custom");

        assert_eq!(button, UiRole::Button);
        assert_eq!(text_field, UiRole::TextInput);
        assert!(matches!(custom, UiRole::Other(_)));
    }

    /// The compact listing mentions id, role, name and state of an element.
    #[test]
    fn test_format_compact() {
        let viewport = Viewport {
            width: 1280,
            height: 720,
            device_pixel_ratio: 2.0,
        };
        let map = UiMap::new(
            "https://example.com".to_string(),
            vec![submit_button()],
            vec![],
            PageSignals::default(),
            viewport,
            String::new(),
        );

        let compact = map.format_compact();
        for expected in ["[el_0]", "button", "Submit", "focused"].iter() {
            assert!(compact.contains(expected));
        }
    }
}