use crate::intent::{SceneGraph, SceneNode};
use crate::intent_matching::{score_node, MatchContext};
/// The kind of question a natural-language scene query is asking.
///
/// Values are produced by [`parse_query_type`]. The `String` payloads hold the
/// subject of the query: the lower-cased, trimmed text that remains once a
/// recognised leading phrase has been removed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QueryType {
    /// Locate the elements that best match the subject. This is also the
    /// fallback when no other query kind is recognised.
    FindElement(String),
    /// Summarise what is currently on screen.
    DescribeScene,
    /// Report whether something matching the subject is present.
    CheckState(String),
    /// Count the elements matching the subject.
    CountElements(String),
}
/// One scene element that matched a find-style query.
#[derive(Debug, Clone)]
pub struct SceneMatch {
/// Role of the matched node (for example `"AXButton"`); empty when the node has no role.
pub element_role: String,
/// First text label of the node; empty when the node has none.
pub element_label: String,
/// Roles of the node's ancestors, ordered from the outermost ancestor down to the direct parent.
pub element_path: Vec<String>,
/// Bounds copied from the node.
/// NOTE(review): the tuple looks like (x, y, width, height) — confirm against `SceneNode`.
pub bounds: Option<(f64, f64, f64, f64)>,
/// Score the matcher assigned to this node for the query.
pub match_score: f64,
/// Human-readable explanation of the match; falls back to `score N.NN` rather than being left empty.
pub match_reason: String,
}
/// Outcome of running a query through [`SceneEngine::query`].
#[derive(Debug, Clone)]
pub struct SceneResult {
/// Matched elements, best score first; always empty for describe and count queries.
pub matches: Vec<SceneMatch>,
/// Confidence in the answer: the top match score for find and check queries,
/// and either `0.0` or `1.0` for describe and count queries.
pub confidence: f64,
/// Free-text answer; set for describe, check and count queries, `None` for plain find queries.
pub scene_description: Option<String>,
}
/// Stateless engine that answers natural-language queries against a [`SceneGraph`].
#[derive(Debug, Default)]
pub struct SceneEngine;
/// Minimum score a node needs to be returned by a find query, and the minimum
/// confidence for a check query to report its subject as present.
const FIND_THRESHOLD: f64 = 0.25;
/// Minimum score a node needs to be included in a count query.
const COUNT_THRESHOLD: f64 = 0.25;
/// Upper bound on the number of matches a find query returns.
const MAX_FIND_RESULTS: usize = 10;
impl SceneEngine {
#[must_use]
pub fn new() -> Self {
Self
}
#[must_use]
pub fn query(&self, query: &str, scene: &SceneGraph) -> SceneResult {
match parse_query_type(query) {
QueryType::DescribeScene => self.describe_scene(scene),
QueryType::FindElement(subject) => self.find_elements(&subject, scene),
QueryType::CheckState(subject) => self.check_state(&subject, scene),
QueryType::CountElements(subject) => self.count_elements(&subject, scene),
}
}
}
/// Classifies a free-form query and extracts its subject.
///
/// The query is trimmed and lower-cased, then tested in this order:
///
/// 1. describe keywords (matched anywhere in the text),
/// 2. check prefixes,
/// 3. count prefixes,
/// 4. find prefixes.
///
/// When nothing matches, the whole normalised query becomes the subject of a
/// [`QueryType::FindElement`]. Subjects are trimmed but otherwise left as-is,
/// so trailing punctuation such as `?` is retained.
#[must_use]
pub fn parse_query_type(query: &str) -> QueryType {
    let normalized = query.trim().to_lowercase();
    let text = normalized.as_str();

    if is_describe_query(text) {
        QueryType::DescribeScene
    } else if let Some(rest) = strip_prefix(text, CHECK_PREFIXES) {
        QueryType::CheckState(rest.trim().to_owned())
    } else if let Some(rest) = strip_prefix(text, COUNT_PREFIXES) {
        QueryType::CountElements(rest.trim().to_owned())
    } else {
        let rest = strip_prefix(text, FIND_PREFIXES).unwrap_or(text);
        QueryType::FindElement(rest.trim().to_owned())
    }
}
/// Phrases that mark a query as a request to describe the whole scene.
///
/// They are matched as substrings of the trimmed, lower-cased query, so
/// surrounding words and trailing punctuation do not prevent a match.
const DESCRIBE_KEYWORDS: &[&str] = &[
"what's on screen",
"what is on screen",
"describe the screen",
"describe the interface",
"describe the ui",
"what do you see",
"what's visible",
"what is visible",
"show me the screen",
"list elements",
"list all elements",
];
/// Leading phrases that mark a query as a presence check.
///
/// Entries are compared against the start of the trimmed, lower-cased query,
/// so they are written in lower case; each ends with a space.
const CHECK_PREFIXES: &[&str] = &[
"is the ",
"is there a ",
"is there an ",
"are there ",
"check if ",
"check whether ",
"does the ",
];
/// Leading phrases that mark a query as a request to count elements.
/// Compared against the start of the trimmed, lower-cased query.
const COUNT_PREFIXES: &[&str] = &["how many ", "count the ", "count all ", "number of "];
/// Leading phrases that are stripped from a find query to leave just its subject.
///
/// Entries are compared against the start of the trimmed, lower-cased query,
/// so they are written in lower case; each ends with a space. The article
/// variants (`the` / `a` / `an`) are listed for `find`, `locate` and
/// `where is` alike so that none of them leaks filler words into the subject.
const FIND_PREFIXES: &[&str] = &[
    "find the ",
    "find a ",
    "find an ",
    "locate the ",
    "locate a ",
    "locate an ",
    "where is the ",
    "where is a ",
    "where is an ",
    "get the ",
    "click the ",
    "press the ",
];
/// Returns `true` when `lower` contains any of the [`DESCRIBE_KEYWORDS`].
///
/// The keywords are lower-case, so the caller is expected to pass text that
/// has already been lower-cased.
fn is_describe_query(lower: &str) -> bool {
    for &keyword in DESCRIBE_KEYWORDS {
        if lower.contains(keyword) {
            return true;
        }
    }
    false
}
/// Removes the first entry of `prefixes` that `s` starts with.
///
/// Prefixes are tried in slice order and the first hit wins. Returns the
/// remainder of `s` after that prefix, or `None` when no prefix matches.
fn strip_prefix<'s>(s: &'s str, prefixes: &[&str]) -> Option<&'s str> {
    for &candidate in prefixes {
        if let Some(rest) = s.strip_prefix(candidate) {
            return Some(rest);
        }
    }
    None
}
impl SceneEngine {
    /// Finds the nodes of `scene` that best match `subject`.
    ///
    /// Every node is scored once against the query. Nodes scoring below
    /// [`FIND_THRESHOLD`] are dropped, the rest are ordered best-first and capped
    /// at [`MAX_FIND_RESULTS`]. The result's confidence is the top score, or
    /// `0.0` when nothing matched; no description is produced.
    fn find_elements(&self, subject: &str, scene: &SceneGraph) -> SceneResult {
        let ctx = MatchContext::from_query(subject);
        let mut scored = score_all_nodes(scene, &ctx);
        // The threshold test also discards NaN scores, so the sort below only
        // ever compares ordinary numbers.
        scored.retain(|(score, _, _)| *score >= FIND_THRESHOLD);
        scored.sort_by(|a, b| b.0.total_cmp(&a.0));
        scored.truncate(MAX_FIND_RESULTS);
        let confidence = scored.first().map_or(0.0, |(score, _, _)| *score);
        let matches = scored
            .into_iter()
            .map(|(score, reason, node)| build_scene_match(score, reason, node, scene))
            .collect();
        SceneResult {
            matches,
            confidence,
            scene_description: None,
        }
    }

    /// Produces a textual summary of the scene.
    ///
    /// No matches are returned; confidence is `0.0` for an empty scene and
    /// `1.0` otherwise.
    fn describe_scene(&self, scene: &SceneGraph) -> SceneResult {
        let description = build_scene_description(scene);
        SceneResult {
            matches: vec![],
            confidence: if scene.is_empty() { 0.0 } else { 1.0 },
            scene_description: Some(description),
        }
    }

    /// Runs a find query for `subject` and attaches a sentence stating whether
    /// the subject was found.
    fn check_state(&self, subject: &str, scene: &SceneGraph) -> SceneResult {
        let mut base = self.find_elements(subject, scene);
        base.scene_description = build_state_description(subject, &base);
        base
    }

    /// Counts the nodes whose score for `subject` reaches [`COUNT_THRESHOLD`].
    ///
    /// The count is reported only through the description; `matches` stays
    /// empty and confidence is `1.0` when at least one node matched, else `0.0`.
    fn count_elements(&self, subject: &str, scene: &SceneGraph) -> SceneResult {
        let ctx = MatchContext::from_query(subject);
        let count = scene
            .iter()
            .filter(|node| {
                let (score, _) = score_node(node, &ctx, scene);
                score >= COUNT_THRESHOLD
            })
            .count();
        let description = format!(
            "Found {count} element{} matching \"{}\".",
            if count == 1 { "" } else { "s" },
            subject
        );
        SceneResult {
            matches: vec![],
            confidence: if count > 0 { 1.0 } else { 0.0 },
            scene_description: Some(description),
        }
    }
}

/// Scores every node of `scene` against `ctx`.
///
/// The scorer's reason is kept next to each score so that a match can be
/// explained with the reason produced for the actual query, without scoring
/// the node a second time.
fn score_all_nodes<'g>(
    scene: &'g SceneGraph,
    ctx: &MatchContext,
) -> Vec<(f64, String, &'g SceneNode)> {
    scene
        .iter()
        .map(|node| {
            let (score, reason) = score_node(node, ctx, scene);
            (score, reason, node)
        })
        .collect()
}

/// Converts a scored node into the public [`SceneMatch`] record.
///
/// `score` and `reason` must come from scoring `node` against the user's
/// query. Missing role or label become empty strings, and the ancestor path is
/// resolved through `scene`.
fn build_scene_match(
    score: f64,
    reason: String,
    node: &SceneNode,
    scene: &SceneGraph,
) -> SceneMatch {
    let element_label = node
        .text_labels()
        .first()
        .copied()
        .unwrap_or("")
        .to_string();
    // An empty reason, or one that only mentions the enabled state, says
    // nothing about the query, so show the numeric score instead.
    // NOTE(review): "enabled state only" is presumably emitted by `score_node`
    // when no query term contributed — confirm against the matcher.
    let match_reason = if reason.is_empty() || reason == "enabled state only" {
        format!("score {score:.2}")
    } else {
        reason
    };
    SceneMatch {
        element_role: node.role.clone().unwrap_or_default(),
        element_label,
        element_path: ancestor_roles(node, scene),
        bounds: node.bounds,
        match_score: score,
        match_reason,
    }
}
/// Collects the roles of `node`'s ancestors, outermost first.
///
/// The parent chain is followed through `scene` until a node has no parent or
/// a parent id cannot be resolved. Ancestors without a role contribute an
/// empty string. The node itself is not included.
fn ancestor_roles(node: &SceneNode, scene: &SceneGraph) -> Vec<String> {
    let mut roles = Vec::new();
    let mut cursor = node.parent;
    while let Some(ancestor) = cursor.and_then(|id| scene.get(id)) {
        roles.push(ancestor.role.clone().unwrap_or_default());
        cursor = ancestor.parent;
    }
    // Collected nearest-first; callers expect the path from the top down.
    roles.reverse();
    roles
}
/// Builds a one-sentence summary of the scene's shallowest elements.
///
/// Only nodes with `depth` of at most 1 are listed, each as `role "label"`,
/// using `element` and `(unlabelled)` as placeholders for a missing role or
/// label. Dedicated sentences are returned for an empty scene and for a scene
/// with no such nodes.
fn build_scene_description(scene: &SceneGraph) -> String {
    if scene.is_empty() {
        return String::from("The screen appears to be empty.");
    }

    let summaries: Vec<String> = scene
        .iter()
        .filter(|node| node.depth <= 1)
        .map(|node| {
            let role = node.role.as_deref().unwrap_or("element");
            let labels = node.text_labels();
            let label = labels.first().copied().unwrap_or("(unlabelled)");
            format!("{role} \"{label}\"")
        })
        .collect();

    if summaries.is_empty() {
        String::from("No visible top-level elements found.")
    } else {
        format!("On screen: {}.", summaries.join(", "))
    }
}
/// Formats the answer to a presence check.
///
/// The subject counts as present when `result` holds at least one match and
/// its confidence reaches [`FIND_THRESHOLD`]. Always returns `Some`.
fn build_state_description(subject: &str, result: &SceneResult) -> Option<String> {
    let has_match = !result.matches.is_empty();
    let confident = result.confidence >= FIND_THRESHOLD;
    let verdict = if has_match && confident {
        "present"
    } else {
        "not found"
    };
    Some(format!("\"{subject}\" is {verdict}."))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::intent::{build_scene_from_nodes, NodeId, SceneNode};

    // ---- fixtures -------------------------------------------------------

    /// Enabled, parentless node at depth 1 with the given role and bounds and
    /// no text attributes; the specific builders below fill in the rest.
    fn top_level(id: usize, role: &str, bounds: (f64, f64, f64, f64)) -> SceneNode {
        SceneNode {
            id: NodeId(id),
            parent: None,
            children: vec![],
            role: Some(role.into()),
            title: None,
            label: None,
            value: None,
            description: None,
            identifier: None,
            bounds: Some(bounds),
            enabled: true,
            depth: 1,
        }
    }

    /// Vertical offset used to stack fixture nodes by id.
    fn row_offset(id: usize) -> f64 {
        f64::from(id as u32) * 40.0
    }

    /// Button identified by its title.
    fn btn(id: usize, title: &str) -> SceneNode {
        let mut node = top_level(id, "AXButton", (0.0, row_offset(id), 80.0, 30.0));
        node.title = Some(title.into());
        node
    }

    /// Text field identified by its label.
    fn field(id: usize, label: &str) -> SceneNode {
        let mut node = top_level(id, "AXTextField", (100.0, row_offset(id), 200.0, 25.0));
        node.label = Some(label.into());
        node
    }

    /// Root-level window identified by its title.
    fn window(id: usize, title: &str) -> SceneNode {
        let mut node = top_level(id, "AXWindow", (0.0, 0.0, 800.0, 600.0));
        node.title = Some(title.into());
        node.depth = 0;
        node
    }

    /// Button nested below the node with id `parent_id`.
    fn child_btn(id: usize, parent_id: usize, title: &str) -> SceneNode {
        let mut node = btn(id, title);
        node.parent = Some(NodeId(parent_id));
        node.depth = 2;
        node
    }

    fn engine() -> SceneEngine {
        SceneEngine::new()
    }

    /// Builds a scene from `nodes` and runs `query` against it.
    fn ask(nodes: Vec<SceneNode>, query: &str) -> SceneResult {
        let scene = build_scene_from_nodes(nodes);
        engine().query(query, &scene)
    }

    // ---- query parsing --------------------------------------------------

    #[test]
    fn parse_query_type_find_element_by_default() {
        assert_eq!(
            parse_query_type("submit button"),
            QueryType::FindElement("submit button".to_string())
        );
    }

    #[test]
    fn parse_query_type_find_strips_find_prefix() {
        assert_eq!(
            parse_query_type("find the login button"),
            QueryType::FindElement("login button".to_string())
        );
    }

    #[test]
    fn parse_query_type_describe_scene_variants() {
        let variants = [
            "what's on screen?",
            "describe the screen",
            "describe the interface",
            "list elements",
        ];
        for query in variants {
            assert_eq!(
                parse_query_type(query),
                QueryType::DescribeScene,
                "failed for: {query}"
            );
        }
    }

    #[test]
    fn parse_query_type_check_state_strips_prefix() {
        assert_eq!(
            parse_query_type("is the dialog open?"),
            QueryType::CheckState("dialog open?".to_string())
        );
    }

    #[test]
    fn parse_query_type_count_elements_strips_prefix() {
        assert_eq!(
            parse_query_type("how many buttons are there?"),
            QueryType::CountElements("buttons are there?".to_string())
        );
    }

    // ---- find -----------------------------------------------------------

    #[test]
    fn find_element_exact_label_match_returns_match() {
        let result = ask(
            vec![btn(0, "Submit"), btn(1, "Cancel")],
            "find the submit button",
        );
        assert!(!result.matches.is_empty());
        assert!(result.confidence > 0.5, "confidence={}", result.confidence);
        assert_eq!(result.matches[0].element_role, "AXButton");
    }

    #[test]
    fn find_element_fuzzy_label_match_submit_order() {
        let result = ask(vec![btn(0, "Submit Order"), btn(1, "Cancel")], "Submit");
        assert!(!result.matches.is_empty());
        let best = &result.matches[0];
        assert!(
            best.element_label.to_lowercase().contains("submit"),
            "expected label containing 'submit', got '{}'",
            best.element_label
        );
    }

    #[test]
    fn find_element_role_and_label_returns_button() {
        let result = ask(vec![btn(0, "Login"), field(1, "Login")], "login button");
        assert!(!result.matches.is_empty());
        assert_eq!(
            result.matches[0].element_role, "AXButton",
            "button should outrank text field for 'button' query"
        );
    }

    #[test]
    fn find_element_no_match_returns_empty_low_confidence() {
        let result = ask(
            vec![btn(0, "Foo"), btn(1, "Bar")],
            "find the purple wizard hat",
        );
        assert!(
            result.matches.is_empty() || result.confidence < 0.15,
            "expected low confidence for unrelated query, got confidence={}",
            result.confidence
        );
    }

    #[test]
    fn find_element_matches_ranked_descending() {
        let result = ask(
            vec![btn(0, "Submit"), btn(1, "Submit Form"), btn(2, "Cancel")],
            "submit",
        );
        let scores: Vec<f64> = result.matches.iter().map(|m| m.match_score).collect();
        for pair in scores.windows(2) {
            let (earlier, later) = (pair[0], pair[1]);
            assert!(
                earlier >= later,
                "scores not sorted: {:.3} < {:.3}",
                earlier,
                later
            );
        }
    }

    #[test]
    fn find_element_match_reason_non_empty() {
        let result = ask(vec![btn(0, "OK")], "ok");
        for found in &result.matches {
            assert!(
                !found.match_reason.is_empty(),
                "match_reason must not be empty"
            );
        }
    }

    #[test]
    fn find_element_hierarchy_match_path_populated() {
        let nodes = vec![window(0, "Login"), child_btn(1, 0, "Confirm")];
        let result = ask(nodes, "confirm button");
        let confirm_match = result
            .matches
            .iter()
            .find(|found| found.element_label == "Confirm");
        assert!(confirm_match.is_some(), "should find 'Confirm' button");
        let path = &confirm_match.unwrap().element_path;
        assert!(!path.is_empty(), "path should list ancestors");
        assert!(
            path.iter().any(|role| role == "AXWindow"),
            "path should include AXWindow, got: {path:?}"
        );
    }

    #[test]
    fn find_element_bounds_propagated() {
        let result = ask(vec![btn(0, "Save")], "save");
        assert!(result.matches[0].bounds.is_some());
    }

    // ---- describe -------------------------------------------------------

    #[test]
    fn describe_scene_lists_top_level_elements() {
        let result = ask(vec![btn(0, "OK"), btn(1, "Cancel")], "what's on screen?");
        let desc = result
            .scene_description
            .expect("description should be present");
        assert!(desc.contains("OK"), "should mention 'OK'; got: {desc}");
        assert!(
            desc.contains("Cancel"),
            "should mention 'Cancel'; got: {desc}"
        );
    }

    #[test]
    fn describe_scene_empty_screen_message() {
        let empty = SceneGraph::empty();
        let result = engine().query("describe the screen", &empty);
        let desc = result.scene_description.expect("description present");
        assert!(!desc.is_empty());
        assert!(desc.to_lowercase().contains("empty"));
    }

    #[test]
    fn describe_scene_no_matches_returned() {
        let result = ask(vec![btn(0, "OK")], "what's on screen?");
        assert!(result.matches.is_empty());
    }

    // ---- check ----------------------------------------------------------

    #[test]
    fn check_state_present_element_returns_present_description() {
        let result = ask(
            vec![btn(0, "Close Dialog"), btn(1, "OK")],
            "is the dialog open?",
        );
        let desc = result
            .scene_description
            .expect("check state should produce description");
        assert!(desc.contains("present"), "expected 'present' in: {desc}");
    }

    #[test]
    fn check_state_absent_element_returns_not_found() {
        let result = ask(vec![btn(0, "OK")], "is the export wizard shown?");
        let desc = result.scene_description.unwrap_or_default();
        let absent = result.matches.is_empty() || desc.contains("not found");
        assert!(absent, "should indicate absence; desc: {desc}");
    }

    // ---- count ----------------------------------------------------------

    #[test]
    fn count_elements_returns_correct_count_description() {
        let nodes = vec![
            btn(0, "Apply Settings"),
            btn(1, "Apply Changes"),
            btn(2, "Apply All"),
            field(3, "Username"),
        ];
        let result = ask(nodes, "how many apply buttons");
        let desc = result
            .scene_description
            .expect("count should produce description");
        assert!(
            !desc.contains("Found 0"),
            "expected non-zero count; desc: {desc}"
        );
        assert!(result.confidence > 0.0);
    }

    #[test]
    fn count_elements_zero_match_description() {
        let result = ask(vec![btn(0, "OK")], "how many sliders are visible?");
        let desc = result.scene_description.unwrap_or_default();
        assert!(
            desc.contains('0') || result.confidence == 0.0,
            "expected zero count; desc: {desc}"
        );
    }

    // ---- robustness -----------------------------------------------------

    #[test]
    fn all_query_types_safe_on_empty_scene() {
        let subject = engine();
        let empty = SceneGraph::empty();
        // Only checks that none of the query kinds panics on an empty scene.
        let _ = subject.query("find the submit button", &empty);
        let _ = subject.query("what's on screen?", &empty);
        let _ = subject.query("is the save button visible?", &empty);
        let _ = subject.query("how many items are in the list?", &empty);
    }
}