// car_browser/perception/pipeline.rs
use async_trait::async_trait;
4use thiserror::Error;
5
6use crate::models::{A11yNode, Viewport};
7use super::ax_converter::AxConverter;
8use super::signals::SignalDetector;
9use super::ui_map::{TextBlock, UiMap};
10
11#[derive(Error, Debug)]
12pub enum PerceptionError {
13 #[error("Conversion failed: {0}")]
14 ConversionFailed(String),
15 #[error("Signal detection failed: {0}")]
16 SignalDetectionFailed(String),
17}
18
19#[async_trait]
21pub trait PerceptionPipeline: Send + Sync {
22 async fn perceive(
23 &self,
24 screenshot: &[u8],
25 a11y_nodes: &[A11yNode],
26 url: &str,
27 viewport: Viewport,
28 ) -> Result<UiMap, PerceptionError>;
29}
30
31pub struct BasicPerceptionPipeline {
33 converter: AxConverter,
34 signal_detector: SignalDetector,
35}
36
37impl BasicPerceptionPipeline {
38 pub fn new() -> Self {
39 Self {
40 converter: AxConverter::new(),
41 signal_detector: SignalDetector::new(),
42 }
43 }
44}
45
46impl Default for BasicPerceptionPipeline {
47 fn default() -> Self {
48 Self::new()
49 }
50}
51
52#[async_trait]
53impl PerceptionPipeline for BasicPerceptionPipeline {
54 async fn perceive(
55 &self,
56 _screenshot: &[u8],
57 a11y_nodes: &[A11yNode],
58 url: &str,
59 viewport: Viewport,
60 ) -> Result<UiMap, PerceptionError> {
61 let elements = self.converter.convert(a11y_nodes);
62 let text_blocks = self.extract_text_blocks(a11y_nodes);
63 let page_signals = self.signal_detector.detect(a11y_nodes);
64
65 Ok(UiMap::new(
66 url.to_string(),
67 elements,
68 text_blocks,
69 page_signals,
70 viewport,
71 String::new(),
72 ))
73 }
74}
75
76impl BasicPerceptionPipeline {
77 fn extract_text_blocks(&self, nodes: &[A11yNode]) -> Vec<TextBlock> {
78 nodes
79 .iter()
80 .filter(|n| {
81 let role = n.role.to_lowercase();
82 matches!(role.as_str(), "statictext" | "label" | "heading")
83 && n.name.is_some()
84 && n.bounds.width > 0.0
85 && n.bounds.height > 0.0
86 })
87 .map(|n| {
88 TextBlock::from_ax(
89 n.name.clone().unwrap_or_default(),
90 n.bounds,
91 )
92 })
93 .collect()
94 }
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Bounds;

    /// Builds a named, childless node that is neither focused nor disabled
    /// and has no value.
    fn named_node(id: &str, role: &str, name: &str, bounds: Bounds, focusable: bool) -> A11yNode {
        A11yNode {
            node_id: id.to_string(),
            role: role.to_string(),
            name: Some(name.to_string()),
            value: None,
            bounds,
            children: vec![],
            focusable,
            focused: false,
            disabled: false,
        }
    }

    /// A button plus a static-text node yield two elements and one text block.
    #[tokio::test]
    async fn test_basic_pipeline() {
        let pipeline = BasicPerceptionPipeline::new();

        let submit = named_node(
            "n0",
            "button",
            "Submit",
            Bounds::new(100.0, 100.0, 80.0, 30.0),
            true,
        );
        let welcome = named_node(
            "n1",
            "statictext",
            "Welcome",
            Bounds::new(0.0, 0.0, 200.0, 20.0),
            false,
        );
        let nodes = vec![submit, welcome];

        let viewport = Viewport {
            width: 1280,
            height: 720,
            device_pixel_ratio: 2.0,
        };

        let ui_map = pipeline
            .perceive(&[], &nodes, "https://example.com", viewport)
            .await
            .unwrap();

        assert_eq!(ui_map.elements.len(), 2);
        assert_eq!(ui_map.text_blocks.len(), 1);
        assert_eq!(ui_map.text_blocks[0].text, "Welcome");
    }
}