car_browser/perception/
pipeline.rs1use async_trait::async_trait;
4use thiserror::Error;
5
6use super::ax_converter::AxConverter;
7use super::signals::SignalDetector;
8use super::ui_map::{TextBlock, UiMap};
9use crate::models::{A11yNode, Viewport};
10
11#[derive(Error, Debug)]
12pub enum PerceptionError {
13 #[error("Conversion failed: {0}")]
14 ConversionFailed(String),
15 #[error("Signal detection failed: {0}")]
16 SignalDetectionFailed(String),
17}
18
19#[async_trait]
21pub trait PerceptionPipeline: Send + Sync {
22 async fn perceive(
23 &self,
24 screenshot: &[u8],
25 a11y_nodes: &[A11yNode],
26 url: &str,
27 viewport: Viewport,
28 ) -> Result<UiMap, PerceptionError>;
29}
30
31pub struct BasicPerceptionPipeline {
33 converter: AxConverter,
34 signal_detector: SignalDetector,
35}
36
37impl BasicPerceptionPipeline {
38 pub fn new() -> Self {
39 Self {
40 converter: AxConverter::new(),
41 signal_detector: SignalDetector::new(),
42 }
43 }
44}
45
46impl Default for BasicPerceptionPipeline {
47 fn default() -> Self {
48 Self::new()
49 }
50}
51
52#[async_trait]
53impl PerceptionPipeline for BasicPerceptionPipeline {
54 async fn perceive(
55 &self,
56 _screenshot: &[u8],
57 a11y_nodes: &[A11yNode],
58 url: &str,
59 viewport: Viewport,
60 ) -> Result<UiMap, PerceptionError> {
61 let elements = self.converter.convert(a11y_nodes);
62 let text_blocks = self.extract_text_blocks(a11y_nodes);
63 let page_signals = self.signal_detector.detect(a11y_nodes);
64
65 Ok(UiMap::new(
66 url.to_string(),
67 elements,
68 text_blocks,
69 page_signals,
70 viewport,
71 String::new(),
72 ))
73 }
74}
75
76impl BasicPerceptionPipeline {
77 fn extract_text_blocks(&self, nodes: &[A11yNode]) -> Vec<TextBlock> {
78 nodes
79 .iter()
80 .filter(|n| {
81 let role = n.role.to_lowercase();
82 matches!(role.as_str(), "statictext" | "label" | "heading")
83 && n.name.is_some()
84 && n.bounds.width > 0.0
85 && n.bounds.height > 0.0
86 })
87 .map(|n| TextBlock::from_ax(n.name.clone().unwrap_or_default(), n.bounds))
88 .collect()
89 }
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Bounds;

    /// Builds a named node with no value, no children, not focused and not
    /// disabled, so the fixture below only spells out what varies.
    fn named_node(
        node_id: &str,
        role: &str,
        name: &str,
        bounds: Bounds,
        focusable: bool,
    ) -> A11yNode {
        A11yNode {
            node_id: node_id.to_string(),
            role: role.to_string(),
            name: Some(name.to_string()),
            value: None,
            bounds,
            children: vec![],
            focusable,
            focused: false,
            disabled: false,
        }
    }

    /// One button plus one static-text node should yield two elements and a
    /// single text block carrying the static text's name.
    #[tokio::test]
    async fn test_basic_pipeline() {
        let pipeline = BasicPerceptionPipeline::new();

        let submit_button = named_node(
            "n0",
            "button",
            "Submit",
            Bounds::new(100.0, 100.0, 80.0, 30.0),
            true,
        );
        let welcome_text = named_node(
            "n1",
            "statictext",
            "Welcome",
            Bounds::new(0.0, 0.0, 200.0, 20.0),
            false,
        );
        let nodes = vec![submit_button, welcome_text];

        let viewport = Viewport {
            width: 1280,
            height: 720,
            device_pixel_ratio: 2.0,
        };

        let ui_map = pipeline
            .perceive(&[], &nodes, "https://example.com", viewport)
            .await
            .unwrap();

        assert_eq!(ui_map.elements.len(), 2);
        assert_eq!(ui_map.text_blocks.len(), 1);
        assert_eq!(ui_map.text_blocks[0].text, "Welcome");
    }
}