Skip to main content

car_browser/perception/
pipeline.rs

//! Perception pipeline trait and basic implementation.
2
3use async_trait::async_trait;
4use thiserror::Error;
5
6use crate::models::{A11yNode, Viewport};
7use super::ax_converter::AxConverter;
8use super::signals::SignalDetector;
9use super::ui_map::{TextBlock, UiMap};
10
11#[derive(Error, Debug)]
12pub enum PerceptionError {
13    #[error("Conversion failed: {0}")]
14    ConversionFailed(String),
15    #[error("Signal detection failed: {0}")]
16    SignalDetectionFailed(String),
17}
18
19/// Trait for perception pipelines that convert raw browser state to UiMap.
20#[async_trait]
21pub trait PerceptionPipeline: Send + Sync {
22    async fn perceive(
23        &self,
24        screenshot: &[u8],
25        a11y_nodes: &[A11yNode],
26        url: &str,
27        viewport: Viewport,
28    ) -> Result<UiMap, PerceptionError>;
29}
30
31/// Basic perception pipeline using accessibility tree only (no visual analysis).
32pub struct BasicPerceptionPipeline {
33    converter: AxConverter,
34    signal_detector: SignalDetector,
35}
36
37impl BasicPerceptionPipeline {
38    pub fn new() -> Self {
39        Self {
40            converter: AxConverter::new(),
41            signal_detector: SignalDetector::new(),
42        }
43    }
44}
45
46impl Default for BasicPerceptionPipeline {
47    fn default() -> Self {
48        Self::new()
49    }
50}
51
52#[async_trait]
53impl PerceptionPipeline for BasicPerceptionPipeline {
54    async fn perceive(
55        &self,
56        _screenshot: &[u8],
57        a11y_nodes: &[A11yNode],
58        url: &str,
59        viewport: Viewport,
60    ) -> Result<UiMap, PerceptionError> {
61        let elements = self.converter.convert(a11y_nodes);
62        let text_blocks = self.extract_text_blocks(a11y_nodes);
63        let page_signals = self.signal_detector.detect(a11y_nodes);
64
65        Ok(UiMap::new(
66            url.to_string(),
67            elements,
68            text_blocks,
69            page_signals,
70            viewport,
71            String::new(),
72        ))
73    }
74}
75
76impl BasicPerceptionPipeline {
77    fn extract_text_blocks(&self, nodes: &[A11yNode]) -> Vec<TextBlock> {
78        nodes
79            .iter()
80            .filter(|n| {
81                let role = n.role.to_lowercase();
82                matches!(role.as_str(), "statictext" | "label" | "heading")
83                    && n.name.is_some()
84                    && n.bounds.width > 0.0
85                    && n.bounds.height > 0.0
86            })
87            .map(|n| {
88                TextBlock::from_ax(
89                    n.name.clone().unwrap_or_default(),
90                    n.bounds,
91                )
92            })
93            .collect()
94    }
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Bounds;

    /// Builds a named node with no value, no children, not focused and not disabled.
    fn named_node(id: &str, role: &str, label: &str, bounds: Bounds, focusable: bool) -> A11yNode {
        A11yNode {
            node_id: id.to_string(),
            role: role.to_string(),
            name: Some(label.to_string()),
            value: None,
            bounds,
            children: vec![],
            focusable,
            focused: false,
            disabled: false,
        }
    }

    /// One button plus one static-text node: both become elements, and only the
    /// static-text node becomes a text block.
    #[tokio::test]
    async fn test_basic_pipeline() {
        let submit = named_node(
            "n0",
            "button",
            "Submit",
            Bounds::new(100.0, 100.0, 80.0, 30.0),
            true,
        );
        let welcome = named_node(
            "n1",
            "statictext",
            "Welcome",
            Bounds::new(0.0, 0.0, 200.0, 20.0),
            false,
        );
        let nodes = vec![submit, welcome];
        let viewport = Viewport {
            width: 1280,
            height: 720,
            device_pixel_ratio: 2.0,
        };

        let pipeline = BasicPerceptionPipeline::new();
        let ui_map = pipeline
            .perceive(&[], &nodes, "https://example.com", viewport)
            .await
            .unwrap();

        assert_eq!(ui_map.elements.len(), 2);
        assert_eq!(ui_map.text_blocks.len(), 1);
        assert_eq!(ui_map.text_blocks[0].text, "Welcome");
    }
}