Skip to main content

car_browser/perception/
pipeline.rs

//! Perception pipeline trait and a basic, accessibility-tree-only implementation.
2
3use async_trait::async_trait;
4use thiserror::Error;
5
6use super::ax_converter::AxConverter;
7use super::signals::SignalDetector;
8use super::ui_map::{TextBlock, UiMap};
9use crate::models::{A11yNode, Viewport};
10
11#[derive(Error, Debug)]
12pub enum PerceptionError {
13    #[error("Conversion failed: {0}")]
14    ConversionFailed(String),
15    #[error("Signal detection failed: {0}")]
16    SignalDetectionFailed(String),
17}
18
19/// Trait for perception pipelines that convert raw browser state to UiMap.
20#[async_trait]
21pub trait PerceptionPipeline: Send + Sync {
22    async fn perceive(
23        &self,
24        screenshot: &[u8],
25        a11y_nodes: &[A11yNode],
26        url: &str,
27        viewport: Viewport,
28    ) -> Result<UiMap, PerceptionError>;
29}
30
31/// Basic perception pipeline using accessibility tree only (no visual analysis).
32pub struct BasicPerceptionPipeline {
33    converter: AxConverter,
34    signal_detector: SignalDetector,
35}
36
37impl BasicPerceptionPipeline {
38    pub fn new() -> Self {
39        Self {
40            converter: AxConverter::new(),
41            signal_detector: SignalDetector::new(),
42        }
43    }
44}
45
46impl Default for BasicPerceptionPipeline {
47    fn default() -> Self {
48        Self::new()
49    }
50}
51
52#[async_trait]
53impl PerceptionPipeline for BasicPerceptionPipeline {
54    async fn perceive(
55        &self,
56        _screenshot: &[u8],
57        a11y_nodes: &[A11yNode],
58        url: &str,
59        viewport: Viewport,
60    ) -> Result<UiMap, PerceptionError> {
61        let elements = self.converter.convert(a11y_nodes);
62        let text_blocks = self.extract_text_blocks(a11y_nodes);
63        let page_signals = self.signal_detector.detect(a11y_nodes);
64
65        Ok(UiMap::new(
66            url.to_string(),
67            elements,
68            text_blocks,
69            page_signals,
70            viewport,
71            String::new(),
72        ))
73    }
74}
75
76impl BasicPerceptionPipeline {
77    fn extract_text_blocks(&self, nodes: &[A11yNode]) -> Vec<TextBlock> {
78        nodes
79            .iter()
80            .filter(|n| {
81                let role = n.role.to_lowercase();
82                matches!(role.as_str(), "statictext" | "label" | "heading")
83                    && n.name.is_some()
84                    && n.bounds.width > 0.0
85                    && n.bounds.height > 0.0
86            })
87            .map(|n| TextBlock::from_ax(n.name.clone().unwrap_or_default(), n.bounds))
88            .collect()
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Bounds;

    /// Builds a named leaf node with no value that is neither focused nor disabled.
    fn leaf(id: &str, role: &str, name: &str, bounds: Bounds, focusable: bool) -> A11yNode {
        A11yNode {
            node_id: id.to_string(),
            role: role.to_string(),
            name: Some(name.to_string()),
            value: None,
            bounds,
            children: vec![],
            focusable,
            focused: false,
            disabled: false,
        }
    }

    /// One button and one static-text node: both become elements, only the
    /// static text becomes a text block.
    #[tokio::test]
    async fn test_basic_pipeline() {
        let pipeline = BasicPerceptionPipeline::new();

        let submit = leaf(
            "n0",
            "button",
            "Submit",
            Bounds::new(100.0, 100.0, 80.0, 30.0),
            true,
        );
        let welcome = leaf(
            "n1",
            "statictext",
            "Welcome",
            Bounds::new(0.0, 0.0, 200.0, 20.0),
            false,
        );
        let nodes = vec![submit, welcome];

        let viewport = Viewport {
            width: 1280,
            height: 720,
            device_pixel_ratio: 2.0,
        };

        let result = pipeline
            .perceive(&[], &nodes, "https://example.com", viewport)
            .await;
        let ui_map = result.unwrap();

        assert_eq!(ui_map.elements.len(), 2);
        assert_eq!(ui_map.text_blocks.len(), 1);
        assert_eq!(ui_map.text_blocks[0].text, "Welcome");
    }
}