adk_browser/tools/
extract.rs

//! Extraction tools for reading content from the current page.
2
3use crate::session::BrowserSession;
4use adk_core::{Result, Tool, ToolContext};
5use async_trait::async_trait;
6use serde_json::{json, Value};
7use std::sync::Arc;
8
9/// Tool for extracting text content from elements.
10pub struct ExtractTextTool {
11    browser: Arc<BrowserSession>,
12}
13
14impl ExtractTextTool {
15    /// Create a new extract text tool with a shared browser session.
16    pub fn new(browser: Arc<BrowserSession>) -> Self {
17        Self { browser }
18    }
19}
20
21#[async_trait]
22impl Tool for ExtractTextTool {
23    fn name(&self) -> &str {
24        "browser_extract_text"
25    }
26
27    fn description(&self) -> &str {
28        "Extract text content from one or more elements on the page."
29    }
30
31    fn parameters_schema(&self) -> Option<Value> {
32        Some(json!({
33            "type": "object",
34            "properties": {
35                "selector": {
36                    "type": "string",
37                    "description": "CSS selector for the element(s) to extract text from"
38                },
39                "all": {
40                    "type": "boolean",
41                    "description": "If true, extract from all matching elements. If false, only first match (default: false)"
42                }
43            },
44            "required": ["selector"]
45        }))
46    }
47
48    fn response_schema(&self) -> Option<Value> {
49        Some(json!({
50            "type": "object",
51            "properties": {
52                "success": { "type": "boolean" },
53                "text": { "type": "string" },
54                "texts": {
55                    "type": "array",
56                    "items": { "type": "string" }
57                },
58                "count": { "type": "integer" }
59            }
60        }))
61    }
62
63    async fn execute(&self, _ctx: Arc<dyn ToolContext>, args: Value) -> Result<Value> {
64        let selector = args
65            .get("selector")
66            .and_then(|v| v.as_str())
67            .ok_or_else(|| adk_core::AdkError::Tool("Missing 'selector' parameter".to_string()))?;
68
69        let all = args.get("all").and_then(|v| v.as_bool()).unwrap_or(false);
70
71        if all {
72            let elements = self.browser.find_elements(selector).await?;
73            let mut texts = Vec::new();
74
75            for element in elements {
76                if let Ok(text) = element.text().await {
77                    texts.push(text);
78                }
79            }
80
81            Ok(json!({
82                "success": true,
83                "texts": texts,
84                "count": texts.len()
85            }))
86        } else {
87            let text = self.browser.get_text(selector).await?;
88
89            Ok(json!({
90                "success": true,
91                "text": text
92            }))
93        }
94    }
95}
96
97/// Tool for extracting attribute values.
98pub struct ExtractAttributeTool {
99    browser: Arc<BrowserSession>,
100}
101
102impl ExtractAttributeTool {
103    pub fn new(browser: Arc<BrowserSession>) -> Self {
104        Self { browser }
105    }
106}
107
108#[async_trait]
109impl Tool for ExtractAttributeTool {
110    fn name(&self) -> &str {
111        "browser_extract_attribute"
112    }
113
114    fn description(&self) -> &str {
115        "Extract an attribute value from an element (e.g., href, src, value)."
116    }
117
118    fn parameters_schema(&self) -> Option<Value> {
119        Some(json!({
120            "type": "object",
121            "properties": {
122                "selector": {
123                    "type": "string",
124                    "description": "CSS selector for the element"
125                },
126                "attribute": {
127                    "type": "string",
128                    "description": "Name of the attribute to extract (e.g., 'href', 'src', 'value', 'class')"
129                }
130            },
131            "required": ["selector", "attribute"]
132        }))
133    }
134
135    async fn execute(&self, _ctx: Arc<dyn ToolContext>, args: Value) -> Result<Value> {
136        let selector = args
137            .get("selector")
138            .and_then(|v| v.as_str())
139            .ok_or_else(|| adk_core::AdkError::Tool("Missing 'selector' parameter".to_string()))?;
140
141        let attribute = args
142            .get("attribute")
143            .and_then(|v| v.as_str())
144            .ok_or_else(|| adk_core::AdkError::Tool("Missing 'attribute' parameter".to_string()))?;
145
146        let value = self.browser.get_attribute(selector, attribute).await?;
147
148        Ok(json!({
149            "success": true,
150            "attribute": attribute,
151            "value": value
152        }))
153    }
154}
155
156/// Tool for extracting links from the page.
157pub struct ExtractLinksTool {
158    browser: Arc<BrowserSession>,
159}
160
161impl ExtractLinksTool {
162    pub fn new(browser: Arc<BrowserSession>) -> Self {
163        Self { browser }
164    }
165}
166
167#[async_trait]
168impl Tool for ExtractLinksTool {
169    fn name(&self) -> &str {
170        "browser_extract_links"
171    }
172
173    fn description(&self) -> &str {
174        "Extract all links from the page or a specific container."
175    }
176
177    fn parameters_schema(&self) -> Option<Value> {
178        Some(json!({
179            "type": "object",
180            "properties": {
181                "selector": {
182                    "type": "string",
183                    "description": "Optional CSS selector to limit link extraction to a container"
184                },
185                "include_text": {
186                    "type": "boolean",
187                    "description": "Include link text in results (default: true)"
188                }
189            }
190        }))
191    }
192
193    async fn execute(&self, _ctx: Arc<dyn ToolContext>, args: Value) -> Result<Value> {
194        let container = args.get("selector").and_then(|v| v.as_str());
195        let include_text = args.get("include_text").and_then(|v| v.as_bool()).unwrap_or(true);
196
197        let link_selector = if let Some(sel) = container {
198            format!("{} a[href]", sel)
199        } else {
200            "a[href]".to_string()
201        };
202
203        let elements = self.browser.find_elements(&link_selector).await?;
204        let mut links = Vec::new();
205
206        for element in elements {
207            let href = element.attr("href").await.ok().flatten();
208            let text = if include_text { element.text().await.ok() } else { None };
209
210            if let Some(href) = href {
211                links.push(json!({
212                    "href": href,
213                    "text": text
214                }));
215            }
216        }
217
218        Ok(json!({
219            "success": true,
220            "links": links,
221            "count": links.len()
222        }))
223    }
224}
225
226/// Tool for getting page info (title, URL, etc.).
227pub struct PageInfoTool {
228    browser: Arc<BrowserSession>,
229}
230
231impl PageInfoTool {
232    pub fn new(browser: Arc<BrowserSession>) -> Self {
233        Self { browser }
234    }
235}
236
237#[async_trait]
238impl Tool for PageInfoTool {
239    fn name(&self) -> &str {
240        "browser_page_info"
241    }
242
243    fn description(&self) -> &str {
244        "Get information about the current page (title, URL, etc.)."
245    }
246
247    fn parameters_schema(&self) -> Option<Value> {
248        Some(json!({
249            "type": "object",
250            "properties": {}
251        }))
252    }
253
254    async fn execute(&self, _ctx: Arc<dyn ToolContext>, _args: Value) -> Result<Value> {
255        let url = self.browser.current_url().await?;
256        let title = self.browser.title().await?;
257
258        Ok(json!({
259            "success": true,
260            "url": url,
261            "title": title
262        }))
263    }
264}
265
266/// Tool for getting the page HTML source.
267pub struct PageSourceTool {
268    browser: Arc<BrowserSession>,
269}
270
271impl PageSourceTool {
272    pub fn new(browser: Arc<BrowserSession>) -> Self {
273        Self { browser }
274    }
275}
276
277#[async_trait]
278impl Tool for PageSourceTool {
279    fn name(&self) -> &str {
280        "browser_page_source"
281    }
282
283    fn description(&self) -> &str {
284        "Get the HTML source of the current page. Warning: may be large."
285    }
286
287    fn parameters_schema(&self) -> Option<Value> {
288        Some(json!({
289            "type": "object",
290            "properties": {
291                "max_length": {
292                    "type": "integer",
293                    "description": "Maximum characters to return (default: 50000)"
294                }
295            }
296        }))
297    }
298
299    async fn execute(&self, _ctx: Arc<dyn ToolContext>, args: Value) -> Result<Value> {
300        let max_length = args.get("max_length").and_then(|v| v.as_u64()).unwrap_or(50000) as usize;
301
302        let source = self.browser.page_source().await?;
303        let truncated = source.len() > max_length;
304        let html =
305            if truncated { source.chars().take(max_length).collect::<String>() } else { source };
306
307        Ok(json!({
308            "success": true,
309            "html": html,
310            "truncated": truncated,
311            "total_length": html.len()
312        }))
313    }
314}