adk_browser/lib.rs
1//! # adk-browser
2#![allow(clippy::result_large_err)]
3//!
4//! Browser automation tools for ADK agents using WebDriver (via thirtyfour).
5//!
6//! ## Overview
7//!
8//! This crate provides browser automation capabilities as ADK tools, allowing
9//! LLM agents to interact with web pages. Tools are designed to work with any
10//! LlmAgent and inherit all ADK benefits (callbacks, session management, etc.).
11//!
12//! ## Quick Start
13//!
14//! ```rust,ignore
15//! use adk_browser::{BrowserSession, BrowserConfig, BrowserToolset};
16//! use adk_agent::LlmAgentBuilder;
17//! use std::sync::Arc;
18//!
//! async fn example() -> anyhow::Result<()> {
//!     // Create browser session
//!     let config = BrowserConfig::new()
//!         .headless(true)
//!         .viewport(1920, 1080);
//!
//!     let browser = Arc::new(BrowserSession::new(config));
//!     browser.start().await?;
//!
//!     // Create toolset
//!     let toolset = BrowserToolset::new(browser.clone());
//!
//!     // Add tools to agent (example - requires model)
//!     // let agent = LlmAgentBuilder::new("browser_agent")
//!     //     .model(model)
//!     //     .instruction("You are a web automation assistant.")
//!     //     .tools(toolset.all_tools())
//!     //     .build()?;
//!
//!     // Clean up
//!     browser.stop().await?;
//!     Ok(())
//! }
42//! ```
43//!
44//! ## Available Tools
45//!
46//! ### Navigation
47//! - `browser_navigate` - Navigate to a URL
48//! - `browser_back` - Go back in history
49//! - `browser_forward` - Go forward in history
50//! - `browser_refresh` - Refresh the page
51//!
52//! ### Interaction
53//! - `browser_click` - Click on an element
54//! - `browser_double_click` - Double-click an element
55//! - `browser_type` - Type text into an input
56//! - `browser_clear` - Clear an input field
57//! - `browser_select` - Select from a dropdown
58//!
59//! ### Extraction
60//! - `browser_extract_text` - Get text from elements
61//! - `browser_extract_attribute` - Get attribute values
62//! - `browser_extract_links` - Get all links on page
63//! - `browser_page_info` - Get current URL and title
64//! - `browser_page_source` - Get HTML source
65//!
66//! ### Screenshots
67//! - `browser_screenshot` - Capture page or element screenshot
68//!
69//! ### Waiting
70//! - `browser_wait_for_element` - Wait for element to appear
71//! - `browser_wait` - Wait for a duration
72//! - `browser_wait_for_page_load` - Wait for page to load
73//! - `browser_wait_for_text` - Wait for text to appear
74//!
75//! ### JavaScript
76//! - `browser_evaluate_js` - Execute JavaScript code
77//! - `browser_scroll` - Scroll the page
78//! - `browser_hover` - Hover over an element
79//! - `browser_handle_alert` - Handle JavaScript alerts
80//!
81//! ### Cookies
82//! - `browser_get_cookies` - Get all cookies
83//! - `browser_get_cookie` - Get a specific cookie
84//! - `browser_add_cookie` - Add a cookie
85//! - `browser_delete_cookie` - Delete a cookie
86//! - `browser_delete_all_cookies` - Delete all cookies
87//!
88//! ### Windows/Tabs
89//! - `browser_list_windows` - List all windows/tabs
90//! - `browser_new_tab` - Open a new tab
91//! - `browser_new_window` - Open a new window
92//! - `browser_switch_window` - Switch to a window
93//! - `browser_close_window` - Close current window
94//! - `browser_maximize_window` - Maximize window
95//! - `browser_minimize_window` - Minimize window
96//! - `browser_set_window_size` - Set window size
97//!
98//! ### Frames
99//! - `browser_switch_to_frame` - Switch to an iframe
100//! - `browser_switch_to_parent_frame` - Exit current iframe
101//! - `browser_switch_to_default_content` - Exit all iframes
102//!
103//! ### Advanced Actions
104//! - `browser_drag_and_drop` - Drag and drop elements
105//! - `browser_right_click` - Right-click (context menu)
106//! - `browser_focus` - Focus on an element
107//! - `browser_element_state` - Check element state
108//! - `browser_press_key` - Press keyboard keys
109//! - `browser_file_upload` - Upload files
110//! - `browser_print_to_pdf` - Print page to PDF
111//!
112//! ## Requirements
113//!
114//! A WebDriver server (like ChromeDriver, geckodriver, or Selenium) must be
115//! running and accessible. By default, tools connect to `http://localhost:4444`.
116//!
117//! ### Starting ChromeDriver
118//!
119//! ```bash
120//! # Install ChromeDriver (macOS)
121//! brew install chromedriver
122//!
123//! # Start ChromeDriver
124//! chromedriver --port=4444
125//! ```
126//!
127//! ### Using Docker
128//!
129//! ```bash
130//! docker run -d -p 4444:4444 selenium/standalone-chrome
131//! ```
132//!
133//! ## Architecture
134//!
135//! Tools are implemented using the ADK `Tool` trait, allowing them to:
//! - Work with any LLM backend (Gemini, OpenAI, Anthropic)
137//! - Use callbacks for monitoring and control
138//! - Access session state and artifacts
139//! - Compose with other tools and agents
140//!
141//! ```text
142//! ┌─────────────────────────────────────────────────┐
143//! │ LlmAgent │
144//! │ (with callbacks, session, artifacts, memory) │
145//! └─────────────────────────────────────────────────┘
146//! │
147//! ▼
148//! ┌─────────────────────────────────────────────────┐
149//! │ BrowserToolset │
150//! │ NavigateTool, ClickTool, TypeTool, ... │
151//! └─────────────────────────────────────────────────┘
152//! │
153//! ▼
154//! ┌─────────────────────────────────────────────────┐
155//! │ BrowserSession │
156//! │ (wraps thirtyfour WebDriver) │
157//! └─────────────────────────────────────────────────┘
158//! │
159//! ▼
160//! WebDriver Server
161//! (ChromeDriver, etc.)
162//! ```
163
164mod config;
165mod escape;
166mod pool;
167mod session;
168pub mod tools;
169mod toolset;
170
171// Re-export main types
172pub use config::{BrowserConfig, BrowserType};
173pub use escape::escape_js_string;
174pub use pool::BrowserSessionPool;
175pub use session::{BrowserSession, ElementState, shared_session};
176pub use toolset::{BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools};
177
178// Re-export individual tools for selective use
179pub use tools::{
180 // Cookies
181 AddCookieTool,
182 // JavaScript
183 AlertTool,
184 // Navigation
185 BackTool,
186 // Interaction
187 ClearTool,
188 ClickTool,
189 // Windows/Tabs
190 CloseWindowTool,
191 DeleteAllCookiesTool,
192 DeleteCookieTool,
193 DoubleClickTool,
194 // Advanced Actions
195 DragAndDropTool,
196 ElementStateTool,
197 EvaluateJsTool,
198 // Extraction
199 ExtractAttributeTool,
200 ExtractLinksTool,
201 ExtractTextTool,
202 FileUploadTool,
203 FocusTool,
204 ForwardTool,
205 GetCookieTool,
206 GetCookiesTool,
207 HoverTool,
208 ListWindowsTool,
209 MaximizeWindowTool,
210 MinimizeWindowTool,
211 NavigateTool,
212 NewTabTool,
213 NewWindowTool,
214 PageInfoTool,
215 PageSourceTool,
216 PressKeyTool,
217 PrintToPdfTool,
218 RefreshTool,
219 RightClickTool,
220 // Screenshots
221 ScreenshotTool,
222 ScrollTool,
223 SelectTool,
224 SetWindowSizeTool,
225 // Frames
226 SwitchToDefaultContentTool,
227 SwitchToFrameTool,
228 SwitchToParentFrameTool,
229 SwitchWindowTool,
230 TypeTool,
231 // Waiting
232 WaitForElementTool,
233 WaitForPageLoadTool,
234 WaitForTextTool,
235 WaitTool,
236};
237
238/// Prelude module for convenient imports.
239pub mod prelude {
240 pub use crate::config::{BrowserConfig, BrowserType};
241 pub use crate::pool::BrowserSessionPool;
242 pub use crate::session::{BrowserSession, shared_session};
243 pub use crate::toolset::{
244 BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools,
245 };
246}