adk_browser/lib.rs
1//! # adk-browser
2//!
3//! Browser automation tools for ADK agents using WebDriver (via thirtyfour).
4//!
5//! ## Overview
6//!
7//! This crate provides browser automation capabilities as ADK tools, allowing
8//! LLM agents to interact with web pages. Tools are designed to work with any
9//! LlmAgent and inherit all ADK benefits (callbacks, session management, etc.).
10//!
11//! ## Quick Start
12//!
13//! ```rust,ignore
14//! use adk_browser::{BrowserSession, BrowserConfig, BrowserToolset};
15//! use adk_agent::LlmAgentBuilder;
16//! use std::sync::Arc;
17//!
//! async fn example() -> anyhow::Result<()> {
//!     // Create browser session
//!     let config = BrowserConfig::new()
//!         .headless(true)
//!         .viewport(1920, 1080);
//!
//!     let browser = Arc::new(BrowserSession::new(config));
//!     browser.start().await?;
//!
//!     // Create toolset
//!     let toolset = BrowserToolset::new(browser.clone());
//!
//!     // Add tools to agent (example - requires model)
//!     // let agent = LlmAgentBuilder::new("browser_agent")
//!     //     .model(model)
//!     //     .instruction("You are a web automation assistant.")
//!     //     .tools(toolset.all_tools())
//!     //     .build()?;
//!
//!     // Clean up
//!     browser.stop().await?;
//!     Ok(())
//! }
41//! ```
42//!
43//! ## Available Tools
44//!
45//! ### Navigation
46//! - `browser_navigate` - Navigate to a URL
47//! - `browser_back` - Go back in history
48//! - `browser_forward` - Go forward in history
49//! - `browser_refresh` - Refresh the page
50//!
51//! ### Interaction
52//! - `browser_click` - Click on an element
53//! - `browser_double_click` - Double-click an element
54//! - `browser_type` - Type text into an input
55//! - `browser_clear` - Clear an input field
56//! - `browser_select` - Select from a dropdown
57//!
58//! ### Extraction
59//! - `browser_extract_text` - Get text from elements
60//! - `browser_extract_attribute` - Get attribute values
61//! - `browser_extract_links` - Get all links on page
62//! - `browser_page_info` - Get current URL and title
63//! - `browser_page_source` - Get HTML source
64//!
65//! ### Screenshots
66//! - `browser_screenshot` - Capture page or element screenshot
67//!
68//! ### Waiting
69//! - `browser_wait_for_element` - Wait for element to appear
70//! - `browser_wait` - Wait for a duration
71//! - `browser_wait_for_page_load` - Wait for page to load
72//! - `browser_wait_for_text` - Wait for text to appear
73//!
74//! ### JavaScript
75//! - `browser_evaluate_js` - Execute JavaScript code
76//! - `browser_scroll` - Scroll the page
77//! - `browser_hover` - Hover over an element
78//! - `browser_handle_alert` - Handle JavaScript alerts
79//!
80//! ### Cookies
81//! - `browser_get_cookies` - Get all cookies
82//! - `browser_get_cookie` - Get a specific cookie
83//! - `browser_add_cookie` - Add a cookie
84//! - `browser_delete_cookie` - Delete a cookie
85//! - `browser_delete_all_cookies` - Delete all cookies
86//!
87//! ### Windows/Tabs
88//! - `browser_list_windows` - List all windows/tabs
89//! - `browser_new_tab` - Open a new tab
90//! - `browser_new_window` - Open a new window
91//! - `browser_switch_window` - Switch to a window
92//! - `browser_close_window` - Close current window
93//! - `browser_maximize_window` - Maximize window
94//! - `browser_minimize_window` - Minimize window
95//! - `browser_set_window_size` - Set window size
96//!
97//! ### Frames
98//! - `browser_switch_to_frame` - Switch to an iframe
99//! - `browser_switch_to_parent_frame` - Exit current iframe
100//! - `browser_switch_to_default_content` - Exit all iframes
101//!
102//! ### Advanced Actions
103//! - `browser_drag_and_drop` - Drag and drop elements
104//! - `browser_right_click` - Right-click (context menu)
105//! - `browser_focus` - Focus on an element
106//! - `browser_element_state` - Check element state
107//! - `browser_press_key` - Press keyboard keys
108//! - `browser_file_upload` - Upload files
109//! - `browser_print_to_pdf` - Print page to PDF
110//!
111//! ## Requirements
112//!
113//! A WebDriver server (like ChromeDriver, geckodriver, or Selenium) must be
114//! running and accessible. By default, tools connect to `http://localhost:4444`.
115//!
116//! ### Starting ChromeDriver
117//!
118//! ```bash
119//! # Install ChromeDriver (macOS)
120//! brew install chromedriver
121//!
122//! # Start ChromeDriver
123//! chromedriver --port=4444
124//! ```
125//!
126//! ### Using Docker
127//!
128//! ```bash
129//! docker run -d -p 4444:4444 selenium/standalone-chrome
130//! ```
131//!
132//! ## Architecture
133//!
134//! Tools are implemented using the ADK `Tool` trait, allowing them to:
135//! - Work with any LLM model (Gemini, OpenAI, Anthropic)
136//! - Use callbacks for monitoring and control
137//! - Access session state and artifacts
138//! - Compose with other tools and agents
139//!
140//! ```text
//! ┌─────────────────────────────────────────────────┐
//! │                    LlmAgent                     │
//! │  (with callbacks, session, artifacts, memory)   │
//! └─────────────────────────────────────────────────┘
//!                          │
//!                          ▼
//! ┌─────────────────────────────────────────────────┐
//! │                 BrowserToolset                  │
//! │     NavigateTool, ClickTool, TypeTool, ...      │
//! └─────────────────────────────────────────────────┘
//!                          │
//!                          ▼
//! ┌─────────────────────────────────────────────────┐
//! │                 BrowserSession                  │
//! │          (wraps thirtyfour WebDriver)           │
//! └─────────────────────────────────────────────────┘
//!                          │
//!                          ▼
//!                  WebDriver Server
//!                (ChromeDriver, etc.)
161//! ```
162
163mod config;
164mod session;
165pub mod tools;
166mod toolset;
167
168// Re-export main types
169pub use config::{BrowserConfig, BrowserType};
170pub use session::{shared_session, BrowserSession, ElementState};
171pub use toolset::{minimal_browser_tools, readonly_browser_tools, BrowserToolset};
172
173// Re-export individual tools for selective use
174pub use tools::{
175 // Cookies
176 AddCookieTool,
177 // JavaScript
178 AlertTool,
179 // Navigation
180 BackTool,
181 // Interaction
182 ClearTool,
183 ClickTool,
184 // Windows/Tabs
185 CloseWindowTool,
186 DeleteAllCookiesTool,
187 DeleteCookieTool,
188 DoubleClickTool,
189 // Advanced Actions
190 DragAndDropTool,
191 ElementStateTool,
192 EvaluateJsTool,
193 // Extraction
194 ExtractAttributeTool,
195 ExtractLinksTool,
196 ExtractTextTool,
197 FileUploadTool,
198 FocusTool,
199 ForwardTool,
200 GetCookieTool,
201 GetCookiesTool,
202 HoverTool,
203 ListWindowsTool,
204 MaximizeWindowTool,
205 MinimizeWindowTool,
206 NavigateTool,
207 NewTabTool,
208 NewWindowTool,
209 PageInfoTool,
210 PageSourceTool,
211 PressKeyTool,
212 PrintToPdfTool,
213 RefreshTool,
214 RightClickTool,
215 // Screenshots
216 ScreenshotTool,
217 ScrollTool,
218 SelectTool,
219 SetWindowSizeTool,
220 // Frames
221 SwitchToDefaultContentTool,
222 SwitchToFrameTool,
223 SwitchToParentFrameTool,
224 SwitchWindowTool,
225 TypeTool,
226 // Waiting
227 WaitForElementTool,
228 WaitForPageLoadTool,
229 WaitForTextTool,
230 WaitTool,
231};
232
233/// Prelude module for convenient imports.
234pub mod prelude {
235 pub use crate::config::{BrowserConfig, BrowserType};
236 pub use crate::session::{shared_session, BrowserSession};
237 pub use crate::toolset::{minimal_browser_tools, readonly_browser_tools, BrowserToolset};
238}