adk_browser/lib.rs
1//! # adk-browser
2//!
3//! Browser automation tools for ADK agents using WebDriver (via thirtyfour).
4//!
5//! ## Overview
6//!
7//! This crate provides browser automation capabilities as ADK tools, allowing
8//! LLM agents to interact with web pages. Tools are designed to work with any
9//! LlmAgent and inherit all ADK benefits (callbacks, session management, etc.).
10//!
11//! ## Quick Start
12//!
13//! ```rust,ignore
14//! use adk_browser::{BrowserSession, BrowserConfig, BrowserToolset};
15//! use adk_agent::LlmAgentBuilder;
16//! use std::sync::Arc;
17//!
//! async fn example() -> anyhow::Result<()> {
//!     // Create browser session
//!     let config = BrowserConfig::new()
//!         .headless(true)
//!         .viewport(1920, 1080);
//!
//!     let browser = Arc::new(BrowserSession::new(config));
//!     browser.start().await?;
//!
//!     // Create toolset
//!     let toolset = BrowserToolset::new(browser.clone());
//!
//!     // Add tools to agent (example - requires model)
//!     // let agent = LlmAgentBuilder::new("browser_agent")
//!     //     .model(model)
//!     //     .instruction("You are a web automation assistant.")
//!     //     .tools(toolset.all_tools())
//!     //     .build()?;
//!
//!     // Clean up
//!     browser.stop().await?;
//!     Ok(())
//! }
41//! ```
42//!
43//! ## Available Tools
44//!
45//! ### Navigation
46//! - `browser_navigate` - Navigate to a URL
47//! - `browser_back` - Go back in history
48//! - `browser_forward` - Go forward in history
49//! - `browser_refresh` - Refresh the page
50//!
51//! ### Interaction
52//! - `browser_click` - Click on an element
53//! - `browser_double_click` - Double-click an element
54//! - `browser_type` - Type text into an input
55//! - `browser_clear` - Clear an input field
56//! - `browser_select` - Select from a dropdown
57//!
58//! ### Extraction
59//! - `browser_extract_text` - Get text from elements
60//! - `browser_extract_attribute` - Get attribute values
61//! - `browser_extract_links` - Get all links on page
62//! - `browser_page_info` - Get current URL and title
63//! - `browser_page_source` - Get HTML source
64//!
65//! ### Screenshots
66//! - `browser_screenshot` - Capture page or element screenshot
67//!
68//! ### Waiting
69//! - `browser_wait_for_element` - Wait for element to appear
70//! - `browser_wait` - Wait for a duration
71//! - `browser_wait_for_page_load` - Wait for page to load
72//! - `browser_wait_for_text` - Wait for text to appear
73//!
74//! ### JavaScript
75//! - `browser_evaluate_js` - Execute JavaScript code
76//! - `browser_scroll` - Scroll the page
77//! - `browser_hover` - Hover over an element
78//! - `browser_handle_alert` - Handle JavaScript alerts
79//!
80//! ### Cookies
81//! - `browser_get_cookies` - Get all cookies
82//! - `browser_get_cookie` - Get a specific cookie
83//! - `browser_add_cookie` - Add a cookie
84//! - `browser_delete_cookie` - Delete a cookie
85//! - `browser_delete_all_cookies` - Delete all cookies
86//!
87//! ### Windows/Tabs
88//! - `browser_list_windows` - List all windows/tabs
89//! - `browser_new_tab` - Open a new tab
90//! - `browser_new_window` - Open a new window
91//! - `browser_switch_window` - Switch to a window
92//! - `browser_close_window` - Close current window
93//! - `browser_maximize_window` - Maximize window
94//! - `browser_minimize_window` - Minimize window
95//! - `browser_set_window_size` - Set window size
96//!
97//! ### Frames
98//! - `browser_switch_to_frame` - Switch to an iframe
99//! - `browser_switch_to_parent_frame` - Exit current iframe
100//! - `browser_switch_to_default_content` - Exit all iframes
101//!
102//! ### Advanced Actions
103//! - `browser_drag_and_drop` - Drag and drop elements
104//! - `browser_right_click` - Right-click (context menu)
105//! - `browser_focus` - Focus on an element
106//! - `browser_element_state` - Check element state
107//! - `browser_press_key` - Press keyboard keys
108//! - `browser_file_upload` - Upload files
109//! - `browser_print_to_pdf` - Print page to PDF
110//!
111//! ## Requirements
112//!
113//! A WebDriver server (like ChromeDriver, geckodriver, or Selenium) must be
114//! running and accessible. By default, tools connect to `http://localhost:4444`.
115//!
116//! ### Starting ChromeDriver
117//!
118//! ```bash
119//! # Install ChromeDriver (macOS)
120//! brew install chromedriver
121//!
122//! # Start ChromeDriver
123//! chromedriver --port=4444
124//! ```
125//!
126//! ### Using Docker
127//!
128//! ```bash
129//! docker run -d -p 4444:4444 selenium/standalone-chrome
130//! ```
131//!
132//! ## Architecture
133//!
134//! Tools are implemented using the ADK `Tool` trait, allowing them to:
135//! - Work with any LLM model (Gemini, OpenAI, Anthropic)
136//! - Use callbacks for monitoring and control
137//! - Access session state and artifacts
138//! - Compose with other tools and agents
139//!
140//! ```text
//! ┌─────────────────────────────────────────────────┐
//! │                    LlmAgent                     │
//! │  (with callbacks, session, artifacts, memory)   │
//! └─────────────────────────────────────────────────┘
//!                          │
//!                          ▼
//! ┌─────────────────────────────────────────────────┐
//! │                 BrowserToolset                  │
//! │     NavigateTool, ClickTool, TypeTool, ...      │
//! └─────────────────────────────────────────────────┘
//!                          │
//!                          ▼
//! ┌─────────────────────────────────────────────────┐
//! │                 BrowserSession                  │
//! │          (wraps thirtyfour WebDriver)           │
//! └─────────────────────────────────────────────────┘
//!                          │
//!                          ▼
//!                  WebDriver Server
//!                (ChromeDriver, etc.)
161//! ```
162
163mod config;
164mod escape;
165mod pool;
166mod session;
167pub mod tools;
168mod toolset;
169
170// Re-export main types
171pub use config::{BrowserConfig, BrowserType};
172pub use escape::escape_js_string;
173pub use pool::BrowserSessionPool;
174pub use session::{BrowserSession, ElementState, shared_session};
175pub use toolset::{BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools};
176
177// Re-export individual tools for selective use
178pub use tools::{
179 // Cookies
180 AddCookieTool,
181 // JavaScript
182 AlertTool,
183 // Navigation
184 BackTool,
185 // Interaction
186 ClearTool,
187 ClickTool,
188 // Windows/Tabs
189 CloseWindowTool,
190 DeleteAllCookiesTool,
191 DeleteCookieTool,
192 DoubleClickTool,
193 // Advanced Actions
194 DragAndDropTool,
195 ElementStateTool,
196 EvaluateJsTool,
197 // Extraction
198 ExtractAttributeTool,
199 ExtractLinksTool,
200 ExtractTextTool,
201 FileUploadTool,
202 FocusTool,
203 ForwardTool,
204 GetCookieTool,
205 GetCookiesTool,
206 HoverTool,
207 ListWindowsTool,
208 MaximizeWindowTool,
209 MinimizeWindowTool,
210 NavigateTool,
211 NewTabTool,
212 NewWindowTool,
213 PageInfoTool,
214 PageSourceTool,
215 PressKeyTool,
216 PrintToPdfTool,
217 RefreshTool,
218 RightClickTool,
219 // Screenshots
220 ScreenshotTool,
221 ScrollTool,
222 SelectTool,
223 SetWindowSizeTool,
224 // Frames
225 SwitchToDefaultContentTool,
226 SwitchToFrameTool,
227 SwitchToParentFrameTool,
228 SwitchWindowTool,
229 TypeTool,
230 // Waiting
231 WaitForElementTool,
232 WaitForPageLoadTool,
233 WaitForTextTool,
234 WaitTool,
235};
236
237/// Prelude module for convenient imports.
238pub mod prelude {
239 pub use crate::config::{BrowserConfig, BrowserType};
240 pub use crate::pool::BrowserSessionPool;
241 pub use crate::session::{BrowserSession, shared_session};
242 pub use crate::toolset::{
243 BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools,
244 };
245}