Skip to main content

adk_browser/
lib.rs

1//! # adk-browser
2#![allow(clippy::result_large_err)]
3//!
4//! Browser automation tools for ADK agents using WebDriver (via thirtyfour).
5//!
6//! ## Overview
7//!
8//! This crate provides browser automation capabilities as ADK tools, allowing
9//! LLM agents to interact with web pages. Tools are designed to work with any
10//! LlmAgent and inherit all ADK benefits (callbacks, session management, etc.).
11//!
12//! ## Quick Start
13//!
14//! ```rust,ignore
15//! use adk_browser::{BrowserSession, BrowserConfig, BrowserToolset};
16//! use adk_agent::LlmAgentBuilder;
17//! use std::sync::Arc;
18//!
19//! async fn example() -> anyhow::Result<()> {
20//!     // Create browser session
21//!     let config = BrowserConfig::new()
22//!         .headless(true)
23//!         .viewport(1920, 1080);
24//!
25//!     let browser = Arc::new(BrowserSession::new(config));
26//!     browser.start().await?;
27//!
28//!     // Create toolset
29//!     let toolset = BrowserToolset::new(browser.clone());
30//!
31//!     // Add tools to agent (example - requires model)
32//!     // let agent = LlmAgentBuilder::new("browser_agent")
33//!     //     .model(model)
34//!     //     .instruction("You are a web automation assistant.")
35//!     //     .tools(toolset.all_tools())
36//!     //     .build()?;
37//!
38//!     // Clean up
39//!     browser.stop().await?;
40//!     Ok(())
41//! }
42//! ```
43//!
44//! ## Available Tools
45//!
46//! ### Navigation
47//! - `browser_navigate` - Navigate to a URL
48//! - `browser_back` - Go back in history
49//! - `browser_forward` - Go forward in history
50//! - `browser_refresh` - Refresh the page
51//!
52//! ### Interaction
53//! - `browser_click` - Click on an element
54//! - `browser_double_click` - Double-click an element
55//! - `browser_type` - Type text into an input
56//! - `browser_clear` - Clear an input field
57//! - `browser_select` - Select from a dropdown
58//!
59//! ### Extraction
60//! - `browser_extract_text` - Get text from elements
61//! - `browser_extract_attribute` - Get attribute values
62//! - `browser_extract_links` - Get all links on page
63//! - `browser_page_info` - Get current URL and title
64//! - `browser_page_source` - Get HTML source
65//!
66//! ### Screenshots
67//! - `browser_screenshot` - Capture page or element screenshot
68//!
69//! ### Waiting
70//! - `browser_wait_for_element` - Wait for element to appear
71//! - `browser_wait` - Wait for a duration
72//! - `browser_wait_for_page_load` - Wait for page to load
73//! - `browser_wait_for_text` - Wait for text to appear
74//!
75//! ### JavaScript
76//! - `browser_evaluate_js` - Execute JavaScript code
77//! - `browser_scroll` - Scroll the page
78//! - `browser_hover` - Hover over an element
79//! - `browser_handle_alert` - Handle JavaScript alerts
80//!
81//! ### Cookies
82//! - `browser_get_cookies` - Get all cookies
83//! - `browser_get_cookie` - Get a specific cookie
84//! - `browser_add_cookie` - Add a cookie
85//! - `browser_delete_cookie` - Delete a cookie
86//! - `browser_delete_all_cookies` - Delete all cookies
87//!
88//! ### Windows/Tabs
89//! - `browser_list_windows` - List all windows/tabs
90//! - `browser_new_tab` - Open a new tab
91//! - `browser_new_window` - Open a new window
92//! - `browser_switch_window` - Switch to a window
93//! - `browser_close_window` - Close current window
94//! - `browser_maximize_window` - Maximize window
95//! - `browser_minimize_window` - Minimize window
96//! - `browser_set_window_size` - Set window size
97//!
98//! ### Frames
99//! - `browser_switch_to_frame` - Switch to an iframe
100//! - `browser_switch_to_parent_frame` - Exit current iframe
101//! - `browser_switch_to_default_content` - Exit all iframes
102//!
103//! ### Advanced Actions
104//! - `browser_drag_and_drop` - Drag and drop elements
105//! - `browser_right_click` - Right-click (context menu)
106//! - `browser_focus` - Focus on an element
107//! - `browser_element_state` - Check element state
108//! - `browser_press_key` - Press keyboard keys
109//! - `browser_file_upload` - Upload files
110//! - `browser_print_to_pdf` - Print page to PDF
111//!
112//! ## Requirements
113//!
114//! A WebDriver server (like ChromeDriver, geckodriver, or Selenium) must be
115//! running and accessible. By default, tools connect to `http://localhost:4444`.
116//!
117//! ### Starting ChromeDriver
118//!
119//! ```bash
120//! # Install ChromeDriver (macOS)
121//! brew install chromedriver
122//!
123//! # Start ChromeDriver
124//! chromedriver --port=4444
125//! ```
126//!
127//! ### Using Docker
128//!
129//! ```bash
130//! docker run -d -p 4444:4444 selenium/standalone-chrome
131//! ```
132//!
133//! ## Architecture
134//!
135//! Tools are implemented using the ADK `Tool` trait, allowing them to:
136//! - Work with any LLM model (Gemini, OpenAI, Anthropic)
137//! - Use callbacks for monitoring and control
138//! - Access session state and artifacts
139//! - Compose with other tools and agents
140//!
141//! ```text
142//! ┌─────────────────────────────────────────────────┐
143//! │                   LlmAgent                       │
144//! │  (with callbacks, session, artifacts, memory)   │
145//! └─────────────────────────────────────────────────┘
146//!                        │
147//!                        ▼
148//! ┌─────────────────────────────────────────────────┐
149//! │               BrowserToolset                     │
150//! │  NavigateTool, ClickTool, TypeTool, ...         │
151//! └─────────────────────────────────────────────────┘
152//!                        │
153//!                        ▼
154//! ┌─────────────────────────────────────────────────┐
155//! │              BrowserSession                      │
156//! │         (wraps thirtyfour WebDriver)            │
157//! └─────────────────────────────────────────────────┘
158//!                        │
159//!                        ▼
160//!              WebDriver Server
161//!           (ChromeDriver, etc.)
162//! ```
163
164mod config;
165mod escape;
166mod pool;
167mod session;
168pub mod tools;
169mod toolset;
170
171// Re-export main types
172pub use config::{BrowserConfig, BrowserType};
173pub use escape::escape_js_string;
174pub use pool::BrowserSessionPool;
175pub use session::{BrowserSession, ElementState, shared_session};
176pub use toolset::{BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools};
177
178// Re-export individual tools for selective use
179pub use tools::{
180    // Cookies
181    AddCookieTool,
182    // JavaScript
183    AlertTool,
184    // Navigation
185    BackTool,
186    // Interaction
187    ClearTool,
188    ClickTool,
189    // Windows/Tabs
190    CloseWindowTool,
191    DeleteAllCookiesTool,
192    DeleteCookieTool,
193    DoubleClickTool,
194    // Advanced Actions
195    DragAndDropTool,
196    ElementStateTool,
197    EvaluateJsTool,
198    // Extraction
199    ExtractAttributeTool,
200    ExtractLinksTool,
201    ExtractTextTool,
202    FileUploadTool,
203    FocusTool,
204    ForwardTool,
205    GetCookieTool,
206    GetCookiesTool,
207    HoverTool,
208    ListWindowsTool,
209    MaximizeWindowTool,
210    MinimizeWindowTool,
211    NavigateTool,
212    NewTabTool,
213    NewWindowTool,
214    PageInfoTool,
215    PageSourceTool,
216    PressKeyTool,
217    PrintToPdfTool,
218    RefreshTool,
219    RightClickTool,
220    // Screenshots
221    ScreenshotTool,
222    ScrollTool,
223    SelectTool,
224    SetWindowSizeTool,
225    // Frames
226    SwitchToDefaultContentTool,
227    SwitchToFrameTool,
228    SwitchToParentFrameTool,
229    SwitchWindowTool,
230    TypeTool,
231    // Waiting
232    WaitForElementTool,
233    WaitForPageLoadTool,
234    WaitForTextTool,
235    WaitTool,
236};
237
238/// Prelude module for convenient imports.
239pub mod prelude {
240    pub use crate::config::{BrowserConfig, BrowserType};
241    pub use crate::pool::BrowserSessionPool;
242    pub use crate::session::{BrowserSession, shared_session};
243    pub use crate::toolset::{
244        BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools,
245    };
246}