Skip to main content

adk_browser/
lib.rs

1//! # adk-browser
2//!
3//! Browser automation tools for ADK agents using WebDriver (via thirtyfour).
4//!
5//! ## Overview
6//!
7//! This crate provides browser automation capabilities as ADK tools, allowing
8//! LLM agents to interact with web pages. Tools are designed to work with any
9//! LlmAgent and inherit all ADK benefits (callbacks, session management, etc.).
10//!
11//! ## Quick Start
12//!
13//! ```rust,ignore
14//! use adk_browser::{BrowserSession, BrowserConfig, BrowserToolset};
15//! use adk_agent::LlmAgentBuilder;
16//! use std::sync::Arc;
17//!
18//! async fn example() -> anyhow::Result<()> {
19//!     // Create browser session
20//!     let config = BrowserConfig::new()
21//!         .headless(true)
22//!         .viewport(1920, 1080);
23//!
24//!     let browser = Arc::new(BrowserSession::new(config));
25//!     browser.start().await?;
26//!
27//!     // Create toolset
28//!     let toolset = BrowserToolset::new(browser.clone());
29//!
30//!     // Add tools to agent (example - requires model)
31//!     // let agent = LlmAgentBuilder::new("browser_agent")
32//!     //     .model(model)
33//!     //     .instruction("You are a web automation assistant.")
34//!     //     .tools(toolset.all_tools())
35//!     //     .build()?;
36//!
37//!     // Clean up
38//!     browser.stop().await?;
39//!     Ok(())
40//! }
41//! ```
42//!
43//! ## Available Tools
44//!
45//! ### Navigation
46//! - `browser_navigate` - Navigate to a URL
47//! - `browser_back` - Go back in history
48//! - `browser_forward` - Go forward in history
49//! - `browser_refresh` - Refresh the page
50//!
51//! ### Interaction
52//! - `browser_click` - Click on an element
53//! - `browser_double_click` - Double-click an element
54//! - `browser_type` - Type text into an input
55//! - `browser_clear` - Clear an input field
56//! - `browser_select` - Select from a dropdown
57//!
58//! ### Extraction
59//! - `browser_extract_text` - Get text from elements
60//! - `browser_extract_attribute` - Get attribute values
61//! - `browser_extract_links` - Get all links on page
62//! - `browser_page_info` - Get current URL and title
63//! - `browser_page_source` - Get HTML source
64//!
65//! ### Screenshots
66//! - `browser_screenshot` - Capture page or element screenshot
67//!
68//! ### Waiting
69//! - `browser_wait_for_element` - Wait for element to appear
70//! - `browser_wait` - Wait for a duration
71//! - `browser_wait_for_page_load` - Wait for page to load
72//! - `browser_wait_for_text` - Wait for text to appear
73//!
74//! ### JavaScript
75//! - `browser_evaluate_js` - Execute JavaScript code
76//! - `browser_scroll` - Scroll the page
77//! - `browser_hover` - Hover over an element
78//! - `browser_handle_alert` - Handle JavaScript alerts
79//!
80//! ### Cookies
81//! - `browser_get_cookies` - Get all cookies
82//! - `browser_get_cookie` - Get a specific cookie
83//! - `browser_add_cookie` - Add a cookie
84//! - `browser_delete_cookie` - Delete a cookie
85//! - `browser_delete_all_cookies` - Delete all cookies
86//!
87//! ### Windows/Tabs
88//! - `browser_list_windows` - List all windows/tabs
89//! - `browser_new_tab` - Open a new tab
90//! - `browser_new_window` - Open a new window
91//! - `browser_switch_window` - Switch to a window
92//! - `browser_close_window` - Close current window
93//! - `browser_maximize_window` - Maximize window
94//! - `browser_minimize_window` - Minimize window
95//! - `browser_set_window_size` - Set window size
96//!
97//! ### Frames
98//! - `browser_switch_to_frame` - Switch to an iframe
99//! - `browser_switch_to_parent_frame` - Exit current iframe
100//! - `browser_switch_to_default_content` - Exit all iframes
101//!
102//! ### Advanced Actions
103//! - `browser_drag_and_drop` - Drag and drop elements
104//! - `browser_right_click` - Right-click (context menu)
105//! - `browser_focus` - Focus on an element
106//! - `browser_element_state` - Check element state
107//! - `browser_press_key` - Press keyboard keys
108//! - `browser_file_upload` - Upload files
109//! - `browser_print_to_pdf` - Print page to PDF
110//!
111//! ## Requirements
112//!
113//! A WebDriver server (like ChromeDriver, geckodriver, or Selenium) must be
114//! running and accessible. By default, tools connect to `http://localhost:4444`.
115//!
116//! ### Starting ChromeDriver
117//!
118//! ```bash
119//! # Install ChromeDriver (macOS)
120//! brew install chromedriver
121//!
122//! # Start ChromeDriver
123//! chromedriver --port=4444
124//! ```
125//!
126//! ### Using Docker
127//!
128//! ```bash
129//! docker run -d -p 4444:4444 selenium/standalone-chrome
130//! ```
131//!
132//! ## Architecture
133//!
134//! Tools are implemented using the ADK `Tool` trait, allowing them to:
//! - Work with any LLM (e.g. Gemini, OpenAI, Anthropic)
136//! - Use callbacks for monitoring and control
137//! - Access session state and artifacts
138//! - Compose with other tools and agents
139//!
140//! ```text
141//! ┌─────────────────────────────────────────────────┐
142//! │                   LlmAgent                       │
143//! │  (with callbacks, session, artifacts, memory)   │
144//! └─────────────────────────────────────────────────┘
145//!                        │
146//!                        ▼
147//! ┌─────────────────────────────────────────────────┐
148//! │               BrowserToolset                     │
149//! │  NavigateTool, ClickTool, TypeTool, ...         │
150//! └─────────────────────────────────────────────────┘
151//!                        │
152//!                        ▼
153//! ┌─────────────────────────────────────────────────┐
154//! │              BrowserSession                      │
155//! │         (wraps thirtyfour WebDriver)            │
156//! └─────────────────────────────────────────────────┘
157//!                        │
158//!                        ▼
159//!              WebDriver Server
160//!           (ChromeDriver, etc.)
161//! ```
162
163mod config;
164mod escape;
165mod pool;
166mod session;
167pub mod tools;
168mod toolset;
169
170// Re-export main types
171pub use config::{BrowserConfig, BrowserType};
172pub use escape::escape_js_string;
173pub use pool::BrowserSessionPool;
174pub use session::{BrowserSession, ElementState, shared_session};
175pub use toolset::{BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools};
176
// Re-export individual tools for selective use
pub use tools::{
    // Items are listed alphabetically, not grouped by category; the
    // "Available Tools" section of the crate docs gives the category breakdown.
    AddCookieTool,
    AlertTool,
    BackTool,
    ClearTool,
    ClickTool,
    CloseWindowTool,
    DeleteAllCookiesTool,
    DeleteCookieTool,
    DoubleClickTool,
    DragAndDropTool,
    ElementStateTool,
    EvaluateJsTool,
    ExtractAttributeTool,
    ExtractLinksTool,
    ExtractTextTool,
    FileUploadTool,
    FocusTool,
    ForwardTool,
    GetCookieTool,
    GetCookiesTool,
    HoverTool,
    ListWindowsTool,
    MaximizeWindowTool,
    MinimizeWindowTool,
    NavigateTool,
    NewTabTool,
    NewWindowTool,
    PageInfoTool,
    PageSourceTool,
    PressKeyTool,
    PrintToPdfTool,
    RefreshTool,
    RightClickTool,
    ScreenshotTool,
    ScrollTool,
    SelectTool,
    SetWindowSizeTool,
    SwitchToDefaultContentTool,
    SwitchToFrameTool,
    SwitchToParentFrameTool,
    SwitchWindowTool,
    TypeTool,
    WaitForElementTool,
    WaitForPageLoadTool,
    WaitForTextTool,
    WaitTool,
};
236
237/// Prelude module for convenient imports.
238pub mod prelude {
239    pub use crate::config::{BrowserConfig, BrowserType};
240    pub use crate::pool::BrowserSessionPool;
241    pub use crate::session::{BrowserSession, shared_session};
242    pub use crate::toolset::{
243        BrowserProfile, BrowserToolset, minimal_browser_tools, readonly_browser_tools,
244    };
245}