adk_browser/
lib.rs

1//! # adk-browser
2//!
3//! Browser automation tools for ADK agents using WebDriver (via thirtyfour).
4//!
5//! ## Overview
6//!
7//! This crate provides browser automation capabilities as ADK tools, allowing
8//! LLM agents to interact with web pages. Tools are designed to work with any
9//! LlmAgent and inherit all ADK benefits (callbacks, session management, etc.).
10//!
11//! ## Quick Start
12//!
13//! ```rust,ignore
14//! use adk_browser::{BrowserSession, BrowserConfig, BrowserToolset};
15//! use adk_agent::LlmAgentBuilder;
16//! use std::sync::Arc;
17//!
18//! async fn example() -> anyhow::Result<()> {
19//!     // Create browser session
20//!     let config = BrowserConfig::new()
21//!         .headless(true)
22//!         .viewport(1920, 1080);
23//!
24//!     let browser = Arc::new(BrowserSession::new(config));
25//!     browser.start().await?;
26//!
27//!     // Create toolset
28//!     let toolset = BrowserToolset::new(browser.clone());
29//!
30//!     // Add tools to agent (example - requires model)
31//!     // let agent = LlmAgentBuilder::new("browser_agent")
32//!     //     .model(model)
33//!     //     .instruction("You are a web automation assistant.")
34//!     //     .tools(toolset.all_tools())
35//!     //     .build()?;
36//!
37//!     // Clean up
38//!     browser.stop().await?;
39//!     Ok(())
40//! }
41//! ```
42//!
43//! ## Available Tools
44//!
45//! ### Navigation
46//! - `browser_navigate` - Navigate to a URL
47//! - `browser_back` - Go back in history
48//! - `browser_forward` - Go forward in history
49//! - `browser_refresh` - Refresh the page
50//!
51//! ### Interaction
52//! - `browser_click` - Click on an element
53//! - `browser_double_click` - Double-click an element
54//! - `browser_type` - Type text into an input
55//! - `browser_clear` - Clear an input field
56//! - `browser_select` - Select from a dropdown
57//!
58//! ### Extraction
59//! - `browser_extract_text` - Get text from elements
60//! - `browser_extract_attribute` - Get attribute values
61//! - `browser_extract_links` - Get all links on page
62//! - `browser_page_info` - Get current URL and title
63//! - `browser_page_source` - Get HTML source
64//!
65//! ### Screenshots
66//! - `browser_screenshot` - Capture page or element screenshot
67//!
68//! ### Waiting
69//! - `browser_wait_for_element` - Wait for element to appear
70//! - `browser_wait` - Wait for a duration
71//! - `browser_wait_for_page_load` - Wait for page to load
72//! - `browser_wait_for_text` - Wait for text to appear
73//!
74//! ### JavaScript
75//! - `browser_evaluate_js` - Execute JavaScript code
76//! - `browser_scroll` - Scroll the page
77//! - `browser_hover` - Hover over an element
78//! - `browser_handle_alert` - Handle JavaScript alerts
79//!
80//! ### Cookies
81//! - `browser_get_cookies` - Get all cookies
82//! - `browser_get_cookie` - Get a specific cookie
83//! - `browser_add_cookie` - Add a cookie
84//! - `browser_delete_cookie` - Delete a cookie
85//! - `browser_delete_all_cookies` - Delete all cookies
86//!
87//! ### Windows/Tabs
88//! - `browser_list_windows` - List all windows/tabs
89//! - `browser_new_tab` - Open a new tab
90//! - `browser_new_window` - Open a new window
91//! - `browser_switch_window` - Switch to a window
92//! - `browser_close_window` - Close current window
93//! - `browser_maximize_window` - Maximize window
94//! - `browser_minimize_window` - Minimize window
95//! - `browser_set_window_size` - Set window size
96//!
97//! ### Frames
98//! - `browser_switch_to_frame` - Switch to an iframe
99//! - `browser_switch_to_parent_frame` - Exit current iframe
100//! - `browser_switch_to_default_content` - Exit all iframes
101//!
102//! ### Advanced Actions
103//! - `browser_drag_and_drop` - Drag and drop elements
104//! - `browser_right_click` - Right-click (context menu)
105//! - `browser_focus` - Focus on an element
106//! - `browser_element_state` - Check element state
107//! - `browser_press_key` - Press keyboard keys
108//! - `browser_file_upload` - Upload files
109//! - `browser_print_to_pdf` - Print page to PDF
110//!
111//! ## Requirements
112//!
113//! A WebDriver server (like ChromeDriver, geckodriver, or Selenium) must be
114//! running and accessible. By default, tools connect to `http://localhost:4444`.
115//!
116//! ### Starting ChromeDriver
117//!
118//! ```bash
119//! # Install ChromeDriver (macOS)
120//! brew install chromedriver
121//!
122//! # Start ChromeDriver
123//! chromedriver --port=4444
124//! ```
125//!
126//! ### Using Docker
127//!
128//! ```bash
129//! docker run -d -p 4444:4444 selenium/standalone-chrome
130//! ```
131//!
132//! ## Architecture
133//!
134//! Tools are implemented using the ADK `Tool` trait, allowing them to:
135//! - Work with any LLM model (Gemini, OpenAI, Anthropic)
136//! - Use callbacks for monitoring and control
137//! - Access session state and artifacts
138//! - Compose with other tools and agents
139//!
140//! ```text
141//! ┌─────────────────────────────────────────────────┐
142//! │                   LlmAgent                       │
143//! │  (with callbacks, session, artifacts, memory)   │
144//! └─────────────────────────────────────────────────┘
145//!                        │
146//!                        ▼
147//! ┌─────────────────────────────────────────────────┐
148//! │               BrowserToolset                     │
149//! │  NavigateTool, ClickTool, TypeTool, ...         │
150//! └─────────────────────────────────────────────────┘
151//!                        │
152//!                        ▼
153//! ┌─────────────────────────────────────────────────┐
154//! │              BrowserSession                      │
155//! │         (wraps thirtyfour WebDriver)            │
156//! └─────────────────────────────────────────────────┘
157//!                        │
158//!                        ▼
159//!              WebDriver Server
160//!           (ChromeDriver, etc.)
161//! ```
162
163mod config;
164mod session;
165pub mod tools;
166mod toolset;
167
168// Re-export main types
169pub use config::{BrowserConfig, BrowserType};
170pub use session::{shared_session, BrowserSession, ElementState};
171pub use toolset::{minimal_browser_tools, readonly_browser_tools, BrowserToolset};
172
173// Re-export individual tools for selective use
174pub use tools::{
175    // Cookies
176    AddCookieTool,
177    // JavaScript
178    AlertTool,
179    // Navigation
180    BackTool,
181    // Interaction
182    ClearTool,
183    ClickTool,
184    // Windows/Tabs
185    CloseWindowTool,
186    DeleteAllCookiesTool,
187    DeleteCookieTool,
188    DoubleClickTool,
189    // Advanced Actions
190    DragAndDropTool,
191    ElementStateTool,
192    EvaluateJsTool,
193    // Extraction
194    ExtractAttributeTool,
195    ExtractLinksTool,
196    ExtractTextTool,
197    FileUploadTool,
198    FocusTool,
199    ForwardTool,
200    GetCookieTool,
201    GetCookiesTool,
202    HoverTool,
203    ListWindowsTool,
204    MaximizeWindowTool,
205    MinimizeWindowTool,
206    NavigateTool,
207    NewTabTool,
208    NewWindowTool,
209    PageInfoTool,
210    PageSourceTool,
211    PressKeyTool,
212    PrintToPdfTool,
213    RefreshTool,
214    RightClickTool,
215    // Screenshots
216    ScreenshotTool,
217    ScrollTool,
218    SelectTool,
219    SetWindowSizeTool,
220    // Frames
221    SwitchToDefaultContentTool,
222    SwitchToFrameTool,
223    SwitchToParentFrameTool,
224    SwitchWindowTool,
225    TypeTool,
226    // Waiting
227    WaitForElementTool,
228    WaitForPageLoadTool,
229    WaitForTextTool,
230    WaitTool,
231};
232
233/// Prelude module for convenient imports.
234pub mod prelude {
235    pub use crate::config::{BrowserConfig, BrowserType};
236    pub use crate::session::{shared_session, BrowserSession};
237    pub use crate::toolset::{minimal_browser_tools, readonly_browser_tools, BrowserToolset};
238}