use crate::agent::json_extractor::JSONExtractor;
use crate::agent::views::{
ActionResult, AgentHistory, AgentHistoryList, AgentOutput, AgentSettings, AgentState,
};
use crate::error::{BrowsingError, Result};
use crate::llm::base::{ChatMessage, ChatModel};
use crate::tools::Tools;
use crate::tools::views::ActionModel;
use crate::traits::{BrowserClient, DOMProcessor};
use serde_json::Value;
use tracing::info;
/// Browser automation agent that repeatedly shows the current page state to a
/// chat model and executes the actions the model returns.
pub struct Agent<L: ChatModel> {
/// Task description; included in every prompt and scanned for an initial URL in `run`.
task: String,
/// Browser that is started, navigated, acted upon and stopped by `run`.
browser: Box<dyn BrowserClient>,
/// Chat model queried once per step.
llm: L,
/// Action executor invoked through `Tools::act`.
tools: Tools,
/// Source of the page-state string and selector map.
/// NOTE: `run` replaces this with a CDP-backed `DOMProcessorImpl` after the browser starts.
dom_processor: Box<dyn DOMProcessor>,
/// Upper bound on the number of loop iterations performed by `run`.
max_steps: u32,
/// Agent settings; within this file only `override_system_message` is read.
settings: AgentSettings,
/// Mutable run state; `n_steps` is updated at the start of every step.
state: AgentState,
/// Per-step history plus the final usage summary returned from `run`.
history: AgentHistoryList,
/// Running token totals collected from model responses.
usage_tracker: UsageTracker,
}
/// Running totals of the token counts reported by the chat model.
struct UsageTracker {
/// Sum of `prompt_tokens` over every usage passed to `add_usage`.
total_prompt_tokens: u32,
/// Sum of `completion_tokens` over every usage passed to `add_usage`.
total_completion_tokens: u32,
/// Sum of `total_tokens` over every usage passed to `add_usage`.
total_tokens: u32,
}
impl UsageTracker {
    /// Creates a tracker with all counters at zero.
    fn new() -> Self {
        Self {
            total_prompt_tokens: 0,
            total_completion_tokens: 0,
            total_tokens: 0,
        }
    }

    /// Adds the token counts of one model response to the running totals.
    ///
    /// Saturating addition is used so that a very long session pins the
    /// counters at `u32::MAX` instead of panicking (debug builds) or silently
    /// wrapping around (release builds).
    fn add_usage(&mut self, usage: &crate::llm::base::ChatInvokeUsage) {
        self.total_prompt_tokens = self.total_prompt_tokens.saturating_add(usage.prompt_tokens);
        self.total_completion_tokens = self
            .total_completion_tokens
            .saturating_add(usage.completion_tokens);
        self.total_tokens = self.total_tokens.saturating_add(usage.total_tokens);
    }

    /// Returns the current totals as a `UsageSummary`; `cost` is always `None`.
    fn to_summary(&self) -> crate::tokens::views::UsageSummary {
        crate::tokens::views::UsageSummary {
            prompt_tokens: Some(self.total_prompt_tokens),
            completion_tokens: Some(self.total_completion_tokens),
            total_tokens: Some(self.total_tokens),
            cost: None,
        }
    }
}
impl<L: ChatModel> Agent<L> {
/// Creates an agent for `task` with default tools, settings and state.
///
/// The step limit starts at 100 (see `with_max_steps`), the history is empty
/// and all usage counters are zero.
pub fn new(
    task: String,
    browser: Box<dyn BrowserClient>,
    dom_processor: Box<dyn DOMProcessor>,
    llm: L,
) -> Self {
    Self {
        // `task` is owned and not used again, so it is moved rather than cloned.
        task,
        browser,
        llm,
        tools: Tools::default(),
        dom_processor,
        max_steps: 100,
        settings: AgentSettings::default(),
        state: AgentState::default(),
        history: AgentHistoryList {
            history: vec![],
            usage: None,
        },
        usage_tracker: UsageTracker::new(),
    }
}
/// Builder-style setter: consumes the agent and returns it with the step
/// limit replaced by `max_steps`.
pub fn with_max_steps(self, max_steps: u32) -> Self {
    let mut agent = self;
    agent.max_steps = max_steps;
    agent
}
/// Builder-style setter: consumes the agent and returns it with its settings
/// replaced by `settings`.
pub fn with_settings(self, settings: AgentSettings) -> Self {
    let mut agent = self;
    agent.settings = settings;
    agent
}
/// Starts the browser, runs the agent loop and returns the collected history.
///
/// Once the browser has started successfully it is always asked to stop
/// again, whether the loop finished normally or failed part-way through, so a
/// failing step no longer leaves the browser session running. The usage
/// summary is written into the history before the browser is stopped.
///
/// # Errors
///
/// Returns the error from `browser.start()`, or the first error raised while
/// preparing the session or running a step. A failure of `browser.stop()` is
/// only logged.
pub async fn run(&mut self) -> Result<AgentHistoryList> {
    self.browser.start().await?;

    // Run everything fallible in a helper so the cleanup below is reached on
    // both the success and the error path.
    let outcome = self.run_steps().await;

    self.history.usage = Some(self.usage_tracker.to_summary());
    if let Err(e) = self.browser.stop().await {
        info!("⚠ Browser stop warning: {e}");
    }

    outcome?;
    Ok(self.history.clone())
}

/// Wires up the CDP-backed DOM processor, performs the optional initial
/// navigation and drives the step loop until the task is done, a shutdown is
/// requested or `max_steps` is reached.
async fn run_steps(&mut self) -> Result<()> {
    let cdp_client = self.browser.get_cdp_client()?;
    let session_info = self.browser.get_session_info().await?;
    let dom_processor = Box::new(
        crate::dom::DOMProcessorImpl::new()
            .with_cdp_client(cdp_client, session_info.session_id)
            .with_target_id(session_info.target_id),
    );
    self.dom_processor = dom_processor;

    // If the task text contains a URL, open the first one before the first step.
    let initial_url = crate::utils::extract_urls(&self.task).first().cloned();
    if let Some(url) = initial_url {
        self.browser.navigate(&url).await?;
    }

    let signal_handler = crate::utils::signal::SignalHandler::new();
    let _shutdown_listener = signal_handler.spawn_shutdown_listener();

    for step in 0..self.max_steps {
        if signal_handler.is_shutdown_requested()
            || crate::utils::signal::is_shutdown_requested()
        {
            info!("🛑 Shutdown requested, stopping agent execution");
            break;
        }
        self.state.n_steps = step + 1;

        let page_state = self.get_page_state().await?;
        let messages = self.build_messages(&page_state)?;
        let response = self.llm.chat(&messages).await?;
        if let Some(ref usage) = response.usage {
            self.track_usage(usage);
        }
        let agent_output = self.parse_agent_output(&response.completion)?;

        let mut results = Vec::new();
        for action_value in &agent_output.action {
            let action: ActionModel = serde_json::from_value(action_value.clone())
                .map_err(|e| BrowsingError::Agent(format!("Failed to parse action: {e}")))?;
            // A failing action is recorded as an error result; later actions still run.
            match self.execute_action(&action).await {
                Ok(result) => results.push(result),
                Err(e) => {
                    results.push(ActionResult {
                        error: Some(e.to_string()),
                        ..Default::default()
                    });
                }
            }
        }

        // Decide completion before the results are moved into the history
        // entry; this avoids cloning both the model output and the results.
        let task_complete = self.is_task_complete(&results);
        let history_item = AgentHistory {
            model_output: Some(agent_output),
            result: results,
            state: crate::browser::views::BrowserStateHistory {
                url: self.browser.get_current_url().await.unwrap_or_default(),
                title: "Unknown".to_string(),
                tabs: vec![],
                interacted_element: vec![],
                screenshot_path: None,
            },
            metadata: None,
            state_message: None,
        };
        self.history.history.push(history_item);

        if task_complete {
            break;
        }
    }
    Ok(())
}
/// Forwards one response's token usage to the agent's usage tracker.
fn track_usage(&mut self, usage: &crate::llm::base::ChatInvokeUsage) {
    let tracker = &mut self.usage_tracker;
    tracker.add_usage(usage);
}
/// Asks the DOM processor for the current page state rendered as a string.
async fn get_page_state(&self) -> Result<String> {
    let processor = &self.dom_processor;
    processor.get_page_state_string().await
}
/// Builds the two-message prompt for one step: a system message followed by
/// a user message containing the task and the current page state.
///
/// The system message is `settings.override_system_message` when set, and a
/// built-in default otherwise.
fn build_messages(&self, page_state: &str) -> Result<Vec<ChatMessage>> {
    let system_message = match &self.settings.override_system_message {
        Some(custom_prompt) => ChatMessage::system(custom_prompt.clone()),
        None => ChatMessage::system(
            "You are a browser automation agent. Help the user complete their task."
                .to_string(),
        ),
    };
    let user_message = ChatMessage::user(format!(
        "Task: {}\n\nPage state:\n{}",
        self.task, page_state
    ));
    Ok(vec![system_message, user_message])
}
/// Extracts the JSON payload from a raw model response and deserializes it
/// into an `AgentOutput`.
///
/// If the extracted text is not valid JSON, one repair attempt is made with
/// `anyrepair::repair`; when the repair itself fails the unrepaired text is
/// parsed again so that the parse error is reported.
///
/// # Errors
///
/// Returns `BrowsingError::Agent` when the text cannot be parsed as JSON or
/// when the JSON does not deserialize into `AgentOutput`.
fn parse_agent_output(&self, response: &str) -> Result<AgentOutput> {
    let json_str = JSONExtractor::new().extract_from_response(response);
    tracing::debug!("Raw LLM response: {}", response);
    tracing::debug!("Extracted JSON: {}", json_str);

    let value: Value = serde_json::from_str(&json_str).or_else(|_| {
        let repaired = anyrepair::repair(&json_str).unwrap_or_else(|_| json_str.clone());
        serde_json::from_str(&repaired)
            .map_err(|e| BrowsingError::Agent(format!("Failed to parse agent output: {e}")))
    })?;

    // The value is cloned so it is still available for the error log below.
    let parsed: AgentOutput = match serde_json::from_value(value.clone()) {
        Ok(output) => output,
        Err(e) => {
            tracing::error!("Failed to deserialize agent output. Value: {}", value);
            Err(BrowsingError::Agent(format!(
                "Failed to deserialize agent output: {e}"
            )))?
        }
    };
    Ok(parsed)
}
/// Executes a single action against the browser through `Tools::act`.
///
/// The selector map is fetched on a best-effort basis: if the DOM processor
/// fails to provide one, the action is still attempted without it.
async fn execute_action(&mut self, action: &ActionModel) -> Result<ActionResult> {
    let selector_map = match self.dom_processor.get_selector_map().await {
        Ok(map) => Some(map),
        Err(_) => None,
    };
    let requested_action = action.clone();
    self.tools
        .act(requested_action, &mut *self.browser, selector_map.as_ref())
        .await
}
/// Returns `true` when at least one result has `is_done` set to `Some(true)`.
fn is_task_complete(&self, results: &[ActionResult]) -> bool {
    for outcome in results {
        if outcome.is_done == Some(true) {
            return true;
        }
    }
    false
}
}