// browsing 0.1.3 - Docs.rs

//! Agent view types and data structures

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Configuration options for the Agent.
///
/// Every field is public and the struct derives `Serialize`/`Deserialize`, so it
/// can be written to and read from any serde format. The values quoted as
/// "default" below are the ones produced by this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentSettings {
    /// Whether to use vision mode (default: `VisionMode::Enabled(true)`)
    pub use_vision: VisionMode,
    /// Level of detail for vision processing (default: `VisionDetailLevel::Auto`)
    pub vision_detail_level: VisionDetailLevel,
    /// Path to save conversation history; `None` (the default) means no path is configured
    pub save_conversation_path: Option<String>,
    /// Encoding for saved conversation files (default: `Some("utf-8")`)
    pub save_conversation_path_encoding: Option<String>,
    /// Maximum number of consecutive failures before stopping (default: 3)
    pub max_failures: u32,
    /// Whether to generate GIF of the interaction (default: `false`)
    pub generate_gif: bool,
    /// Override for the default system message (default: `None`)
    pub override_system_message: Option<String>,
    /// Additional text to extend the system message (default: `None`)
    pub extend_system_message: Option<String>,
    /// List of attributes to include in DOM processing (default: `None`)
    pub include_attributes: Option<Vec<String>>,
    /// Maximum number of actions per step (default: 4)
    pub max_actions_per_step: u32,
    /// Whether to enable thinking mode (default: `true`)
    pub use_thinking: bool,
    /// Whether to enable flash mode (default: `false`)
    pub flash_mode: bool,
    /// Whether to use judge mode for evaluation (default: `true`)
    pub use_judge: bool,
    /// Maximum number of history items to keep (default: `None`)
    // NOTE(review): `None` presumably means "no limit" — confirm against the consumer.
    pub max_history_items: Option<u32>,
    /// Whether to calculate and track costs (default: `false`)
    pub calculate_cost: bool,
    /// Whether to include tool call examples (default: `false`)
    pub include_tool_call_examples: bool,
    /// Timeout for LLM calls in seconds (default: 60)
    pub llm_timeout: u32,
    /// Timeout for each step in seconds (default: 180)
    pub step_timeout: u32,
    /// Whether to provide final response after failure (default: `true`)
    pub final_response_after_failure: bool,
}

/// Vision mode options for the agent.
///
/// The enum is `#[serde(untagged)]`, so no variant name is written to the
/// serialized form; each variant is represented by its content alone.
///
/// NOTE(review): under serde's untagged representation the unit variant `Auto`
/// is presumably (de)serialized as a unit/null value rather than as a string
/// such as `"auto"` — confirm this is the intended wire format.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum VisionMode {
    /// Automatically determine if vision should be used
    Auto,
    /// Explicitly enable (`true`) or disable (`false`) vision
    Enabled(bool),
}

/// Vision detail level options for the agent.
///
/// Used as the type of `AgentSettings::vision_detail_level`. No serde
/// representation attribute is applied to this enum.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VisionDetailLevel {
    /// Automatically determine the detail level
    Auto,
    /// Low detail level
    Low,
    /// High detail level
    High,
}

impl Default for AgentSettings {
    fn default() -> Self {
        Self {
            use_vision: VisionMode::Enabled(true),
            vision_detail_level: VisionDetailLevel::Auto,
            save_conversation_path: None,
            save_conversation_path_encoding: Some("utf-8".to_string()),
            max_failures: 3,
            generate_gif: false,
            override_system_message: None,
            extend_system_message: None,
            include_attributes: None,
            max_actions_per_step: 4,
            use_thinking: true,
            flash_mode: false,
            use_judge: true,
            max_history_items: None,
            calculate_cost: false,
            include_tool_call_examples: false,
            llm_timeout: 60,
            step_timeout: 180,
            final_response_after_failure: true,
        }
    }
}

/// Holds all state information for an Agent.
///
/// The struct is plain data (all fields public, serde-serializable). Its
/// `Default` implementation yields a fresh state: a newly generated
/// `agent_id`, `n_steps` of 1, zero failures, no previous results and every
/// flag set to `false`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentState {
    /// Unique identifier for the agent (a UUID string when created via `Default`)
    pub agent_id: String,
    /// Current step number (starts at 1 in `Default`)
    pub n_steps: u32,
    /// Number of consecutive failures
    pub consecutive_failures: u32,
    /// Results from the last action; `None` until something has been recorded
    pub last_result: Option<Vec<ActionResult>>,
    /// Last plan generated by the agent
    pub last_plan: Option<String>,
    /// Last output from the model
    pub last_model_output: Option<AgentOutput>,
    /// Whether the agent is paused
    pub paused: bool,
    /// Whether the agent is stopped
    pub stopped: bool,
    /// Whether the session has been initialized
    pub session_initialized: bool,
    /// Whether there is a follow-up task
    pub follow_up_task: bool,
}

impl Default for AgentState {
    fn default() -> Self {
        Self {
            agent_id: uuid::Uuid::now_v7().to_string(),
            n_steps: 1,
            consecutive_failures: 0,
            last_result: None,
            last_plan: None,
            last_model_output: None,
            paused: false,
            stopped: false,
            session_initialized: false,
            follow_up_task: false,
        }
    }
}

/// Information about a single step.
///
/// Pairs the current step number with the step budget so that
/// `is_last_step` can compare the two.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentStepInfo {
    /// Current step number
    // NOTE(review): `is_last_step` compares against `max_steps - 1`, which
    // suggests this counter is zero-based — confirm with the caller.
    pub step_number: u32,
    /// Maximum number of steps allowed
    pub max_steps: u32,
}

impl AgentStepInfo {
    /// Returns `true` if this is the last step, i.e. when
    /// `step_number >= max_steps - 1`.
    ///
    /// The subtraction saturates at zero: with `max_steps == 0` there is no
    /// step budget at all, so every step is reported as the last one instead
    /// of the `u32` subtraction underflowing (a panic in debug builds and a
    /// wrap-around to `u32::MAX` in release builds).
    pub fn is_last_step(&self) -> bool {
        let last_index = self.max_steps.saturating_sub(1);
        self.step_number >= last_index
    }
}

/// LLM judgement of agent trace.
///
/// Plain serde-serializable record; it is carried inside
/// `ActionResult::judgement`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JudgementResult {
    /// Reasoning behind the judgement
    pub reasoning: Option<String>,
    /// Final verdict (success/failure)
    // NOTE(review): presumably `true` means success — confirm with the producer.
    pub verdict: bool,
    /// Reason for failure if applicable
    pub failure_reason: Option<String>,
    /// Whether the task is impossible
    pub impossible_task: bool,
    /// Whether a captcha was reached
    pub reached_captcha: bool,
}

/// Result of executing an action.
///
/// The `Default` implementation sets `is_done` to `Some(false)`,
/// `include_extracted_content_only_once` to `false` and every other field to
/// `None`. The constructors `success_with_memory` and `done` start from that
/// default and fill in only the fields they need.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ActionResult {
    /// Whether the task is done (`Some(false)` by default)
    pub is_done: Option<bool>,
    /// Whether the action was successful; `None` when no verdict has been set
    pub success: Option<bool>,
    /// Judgement result from the LLM
    pub judgement: Option<JudgementResult>,
    /// Error message if applicable; `AgentHistoryList::has_errors` checks whether this is set
    pub error: Option<String>,
    /// List of attachments
    // NOTE(review): the strings are presumably file paths or names — confirm.
    pub attachments: Option<Vec<String>>,
    /// List of images
    pub images: Option<Vec<ImageData>>,
    /// Long term memory content
    pub long_term_memory: Option<String>,
    /// Extracted content from the page
    pub extracted_content: Option<String>,
    /// Whether to include extracted content only once
    pub include_extracted_content_only_once: bool,
    /// Additional metadata as arbitrary JSON values keyed by string
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}

/// Image data structure.
///
/// A named image payload; used as the element type of `ActionResult::images`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageData {
    /// Name of the image
    pub name: String,
    /// Base64 encoded image data
    pub data: String,
}

impl Default for ActionResult {
    fn default() -> Self {
        Self {
            is_done: Some(false),
            success: None,
            judgement: None,
            error: None,
            attachments: None,
            images: None,
            long_term_memory: None,
            extracted_content: None,
            include_extracted_content_only_once: false,
            metadata: None,
        }
    }
}

impl ActionResult {
    /// Builds a result whose `extracted_content` and `long_term_memory` both
    /// hold the given text.
    ///
    /// All other fields keep their `Default` values; in particular `success`
    /// stays `None` and `is_done` stays `Some(false)`. Intended to cut
    /// repetition in action handlers.
    pub fn success_with_memory(memory: impl Into<String>) -> Self {
        let long_term_memory: String = memory.into();
        // The same text is stored twice, hence exactly one clone.
        let extracted_content = Some(long_term_memory.clone());

        Self {
            long_term_memory: Some(long_term_memory),
            extracted_content,
            ..Self::default()
        }
    }

    /// Builds a result that marks the task as finished successfully.
    ///
    /// Sets `is_done` and `success` to `Some(true)` and stores `text` as the
    /// extracted content; every other field keeps its `Default` value.
    pub fn done(text: impl Into<String>) -> Self {
        let extracted_content = Some(text.into());

        Self {
            is_done: Some(true),
            success: Some(true),
            extracted_content,
            ..Self::default()
        }
    }
}

/// Metadata for a single step: its start/end timestamps and step number.
///
/// `duration_seconds` computes `step_end_time - step_start_time`, so both
/// timestamps are expected to be expressed in seconds on the same clock.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StepMetadata {
    /// Start time of the step (timestamp)
    // NOTE(review): presumably seconds since the Unix epoch — confirm with the producer.
    pub step_start_time: f64,
    /// End time of the step (timestamp)
    pub step_end_time: f64,
    /// Step number
    pub step_number: u32,
}

impl StepMetadata {
    /// Elapsed time of the step in seconds, computed as end minus start.
    ///
    /// No clamping is applied: if the end timestamp precedes the start
    /// timestamp the result is negative.
    pub fn duration_seconds(&self) -> f64 {
        let Self {
            step_start_time: started,
            step_end_time: ended,
            ..
        } = self;
        ended - started
    }
}

/// Agent's reasoning process.
///
/// This is the type returned by `AgentOutput::current_state`, which fills the
/// three `String` fields with an empty string when the corresponding optional
/// field of the output is absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentBrain {
    /// Current thinking process
    pub thinking: Option<String>,
    /// Evaluation of the previous goal
    pub evaluation_previous_goal: String,
    /// Memory content
    pub memory: String,
    /// Next goal to achieve
    pub next_goal: String,
}

/// Agent output from LLM.
///
/// All reasoning fields are optional; only `action` is a required field of the
/// struct. Instances can be parsed from JSON text with `AgentOutput::from_json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentOutput {
    /// Thinking process from the LLM
    pub thinking: Option<String>,
    /// Evaluation of the previous goal
    pub evaluation_previous_goal: Option<String>,
    /// Memory content
    pub memory: Option<String>,
    /// Next goal to achieve
    pub next_goal: Option<String>,
    /// Actions to be performed, currently kept as untyped JSON values
    pub action: Vec<serde_json::Value>, // ActionModel - will be properly typed later
}

impl AgentOutput {
    /// Parses an `AgentOutput` from a JSON string.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json::Error` reported by `serde_json::from_str` when
    /// the text cannot be deserialized into this type.
    pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
        serde_json::from_str::<Self>(json)
    }
}

impl AgentOutput {
    /// Extracts the reasoning part of this output as an `AgentBrain`.
    ///
    /// `thinking` is copied as-is (it stays optional). The evaluation, memory
    /// and next-goal fields are cloned when present and replaced by an empty
    /// string when absent. The `action` list is not part of the result.
    pub fn current_state(&self) -> AgentBrain {
        let Self {
            thinking,
            evaluation_previous_goal,
            memory,
            next_goal,
            ..
        } = self;

        // Clone the text when present, otherwise fall back to `String::default()`.
        let owned_or_empty = |field: &Option<String>| field.clone().unwrap_or_default();

        AgentBrain {
            thinking: thinking.clone(),
            evaluation_previous_goal: owned_or_empty(evaluation_previous_goal),
            memory: owned_or_empty(memory),
            next_goal: owned_or_empty(next_goal),
        }
    }
}

/// History item for agent actions.
///
/// One entry of `AgentHistoryList::history`. The query helpers on
/// `AgentHistoryList` read `result` (done / success / error checks) and
/// `metadata` (duration total) from each entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentHistory {
    /// Output from the model
    pub model_output: Option<AgentOutput>,
    /// Result of actions taken
    pub result: Vec<ActionResult>,
    /// Browser state at the time
    pub state: crate::browser::views::BrowserStateHistory,
    /// Metadata about the step; entries without metadata contribute nothing to the total duration
    pub metadata: Option<StepMetadata>,
    /// State message
    pub state_message: Option<String>,
}

/// List of AgentHistory messages.
///
/// Wraps the recorded history entries together with an optional usage summary
/// and offers read-only query helpers over the entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentHistoryList {
    /// List of agent history items
    // NOTE(review): the `.last()`-based helpers assume chronological order — confirm.
    pub history: Vec<AgentHistory>,
    /// Token usage summary
    pub usage: Option<crate::tokens::views::UsageSummary>,
}

impl AgentHistoryList {
    /// Returns the total duration of all steps in seconds
    pub fn total_duration_seconds(&self) -> f64 {
        self.history
            .iter()
            .filter_map(|h| h.metadata.as_ref())
            .map(|m| m.duration_seconds())
            .sum()
    }

    /// Returns the number of steps in the history
    pub fn number_of_steps(&self) -> usize {
        self.history.len()
    }

    /// Returns true if the task is done
    pub fn is_done(&self) -> bool {
        self.history
            .last()
            .and_then(|h| h.result.last())
            .and_then(|r| r.is_done)
            .unwrap_or(false)
    }

    /// Returns true if the task was successful
    pub fn is_successful(&self) -> Option<bool> {
        self.history
            .last()
            .and_then(|h| h.result.last())
            .and_then(|r| r.success)
    }

    /// Returns true if there were any errors
    pub fn has_errors(&self) -> bool {
        self.history
            .iter()
            .any(|h| h.result.iter().any(|r| r.error.is_some()))
    }
}