Skip to main content

mermaid_cli/agents/
types.rs

1use serde::{Deserialize, Serialize};
2
3/// Represents an action that the AI wants to perform
4#[derive(Debug, Clone, Serialize, Deserialize)]
5pub enum AgentAction {
6    /// Read one or more files (executor decides parallelization)
7    ReadFile { paths: Vec<String> },
8    /// Write or create a file
9    WriteFile { path: String, content: String },
10    /// Make targeted edits to a file by replacing specific text
11    EditFile {
12        path: String,
13        old_string: String,
14        new_string: String,
15    },
16    /// Delete a file
17    DeleteFile { path: String },
18    /// Create a directory
19    CreateDirectory { path: String },
20    /// Execute a shell command
21    ExecuteCommand {
22        command: String,
23        working_dir: Option<String>,
24        timeout: Option<u64>,
25    },
26    /// Web search via Ollama Cloud API (executor decides parallelization)
27    WebSearch { queries: Vec<(String, usize)> },
28    /// Fetch a URL's content via Ollama Cloud API
29    WebFetch { url: String },
30    /// Spawn an autonomous sub-agent with its own conversation context
31    SpawnAgent { prompt: String, description: String },
32    /// Capture a screenshot of the screen (or a focused window/monitor/region/window)
33    Screenshot {
34        mode: String,            // "fullscreen", "focused", "monitor", "region", "window"
35        monitor: Option<String>, // monitor name for "monitor" mode (e.g., "DP-0")
36        region: Option<String>,  // "X,Y,WIDTHxHEIGHT" for "region" mode
37        window: Option<String>,  // window title for "window" mode (e.g., "Discord")
38    },
39    /// Click at screen coordinates
40    Click {
41        x: i32,
42        y: i32,
43        button: String,
44        /// Optional ID of the screenshot whose coordinate space these
45        /// `(x, y)` refer to. When omitted, the most recent screenshot
46        /// is used (preserves backward compatibility). When specified
47        /// and the registry has evicted that ID, the click errors
48        /// cleanly instead of silently using the wrong scale/offset.
49        #[serde(default)]
50        screenshot_id: Option<u64>,
51    },
52    /// Type a text string at the current cursor position
53    TypeText { text: String },
54    /// Press a key or key combination
55    PressKey { key: String },
56    /// Scroll in a direction
57    Scroll { direction: String, amount: i32 },
58    /// Move mouse cursor to coordinates
59    MouseMove {
60        x: i32,
61        y: i32,
62        /// Same semantic as `Click::screenshot_id`.
63        #[serde(default)]
64        screenshot_id: Option<u64>,
65    },
66    /// List all visible window titles (lightweight, no screenshot)
67    ListWindows,
68    /// Dynamic MCP tool call (dispatched to an MCP server at runtime)
69    McpToolCall {
70        server_name: String,
71        tool_name: String,
72        arguments: serde_json::Value,
73    },
74    /// Placeholder for tool calls that failed to parse (never executed)
75    ParseError { message: String },
76}
77
78/// Result of an agent action
79#[derive(Debug, Clone, Serialize, Deserialize)]
80#[must_use]
81pub enum ActionResult {
82    Success {
83        output: String,
84        #[serde(default)]
85        images: Option<Vec<String>>,
86    },
87    Error {
88        error: String,
89    },
90}
91
92impl AgentAction {
93    /// Extract a (type_label, target) pair for display or logging
94    pub fn display_info(&self) -> (&str, String) {
95        match self {
96            AgentAction::ReadFile { paths } => {
97                if paths.len() == 1 {
98                    ("Read", paths[0].clone())
99                } else {
100                    ("Read", format!("{} files", paths.len()))
101                }
102            },
103            AgentAction::WriteFile { path, .. } => ("Write", path.clone()),
104            AgentAction::EditFile { path, .. } => ("Edit", path.clone()),
105            AgentAction::DeleteFile { path } => ("Delete", path.clone()),
106            AgentAction::CreateDirectory { path } => ("Bash", format!("mkdir -p {}", path)),
107            AgentAction::ExecuteCommand { command, .. } => ("Bash", command.clone()),
108            AgentAction::WebSearch { queries } => {
109                if queries.len() == 1 {
110                    ("Web Search", queries[0].0.clone())
111                } else {
112                    ("Web Search", format!("{} queries", queries.len()))
113                }
114            },
115            AgentAction::WebFetch { url } => ("Web Fetch", url.clone()),
116            AgentAction::SpawnAgent { description, .. } => ("Agent", description.clone()),
117            AgentAction::Screenshot { mode, window, .. } => {
118                let target = match mode.as_str() {
119                    "focused" => "focused window".to_string(),
120                    "monitor" => "monitor".to_string(),
121                    "region" => "region".to_string(),
122                    "window" => {
123                        format!("window \"{}\"", window.as_deref().unwrap_or("?"))
124                    },
125                    _ => "screen capture".to_string(),
126                };
127                ("Screenshot", target)
128            },
129            AgentAction::Click { x, y, button, .. } => {
130                ("Click", format!("({}, {}) {}", x, y, button))
131            },
132            AgentAction::TypeText { text } => ("Type", text.chars().take(30).collect()),
133            AgentAction::PressKey { key } => ("Key", key.clone()),
134            AgentAction::Scroll { direction, amount } => {
135                ("Scroll", format!("{} {}", direction, amount))
136            },
137            AgentAction::MouseMove { x, y, .. } => ("Move", format!("({}, {})", x, y)),
138            AgentAction::ListWindows => ("ListWindows", "visible windows".to_string()),
139            AgentAction::McpToolCall {
140                server_name,
141                tool_name,
142                ..
143            } => ("MCP", format!("{}:{}", server_name, tool_name)),
144            AgentAction::ParseError { message } => ("Error", message.clone()),
145        }
146    }
147}
148
149/// Display representation of an action for UI rendering
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub struct ActionDisplay {
152    /// Type of action (e.g., "Write", "Bash", "Read", "Edit", "Delete", "Agent")
153    pub action_type: String,
154    /// Target of the action (file path, command, etc.)
155    pub target: String,
156    /// Result of the action
157    pub result: ActionResult,
158    /// Type-specific display data
159    #[serde(default)]
160    pub details: ActionDetails,
161    /// Duration of long-running actions in seconds
162    pub duration_seconds: Option<f64>,
163}
164
165/// Type-specific display data for action results
166#[derive(Debug, Clone, Default, Serialize, Deserialize)]
167pub enum ActionDetails {
168    /// No extra display data (Delete, CreateDirectory, or old conversations)
169    #[default]
170    Simple,
171    /// Text preview with optional line count (Read, Bash, Git, WebSearch, etc.)
172    Preview {
173        text: String,
174        line_count: Option<usize>,
175    },
176    /// File write with content for syntax-highlighted preview
177    FileContent { line_count: usize, content: String },
178    /// File edit with summary and diff for color-coded display
179    Diff { summary: String, diff: String },
180    /// Agent completion with summary and tool use count
181    Agent { summary: String, tool_uses: usize },
182}