Skip to main content

mermaid_cli/agents/
types.rs

1use serde::{Deserialize, Serialize};
2
3/// Represents an action that the AI wants to perform
4#[derive(Debug, Clone, Serialize, Deserialize)]
5pub enum AgentAction {
6    /// Read one or more files (executor decides parallelization)
7    ReadFile {
8        paths: Vec<String>,
9    },
10    /// Write or create a file
11    WriteFile {
12        path: String,
13        content: String,
14    },
15    /// Make targeted edits to a file by replacing specific text
16    EditFile {
17        path: String,
18        old_string: String,
19        new_string: String,
20    },
21    /// Delete a file
22    DeleteFile {
23        path: String,
24    },
25    /// Create a directory
26    CreateDirectory {
27        path: String,
28    },
29    /// Execute a shell command
30    ExecuteCommand {
31        command: String,
32        working_dir: Option<String>,
33        timeout: Option<u64>,
34    },
35    /// Web search via Ollama Cloud API (executor decides parallelization)
36    WebSearch {
37        queries: Vec<(String, usize)>,
38    },
39    /// Fetch a URL's content via Ollama Cloud API
40    WebFetch {
41        url: String,
42    },
43    /// Spawn an autonomous sub-agent with its own conversation context
44    SpawnAgent {
45        prompt: String,
46        description: String,
47    },
48    /// Capture a screenshot of the screen (or a focused window/monitor/region/window)
49    Screenshot {
50        mode: String,            // "fullscreen", "focused", "monitor", "region", "window"
51        monitor: Option<String>, // monitor name for "monitor" mode (e.g., "DP-0")
52        region: Option<String>,  // "X,Y,WIDTHxHEIGHT" for "region" mode
53        window: Option<String>,  // window title for "window" mode (e.g., "Discord")
54    },
55    /// Click at screen coordinates
56    Click { x: i32, y: i32, button: String },
57    /// Type a text string at the current cursor position
58    TypeText { text: String },
59    /// Press a key or key combination
60    PressKey { key: String },
61    /// Scroll in a direction
62    Scroll { direction: String, amount: i32 },
63    /// Move mouse cursor to coordinates
64    MouseMove { x: i32, y: i32 },
65    /// List all visible window titles (lightweight, no screenshot)
66    ListWindows,
67    /// Dynamic MCP tool call (dispatched to an MCP server at runtime)
68    McpToolCall {
69        server_name: String,
70        tool_name: String,
71        arguments: serde_json::Value,
72    },
73    /// Placeholder for tool calls that failed to parse (never executed)
74    ParseError {
75        message: String,
76    },
77}
78
79/// Result of an agent action
80#[derive(Debug, Clone, Serialize, Deserialize)]
81#[must_use]
82pub enum ActionResult {
83    Success {
84        output: String,
85        #[serde(default)]
86        images: Option<Vec<String>>,
87    },
88    Error {
89        error: String,
90    },
91}
92
93impl AgentAction {
94    /// Extract a (type_label, target) pair for display or logging
95    pub fn display_info(&self) -> (&str, String) {
96        match self {
97            AgentAction::ReadFile { paths } => {
98                if paths.len() == 1 {
99                    ("Read", paths[0].clone())
100                } else {
101                    ("Read", format!("{} files", paths.len()))
102                }
103            },
104            AgentAction::WriteFile { path, .. } => ("Write", path.clone()),
105            AgentAction::EditFile { path, .. } => ("Edit", path.clone()),
106            AgentAction::DeleteFile { path } => ("Delete", path.clone()),
107            AgentAction::CreateDirectory { path } => ("Bash", format!("mkdir -p {}", path)),
108            AgentAction::ExecuteCommand { command, .. } => ("Bash", command.clone()),
109            AgentAction::WebSearch { queries } => {
110                if queries.len() == 1 {
111                    ("Web Search", queries[0].0.clone())
112                } else {
113                    ("Web Search", format!("{} queries", queries.len()))
114                }
115            },
116            AgentAction::WebFetch { url } => ("Web Fetch", url.clone()),
117            AgentAction::SpawnAgent { description, .. } => ("Agent", description.clone()),
118            AgentAction::Screenshot { mode, window, .. } => {
119                let target = match mode.as_str() {
120                    "focused" => "focused window".to_string(),
121                    "monitor" => "monitor".to_string(),
122                    "region" => "region".to_string(),
123                    "window" => {
124                        format!("window \"{}\"", window.as_deref().unwrap_or("?"))
125                    },
126                    _ => "screen capture".to_string(),
127                };
128                ("Screenshot", target)
129            },
130            AgentAction::Click { x, y, button } => {
131                ("Click", format!("({}, {}) {}", x, y, button))
132            },
133            AgentAction::TypeText { text } => {
134                ("Type", text.chars().take(30).collect())
135            },
136            AgentAction::PressKey { key } => ("Key", key.clone()),
137            AgentAction::Scroll { direction, amount } => {
138                ("Scroll", format!("{} {}", direction, amount))
139            },
140            AgentAction::MouseMove { x, y } => ("Move", format!("({}, {})", x, y)),
141            AgentAction::ListWindows => ("ListWindows", "visible windows".to_string()),
142            AgentAction::McpToolCall {
143                server_name,
144                tool_name,
145                ..
146            } => ("MCP", format!("{}:{}", server_name, tool_name)),
147            AgentAction::ParseError { message } => ("Error", message.clone()),
148        }
149    }
150}
151
152/// Display representation of an action for UI rendering
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct ActionDisplay {
155    /// Type of action (e.g., "Write", "Bash", "Read", "Edit", "Delete", "Agent")
156    pub action_type: String,
157    /// Target of the action (file path, command, etc.)
158    pub target: String,
159    /// Result of the action
160    pub result: ActionResult,
161    /// Type-specific display data
162    #[serde(default)]
163    pub details: ActionDetails,
164    /// Duration of long-running actions in seconds
165    pub duration_seconds: Option<f64>,
166}
167
168/// Type-specific display data for action results
169#[derive(Debug, Clone, Default, Serialize, Deserialize)]
170pub enum ActionDetails {
171    /// No extra display data (Delete, CreateDirectory, or old conversations)
172    #[default]
173    Simple,
174    /// Text preview with optional line count (Read, Bash, Git, WebSearch, etc.)
175    Preview {
176        text: String,
177        line_count: Option<usize>,
178    },
179    /// File write with content for syntax-highlighted preview
180    FileContent { line_count: usize, content: String },
181    /// File edit with summary and diff for color-coded display
182    Diff { summary: String, diff: String },
183    /// Agent completion with summary and tool use count
184    Agent { summary: String, tool_uses: usize },
185}
186