Skip to main content

mermaid_cli/models/
tools.rs

1//! Ollama Tools API support for native function calling
2//!
3//! This module defines Mermaid's available tools in Ollama's JSON Schema format,
4//! replacing the legacy text-based action block system.
5
6use serde::{Deserialize, Serialize};
7use serde_json::json;
8use std::sync::LazyLock;
9
10/// A tool available to the model (Ollama format)
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct Tool {
13    #[serde(rename = "type")]
14    pub type_: String,
15    pub function: ToolFunction,
16}
17
18/// Function definition for a tool
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct ToolFunction {
21    pub name: String,
22    pub description: String,
23    pub parameters: serde_json::Value,
24}
25
26/// Registry of all available Mermaid tools
27pub struct ToolRegistry {
28    tools: Vec<Tool>,
29}
30
31/// Cached Ollama JSON format for the static tool definitions.
32/// Built once on first access, reused for every chat() call.
33static OLLAMA_TOOLS_CACHE: LazyLock<Vec<serde_json::Value>> = LazyLock::new(|| {
34    let registry = ToolRegistry::mermaid_tools();
35    registry.tools.iter().map(|t| json!(t)).collect()
36});
37
38impl ToolRegistry {
39    /// Create a new registry with all Mermaid tools
40    pub fn mermaid_tools() -> Self {
41        Self {
42            tools: vec![
43                Self::read_file_tool(),
44                Self::write_file_tool(),
45                Self::delete_file_tool(),
46                Self::create_directory_tool(),
47                Self::execute_command_tool(),
48                Self::edit_file_tool(),
49                Self::web_search_tool(),
50                Self::web_fetch_tool(),
51                Self::agent_tool(),
52                Self::screenshot_tool(),
53                Self::list_windows_tool(),
54                Self::click_tool(),
55                Self::type_text_tool(),
56                Self::press_key_tool(),
57                Self::scroll_tool(),
58                Self::mouse_move_tool(),
59            ],
60        }
61    }
62
63    /// Get a reference to the cached Ollama tool definitions without constructing a registry
64    pub fn ollama_tools_cached() -> &'static [serde_json::Value] {
65        &OLLAMA_TOOLS_CACHE
66    }
67
68    /// Get all tools
69    pub fn tools(&self) -> &[Tool] {
70        &self.tools
71    }
72
73    // Tool Definitions
74
75    fn read_file_tool() -> Tool {
76        Tool {
77            type_: "function".to_string(),
78            function: ToolFunction {
79                name: "read_file".to_string(),
80                description: "Read a file from the filesystem. Can read files anywhere on the system the user has access to, including outside the current project directory. Supports text files, PDFs (sent to vision models), and images.".to_string(),
81                parameters: json!({
82                    "type": "object",
83                    "properties": {
84                        "path": {
85                            "type": "string",
86                            "description": "Absolute or relative path to the file to read. Use absolute paths (e.g., /home/user/file.pdf) for files outside the project."
87                        }
88                    },
89                    "required": ["path"]
90                }),
91            },
92        }
93    }
94
95    fn write_file_tool() -> Tool {
96        Tool {
97            type_: "function".to_string(),
98            function: ToolFunction {
99                name: "write_file".to_string(),
100                description: "Write or create a file in the current project directory. Creates parent directories if they don't exist. Creates a timestamped backup if the file already exists.".to_string(),
101                parameters: json!({
102                    "type": "object",
103                    "properties": {
104                        "path": {
105                            "type": "string",
106                            "description": "Path to the file to write, relative to the project root or absolute (must be within project)"
107                        },
108                        "content": {
109                            "type": "string",
110                            "description": "The complete file content to write"
111                        }
112                    },
113                    "required": ["path", "content"]
114                }),
115            },
116        }
117    }
118
119    fn delete_file_tool() -> Tool {
120        Tool {
121            type_: "function".to_string(),
122            function: ToolFunction {
123                name: "delete_file".to_string(),
124                description: "Delete a file from the project directory. Creates a timestamped backup before deletion for recovery.".to_string(),
125                parameters: json!({
126                    "type": "object",
127                    "properties": {
128                        "path": {
129                            "type": "string",
130                            "description": "Path to the file to delete"
131                        }
132                    },
133                    "required": ["path"]
134                }),
135            },
136        }
137    }
138
139    fn create_directory_tool() -> Tool {
140        Tool {
141            type_: "function".to_string(),
142            function: ToolFunction {
143                name: "create_directory".to_string(),
144                description:
145                    "Create a new directory in the project. Creates parent directories if needed."
146                        .to_string(),
147                parameters: json!({
148                    "type": "object",
149                    "properties": {
150                        "path": {
151                            "type": "string",
152                            "description": "Path to the directory to create"
153                        }
154                    },
155                    "required": ["path"]
156                }),
157            },
158        }
159    }
160
161    fn execute_command_tool() -> Tool {
162        Tool {
163            type_: "function".to_string(),
164            function: ToolFunction {
165                name: "execute_command".to_string(),
166                description: "Execute any command: terminal commands, launch GUI apps, run scripts, start servers. Use for builds, tests, git operations, opening applications (e.g., 'firefox &', 'discord &'), and anything else you can run from a shell. For long-running processes (servers, GUI apps), set a short timeout (e.g., 5) -- the process keeps running after timeout.".to_string(),
167                parameters: json!({
168                    "type": "object",
169                    "properties": {
170                        "command": {
171                            "type": "string",
172                            "description": "The command to execute (e.g., 'cargo test', 'npm install', 'firefox &', 'discord &')"
173                        },
174                        "working_dir": {
175                            "type": "string",
176                            "description": "Optional working directory to run the command in. Defaults to project root."
177                        },
178                        "timeout": {
179                            "type": "integer",
180                            "description": "Timeout in seconds (default: 30, max: 300). For servers/daemons, use a short timeout like 5 since the process continues running after timeout."
181                        }
182                    },
183                    "required": ["command"]
184                }),
185            },
186        }
187    }
188
189    fn edit_file_tool() -> Tool {
190        Tool {
191            type_: "function".to_string(),
192            function: ToolFunction {
193                name: "edit_file".to_string(),
194                description: "Make targeted edits to a file by replacing specific text. \
195                    The old_string must match exactly and uniquely in the file. \
196                    Prefer this over write_file for modifying existing files."
197                    .to_string(),
198                parameters: json!({
199                    "type": "object",
200                    "properties": {
201                        "path": {
202                            "type": "string",
203                            "description": "Path to the file to edit"
204                        },
205                        "old_string": {
206                            "type": "string",
207                            "description": "The exact text to find and replace (must be unique in the file)"
208                        },
209                        "new_string": {
210                            "type": "string",
211                            "description": "The new text to replace old_string with"
212                        }
213                    },
214                    "required": ["path", "old_string", "new_string"]
215                }),
216            },
217        }
218    }
219
220    fn web_search_tool() -> Tool {
221        Tool {
222            type_: "function".to_string(),
223            function: ToolFunction {
224                name: "web_search".to_string(),
225                description: "Search the web for information. Returns full page content in markdown format for deep analysis. Use for current information, library documentation, version-specific questions, or any time-sensitive data.".to_string(),
226                parameters: json!({
227                    "type": "object",
228                    "properties": {
229                        "query": {
230                            "type": "string",
231                            "description": "Search query. Be specific and include version numbers when relevant (e.g., 'Rust async tokio 1.40 new features')"
232                        },
233                        "max_results": {
234                            "type": "integer",
235                            "description": "Number of results to fetch (1-10). Use 3 for simple facts, 5-7 for research, 10 for comprehensive analysis.",
236                            "minimum": 1,
237                            "maximum": 10
238                        }
239                    },
240                    "required": ["query", "max_results"]
241                }),
242            },
243        }
244    }
245
246    fn web_fetch_tool() -> Tool {
247        Tool {
248            type_: "function".to_string(),
249            function: ToolFunction {
250                name: "web_fetch".to_string(),
251                description: "Fetch content from a URL and return it as clean markdown. Use for reading documentation pages, articles, GitHub READMEs, or any web page the user references.".to_string(),
252                parameters: json!({
253                    "type": "object",
254                    "properties": {
255                        "url": {
256                            "type": "string",
257                            "description": "The URL to fetch content from (e.g., 'https://docs.rs/tokio/latest')"
258                        }
259                    },
260                    "required": ["url"]
261                }),
262            },
263        }
264    }
265
266    fn agent_tool() -> Tool {
267        Tool {
268            type_: "function".to_string(),
269            function: ToolFunction {
270                name: "agent".to_string(),
271                description: "Spawn an autonomous sub-agent to handle a task independently. \
272                    The agent gets its own conversation context and full tool access. \
273                    Give it a self-contained task via the prompt parameter. \
274                    Multiple agent calls in one response run in parallel."
275                    .to_string(),
276                parameters: json!({
277                    "type": "object",
278                    "properties": {
279                        "prompt": {
280                            "type": "string",
281                            "description": "The task for the agent to complete"
282                        },
283                        "description": {
284                            "type": "string",
285                            "description": "Short label for the UI (e.g., 'Read src/models/ files')"
286                        }
287                    },
288                    "required": ["prompt", "description"]
289                }),
290            },
291        }
292    }
293
294    fn screenshot_tool() -> Tool {
295        Tool {
296            type_: "function".to_string(),
297            function: ToolFunction {
298                name: "screenshot".to_string(),
299                description: "Capture a screenshot. Defaults to fullscreen. For interacting with a specific app, use 'window' mode with the window title (use list_windows first). Also supports 'focused' (active window), 'monitor' (single display), 'region' (specific area). Click/type/key actions automatically return a screenshot, so you don't need to call this after those.".to_string(),
300                parameters: json!({
301                    "type": "object",
302                    "properties": {
303                        "mode": {
304                            "type": "string",
305                            "description": "Capture mode: 'fullscreen' (default), 'window' (specific window by title — best for targeting apps), 'focused' (active window), 'monitor' (single display), 'region' (rectangular area)",
306                            "enum": ["fullscreen", "focused", "monitor", "region", "window"]
307                        },
308                        "window": {
309                            "type": "string",
310                            "description": "Window title for 'window' mode (e.g., 'Discord', 'Firefox'). Use list_windows to discover available windows."
311                        },
312                        "monitor": {
313                            "type": "string",
314                            "description": "Monitor/output name for 'monitor' mode (e.g., 'DP-0', 'HDMI-1')."
315                        },
316                        "region": {
317                            "type": "string",
318                            "description": "Region for 'region' mode, format: 'X,Y,WIDTHxHEIGHT' in screen pixels (e.g., '0,0,1920x1080')"
319                        }
320                    },
321                    "required": []
322                }),
323            },
324        }
325    }
326
327    fn list_windows_tool() -> Tool {
328        Tool {
329            type_: "function".to_string(),
330            function: ToolFunction {
331                name: "list_windows".to_string(),
332                description: "List all visible window titles. Lightweight (no screenshot). Use to discover windows before screenshot(mode: 'window', window: '...').".to_string(),
333                parameters: json!({
334                    "type": "object",
335                    "properties": {},
336                    "required": []
337                }),
338            },
339        }
340    }
341
342    fn click_tool() -> Tool {
343        Tool {
344            type_: "function".to_string(),
345            function: ToolFunction {
346                name: "click".to_string(),
347                description: "Click at screen coordinates. Take a screenshot first to identify target coordinates. The optional screenshot_id selects which screenshot's coordinate space (x, y) refer to — useful when chaining multiple screenshots; if omitted, the most recent screenshot is used.".to_string(),
348                parameters: json!({
349                    "type": "object",
350                    "properties": {
351                        "x": { "type": "integer", "description": "X coordinate (pixels from left)" },
352                        "y": { "type": "integer", "description": "Y coordinate (pixels from top)" },
353                        "button": { "type": "string", "description": "Mouse button: 'left' (default), 'right', or 'middle'", "enum": ["left", "right", "middle"] },
354                        "screenshot_id": { "type": "integer", "description": "Optional id of the screenshot whose coordinates these refer to (from the screenshot tool's success message). Omit to use the most recent." }
355                    },
356                    "required": ["x", "y"]
357                }),
358            },
359        }
360    }
361
362    fn type_text_tool() -> Tool {
363        Tool {
364            type_: "function".to_string(),
365            function: ToolFunction {
366                name: "type_text".to_string(),
367                description: "Type text at the current cursor position. IMPORTANT: You must click the target input field first to give it focus. Without clicking first, keystrokes go to the wrong window.".to_string(),
368                parameters: json!({
369                    "type": "object",
370                    "properties": {
371                        "text": { "type": "string", "description": "The text to type" }
372                    },
373                    "required": ["text"]
374                }),
375            },
376        }
377    }
378
379    fn press_key_tool() -> Tool {
380        Tool {
381            type_: "function".to_string(),
382            function: ToolFunction {
383                name: "press_key".to_string(),
384                description: "Press a key or key combination. Examples: 'Return', 'ctrl+s', 'alt+Tab', 'ctrl+shift+t', 'BackSpace', 'Escape'.".to_string(),
385                parameters: json!({
386                    "type": "object",
387                    "properties": {
388                        "key": { "type": "string", "description": "Key name or combo (e.g., 'Return', 'ctrl+s', 'alt+F4')" }
389                    },
390                    "required": ["key"]
391                }),
392            },
393        }
394    }
395
396    fn scroll_tool() -> Tool {
397        Tool {
398            type_: "function".to_string(),
399            function: ToolFunction {
400                name: "scroll".to_string(),
401                description: "Scroll the screen up or down.".to_string(),
402                parameters: json!({
403                    "type": "object",
404                    "properties": {
405                        "direction": { "type": "string", "description": "Scroll direction", "enum": ["up", "down"] },
406                        "amount": { "type": "integer", "description": "Number of scroll steps (default: 3)" }
407                    },
408                    "required": ["direction"]
409                }),
410            },
411        }
412    }
413
414    fn mouse_move_tool() -> Tool {
415        Tool {
416            type_: "function".to_string(),
417            function: ToolFunction {
418                name: "mouse_move".to_string(),
419                description: "Move the mouse cursor to screen coordinates without clicking. The optional screenshot_id selects which screenshot's coordinate space (x, y) refer to; if omitted, the most recent screenshot is used.".to_string(),
420                parameters: json!({
421                    "type": "object",
422                    "properties": {
423                        "x": { "type": "integer", "description": "X coordinate" },
424                        "y": { "type": "integer", "description": "Y coordinate" },
425                        "screenshot_id": { "type": "integer", "description": "Optional id of the screenshot whose coordinates these refer to. Omit to use the most recent." }
426                    },
427                    "required": ["x", "y"]
428                }),
429            },
430        }
431    }
432}
433
434/// Convert MCP tool definitions to Ollama's tool format.
435///
436/// Each tool is namespaced as `mcp__{server_name}__{tool_name}` following
437/// the Claude Code convention for MCP tool naming.
438pub fn mcp_tools_to_ollama(tools: &[(String, crate::mcp::McpToolDef)]) -> Vec<serde_json::Value> {
439    tools
440        .iter()
441        .map(|(server_name, tool)| {
442            let namespaced_name = format!("mcp__{}__{}", server_name, tool.name);
443            json!({
444                "type": "function",
445                "function": {
446                    "name": namespaced_name,
447                    "description": tool.description,
448                    "parameters": tool.input_schema,
449                }
450            })
451        })
452        .collect()
453}
454
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Number of built-in tools registered by `ToolRegistry::mermaid_tools`.
    /// Update this when a tool is added or removed.
    const EXPECTED_TOOL_COUNT: usize = 16;

    #[test]
    fn test_tool_registry_creation() {
        let registry = ToolRegistry::mermaid_tools();
        assert_eq!(
            registry.tools().len(),
            EXPECTED_TOOL_COUNT,
            "Should have 16 tools defined"
        );
    }

    #[test]
    fn test_tool_names_are_unique() {
        // A duplicated name would make tool calls ambiguous, so guard against
        // copy-paste mistakes when adding a new tool.
        let registry = ToolRegistry::mermaid_tools();
        let mut seen = HashSet::new();
        for tool in registry.tools() {
            assert!(
                seen.insert(tool.function.name.as_str()),
                "duplicate tool name: {}",
                tool.function.name
            );
        }
    }

    #[test]
    fn test_tool_serialization() {
        let ollama_tools = ToolRegistry::ollama_tools_cached();

        assert_eq!(ollama_tools.len(), EXPECTED_TOOL_COUNT);

        // Verify every tool (not just the first) has the expected structure.
        for tool in ollama_tools {
            assert_eq!(
                tool.get("type").and_then(|v| v.as_str()),
                Some("function"),
                "unexpected tool type in {tool}"
            );

            let function = tool
                .get("function")
                .unwrap_or_else(|| panic!("missing `function` in {tool}"));
            assert!(
                function
                    .get("name")
                    .and_then(|v| v.as_str())
                    .is_some_and(|name| !name.is_empty()),
                "missing or empty function name in {tool}"
            );
            assert_eq!(
                function
                    .get("parameters")
                    .and_then(|p| p.get("type"))
                    .and_then(|v| v.as_str()),
                Some("object"),
                "parameters must be a JSON Schema object in {tool}"
            );
        }
    }

    #[test]
    fn test_read_file_tool_schema() {
        let tool = ToolRegistry::read_file_tool();
        assert_eq!(tool.function.name, "read_file");
        assert!(tool.function.description.contains("Read a file"));

        let params = tool.function.parameters.as_object().unwrap();
        assert!(params.get("properties").is_some());
        assert!(params.get("required").is_some());
    }
}